diff options
Diffstat (limited to 'src')
50 files changed, 4915 insertions, 2160 deletions
| diff --git a/src/Makefile.am b/src/Makefile.am index be35b24..911aecd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -4,13 +4,24 @@ libname = libonig.la  AM_CFLAGS = -Wall  AM_CPPFLAGS = -I$(top_srcdir) -I$(includedir) -include_HEADERS = oniguruma.h oniggnu.h onigposix.h +include_HEADERS = oniguruma.h oniggnu.h + +posix_headers = onigposix.h + +if ENABLE_POSIX_API +posix_sources = regposix.c regposerr.c +include_HEADERS += $(posix_headers) +else +posix_sources = +endif + +  lib_LTLIBRARIES = $(libname)  libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \  	regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \  	regenc.c regsyntax.c regtrav.c regversion.c st.c \ -	regposix.c regposerr.c \ +  $(posix_sources) \  	unicode.c \  	unicode_unfold_key.c \  	unicode_fold1_key.c \ diff --git a/src/Makefile.windows b/src/Makefile.windows index 046345a..1ce8ce2 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -15,8 +15,7 @@ LINKFLAGS = -link -incremental:no -pdb:none  INSTALL = install -c
  CP      = copy
  CC = cl
 -DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT
 -RUBYDIR = ..
 +DEFS = -DHAVE_CONFIG_H
  subdirs = 
 @@ -25,44 +24,43 @@ libname   = $(libbase)_s.lib  dllname   = $(libbase).dll
  dlllib    = $(libbase).lib
 -onigheaders  = oniguruma.h regint.h regparse.h regenc.h st.h
 -posixheaders = onigposix.h
 +onigheaders  = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h
 +posixheaders = $(ONIG_DIR)/onigposix.h
  headers      = $(posixheaders) $(onigheaders)
 -onigobjs     = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \
 -	       regexec.obj regenc.obj regsyntax.obj regtrav.obj \
 -	       regversion.obj st.obj onig_init.obj
 -posixobjs    = regposix.obj regposerr.obj
 +onigobjs     = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \
 +	       $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \
 +	       $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj
 +posixobjs    = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj
  libobjs      = $(onigobjs) $(posixobjs)
 -jp_objs      =  euc_jp.obj sjis.obj
 -iso8859_objs =  iso8859_1.obj  iso8859_2.obj \
 -		iso8859_3.obj  iso8859_4.obj \
 -		iso8859_5.obj  iso8859_6.obj \
 -		iso8859_7.obj  iso8859_8.obj \
 -		iso8859_9.obj  iso8859_10.obj \
 -		iso8859_11.obj iso8859_13.obj \
 -		iso8859_14.obj iso8859_15.obj \
 -		iso8859_16.obj
 -
 -encobjs = ascii.obj utf8.obj \
 -		unicode.obj \
 -		utf16_be.obj utf16_le.obj \
 -		utf32_be.obj utf32_le.obj \
 +jp_objs      =  $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj
 +iso8859_objs =  $(BUILD_DIR)/iso8859_1.obj  $(BUILD_DIR)/iso8859_2.obj \
 +		$(BUILD_DIR)/iso8859_3.obj  $(BUILD_DIR)/iso8859_4.obj \
 +		$(BUILD_DIR)/iso8859_5.obj  $(BUILD_DIR)/iso8859_6.obj \
 +		$(BUILD_DIR)/iso8859_7.obj  $(BUILD_DIR)/iso8859_8.obj \
 +		$(BUILD_DIR)/iso8859_9.obj  $(BUILD_DIR)/iso8859_10.obj \
 +		$(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \
 +		$(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \
 +		$(BUILD_DIR)/iso8859_16.obj
 +
 +encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \
 +		$(BUILD_DIR)/unicode.obj \
 +		$(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \
 +		$(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \
  		$(jp_objs) $(iso8859_objs) \
 -		euc_tw.obj euc_kr.obj big5.obj \
 -		gb18030.obj \
 -		koi8_r.obj  \
 -		cp1251.obj \
 -		euc_jp_prop.obj sjis_prop.obj \
 -		unicode_unfold_key.obj unicode_fold1_key.obj \
 -		unicode_fold2_key.obj unicode_fold3_key.obj	# koi8.obj
 -
 -onigsources  = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \
 -	       regsyntax.c regtrav.c regversion.c reggnu.c st.c
 -posixsources = regposix.c regposerr.c
 +		$(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \
 +		$(BUILD_DIR)/gb18030.obj \
 +		$(BUILD_DIR)/koi8_r.obj  \
 +		$(BUILD_DIR)/cp1251.obj \
 +		$(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \
 +		$(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \
 +		$(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj	# $(BUILD_DIR)/koi8.obj
 +
 +onigsources  = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
 +	       $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c
 +posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c
  libsources   = $(posixsources) $(onigsources)
 -rubysources  = $(onigsources)
  patchfiles   = re.c.168.patch re.c.181.patch
  distfiles    = README COPYING HISTORY \
 @@ -77,7 +75,7 @@ makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'  .SUFFIXES:
  .SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
 -.c.obj:
 +{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj:
  	$(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
  # targets
 @@ -96,58 +94,58 @@ $(libname): $(libobjs) $(encobjs)  $(dllname): $(libobjs) $(encobjs)
  	$(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
 -regparse.obj:  regparse.c $(onigheaders) config.h st.h
 -regext.obj:    regext.c   $(onigheaders) config.h
 -regtrav.obj:   regtrav.c  $(onigheaders) config.h
 -regcomp.obj:   regcomp.c  $(onigheaders) config.h
 -regexec.obj:   regexec.c  regint.h regenc.h oniguruma.h config.h
 -reggnu.obj:    reggnu.c   regint.h regenc.h oniguruma.h config.h oniggnu.h
 -regerror.obj:  regerror.c regint.h regenc.h oniguruma.h config.h
 -regenc.obj:    regenc.c   regenc.h oniguruma.h config.h
 -regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h
 -regversion.obj: regversion.c oniguruma.h config.h
 -regposix.obj:  regposix.c $(posixheaders) oniguruma.h config.h
 -regposerr.obj: regposerr.c $(posixheaders) config.h
 -st.obj:        st.c regint.h oniguruma.h config.h st.h
 -onig_init.obj: onig_init.c oniguruma.h
 -
 -ascii.obj:      ascii.c regenc.h config.h
 -unicode.obj:    unicode.c unicode_fold_data.c unicode_property_data.c regenc.h config.h
 -utf8.obj:       utf8.c regenc.h config.h
 -utf16_be.obj:   utf16_be.c regenc.h config.h
 -utf16_le.obj:   utf16_le.c regenc.h config.h
 -utf32_be.obj:   utf32_be.c regenc.h config.h
 -utf32_le.obj:   utf32_le.c regenc.h config.h
 -euc_jp.obj:     euc_jp.c regenc.h config.h
 -euc_tw.obj:     euc_tw.c regenc.h config.h
 -euc_kr.obj:     euc_kr.c regenc.h config.h
 -sjis.obj:       sjis.c regenc.h config.h
 -iso8859_1.obj:  iso8859_1.c regenc.h config.h
 -iso8859_2.obj:  iso8859_2.c regenc.h config.h
 -iso8859_3.obj:  iso8859_3.c regenc.h config.h
 -iso8859_4.obj:  iso8859_4.c regenc.h config.h
 -iso8859_5.obj:  iso8859_5.c regenc.h config.h
 -iso8859_6.obj:  iso8859_6.c regenc.h config.h
 -iso8859_7.obj:  iso8859_7.c regenc.h config.h
 -iso8859_8.obj:  iso8859_8.c regenc.h config.h
 -iso8859_9.obj:  iso8859_9.c regenc.h config.h
 -iso8859_10.obj: iso8859_10.c regenc.h config.h
 -iso8859_11.obj: iso8859_11.c regenc.h config.h
 -iso8859_13.obj: iso8859_13.c regenc.h config.h
 -iso8859_14.obj: iso8859_14.c regenc.h config.h
 -iso8859_15.obj: iso8859_15.c regenc.h config.h
 -iso8859_16.obj: iso8859_16.c regenc.h config.h
 -koi8.obj:       koi8.c   regenc.h config.h
 -koi8_r.obj:     koi8_r.c regenc.h config.h
 -cp1251.obj:     cp1251.c regenc.h config.h
 -big5.obj:       big5.c   regenc.h config.h
 -gb18030.obj:    gb18030.c regenc.h config.h
 -euc_jp_prop.obj:  euc_jp_prop.c regenc.h
 -sjis_prop.obj:    sjis_prop.c regenc.h
 -unicode_unfold_key.obj: unicode_unfold_key.c regenc.h config.h
 -unicode_fold1_key.obj: unicode_fold1_key.c regenc.h config.h
 -unicode_fold2_key.obj: unicode_fold2_key.c regenc.h config.h
 -unicode_fold3_key.obj: unicode_fold3_key.c regenc.h config.h
 +$(BUILD_DIR)/regparse.obj:  $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regext.obj:    $(ONIG_DIR)/regext.c   $(onigheaders) $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regtrav.obj:   $(ONIG_DIR)/regtrav.c  $(onigheaders) $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regcomp.obj:   $(ONIG_DIR)/regcomp.c  $(onigheaders) $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regexec.obj:   $(ONIG_DIR)/regexec.c  $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/reggnu.obj:    $(ONIG_DIR)/reggnu.c   $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h
 +$(BUILD_DIR)/regerror.obj:  $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regenc.obj:    $(ONIG_DIR)/regenc.c   $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regposix.obj:  $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/st.obj:        $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h
 +$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h
 +
 +$(BUILD_DIR)/ascii.obj:      $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/unicode.obj:    $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/utf8.obj:       $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/utf16_be.obj:   $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/utf16_le.obj:   $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/utf32_be.obj:   $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/utf32_le.obj:   $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/euc_jp.obj:     $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/euc_tw.obj:     $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/euc_kr.obj:     $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/sjis.obj:       $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_1.obj:  $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_2.obj:  $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_3.obj:  $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_4.obj:  $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_5.obj:  $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_6.obj:  $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_7.obj:  $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_8.obj:  $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_9.obj:  $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/koi8.obj:       $(ONIG_DIR)/koi8.c   $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/koi8_r.obj:     $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/cp1251.obj:     $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/big5.obj:       $(ONIG_DIR)/big5.c   $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/gb18030.obj:    $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/euc_jp_prop.obj:  $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h
 +$(BUILD_DIR)/sjis_prop.obj:    $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h
 +$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 +$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
  # C library test
  ctest: $(testc)
 @@ -158,26 +156,23 @@ ptest: $(testp)  	.\$(testp)
  $(testc): $(testc).c $(libname)
 -	$(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
 +	$(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
  $(testp): $(testc).c $(dlllib)
 -	$(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib)
 -
 -#$(testc)u.c: test.rb testconvu.rb
 -#	ruby -Ke testconvu.rb test.rb > $@
 +	$(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib)
  $(testc)u: $(testc)u.c $(libname)
 -	$(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
 +	$(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
  clean:
 -	del *.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj
 +	del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj
  samples: all
 -	$(CC) $(CFLAGS) -I. /Fe:simple  sample\simple.c  $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:posix   sample\posix.c   $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:names   sample\names.c   $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:listcap sample\listcap.c $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:sql     sample\sql.c     $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:encode  sample\encode.c  $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:syntax  sample\syntax.c  $(dlllib)
 +	$(CC) $(CFLAGS) -I. -o simple  $(ONIG_DIR)\sample\simple.c  $(dlllib)
 +	$(CC) $(CFLAGS) -I. -o posix   $(ONIG_DIR)\sample\posix.c   $(dlllib)
 +	$(CC) $(CFLAGS) -I. -o names   $(ONIG_DIR)\sample\names.c   $(dlllib)
 +	$(CC) $(CFLAGS) -I. -o listcap $(ONIG_DIR)\sample\listcap.c $(dlllib)
 +	$(CC) $(CFLAGS) -I. -o sql     $(ONIG_DIR)\sample\sql.c     $(dlllib)
 +	$(CC) $(CFLAGS) -I. -o encode  $(ONIG_DIR)\sample\encode.c  $(dlllib)
 +	$(CC) $(CFLAGS) -I. -o syntax  $(ONIG_DIR)\sample\syntax.c  $(dlllib)
\ No newline at end of file diff --git a/src/ascii.c b/src/ascii.c index b21878d..7efaa26 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -2,7 +2,7 @@    ascii.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,59 @@   * SUCH DAMAGE.   */ -#include "regenc.h" +#include "regint.h"   /* for USE_CALLOUT */ + +static int +init(void) +{ +#ifdef USE_CALLOUT + +    int id; +    OnigEncoding enc; +    char* name; +    unsigned int t_long; +    unsigned int args[4]; +    OnigValue    opts[4]; + +    enc = ONIG_ENCODING_ASCII; +    t_long = ONIG_TYPE_LONG; + +    name = "FAIL";        BC0_P(name, fail); +    name = "MISMATCH";    BC0_P(name, mismatch); +    name = "MAX";         BC_B(name, max, 1, &t_long); + +    name = "ERROR"; +    args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; +    BC_P_O(name, error, 1, args, 1, opts); + +    name = "COUNT"; +    args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; +    BC_B_O(name, count, 1, args, 1, opts); + +    name = "TOTAL_COUNT"; +    args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; +    BC_B_O(name, total_count, 1, args, 1, opts); + +    name = "CMP"; +    args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; +    args[1] = ONIG_TYPE_STRING; +    args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; +    BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + +  return ONIG_NORMAL; +} + +#if 0 +static int +is_initialized(void) +{ +  /* Don't use this function */ +  /* can't answer, because builtin callout entries removed in onig_end() */ +  return 0; +} +#endif  static int  ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -41,8 +93,8 @@ ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)  OnigEncodingType OnigEncodingASCII = {    onigenc_single_byte_mbc_enc_len,    "US-ASCII",  /* name */ -  1,           /* max byte length */ -  1,           /* min byte length */ +  1,           /* max enc length */ +  1,           /* min enc length */    onigenc_is_mbc_newline_0x0a,    onigenc_single_byte_mbc_to_code,    onigenc_single_byte_code_to_mbclen, @@ -55,7 +107,8 @@ OnigEncodingType OnigEncodingASCII = {    onigenc_not_support_get_ctype_code_range,    onigenc_single_byte_left_adjust_char_head,    onigenc_always_true_is_allowed_reverse_match, -  NULL, /* init */ -  NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  init, +  0, /* is_initialized */ +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; @@ -2,7 +2,7 @@    big5.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -186,5 +186,6 @@ OnigEncodingType OnigEncodingBIG5 = {    big5_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  is_valid_mbc_string +  is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index e13fad1..b59cc8d 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -31,6 +31,9 @@  /* Define to 1 if you have the <string.h> header file. */  #cmakedefine HAVE_STRING_H  ${HAVE_STRING_H} +/* Define to 1 if you have the <limits.h> header file. */ +#cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H} +  /* Define to 1 if you have the <sys/times.h> header file. */  #cmakedefine HAVE_SYS_TIMES_H  ${HAVE_SYS_TIMES_H} @@ -64,9 +67,6 @@  /* Define to 1 if you have the ANSI C header files. */  #cmakedefine STDC_HEADERS  ${STDC_HEADERS} -/* Define if combination explosion check */ -#cmakedefine USE_COMBINATION_EXPLOSION_CHECK  ${USE_COMBINATION_EXPLOSION_CHECK} -  /* Define if enable CR+NL as line terminator */  #cmakedefine USE_CRNL_AS_LINE_TERMINATOR  ${USE_CRNL_AS_LINE_TERMINATOR} diff --git a/src/cp1251.c b/src/cp1251.c index 4d655bb..f7b43c3 100644 --- a/src/cp1251.c +++ b/src/cp1251.c @@ -2,7 +2,7 @@    cp1251.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2006-2016  Byte      <byte AT mail DOT kna DOT ru> + * Copyright (c) 2006-2018  Byte      <byte AT mail DOT kna DOT ru>   *                          K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   * @@ -199,5 +199,6 @@ OnigEncodingType OnigEncodingCP1251 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/euc_jp.c b/src/euc_jp.c index 42c3bce..8dd6ac1 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -2,7 +2,7 @@    euc_jp.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -306,5 +306,6 @@ OnigEncodingType OnigEncodingEUC_JP = {    is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  is_valid_mbc_string +  is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/euc_kr.c b/src/euc_kr.c index 450caf1..08bfa1c 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -2,7 +2,7 @@    euc_kr.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -184,5 +184,6 @@ OnigEncodingType OnigEncodingEUC_CN = {    euckr_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  is_valid_mbc_string +  is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/euc_tw.c b/src/euc_tw.c index b3ee628..dbf0eac 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -2,7 +2,7 @@    euc_tw.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -167,5 +167,6 @@ OnigEncodingType OnigEncodingEUC_TW = {    euctw_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  is_valid_mbc_string +  is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/gb18030.c b/src/gb18030.c index c8b5865..073c83b 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -2,7 +2,7 @@    gb18030.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2005-2016  KUBO Takehiro <kubo AT jiubao DOT org> + * Copyright (c) 2005-2018  KUBO Takehiro <kubo AT jiubao DOT org>   *                          K.Kosako <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   * @@ -534,5 +534,6 @@ OnigEncodingType OnigEncodingGB18030 = {    gb18030_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  is_valid_mbc_string +  is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_1.c b/src/iso8859_1.c index 573931f..bcd7e26 100644 --- a/src/iso8859_1.c +++ b/src/iso8859_1.c @@ -2,7 +2,7 @@    iso8859_1.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -271,5 +271,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_10.c b/src/iso8859_10.c index 91b18d4..a5946cc 100644 --- a/src/iso8859_10.c +++ b/src/iso8859_10.c @@ -2,7 +2,7 @@    iso8859_10.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -238,5 +238,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_11.c b/src/iso8859_11.c index 518be25..ec94fd1 100644 --- a/src/iso8859_11.c +++ b/src/iso8859_11.c @@ -2,7 +2,7 @@    iso8859_11.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_13.c b/src/iso8859_13.c index d1f39a2..fba7fd4 100644 --- a/src/iso8859_13.c +++ b/src/iso8859_13.c @@ -2,7 +2,7 @@    iso8859_13.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -227,5 +227,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_14.c b/src/iso8859_14.c index 3361b0d..e1f71f5 100644 --- a/src/iso8859_14.c +++ b/src/iso8859_14.c @@ -2,7 +2,7 @@    iso8859_14.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -240,5 +240,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_15.c b/src/iso8859_15.c index b09e876..236e9e7 100644 --- a/src/iso8859_15.c +++ b/src/iso8859_15.c @@ -2,7 +2,7 @@    iso8859_15.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_16.c b/src/iso8859_16.c index 29a350d..42045bd 100644 --- a/src/iso8859_16.c +++ b/src/iso8859_16.c @@ -2,7 +2,7 @@    iso8859_16.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -236,5 +236,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_2.c b/src/iso8859_2.c index 9eb3536..db93046 100644 --- a/src/iso8859_2.c +++ b/src/iso8859_2.c @@ -2,7 +2,7 @@    iso8859_2.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_3.c b/src/iso8859_3.c index 862823a..6fe5e6f 100644 --- a/src/iso8859_3.c +++ b/src/iso8859_3.c @@ -2,7 +2,7 @@    iso8859_3.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_4.c b/src/iso8859_4.c index db706da..ee1eb93 100644 --- a/src/iso8859_4.c +++ b/src/iso8859_4.c @@ -2,7 +2,7 @@    iso8859_4.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -236,5 +236,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_5.c b/src/iso8859_5.c index 0e03e9c..7d828e1 100644 --- a/src/iso8859_5.c +++ b/src/iso8859_5.c @@ -2,7 +2,7 @@    iso8859_5.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -225,5 +225,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_6.c b/src/iso8859_6.c index 6289af5..a959e98 100644 --- a/src/iso8859_6.c +++ b/src/iso8859_6.c @@ -2,7 +2,7 @@    iso8859_6.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_7.c b/src/iso8859_7.c index 75b520f..e695523 100644 --- a/src/iso8859_7.c +++ b/src/iso8859_7.c @@ -2,7 +2,7 @@    iso8859_7.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -221,5 +221,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_8.c b/src/iso8859_8.c index 5f18345..66b63b8 100644 --- a/src/iso8859_8.c +++ b/src/iso8859_8.c @@ -2,7 +2,7 @@    iso8859_8.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/iso8859_9.c b/src/iso8859_9.c index d0c06bb..d780293 100644 --- a/src/iso8859_9.c +++ b/src/iso8859_9.c @@ -2,7 +2,7 @@    iso8859_9.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -227,5 +227,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; @@ -2,7 +2,7 @@    koi8.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -249,5 +249,6 @@ OnigEncodingType OnigEncodingKOI8 = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/koi8_r.c b/src/koi8_r.c index f8ef34f..e88cfe3 100644 --- a/src/koi8_r.c +++ b/src/koi8_r.c @@ -2,7 +2,7 @@    koi8_r.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -211,5 +211,6 @@ OnigEncodingType OnigEncodingKOI8_R = {    onigenc_always_true_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  onigenc_always_true_is_valid_mbc_string +  onigenc_always_true_is_valid_mbc_string, +  0, 0, 0  }; diff --git a/src/onig_init.c b/src/onig_init.c index 9f53568..7ad98b7 100644 --- a/src/onig_init.c +++ b/src/onig_init.c @@ -2,7 +2,7 @@    onig_init.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2016-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,7 @@   * SUCH DAMAGE.   */ -#include "oniguruma.h" +#include "regint.h"  /* onig_init(): deprecated function */  extern int diff --git a/src/oniggnu.h b/src/oniggnu.h index 3da9f23..d688883 100644 --- a/src/oniggnu.h +++ b/src/oniggnu.h @@ -35,10 +35,12 @@  extern "C" {  #endif -#define RE_MBCTYPE_ASCII         0 -#define RE_MBCTYPE_EUC           1 -#define RE_MBCTYPE_SJIS          2 -#define RE_MBCTYPE_UTF8          3 +enum { +  RE_MBCTYPE_ASCII = 0, +  RE_MBCTYPE_EUC   = 1, +  RE_MBCTYPE_SJIS  = 2, +  RE_MBCTYPE_UTF8  = 3 +};  /* GNU regex options */  #ifndef RE_NREGS diff --git a/src/onigposix.h b/src/onigposix.h index 22211e4..da0f919 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -4,7 +4,7 @@    onigposix.h - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -97,7 +97,7 @@ typedef struct {  #ifndef ONIG_EXTERN  #if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) +#if defined(ONIGURUMA_EXPORT)  #define ONIG_EXTERN   extern __declspec(dllexport)  #else  #define ONIG_EXTERN   extern __declspec(dllimport) diff --git a/src/oniguruma.h b/src/oniguruma.h index 5ad4469..349c00e 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -4,7 +4,7 @@    oniguruma.h - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -35,30 +35,10 @@ extern "C" {  #define ONIGURUMA  #define ONIGURUMA_VERSION_MAJOR   6 -#define ONIGURUMA_VERSION_MINOR   7 -#define ONIGURUMA_VERSION_TEENY   0 +#define ONIGURUMA_VERSION_MINOR   8 +#define ONIGURUMA_VERSION_TEENY   1 -#ifdef __cplusplus -# ifndef  HAVE_PROTOTYPES -#  define HAVE_PROTOTYPES 1 -# endif -# ifndef  HAVE_STDARG_PROTOTYPES -#  define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ -#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 -# ifndef  HAVE_STDARG_PROTOTYPES -#  define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifdef HAVE_STDARG_H -# ifndef  HAVE_STDARG_PROTOTYPES -#  define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif +#define ONIGURUMA_VERSION_INT     60801  #ifndef P_  #if defined(__STDC__) || defined(_WIN32) @@ -69,16 +49,12 @@ extern "C" {  #endif  #ifndef PV_ -#ifdef HAVE_STDARG_PROTOTYPES  # define PV_(args) args -#else -# define PV_(args) () -#endif  #endif  #ifndef ONIG_EXTERN  #if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) || defined(RUBY_EXPORT) +#if defined(ONIGURUMA_EXPORT)  #define ONIG_EXTERN   extern __declspec(dllexport)  #else  #define ONIG_EXTERN   extern __declspec(dllimport) @@ -96,10 +72,6 @@ extern "C" {  #define UChar OnigUChar  #endif -#ifdef _WIN32 -#include <windows.h> -#endif -  typedef unsigned int   OnigCodePoint;  typedef unsigned char  OnigUChar;  typedef unsigned int   OnigCtype; @@ -166,6 +138,9 @@ typedef struct OnigEncodingTypeST {    int    (*init)(void);    int    (*is_initialized)(void);    int    (*is_valid_mbc_string)(const OnigUChar* s, const OnigUChar* end); +  unsigned int flag; +  OnigCodePoint sb_range; +  int index;  } OnigEncodingType;  typedef OnigEncodingType* OnigEncoding; @@ -243,21 +218,24 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;  /* 18: 6(max-byte) * 3(case-fold chars) */  /* character types */ -#define ONIGENC_CTYPE_NEWLINE   0 -#define ONIGENC_CTYPE_ALPHA     1 -#define ONIGENC_CTYPE_BLANK     2 -#define ONIGENC_CTYPE_CNTRL     3 -#define ONIGENC_CTYPE_DIGIT     4 -#define ONIGENC_CTYPE_GRAPH     5 -#define ONIGENC_CTYPE_LOWER     6 -#define ONIGENC_CTYPE_PRINT     7 -#define ONIGENC_CTYPE_PUNCT     8 -#define ONIGENC_CTYPE_SPACE     9 -#define ONIGENC_CTYPE_UPPER    10 -#define ONIGENC_CTYPE_XDIGIT   11 -#define ONIGENC_CTYPE_WORD     12 -#define ONIGENC_CTYPE_ALNUM    13  /* alpha || digit */ -#define ONIGENC_CTYPE_ASCII    14 +typedef enum { +  ONIGENC_CTYPE_NEWLINE = 0, +  ONIGENC_CTYPE_ALPHA   = 1, +  ONIGENC_CTYPE_BLANK   = 2, +  ONIGENC_CTYPE_CNTRL   = 3, +  ONIGENC_CTYPE_DIGIT   = 4, +  ONIGENC_CTYPE_GRAPH   = 5, +  ONIGENC_CTYPE_LOWER   = 6, +  ONIGENC_CTYPE_PRINT   = 7, +  ONIGENC_CTYPE_PUNCT   = 8, +  ONIGENC_CTYPE_SPACE   = 9, +  ONIGENC_CTYPE_UPPER   = 10, +  ONIGENC_CTYPE_XDIGIT  = 11, +  ONIGENC_CTYPE_WORD    = 12, +  ONIGENC_CTYPE_ALNUM   = 13,  /* alpha || digit */ +  ONIGENC_CTYPE_ASCII   = 14 +} OnigEncCtype; +  #define ONIGENC_MAX_STD_CTYPE  ONIGENC_CTYPE_ASCII @@ -365,7 +343,8 @@ ONIG_EXTERN  int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));  ONIG_EXTERN  int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); - +ONIG_EXTERN +UChar* onigenc_strdup P_((OnigEncoding enc, const UChar* s, const UChar* end));  /* PART: regular expression */ @@ -513,6 +492,8 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  #define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP   (1U<<25) /* (?~...) */  #define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER   (1U<<26) /* \X \y \Y */  #define ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL     (1U<<27) /* (?R), (?&name)... */ +#define ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (1U<<28) /* (?{...}) (?{{...}}) */ +#define ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME      (1U<<29) /* (*name) (*name{a,..}) */  /* syntax (behavior) */  #define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1U<<31) /* not implemented */ @@ -552,6 +533,7 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  #define ONIG_NORMAL                                            0  #define ONIG_MISMATCH                                         -1  #define ONIG_NO_SUPPORT_CONFIG                                -2 +#define ONIG_ABORT                                            -3  /* internal error */  #define ONIGERR_MEMORY                                         -5 @@ -562,6 +544,7 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  #define ONIGERR_UNEXPECTED_BYTECODE                           -14  #define ONIGERR_MATCH_STACK_LIMIT_OVER                        -15  #define ONIGERR_PARSE_DEPTH_LIMIT_OVER                        -16 +#define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER                     -17  #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED                -21  #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR  -22  #define ONIGERR_FAIL_TO_INITIALIZE                            -23 @@ -616,6 +599,12 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  #define ONIGERR_INVALID_IF_ELSE_SYNTAX                       -224  #define ONIGERR_INVALID_ABSENT_GROUP_PATTERN                 -225  #define ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN       -226 +#define ONIGERR_INVALID_CALLOUT_PATTERN                      -227 +#define ONIGERR_INVALID_CALLOUT_NAME                         -228 +#define ONIGERR_UNDEFINED_CALLOUT_NAME                       -229 +#define ONIGERR_INVALID_CALLOUT_BODY                         -230 +#define ONIGERR_INVALID_CALLOUT_TAG_NAME                     -231 +#define ONIGERR_INVALID_CALLOUT_ARG                          -232  #define ONIGERR_INVALID_CODE_POINT_VALUE                     -400  #define ONIGERR_INVALID_WIDE_CHAR_VALUE                      -400  #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE                      -401 @@ -681,49 +670,8 @@ extern void onig_null_warn P_((const char* s));  #define ONIG_CHAR_TABLE_SIZE   256 -typedef struct re_pattern_buffer { -  /* common members of BBuf(bytes-buffer) */ -  unsigned char* p;         /* compiled pattern */ -  unsigned int used;        /* used space for p */ -  unsigned int alloc;       /* allocated space for p */ - -  int num_mem;                   /* used memory(...) num counted from 1 */ -  int num_repeat;                /* OP_REPEAT/OP_REPEAT_NG id-counter */ -  int num_null_check;            /* OP_EMPTY_CHECK_START/END id counter */ -  int num_comb_exp_check;        /* combination explosion check */ -  int num_call;                  /* number of subexp call */ -  unsigned int capture_history;  /* (?@...) flag (1-31) */ -  unsigned int bt_mem_start;     /* need backtrack flag */ -  unsigned int bt_mem_end;       /* need backtrack flag */ -  int stack_pop_level; -  int repeat_range_alloc; -  OnigRepeatRange* repeat_range; - -  OnigEncoding      enc; -  OnigOptionType    options; -  OnigSyntaxType*   syntax; -  OnigCaseFoldType  case_fold_flag; -  void*             name_table; - -  /* optimization info (string search, char-map and anchors) */ -  int            optimize;          /* optimize flag */ -  int            threshold_len;     /* search str-length for apply optimize */ -  int            anchor;            /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ -  OnigLen   anchor_dmin;       /* (SEMI_)END_BUF anchor distance */ -  OnigLen   anchor_dmax;       /* (SEMI_)END_BUF anchor distance */ -  int            sub_anchor;        /* start-anchor for exact or map */ -  unsigned char *exact; -  unsigned char *exact_end; -  unsigned char  map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ -  int           *int_map;                   /* BM skip for exact_len > 255 */ -  int           *int_map_backward;          /* BM skip for backward search */ -  OnigLen   dmin;                      /* min-distance of exact or map */ -  OnigLen   dmax;                      /* max-distance of exact or map */ - -  /* regex_t link chain */ -  struct re_pattern_buffer* chain;  /* escape compile-conflict */ -} OnigRegexType; - +struct re_pattern_buffer; +typedef struct re_pattern_buffer OnigRegexType;  typedef OnigRegexType*  OnigRegex;  #ifndef ONIG_ESCAPE_REGEX_T_COLLISION @@ -740,10 +688,74 @@ typedef struct {    OnigCaseFoldType   case_fold_flag;  } OnigCompileInfo; + +/* types for callout */ +typedef enum { +  ONIG_CALLOUT_IN_PROGRESS   = 1, /* 1<<0 */ +  ONIG_CALLOUT_IN_RETRACTION = 2  /* 1<<1 */ +} OnigCalloutIn; + +#define ONIG_CALLOUT_IN_BOTH  (ONIG_CALLOUT_IN_PROGRESS | ONIG_CALLOUT_IN_RETRACTION) + +typedef enum { +  ONIG_CALLOUT_OF_CONTENTS = 0, +  ONIG_CALLOUT_OF_NAME     = 1 +} OnigCalloutOf; + +typedef enum { +  ONIG_CALLOUT_TYPE_SINGLE              = 0, +  ONIG_CALLOUT_TYPE_START_CALL          = 1, +  ONIG_CALLOUT_TYPE_BOTH_CALL           = 2, +  ONIG_CALLOUT_TYPE_START_MARK_END_CALL = 3, +} OnigCalloutType; + + +#define ONIG_NON_NAME_ID        -1 +#define ONIG_NON_CALLOUT_NUM     0 + +#define ONIG_CALLOUT_MAX_ARGS_NUM     4 +#define ONIG_CALLOUT_DATA_SLOT_NUM    5 + +struct OnigCalloutArgsStruct; +typedef struct OnigCalloutArgsStruct OnigCalloutArgs; + +typedef int (*OnigCalloutFunc)(OnigCalloutArgs* args, void* user_data); + +/* callout function return values (less than -1: error code) */ +typedef enum { +  ONIG_CALLOUT_FAIL     =  1, +  ONIG_CALLOUT_SUCCESS  =  0 +} OnigCalloutResult; + +typedef enum { +  ONIG_TYPE_VOID     = 0, +  ONIG_TYPE_LONG     = 1<<0, +  ONIG_TYPE_CHAR     = 1<<1, +  ONIG_TYPE_STRING   = 1<<2, +  ONIG_TYPE_POINTER  = 1<<3, +  ONIG_TYPE_TAG      = 1<<4, +} OnigType; + +typedef union { +  long l; +  OnigCodePoint c; +  struct { +    OnigUChar* start; +    OnigUChar* end; +  } s; +  void* p; +  int tag;  /* tag -> callout_num */ +} OnigValue; + + +struct OnigMatchParamStruct; +typedef struct OnigMatchParamStruct OnigMatchParam; + +  /* Oniguruma Native API */  ONIG_EXTERN -int onig_initialize P_((OnigEncoding encodings[], int n)); +int onig_initialize P_((OnigEncoding encodings[], int number_of_encodings));  /* onig_init(): deprecated function. Use onig_initialize(). */  ONIG_EXTERN  int onig_init P_((void)); @@ -756,7 +768,7 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f));  ONIG_EXTERN  int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));  ONIG_EXTERN -int  onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); +int  onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));  int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));  ONIG_EXTERN  int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); @@ -765,12 +777,16 @@ void onig_free P_((OnigRegex));  ONIG_EXTERN  void onig_free_body P_((OnigRegex));  ONIG_EXTERN -int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); +int onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg);  ONIG_EXTERN  int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));  ONIG_EXTERN +int onig_search_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp)); +ONIG_EXTERN  int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));  ONIG_EXTERN +int onig_match_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp)); +ONIG_EXTERN  OnigRegion* onig_region_new P_((void));  ONIG_EXTERN  void onig_region_init P_((OnigRegion* region)); @@ -843,6 +859,10 @@ unsigned int onig_get_match_stack_limit_size P_((void));  ONIG_EXTERN  int onig_set_match_stack_limit_size P_((unsigned int size));  ONIG_EXTERN +unsigned long onig_get_retry_limit_in_match P_((void)); +ONIG_EXTERN +int onig_set_retry_limit_in_match P_((unsigned long n)); +ONIG_EXTERN  unsigned int onig_get_parse_depth_limit P_((void));  ONIG_EXTERN  int onig_set_capture_num_limit P_((int num)); @@ -857,6 +877,121 @@ const char* onig_version P_((void));  ONIG_EXTERN  const char* onig_copyright P_((void)); +/* for OnigMatchParam */ +ONIG_EXTERN +OnigMatchParam* onig_new_match_param P_((void)); +ONIG_EXTERN +void onig_free_match_param P_((OnigMatchParam* p)); +ONIG_EXTERN +void onig_free_match_param_content P_((OnigMatchParam* p)); +ONIG_EXTERN +int onig_initialize_match_param P_((OnigMatchParam* mp)); +ONIG_EXTERN +int onig_set_match_stack_limit_size_of_match_param P_((OnigMatchParam* param, unsigned int limit)); +ONIG_EXTERN +int onig_set_retry_limit_in_match_of_match_param P_((OnigMatchParam* param, unsigned long limit)); +ONIG_EXTERN +int onig_set_progress_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); +ONIG_EXTERN +int onig_set_retraction_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); + +/* for callout functions */ +ONIG_EXTERN +OnigCalloutFunc onig_get_progress_callout P_((void)); +ONIG_EXTERN +int onig_set_progress_callout P_((OnigCalloutFunc f)); +ONIG_EXTERN +OnigCalloutFunc onig_get_retraction_callout P_((void)); +ONIG_EXTERN +int onig_set_retraction_callout P_((OnigCalloutFunc f)); +ONIG_EXTERN +int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[])); /* name: single-byte string */ +ONIG_EXTERN +OnigUChar* onig_get_callout_name_by_name_id P_((int id)); +ONIG_EXTERN +int onig_get_callout_num_by_tag P_((OnigRegex reg, const UChar* tag, const UChar* tag_end)); +ONIG_EXTERN +int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType type, OnigValue* val)); + +/* used in callout functions */ +ONIG_EXTERN +int onig_get_callout_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +OnigCalloutIn onig_get_callout_in_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_name_id_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_contents_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_contents_end_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_args_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_passed_args_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_arg_by_callout_args P_((OnigCalloutArgs* args, int index, OnigType* type, OnigValue* val)); +ONIG_EXTERN +const OnigUChar* onig_get_string_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_string_end_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_start_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_right_range_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_current_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +OnigRegex onig_get_regex_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +unsigned long onig_get_retry_counter_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_callout_tag_is_exist_at_callout_num P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +const OnigUChar* onig_get_callout_tag_start P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +const OnigUChar* onig_get_callout_tag_end P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +int onig_get_callout_data_dont_clear_old P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args_self_dont_clear_old P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_get_capture_range_in_callout P_((OnigCalloutArgs* args, int mem_num, int* begin, int* end)); +ONIG_EXTERN +int onig_get_used_stack_size_in_callout P_((OnigCalloutArgs* args, int* used_num, int* used_bytes)); + +/* builtin callout functions */ +ONIG_EXTERN +int onig_builtin_fail P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_mismatch P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_error P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_count P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_total_count P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_max P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_cmp P_((OnigCalloutArgs* args, void* user_data)); + +ONIG_EXTERN +int onig_setup_builtin_monitors_by_ascii_encoded_name P_((void* fp)); +  #ifdef __cplusplus  }  #endif diff --git a/src/regcomp.c b/src/regcomp.c index 63df18b..a19109f 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -2,7 +2,7 @@    regcomp.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -78,7 +78,7 @@ int_stack_push(int_stack* s, int v)  {    if (s->n >= s->alloc) {      int new_size = s->alloc * 2; -    int* nv = (int* )xrealloc(s->v, new_size); +    int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);      if (IS_NULL(nv)) return ONIGERR_MEMORY;      s->alloc = new_size; @@ -121,26 +121,28 @@ onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)    return 0;  } - -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; -#endif - -static UChar* -str_dup(UChar* s, UChar* end) +static int +int_multiply_cmp(int x, int y, int v)  { -  int len = (int )(end - s); +  if (x == 0 || y == 0) return -1; -  if (len > 0) { -    UChar* r = (UChar* )xmalloc(len + 1); -    CHECK_NULL_RETURN(r); -    xmemcpy(r, s, len); -    r[len] = (UChar )0; -    return r; +  if (x < INT_MAX / y) { +    int xy = x * y; +    if (xy > v) return 1; +    else { +      if (xy == v) return 0; +      else return -1; +    }    } -  else return NULL; +  else +    return 1;  } + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif +  static void  swap_node(Node* a, Node* b)  { @@ -200,20 +202,6 @@ bitset_is_empty(BitSetRef bs)    return 1;  } -#ifdef ONIG_DEBUG -static int -bitset_on_num(BitSetRef bs) -{ -  int i, n; - -  n = 0; -  for (i = 0; i < SINGLE_BYTE_SIZE; i++) { -    if (BITSET_AT(bs, i)) n++; -  } -  return n; -} -#endif -  extern int  onig_bbuf_init(BBuf* buf, int size)  { @@ -282,17 +270,6 @@ add_opcode(regex_t* reg, int opcode)    return 0;  } -#ifdef USE_COMBINATION_EXPLOSION_CHECK -static int -add_state_check_num(regex_t* reg, int num) -{ -  StateCheckNumType n = (StateCheckNumType )num; - -  BB_ADD(reg, &n, SIZE_STATE_CHECK_NUM); -  return 0; -} -#endif -  static int  add_rel_addr(regex_t* reg, int addr)  { @@ -811,7 +788,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,  }  static int -is_anychar_star_quantifier(QuantNode* qn) +is_anychar_infinite_greedy(QuantNode* qn)  {    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&        NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) @@ -823,254 +800,21 @@ is_anychar_star_quantifier(QuantNode* qn)  #define QUANTIFIER_EXPAND_LIMIT_SIZE   50  #define CKN_ON   (ckn > 0) -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -static int -compile_length_quantifier_node(QuantNode* qn, regex_t* reg) -{ -  int len, mod_tlen, cklen; -  int ckn; -  int infinite = IS_REPEAT_INFINITE(qn->upper); -  int empty_info = qn->body_empty_info; -  int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); - -  if (tlen < 0) return tlen; - -  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - -  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); - -  /* anychar repeat */ -  if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) { -    if (qn->greedy && infinite) { -      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) -        return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; -      else -        return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; -    } -  } - -  if (empty_info == QUANT_BODY_IS_NOT_EMPTY) -    mod_tlen = tlen; -  else -    mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); - -  if (infinite && qn->lower <= 1) { -    if (qn->greedy) { -      if (qn->lower == 1) -        len = SIZE_OP_JUMP; -      else -        len = 0; - -      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; -    } -    else { -      if (qn->lower == 0) -        len = SIZE_OP_JUMP; -      else -        len = 0; - -      len += mod_tlen + SIZE_OP_PUSH + cklen; -    } -  } -  else if (qn->upper == 0) { -    if (qn->is_refered != 0) /* /(?<n>..){0}/ */ -      len = SIZE_OP_JUMP + tlen; -    else -      len = 0; -  } -  else if (qn->upper == 1 && qn->greedy) { -    if (qn->lower == 0) { -      if (CKN_ON) { -        len = SIZE_OP_STATE_CHECK_PUSH + tlen; -      } -      else { -        len = SIZE_OP_PUSH + tlen; -      } -    } -    else { -      len = tlen; -    } -  } -  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ -    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; -  } -  else { -    len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; -    if (CKN_ON) -      len += SIZE_OP_STATE_CHECK; -  } - -  return len; -} - -static int -compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) -{ -  int r, mod_tlen; -  int ckn; -  int infinite = IS_REPEAT_INFINITE(qn->upper); -  int empty_info = qn->body_empty_info; -  int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); - -  if (tlen < 0) return tlen; - -  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - -  if (is_anychar_star_quantifier(qn)) { -    r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); -    if (r != 0) return r; -    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { -      if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) -        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); -      else -        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); -      if (r != 0) return r; -      if (CKN_ON) { -        r = add_state_check_num(reg, ckn); -        if (r != 0) return r; -      } - -      return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); -    } -    else { -      if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) { -        r = add_opcode(reg, (CKN_ON ? -                             OP_STATE_CHECK_ANYCHAR_ML_STAR -                             : OP_ANYCHAR_ML_STAR)); -      } -      else { -        r = add_opcode(reg, (CKN_ON ? -                             OP_STATE_CHECK_ANYCHAR_STAR -                             : OP_ANYCHAR_STAR)); -      } -      if (r != 0) return r; -      if (CKN_ON) -        r = add_state_check_num(reg, ckn); - -      return r; -    } -  } - -  if (empty_info == QUANT_BODY_IS_NOT_EMPTY) -    mod_tlen = tlen; -  else -    mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); - -  if (infinite && qn->lower <= 1) { -    if (qn->greedy) { -      if (qn->lower == 1) { -        r = add_opcode_rel_addr(reg, OP_JUMP, -                       (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); -        if (r != 0) return r; -      } - -      if (CKN_ON) { -        r = add_opcode(reg, OP_STATE_CHECK_PUSH); -        if (r != 0) return r; -        r = add_state_check_num(reg, ckn); -        if (r != 0) return r; -        r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); -      } -      else { -        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); -      } -      if (r != 0) return r; -      r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); -      if (r != 0) return r; -      r = add_opcode_rel_addr(reg, OP_JUMP, -                -(mod_tlen + (int )SIZE_OP_JUMP -                + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); -    } -    else { -      if (qn->lower == 0) { -        r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); -        if (r != 0) return r; -      } -      r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); -      if (r != 0) return r; -      if (CKN_ON) { -        r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); -        if (r != 0) return r; -        r = add_state_check_num(reg, ckn); -        if (r != 0) return r; -        r = add_rel_addr(reg, -                         -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); -      } -      else -        r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); -    } -  } -  else if (qn->upper == 0) { -    if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ -      r = add_opcode_rel_addr(reg, OP_JUMP, tlen); -      if (r != 0) return r; -      r = compile_tree(NODE_QUANT_BODY(qn), reg, env); -    } -    else -      r = 0; -  } -  else if (qn->upper == 1 && qn->greedy) { -    if (qn->lower == 0) { -      if (CKN_ON) { -        r = add_opcode(reg, OP_STATE_CHECK_PUSH); -        if (r != 0) return r; -        r = add_state_check_num(reg, ckn); -        if (r != 0) return r; -        r = add_rel_addr(reg, tlen); -      } -      else { -        r = add_opcode_rel_addr(reg, OP_PUSH, tlen); -      } -      if (r != 0) return r; -    } - -    r = compile_tree(NODE_QUANT_BODY(qn), reg, env); -  } -  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ -    if (CKN_ON) { -      r = add_opcode(reg, OP_STATE_CHECK_PUSH); -      if (r != 0) return r; -      r = add_state_check_num(reg, ckn); -      if (r != 0) return r; -      r = add_rel_addr(reg, SIZE_OP_JUMP); -    } -    else { -      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); -    } - -    if (r != 0) return r; -    r = add_opcode_rel_addr(reg, OP_JUMP, tlen); -    if (r != 0) return r; -    r = compile_tree(NODE_QUANT_BODY(qn), reg, env); -  } -  else { -    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env); -    if (CKN_ON) { -      if (r != 0) return r; -      r = add_opcode(reg, OP_STATE_CHECK); -      if (r != 0) return r; -      r = add_state_check_num(reg, ckn); -    } -  } -  return r; -} - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ -  static int  compile_length_quantifier_node(QuantNode* qn, regex_t* reg)  {    int len, mod_tlen;    int infinite = IS_REPEAT_INFINITE(qn->upper); -  int empty_info = qn->body_empty_info; +  enum QuantBodyEmpty empty_info = qn->body_empty_info;    int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);    if (tlen < 0) return tlen; +  if (tlen == 0) return 0;    /* anychar repeat */ -  if (is_anychar_star_quantifier(qn)) { -    if (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE) { +  if (is_anychar_infinite_greedy(qn)) { +    if (qn->lower <= 1 || +        int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {        if (IS_NOT_NULL(qn->next_head_exact))          return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;        else @@ -1084,7 +828,8 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)      mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);    if (infinite && -      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +      (qn->lower <= 1 || +       int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {      if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {        len = SIZE_OP_JUMP;      } @@ -1107,8 +852,9 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)      len = SIZE_OP_JUMP + tlen;    }    else if (!infinite && qn->greedy && -           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper -                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +           (qn->upper == 1 || +            int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper, +                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {      len = tlen * qn->lower;      len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);    } @@ -1128,13 +874,15 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)  {    int i, r, mod_tlen;    int infinite = IS_REPEAT_INFINITE(qn->upper); -  int empty_info = qn->body_empty_info; +  enum QuantBodyEmpty empty_info = qn->body_empty_info;    int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);    if (tlen < 0) return tlen; +  if (tlen == 0) return 0; -  if (is_anychar_star_quantifier(qn) && -      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +  if (is_anychar_infinite_greedy(qn) && +      (qn->lower <= 1 || +       int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {      r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);      if (r != 0) return r;      if (IS_NOT_NULL(qn->next_head_exact)) { @@ -1159,7 +907,8 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)      mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);    if (infinite && -      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +      (qn->lower <= 1 || +       int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {      if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {        if (qn->greedy) {          if (IS_NOT_NULL(qn->head_exact)) @@ -1223,8 +972,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)      r = compile_tree(NODE_QUANT_BODY(qn), reg, env);    }    else if (! infinite && qn->greedy && -           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper -                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +           (qn->upper == 1 || +            int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper, +                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {      int n = qn->upper - qn->lower;      r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); @@ -1250,7 +1000,6 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)    }    return r;  } -#endif /* USE_COMBINATION_EXPLOSION_CHECK */  static int  compile_length_option_node(EnclosureNode* node, regex_t* reg) @@ -1358,7 +1107,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)        if (tlen < 0) return tlen;        len = tlen * qn->lower -        + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; +        + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;      }      else {        len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END; @@ -1505,14 +1254,14 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)        len = compile_length_tree(NODE_QUANT_BODY(qn), reg);        if (len < 0) return len; -      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); +      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);        if (r != 0) return r;        r = compile_tree(NODE_QUANT_BODY(qn), reg, env);        if (r != 0) return r; -      r = add_opcode(reg, OP_POP); +      r = add_opcode(reg, OP_POP_OUT);        if (r != 0) return r;        r = add_opcode_rel_addr(reg, OP_JUMP, -             -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); +           -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));      }      else {        r = add_opcode(reg, OP_ATOMIC_START); @@ -1762,6 +1511,30 @@ compile_gimmick_node(GimmickNode* node, regex_t* reg)      if (r != 0) return r;      r = add_mem_num(reg, node->id);      break; + +#ifdef USE_CALLOUT +  case GIMMICK_CALLOUT: +    switch (node->detail_type) { +    case ONIG_CALLOUT_OF_CONTENTS: +    case ONIG_CALLOUT_OF_NAME: +      { +        r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ? +                                  OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME); +        if (r != 0) return r; +        if (node->detail_type == ONIG_CALLOUT_OF_NAME) { +          r = add_mem_num(reg, node->id); +          if (r != 0) return r; +        } +        r = add_mem_num(reg, node->num); +        if (r != 0) return r; +      } +      break; + +    default: +      r = ONIGERR_TYPE_BUG; +      break; +    } +#endif    }    return r; @@ -1785,6 +1558,23 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg)    case GIMMICK_UPDATE_VAR:      len = SIZE_OP_UPDATE_VAR;      break; + +#ifdef USE_CALLOUT +  case GIMMICK_CALLOUT: +    switch (node->detail_type) { +    case ONIG_CALLOUT_OF_CONTENTS: +      len = SIZE_OP_CALLOUT_CONTENTS; +      break; +    case ONIG_CALLOUT_OF_NAME: +      len = SIZE_OP_CALLOUT_NAME; +      break; + +    default: +      len = ONIGERR_TYPE_BUG; +      break; +    } +    break; +#endif    }    return len; @@ -2337,7 +2127,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)  #ifdef USE_CALL  static int -unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)  {    int i, offset;    EnclosureNode* en; @@ -3725,11 +3515,12 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* re  }  static int -expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], -                            UChar *p, int slen, UChar *end, regex_t* reg, -                            Node **rnode) +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, +                            int slen, UChar *end, regex_t* reg, Node **rnode)  { -  int r, i, j, len, varlen; +  int r, i, j; +  int len; +  int varlen;    Node *anode, *var_anode, *snode, *xnode, *an;    UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; @@ -3972,145 +3763,8 @@ expand_case_fold_string(Node* node, regex_t* reg)    return r;  } - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define CEC_THRES_NUM_BIG_REPEAT         512 -#define CEC_INFINITE_NUM          0x7fffffff - -#define CEC_IN_INFINITE_REPEAT    (1<<0) -#define CEC_IN_FINITE_REPEAT      (1<<1) -#define CEC_CONT_BIG_REPEAT       (1<<2) - -static int -setup_comb_exp_check(Node* node, int state, ScanEnv* env) -{ -  int r = state; - -  switch (NODE_TYPE(node)) { -  case NODE_LIST: -    { -      do { -        r = setup_comb_exp_check(NODE_CAR(node), r, env); -      } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); -    } -    break; - -  case NODE_ALT: -    { -      int ret; -      do { -        ret = setup_comb_exp_check(NODE_CAR(node), state, env); -        r |= ret; -      } while (ret >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); -    } -    break; - -  case NODE_QUANT: -    { -      int var_num; -      int child_state = state; -      int add_state = 0; -      QuantNode* qn = QUANT_(node); -      Node* target = NODE_QUANT_BODY(qn); - -      if (! IS_REPEAT_INFINITE(qn->upper)) { -        if (qn->upper > 1) { -          /* {0,1}, {1,1} are allowed */ -          child_state |= CEC_IN_FINITE_REPEAT; - -          /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ -          if (env->backrefed_mem == 0) { -            if (NODE_TYPE(NODE_QUANT_BODY(qn)) == NODE_ENCLOSURE) { -              EnclosureNode* en = ENCLOSURE_(NODE_QUANT_BODY(qn)); -              if (en->type == ENCLOSURE_MEMORY) { -                if (NODE_TYPE(NODE_ENCLOSURE_BODY(en)) == NODE_QUANT) { -                  QuantNode* q = QUANT_(NODE_ENCLOSURE_BODY(en)); -                  if (IS_REPEAT_INFINITE(q->upper) -                      && q->greedy == qn->greedy) { -                    qn->upper = (qn->lower == 0 ? 1 : qn->lower); -                    if (qn->upper == 1) -                      child_state = state; -                  } -                } -              } -            } -          } -        } -      } - -      if (state & CEC_IN_FINITE_REPEAT) { -        qn->comb_exp_check_num = -1; -      } -      else { -        if (IS_REPEAT_INFINITE(qn->upper)) { -          var_num = CEC_INFINITE_NUM; -          child_state |= CEC_IN_INFINITE_REPEAT; -        } -        else { -          var_num = qn->upper - qn->lower; -        } - -        if (var_num >= CEC_THRES_NUM_BIG_REPEAT) -          add_state |= CEC_CONT_BIG_REPEAT; - -        if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || -            ((state & CEC_CONT_BIG_REPEAT) != 0 && -             var_num >= CEC_THRES_NUM_BIG_REPEAT)) { -          if (qn->comb_exp_check_num == 0) { -            env->num_comb_exp_check++; -            qn->comb_exp_check_num = env->num_comb_exp_check; -            if (env->curr_max_regnum > env->comb_exp_max_regnum) -              env->comb_exp_max_regnum = env->curr_max_regnum; -          } -        } -      } - -      r = setup_comb_exp_check(target, child_state, env); -      r |= add_state; -    } -    break; - -  case NODE_ENCLOSURE: -    { -      EnclosureNode* en = ENCLOSURE_(node); - -      switch (en->type) { -      case ENCLOSURE_MEMORY: -        { -          if (env->curr_max_regnum < en->m.regnum) -            env->curr_max_regnum = en->m.regnum; - -          r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); -        } -        break; - -      default: -        r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); -        break; -      } -    } -    break; - -#ifdef USE_CALL -  case NODE_CALL: -    if (NODE_IS_RECURSION(node)) -      env->has_recursion = 1; -    else -      r = setup_comb_exp_check(NODE_BODY(node), state, env); -    break; -#endif - -  default: -    break; -  } - -  return r; -} -#endif -  #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT -static int +static enum QuantBodyEmpty  quantifiers_memory_node_info(Node* node)  {    int r = QUANT_BODY_IS_EMPTY; @@ -4638,7 +4292,7 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)  #define ALLOWED_TYPE_IN_LB \    ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STRING | BIT_NODE_CCLASS \    | BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \ -  | BIT_NODE_CALL ) +  | BIT_NODE_CALL | BIT_NODE_GIMMICK)  #define ALLOWED_ENCLOSURE_IN_LB       ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )  #define ALLOWED_ENCLOSURE_IN_LB_NOT   (1<<ENCLOSURE_OPTION) @@ -4765,7 +4419,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)    }  #ifdef USE_OP_PUSH_OR_JUMP_EXACT -  if (qn->greedy && (qn->body_empty_info != 0)) { +  if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {      if (NODE_TYPE(body) == NODE_QUANT) {        QuantNode* tqn = QUANT_(body);        if (IS_NOT_NULL(tqn->head_exact)) { @@ -4948,10 +4602,10 @@ set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,  typedef struct {    OnigLen min;  /* min byte length */    OnigLen max;  /* max byte length */ -} MinMaxLen; +} MinMax;  typedef struct { -  MinMaxLen        mmd; +  MinMax           mmd;    OnigEncoding     enc;    OnigOptionType   options;    OnigCaseFoldType case_fold_flag; @@ -4964,35 +4618,35 @@ typedef struct {  } OptAnc;  typedef struct { -  MinMaxLen  mmd;   /* info position */ +  MinMax     mmd;   /* position */    OptAnc     anc; -  int   reach_end; -  int   ignore_case; -  int   len; -  UChar s[OPT_EXACT_MAXLEN]; +  int        reach_end; +  int        ignore_case; +  int        len; +  UChar      s[OPT_EXACT_MAXLEN];  } OptExact;  typedef struct { -  MinMaxLen mmd;    /* info position */ -  OptAnc anc; -  int   value;      /* weighted value */ -  UChar map[ONIG_CHAR_TABLE_SIZE]; +  MinMax    mmd;    /* position */ +  OptAnc    anc; +  int       value;  /* weighted value */ +  UChar     map[ONIG_CHAR_TABLE_SIZE];  } OptMap;  typedef struct { -  MinMaxLen len; -  OptAnc   anc; -  OptExact exb;     /* boundary */ -  OptExact exm;     /* middle */ -  OptExact expr;    /* prec read (?=...) */ -  OptMap   map;     /* boundary */ +  MinMax    len; +  OptAnc    anc; +  OptExact  exb;     /* boundary */ +  OptExact  exm;     /* middle */ +  OptExact  expr;    /* prec read (?=...) */ +  OptMap    map;     /* boundary */  } NodeOpt;  static int  map_position_value(OnigEncoding enc, int i)  { -  static const short int ByteValTable[] = { +  static const short int Vals[] = {       5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,       1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,      12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5, @@ -5003,18 +4657,18 @@ map_position_value(OnigEncoding enc, int i)       6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1    }; -  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { +  if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {      if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)        return 20;      else -      return (int )ByteValTable[i]; +      return (int )Vals[i];    }    else      return 4;   /* Take it easy. */  }  static int -distance_value(MinMaxLen* mm) +distance_value(MinMax* mm)  {    /* 1000 / (min-max-dist + 1) */    static const short int dist_vals[] = { @@ -5043,7 +4697,7 @@ distance_value(MinMaxLen* mm)  }  static int -comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)  {    if (v2 <= 0) return -1;    if (v1 <= 0) return  1; @@ -5060,40 +4714,40 @@ comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)  }  static int -is_equal_mml(MinMaxLen* a, MinMaxLen* b) +is_equal_mml(MinMax* a, MinMax* b)  {    return (a->min == b->min && a->max == b->max) ? 1 : 0;  }  static void -set_mml(MinMaxLen* mml, OnigLen min, OnigLen max) +set_mml(MinMax* l, OnigLen min, OnigLen max)  { -  mml->min = min; -  mml->max = max; +  l->min = min; +  l->max = max;  }  static void -clear_mml(MinMaxLen* mml) +clear_mml(MinMax* l)  { -  mml->min = mml->max = 0; +  l->min = l->max = 0;  }  static void -copy_mml(MinMaxLen* to, MinMaxLen* from) +copy_mml(MinMax* to, MinMax* from)  {    to->min = from->min;    to->max = from->max;  }  static void -add_mml(MinMaxLen* to, MinMaxLen* from) +add_mml(MinMax* to, MinMax* from)  {    to->min = distance_add(to->min, from->min);    to->max = distance_add(to->max, from->max);  }  static void -alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +alt_merge_mml(MinMax* to, MinMax* from)  {    if (to->min > from->min) to->min = from->min;    if (to->max < from->max) to->max = from->max; @@ -5106,10 +4760,10 @@ copy_opt_env(OptEnv* to, OptEnv* from)  }  static void -clear_opt_anc_info(OptAnc* anc) +clear_opt_anc_info(OptAnc* a)  { -  anc->left  = 0; -  anc->right = 0; +  a->left  = 0; +  a->right = 0;  }  static void @@ -5139,11 +4793,10 @@ concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,  }  static int -is_left(int anc) +is_left(int a)  { -  if (anc == ANCHOR_END_BUF  || anc == ANCHOR_SEMI_END_BUF || -      anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || -      anc == ANCHOR_PREC_READ_NOT) +  if (a == ANCHOR_END_BUF  || a == ANCHOR_SEMI_END_BUF || +      a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)      return 0;    return 1; @@ -5183,20 +4836,20 @@ alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)  }  static int -is_full_opt_exact(OptExact* ex) +is_full_opt_exact(OptExact* e)  { -  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); +  return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);  }  static void -clear_opt_exact(OptExact* ex) +clear_opt_exact(OptExact* e)  { -  clear_mml(&ex->mmd); -  clear_opt_anc_info(&ex->anc); -  ex->reach_end   = 0; -  ex->ignore_case = 0; -  ex->len         = 0; -  ex->s[0]        = '\0'; +  clear_mml(&e->mmd); +  clear_opt_anc_info(&e->anc); +  e->reach_end   = 0; +  e->ignore_case = 0; +  e->len         = 0; +  e->s[0]        = '\0';  }  static void @@ -5205,24 +4858,28 @@ copy_opt_exact(OptExact* to, OptExact* from)    *to = *from;  } -static void +static int  concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)  { -  int i, j, len; +  int i, j, len, r;    UChar *p, *end;    OptAnc tanc;    if (! to->ignore_case && add->ignore_case) { -    if (to->len >= add->len) return ;  /* avoid */ +    if (to->len >= add->len) return 0;  /* avoid */      to->ignore_case = 1;    } +  r = 0;    p = add->s;    end = p + add->len;    for (i = to->len; p < end; ) {      len = enclen(enc, p); -    if (i + len > OPT_EXACT_MAXLEN) break; +    if (i + len > OPT_EXACT_MAXLEN) { +      r = 1; /* 1:full */ +      break; +    }      for (j = 0; j < len && p < end; j++)        to->s[i++] = *p++;    } @@ -5233,11 +4890,12 @@ concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)    concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);    if (! to->reach_end) tanc.right = 0;    copy_opt_anc_info(&to->anc, &tanc); + +  return r;  }  static void -concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, -                     int raw ARG_UNUSED, OnigEncoding enc) +concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)  {    int i, j, len;    UChar *p; @@ -5291,31 +4949,31 @@ alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)  static void  select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)  { -  int v1, v2; +  int vn, va; -  v1 = now->len; -  v2 = alt->len; +  vn = now->len; +  va = alt->len; -  if (v2 == 0) { +  if (va == 0) {      return ;    } -  else if (v1 == 0) { +  else if (vn == 0) {      copy_opt_exact(now, alt);      return ;    } -  else if (v1 <= 2 && v2 <= 2) { +  else if (vn <= 2 && va <= 2) {      /* ByteValTable[x] is big value --> low price */ -    v2 = map_position_value(enc, now->s[0]); -    v1 = map_position_value(enc, alt->s[0]); +    va = map_position_value(enc, now->s[0]); +    vn = map_position_value(enc, alt->s[0]); -    if (now->len > 1) v1 += 5; -    if (alt->len > 1) v2 += 5; +    if (now->len > 1) vn += 5; +    if (alt->len > 1) va += 5;    } -  if (now->ignore_case == 0) v1 *= 2; -  if (alt->ignore_case == 0) v2 *= 2; +  if (now->ignore_case == 0) vn *= 2; +  if (alt->ignore_case == 0) va *= 2; -  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) +  if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)      copy_opt_exact(now, alt);  } @@ -5354,17 +5012,17 @@ copy_opt_map(OptMap* to, OptMap* from)  }  static void -add_char_opt_map(OptMap* map, UChar c, OnigEncoding enc) +add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)  { -  if (map->map[c] == 0) { -    map->map[c] = 1; -    map->value += map_position_value(enc, c); +  if (m->map[c] == 0) { +    m->map[c] = 1; +    m->value += map_position_value(enc, c);    }  }  static int  add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end, -                     OnigEncoding enc, OnigCaseFoldType case_fold_flag) +                     OnigEncoding enc, OnigCaseFoldType fold_flag)  {    OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];    UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; @@ -5372,8 +5030,8 @@ add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,    add_char_opt_map(map, p[0], enc); -  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); -  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); +  fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag); +  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);    if (n < 0) return n;    for (i = 0; i < n; i++) { @@ -5389,7 +5047,7 @@ select_opt_map(OptMap* now, OptMap* alt)  {    static int z = 1<<15; /* 32768: something big value */ -  int v1, v2; +  int vn, va;    if (alt->value == 0) return ;    if (now->value == 0) { @@ -5397,9 +5055,9 @@ select_opt_map(OptMap* now, OptMap* alt)      return ;    } -  v1 = z / now->value; -  v2 = z / alt->value; -  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) +  vn = z / now->value; +  va = z / alt->value; +  if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)      copy_opt_map(now, alt);  } @@ -5407,13 +5065,13 @@ static int  comp_opt_exact_or_map(OptExact* e, OptMap* m)  {  #define COMP_EM_BASE  20 -  int ve, vm; +  int ae, am;    if (m->value <= 0) return -1; -  ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); -  vm = COMP_EM_BASE * 5 * 2 / m->value; -  return comp_distance_value(&e->mmd, &m->mmd, ve, vm); +  ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); +  am = COMP_EM_BASE * 5 * 2 / m->value; +  return comp_distance_value(&e->mmd, &m->mmd, ae, am);  }  static void @@ -5444,11 +5102,11 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)  }  static void -set_bound_node_opt_info(NodeOpt* opt, MinMaxLen* mmd) +set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)  { -  copy_mml(&(opt->exb.mmd),  mmd); -  copy_mml(&(opt->expr.mmd), mmd); -  copy_mml(&(opt->map.mmd),  mmd); +  copy_mml(&(opt->exb.mmd),  plen); +  copy_mml(&(opt->expr.mmd), plen); +  copy_mml(&(opt->map.mmd),  plen);  }  static void @@ -5543,10 +5201,12 @@ alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)  static int  optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)  { -  OnigEncoding enc;    int i; -  int r = 0; +  int r; +  NodeOpt xo; +  OnigEncoding enc; +  r = 0;    enc = env->enc;    clear_node_opt_info(opt);    set_bound_node_opt_info(opt, &env->mmd); @@ -5555,15 +5215,14 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)    case NODE_LIST:      {        OptEnv nenv; -      NodeOpt nopt;        Node* nd = node;        copy_opt_env(&nenv, env);        do { -        r = optimize_nodes(NODE_CAR(nd), &nopt, &nenv); +        r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);          if (r == 0) { -          add_mml(&nenv.mmd, &nopt.len); -          concat_left_node_opt_info(enc, opt, &nopt); +          add_mml(&nenv.mmd, &xo.len); +          concat_left_node_opt_info(enc, opt, &xo);          }        } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));      } @@ -5571,14 +5230,13 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)    case NODE_ALT:      { -      NodeOpt nopt;        Node* nd = node;        do { -        r = optimize_nodes(NODE_CAR(nd), &nopt, env); +        r = optimize_nodes(NODE_CAR(nd), &xo, env);          if (r == 0) { -          if (nd == node) copy_node_opt_info(opt, &nopt); -          else            alt_merge_node_opt_info(opt, &nopt, env); +          if (nd == node) copy_node_opt_info(opt, &xo); +          else            alt_merge_node_opt_info(opt, &xo, env);          }        } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));      } @@ -5588,11 +5246,10 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)      {        StrNode* sn = STR_(node);        int slen = (int )(sn->end - sn->s); -      int is_raw = NODE_STRING_IS_RAW(node); +      /* int is_raw = NODE_STRING_IS_RAW(node); */        if (! NODE_STRING_IS_AMBIG(node)) { -        concat_opt_exact_str(&opt->exb, sn->s, sn->end, -                             NODE_STRING_IS_RAW(node), enc); +        concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);          if (slen > 0) {            add_char_opt_map(&opt->map, *(sn->s), enc);          } @@ -5606,7 +5263,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)            max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;          }          else { -          concat_opt_exact_str(&opt->exb, sn->s, sn->end, is_raw, enc); +          concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);            opt->exb.ignore_case = 1;            if (slen > 0) { @@ -5709,19 +5366,17 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)      case ANCHOR_PREC_READ:        { -        NodeOpt nopt; - -        r = optimize_nodes(NODE_BODY(node), &nopt, env); +        r = optimize_nodes(NODE_BODY(node), &xo, env);          if (r == 0) { -          if (nopt.exb.len > 0) -            copy_opt_exact(&opt->expr, &nopt.exb); -          else if (nopt.exm.len > 0) -            copy_opt_exact(&opt->expr, &nopt.exm); +          if (xo.exb.len > 0) +            copy_opt_exact(&opt->expr, &xo.exb); +          else if (xo.exm.len > 0) +            copy_opt_exact(&opt->expr, &xo.exm);            opt->expr.reach_end = 0; -          if (nopt.map.value > 0) -            copy_opt_map(&opt->map, &nopt.map); +          if (xo.map.value > 0) +            copy_opt_map(&opt->map, &xo.map);          }        }        break; @@ -5771,48 +5426,47 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)    case NODE_QUANT:      {        OnigLen min, max; -      NodeOpt nopt;        QuantNode* qn = QUANT_(node); -      r = optimize_nodes(NODE_BODY(node), &nopt, env); +      r = optimize_nodes(NODE_BODY(node), &xo, env);        if (r != 0) break; -      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { +      if (qn->lower > 0) { +        copy_node_opt_info(opt, &xo); +        if (xo.exb.len > 0) { +          if (xo.exb.reach_end) { +            for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) { +              int rc = concat_opt_exact(&opt->exb, &xo.exb, enc); +              if (rc > 0) break; +            } +            if (i < qn->lower) opt->exb.reach_end = 0; +          } +        } + +        if (qn->lower != qn->upper) { +          opt->exb.reach_end = 0; +          opt->exm.reach_end = 0; +        } +        if (qn->lower > 1) +          opt->exm.reach_end = 0; +      } + +      if (IS_REPEAT_INFINITE(qn->upper)) {          if (env->mmd.max == 0 &&              NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {            if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) -            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); +            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);            else -            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); +            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);          } + +        max = (xo.len.max > 0 ? INFINITE_LEN : 0);        }        else { -        if (qn->lower > 0) { -          copy_node_opt_info(opt, &nopt); -          if (nopt.exb.len > 0) { -            if (nopt.exb.reach_end) { -              for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) { -                concat_opt_exact(&opt->exb, &nopt.exb, enc); -              } -              if (i < qn->lower) opt->exb.reach_end = 0; -            } -          } - -          if (qn->lower != qn->upper) { -            opt->exb.reach_end = 0; -            opt->exm.reach_end = 0; -          } -          if (qn->lower > 1) -            opt->exm.reach_end = 0; -        } +        max = distance_multiply(xo.len.max, qn->upper);        } -      min = distance_multiply(nopt.len.min, qn->lower); -      if (IS_REPEAT_INFINITE(qn->upper)) -        max = (nopt.len.max > 0 ? INFINITE_LEN : 0); -      else -        max = distance_multiply(nopt.len.max, qn->upper); - +      min = distance_multiply(xo.len.min, qn->lower);        set_mml(&opt->len, min, max);      }      break; @@ -5848,9 +5502,9 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)  #endif            {              r = optimize_nodes(NODE_BODY(node), opt, env); -            if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { +            if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {                if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum)) -                remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); +                remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);              }            }          break; @@ -5862,24 +5516,23 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)        case ENCLOSURE_IF_ELSE:          {            OptEnv nenv; -          NodeOpt nopt;            copy_opt_env(&nenv, env); -          r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); +          r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);            if (r == 0) { -            add_mml(&nenv.mmd, &nopt.len); -            concat_left_node_opt_info(enc, opt, &nopt); +            add_mml(&nenv.mmd, &xo.len); +            concat_left_node_opt_info(enc, opt, &xo);              if (IS_NOT_NULL(en->te.Then)) { -              r = optimize_nodes(en->te.Then, &nopt, &nenv); +              r = optimize_nodes(en->te.Then, &xo, &nenv);                if (r == 0) { -                concat_left_node_opt_info(enc, opt, &nopt); +                concat_left_node_opt_info(enc, opt, &xo);                }              }              if (IS_NOT_NULL(en->te.Else)) { -              r = optimize_nodes(en->te.Else, &nopt, env); +              r = optimize_nodes(en->te.Else, &xo, env);                if (r == 0) -                alt_merge_node_opt_info(opt, &nopt, env); +                alt_merge_node_opt_info(opt, &xo, env);              }            }          } @@ -5914,12 +5567,12 @@ set_optimize_exact(regex_t* reg, OptExact* e)      CHECK_NULL_RETURN_MEMERR(reg->exact);      xmemcpy(reg->exact, e->s, e->len);      reg->exact_end = reg->exact + e->len; -    reg->optimize = ONIG_OPTIMIZE_EXACT_IC; +    reg->optimize = OPTIMIZE_EXACT_IC;    }    else {      int allow_reverse; -    reg->exact = str_dup(e->s, e->s + e->len); +    reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);      CHECK_NULL_RETURN_MEMERR(reg->exact);      reg->exact_end = reg->exact + e->len; @@ -5932,10 +5585,10 @@ set_optimize_exact(regex_t* reg, OptExact* e)        if (r != 0) return r;        reg->optimize = (allow_reverse != 0 -                       ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); +                       ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);      }      else { -      reg->optimize = ONIG_OPTIMIZE_EXACT; +      reg->optimize = OPTIMIZE_EXACT;      }    } @@ -5957,7 +5610,7 @@ set_optimize_map(regex_t* reg, OptMap* m)    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)      reg->map[i] = m->map[i]; -  reg->optimize   = ONIG_OPTIMIZE_MAP; +  reg->optimize   = OPTIMIZE_MAP;    reg->dmin       = m->mmd.min;    reg->dmax       = m->mmd.max; @@ -5994,11 +5647,11 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)    if (r != 0) return r;    reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF | -        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | +        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |          ANCHOR_LOOK_BEHIND);    if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) -    reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; +    reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;    reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |         ANCHOR_PREC_READ_NOT); @@ -6038,7 +5691,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)  static void  clear_optimize_info(regex_t* reg)  { -  reg->optimize      = ONIG_OPTIMIZE_NONE; +  reg->optimize      = OPTIMIZE_NONE;    reg->anchor        = 0;    reg->anchor_dmin   = 0;    reg->anchor_dmax   = 0; @@ -6141,14 +5794,14 @@ print_anchor(FILE* f, int anchor)      q = 1;      fprintf(f, "end-line");    } -  if (anchor & ANCHOR_ANYCHAR_STAR) { +  if (anchor & ANCHOR_ANYCHAR_INF) {      if (q) fprintf(f, ", ");      q = 1; -    fprintf(f, "anychar-star"); +    fprintf(f, "anychar-inf");    } -  if (anchor & ANCHOR_ANYCHAR_STAR_ML) { +  if (anchor & ANCHOR_ANYCHAR_INF_ML) {      if (q) fprintf(f, ", "); -    fprintf(f, "anychar-star-ml"); +    fprintf(f, "anychar-inf-ml");    }    fprintf(f, "]"); @@ -6180,7 +5833,7 @@ print_optimize_info(FILE* f, regex_t* reg)      }      fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));    } -  else if (reg->optimize & ONIG_OPTIMIZE_MAP) { +  else if (reg->optimize & OPTIMIZE_MAP) {      int c, i, n = 0;      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) @@ -6208,6 +5861,66 @@ print_optimize_info(FILE* f, regex_t* reg)  #endif +extern RegexExt* +onig_get_regex_ext(regex_t* reg) +{ +  if (IS_NULL(REG_EXTP(reg))) { +    RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext)); +    if (IS_NULL(ext)) return 0; + +    ext->pattern      = 0; +    ext->pattern_end  = 0; +#ifdef USE_CALLOUT +    ext->tag_table    = 0; +    ext->callout_num  = 0; +    ext->callout_list_alloc = 0; +    ext->callout_list = 0; +#endif + +    REG_EXTPL(reg) = (void* )ext; +  } + +  return REG_EXTP(reg); +} + +static void +free_regex_ext(RegexExt* ext) +{ +  if (IS_NOT_NULL(ext)) { +    if (IS_NOT_NULL(ext->pattern)) +      xfree((void* )ext->pattern); + +#ifdef USE_CALLOUT +    if (IS_NOT_NULL(ext->tag_table)) +      onig_callout_tag_table_free(ext->tag_table); + +    if (IS_NOT_NULL(ext->callout_list)) +      onig_free_reg_callout_list(ext->callout_num, ext->callout_list); +#endif + +    xfree(ext); +  } +} + +extern int +onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end) +{ +  RegexExt* ext; +  UChar* s; + +  ext = onig_get_regex_ext(reg); +  CHECK_NULL_RETURN_MEMERR(ext); + +  s = onigenc_strdup(reg->enc, pattern, pattern_end); +  CHECK_NULL_RETURN_MEMERR(s); + +  ext->pattern     = s; +  ext->pattern_end = s + (pattern_end - pattern); + +  return ONIG_NORMAL; +} + +  extern void  onig_free_body(regex_t* reg)  { @@ -6217,7 +5930,10 @@ onig_free_body(regex_t* reg)      if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);      if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);      if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range); -    if (IS_NOT_NULL(REG_EXTP(reg)))         xfree(REG_EXTP(reg)); +    if (IS_NOT_NULL(REG_EXTP(reg))) { +      free_regex_ext(REG_EXTP(reg)); +      REG_EXTPL(reg) = 0; +    }      onig_names_free(reg);    } @@ -6245,9 +5961,6 @@ onig_transfer(regex_t* to, regex_t* from)  } -#ifdef ONIG_DEBUG_COMPILE -static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); -#endif  #ifdef ONIG_DEBUG_PARSE  static void print_tree P_((FILE* f, Node* node));  #endif @@ -6286,9 +5999,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    reg->num_null_check     = 0;    reg->repeat_range_alloc = 0;    reg->repeat_range       = (OnigRepeatRange* )NULL; -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  reg->num_comb_exp_check = 0; -#endif    r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);    if (r != 0) goto err; @@ -6346,33 +6056,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    }    reg->bt_mem_start |= reg->bt_mem_end; -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  if (scan_env.backrefed_mem == 0 -#ifdef USE_CALL -      || scan_env.num_call == 0 -#endif -      ) { -    setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_CALL -    if (scan_env.has_recursion != 0) { -      scan_env.num_comb_exp_check = 0; -    } -    else -#endif -    if (scan_env.comb_exp_max_regnum > 0) { -      int i; -      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { -        if (MEM_STATUS_AT(scan_env.backrefed_mem, i) != 0) { -          scan_env.num_comb_exp_check = 0; -          break; -        } -      } -    } -  } - -  reg->num_comb_exp_check = scan_env.num_comb_exp_check; -#endif -    clear_optimize_info(reg);  #ifndef ONIG_DONT_OPTIMIZE    r = set_optimize_info_from_tree(root, reg, &scan_env); @@ -6398,13 +6081,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,      r = add_opcode(reg, OP_END);  #ifdef USE_CALL      if (scan_env.num_call > 0) { -      r = unset_addr_list_fix(&uslist, reg); +      r = fix_unset_addr_list(&uslist, reg);        unset_addr_list_end(&uslist);        if (r != 0) goto err;      }  #endif -    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) +    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0) +#ifdef USE_CALLOUT +        || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0) +#endif +        )        reg->stack_pop_level = STACK_POP_LEVEL_ALL;      else {        if (reg->bt_mem_start != 0) @@ -6422,7 +6109,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,  #ifdef ONIG_DEBUG_COMPILE    onig_print_names(stderr, reg); -  print_compiled_byte_code_list(stderr, reg); +  onig_print_compiled_byte_code_list(stderr, reg);  #endif   end: @@ -6464,11 +6151,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl  #if 0      return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;  #else -    r = onig_initialize(NULL, 0); -    if (r != 0) -      return ONIGERR_FAIL_TO_INITIALIZE; - -    r = onig_initialize_encoding(enc); +    r = onig_initialize(&enc, 1);      if (r != 0)        return ONIGERR_FAIL_TO_INITIALIZE; @@ -6569,16 +6252,21 @@ onig_initialize(OnigEncoding encodings[], int n)        return r;    } -  return 0; +  return ONIG_NORMAL;  } -static OnigEndCallListItemType* EndCallTop; +typedef struct EndCallListItem { +  struct EndCallListItem* next; +  void (*func)(void); +} EndCallListItemType; + +static EndCallListItemType* EndCallTop;  extern void onig_add_end_call(void (*func)(void))  { -  OnigEndCallListItemType* item; +  EndCallListItemType* item; -  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item)); +  item = (EndCallListItemType* )xmalloc(sizeof(*item));    if (item == 0) return ;    item->next = EndCallTop; @@ -6590,7 +6278,7 @@ extern void onig_add_end_call(void (*func)(void))  static void  exec_end_call_list(void)  { -  OnigEndCallListItemType* prev; +  EndCallListItemType* prev;    void (*func)(void);    while (EndCallTop != 0) { @@ -6608,6 +6296,12 @@ onig_end(void)  {    exec_end_call_list(); +#ifdef USE_CALLOUT +  onig_global_callout_names_free(); +#endif + +  onigenc_end(); +    onig_inited = 0;    return 0; @@ -6673,144 +6367,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)  } -#ifdef ONIG_DEBUG - -/* arguments type */ -#define ARG_SPECIAL     -1 -#define ARG_NON          0 -#define ARG_RELADDR      1 -#define ARG_ABSADDR      2 -#define ARG_LENGTH       3 -#define ARG_MEMNUM       4 -#define ARG_OPTION       5 -#define ARG_STATE_CHECK  6 -#define ARG_MODE         7 - -OnigOpInfoType OnigOpInfo[] = { -  { OP_FINISH,            "finish",          ARG_NON }, -  { OP_END,               "end",             ARG_NON }, -  { OP_EXACT1,            "exact1",          ARG_SPECIAL }, -  { OP_EXACT2,            "exact2",          ARG_SPECIAL }, -  { OP_EXACT3,            "exact3",          ARG_SPECIAL }, -  { OP_EXACT4,            "exact4",          ARG_SPECIAL }, -  { OP_EXACT5,            "exact5",          ARG_SPECIAL }, -  { OP_EXACTN,            "exactn",          ARG_SPECIAL }, -  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL }, -  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL }, -  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL }, -  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL }, -  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL }, -  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL }, -  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL }, -  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL }, -  { OP_CCLASS,            "cclass",          ARG_SPECIAL }, -  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL }, -  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL }, -  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL }, -  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL }, -  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL }, -#ifdef USE_OP_CCLASS_NODE -  { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL }, -#endif -  { OP_ANYCHAR,           "anychar",         ARG_NON }, -  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON }, -  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON }, -  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON }, -  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, -  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, -  { OP_WORD,                "word",            ARG_NON }, -  { OP_WORD_ASCII,          "word-ascii",      ARG_NON }, -  { OP_NO_WORD,             "not-word",        ARG_NON }, -  { OP_NO_WORD_ASCII,       "not-word-ascii",  ARG_NON }, -  { OP_WORD_BOUNDARY,       "word-boundary",     ARG_MODE }, -  { OP_NO_WORD_BOUNDARY,    "not-word-boundary", ARG_MODE }, -  { OP_WORD_BEGIN,          "word-begin",      ARG_MODE }, -  { OP_WORD_END,            "word-end",        ARG_MODE }, -  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON }, -  { OP_END_BUF,             "end-buf",         ARG_NON }, -  { OP_BEGIN_LINE,          "begin-line",      ARG_NON }, -  { OP_END_LINE,            "end-line",        ARG_NON }, -  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON }, -  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON }, -  { OP_BACKREF1,            "backref1",             ARG_NON }, -  { OP_BACKREF2,            "backref2",             ARG_NON }, -  { OP_BACKREF_N,            "backref-n",           ARG_MEMNUM  }, -  { OP_BACKREF_N_IC,         "backref-n-ic",        ARG_SPECIAL }, -  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL }, -  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL }, -  { OP_BACKREF_WITH_LEVEL,  "backref_with_level",   ARG_SPECIAL }, -  { OP_BACKREF_CHECK,       "backref_check",        ARG_SPECIAL }, -  { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, -  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  }, -  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  }, -  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  }, -  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  }, -  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  }, -  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  }, -  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  }, -  { OP_SET_OPTION,          "set-option",           ARG_OPTION  }, -  { OP_FAIL,                "fail",                 ARG_NON }, -  { OP_JUMP,                "jump",                 ARG_RELADDR }, -  { OP_PUSH,                "push",                 ARG_RELADDR }, -  { OP_PUSH_SUPER,          "push_SUPER",           ARG_RELADDR }, -  { OP_POP,                 "pop",                  ARG_NON }, -  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL }, -  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL }, -  { OP_REPEAT,              "repeat",               ARG_SPECIAL }, -  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL }, -  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  }, -  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  }, -  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  }, -  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  }, -  { OP_EMPTY_CHECK_START,   "empty-check-start",    ARG_MEMNUM  }, -  { OP_EMPTY_CHECK_END,     "empty-check-end",      ARG_MEMNUM  }, -  { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM  }, -  { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM  }, -  { OP_PREC_READ_START,      "push-pos",             ARG_NON }, -  { OP_PREC_READ_END,        "pop-pos",              ARG_NON }, -  { OP_PREC_READ_NOT_START,  "prec-read-not-start",  ARG_RELADDR }, -  { OP_PREC_READ_NOT_END,    "prec-read-not-end",    ARG_NON }, -  { OP_ATOMIC_START,         "atomic-start",         ARG_NON }, -  { OP_ATOMIC_END,           "atomic-end",           ARG_NON }, -  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL }, -  { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, -  { OP_LOOK_BEHIND_NOT_END,  "look-behind-not-end",  ARG_NON }, -  { OP_CALL,                 "call",                 ARG_ABSADDR }, -  { OP_RETURN,               "return",               ARG_NON }, -  { OP_PUSH_SAVE_VAL,        "push-save-val",        ARG_SPECIAL }, -  { OP_UPDATE_VAR,           "update-var",           ARG_SPECIAL }, -  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL }, -  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, -  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK }, -  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK }, -  { OP_STATE_CHECK_ANYCHAR_ML_STAR, -    "state-check-anychar-ml*", ARG_STATE_CHECK }, -  { -1, "", ARG_NON } -}; - -static char* -op2name(int opcode) -{ -  int i; - -  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { -    if (opcode == OnigOpInfo[i].opcode) -      return OnigOpInfo[i].name; -  } -  return ""; -} - -static int -op2arg_type(int opcode) -{ -  int i; - -  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { -    if (opcode == OnigOpInfo[i].opcode) -      return OnigOpInfo[i].arg_type; -  } -  return ARG_SPECIAL; -} +#ifdef ONIG_DEBUG_PARSE  static void  p_string(FILE* f, int len, UChar* s) @@ -6820,326 +6377,6 @@ p_string(FILE* f, int len, UChar* s)  }  static void -p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) -{ -  int x = len * mb_len; - -  fprintf(f, ":%d:", len); -  while (x-- > 0) { fputc(*s++, f); } -} - -static void -p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) -{ -  RelAddrType curr = (RelAddrType )(p - start); - -  fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); -} - -extern void -onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, -                              OnigEncoding enc) -{ -  int i, n, arg_type; -  RelAddrType addr; -  LengthType len; -  MemNumType mem; -  StateCheckNumType scn; -  OnigCodePoint code; -  OnigOptionType option; -  ModeType mode; -  UChar *q; - -  fprintf(f, "%s", op2name(*bp)); -  arg_type = op2arg_type(*bp); -  if (arg_type != ARG_SPECIAL) { -    bp++; -    switch (arg_type) { -    case ARG_NON: -      break; -    case ARG_RELADDR: -      GET_RELADDR_INC(addr, bp); -      fputc(':', f); -      p_rel_addr(f, addr, bp, start); -      break; -    case ARG_ABSADDR: -      GET_ABSADDR_INC(addr, bp); -      fprintf(f, ":{/%d}", addr); -      break; -    case ARG_LENGTH: -      GET_LENGTH_INC(len, bp); -      fprintf(f, ":%d", len); -      break; -    case ARG_MEMNUM: -      mem = *((MemNumType* )bp); -      bp += SIZE_MEMNUM; -      fprintf(f, ":%d", mem); -      break; -    case ARG_OPTION: -      { -        OnigOptionType option = *((OnigOptionType* )bp); -        bp += SIZE_OPTION; -        fprintf(f, ":%d", option); -      } -      break; - -    case ARG_STATE_CHECK: -      scn = *((StateCheckNumType* )bp); -      bp += SIZE_STATE_CHECK_NUM; -      fprintf(f, ":%d", scn); -      break; - -    case ARG_MODE: -      mode = *((ModeType* )bp); -      bp += SIZE_MODE; -      fprintf(f, ":%d", mode); -      break; -    } -  } -  else { -    switch (*bp++) { -    case OP_EXACT1: -    case OP_ANYCHAR_STAR_PEEK_NEXT: -    case OP_ANYCHAR_ML_STAR_PEEK_NEXT: -      p_string(f, 1, bp++); break; -    case OP_EXACT2: -      p_string(f, 2, bp); bp += 2; break; -    case OP_EXACT3: -      p_string(f, 3, bp); bp += 3; break; -    case OP_EXACT4: -      p_string(f, 4, bp); bp += 4; break; -    case OP_EXACT5: -      p_string(f, 5, bp); bp += 5; break; -    case OP_EXACTN: -      GET_LENGTH_INC(len, bp); -      p_len_string(f, len, 1, bp); -      bp += len; -      break; -     -    case OP_EXACTMB2N1: -      p_string(f, 2, bp); bp += 2; break; -    case OP_EXACTMB2N2: -      p_string(f, 4, bp); bp += 4; break; -    case OP_EXACTMB2N3: -      p_string(f, 6, bp); bp += 6; break; -    case OP_EXACTMB2N: -      GET_LENGTH_INC(len, bp); -      p_len_string(f, len, 2, bp); -      bp += len * 2; -      break; -    case OP_EXACTMB3N: -      GET_LENGTH_INC(len, bp); -      p_len_string(f, len, 3, bp); -      bp += len * 3; -      break; -    case OP_EXACTMBN: -      { -        int mb_len; -       -        GET_LENGTH_INC(mb_len, bp); -        GET_LENGTH_INC(len, bp); -        fprintf(f, ":%d:%d:", mb_len, len); -        n = len * mb_len; -        while (n-- > 0) { fputc(*bp++, f); } -      } -      break; - -    case OP_EXACT1_IC: -      len = enclen(enc, bp); -      p_string(f, len, bp); -      bp += len; -      break; -    case OP_EXACTN_IC: -      GET_LENGTH_INC(len, bp); -      p_len_string(f, len, 1, bp); -      bp += len; -      break; - -    case OP_CCLASS: -      n = bitset_on_num((BitSetRef )bp); -      bp += SIZE_BITSET; -      fprintf(f, ":%d", n); -      break; - -    case OP_CCLASS_NOT: -      n = bitset_on_num((BitSetRef )bp); -      bp += SIZE_BITSET; -      fprintf(f, ":%d", n); -      break; - -    case OP_CCLASS_MB: -    case OP_CCLASS_MB_NOT: -      GET_LENGTH_INC(len, bp); -      q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -      ALIGNMENT_RIGHT(q); -#endif -      GET_CODE_POINT(code, q); -      bp += len; -      fprintf(f, ":%d:%d", (int )code, len); -      break; - -    case OP_CCLASS_MIX: -    case OP_CCLASS_MIX_NOT: -      n = bitset_on_num((BitSetRef )bp); -      bp += SIZE_BITSET; -      GET_LENGTH_INC(len, bp); -      q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -      ALIGNMENT_RIGHT(q); -#endif -      GET_CODE_POINT(code, q); -      bp += len; -      fprintf(f, ":%d:%d:%d", n, (int )code, len); -      break; - -#ifdef USE_OP_CCLASS_NODE -    case OP_CCLASS_NODE: -      { -        CClassNode *cc; - -        GET_POINTER_INC(cc, bp); -        n = bitset_on_num(cc->bs); -        fprintf(f, ":%p:%d", cc, n); -      } -      break; -#endif - -    case OP_BACKREF_N_IC: -      mem = *((MemNumType* )bp); -      bp += SIZE_MEMNUM; -      fprintf(f, ":%d", mem); -      break; - -    case OP_BACKREF_MULTI_IC: -    case OP_BACKREF_MULTI: -    case OP_BACKREF_CHECK: -      fputs(" ", f); -      GET_LENGTH_INC(len, bp); -      for (i = 0; i < len; i++) { -        GET_MEMNUM_INC(mem, bp); -        if (i > 0) fputs(", ", f); -        fprintf(f, "%d", mem); -      } -      break; - -    case OP_BACKREF_WITH_LEVEL: -      GET_OPTION_INC(option, bp); -      fprintf(f, ":%d", option); -      /* fall */ -    case OP_BACKREF_CHECK_WITH_LEVEL: -      { -        LengthType level; - -        GET_LENGTH_INC(level, bp); -        fprintf(f, ":%d", level); - -        fputs(" ", f); -        GET_LENGTH_INC(len, bp); -        for (i = 0; i < len; i++) { -          GET_MEMNUM_INC(mem, bp); -          if (i > 0) fputs(", ", f); -          fprintf(f, "%d", mem); -        } -      } -      break; - -    case OP_REPEAT: -    case OP_REPEAT_NG: -      { -        mem = *((MemNumType* )bp); -        bp += SIZE_MEMNUM; -        addr = *((RelAddrType* )bp); -        bp += SIZE_RELADDR; -        fprintf(f, ":%d:%d", mem, addr); -      } -      break; - -    case OP_PUSH_OR_JUMP_EXACT1: -    case OP_PUSH_IF_PEEK_NEXT: -      addr = *((RelAddrType* )bp); -      bp += SIZE_RELADDR; -      fputc(':', f); -      p_rel_addr(f, addr, bp, start); -      p_string(f, 1, bp); -      bp += 1; -      break; - -    case OP_LOOK_BEHIND: -      GET_LENGTH_INC(len, bp); -      fprintf(f, ":%d", len); -      break; - -    case OP_LOOK_BEHIND_NOT_START: -      GET_RELADDR_INC(addr, bp); -      GET_LENGTH_INC(len, bp); -      fprintf(f, ":%d:", len); -      p_rel_addr(f, addr, bp, start); -      break; - -    case OP_STATE_CHECK_PUSH: -    case OP_STATE_CHECK_PUSH_OR_JUMP: -      scn = *((StateCheckNumType* )bp); -      bp += SIZE_STATE_CHECK_NUM; -      addr = *((RelAddrType* )bp); -      bp += SIZE_RELADDR; -      fprintf(f, ":%d:", scn); -      p_rel_addr(f, addr, bp, start); -      break; - -    case OP_PUSH_SAVE_VAL: -      { -        SaveType type; -        GET_SAVE_TYPE_INC(type, bp); -        GET_MEMNUM_INC(mem, bp); -        fprintf(f, ":%d:%d", type, mem); -      } -      break; - -    case OP_UPDATE_VAR: -      { -        UpdateVarType type; -        GET_UPDATE_VAR_TYPE_INC(type, bp); -        GET_MEMNUM_INC(mem, bp); -        fprintf(f, ":%d:%d", type, mem); -      } -      break; - -    default: -      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); -    } -  } -  if (nextp) *nextp = bp; -} -#endif /* ONIG_DEBUG */ - -#ifdef ONIG_DEBUG_COMPILE -static void -print_compiled_byte_code_list(FILE* f, regex_t* reg) -{ -  UChar* bp; -  UChar* start = reg->p; -  UChar* end   = reg->p + reg->used; - -  fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", -          reg->bt_mem_start, reg->bt_mem_end); -  fprintf(f, "code-length: %d\n", reg->used); - -  bp = start; -  while (bp < end) { -    int pos = bp - start; - -    fprintf(f, "%4d: ", pos); -    onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); -    fprintf(f, "\n"); -  } -  fprintf(f, "\n"); -} -#endif - -#ifdef ONIG_DEBUG_PARSE - -static void  Indent(FILE* f, int indent)  {    int i; @@ -7334,6 +6571,17 @@ print_indent_tree(FILE* f, Node* node, int indent)      case GIMMICK_UPDATE_VAR:        fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);        break; +#ifdef USE_CALLOUT +    case GIMMICK_CALLOUT: +      switch (GIMMICK_(node)->detail_type) { +      case ONIG_CALLOUT_OF_CONTENTS: +        fprintf(f, "callout:contents:%d", GIMMICK_(node)->num); +        break; +      case ONIG_CALLOUT_OF_NAME: +        fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num); +        break; +      } +#endif      }      break; diff --git a/src/regenc.c b/src/regenc.c index 7ded5a8..21f3536 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -2,7 +2,7 @@    regenc.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,66 @@  OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; +#define INITED_LIST_SIZE  20 + +static int InitedListNum; + +static struct { +  OnigEncoding enc; +  int          inited; +} InitedList[INITED_LIST_SIZE]; + +static int +enc_inited_entry(OnigEncoding enc) +{ +  int i; + +  for (i = 0; i < InitedListNum; i++) { +    if (InitedList[i].enc == enc) { +      InitedList[i].inited = 1; +      return i; +    } +  } + +  i = InitedListNum; +  if (i < INITED_LIST_SIZE - 1) { +    InitedList[i].enc    = enc; +    InitedList[i].inited = 1; +    InitedListNum++; +    return i; +  } + +  return -1; +} + +static int +enc_is_inited(OnigEncoding enc) +{ +  int i; + +  for (i = 0; i < InitedListNum; i++) { +    if (InitedList[i].enc == enc) { +      return InitedList[i].inited; +    } +  } + +  return 0; +} + +extern int +onigenc_end(void) +{ +  int i; + +  for (i = 0; i < InitedListNum; i++) { +    InitedList[i].enc    = 0; +    InitedList[i].inited = 0; +  } + +  InitedListNum = 0; +  return ONIG_NORMAL; +} +  extern int  onigenc_init(void)  { @@ -40,8 +100,23 @@ onigenc_init(void)  extern int  onig_initialize_encoding(OnigEncoding enc)  { -  if (enc->init != 0 && (enc->is_initialized() == 0)) { -    int r = (enc->init)(); +  int r; + +  if (enc != ONIG_ENCODING_ASCII && +      ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) { +    OnigEncoding ascii = ONIG_ENCODING_ASCII; +    if (ascii->init != 0 && enc_is_inited(ascii) == 0) { +      r = ascii->init(); +      if (r != ONIG_NORMAL) return r; +      enc_inited_entry(ascii); +    } +  } + +  if (enc->init != 0 && +      enc_is_inited(enc) == 0) { +    r = (enc->init)(); +    if (r == ONIG_NORMAL) +      enc_inited_entry(enc);      return r;    } @@ -62,6 +137,25 @@ onigenc_set_default_encoding(OnigEncoding enc)  }  extern UChar* +onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end) +{ +  int slen, term_len, i; +  UChar *r; + +  slen = (int )(end - s); +  term_len = ONIGENC_MBC_MINLEN(enc); + +  r = (UChar* )xmalloc(slen + term_len); +  CHECK_NULL_RETURN(r); +  xmemcpy(r, s, slen); + +  for (i = 0; i < term_len; i++) +    r[slen + i] = (UChar )0; + +  return r; +} + +extern UChar*  onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)  {    UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); diff --git a/src/regenc.h b/src/regenc.h index cda3bcd..4dd89ba 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -4,7 +4,7 @@    regenc.h -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -29,11 +29,12 @@   * SUCH DAMAGE.   */ -#ifndef PACKAGE -/* PACKAGE is defined in config.h */ -#include "config.h" +#ifndef ONIGURUMA_EXPORT +#define ONIGURUMA_EXPORT  #endif +#include "config.h" +  #ifdef ONIG_ESCAPE_UCHAR_COLLISION  #undef ONIG_ESCAPE_UCHAR_COLLISION  #endif @@ -118,51 +119,53 @@ struct PropertyNameCtype {  #define ONIG_ENCODING_INIT_DEFAULT           ONIG_ENCODING_ASCII +  /* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); +extern int onigenc_end(void); +extern int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); +extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));  /* methods for single byte encoding */ -ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); -ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); -ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); -ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end)); +extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +extern int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); +extern OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); +extern int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); +extern int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); +extern UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); +extern int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); +extern int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); +extern int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end)); +extern int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end));  /* methods for multi byte encoding */ -ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); -ONIG_EXTERN struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); -//ONIG_EXTERN const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); +extern OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); +extern int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +extern int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); +extern int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +extern int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +extern int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +extern int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end)); +extern int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +extern int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); +extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +extern struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); +extern struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); +//extern const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len));  /* in enc/unicode.c */ -ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); -ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end)); +extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); +extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); +extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[])); +extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); +extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));  #define UTF16_IS_SURROGATE_FIRST(c)    (((c) & 0xfc) == 0xd8) @@ -218,21 +221,21 @@ extern int onig_codes_byte_at(OnigCodePoint code[], int at);  #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \    OnigEncISO_8859_1_ToUpperCaseTable[c] -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; +extern const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +extern const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; -ONIG_EXTERN int +extern int  onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); -ONIG_EXTERN UChar* +extern UChar*  onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));  /* defined in regexec.c, but used in enc/xxx.c */  extern int  onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); -ONIG_EXTERN OnigEncoding  OnigEncDefaultCharEncoding; -ONIG_EXTERN const UChar  OnigEncAsciiToLowerCaseTable[]; -ONIG_EXTERN const UChar  OnigEncAsciiToUpperCaseTable[]; -ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; +extern OnigEncoding  OnigEncDefaultCharEncoding; +extern const UChar  OnigEncAsciiToLowerCaseTable[]; +extern const UChar  OnigEncAsciiToUpperCaseTable[]; +extern const unsigned short OnigEncAsciiCtypeTable[];  #define ONIGENC_IS_ASCII_CODE(code)  ((code) < 0x80) @@ -249,4 +252,6 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];  #define ONIGENC_IS_UNICODE_ENCODING(enc) \    ((enc)->is_code_ctype == onigenc_unicode_is_code_ctype) +#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)  ((enc)->min_enc_len == 1) +  #endif /* REGENC_H */ diff --git a/src/regerror.c b/src/regerror.c index e7d2570..70efe9a 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -2,7 +2,7 @@    regerror.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -43,19 +43,21 @@ onig_error_code_to_format(int code)  {    char *p; -  if (code >= 0) return (UChar* )0; -    switch (code) {    case ONIG_MISMATCH:      p = "mismatch"; break;    case ONIG_NO_SUPPORT_CONFIG:      p = "no support in this configuration"; break; +  case ONIG_ABORT: +    p = "abort"; break;    case ONIGERR_MEMORY:      p = "fail to memory allocation"; break;    case ONIGERR_MATCH_STACK_LIMIT_OVER:      p = "match-stack limit over"; break;    case ONIGERR_PARSE_DEPTH_LIMIT_OVER:      p = "parse depth limit over"; break; +  case ONIGERR_RETRY_LIMIT_IN_MATCH_OVER: +    p = "retry-limit-in-match over"; break;    case ONIGERR_TYPE_BUG:      p = "undefined type (bug)"; break;    case ONIGERR_PARSER_BUG: @@ -172,6 +174,18 @@ onig_error_code_to_format(int code)      p = "invalid absent group pattern"; break;    case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN:      p = "invalid absent group generator pattern"; break; +  case ONIGERR_INVALID_CALLOUT_PATTERN: +    p = "invalid callout pattern"; break; +  case ONIGERR_INVALID_CALLOUT_NAME: +    p = "invalid callout name"; break; +  case ONIGERR_UNDEFINED_CALLOUT_NAME: +    p = "undefined callout name"; break; +  case ONIGERR_INVALID_CALLOUT_BODY: +    p = "invalid callout body"; break; +  case ONIGERR_INVALID_CALLOUT_TAG_NAME: +    p = "invalid callout tag name"; break; +  case ONIGERR_INVALID_CALLOUT_ARG: +    p = "invalid callout arg"; break;    case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:      p = "not supported encoding combination"; break;    case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: diff --git a/src/regexec.c b/src/regexec.c index 53f42ee..35e3698 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@    regexec.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -26,11 +26,8 @@   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF   * SUCH DAMAGE.   */ -  #include "regint.h" -#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -  #define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \    ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end)) @@ -40,6 +37,565 @@     ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))  #endif +#define CHECK_INTERRUPT_IN_MATCH + +#ifdef USE_CALLOUT +typedef struct { +  int last_match_at_call_counter; +  struct { +    OnigType  type; +    OnigValue val; +  } slot[ONIG_CALLOUT_DATA_SLOT_NUM]; +} CalloutData; +#endif + +struct OnigMatchParamStruct { +  unsigned int    match_stack_limit; +  unsigned long   retry_limit_in_match; +  OnigCalloutFunc progress_callout_of_contents; +  OnigCalloutFunc retraction_callout_of_contents; +#ifdef USE_CALLOUT +  int             match_at_call_counter; +  void*           callout_user_data; +  CalloutData*    callout_data; +  int             callout_data_alloc_num; +#endif +}; + +extern int +onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param, +                                               unsigned int limit) +{ +  param->match_stack_limit = limit; +  return ONIG_NORMAL; +} + +extern int +onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param, +                                             unsigned long limit) +{ +  param->retry_limit_in_match = limit; +  return ONIG_NORMAL; +} + +extern int +onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ +  param->progress_callout_of_contents = f; +  return ONIG_NORMAL; +} + +extern int +onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ +  param->retraction_callout_of_contents = f; +  return ONIG_NORMAL; +} + + + +typedef struct { +  void* stack_p; +  int   stack_n; +  OnigOptionType options; +  OnigRegion*    region; +  int            ptr_num; +  const UChar*   start;   /* search start position (for \G: BEGIN_POSITION) */ +  unsigned int   match_stack_limit; +  unsigned long  retry_limit_in_match; +  OnigMatchParam* mp; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */ +  UChar* best_s; +#endif +} MatchArg; + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL     -1 +#define ARG_NON          0 +#define ARG_RELADDR      1 +#define ARG_ABSADDR      2 +#define ARG_LENGTH       3 +#define ARG_MEMNUM       4 +#define ARG_OPTION       5 +#define ARG_MODE         6 + +typedef struct { +  short int opcode; +  char*     name; +  short int arg_type; +} OpInfoType; + +static OpInfoType OpInfo[] = { +  { OP_FINISH,            "finish",          ARG_NON }, +  { OP_END,               "end",             ARG_NON }, +  { OP_EXACT1,            "exact1",          ARG_SPECIAL }, +  { OP_EXACT2,            "exact2",          ARG_SPECIAL }, +  { OP_EXACT3,            "exact3",          ARG_SPECIAL }, +  { OP_EXACT4,            "exact4",          ARG_SPECIAL }, +  { OP_EXACT5,            "exact5",          ARG_SPECIAL }, +  { OP_EXACTN,            "exactn",          ARG_SPECIAL }, +  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL }, +  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL }, +  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL }, +  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL }, +  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL }, +  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL }, +  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL }, +  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL }, +  { OP_CCLASS,            "cclass",          ARG_SPECIAL }, +  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL }, +  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL }, +  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL }, +  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL }, +  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL }, +#ifdef USE_OP_CCLASS_NODE +  { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL }, +#endif +  { OP_ANYCHAR,           "anychar",         ARG_NON }, +  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON }, +  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON }, +  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON }, +  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, +  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, +  { OP_WORD,                "word",            ARG_NON }, +  { OP_WORD_ASCII,          "word-ascii",      ARG_NON }, +  { OP_NO_WORD,             "not-word",        ARG_NON }, +  { OP_NO_WORD_ASCII,       "not-word-ascii",  ARG_NON }, +  { OP_WORD_BOUNDARY,       "word-boundary",     ARG_MODE }, +  { OP_NO_WORD_BOUNDARY,    "not-word-boundary", ARG_MODE }, +  { OP_WORD_BEGIN,          "word-begin",      ARG_MODE }, +  { OP_WORD_END,            "word-end",        ARG_MODE }, +  { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "extended-grapheme-cluster-boundary", ARG_NON }, +  { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary", ARG_NON }, +  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON }, +  { OP_END_BUF,             "end-buf",         ARG_NON }, +  { OP_BEGIN_LINE,          "begin-line",      ARG_NON }, +  { OP_END_LINE,            "end-line",        ARG_NON }, +  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON }, +  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON }, +  { OP_BACKREF1,            "backref1",             ARG_NON }, +  { OP_BACKREF2,            "backref2",             ARG_NON }, +  { OP_BACKREF_N,            "backref-n",           ARG_MEMNUM  }, +  { OP_BACKREF_N_IC,         "backref-n-ic",        ARG_SPECIAL }, +  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL }, +  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL }, +  { OP_BACKREF_WITH_LEVEL,  "backref_with_level",   ARG_SPECIAL }, +  { OP_BACKREF_CHECK,       "backref_check",        ARG_SPECIAL }, +  { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, +  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  }, +  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  }, +  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  }, +  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  }, +  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  }, +  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  }, +  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  }, +  { OP_SET_OPTION,          "set-option",           ARG_OPTION  }, +  { OP_FAIL,                "fail",                 ARG_NON }, +  { OP_JUMP,                "jump",                 ARG_RELADDR }, +  { OP_PUSH,                "push",                 ARG_RELADDR }, +  { OP_PUSH_SUPER,          "push-super",           ARG_RELADDR }, +  { OP_POP_OUT,             "pop-out",              ARG_NON }, +  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL }, +  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL }, +  { OP_REPEAT,              "repeat",               ARG_SPECIAL }, +  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL }, +  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  }, +  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  }, +  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  }, +  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  }, +  { OP_EMPTY_CHECK_START,   "empty-check-start",    ARG_MEMNUM  }, +  { OP_EMPTY_CHECK_END,     "empty-check-end",      ARG_MEMNUM  }, +  { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM  }, +  { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM  }, +  { OP_PREC_READ_START,      "push-pos",             ARG_NON }, +  { OP_PREC_READ_END,        "pop-pos",              ARG_NON }, +  { OP_PREC_READ_NOT_START,  "prec-read-not-start",  ARG_RELADDR }, +  { OP_PREC_READ_NOT_END,    "prec-read-not-end",    ARG_NON }, +  { OP_ATOMIC_START,         "atomic-start",         ARG_NON }, +  { OP_ATOMIC_END,           "atomic-end",           ARG_NON }, +  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL }, +  { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, +  { OP_LOOK_BEHIND_NOT_END,  "look-behind-not-end",  ARG_NON }, +  { OP_CALL,                 "call",                 ARG_ABSADDR }, +  { OP_RETURN,               "return",               ARG_NON }, +  { OP_PUSH_SAVE_VAL,        "push-save-val",        ARG_SPECIAL }, +  { OP_UPDATE_VAR,           "update-var",           ARG_SPECIAL }, +#ifdef USE_CALLOUT +  { OP_CALLOUT_CONTENTS,     "callout-contents",     ARG_SPECIAL }, +  { OP_CALLOUT_NAME,         "callout-name",         ARG_SPECIAL }, +#endif +  { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ +  int i; + +  for (i = 0; OpInfo[i].opcode >= 0; i++) { +    if (opcode == OpInfo[i].opcode) +      return OpInfo[i].name; +  } +  return ""; +} + +static int +op2arg_type(int opcode) +{ +  int i; + +  for (i = 0; OpInfo[i].opcode >= 0; i++) { +    if (opcode == OpInfo[i].opcode) +      return OpInfo[i].arg_type; +  } +  return ARG_SPECIAL; +} + +static void +p_string(FILE* f, int len, UChar* s) +{ +  fputs(":", f); +  while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ +  int x = len * mb_len; + +  fprintf(f, ":%d:", len); +  while (x-- > 0) { fputc(*s++, f); } +} + +static void +p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) +{ +  RelAddrType curr = (RelAddrType )(p - start); + +  fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); +} + +static int +bitset_on_num(BitSetRef bs) +{ +  int i, n; + +  n = 0; +  for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +    if (BITSET_AT(bs, i)) n++; +  } +  return n; +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, +                              OnigEncoding enc) +{ +  int i, n, arg_type; +  RelAddrType addr; +  LengthType len; +  MemNumType mem; +  OnigCodePoint code; +  OnigOptionType option; +  ModeType mode; +  UChar *q; + +  fprintf(f, "%s", op2name(*bp)); +  arg_type = op2arg_type(*bp); +  if (arg_type != ARG_SPECIAL) { +    bp++; +    switch (arg_type) { +    case ARG_NON: +      break; +    case ARG_RELADDR: +      GET_RELADDR_INC(addr, bp); +      fputc(':', f); +      p_rel_addr(f, addr, bp, start); +      break; +    case ARG_ABSADDR: +      GET_ABSADDR_INC(addr, bp); +      fprintf(f, ":{/%d}", addr); +      break; +    case ARG_LENGTH: +      GET_LENGTH_INC(len, bp); +      fprintf(f, ":%d", len); +      break; +    case ARG_MEMNUM: +      mem = *((MemNumType* )bp); +      bp += SIZE_MEMNUM; +      fprintf(f, ":%d", mem); +      break; +    case ARG_OPTION: +      { +        OnigOptionType option = *((OnigOptionType* )bp); +        bp += SIZE_OPTION; +        fprintf(f, ":%d", option); +      } +      break; + +    case ARG_MODE: +      mode = *((ModeType* )bp); +      bp += SIZE_MODE; +      fprintf(f, ":%d", mode); +      break; +    } +  } +  else { +    switch (*bp++) { +    case OP_EXACT1: +    case OP_ANYCHAR_STAR_PEEK_NEXT: +    case OP_ANYCHAR_ML_STAR_PEEK_NEXT: +      p_string(f, 1, bp++); break; +    case OP_EXACT2: +      p_string(f, 2, bp); bp += 2; break; +    case OP_EXACT3: +      p_string(f, 3, bp); bp += 3; break; +    case OP_EXACT4: +      p_string(f, 4, bp); bp += 4; break; +    case OP_EXACT5: +      p_string(f, 5, bp); bp += 5; break; +    case OP_EXACTN: +      GET_LENGTH_INC(len, bp); +      p_len_string(f, len, 1, bp); +      bp += len; +      break; + +    case OP_EXACTMB2N1: +      p_string(f, 2, bp); bp += 2; break; +    case OP_EXACTMB2N2: +      p_string(f, 4, bp); bp += 4; break; +    case OP_EXACTMB2N3: +      p_string(f, 6, bp); bp += 6; break; +    case OP_EXACTMB2N: +      GET_LENGTH_INC(len, bp); +      p_len_string(f, len, 2, bp); +      bp += len * 2; +      break; +    case OP_EXACTMB3N: +      GET_LENGTH_INC(len, bp); +      p_len_string(f, len, 3, bp); +      bp += len * 3; +      break; +    case OP_EXACTMBN: +      { +        int mb_len; + +        GET_LENGTH_INC(mb_len, bp); +        GET_LENGTH_INC(len, bp); +        fprintf(f, ":%d:%d:", mb_len, len); +        n = len * mb_len; +        while (n-- > 0) { fputc(*bp++, f); } +      } +      break; + +    case OP_EXACT1_IC: +      len = enclen(enc, bp); +      p_string(f, len, bp); +      bp += len; +      break; +    case OP_EXACTN_IC: +      GET_LENGTH_INC(len, bp); +      p_len_string(f, len, 1, bp); +      bp += len; +      break; + +    case OP_CCLASS: +      n = bitset_on_num((BitSetRef )bp); +      bp += SIZE_BITSET; +      fprintf(f, ":%d", n); +      break; + +    case OP_CCLASS_NOT: +      n = bitset_on_num((BitSetRef )bp); +      bp += SIZE_BITSET; +      fprintf(f, ":%d", n); +      break; + +    case OP_CCLASS_MB: +    case OP_CCLASS_MB_NOT: +      GET_LENGTH_INC(len, bp); +      q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +      ALIGNMENT_RIGHT(q); +#endif +      GET_CODE_POINT(code, q); +      bp += len; +      fprintf(f, ":%d:%d", (int )code, len); +      break; + +    case OP_CCLASS_MIX: +    case OP_CCLASS_MIX_NOT: +      n = bitset_on_num((BitSetRef )bp); +      bp += SIZE_BITSET; +      GET_LENGTH_INC(len, bp); +      q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +      ALIGNMENT_RIGHT(q); +#endif +      GET_CODE_POINT(code, q); +      bp += len; +      fprintf(f, ":%d:%d:%d", n, (int )code, len); +      break; + +#ifdef USE_OP_CCLASS_NODE +    case OP_CCLASS_NODE: +      { +        CClassNode *cc; + +        GET_POINTER_INC(cc, bp); +        n = bitset_on_num(cc->bs); +        fprintf(f, ":%p:%d", cc, n); +      } +      break; +#endif + +    case OP_BACKREF_N_IC: +      mem = *((MemNumType* )bp); +      bp += SIZE_MEMNUM; +      fprintf(f, ":%d", mem); +      break; + +    case OP_BACKREF_MULTI_IC: +    case OP_BACKREF_MULTI: +    case OP_BACKREF_CHECK: +      fputs(" ", f); +      GET_LENGTH_INC(len, bp); +      for (i = 0; i < len; i++) { +        GET_MEMNUM_INC(mem, bp); +        if (i > 0) fputs(", ", f); +        fprintf(f, "%d", mem); +      } +      break; + +    case OP_BACKREF_WITH_LEVEL: +      GET_OPTION_INC(option, bp); +      fprintf(f, ":%d", option); +      /* fall */ +    case OP_BACKREF_CHECK_WITH_LEVEL: +      { +        LengthType level; + +        GET_LENGTH_INC(level, bp); +        fprintf(f, ":%d", level); + +        fputs(" ", f); +        GET_LENGTH_INC(len, bp); +        for (i = 0; i < len; i++) { +          GET_MEMNUM_INC(mem, bp); +          if (i > 0) fputs(", ", f); +          fprintf(f, "%d", mem); +        } +      } +      break; + +    case OP_REPEAT: +    case OP_REPEAT_NG: +      { +        mem = *((MemNumType* )bp); +        bp += SIZE_MEMNUM; +        addr = *((RelAddrType* )bp); +        bp += SIZE_RELADDR; +        fprintf(f, ":%d:%d", mem, addr); +      } +      break; + +    case OP_PUSH_OR_JUMP_EXACT1: +    case OP_PUSH_IF_PEEK_NEXT: +      addr = *((RelAddrType* )bp); +      bp += SIZE_RELADDR; +      fputc(':', f); +      p_rel_addr(f, addr, bp, start); +      p_string(f, 1, bp); +      bp += 1; +      break; + +    case OP_LOOK_BEHIND: +      GET_LENGTH_INC(len, bp); +      fprintf(f, ":%d", len); +      break; + +    case OP_LOOK_BEHIND_NOT_START: +      GET_RELADDR_INC(addr, bp); +      GET_LENGTH_INC(len, bp); +      fprintf(f, ":%d:", len); +      p_rel_addr(f, addr, bp, start); +      break; + +    case OP_PUSH_SAVE_VAL: +      { +        SaveType type; +        GET_SAVE_TYPE_INC(type, bp); +        GET_MEMNUM_INC(mem, bp); +        fprintf(f, ":%d:%d", type, mem); +      } +      break; + +    case OP_UPDATE_VAR: +      { +        UpdateVarType type; +        GET_UPDATE_VAR_TYPE_INC(type, bp); +        GET_MEMNUM_INC(mem, bp); +        fprintf(f, ":%d:%d", type, mem); +      } +      break; + +#ifdef USE_CALLOUT +    case OP_CALLOUT_CONTENTS: +      { +        GET_MEMNUM_INC(mem,  bp); // number +        fprintf(f, ":%d", mem); +      } +      break; + +    case OP_CALLOUT_NAME: +      { +        int id; + +        GET_MEMNUM_INC(id,   bp); // id +        GET_MEMNUM_INC(mem,  bp); // number + +        fprintf(f, ":%d:%d", id, mem); +      } +      break; +#endif + +    default: +      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); +    } +  } +  if (nextp) *nextp = bp; +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_COMPILE +extern void +onig_print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ +  UChar* bp; +  UChar* start = reg->p; +  UChar* end   = reg->p + reg->used; + +  fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", +          reg->bt_mem_start, reg->bt_mem_end); +  fprintf(f, "code-length: %d\n", reg->used); + +  bp = start; +  while (bp < end) { +    int pos = bp - start; + +    fprintf(f, "%4d: ", pos); +    onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); +    fprintf(f, "\n"); +  } +  fprintf(f, "\n"); +} +#endif + +  #ifdef USE_CAPTURE_HISTORY  static void history_tree_free(OnigCaptureTreeNode* node); @@ -304,6 +860,45 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)  #endif  } +#ifdef USE_CALLOUT +#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \ +  args.in            = (ain);\ +  args.name_id       = (aname_id);\ +  args.num           = anum;\ +  args.regex         = reg;\ +  args.string        = str;\ +  args.string_end    = end;\ +  args.start         = sstart;\ +  args.right_range   = right_range;\ +  args.current       = s;\ +  args.retry_in_match_counter = retry_in_match_counter;\ +  args.msa           = msa;\ +  args.stk_base      = stk_base;\ +  args.stk           = stk;\ +  args.mem_start_stk = mem_start_stk;\ +  args.mem_end_stk   = mem_end_stk;\ +  result = (func)(&args, user);\ +} while (0) + +#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\ +  int result;\ +  OnigCalloutArgs args;\ +  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\ +  switch (result) {\ +  case ONIG_CALLOUT_FAIL:\ +  case ONIG_CALLOUT_SUCCESS:\ +    break;\ +  default:\ +    if (result > 0) {\ +      result = ONIGERR_INVALID_ARGUMENT;\ +    }\ +    best_len = result;\ +    goto finish;\ +    break;\ +  }\ +} while(0) +#endif +  /** stack **/  #define INVALID_STACK_INDEX   -1 @@ -316,40 +911,43 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)  #define STK_ALT                   (0x0002 | STK_ALT_FLAG)  #define STK_ALT_PREC_READ_NOT     (0x0004 | STK_ALT_FLAG)  #define STK_ALT_LOOK_BEHIND_NOT   (0x0006 | STK_ALT_FLAG) +  /* handled by normal-POP */ -#define STK_MEM_START              0x0100 -#define STK_MEM_END                0x8200 -#define STK_REPEAT_INC             0x0300 -#define STK_STATE_CHECK_MARK       0x1000 +#define STK_MEM_START              0x0010 +#define STK_MEM_END                0x8030 +#define STK_REPEAT_INC             0x0050 +#ifdef USE_CALLOUT +#define STK_CALLOUT                0x0070 +#endif +  /* avoided by normal-POP */  #define STK_VOID                   0x0000  /* for fill a blank */  #define STK_EMPTY_CHECK_START      0x3000  #define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */ -#define STK_MEM_END_MARK           0x8400 -#define STK_TO_VOID_START          0x0500  /* mark for "(?>...)" */ -#define STK_REPEAT                 0x0600 -#define STK_CALL_FRAME             0x0700 -#define STK_RETURN                 0x0800 -#define STK_SAVE_VAL               0x0900 +#define STK_MEM_END_MARK           0x8100 +#define STK_TO_VOID_START          0x1200  /* mark for "(?>...)" */ +#define STK_REPEAT                 0x0300 +#define STK_CALL_FRAME             0x0400 +#define STK_RETURN                 0x0500 +#define STK_SAVE_VAL               0x0600  /* stack type check mask */  #define STK_MASK_POP_USED          STK_ALT_FLAG -#define STK_MASK_TO_VOID_TARGET    0x10fe +#define STK_MASK_POP_HANDLED       0x0010 +#define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004) +#define STK_MASK_TO_VOID_TARGET    0x100e  #define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */  typedef intptr_t StackIndex;  typedef struct _StackType {    unsigned int type; -  int id; +  int zid;    union {      struct {        UChar *pcode;      /* byte code position */        UChar *pstr;       /* string position */        UChar *pstr_prev;  /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK -      unsigned int state_check; -#endif      } state;      struct {        int   count;       /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ @@ -378,67 +976,66 @@ typedef struct _StackType {        UChar* v;        UChar* v2;      } val; +#ifdef USE_CALLOUT +    struct { +      int num; +      OnigCalloutFunc func; +    } callout; +#endif    } u;  } StackType; +#ifdef USE_CALLOUT + +struct OnigCalloutArgsStruct { +  OnigCalloutIn    in; +  int              name_id;   /* name id or ONIG_NON_NAME_ID */ +  int              num; +  OnigRegex        regex; +  const OnigUChar* string; +  const OnigUChar* string_end; +  const OnigUChar* start; +  const OnigUChar* right_range; +  const OnigUChar* current;  // current matching position +  unsigned long    retry_in_match_counter; + +  /* invisible to users */ +  MatchArg*   msa; +  StackType*  stk_base; +  StackType*  stk; +  StackIndex* mem_start_stk; +  StackIndex* mem_end_stk; +}; + +#endif +  #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \    (msa).stack_p  = (void* )0;\    (msa).options  = (arg_option);\    (msa).region   = (arg_region);\    (msa).start    = (arg_start);\ +  (msa).match_stack_limit  = (mp)->match_stack_limit;\ +  (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ +  (msa).mp = mp;\    (msa).best_len = ONIG_MISMATCH;\    (msa).ptr_num  = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \  } while(0)  #else -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \    (msa).stack_p  = (void* )0;\    (msa).options  = (arg_option);\    (msa).region   = (arg_region);\    (msa).start    = (arg_start);\ +  (msa).match_stack_limit  = (mp)->match_stack_limit;\ +  (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ +  (msa).mp = mp;\    (msa).ptr_num  = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \  } while(0)  #endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16 - -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\ -  if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ -    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ -    offset = ((offset) * (state_num)) >> 3;\ -    if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ -      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ -        (msa).state_check_buff = (void* )xmalloc(size);\ -      else \ -        (msa).state_check_buff = (void* )xalloca(size);\ -      xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ -              (size_t )(size - (offset))); \ -      (msa).state_check_buff_size = size;\ -    }\ -    else {\ -      (msa).state_check_buff = (void* )0;\ -      (msa).state_check_buff_size = 0;\ -    }\ -  }\ -  else {\ -    (msa).state_check_buff = (void* )0;\ -    (msa).state_check_buff_size = 0;\ -  }\ -} while(0) - -#define MATCH_ARG_FREE(msa) do {\ -  if ((msa).stack_p) xfree((msa).stack_p);\ -  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ -    if ((msa).state_check_buff) xfree((msa).state_check_buff);\ -  }\ -} while(0) -#else -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)  #define MATCH_ARG_FREE(msa)  if ((msa).stack_p) xfree((msa).stack_p) -#endif  #define ALLOCA_PTR_NUM_LIMIT   50 @@ -495,25 +1092,303 @@ typedef struct _StackType {    mem_end_stk   = mem_start_stk + num_mem + 1;\  } while(0) -static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; +static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;  extern unsigned int  onig_get_match_stack_limit_size(void)  { -  return MatchStackLimitSize; +  return MatchStackLimit;  }  extern int  onig_set_match_stack_limit_size(unsigned int size)  { -  MatchStackLimitSize = size; +  MatchStackLimit = size;    return 0;  } +#ifdef USE_RETRY_LIMIT_IN_MATCH + +static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH; + +#define CHECK_RETRY_LIMIT_IN_MATCH  do {\ +  if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\ +} while (0) + +#else + +#define CHECK_RETRY_LIMIT_IN_MATCH + +#endif /* USE_RETRY_LIMIT_IN_MATCH */ + +extern unsigned long +onig_get_retry_limit_in_match(void) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH +  return RetryLimitInMatch; +#else +  //return ONIG_NO_SUPPORT_CONFIG; +  return 0; +#endif +} + +extern int +onig_set_retry_limit_in_match(unsigned long size) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH +  RetryLimitInMatch = size; +  return 0; +#else +  return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +static OnigCalloutFunc DefaultProgressCallout; +static OnigCalloutFunc DefaultRetractionCallout; + +extern OnigMatchParam* +onig_new_match_param(void) +{ +  OnigMatchParam* p; + +  p = (OnigMatchParam* )xmalloc(sizeof(*p)); +  if (IS_NOT_NULL(p)) { +    onig_initialize_match_param(p); +  } + +  return p; +} + +extern void +onig_free_match_param_content(OnigMatchParam* p) +{ +#ifdef USE_CALLOUT +  if (IS_NOT_NULL(p->callout_data)) { +    xfree(p->callout_data); +    p->callout_data = 0; +  } +#endif +} + +extern void +onig_free_match_param(OnigMatchParam* p) +{ +  if (IS_NOT_NULL(p)) { +    onig_free_match_param_content(p); +    xfree(p); +  } +} + +extern int +onig_initialize_match_param(OnigMatchParam* mp) +{ +  mp->match_stack_limit  = MatchStackLimit; +#ifdef USE_RETRY_LIMIT_IN_MATCH +  mp->retry_limit_in_match = RetryLimitInMatch; +#endif +  mp->progress_callout_of_contents   = DefaultProgressCallout; +  mp->retraction_callout_of_contents = DefaultRetractionCallout; + +#ifdef USE_CALLOUT +  mp->match_at_call_counter  = 0; +  mp->callout_user_data      = 0; +  mp->callout_data           = 0; +  mp->callout_data_alloc_num = 0; +#endif + +  return ONIG_NORMAL; +} + +#ifdef USE_CALLOUT + +static int +adjust_match_param(regex_t* reg, OnigMatchParam* mp) +{ +  RegexExt* ext = REG_EXTP(reg); + +  mp->match_at_call_counter = 0; + +  if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL; + +  if (ext->callout_num > mp->callout_data_alloc_num) { +    CalloutData* d; +    size_t n = ext->callout_num * sizeof(*d); +    if (IS_NOT_NULL(mp->callout_data)) +      d = (CalloutData* )xrealloc(mp->callout_data, n); +    else +      d = (CalloutData* )xmalloc(n); +    CHECK_NULL_RETURN_MEMERR(d); + +    mp->callout_data = d; +    mp->callout_data_alloc_num = ext->callout_num; +  } + +  xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData)); +  return ONIG_NORMAL; +} + +#define ADJUST_MATCH_PARAM(reg, mp) \ +  r = adjust_match_param(reg, mp);\ +  if (r != ONIG_NORMAL) return r; + +#define CALLOUT_DATA_AT_NUM(mp, num)  ((mp)->callout_data + ((num) - 1)) + +extern int +onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args) +{ +  OnigMatchParam* mp; +  int num; +  CalloutData* d; + +  mp  = args->msa->mp; +  num = args->num; + +  d = CALLOUT_DATA_AT_NUM(mp, num); +  if (d->last_match_at_call_counter != mp->match_at_call_counter) { +    xmemset(d, 0, sizeof(*d)); +    d->last_match_at_call_counter = mp->match_at_call_counter; +    return d->last_match_at_call_counter; +  } + +  return 0; +} + +extern int +onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp, +                                     int callout_num, int slot, +                                     OnigType* type, OnigValue* val) +{ +  OnigType t; +  CalloutData* d; + +  if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + +  d = CALLOUT_DATA_AT_NUM(mp, callout_num); +  t = d->slot[slot].type; +  if (IS_NOT_NULL(type)) *type = t; +  if (IS_NOT_NULL(val))  *val  = d->slot[slot].val; +  return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args, +                                                          int slot, OnigType* type, +                                                          OnigValue* val) +{ +  return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp, +                                              args->num, slot, type, val); +} + +extern int +onig_get_callout_data(regex_t* reg, OnigMatchParam* mp, +                      int callout_num, int slot, +                      OnigType* type, OnigValue* val) +{ +  OnigType t; +  CalloutData* d; + +  if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + +  d = CALLOUT_DATA_AT_NUM(mp, callout_num); +  if (d->last_match_at_call_counter != mp->match_at_call_counter) { +    xmemset(d, 0, sizeof(*d)); +    d->last_match_at_call_counter = mp->match_at_call_counter; +  } + +  t = d->slot[slot].type; +  if (IS_NOT_NULL(type)) *type = t; +  if (IS_NOT_NULL(val))  *val  = d->slot[slot].val; +  return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, +                             const UChar* tag, const UChar* tag_end, int slot, +                             OnigType* type, OnigValue* val) +{ +  int num; + +  num = onig_get_callout_num_by_tag(reg, tag, tag_end); +  if (num < 0)  return num; +  if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + +  return onig_get_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_get_callout_data_by_callout_args(OnigCalloutArgs* args, +                                      int callout_num, int slot, +                                      OnigType* type, OnigValue* val) +{ +  return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot, +                               type, val); +} + +extern int +onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args, +                                           int slot, OnigType* type, OnigValue* val) +{ +  return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot, +                               type, val); +} + +extern int +onig_set_callout_data(regex_t* reg, OnigMatchParam* mp, +                      int callout_num, int slot, +                      OnigType type, OnigValue* val) +{ +  CalloutData* d; + +  if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + +  d = CALLOUT_DATA_AT_NUM(mp, callout_num); +  d->slot[slot].type = type; +  d->slot[slot].val  = *val; +  d->last_match_at_call_counter = mp->match_at_call_counter; + +  return ONIG_NORMAL; +} + +extern int +onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, +                             const UChar* tag, const UChar* tag_end, int slot, +                             OnigType type, OnigValue* val) +{ +  int num; + +  num = onig_get_callout_num_by_tag(reg, tag, tag_end); +  if (num < 0)  return num; +  if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + +  return onig_set_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_set_callout_data_by_callout_args(OnigCalloutArgs* args, +                                      int callout_num, int slot, +                                      OnigType type, OnigValue* val) +{ +  return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot, +                               type, val); +} + +extern int +onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args, +                                           int slot, OnigType type, OnigValue* val) +{ +  return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot, +                               type, val); +} + +#else +#define ADJUST_MATCH_PARAM(reg, mp) +#endif /* USE_CALLOUT */ + +  static int  stack_double(int is_alloca, char** arg_alloc_base,               StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, -             OnigMatchArg* msa) +             MatchArg* msa)  {    unsigned int n;    int used; @@ -541,11 +1416,11 @@ stack_double(int is_alloca, char** arg_alloc_base,      xmemcpy(new_alloc_base, alloc_base, size);    }    else { -    if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { -      if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) +    if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) { +      if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit)          return ONIGERR_MATCH_STACK_LIMIT_OVER;        else -        n = MatchStackLimitSize; +        n = msa->match_stack_limit;      }      new_alloc_base = (char* )xrealloc(alloc_base, new_size);      if (IS_NULL(new_alloc_base)) { @@ -584,80 +1459,36 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ -  (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ -  if (IS_NOT_NULL(state_check_buff)) {\ -    int x = STATE_CHECK_POS(s,snum);\ -    (v) = state_check_buff[x/8] & (1<<(x%8));\ -  }\ -  else (v) = 0;\ -} while(0) - - -#define ELSE_IF_STATE_CHECK_MARK(stk) \ -  else if ((stk)->type == STK_STATE_CHECK_MARK) { \ -    int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ -    state_check_buff[x/8] |= (1<<(x%8));\ -  } -  #define STACK_PUSH(stack_type,pat,s,sprev) do {\    STACK_ENSURE(1);\    stk->type = (stack_type);\    stk->u.state.pcode     = (pat);\    stk->u.state.pstr      = (s);\    stk->u.state.pstr_prev = (sprev);\ -  stk->u.state.state_check = 0;\    STACK_INC;\  } while(0)  #define STACK_PUSH_ENSURED(stack_type,pat) do {\    stk->type = (stack_type);\    stk->u.state.pcode = (pat);\ -  stk->u.state.state_check = 0;\ -  STACK_INC;\ -} while(0) - -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ -  STACK_ENSURE(1);\ -  stk->type = STK_ALT;\ -  stk->u.state.pcode     = (pat);\ -  stk->u.state.pstr      = (s);\ -  stk->u.state.pstr_prev = (sprev);\ -  stk->u.state.state_check = (IS_NOT_NULL(state_check_buff) ? (snum) : 0);\    STACK_INC;\  } while(0) -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ -  if (IS_NOT_NULL(state_check_buff)) {   \ -    STACK_ENSURE(1);\ -    stk->type = STK_STATE_CHECK_MARK;\ -    stk->u.state.pstr = (s);\ -    stk->u.state.state_check = (snum);\ -    STACK_INC;\ -  }\ -} while(0) - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define ELSE_IF_STATE_CHECK_MARK(stk) - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ -  STACK_ENSURE(1);\ +#ifdef ONIG_DEBUG_MATCH +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\    stk->type = (stack_type);\ -  stk->u.state.pcode     = (pat);\ -  stk->u.state.pstr      = (s);\ -  stk->u.state.pstr_prev = (sprev);\ +  stk->u.state.pcode = (pat);\ +  stk->u.state.pstr      = s;\ +  stk->u.state.pstr_prev = sprev;\    STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ +} while (0) +#else +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\    stk->type = (stack_type);\    stk->u.state.pcode = (pat);\    STACK_INC;\ -} while(0) -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ +} while (0) +#endif  #define STACK_PUSH_ALT(pat,s,sprev)       STACK_PUSH(STK_ALT,pat,s,sprev)  #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) @@ -672,7 +1503,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_REPEAT(sid, pat) do {\    STACK_ENSURE(1);\    stk->type = STK_REPEAT;\ -  stk->id   = (sid);\ +  stk->zid  = (sid);\    stk->u.repeat.pcode  = (pat);\    stk->u.repeat.count  = 0;\    STACK_INC;\ @@ -688,7 +1519,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_MEM_START(mnum, s) do {\    STACK_ENSURE(1);\    stk->type = STK_MEM_START;\ -  stk->id   = (mnum);\ +  stk->zid  = (mnum);\    stk->u.mem.pstr     = (s);\    stk->u.mem.start    = mem_start_stk[mnum];\    stk->u.mem.end      = mem_end_stk[mnum];\ @@ -700,7 +1531,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_MEM_END(mnum, s) do {\    STACK_ENSURE(1);\    stk->type = STK_MEM_END;\ -  stk->id   = (mnum);\ +  stk->zid  = (mnum);\    stk->u.mem.pstr   = (s);\    stk->u.mem.start  = mem_start_stk[mnum];\    stk->u.mem.end    = mem_end_stk[mnum];\ @@ -711,7 +1542,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_MEM_END_MARK(mnum) do {\    STACK_ENSURE(1);\    stk->type = STK_MEM_END_MARK;\ -  stk->id   = (mnum);\ +  stk->zid  = (mnum);\    STACK_INC;\  } while(0) @@ -721,10 +1552,10 @@ stack_double(int is_alloca, char** arg_alloc_base,    while (k > stk_base) {\      k--;\      if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ -      && k->id == (mnum)) {\ +      && k->zid == (mnum)) {\        level++;\      }\ -    else if (k->type == STK_MEM_START && k->id == (mnum)) {\ +    else if (k->type == STK_MEM_START && k->zid == (mnum)) {\        if (level == 0) break;\        level--;\      }\ @@ -752,7 +1583,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\    STACK_ENSURE(1);\    stk->type = STK_EMPTY_CHECK_START;\ -  stk->id   = (cnum);\ +  stk->zid  = (cnum);\    stk->u.empty_check.pstr = (s);\    STACK_INC;\  } while(0) @@ -760,7 +1591,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\    STACK_ENSURE(1);\    stk->type = STK_EMPTY_CHECK_END;\ -  stk->id   = (cnum);\ +  stk->zid  = (cnum);\    STACK_INC;\  } while(0) @@ -780,7 +1611,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\    STACK_ENSURE(1);\    stk->type = STK_SAVE_VAL;\ -  stk->id   = (sid);\ +  stk->zid  = (sid);\    stk->u.val.type = (stype);\    stk->u.val.v    = (UChar* )(sval);\    STACK_INC;\ @@ -789,7 +1620,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\    STACK_ENSURE(1);\    stk->type = STK_SAVE_VAL;\ -  stk->id   = (sid);\ +  stk->zid  = (sid);\    stk->u.val.type = (stype);\    stk->u.val.v    = (UChar* )(sval);\    stk->u.val.v2   = sprev;\ @@ -815,7 +1646,7 @@ stack_double(int is_alloca, char** arg_alloc_base,      k--;\      STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \      if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ -        && k->id == (sid)) {\ +        && k->zid == (sid)) {\        if (level == 0) {\          (sval) = k->u.val.v;\          break;\ @@ -835,7 +1666,7 @@ stack_double(int is_alloca, char** arg_alloc_base,      k--;\      STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \      if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ -        && k->id == (sid)) {\ +        && k->zid == (sid)) {\        if (level == 0) {\          (sval) = k->u.val.v;\          sprev  = k->u.val.v2;\ @@ -869,6 +1700,24 @@ stack_double(int is_alloca, char** arg_alloc_base,    }\  } while (0) +#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\ +  STACK_ENSURE(1);\ +  stk->type = STK_CALLOUT;\ +  stk->zid  = ONIG_NON_NAME_ID;\ +  stk->u.callout.num = (anum);\ +  stk->u.callout.func = (func);\ +  STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\ +  STACK_ENSURE(1);\ +  stk->type = STK_CALLOUT;\ +  stk->zid  = (aid);\ +  stk->u.callout.num = (anum);\ +  stk->u.callout.func = (func);\ +  STACK_INC;\ +} while(0) +  #ifdef ONIG_DEBUG  #define STACK_BASE_CHECK(p, at) \    if ((p) < stk_base) {\ @@ -884,6 +1733,16 @@ stack_double(int is_alloca, char** arg_alloc_base,    STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \  } while(0) + +#ifdef USE_CALLOUT +#define POP_CALLOUT_CASE \ +  else if (stk->type == STK_CALLOUT) {\ +    RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\ +  } +#else +#define POP_CALLOUT_CASE +#endif +  #define STACK_POP  do {\    switch (pop_level) {\    case STACK_POP_LEVEL_FREE:\ @@ -891,7 +1750,6 @@ stack_double(int is_alloca, char** arg_alloc_base,        stk--;\        STACK_BASE_CHECK(stk, "STACK_POP"); \        if ((stk->type & STK_MASK_POP_USED) != 0)  break;\ -      ELSE_IF_STATE_CHECK_MARK(stk);\      }\      break;\    case STACK_POP_LEVEL_MEM_START:\ @@ -900,10 +1758,9 @@ stack_double(int is_alloca, char** arg_alloc_base,        STACK_BASE_CHECK(stk, "STACK_POP 2"); \        if ((stk->type & STK_MASK_POP_USED) != 0)  break;\        else if (stk->type == STK_MEM_START) {\ -        mem_start_stk[stk->id] = stk->u.mem.start;\ -        mem_end_stk[stk->id]   = stk->u.mem.end;\ +        mem_start_stk[stk->zid] = stk->u.mem.start;\ +        mem_end_stk[stk->zid]   = stk->u.mem.end;\        }\ -      ELSE_IF_STATE_CHECK_MARK(stk);\      }\      break;\    default:\ @@ -911,75 +1768,70 @@ stack_double(int is_alloca, char** arg_alloc_base,        stk--;\        STACK_BASE_CHECK(stk, "STACK_POP 3"); \        if ((stk->type & STK_MASK_POP_USED) != 0)  break;\ -      else if (stk->type == STK_MEM_START) {\ -        mem_start_stk[stk->id] = stk->u.mem.start;\ -        mem_end_stk[stk->id]   = stk->u.mem.end;\ -      }\ -      else if (stk->type == STK_REPEAT_INC) {\ -        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ -      }\ -      else if (stk->type == STK_MEM_END) {\ -        mem_start_stk[stk->id] = stk->u.mem.start;\ -        mem_end_stk[stk->id]   = stk->u.mem.end;\ +      else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\ +        if (stk->type == STK_MEM_START) {\ +          mem_start_stk[stk->zid] = stk->u.mem.start;\ +          mem_end_stk[stk->zid]   = stk->u.mem.end;\ +        }\ +        else if (stk->type == STK_REPEAT_INC) {\ +          STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ +        }\ +        else if (stk->type == STK_MEM_END) {\ +          mem_start_stk[stk->zid] = stk->u.mem.start;\ +          mem_end_stk[stk->zid]   = stk->u.mem.end;\ +        }\ +        POP_CALLOUT_CASE\        }\ -      ELSE_IF_STATE_CHECK_MARK(stk);\      }\      break;\    }\  } while(0) -#define STACK_POP_TIL_ALT_PREC_READ_NOT  do {\ +#define POP_TIL_BODY(aname, til_type) do {\    while (1) {\      stk--;\ -    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \ -    if (stk->type == STK_ALT_PREC_READ_NOT) break;\ -    else if (stk->type == STK_MEM_START) {\ -      mem_start_stk[stk->id] = stk->u.mem.start;\ -      mem_end_stk[stk->id]   = stk->u.mem.end;\ -    }\ -    else if (stk->type == STK_REPEAT_INC) {\ -      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ -    }\ -    else if (stk->type == STK_MEM_END) {\ -      mem_start_stk[stk->id] = stk->u.mem.start;\ -      mem_end_stk[stk->id]   = stk->u.mem.end;\ +    STACK_BASE_CHECK(stk, (aname));\ +    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\ +      if (stk->type == (til_type)) break;\ +      else {\ +        if (stk->type == STK_MEM_START) {\ +          mem_start_stk[stk->zid] = stk->u.mem.start;\ +          mem_end_stk[stk->zid]   = stk->u.mem.end;\ +        }\ +        else if (stk->type == STK_REPEAT_INC) {\ +          STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ +        }\ +        else if (stk->type == STK_MEM_END) {\ +          mem_start_stk[stk->zid] = stk->u.mem.start;\ +          mem_end_stk[stk->zid]   = stk->u.mem.end;\ +        }\ +        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\ +      }\      }\ -    ELSE_IF_STATE_CHECK_MARK(stk);\    }\  } while(0) +#define STACK_POP_TIL_ALT_PREC_READ_NOT  do {\ +  POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\ +} while(0) +  #define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT  do {\ -  while (1) {\ -    stk--;\ -    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \ -    if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\ -    else if (stk->type == STK_MEM_START) {\ -      mem_start_stk[stk->id] = stk->u.mem.start;\ -      mem_end_stk[stk->id]   = stk->u.mem.end;\ -    }\ -    else if (stk->type == STK_REPEAT_INC) {\ -      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ -    }\ -    else if (stk->type == STK_MEM_END) {\ -      mem_start_stk[stk->id] = stk->u.mem.start;\ -      mem_end_stk[stk->id]   = stk->u.mem.end;\ -    }\ -    ELSE_IF_STATE_CHECK_MARK(stk);\ -  }\ +  POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\  } while(0) +  #define STACK_EXEC_TO_VOID(k) do {\    k = stk;\    while (1) {\      k--;\      STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \      if (IS_TO_VOID_TARGET(k)) {\ +      if (k->type == STK_TO_VOID_START) {\ +        k->type = STK_VOID;\ +        break;\ +      }\        k->type = STK_VOID;\      }\ -    else if (k->type == STK_TO_VOID_START) {\ -      k->type = STK_VOID;\ -      break;\ -    }\    }\  } while(0) @@ -989,7 +1841,7 @@ stack_double(int is_alloca, char** arg_alloc_base,      k--;\      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \      if (k->type == STK_EMPTY_CHECK_START) {\ -      if (k->id == (sid)) {\ +      if (k->zid == (sid)) {\          (isnull) = (k->u.empty_check.pstr == (s));\          break;\        }\ @@ -1004,7 +1856,7 @@ stack_double(int is_alloca, char** arg_alloc_base,      k--;\      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \      if (k->type == STK_EMPTY_CHECK_START) {\ -      if (k->id == (sid)) {\ +      if (k->zid == (sid)) {\          if (k->u.empty_check.pstr != (s)) {\            (isnull) = 0;\            break;\ @@ -1017,7 +1869,7 @@ stack_double(int is_alloca, char** arg_alloc_base,                if (k->u.mem.end == INVALID_STACK_INDEX) {\                  (isnull) = 0; break;\                }\ -              if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ +              if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\                  endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\                else\                  endp = (UChar* )k->u.mem.end;\ @@ -1045,7 +1897,7 @@ stack_double(int is_alloca, char** arg_alloc_base,      k--;\      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \      if (k->type == STK_EMPTY_CHECK_START) {\ -      if (k->id == (sid)) {\ +      if (k->zid == (sid)) {\          if (level == 0) {\            if (k->u.empty_check.pstr != (s)) {\              (isnull) = 0;\ @@ -1059,7 +1911,7 @@ stack_double(int is_alloca, char** arg_alloc_base,                  if (k->u.mem.end == INVALID_STACK_INDEX) {\                    (isnull) = 0; break;\                  }\ -                if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ +                if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\                    endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\                  else\                    endp = (UChar* )k->u.mem.end;\ @@ -1081,7 +1933,7 @@ stack_double(int is_alloca, char** arg_alloc_base,        }\      }\      else if (k->type == STK_EMPTY_CHECK_END) {\ -      if (k->id == (sid)) level++;\ +      if (k->zid == (sid)) level++;\      }\    }\  } while(0) @@ -1116,7 +1968,7 @@ stack_double(int is_alloca, char** arg_alloc_base,      STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \      if (k->type == STK_REPEAT) {\        if (level == 0) {\ -        if (k->id == (sid)) {\ +        if (k->zid == (sid)) {\            break;\          }\        }\ @@ -1208,11 +2060,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,  #define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)  #define DATA_ENSURE(n)         if (s + (n) > right_range) goto fail -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE  #define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range -#else -#define INIT_RIGHT_RANGE    right_range = (UChar* )end -#endif  #ifdef USE_CAPTURE_HISTORY  static int @@ -1225,7 +2073,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,    while (k < stk_top) {      if (k->type == STK_MEM_START) { -      n = k->id; +      n = k->zid;        if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&            MEM_STATUS_AT(reg->capture_history, n) != 0) {          child = history_node_new(); @@ -1243,7 +2091,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,        }      }      else if (k->type == STK_MEM_END) { -      if (k->id == node->group) { +      if (k->zid == node->group) {          node->end = (int )(k->u.mem.pstr - str);          *kp = k;          return 0; @@ -1292,7 +2140,7 @@ backref_match_at_nested_level(regex_t* reg,      }      else if (level == nest) {        if (k->type == STK_MEM_START) { -        if (mem_is_in_memp(k->id, mem_num, memp)) { +        if (mem_is_in_memp(k->zid, mem_num, memp)) {            pstart = k->u.mem.pstr;            if (IS_NOT_NULL(pend)) {              if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ @@ -1316,7 +2164,7 @@ backref_match_at_nested_level(regex_t* reg,          }        }        else if (k->type == STK_MEM_END) { -        if (mem_is_in_memp(k->id, mem_num, memp)) { +        if (mem_is_in_memp(k->zid, mem_num, memp)) {            pend = k->u.mem.pstr;          }        } @@ -1347,7 +2195,7 @@ backref_check_at_nested_level(regex_t* reg,      }      else if (level == nest) {        if (k->type == STK_MEM_END) { -        if (mem_is_in_memp(k->id, mem_num, memp)) { +        if (mem_is_in_memp(k->zid, mem_num, memp)) {            return 1;          }        } @@ -1391,14 +2239,14 @@ static int OpCurr = OP_FINISH;  static int OpPrevTarget = OP_FAIL;  static int MaxStackDepth = 0; -#define MOP_IN(opcode) do {\ +#define SOP_IN(opcode) do {\    if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\    OpCurr = opcode;\    OpCounter[opcode]++;\    GETTIME(ts);\  } while(0) -#define MOP_OUT do {\ +#define SOP_OUT do {\    GETTIME(te);\    OpTime[OpCurr] += TIMEDIFF(te, ts);\  } while(0) @@ -1422,9 +2270,9 @@ onig_print_statistics(FILE* f)    r = fprintf(f, "   count      prev        time\n");    if (r < 0) return -1; -  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { +  for (i = 0; OpInfo[i].opcode >= 0; i++) {      r = fprintf(f, "%8d: %8d: %10ld: %s\n", -                OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); +                OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);      if (r < 0) return -1;    }    r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); @@ -1442,8 +2290,8 @@ onig_print_statistics(FILE* f)  #else  #define STACK_INC     stk++ -#define MOP_IN(opcode) -#define MOP_OUT +#define SOP_IN(opcode) +#define SOP_OUT  #endif @@ -1459,10 +2307,8 @@ typedef struct {  /* if sstart == str then set sprev to NULL. */  static int  match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -         const UChar* in_right_range, -#endif -         const UChar* sstart, UChar* sprev, OnigMatchArg* msa) +         const UChar* in_right_range, const UChar* sstart, UChar* sprev, +         MatchArg* msa)  {    static UChar FinishCode[] = { OP_FINISH }; @@ -1480,16 +2326,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    StackIndex *repeat_stk;    StackIndex *mem_start_stk, *mem_end_stk;    UChar* keep; -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  int scv; -  unsigned char* state_check_buff = msa->state_check_buff; -  int num_comb_exp_check = reg->num_comb_exp_check; +#ifdef USE_RETRY_LIMIT_IN_MATCH +  unsigned long retry_limit_in_match; +  unsigned long retry_in_match_counter;  #endif + +#ifdef USE_CALLOUT +  int of; +#endif +    UChar *p = reg->p;    OnigOptionType option = reg->options;    OnigEncoding encode = reg->enc;    OnigCaseFoldType case_fold_flag = reg->case_fold_flag; +#ifdef USE_CALLOUT +  msa->mp->match_at_call_counter++; +#endif + +#ifdef USE_RETRY_LIMIT_IN_MATCH +  retry_limit_in_match = msa->retry_limit_in_match; +#endif +    //n = reg->num_repeat + reg->num_mem * 2;    pop_level = reg->stack_pop_level;    num_mem = reg->num_mem; @@ -1506,11 +2364,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            (int )(end - str), (int )(sstart - str));  #endif -  STACK_PUSH_ENSURED(STK_ALT, FinishCode);  /* bottom stack */    best_len = ONIG_MISMATCH;    keep = s = (UChar* )sstart; +  STACK_PUSH_BOTTOM(STK_ALT, FinishCode);  /* bottom stack */    INIT_RIGHT_RANGE; +#ifdef USE_RETRY_LIMIT_IN_MATCH +  retry_in_match_counter = 0; +#endif +    while (1) {  #ifdef ONIG_DEBUG_MATCH      { @@ -1533,7 +2395,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        fputs((char* )buf, stderr);        for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); -      fprintf(stderr, "%4d: ", (int )(p - reg->p)); +      if (p == FinishCode) +        fprintf(stderr, "----: "); +      else +        fprintf(stderr, "%4d: ", (int )(p - reg->p));        onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode);        fprintf(stderr, "\n");      } @@ -1541,7 +2406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      sbegin = s;      switch (*p++) { -    case OP_END:  MOP_IN(OP_END); +    case OP_END:  SOP_IN(OP_END);        n = (int )(s - sstart);        if (n > best_len) {          OnigRegion* region; @@ -1639,7 +2504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,  #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE      end_best_len:  #endif -      MOP_OUT; +      SOP_OUT;        if (IS_FIND_CONDITION(option)) {          if (IS_FIND_NOT_EMPTY(option) && s == sstart) { @@ -1655,14 +2520,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        goto finish;        break; -    case OP_EXACT1:  MOP_IN(OP_EXACT1); +    case OP_EXACT1:  SOP_IN(OP_EXACT1);        DATA_ENSURE(1);        if (*p != *s) goto fail;        p++; s++; -      MOP_OUT; +      SOP_OUT;        break; -    case OP_EXACT1_IC:  MOP_IN(OP_EXACT1_IC); +    case OP_EXACT1_IC:  SOP_IN(OP_EXACT1_IC);        {          int len;          UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1681,21 +2546,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            p++; q++;          }        } -      MOP_OUT; +      SOP_OUT;        break; -    case OP_EXACT2:  MOP_IN(OP_EXACT2); +    case OP_EXACT2:  SOP_IN(OP_EXACT2);        DATA_ENSURE(2);        if (*p != *s) goto fail;        p++; s++;        if (*p != *s) goto fail;        sprev = s;        p++; s++; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACT3:  MOP_IN(OP_EXACT3); +    case OP_EXACT3:  SOP_IN(OP_EXACT3);        DATA_ENSURE(3);        if (*p != *s) goto fail;        p++; s++; @@ -1704,11 +2569,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*p != *s) goto fail;        sprev = s;        p++; s++; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACT4:  MOP_IN(OP_EXACT4); +    case OP_EXACT4:  SOP_IN(OP_EXACT4);        DATA_ENSURE(4);        if (*p != *s) goto fail;        p++; s++; @@ -1719,11 +2584,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*p != *s) goto fail;        sprev = s;        p++; s++; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACT5:  MOP_IN(OP_EXACT5); +    case OP_EXACT5:  SOP_IN(OP_EXACT5);        DATA_ENSURE(5);        if (*p != *s) goto fail;        p++; s++; @@ -1736,22 +2601,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*p != *s) goto fail;        sprev = s;        p++; s++; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACTN:  MOP_IN(OP_EXACTN); +    case OP_EXACTN:  SOP_IN(OP_EXACTN);        GET_LENGTH_INC(tlen, p);        DATA_ENSURE(tlen);        while (tlen-- > 0) {          if (*p++ != *s++) goto fail;        }        sprev = s - 1; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACTN_IC:  MOP_IN(OP_EXACTN_IC); +    case OP_EXACTN_IC:  SOP_IN(OP_EXACTN_IC);        {          int len;          UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1775,20 +2640,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACTMB2N1:  MOP_IN(OP_EXACTMB2N1); +    case OP_EXACTMB2N1:  SOP_IN(OP_EXACTMB2N1);        DATA_ENSURE(2);        if (*p != *s) goto fail;        p++; s++;        if (*p != *s) goto fail;        p++; s++; -      MOP_OUT; +      SOP_OUT;        break; -    case OP_EXACTMB2N2:  MOP_IN(OP_EXACTMB2N2); +    case OP_EXACTMB2N2:  SOP_IN(OP_EXACTMB2N2);        DATA_ENSURE(4);        if (*p != *s) goto fail;        p++; s++; @@ -1799,11 +2664,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        p++; s++;        if (*p != *s) goto fail;        p++; s++; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACTMB2N3:  MOP_IN(OP_EXACTMB2N3); +    case OP_EXACTMB2N3:  SOP_IN(OP_EXACTMB2N3);        DATA_ENSURE(6);        if (*p != *s) goto fail;        p++; s++; @@ -1818,11 +2683,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        p++; s++;        if (*p != *s) goto fail;        p++; s++; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACTMB2N:  MOP_IN(OP_EXACTMB2N); +    case OP_EXACTMB2N:  SOP_IN(OP_EXACTMB2N);        GET_LENGTH_INC(tlen, p);        DATA_ENSURE(tlen * 2);        while (tlen-- > 0) { @@ -1832,11 +2697,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p++; s++;        }        sprev = s - 2; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACTMB3N:  MOP_IN(OP_EXACTMB3N); +    case OP_EXACTMB3N:  SOP_IN(OP_EXACTMB3N);        GET_LENGTH_INC(tlen, p);        DATA_ENSURE(tlen * 3);        while (tlen-- > 0) { @@ -1848,11 +2713,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p++; s++;        }        sprev = s - 3; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EXACTMBN:  MOP_IN(OP_EXACTMBN); +    case OP_EXACTMBN:  SOP_IN(OP_EXACTMBN);        GET_LENGTH_INC(tlen,  p);  /* mb-len */        GET_LENGTH_INC(tlen2, p);  /* string len */        tlen2 *= tlen; @@ -1862,19 +2727,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p++; s++;        }        sprev = s - tlen; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_CCLASS:  MOP_IN(OP_CCLASS); +    case OP_CCLASS:  SOP_IN(OP_CCLASS);        DATA_ENSURE(1);        if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;        p += SIZE_BITSET;        s += enclen(encode, s);   /* OP_CCLASS can match mb-code. \D, \S */ -      MOP_OUT; +      SOP_OUT;        break; -    case OP_CCLASS_MB:  MOP_IN(OP_CCLASS_MB); +    case OP_CCLASS_MB:  SOP_IN(OP_CCLASS_MB);        if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;      cclass_mb: @@ -1900,10 +2765,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,  #endif        }        p += tlen; -      MOP_OUT; +      SOP_OUT;        break; -    case OP_CCLASS_MIX:  MOP_IN(OP_CCLASS_MIX); +    case OP_CCLASS_MIX:  SOP_IN(OP_CCLASS_MIX);        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_HEAD(encode, s)) {          p += SIZE_BITSET; @@ -1918,18 +2783,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p += tlen;          s++;        } -      MOP_OUT; +      SOP_OUT;        break; -    case OP_CCLASS_NOT:  MOP_IN(OP_CCLASS_NOT); +    case OP_CCLASS_NOT:  SOP_IN(OP_CCLASS_NOT);        DATA_ENSURE(1);        if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;        p += SIZE_BITSET;        s += enclen(encode, s); -      MOP_OUT; +      SOP_OUT;        break; -    case OP_CCLASS_MB_NOT:  MOP_IN(OP_CCLASS_MB_NOT); +    case OP_CCLASS_MB_NOT:  SOP_IN(OP_CCLASS_MB_NOT);        DATA_ENSURE(1);        if (! ONIGENC_IS_MBC_HEAD(encode, s)) {          s++; @@ -1967,10 +2832,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        p += tlen;      cc_mb_not_success: -      MOP_OUT; +      SOP_OUT;        break; -    case OP_CCLASS_MIX_NOT:  MOP_IN(OP_CCLASS_MIX_NOT); +    case OP_CCLASS_MIX_NOT:  SOP_IN(OP_CCLASS_MIX_NOT);        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_HEAD(encode, s)) {          p += SIZE_BITSET; @@ -1985,11 +2850,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p += tlen;          s++;        } -      MOP_OUT; +      SOP_OUT;        break;  #ifdef USE_OP_CCLASS_NODE -    case OP_CCLASS_NODE:  MOP_IN(OP_CCLASS_NODE); +    case OP_CCLASS_NODE:  SOP_IN(OP_CCLASS_NODE);        {          OnigCodePoint code;          void *node; @@ -2005,28 +2870,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          code = ONIGENC_MBC_TO_CODE(encode, ss, s);          if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;        } -      MOP_OUT; +      SOP_OUT;        break;  #endif -    case OP_ANYCHAR:  MOP_IN(OP_ANYCHAR); +    case OP_ANYCHAR:  SOP_IN(OP_ANYCHAR);        DATA_ENSURE(1);        n = enclen(encode, s);        DATA_ENSURE(n);        if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;        s += n; -      MOP_OUT; +      SOP_OUT;        break; -    case OP_ANYCHAR_ML:  MOP_IN(OP_ANYCHAR_ML); +    case OP_ANYCHAR_ML:  SOP_IN(OP_ANYCHAR_ML);        DATA_ENSURE(1);        n = enclen(encode, s);        DATA_ENSURE(n);        s += n; -      MOP_OUT; +      SOP_OUT;        break; -    case OP_ANYCHAR_STAR:  MOP_IN(OP_ANYCHAR_STAR); +    case OP_ANYCHAR_STAR:  SOP_IN(OP_ANYCHAR_STAR);        while (DATA_ENSURE_CHECK1) {          STACK_PUSH_ALT(p, s, sprev);          n = enclen(encode, s); @@ -2035,11 +2900,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          sprev = s;          s += n;        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_ANYCHAR_ML_STAR:  MOP_IN(OP_ANYCHAR_ML_STAR); +    case OP_ANYCHAR_ML_STAR:  SOP_IN(OP_ANYCHAR_ML_STAR);        while (DATA_ENSURE_CHECK1) {          STACK_PUSH_ALT(p, s, sprev);          n = enclen(encode, s); @@ -2053,11 +2918,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            s++;          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_ANYCHAR_STAR_PEEK_NEXT:  MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); +    case OP_ANYCHAR_STAR_PEEK_NEXT:  SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);        while (DATA_ENSURE_CHECK1) {          if (*p == *s) {            STACK_PUSH_ALT(p + 1, s, sprev); @@ -2069,10 +2934,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          s += n;        }        p++; -      MOP_OUT; +      SOP_OUT;        break; -    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); +    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);        while (DATA_ENSURE_CHECK1) {          if (*p == *s) {            STACK_PUSH_ALT(p + 1, s, sprev); @@ -2089,87 +2954,46 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          }        }        p++; -      MOP_OUT; -      break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -    case OP_STATE_CHECK_ANYCHAR_STAR:  MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); -      GET_STATE_CHECK_NUM_INC(mem, p); -      while (DATA_ENSURE_CHECK1) { -        STATE_CHECK_VAL(scv, mem); -        if (scv) goto fail; - -        STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); -        n = enclen(encode, s); -        DATA_ENSURE(n); -        if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail; -        sprev = s; -        s += n; -      } -      MOP_OUT; +      SOP_OUT;        break; -    case OP_STATE_CHECK_ANYCHAR_ML_STAR: -      MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); - -      GET_STATE_CHECK_NUM_INC(mem, p); -      while (DATA_ENSURE_CHECK1) { -        STATE_CHECK_VAL(scv, mem); -        if (scv) goto fail; - -        STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); -        n = enclen(encode, s); -        if (n > 1) { -          DATA_ENSURE(n); -          sprev = s; -          s += n; -        } -        else { -          sprev = s; -          s++; -        } -      } -      MOP_OUT; -      break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - -    case OP_WORD:  MOP_IN(OP_WORD); +    case OP_WORD:  SOP_IN(OP_WORD);        DATA_ENSURE(1);        if (! ONIGENC_IS_MBC_WORD(encode, s, end))          goto fail;        s += enclen(encode, s); -      MOP_OUT; +      SOP_OUT;        break; -    case OP_WORD_ASCII:  MOP_IN(OP_WORD_ASCII); +    case OP_WORD_ASCII:  SOP_IN(OP_WORD_ASCII);        DATA_ENSURE(1);        if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))          goto fail;        s += enclen(encode, s); -      MOP_OUT; +      SOP_OUT;        break; -    case OP_NO_WORD:  MOP_IN(OP_NO_WORD); +    case OP_NO_WORD:  SOP_IN(OP_NO_WORD);        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_WORD(encode, s, end))          goto fail;        s += enclen(encode, s); -      MOP_OUT; +      SOP_OUT;        break; -    case OP_NO_WORD_ASCII:  MOP_IN(OP_NO_WORD_ASCII); +    case OP_NO_WORD_ASCII:  SOP_IN(OP_NO_WORD_ASCII);        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))          goto fail;        s += enclen(encode, s); -      MOP_OUT; +      SOP_OUT;        break; -    case OP_WORD_BOUNDARY:  MOP_IN(OP_WORD_BOUNDARY); +    case OP_WORD_BOUNDARY:  SOP_IN(OP_WORD_BOUNDARY);        {          ModeType mode;          GET_MODE_INC(mode, p); // ascii_mode @@ -2189,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,              goto fail;          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_NO_WORD_BOUNDARY:  MOP_IN(OP_NO_WORD_BOUNDARY); +    case OP_NO_WORD_BOUNDARY:  SOP_IN(OP_NO_WORD_BOUNDARY);        {          ModeType mode;          GET_MODE_INC(mode, p); // ascii_mode @@ -2212,20 +3036,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,              goto fail;          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break;  #ifdef USE_WORD_BEGIN_END -    case OP_WORD_BEGIN:  MOP_IN(OP_WORD_BEGIN); +    case OP_WORD_BEGIN:  SOP_IN(OP_WORD_BEGIN);        {          ModeType mode;          GET_MODE_INC(mode, p); // ascii_mode          if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { -          if (ON_STR_BEGIN(s) || -              ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { -            MOP_OUT; +          if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { +            SOP_OUT;              continue;            }          } @@ -2233,14 +3056,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        goto fail;        break; -    case OP_WORD_END:  MOP_IN(OP_WORD_END); +    case OP_WORD_END:  SOP_IN(OP_WORD_END);        {          ModeType mode;          GET_MODE_INC(mode, p); // ascii_mode          if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {            if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { -            MOP_OUT; +            SOP_OUT;              continue;            }          } @@ -2250,82 +3073,82 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,  #endif      case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: -      MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); +      SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);        if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { -        MOP_OUT; +        SOP_OUT;          continue;        }        goto fail;        break;      case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: -      MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); +      SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);        if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))          goto fail; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_BEGIN_BUF:  MOP_IN(OP_BEGIN_BUF); +    case OP_BEGIN_BUF:  SOP_IN(OP_BEGIN_BUF);        if (! ON_STR_BEGIN(s)) goto fail; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_END_BUF:  MOP_IN(OP_END_BUF); +    case OP_END_BUF:  SOP_IN(OP_END_BUF);        if (! ON_STR_END(s)) goto fail; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_BEGIN_LINE:  MOP_IN(OP_BEGIN_LINE); +    case OP_BEGIN_LINE:  SOP_IN(OP_BEGIN_LINE);        if (ON_STR_BEGIN(s)) {          if (IS_NOTBOL(msa->options)) goto fail; -        MOP_OUT; +        SOP_OUT;          continue;        }        else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { -        MOP_OUT; +        SOP_OUT;          continue;        }        goto fail;        break; -    case OP_END_LINE:  MOP_IN(OP_END_LINE); +    case OP_END_LINE:  SOP_IN(OP_END_LINE);        if (ON_STR_END(s)) {  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE          if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {  #endif            if (IS_NOTEOL(msa->options)) goto fail; -          MOP_OUT; +          SOP_OUT;            continue;  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE          }  #endif        }        else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { -        MOP_OUT; +        SOP_OUT;          continue;        }  #ifdef USE_CRNL_AS_LINE_TERMINATOR        else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { -        MOP_OUT; +        SOP_OUT;          continue;        }  #endif        goto fail;        break; -    case OP_SEMI_END_BUF:  MOP_IN(OP_SEMI_END_BUF); +    case OP_SEMI_END_BUF:  SOP_IN(OP_SEMI_END_BUF);        if (ON_STR_END(s)) {  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE          if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {  #endif            if (IS_NOTEOL(msa->options)) goto fail; -          MOP_OUT; +          SOP_OUT;            continue;  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE          } @@ -2333,7 +3156,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        }        else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&                 ON_STR_END(s + enclen(encode, s))) { -        MOP_OUT; +        SOP_OUT;          continue;        }  #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -2341,7 +3164,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          UChar* ss = s + enclen(encode, s);          ss += enclen(encode, ss);          if (ON_STR_END(ss)) { -          MOP_OUT; +          SOP_OUT;            continue;          }        } @@ -2349,53 +3172,53 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        goto fail;        break; -    case OP_BEGIN_POSITION:  MOP_IN(OP_BEGIN_POSITION); +    case OP_BEGIN_POSITION:  SOP_IN(OP_BEGIN_POSITION);        if (s != msa->start)          goto fail; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_MEMORY_START_PUSH:  MOP_IN(OP_MEMORY_START_PUSH); +    case OP_MEMORY_START_PUSH:  SOP_IN(OP_MEMORY_START_PUSH);        GET_MEMNUM_INC(mem, p);        STACK_PUSH_MEM_START(mem, s); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_MEMORY_START:  MOP_IN(OP_MEMORY_START); +    case OP_MEMORY_START:  SOP_IN(OP_MEMORY_START);        GET_MEMNUM_INC(mem, p);        mem_start_stk[mem] = (StackIndex )((void* )s); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_MEMORY_END_PUSH:  MOP_IN(OP_MEMORY_END_PUSH); +    case OP_MEMORY_END_PUSH:  SOP_IN(OP_MEMORY_END_PUSH);        GET_MEMNUM_INC(mem, p);        STACK_PUSH_MEM_END(mem, s); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_MEMORY_END:  MOP_IN(OP_MEMORY_END); +    case OP_MEMORY_END:  SOP_IN(OP_MEMORY_END);        GET_MEMNUM_INC(mem, p);        mem_end_stk[mem] = (StackIndex )((void* )s); -      MOP_OUT; +      SOP_OUT;        continue;        break;  #ifdef USE_CALL -    case OP_MEMORY_END_PUSH_REC:  MOP_IN(OP_MEMORY_END_PUSH_REC); +    case OP_MEMORY_END_PUSH_REC:  SOP_IN(OP_MEMORY_END_PUSH_REC);        GET_MEMNUM_INC(mem, p);        STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */        STACK_PUSH_MEM_END(mem, s);        mem_start_stk[mem] = GET_STACK_INDEX(stkp); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_MEMORY_END_REC:  MOP_IN(OP_MEMORY_END_REC); +    case OP_MEMORY_END_REC:  SOP_IN(OP_MEMORY_END_REC);        GET_MEMNUM_INC(mem, p);        mem_end_stk[mem] = (StackIndex )((void* )s);        STACK_GET_MEM_START(mem, stkp); @@ -2406,22 +3229,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);        STACK_PUSH_MEM_END_MARK(mem); -      MOP_OUT; +      SOP_OUT;        continue;        break;  #endif -    case OP_BACKREF1:  MOP_IN(OP_BACKREF1); +    case OP_BACKREF1:  SOP_IN(OP_BACKREF1);        mem = 1;        goto backref;        break; -    case OP_BACKREF2:  MOP_IN(OP_BACKREF2); +    case OP_BACKREF2:  SOP_IN(OP_BACKREF2);        mem = 2;        goto backref;        break; -    case OP_BACKREF_N:  MOP_IN(OP_BACKREF_N); +    case OP_BACKREF_N:  SOP_IN(OP_BACKREF_N);        GET_MEMNUM_INC(mem, p);      backref:        { @@ -2446,12 +3269,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          while (sprev + (len = enclen(encode, sprev)) < s)            sprev += len; -        MOP_OUT; +        SOP_OUT;          continue;        }        break; -    case OP_BACKREF_N_IC:  MOP_IN(OP_BACKREF_N_IC); +    case OP_BACKREF_N_IC:  SOP_IN(OP_BACKREF_N_IC);        GET_MEMNUM_INC(mem, p);        {          int len; @@ -2475,12 +3298,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          while (sprev + (len = enclen(encode, sprev)) < s)            sprev += len; -        MOP_OUT; +        SOP_OUT;          continue;        }        break; -    case OP_BACKREF_MULTI:  MOP_IN(OP_BACKREF_MULTI); +    case OP_BACKREF_MULTI:  SOP_IN(OP_BACKREF_MULTI);        {          int len, is_fail;          UChar *pstart, *pend, *swork; @@ -2514,12 +3337,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break; /* success */          }          if (i == tlen) goto fail; -        MOP_OUT; +        SOP_OUT;          continue;        }        break; -    case OP_BACKREF_MULTI_IC:  MOP_IN(OP_BACKREF_MULTI_IC); +    case OP_BACKREF_MULTI_IC:  SOP_IN(OP_BACKREF_MULTI_IC);        {          int len, is_fail;          UChar *pstart, *pend, *swork; @@ -2553,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break; /* success */          }          if (i == tlen) goto fail; -        MOP_OUT; +        SOP_OUT;          continue;        }        break; @@ -2580,13 +3403,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          else            goto fail; -        MOP_OUT; +        SOP_OUT;          continue;        }        break;  #endif -    case OP_BACKREF_CHECK:  MOP_IN(OP_BACKREF_CHECK); +    case OP_BACKREF_CHECK:  SOP_IN(OP_BACKREF_CHECK);        {          GET_LENGTH_INC(tlen, p);          for (i = 0; i < tlen; i++) { @@ -2599,7 +3422,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break; /* success */          }          if (i == tlen) goto fail; -        MOP_OUT; +        SOP_OUT;          continue;        }        break; @@ -2619,36 +3442,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          else            goto fail; -        MOP_OUT; +        SOP_OUT;          continue;        }        break;  #endif  #if 0   /* no need: IS_DYNAMIC_OPTION() == 0 */ -    case OP_SET_OPTION_PUSH:  MOP_IN(OP_SET_OPTION_PUSH); +    case OP_SET_OPTION_PUSH:  SOP_IN(OP_SET_OPTION_PUSH);        GET_OPTION_INC(option, p);        STACK_PUSH_ALT(p, s, sprev);        p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_SET_OPTION:  MOP_IN(OP_SET_OPTION); +    case OP_SET_OPTION:  SOP_IN(OP_SET_OPTION);        GET_OPTION_INC(option, p); -      MOP_OUT; +      SOP_OUT;        continue;        break;  #endif -    case OP_EMPTY_CHECK_START:  MOP_IN(OP_EMPTY_CHECK_START); +    case OP_EMPTY_CHECK_START:  SOP_IN(OP_EMPTY_CHECK_START);        GET_MEMNUM_INC(mem, p);    /* mem: null check id */        STACK_PUSH_EMPTY_CHECK_START(mem, s); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_EMPTY_CHECK_END:  MOP_IN(OP_EMPTY_CHECK_END); +    case OP_EMPTY_CHECK_END:  SOP_IN(OP_EMPTY_CHECK_END);        {          int is_empty; @@ -2677,12 +3500,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            }          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break;  #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT -    case OP_EMPTY_CHECK_END_MEMST:  MOP_IN(OP_EMPTY_CHECK_END_MEMST); +    case OP_EMPTY_CHECK_END_MEMST:  SOP_IN(OP_EMPTY_CHECK_END_MEMST);        {          int is_empty; @@ -2696,14 +3519,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            goto empty_check_found;          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break;  #endif  #ifdef USE_CALL      case OP_EMPTY_CHECK_END_MEMST_PUSH: -      MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); +      SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);        {          int is_empty; @@ -2725,103 +3548,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            STACK_PUSH_EMPTY_CHECK_END(mem);          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break;  #endif -    case OP_JUMP:  MOP_IN(OP_JUMP); +    case OP_JUMP:  SOP_IN(OP_JUMP);        GET_RELADDR_INC(addr, p);        p += addr; -      MOP_OUT; -      CHECK_INTERRUPT_IN_MATCH_AT; +      SOP_OUT; +      CHECK_INTERRUPT_IN_MATCH;        continue;        break; -    case OP_PUSH:  MOP_IN(OP_PUSH); +    case OP_PUSH:  SOP_IN(OP_PUSH);        GET_RELADDR_INC(addr, p);        STACK_PUSH_ALT(p + addr, s, sprev); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_PUSH_SUPER:  MOP_IN(OP_PUSH_SUPER); +    case OP_PUSH_SUPER:  SOP_IN(OP_PUSH_SUPER);        GET_RELADDR_INC(addr, p);        STACK_PUSH_SUPER_ALT(p + addr, s, sprev); -      MOP_OUT; -      continue; -      break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -    case OP_STATE_CHECK_PUSH:  MOP_IN(OP_STATE_CHECK_PUSH); -      GET_STATE_CHECK_NUM_INC(mem, p); -      STATE_CHECK_VAL(scv, mem); -      if (scv) goto fail; - -      GET_RELADDR_INC(addr, p); -      STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); -      MOP_OUT; -      continue; -      break; - -    case OP_STATE_CHECK_PUSH_OR_JUMP:  MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); -      GET_STATE_CHECK_NUM_INC(mem, p); -      GET_RELADDR_INC(addr, p); -      STATE_CHECK_VAL(scv, mem); -      if (scv) { -        p += addr; -      } -      else { -        STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); -      } -      MOP_OUT; -      continue; -      break; - -    case OP_STATE_CHECK:  MOP_IN(OP_STATE_CHECK); -      GET_STATE_CHECK_NUM_INC(mem, p); -      STATE_CHECK_VAL(scv, mem); -      if (scv) goto fail; - -      STACK_PUSH_STATE_CHECK(s, mem); -      MOP_OUT; +      SOP_OUT;        continue;        break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ -    case OP_POP:  MOP_IN(OP_POP); +    case OP_POP_OUT:  SOP_IN(OP_POP_OUT);        STACK_POP_ONE; -      MOP_OUT; +      // for stop backtrack +      //CHECK_RETRY_LIMIT_IN_MATCH; +      SOP_OUT;        continue;        break; -    case OP_PUSH_OR_JUMP_EXACT1:  MOP_IN(OP_PUSH_OR_JUMP_EXACT1); +    case OP_PUSH_OR_JUMP_EXACT1:  SOP_IN(OP_PUSH_OR_JUMP_EXACT1);        GET_RELADDR_INC(addr, p);        if (*p == *s && DATA_ENSURE_CHECK1) {          p++;          STACK_PUSH_ALT(p + addr, s, sprev); -        MOP_OUT; +        SOP_OUT;          continue;        }        p += (addr + 1); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_PUSH_IF_PEEK_NEXT:  MOP_IN(OP_PUSH_IF_PEEK_NEXT); +    case OP_PUSH_IF_PEEK_NEXT:  SOP_IN(OP_PUSH_IF_PEEK_NEXT);        GET_RELADDR_INC(addr, p);        if (*p == *s) {          p++;          STACK_PUSH_ALT(p + addr, s, sprev); -        MOP_OUT; +        SOP_OUT;          continue;        }        p++; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_REPEAT:  MOP_IN(OP_REPEAT); +    case OP_REPEAT:  SOP_IN(OP_REPEAT);        {          GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */          GET_RELADDR_INC(addr, p); @@ -2834,11 +3622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            STACK_PUSH_ALT(p + addr, s, sprev);          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_REPEAT_NG:  MOP_IN(OP_REPEAT_NG); +    case OP_REPEAT_NG:  SOP_IN(OP_REPEAT_NG);        {          GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */          GET_RELADDR_INC(addr, p); @@ -2852,11 +3640,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            p += addr;          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_REPEAT_INC:  MOP_IN(OP_REPEAT_INC); +    case OP_REPEAT_INC:  SOP_IN(OP_REPEAT_INC);        GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */        si = repeat_stk[mem];        stkp = STACK_AT(si); @@ -2874,19 +3662,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p = stkp->u.repeat.pcode;        }        STACK_PUSH_REPEAT_INC(si); -      MOP_OUT; -      CHECK_INTERRUPT_IN_MATCH_AT; +      SOP_OUT; +      CHECK_INTERRUPT_IN_MATCH;        continue;        break; -    case OP_REPEAT_INC_SG:  MOP_IN(OP_REPEAT_INC_SG); +    case OP_REPEAT_INC_SG:  SOP_IN(OP_REPEAT_INC_SG);        GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */        STACK_GET_REPEAT(mem, stkp);        si = GET_STACK_INDEX(stkp);        goto repeat_inc;        break; -    case OP_REPEAT_INC_NG:  MOP_IN(OP_REPEAT_INC_NG); +    case OP_REPEAT_INC_NG:  SOP_IN(OP_REPEAT_INC_NG);        GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */        si = repeat_stk[mem];        stkp = STACK_AT(si); @@ -2908,68 +3696,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {          STACK_PUSH_REPEAT_INC(si);        } -      MOP_OUT; -      CHECK_INTERRUPT_IN_MATCH_AT; +      SOP_OUT; +      CHECK_INTERRUPT_IN_MATCH;        continue;        break; -    case OP_REPEAT_INC_NG_SG:  MOP_IN(OP_REPEAT_INC_NG_SG); +    case OP_REPEAT_INC_NG_SG:  SOP_IN(OP_REPEAT_INC_NG_SG);        GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */        STACK_GET_REPEAT(mem, stkp);        si = GET_STACK_INDEX(stkp);        goto repeat_inc_ng;        break; -    case OP_PREC_READ_START:  MOP_IN(OP_PREC_READ_START); +    case OP_PREC_READ_START:  SOP_IN(OP_PREC_READ_START);        STACK_PUSH_POS(s, sprev); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_PREC_READ_END:  MOP_IN(OP_PREC_READ_END); +    case OP_PREC_READ_END:  SOP_IN(OP_PREC_READ_END);        {          STACK_EXEC_TO_VOID(stkp);          s     = stkp->u.state.pstr;          sprev = stkp->u.state.pstr_prev;        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_PREC_READ_NOT_START:  MOP_IN(OP_PREC_READ_NOT_START); +    case OP_PREC_READ_NOT_START:  SOP_IN(OP_PREC_READ_NOT_START);        GET_RELADDR_INC(addr, p);        STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_PREC_READ_NOT_END:  MOP_IN(OP_PREC_READ_NOT_END); +    case OP_PREC_READ_NOT_END:  SOP_IN(OP_PREC_READ_NOT_END);        STACK_POP_TIL_ALT_PREC_READ_NOT;        goto fail;        break; -    case OP_ATOMIC_START:  MOP_IN(OP_ATOMIC_START); +    case OP_ATOMIC_START:  SOP_IN(OP_ATOMIC_START);        STACK_PUSH_TO_VOID_START; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_ATOMIC_END:  MOP_IN(OP_ATOMIC_END); +    case OP_ATOMIC_END:  SOP_IN(OP_ATOMIC_END);        STACK_EXEC_TO_VOID(stkp); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_LOOK_BEHIND:  MOP_IN(OP_LOOK_BEHIND); +    case OP_LOOK_BEHIND:  SOP_IN(OP_LOOK_BEHIND);        GET_LENGTH_INC(tlen, p);        s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);        if (IS_NULL(s)) goto fail;        sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_LOOK_BEHIND_NOT_START:  MOP_IN(OP_LOOK_BEHIND_NOT_START); +    case OP_LOOK_BEHIND_NOT_START:  SOP_IN(OP_LOOK_BEHIND_NOT_START);        GET_RELADDR_INC(addr, p);        GET_LENGTH_INC(tlen, p);        q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -2984,33 +3772,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          s = q;          sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_LOOK_BEHIND_NOT_END:  MOP_IN(OP_LOOK_BEHIND_NOT_END); +    case OP_LOOK_BEHIND_NOT_END:  SOP_IN(OP_LOOK_BEHIND_NOT_END);        STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;        goto fail;        break;  #ifdef USE_CALL -    case OP_CALL:  MOP_IN(OP_CALL); +    case OP_CALL:  SOP_IN(OP_CALL);        GET_ABSADDR_INC(addr, p);        STACK_PUSH_CALL_FRAME(p);        p = reg->p + addr; -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_RETURN:  MOP_IN(OP_RETURN); +    case OP_RETURN:  SOP_IN(OP_RETURN);        STACK_RETURN(p);        STACK_PUSH_RETURN; -      MOP_OUT; +      SOP_OUT;        continue;        break;  #endif -    case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL); +    case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL);        {          SaveType type;          GET_SAVE_TYPE_INC(type, p); @@ -3029,11 +3817,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break;          }        } -      MOP_OUT; +      SOP_OUT;        continue;        break; -    case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR); +    case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR);        {          UpdateVarType type;          enum SaveType save_type; @@ -3061,31 +3849,99 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break;          }        } -      MOP_OUT; +      SOP_OUT; +      continue; +      break; + +#ifdef USE_CALLOUT +    case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS); +      of = ONIG_CALLOUT_OF_CONTENTS; +      goto callout_common_entry; + +      SOP_OUT;        continue;        break; +    case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME); +      { +        int call_result; +        int name_id; +        int num; +        int in; +        CalloutListEntry* e; +        OnigCalloutFunc func; +        OnigCalloutArgs args; + +        of = ONIG_CALLOUT_OF_NAME; +        GET_MEMNUM_INC(name_id, p); + +      callout_common_entry: +        GET_MEMNUM_INC(num, p); +        e = onig_reg_callout_list_at(reg, num); +        in = e->in; +        if (of == ONIG_CALLOUT_OF_NAME) { +          func = onig_get_callout_start_func(reg, num); +        } +        else { +          name_id = ONIG_NON_NAME_ID; +          func = msa->mp->progress_callout_of_contents; +        } + +        if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) { +          CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id, +                       num, msa->mp->callout_user_data, args, call_result); +          switch (call_result) { +          case ONIG_CALLOUT_FAIL: +            goto fail; +            break; +          case ONIG_CALLOUT_SUCCESS: +            goto retraction_callout2; +            break; +          default: /* error code */ +            if (call_result > 0) { +              call_result = ONIGERR_INVALID_ARGUMENT; +            } +            best_len = call_result; +            goto finish; +            break; +          } +        } +        else { +        retraction_callout2: +          if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) { +            if (of == ONIG_CALLOUT_OF_NAME) { +              if (IS_NOT_NULL(func)) { +                STACK_PUSH_CALLOUT_NAME(name_id, num, func); +              } +            } +            else { +              func = msa->mp->retraction_callout_of_contents; +              if (IS_NOT_NULL(func)) { +                STACK_PUSH_CALLOUT_CONTENTS(num, func); +              } +            } +          } +        } +      } +      SOP_OUT; +      continue; +      break; +#endif +      case OP_FINISH:        goto finish;        break;      fail: -      MOP_OUT; +      SOP_OUT;        /* fall */ -    case OP_FAIL:  MOP_IN(OP_FAIL); +    case OP_FAIL:  SOP_IN(OP_FAIL);        STACK_POP;        p     = stk->u.state.pcode;        s     = stk->u.state.pstr;        sprev = stk->u.state.pstr_prev; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -      if (stk->u.state.state_check != 0) { -        stk->type = STK_STATE_CHECK_MARK; -        stk++; -      } -#endif - -      MOP_OUT; +      CHECK_RETRY_LIMIT_IN_MATCH; +      SOP_OUT;        continue;        break; @@ -3113,6 +3969,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,   unexpected_bytecode_error:    STACK_SAVE;    return ONIGERR_UNEXPECTED_BYTECODE; + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match_over: +  STACK_SAVE; +  return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER; +#endif  } @@ -3423,23 +4285,30 @@ map_search_backward(OnigEncoding enc, UChar map[],    }    return (UChar* )NULL;  } -  extern int  onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,             OnigRegion* region, OnigOptionType option)  {    int r; -  UChar *prev; -  OnigMatchArg msa; +  OnigMatchParam mp; -  MATCH_ARG_INIT(msa, reg, option, region, at); -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  { -    int offset = at - str; -    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); -  } -#endif +  onig_initialize_match_param(&mp); +  r = onig_match_with_param(reg, str, end, at, region, option, &mp); +  onig_free_match_param_content(&mp); +  return r; +} + +extern int +onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, +                      const UChar* at, OnigRegion* region, OnigOptionType option, +                      OnigMatchParam* mp) +{ +  int r; +  UChar *prev; +  MatchArg msa; +  ADJUST_MATCH_PARAM(reg, mp); +  MATCH_ARG_INIT(msa, reg, option, region, at, mp);    if (region  #ifdef USE_POSIX_API_REGION_OPTION        && !IS_POSIX_REGION(option) @@ -3459,11 +4328,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,      }      prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); -    r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -                 end, -#endif -                 at, prev, &msa); +    r = match_at(reg, str, end, end, at, prev, &msa);    }   end: @@ -3497,23 +4362,23 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,   retry:    switch (reg->optimize) { -  case ONIG_OPTIMIZE_EXACT: +  case OPTIMIZE_EXACT:      p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);      break; -  case ONIG_OPTIMIZE_EXACT_IC: +  case OPTIMIZE_EXACT_IC:      p = slow_search_ic(reg->enc, reg->case_fold_flag,                         reg->exact, reg->exact_end, p, end, range);      break; -  case ONIG_OPTIMIZE_EXACT_BM: +  case OPTIMIZE_EXACT_BM:      p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);      break; -  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: +  case OPTIMIZE_EXACT_BM_NO_REV:      p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);      break; -  case ONIG_OPTIMIZE_MAP: +  case OPTIMIZE_MAP:      p = map_search(reg->enc, reg->map, p, range);      break;    } @@ -3621,20 +4486,20 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,   retry:    switch (reg->optimize) { -  case ONIG_OPTIMIZE_EXACT: +  case OPTIMIZE_EXACT:    exact_method:      p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,                               range, adjrange, end, p);      break; -  case ONIG_OPTIMIZE_EXACT_IC: +  case OPTIMIZE_EXACT_IC:      p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,                                  reg->exact, reg->exact_end,                                  range, adjrange, end, p);      break; -  case ONIG_OPTIMIZE_EXACT_BM: -  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: +  case OPTIMIZE_EXACT_BM: +  case OPTIMIZE_EXACT_BM_NO_REV:  #ifdef USE_INT_MAP_BACKWARD      if (IS_NULL(reg->int_map_backward)) {        int r; @@ -3653,7 +4518,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,  #endif      break; -  case ONIG_OPTIMIZE_MAP: +  case OPTIMIZE_MAP:      p = map_search_backward(reg->enc, reg->map, range, adjrange, p);      break;    } @@ -3725,12 +4590,25 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,              OnigOptionType option)  {    int r; +  OnigMatchParam mp; + +  onig_initialize_match_param(&mp); +  r = onig_search_with_param(reg, str, end, start, range, region, option, &mp); +  onig_free_match_param_content(&mp); +  return r; + +} + +extern int +onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, +                       const UChar* start, const UChar* range, OnigRegion* region, +                       OnigOptionType option, OnigMatchParam* mp) +{ +  int r;    UChar *s, *prev; -  OnigMatchArg msa; +  MatchArg msa;    const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE    const UChar *orig_range = range; -#endif  #ifdef ONIG_DEBUG_SEARCH    fprintf(stderr, @@ -3738,6 +4616,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,       str, (int )(end - str), (int )(start - str), (int )(range - str));  #endif +  ADJUST_MATCH_PARAM(reg, mp); +    if (region  #ifdef USE_POSIX_API_REGION_OPTION        && !IS_POSIX_REGION(option) @@ -3757,7 +4637,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,    } -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE  #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE  #define MATCH_AND_RETURN_CHECK(upper_range) \    r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ @@ -3779,29 +4658,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,      else goto finish; /* error */ \    }  #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ -  r = match_at(reg, str, end, s, prev, &msa);\ -  if (r != ONIG_MISMATCH) {\ -    if (r >= 0) {\ -      if (! IS_FIND_LONGEST(reg->options)) {\ -        goto match;\ -      }\ -    }\ -    else goto finish; /* error */ \ -  } -#else -#define MATCH_AND_RETURN_CHECK(none) \ -  r = match_at(reg, str, end, s, prev, &msa);\ -  if (r != ONIG_MISMATCH) {\ -    if (r >= 0) {\ -      goto match;\ -    }\ -    else goto finish; /* error */ \ -  } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */    /* anchor optimize: resume search range */ @@ -3886,7 +4742,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,          goto end_buf;        }      } -    else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { +    else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) {        goto begin_position;      }    } @@ -3902,11 +4758,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,        s = (UChar* )start;        prev = (UChar* )NULL; -      MATCH_ARG_INIT(msa, reg, option, region, start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK -      msa.state_check_buff = (void* )0; -      msa.state_check_buff_size = 0;   /* NO NEED, for valgrind */ -#endif +      MATCH_ARG_INIT(msa, reg, option, region, start, mp);        MATCH_AND_RETURN_CHECK(end);        goto mismatch;      } @@ -3918,13 +4770,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,            (int )(end - str), (int )(start - str), (int )(range - str));  #endif -  MATCH_ARG_INIT(msa, reg, option, region, orig_start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  { -    int offset = (MIN(start, range) - str); -    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); -  } -#endif +  MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);    s = (UChar* )start;    if (range > start) {   /* forward search */ @@ -3933,7 +4779,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,      else        prev = (UChar* )NULL; -    if (reg->optimize != ONIG_OPTIMIZE_NONE) { +    if (reg->optimize != OPTIMIZE_NONE) {        UChar *sch_range, *low, *high, *low_prev;        sch_range = (UChar* )range; @@ -3969,7 +4815,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,          if (! forward_search_range(reg, str, end, s, sch_range,                                     &low, &high, (UChar** )NULL)) goto mismatch; -        if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { +        if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) {            do {              MATCH_AND_RETURN_CHECK(orig_range);              prev = s; @@ -3998,12 +4844,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,      }    }    else {  /* backward search */ -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE      if (orig_start < end)        orig_start += enclen(reg->enc, orig_start); /* is upper range */ -#endif -    if (reg->optimize != ONIG_OPTIMIZE_NONE) { +    if (reg->optimize != OPTIMIZE_NONE) {        UChar *low, *high, *adjrange, *sch_start;        if (range < end) @@ -4204,3 +5048,600 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from)    *to = *from;  } + +/* for callout functions */ + +#ifdef USE_CALLOUT + +extern OnigCalloutFunc +onig_get_progress_callout(void) +{ +  return DefaultProgressCallout; +} + +extern int +onig_set_progress_callout(OnigCalloutFunc f) +{ +  DefaultProgressCallout = f; +  return ONIG_NORMAL; +} + +extern OnigCalloutFunc +onig_get_retraction_callout(void) +{ +  return DefaultRetractionCallout; +} + +extern int +onig_set_retraction_callout(OnigCalloutFunc f) +{ +  DefaultRetractionCallout = f; +  return ONIG_NORMAL; +} + +extern int +onig_get_callout_num_by_callout_args(OnigCalloutArgs* args) +{ +  return args->num; +} + +extern OnigCalloutIn +onig_get_callout_in_by_callout_args(OnigCalloutArgs* args) +{ +  return args->in; +} + +extern int +onig_get_name_id_by_callout_args(OnigCalloutArgs* args) +{ +  return args->name_id; +} + +extern const UChar* +onig_get_contents_by_callout_args(OnigCalloutArgs* args) +{ +  int num; +  CalloutListEntry* e; + +  num = args->num; +  e = onig_reg_callout_list_at(args->regex, num); +  if (IS_NULL(e)) return 0; +  if (e->of == ONIG_CALLOUT_OF_CONTENTS) { +    return e->u.content.start; +  } + +  return 0; +} + +extern const UChar* +onig_get_contents_end_by_callout_args(OnigCalloutArgs* args) +{ +  int num; +  CalloutListEntry* e; + +  num = args->num; +  e = onig_reg_callout_list_at(args->regex, num); +  if (IS_NULL(e)) return 0; +  if (e->of == ONIG_CALLOUT_OF_CONTENTS) { +    return e->u.content.end; +  } + +  return 0; +} + +extern int +onig_get_args_num_by_callout_args(OnigCalloutArgs* args) +{ +  int num; +  CalloutListEntry* e; + +  num = args->num; +  e = onig_reg_callout_list_at(args->regex, num); +  if (IS_NULL(e)) return 0; +  if (e->of == ONIG_CALLOUT_OF_NAME) { +    return e->u.arg.num; +  } + +  return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args) +{ +  int num; +  CalloutListEntry* e; + +  num = args->num; +  e = onig_reg_callout_list_at(args->regex, num); +  if (IS_NULL(e)) return 0; +  if (e->of == ONIG_CALLOUT_OF_NAME) { +    return e->u.arg.passed_num; +  } + +  return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index, +                             OnigType* type, OnigValue* val) +{ +  int num; +  CalloutListEntry* e; + +  num = args->num; +  e = onig_reg_callout_list_at(args->regex, num); +  if (IS_NULL(e)) return 0; +  if (e->of == ONIG_CALLOUT_OF_NAME) { +    if (IS_NOT_NULL(type)) *type = e->u.arg.types[index]; +    if (IS_NOT_NULL(val))  *val  = e->u.arg.vals[index]; +    return ONIG_NORMAL; +  } + +  return ONIGERR_INVALID_ARGUMENT; +} + +extern const UChar* +onig_get_string_by_callout_args(OnigCalloutArgs* args) +{ +  return args->string; +} + +extern const UChar* +onig_get_string_end_by_callout_args(OnigCalloutArgs* args) +{ +  return args->string_end; +} + +extern const UChar* +onig_get_start_by_callout_args(OnigCalloutArgs* args) +{ +  return args->start; +} + +extern const UChar* +onig_get_right_range_by_callout_args(OnigCalloutArgs* args) +{ +  return args->right_range; +} + +extern const UChar* +onig_get_current_by_callout_args(OnigCalloutArgs* args) +{ +  return args->current; +} + +extern OnigRegex +onig_get_regex_by_callout_args(OnigCalloutArgs* args) +{ +  return args->regex; +} + +extern unsigned long +onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args) +{ +  return args->retry_in_match_counter; +} + + +extern int +onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end) +{ +  OnigRegex    reg; +  const UChar* str; +  StackType*   stk_base; +  int i; + +  i = mem_num; +  reg = a->regex; +  str = a->string; +  stk_base = a->stk_base; + +  if (i > 0) { +    if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { +      if (MEM_STATUS_AT(reg->bt_mem_start, i)) +        *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str); +      else +        *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str); + +      *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) +                     ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr +                     : (UChar* )((void* )a->mem_end_stk[i])) - str); +    } +    else { +      *begin = *end = ONIG_REGION_NOTPOS; +    } +  } +  else if (i == 0) { +#if 0 +    *begin = a->start   - str; +    *end   = a->current - str; +#else +    return ONIGERR_INVALID_ARGUMENT; +#endif +  } +  else +    return ONIGERR_INVALID_ARGUMENT; + +  return ONIG_NORMAL; +} + +extern int +onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes) +{ +  int n; + +  n = (int )(a->stk - a->stk_base); + +  if (used_num != 0) +    *used_num = n; + +  if (used_bytes != 0) +    *used_bytes = n * sizeof(StackType); + +  return ONIG_NORMAL; +} + + +/* builtin callout functions */ + +extern int +onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ +  return ONIG_CALLOUT_FAIL; +} + +extern int +onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ +  return ONIG_MISMATCH; +} + +#if 0 +extern int +onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ +  return ONIG_CALLOUT_SUCCESS; +} +#endif + +extern int +onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ +  int r; +  int n; +  OnigValue val; + +  r = onig_get_arg_by_callout_args(args, 0, 0, &val); +  if (r != ONIG_NORMAL) return r; + +  n = (int )val.l; +  if (n >= 0) { +    n = ONIGERR_INVALID_CALLOUT_BODY; +  } + +  return n; +} + +extern int +onig_builtin_count(OnigCalloutArgs* args, void* user_data) +{ +  (void )onig_check_callout_data_and_clear_old_values(args); + +  return onig_builtin_total_count(args, user_data); +} + +extern int +onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ +  int r; +  int slot; +  OnigType  type; +  OnigValue val; +  OnigValue aval; +  OnigCodePoint count_type; + +  r = onig_get_arg_by_callout_args(args, 0, &type, &aval); +  if (r != ONIG_NORMAL) return r; + +  count_type = aval.c; +  if (count_type != '>' && count_type != 'X' && count_type != '<') +    return ONIGERR_INVALID_CALLOUT_ARG; + +  r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0, +                                                                &type, &val); +  if (r < ONIG_NORMAL) +    return r; +  else if (r > ONIG_NORMAL) { +    /* type == void: initial state */ +    val.l = 0; +  } + +  if (args->in == ONIG_CALLOUT_IN_RETRACTION) { +    slot = 2; +    if (count_type == '<') +      val.l++; +    else if (count_type == 'X') +      val.l--; +  } +  else { +    slot = 1; +    if (count_type != '<') +      val.l++; +  } + +  r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val); +  if (r != ONIG_NORMAL) return r; + +  /* slot 1: in progress counter, slot 2: in retraction counter */ +  r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot, +                                                                &type, &val); +  if (r < ONIG_NORMAL) +    return r; +  else if (r > ONIG_NORMAL) { +    val.l = 0; +  } + +  val.l++; +  r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); +  if (r != ONIG_NORMAL) return r; + +  return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ +  int r; +  int slot; +  OnigType  type; +  OnigValue val; +  OnigValue aval; + +  (void )onig_check_callout_data_and_clear_old_values(args); + +  slot = 0; +  r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); +  if (r < ONIG_NORMAL) +    return r; +  else if (r > ONIG_NORMAL) { +    /* type == void: initial state */ +    type  = ONIG_TYPE_LONG; +    val.l = 0; +  } + +  r = onig_get_arg_by_callout_args(args, 0, &type, &aval); +  if (r != ONIG_NORMAL) return r; + +  if (args->in == ONIG_CALLOUT_IN_RETRACTION) { +    val.l--; +  } +  else { +    if (val.l >= aval.l) return ONIG_CALLOUT_FAIL; +    val.l++; +  } + +  r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); +  if (r != ONIG_NORMAL) return r; + +  return ONIG_CALLOUT_SUCCESS; +} + +enum OP_CMP { +  OP_EQ, +  OP_NE, +  OP_LT, +  OP_GT, +  OP_LE, +  OP_GE +}; + +extern int +onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ +  int r; +  int slot; +  long lv; +  long rv; +  OnigType  type; +  OnigValue val; +  regex_t* reg; +  enum OP_CMP op; + +  reg = args->regex; + +  r = onig_get_arg_by_callout_args(args, 0, &type, &val); +  if (r != ONIG_NORMAL) return r; + +  if (type == ONIG_TYPE_TAG) { +    r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); +    if (r < ONIG_NORMAL) return r; +    else if (r > ONIG_NORMAL) +      lv = 0L; +    else +      lv = val.l; +  } +  else { /* ONIG_TYPE_LONG */ +    lv = val.l; +  } + +  r = onig_get_arg_by_callout_args(args, 2, &type, &val); +  if (r != ONIG_NORMAL) return r; + +  if (type == ONIG_TYPE_TAG) { +    r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); +    if (r < ONIG_NORMAL) return r; +    else if (r > ONIG_NORMAL) +      rv = 0L; +    else +      rv = val.l; +  } +  else { /* ONIG_TYPE_LONG */ +    rv = val.l; +  } + +  slot = 0; +  r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); +  if (r < ONIG_NORMAL) +    return r; +  else if (r > ONIG_NORMAL) { +    /* type == void: initial state */ +    OnigCodePoint c1, c2; +    UChar* p; + +    r = onig_get_arg_by_callout_args(args, 1, &type, &val); +    if (r != ONIG_NORMAL) return r; + +    p = val.s.start; +    c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); +    p += ONIGENC_MBC_ENC_LEN(reg->enc, p); +    if (p < val.s.end) { +      c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); +      p += ONIGENC_MBC_ENC_LEN(reg->enc, p); +      if (p != val.s.end)  return ONIGERR_INVALID_CALLOUT_ARG; +    } +    else +      c2 = 0; + +    switch (c1) { +    case '=': +      if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; +      op = OP_EQ; +      break; +    case '!': +      if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; +      op = OP_NE; +      break; +    case '<': +      if (c2 == '=') op = OP_LE; +      else if (c2 == 0) op = OP_LT; +      else  return ONIGERR_INVALID_CALLOUT_ARG; +      break; +    case '>': +      if (c2 == '=') op = OP_GE; +      else if (c2 == 0) op = OP_GT; +      else  return ONIGERR_INVALID_CALLOUT_ARG; +      break; +    default: +      return ONIGERR_INVALID_CALLOUT_ARG; +      break; +    } +    val.l = (long )op; +    r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); +    if (r != ONIG_NORMAL) return r; +  } +  else { +    op = (enum OP_CMP )val.l; +  } + +  switch (op) { +  case OP_EQ: r = (lv == rv); break; +  case OP_NE: r = (lv != rv); break; +  case OP_LT: r = (lv <  rv); break; +  case OP_GT: r = (lv >  rv); break; +  case OP_LE: r = (lv <= rv); break; +  case OP_GE: r = (lv >= rv); break; +  } + +  return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS; +} + + +#include <stdio.h> + +static FILE* OutFp; + +/* name start with "onig_" for macros. */ +static int +onig_builtin_monitor(OnigCalloutArgs* args, void* user_data) +{ +  int r; +  int num; +  size_t tag_len; +  const UChar* start; +  const UChar* right; +  const UChar* current; +  const UChar* string; +  const UChar* strend; +  const UChar* tag_start; +  const UChar* tag_end; +  regex_t* reg; +  OnigCalloutIn in; +  OnigType type; +  OnigValue val; +  char buf[20]; +  FILE* fp; + +  fp = OutFp; + +  r = onig_get_arg_by_callout_args(args, 0, &type, &val); +  if (r != ONIG_NORMAL) return r; + +  in = onig_get_callout_in_by_callout_args(args); +  if (in == ONIG_CALLOUT_IN_PROGRESS) { +    if (val.c == '<') +      return ONIG_CALLOUT_SUCCESS; +  } +  else { +    if (val.c != 'X' && val.c != '<') +      return ONIG_CALLOUT_SUCCESS; +  } + +  num       = onig_get_callout_num_by_callout_args(args); +  start     = onig_get_start_by_callout_args(args); +  right     = onig_get_right_range_by_callout_args(args); +  current   = onig_get_current_by_callout_args(args); +  string    = onig_get_string_by_callout_args(args); +  strend    = onig_get_string_end_by_callout_args(args); +  reg       = onig_get_regex_by_callout_args(args); +  tag_start = onig_get_callout_tag_start(reg, num); +  tag_end   = onig_get_callout_tag_end(reg, num); + +  if (tag_start == 0) +    xsnprintf(buf, sizeof(buf), "#%d", num); +  else { +    /* CAUTION: tag string is not terminated with NULL. */ +    int i; + +    tag_len = tag_end - tag_start; +    if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1; +    for (i = 0; i < tag_len; i++) buf[i] = tag_start[i]; +    buf[tag_len] = '\0'; +  } + +  fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n", +          buf, +          in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=", +          (int )(current - string), +          (int )(start   - string), +          (int )(right   - string), +          (int )(strend  - string)); +  fflush(fp); + +  return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */) +{ +  int id; +  char* name; +  OnigEncoding enc; +  unsigned int ts[4]; +  OnigValue opts[4]; + +  if (IS_NOT_NULL(fp)) +    OutFp = (FILE* )fp; +  else +    OutFp = stdout; + +  enc = ONIG_ENCODING_ASCII; + +  name = "MON"; +  ts[0] = ONIG_TYPE_CHAR; +  opts[0].c = '>'; +  BC_B_O(name, monitor, 1, ts, 1, opts); + +  return ONIG_NORMAL; +} + +#endif /* USE_CALLOUT */ diff --git a/src/reggnu.c b/src/reggnu.c index 50eb9b4..37c7519 100644 --- a/src/reggnu.c +++ b/src/reggnu.c @@ -2,7 +2,7 @@    reggnu.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -28,10 +28,7 @@   */  #include "regint.h" - -#ifndef ONIGGNU_H  #include "oniggnu.h" -#endif  extern void  re_free_registers(OnigRegion* r) @@ -140,8 +137,7 @@ re_mbcinit(int mb_code)      break;    } -  onig_initialize(0, 0); -  onig_initialize_encoding(enc); +  onig_initialize(&enc, 1);    onigenc_set_default_encoding(enc);  } diff --git a/src/regint.h b/src/regint.h index 256b045..ba8407a 100644 --- a/src/regint.h +++ b/src/regint.h @@ -4,7 +4,7 @@    regint.h -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -57,29 +57,48 @@  /* config */  /* spec. config */  #define USE_CALL +#define USE_CALLOUT  #define USE_BACKREF_WITH_LEVEL        /* \k<name+n>, \k<name-n> */  #define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT  /* /(?:()|())*\2/ */  #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE     /* /\n$/ =~ "\n" */  #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +#define USE_RETRY_LIMIT_IN_MATCH +  /* internal config */  #define USE_OP_PUSH_OR_JUMP_EXACT  #define USE_QUANT_PEEK_NEXT  #define USE_ST_LIBRARY +#include "regenc.h" + +#ifdef __cplusplus +# ifndef  HAVE_STDARG_PROTOTYPES +#  define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ +#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 +# ifndef  HAVE_STDARG_PROTOTYPES +#  define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifdef HAVE_STDARG_H +# ifndef  HAVE_STDARG_PROTOTYPES +#  define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +  #define INIT_MATCH_STACK_SIZE                     160  #define DEFAULT_MATCH_STACK_LIMIT_SIZE              0 /* unlimited */ +#define DEFAULT_RETRY_LIMIT_IN_MATCH         10000000  #define DEFAULT_PARSE_DEPTH_LIMIT                4096 -#if defined(__GNUC__) -#  define ARG_UNUSED  __attribute__ ((unused)) -#else -#  define ARG_UNUSED -#endif -  /* */  /* escape other system UChar definition */ -#include "config.h"  #ifdef ONIG_ESCAPE_UCHAR_COLLISION  #undef ONIG_ESCAPE_UCHAR_COLLISION  #endif @@ -89,15 +108,12 @@  #define USE_VARIABLE_META_CHARS  #define USE_POSIX_API_REGION_OPTION  #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -/* #define USE_COMBINATION_EXPLOSION_CHECK */     /* (X*)* */  #define xmalloc     malloc  #define xrealloc    realloc  #define xcalloc     calloc  #define xfree       free -#define CHECK_INTERRUPT_IN_MATCH_AT -  #define st_init_table                  onig_st_init_table  #define st_init_table_with_size        onig_st_init_table_with_size  #define st_init_numtable               onig_st_init_numtable @@ -118,9 +134,6 @@  /* */  #define onig_st_is_member              st_is_member -#define STATE_CHECK_STRING_THRESHOLD_LEN             7 -#define STATE_CHECK_BUFF_MAX_SIZE               0x4000 -  #define xmemset     memset  #define xmemcpy     memcpy  #define xmemmove    memmove @@ -140,6 +153,10 @@  #include <stddef.h> +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif +  #ifdef HAVE_STDLIB_H  #include <stdlib.h>  #endif @@ -184,8 +201,6 @@ typedef unsigned int  uintptr_t;  #endif  #endif -#include "regenc.h" -  #ifdef MIN  #undef MIN  #endif @@ -237,14 +252,93 @@ typedef unsigned int  uintptr_t;  #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ + +#ifdef USE_CALLOUT + +typedef struct { +  int           flag; +  OnigCalloutOf of; +  int           in; +  int           name_id; +  const UChar*  tag_start; +  const UChar*  tag_end; +  OnigCalloutType type; +  OnigCalloutFunc start_func; +  OnigCalloutFunc end_func; +  union { +    struct { +      const UChar* start; +      const UChar* end; +    } content; +    struct { +      int num; +      int passed_num; +      OnigType  types[ONIG_CALLOUT_MAX_ARGS_NUM]; +      OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM]; +    } arg; +  } u; +} CalloutListEntry; + +#endif +  typedef struct { -  int  num_keeper; -  int* keepers; -} RegExt; +  const UChar* pattern; +  const UChar* pattern_end; +#ifdef USE_CALLOUT +  void*  tag_table; +  int    callout_num; +  int    callout_list_alloc; +  CalloutListEntry* callout_list;    /* index: callout num */ +#endif +} RegexExt; -#define REG_EXTP(reg)      (RegExt* )((reg)->chain) +#define REG_EXTP(reg)      ((RegexExt* )((reg)->chain))  #define REG_EXTPL(reg)     ((reg)->chain) +struct re_pattern_buffer { +  /* common members of BBuf(bytes-buffer) */ +  unsigned char* p;         /* compiled pattern */ +  unsigned int used;        /* used space for p */ +  unsigned int alloc;       /* allocated space for p */ + +  int num_mem;                   /* used memory(...) num counted from 1 */ +  int num_repeat;                /* OP_REPEAT/OP_REPEAT_NG id-counter */ +  int num_null_check;            /* OP_EMPTY_CHECK_START/END id counter */ +  int num_comb_exp_check;        /* no longer used (combination explosion check) */ +  int num_call;                  /* number of subexp call */ +  unsigned int capture_history;  /* (?@...) flag (1-31) */ +  unsigned int bt_mem_start;     /* need backtrack flag */ +  unsigned int bt_mem_end;       /* need backtrack flag */ +  int stack_pop_level; +  int repeat_range_alloc; +  OnigRepeatRange* repeat_range; + +  OnigEncoding      enc; +  OnigOptionType    options; +  OnigSyntaxType*   syntax; +  OnigCaseFoldType  case_fold_flag; +  void*             name_table; + +  /* optimization info (string search, char-map and anchors) */ +  int            optimize;          /* optimize flag */ +  int            threshold_len;     /* search str-length for apply optimize */ +  int            anchor;            /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ +  OnigLen   anchor_dmin;       /* (SEMI_)END_BUF anchor distance */ +  OnigLen   anchor_dmax;       /* (SEMI_)END_BUF anchor distance */ +  int            sub_anchor;        /* start-anchor for exact or map */ +  unsigned char *exact; +  unsigned char *exact_end; +  unsigned char  map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ +  int           *int_map;                   /* BM skip for exact_len > 255 */ +  int           *int_map_backward;          /* BM skip for backward search */ +  OnigLen   dmin;                      /* min-distance of exact or map */ +  OnigLen   dmax;                      /* max-distance of exact or map */ + +  /* regex_t link chain */ +  struct re_pattern_buffer* chain;  /* escape compile-conflict */ +}; + +  /* stack pop level */  enum StackPopLevel {    STACK_POP_LEVEL_FREE      = 0, @@ -253,12 +347,14 @@ enum StackPopLevel {  };  /* optimize flags */ -#define ONIG_OPTIMIZE_NONE              0 -#define ONIG_OPTIMIZE_EXACT             1   /* Slow Search */ -#define ONIG_OPTIMIZE_EXACT_BM          2   /* Boyer Moore Search */ -#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV  3   /* BM   (but not simple match) */ -#define ONIG_OPTIMIZE_EXACT_IC          4   /* Slow Search (ignore case) */ -#define ONIG_OPTIMIZE_MAP               5   /* char map */ +enum OptimizeType { +  OPTIMIZE_NONE            = 0, +  OPTIMIZE_EXACT           = 1,  /* Slow Search */ +  OPTIMIZE_EXACT_BM        = 2,  /* Boyer Moore Search */ +  OPTIMIZE_EXACT_BM_NO_REV = 3,  /* BM   (but not simple match) */ +  OPTIMIZE_EXACT_IC        = 4,  /* Slow Search (ignore case) */ +  OPTIMIZE_MAP             = 5   /* char map */ +};  /* bit status */  typedef unsigned int  MemStatusType; @@ -467,8 +563,8 @@ typedef struct _BBuf {  #define ANCHOR_NO_WORD_BOUNDARY (1<<11)  #define ANCHOR_WORD_BEGIN       (1<<12)  #define ANCHOR_WORD_END         (1<<13) -#define ANCHOR_ANYCHAR_STAR     (1<<14)   /* ".*" optimize info */ -#define ANCHOR_ANYCHAR_STAR_ML  (1<<15)   /* ".*" optimize info (multi-line) */ +#define ANCHOR_ANYCHAR_INF      (1<<14) +#define ANCHOR_ANYCHAR_INF_ML   (1<<15)  #define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY    (1<<16)  #define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17) @@ -557,7 +653,7 @@ enum OpCode {    OP_JUMP,    OP_PUSH,    OP_PUSH_SUPER, -  OP_POP, +  OP_POP_OUT,    OP_PUSH_OR_JUMP_EXACT1,  /* if match exact then push, else jump. */    OP_PUSH_IF_PEEK_NEXT,    /* if match exact then push, else none. */    OP_REPEAT,               /* {n,m} */ @@ -581,16 +677,14 @@ enum OpCode {    OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */    OP_LOOK_BEHIND_NOT_END,   /* (?<!...) end   */ -  OP_CALL,                 /* \g<name> */ +  OP_CALL,                  /* \g<name> */    OP_RETURN,    OP_PUSH_SAVE_VAL,    OP_UPDATE_VAR, - -  OP_STATE_CHECK_PUSH,         /* combination explosion check and push */ -  OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump  */ -  OP_STATE_CHECK,              /* check only */ -  OP_STATE_CHECK_ANYCHAR_STAR, -  OP_STATE_CHECK_ANYCHAR_ML_STAR, +#ifdef USE_CALLOUT +  OP_CALLOUT_CONTENTS,      /* (?{...}) (?{{...}}) */ +  OP_CALLOUT_NAME,          /* (*name) (*name[tag](args...)) */ +#endif    /* no need: IS_DYNAMIC_OPTION() == 0 */    OP_SET_OPTION_PUSH,    /* set option and push recover option */ @@ -627,7 +721,6 @@ typedef int ModeType;  #define SIZE_ABSADDR          sizeof(AbsAddrType)  #define SIZE_LENGTH           sizeof(LengthType)  #define SIZE_MEMNUM           sizeof(MemNumType) -#define SIZE_STATE_CHECK_NUM  sizeof(StateCheckNumType)  #define SIZE_REPEATNUM        sizeof(RepeatNumType)  #define SIZE_OPTION           sizeof(OnigOptionType)  #define SIZE_CODE_POINT       sizeof(OnigCodePoint) @@ -643,7 +736,6 @@ typedef int ModeType;  #define GET_REPEATNUM_INC(num,p)   PLATFORM_GET_INC(num,    p, RepeatNumType)  #define GET_OPTION_INC(option,p)   PLATFORM_GET_INC(option, p, OnigOptionType)  #define GET_POINTER_INC(ptr,p)     PLATFORM_GET_INC(ptr,    p, PointerType) -#define GET_STATE_CHECK_NUM_INC(num,p)  PLATFORM_GET_INC(num, p, StateCheckNumType)  #define GET_SAVE_TYPE_INC(type,p)       PLATFORM_GET_INC(type, p, SaveType)  #define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)  #define GET_MODE_INC(mode,p)            PLATFORM_GET_INC(mode, p, ModeType) @@ -662,7 +754,7 @@ typedef int ModeType;  #define SIZE_OP_JUMP                   (SIZE_OPCODE + SIZE_RELADDR)  #define SIZE_OP_PUSH                   (SIZE_OPCODE + SIZE_RELADDR)  #define SIZE_OP_PUSH_SUPER             (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP                     SIZE_OPCODE +#define SIZE_OP_POP_OUT                 SIZE_OPCODE  #define SIZE_OP_PUSH_OR_JUMP_EXACT1    (SIZE_OPCODE + SIZE_RELADDR + 1)  #define SIZE_OP_PUSH_IF_PEEK_NEXT      (SIZE_OPCODE + SIZE_RELADDR + 1)  #define SIZE_OP_REPEAT_INC             (SIZE_OPCODE + SIZE_MEMNUM) @@ -693,11 +785,9 @@ typedef int ModeType;  #define SIZE_OP_PUSH_SAVE_VAL          (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM)  #define SIZE_OP_UPDATE_VAR             (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM) -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define SIZE_OP_STATE_CHECK            (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#define SIZE_OP_STATE_CHECK_PUSH       (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#ifdef USE_CALLOUT +#define SIZE_OP_CALLOUT_CONTENTS       (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_CALLOUT_NAME           (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM)  #endif  #define MC_ESC(syn)               (syn)->meta_char_table.esc @@ -751,44 +841,14 @@ typedef int ModeType;  #define NCCLASS_CLEAR_NOT(nd)   NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)  #define IS_NCCLASS_NOT(nd)      IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -typedef struct { -  void* stack_p; -  int   stack_n; -  OnigOptionType options; -  OnigRegion*    region; -  int   ptr_num; -  const UChar* start;   /* search start position (for \G: BEGIN_POSITION) */ -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */ -  UChar* best_s; -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  void* state_check_buff; -  int   state_check_buff_size; -#endif -} OnigMatchArg; - - -typedef struct OnigEndCallListItem { -  struct OnigEndCallListItem* next; -  void (*func)(void); -} OnigEndCallListItemType; -  extern void onig_add_end_call(void (*func)(void));  #ifdef ONIG_DEBUG -typedef struct { -  short int opcode; -  char*     name; -  short int arg_type; -} OnigOpInfoType; - -extern OnigOpInfoType OnigOpInfo[]; - - -extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc)); +#ifdef ONIG_DEBUG_COMPILE +extern void onig_print_compiled_byte_code_list(FILE* f, regex_t* reg); +#endif  #ifdef ONIG_DEBUG_STATISTICS  extern void onig_statistics_init P_((void)); @@ -803,6 +863,85 @@ extern int    onig_bbuf_init P_((BBuf* buf, int size));  extern int    onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));  extern void   onig_transfer P_((regex_t* to, regex_t* from));  extern int    onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); +extern RegexExt* onig_get_regex_ext(regex_t* reg); +extern int    onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); + +#ifdef USE_CALLOUT + +extern OnigCalloutType onig_get_callout_type_by_name_id(int name_id); +extern OnigCalloutFunc onig_get_callout_start_func_by_name_id(int id); +extern OnigCalloutFunc onig_get_callout_end_func_by_name_id(int id); +extern int             onig_callout_tag_table_free(void* table); +extern void            onig_free_reg_callout_list(int n, CalloutListEntry* list); +extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num); +extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num); + +/* for definition of builtin callout */ +#define BC0_P(name, func)  do {\ +  int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ +  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ +                              (UChar* )(name), (UChar* )((name) + len),\ +                              ONIG_CALLOUT_IN_PROGRESS,\ +                              onig_builtin_ ## func, 0, 0, 0, 0, 0);\ +  if (id < 0) return id;\ +} while(0) + +#define BC0_R(name, func)  do {\ +  int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ +  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ +                              (UChar* )(name), (UChar* )((name) + len),\ +                              ONIG_CALLOUT_IN_RETRACTION,\ +                              onig_builtin_ ## func, 0, 0, 0, 0, 0);\ +  if (id < 0) return id;\ +} while(0) + +#define BC0_B(name, func)  do {\ +  int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ +  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ +                              (UChar* )(name), (UChar* )((name) + len),\ +                              ONIG_CALLOUT_IN_BOTH,\ +                              onig_builtin_ ## func, 0, 0, 0, 0, 0);\ +  if (id < 0) return id;\ +} while(0) + +#define BC_P(name, func, na, ts)  do {\ +  int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ +  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ +                              (UChar* )(name), (UChar* )((name) + len),\ +                              ONIG_CALLOUT_IN_PROGRESS,\ +                                onig_builtin_ ## func, 0, (na), (ts), 0, 0); \ +  if (id < 0) return id;\ +} while(0) + +#define BC_P_O(name, func, nts, ts, nopts, opts)  do {\ +  int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ +  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ +                           (UChar* )(name), (UChar* )((name) + len),\ +                           ONIG_CALLOUT_IN_PROGRESS,\ +                           onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ +  if (id < 0) return id;\ +} while(0) + +#define BC_B(name, func, na, ts)  do {\ +  int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ +  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ +                              (UChar* )(name), (UChar* )((name) + len),\ +                              ONIG_CALLOUT_IN_BOTH,\ +                              onig_builtin_ ## func, 0, (na), (ts), 0, 0);\ +  if (id < 0) return id;\ +} while(0) + +#define BC_B_O(name, func, nts, ts, nopts, opts)  do {\ +  int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ +  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ +                           (UChar* )(name), (UChar* )((name) + len),\ +                           ONIG_CALLOUT_IN_BOTH,\ +                           onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ +  if (id < 0) return id;\ +} while(0) + +#endif /* USE_CALLOUT */ +  /* strend hash */  typedef void hash_table_type; diff --git a/src/regparse.c b/src/regparse.c index 1e4dc30..6e95a14 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -2,7 +2,7 @@    regparse.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -26,6 +26,7 @@   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF   * SUCH DAMAGE.   */ +  #include "regparse.h"  #include "st.h" @@ -33,10 +34,17 @@  #include <stdio.h>  #endif +#define INIT_TAG_NAMES_ALLOC_NUM   5 +  #define WARN_BUFSIZE    256  #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS +#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \ +  ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' /* || c == '!' */) +#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \ +  ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') +  OnigSyntaxType OnigSyntaxOniguruma = {    (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | @@ -50,6 +58,8 @@ OnigSyntaxType OnigSyntaxOniguruma = {        ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |        ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |        ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | +      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | +      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME    |        ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |        ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |        ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT | @@ -188,6 +198,16 @@ onig_set_parse_depth_limit(unsigned int depth)    return 0;  } +static int +positive_int_multiply(int x, int y) +{ +  if (x == 0 || y == 0) return 0; + +  if (x < INT_MAX / y) +    return x * y; +  else +    return -1; +}  static void  bbuf_free(BBuf* bbuf) @@ -331,25 +351,6 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end)    }  } -static UChar* -strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) -{ -  int slen, term_len, i; -  UChar *r; - -  slen = (int )(end - s); -  term_len = ONIGENC_MBC_MINLEN(enc); - -  r = (UChar* )xmalloc(slen + term_len); -  CHECK_NULL_RETURN(r); -  xmemcpy(r, s, slen); - -  for (i = 0; i < term_len; i++) -    r[slen + i] = (UChar )0; - -  return r; -} -  static int  save_entry(ScanEnv* env, enum SaveType type, int* id)  { @@ -521,6 +522,106 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key,    return result;  } + +typedef struct { +  OnigEncoding enc; +  int    type; // callout type: single or not +  UChar* s; +  UChar* end; +} st_callout_name_key; + +static int +callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y) +{ +  UChar *p, *q; +  int c; + +  if (x->enc  != y->enc)  return 1; +  if (x->type != y->type) return 1; +  if ((x->end - x->s) != (y->end - y->s)) +    return 1; + +  p = x->s; +  q = y->s; +  while (p < x->end) { +    c = (int )*p - (int )*q; +    if (c != 0) return c; + +    p++; q++; +  } + +  return 0; +} + +static int +callout_name_table_hash(st_callout_name_key* x) +{ +  UChar *p; +  int val = 0; + +  p = x->s; +  while (p < x->end) { +    val = val * 997 + (int )*p++; +  } + +  /* use intptr_t for escape warning in Windows */ +  return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type; +} + +extern hash_table_type* +onig_st_init_callout_name_table_with_size(int size) +{ +  static struct st_hash_type hashType = { +    callout_name_table_cmp, +    callout_name_table_hash, +  }; + +  return (hash_table_type* ) +           onig_st_init_table_with_size(&hashType, size); +} + +extern int +onig_st_lookup_callout_name_table(hash_table_type* table, +                                  OnigEncoding enc, +                                  int type, +                                  const UChar* str_key, +                                  const UChar* end_key, +                                  hash_data_type *value) +{ +  st_callout_name_key key; + +  key.enc  = enc; +  key.type = type; +  key.s    = (UChar* )str_key; +  key.end  = (UChar* )end_key; + +  return onig_st_lookup(table, (st_data_t )(&key), value); +} + +static int +st_insert_callout_name_table(hash_table_type* table, +                             OnigEncoding enc, int type, +                             UChar* str_key, UChar* end_key, +                             hash_data_type value) +{ +  st_callout_name_key* key; +  int result; + +  key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key)); +  CHECK_NULL_RETURN_MEMERR(key); + +  /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */ +  key->enc  = enc; +  key->type = type; +  key->s    = str_key; +  key->end  = end_key; +  result = onig_st_insert(table, (st_data_t )key, value); +  if (result) { +    xfree(key); +  } +  return result; +} +  #endif /* USE_ST_LIBRARY */ @@ -537,6 +638,8 @@ typedef struct {  #ifdef USE_ST_LIBRARY +#define INIT_NAMES_ALLOC_NUM    5 +  typedef st_table  NameTable;  typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */ @@ -862,13 +965,13 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)    if (IS_NULL(e)) {  #ifdef USE_ST_LIBRARY      if (IS_NULL(t)) { -      t = onig_st_init_strend_table_with_size(5); +      t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);        reg->name_table = (void* )t;      }      e = (NameEntry* )xmalloc(sizeof(NameEntry));      CHECK_NULL_RETURN_MEMERR(e); -    e->name = strdup_with_null(reg->enc, name, name_end); +    e->name = onigenc_strdup(reg->enc, name, name_end);      if (IS_NULL(e->name)) {        xfree(e);  return ONIGERR_MEMORY;      } @@ -919,7 +1022,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)      }      e = &(t->e[t->num]);      t->num++; -    e->name = strdup_with_null(reg->enc, name, name_end); +    e->name = onigenc_strdup(reg->enc, name, name_end);      if (IS_NULL(e->name)) return ONIGERR_MEMORY;      e->name_len = name_end - name;  #endif @@ -1019,6 +1122,781 @@ onig_noname_group_capture_is_active(regex_t* reg)    return 1;  } +#ifdef USE_CALLOUT + +typedef struct { +  OnigCalloutType type; +  int             in; +  OnigCalloutFunc start_func; +  OnigCalloutFunc end_func; +  int             arg_num; +  int             opt_arg_num; +  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM]; +  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM]; +  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */ +} CalloutNameListEntry; + +typedef struct { +  int  n; +  int  alloc; +  CalloutNameListEntry* v; +} CalloutNameListType; + +static CalloutNameListType* GlobalCalloutNameList; + +static int +make_callout_func_list(CalloutNameListType** rs, int init_size) +{ +  CalloutNameListType* s; +  CalloutNameListEntry* v; + +  *rs = 0; + +  s = xmalloc(sizeof(*s)); +  if (IS_NULL(s)) return ONIGERR_MEMORY; + +  v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size); +  if (IS_NULL(v)) { +    xfree(s); +    return ONIGERR_MEMORY; +  } + +  s->n = 0; +  s->alloc = init_size; +  s->v = v; + +  *rs = s; +  return ONIG_NORMAL; +} + +static void +free_callout_func_list(CalloutNameListType* s) +{ +  if (IS_NOT_NULL(s)) { +    if (IS_NOT_NULL(s->v)) { +      int i, j; + +      for (i = 0; i < s->n; i++) { +        CalloutNameListEntry* e = s->v + i; +        for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) { +          if (e->arg_types[j] == ONIG_TYPE_STRING) { +            UChar* p = e->opt_defaults[j].s.start; +            if (IS_NOT_NULL(p)) xfree(p); +          } +        } +      } +      xfree(s->v); +    } +    xfree(s); +  } +} + +static int +callout_func_list_add(CalloutNameListType* s, int* rid) +{ +  if (s->n >= s->alloc) { +    int new_size = s->alloc * 2; +    CalloutNameListEntry* nv = (CalloutNameListEntry* ) +      xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size); +    if (IS_NULL(nv)) return ONIGERR_MEMORY; + +    s->alloc = new_size; +    s->v = nv; +  } + +  *rid = s->n; + +  xmemset(&(s->v[s->n]), 0, sizeof(*(s->v))); +  s->n++; +  return ONIG_NORMAL; +} + + +typedef struct { +  UChar* name; +  int    name_len;   /* byte length */ +  int    id; +} CalloutNameEntry; + +#ifdef USE_ST_LIBRARY +typedef st_table  CalloutNameTable; +#else +typedef struct { +  CalloutNameEntry* e; +  int               num; +  int               alloc; +} CalloutNameTable; +#endif + +static CalloutNameTable* GlobalCalloutNameTable; +static int CalloutNameIDCounter; + +#ifdef USE_ST_LIBRARY + +static int +i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e, +                          void* arg ARG_UNUSED) +{ +  xfree(e->name); +  /*xfree(key->s); */ /* is same as e->name */ +  xfree(key); +  xfree(e); +  return ST_DELETE; +} + +static int +callout_name_table_clear(CalloutNameTable* t) +{ +  if (IS_NOT_NULL(t)) { +    onig_st_foreach(t, i_free_callout_name_entry, 0); +  } +  return 0; +} + +static int +global_callout_name_table_free(void) +{ +  if (IS_NOT_NULL(GlobalCalloutNameTable)) { +    int r = callout_name_table_clear(GlobalCalloutNameTable); +    if (r != 0) return r; + +    onig_st_free_table(GlobalCalloutNameTable); +    GlobalCalloutNameTable = 0; +    CalloutNameIDCounter = 0; +  } + +  return 0; +} + +static CalloutNameEntry* +callout_name_find(OnigEncoding enc, int is_not_single, +                  const UChar* name, const UChar* name_end) +{ +  int r; +  CalloutNameEntry* e; +  CalloutNameTable* t = GlobalCalloutNameTable; + +  e = (CalloutNameEntry* )NULL; +  if (IS_NOT_NULL(t)) { +    r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end, +                                          (HashDataType* )((void* )(&e))); +    if (r == 0) { /* not found */ +      if (enc != ONIG_ENCODING_ASCII && +          ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) { +        enc = ONIG_ENCODING_ASCII; +        onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end, +                                          (HashDataType* )((void* )(&e))); +      } +    } +  } +  return e; +} + +#else + +static int +callout_name_table_clear(CalloutNameTable* t) +{ +  int i; +  CalloutNameEntry* e; + +  if (IS_NOT_NULL(t)) { +    for (i = 0; i < t->num; i++) { +      e = &(t->e[i]); +      if (IS_NOT_NULL(e->name)) { +        xfree(e->name); +        e->name     = NULL; +        e->name_len = 0; +        e->id       = 0; +        e->func     = 0; +      } +    } +    if (IS_NOT_NULL(t->e)) { +      xfree(t->e); +      t->e = NULL; +    } +    t->num = 0; +  } +  return 0; +} + +static int +global_callout_name_table_free(void) +{ +  if (IS_NOT_NULL(GlobalCalloutNameTable)) { +    int r = callout_name_table_clear(GlobalCalloutNameTable); +    if (r != 0) return r; + +    xfree(GlobalCalloutNameTable); +    GlobalCalloutNameTable = 0; +    CalloutNameIDCounter = 0; +  } +  return 0; +} + +static CalloutNameEntry* +callout_name_find(UChar* name, UChar* name_end) +{ +  int i, len; +  CalloutNameEntry* e; +  CalloutNameTable* t = Calloutnames; + +  if (IS_NOT_NULL(t)) { +    len = name_end - name; +    for (i = 0; i < t->num; i++) { +      e = &(t->e[i]); +      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) +        return e; +    } +  } +  return (CalloutNameEntry* )NULL; +} + +#endif + +/* name string must be single byte char string. */ +static int +callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc, +                   int is_not_single, UChar* name, UChar* name_end) +{ +  int r; +  CalloutNameEntry* e; +  CalloutNameTable* t = GlobalCalloutNameTable; + +  *rentry = 0; +  if (name_end - name <= 0) +    return ONIGERR_INVALID_CALLOUT_NAME; + +  e = callout_name_find(enc, is_not_single, name, name_end); +  if (IS_NULL(e)) { +#ifdef USE_ST_LIBRARY +    if (IS_NULL(t)) { +      t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM); +      GlobalCalloutNameTable = t; +    } +    e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry)); +    CHECK_NULL_RETURN_MEMERR(e); + +    e->name = onigenc_strdup(enc, name, name_end); +    if (IS_NULL(e->name)) { +      xfree(e);  return ONIGERR_MEMORY; +    } + +    r = st_insert_callout_name_table(t, enc, is_not_single, +                                     e->name, (e->name + (name_end - name)), +                                     (HashDataType )e); +    if (r < 0) return r; + +#else + +    int alloc; + +    if (IS_NULL(t)) { +      alloc = INIT_NAMES_ALLOC_NUM; +      t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable)); +      CHECK_NULL_RETURN_MEMERR(t); +      t->e     = NULL; +      t->alloc = 0; +      t->num   = 0; + +      t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc); +      if (IS_NULL(t->e)) { +        xfree(t); +        return ONIGERR_MEMORY; +      } +      t->alloc = alloc; +      GlobalCalloutNameTable = t; +      goto clear; +    } +    else if (t->num == t->alloc) { +      int i; + +      alloc = t->alloc * 2; +      t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc); +      CHECK_NULL_RETURN_MEMERR(t->e); +      t->alloc = alloc; + +    clear: +      for (i = t->num; i < t->alloc; i++) { +        t->e[i].name       = NULL; +        t->e[i].name_len   = 0; +        t->e[i].id         = 0; +      } +    } +    e = &(t->e[t->num]); +    t->num++; +    e->name = onigenc_strdup(enc, name, name_end); +    if (IS_NULL(e->name)) return ONIGERR_MEMORY; +#endif + +    CalloutNameIDCounter++; +    e->id = CalloutNameIDCounter; +    e->name_len = (int )(name_end - name); +  } + +  *rentry = e; +  return e->id; +} + +static int +is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end) +{ +  UChar* p; +  OnigCodePoint c; + +  if (name >= name_end) return 0; + +  p = name; +  while (p < name_end) { +    c = ONIGENC_MBC_TO_CODE(enc, p, name_end); +    if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c)) +      return 0; + +    if (p == name) { +      if (c >= '0' && c <= '9') return 0; +    } + +    p += ONIGENC_MBC_ENC_LEN(enc, p); +  } + +  return 1; +} + +static int +is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end) +{ +  UChar* p; +  OnigCodePoint c; + +  if (name >= name_end) return 0; + +  p = name; +  while (p < name_end) { +    c = ONIGENC_MBC_TO_CODE(enc, p, name_end); +    if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c)) +      return 0; + +    if (p == name) { +      if (c >= '0' && c <= '9') return 0; +    } + +    p += ONIGENC_MBC_ENC_LEN(enc, p); +  } + +  return 1; +} + +extern int +onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type, +                         UChar* name, UChar* name_end, int in, +                         OnigCalloutFunc start_func, +                         OnigCalloutFunc end_func, +                         int arg_num, unsigned int arg_types[], +                         int opt_arg_num, OnigValue opt_defaults[]) +{ +  int r; +  int i; +  int j; +  int id; +  int is_not_single; +  CalloutNameEntry* e; +  CalloutNameListEntry* fe; + +  if (callout_type != ONIG_CALLOUT_TYPE_SINGLE) +    return ONIGERR_INVALID_ARGUMENT; + +  if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM) +    return ONIGERR_INVALID_CALLOUT_ARG; + +  if (opt_arg_num < 0 || opt_arg_num > arg_num) +    return ONIGERR_INVALID_CALLOUT_ARG; + +  if (start_func == 0 && end_func == 0) +    return ONIGERR_INVALID_CALLOUT_ARG; + +  if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0) +    return ONIGERR_INVALID_CALLOUT_ARG; + +  for (i = 0; i < arg_num; i++) { +    unsigned int t = arg_types[i]; +    if (t == ONIG_TYPE_VOID) +      return ONIGERR_INVALID_CALLOUT_ARG; +    else { +      if (i >= arg_num - opt_arg_num) { +        if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && +            t != ONIG_TYPE_TAG) +          return ONIGERR_INVALID_CALLOUT_ARG; +      } +      else { +        if (t != ONIG_TYPE_LONG) { +          t = t & ~ONIG_TYPE_LONG; +          if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG) +            return ONIGERR_INVALID_CALLOUT_ARG; +        } +      } +    } +  } + +  if (! is_allowed_callout_name(enc, name, name_end)) { +    return ONIGERR_INVALID_CALLOUT_NAME; +  } + +  is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE); +  id = callout_name_entry(&e, enc, is_not_single, name, name_end); +  if (id < 0) return id; + +  r = ONIG_NORMAL; +  if (IS_NULL(GlobalCalloutNameList)) { +    r = make_callout_func_list(&GlobalCalloutNameList, 10); +    if (r != ONIG_NORMAL) return r; +  } + +  while (id >= GlobalCalloutNameList->n) { +    int rid; +    r = callout_func_list_add(GlobalCalloutNameList, &rid); +    if (r != ONIG_NORMAL) return r; +  } + +  fe = GlobalCalloutNameList->v + id; +  fe->type         = callout_type; +  fe->in           = in; +  fe->start_func   = start_func; +  fe->end_func     = end_func; +  fe->arg_num      = arg_num; +  fe->opt_arg_num  = opt_arg_num; +  fe->name         = e->name; + +  for (i = 0; i < arg_num; i++) { +    fe->arg_types[i] = arg_types[i]; +  } +  for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) { +    if (fe->arg_types[i] == ONIG_TYPE_STRING) { +      OnigValue* val = opt_defaults + j; +      UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end); +      CHECK_NULL_RETURN_MEMERR(ds); + +      fe->opt_defaults[i].s.start = ds; +      fe->opt_defaults[i].s.end   = ds + (val->s.end - val->s.start); +    } +    else { +      fe->opt_defaults[i] = opt_defaults[j]; +    } +  } + +  r = id; // return id +  return r; +} + +static int +get_callout_name_id_by_name(OnigEncoding enc, int is_not_single, +                            UChar* name, UChar* name_end, int* rid) +{ +  int r; +  CalloutNameEntry* e; + +  if (! is_allowed_callout_name(enc, name, name_end)) { +    return ONIGERR_INVALID_CALLOUT_NAME; +  } + +  e = callout_name_find(enc, is_not_single, name, name_end); +  if (IS_NULL(e)) { +    return ONIGERR_UNDEFINED_CALLOUT_NAME; +  } + +  r = ONIG_NORMAL; +  *rid = e->id; + +  return r; +} + +extern OnigCalloutFunc +onig_get_callout_start_func(regex_t* reg, int callout_num) +{ +  /* If used for callouts of contents, return 0. */ +  CalloutListEntry* e; + +  e = onig_reg_callout_list_at(reg, callout_num); +  return e->start_func; +} + +extern const UChar* +onig_get_callout_tag_start(regex_t* reg, int callout_num) +{ +  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); +  return e->tag_start; +} + +extern const UChar* +onig_get_callout_tag_end(regex_t* reg, int callout_num) +{ +  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); +  return e->tag_end; +} + + +extern OnigCalloutType +onig_get_callout_type_by_name_id(int name_id) +{ +  return GlobalCalloutNameList->v[name_id].type; +} + +extern OnigCalloutFunc +onig_get_callout_start_func_by_name_id(int name_id) +{ +  return GlobalCalloutNameList->v[name_id].start_func; +} + +extern OnigCalloutFunc +onig_get_callout_end_func_by_name_id(int name_id) +{ +  return GlobalCalloutNameList->v[name_id].end_func; +} + +extern int +onig_get_callout_in_by_name_id(int name_id) +{ +  return GlobalCalloutNameList->v[name_id].in; +} + +static int +get_callout_arg_num_by_name_id(int name_id) +{ +  return GlobalCalloutNameList->v[name_id].arg_num; +} + +static int +get_callout_opt_arg_num_by_name_id(int name_id) +{ +  return GlobalCalloutNameList->v[name_id].opt_arg_num; +} + +static unsigned int +get_callout_arg_type_by_name_id(int name_id, int index) +{ +  return GlobalCalloutNameList->v[name_id].arg_types[index]; +} + +static OnigValue +get_callout_opt_default_by_name_id(int name_id, int index) +{ +  return GlobalCalloutNameList->v[name_id].opt_defaults[index]; +} + +extern UChar* +onig_get_callout_name_by_name_id(int name_id) +{ +  return GlobalCalloutNameList->v[name_id].name; +} + +extern int +onig_global_callout_names_free(void) +{ +  free_callout_func_list(GlobalCalloutNameList); +  GlobalCalloutNameList = 0; + +  global_callout_name_table_free(); +  return ONIG_NORMAL; +} + + +typedef st_table   CalloutTagTable; +typedef intptr_t   CalloutTagVal; + +#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST     (1<<0) + +static int +i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg) +{ +  int num; +  RegexExt* ext = (RegexExt* )arg; + +  num = (int )e - 1; +  ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST; +  return ST_CONTINUE; +} + +static int +setup_ext_callout_list_values(regex_t* reg) +{ +  int i, j; +  RegexExt* ext; + +  ext = REG_EXTP(reg); +  if (IS_NOT_NULL(ext->tag_table)) { +    onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set, +                    (st_data_t )ext); +  } + +  for (i = 0; i < ext->callout_num; i++) { +    CalloutListEntry* e = ext->callout_list + i; +    if (e->of == ONIG_CALLOUT_OF_NAME) { +      for (j = 0; j < e->u.arg.num; j++) { +        if (e->u.arg.types[j] == ONIG_TYPE_TAG) { +          UChar* start; +          UChar* end; +          int num; +          start = e->u.arg.vals[j].s.start; +          end   = e->u.arg.vals[j].s.end; +          num = onig_get_callout_num_by_tag(reg, start, end); +          if (num < 0) return num; +          e->u.arg.vals[j].tag = num; +        } +      } +    } +  } + +  return ONIG_NORMAL; +} + +extern int +onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num) +{ +  RegexExt* ext = REG_EXTP(reg); + +  if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0; +  if (callout_num > ext->callout_num) return 0; + +  return (ext->callout_list[callout_num].flag & +          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0  ? 1 : 0; +} + +static int +i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED) +{ +  xfree(key); +  return ST_DELETE; +} + +static int +callout_tag_table_clear(CalloutTagTable* t) +{ +  if (IS_NOT_NULL(t)) { +    onig_st_foreach(t, i_free_callout_tag_entry, 0); +  } +  return 0; +} + +extern int +onig_callout_tag_table_free(void* table) +{ +  CalloutTagTable* t = (CalloutTagTable* )table; + +  if (IS_NOT_NULL(t)) { +    int r = callout_tag_table_clear(t); +    if (r != 0) return r; + +    onig_st_free_table(t); +  } + +  return 0; +} + +extern int +onig_get_callout_num_by_tag(regex_t* reg, +                            const UChar* tag, const UChar* tag_end) +{ +  int r; +  RegexExt* ext; +  CalloutTagVal e; + +  ext = REG_EXTP(reg); +  if (IS_NULL(ext) || IS_NULL(ext->tag_table)) +    return ONIGERR_INVALID_CALLOUT_TAG_NAME; + +  r = onig_st_lookup_strend(ext->tag_table, tag, tag_end, +                            (HashDataType* )((void* )(&e))); +  if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; +  return (int )e; +} + +static CalloutTagVal +callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end) +{ +  CalloutTagVal e; + +  e = -1; +  if (IS_NOT_NULL(t)) { +    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); +  } +  return e; +} + +static int +callout_tag_table_new(CalloutTagTable** rt) +{ +  CalloutTagTable* t; + +  *rt = 0; +  t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM); +  CHECK_NULL_RETURN_MEMERR(t); + +  *rt = t; +  return ONIG_NORMAL; +} + +static int +callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end, +                      CalloutTagVal entry_val) +{ +  int r; +  CalloutTagVal val; + +  if (name_end - name <= 0) +    return ONIGERR_INVALID_CALLOUT_TAG_NAME; + +  val = callout_tag_find(t, name, name_end); +  if (val >= 0) +    return ONIGERR_MULTIPLEX_DEFINED_NAME; + +  r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val); +  if (r < 0) return r; + +  return ONIG_NORMAL; +} + +static int +ext_ensure_tag_table(regex_t* reg) +{ +  int r; +  RegexExt* ext; +  CalloutTagTable* t; + +  ext = onig_get_regex_ext(reg); +  CHECK_NULL_RETURN_MEMERR(ext); + +  if (IS_NULL(ext->tag_table)) { +    r = callout_tag_table_new(&t); +    if (r != ONIG_NORMAL) return r; + +    ext->tag_table = t; +  } + +  return ONIG_NORMAL; +} + +static int +callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end, +                  CalloutTagVal entry_val) +{ +  int r; +  RegexExt* ext; +  CalloutListEntry* e; + +  r = ext_ensure_tag_table(reg); +  if (r != ONIG_NORMAL) return r; + +  ext = onig_get_regex_ext(reg); +  r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val); + +  e = onig_reg_callout_list_at(reg, (int )entry_val); +  e->tag_start = name; +  e->tag_end   = name_end; + +  return r; +} + +#endif /* USE_CALLOUT */ +  #define INIT_SCANENV_MEMENV_ALLOC_SIZE   16 @@ -1045,12 +1923,6 @@ scan_env_clear(ScanEnv* env)    xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  env->num_comb_exp_check  = 0; -  env->comb_exp_max_regnum = 0; -  env->curr_max_regnum     = 0; -  env->has_recursion       = 0; -#endif    env->parse_depth         = 0;    env->keep_num            = 0;    env->save_num            = 0; @@ -1504,10 +2376,6 @@ node_new_quantifier(int lower, int upper, int by_number)    if (by_number != 0)      NODE_STATUS_ADD(node, NST_BY_NUMBER); -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  QUANT_(node)->comb_exp_check_num = 0; -#endif -    return node;  } @@ -1642,6 +2510,116 @@ node_new_keep(Node** node, ScanEnv* env)    return ONIG_NORMAL;  } +#ifdef USE_CALLOUT + +extern void +onig_free_reg_callout_list(int n, CalloutListEntry* list) +{ +  int i; +  int j; + +  if (IS_NULL(list)) return ; + +  for (i = 0; i < n; i++) { +    if (list[i].of == ONIG_CALLOUT_OF_NAME) { +      for (j = 0; j < list[i].u.arg.passed_num; j++) { +        if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) { +          if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start)) +            xfree(list[i].u.arg.vals[j].s.start); +        } +      } +    } +    else { /* ONIG_CALLOUT_OF_CONTENTS */ +      if (IS_NOT_NULL(list[i].u.content.start)) { +        xfree((void* )list[i].u.content.start); +      } +    } +  } + +  xfree(list); +} + +extern CalloutListEntry* +onig_reg_callout_list_at(regex_t* reg, int num) +{ +  RegexExt* ext = REG_EXTP(reg); +  CHECK_NULL_RETURN(ext); + +  if (num <= 0 || num > ext->callout_num) +    return 0; + +  num--; +  return ext->callout_list + num; +} + +static int +reg_callout_list_entry(ScanEnv* env, int* rnum) +{ +#define INIT_CALLOUT_LIST_NUM  3 + +  int num; +  CalloutListEntry* list; +  CalloutListEntry* e; +  RegexExt* ext; + +  ext = onig_get_regex_ext(env->reg); +  CHECK_NULL_RETURN_MEMERR(ext); + +  if (IS_NULL(ext->callout_list)) { +    list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM); +    CHECK_NULL_RETURN_MEMERR(list); + +    ext->callout_list = list; +    ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM; +    ext->callout_num = 0; +  } + +  num = ext->callout_num + 1; +  if (num > ext->callout_list_alloc) { +    int alloc = ext->callout_list_alloc * 2; +    list = (CalloutListEntry* )xrealloc(ext->callout_list, +                                        sizeof(CalloutListEntry) * alloc); +    CHECK_NULL_RETURN_MEMERR(list); + +    ext->callout_list       = list; +    ext->callout_list_alloc = alloc; +  } + +  e = ext->callout_list + (num - 1); + +  e->flag             = 0; +  e->of               = 0; +  e->in               = ONIG_CALLOUT_OF_CONTENTS; +  e->type             = 0; +  e->tag_start        = 0; +  e->tag_end          = 0; +  e->start_func       = 0; +  e->end_func         = 0; +  e->u.arg.num        = 0; +  e->u.arg.passed_num = 0; + +  ext->callout_num = num; +  *rnum = num; +  return ONIG_NORMAL; +} + +static int +node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id, +                 ScanEnv* env) +{ +  *node = node_new(); +  CHECK_NULL_RETURN_MEMERR(*node); + +  NODE_SET_TYPE(*node, NODE_GIMMICK); +  GIMMICK_(*node)->id          = id; +  GIMMICK_(*node)->num         = num; +  GIMMICK_(*node)->type        = GIMMICK_CALLOUT; +  GIMMICK_(*node)->detail_type = (int )callout_of; + +  return ONIG_NORMAL; +} +#endif +  static int  make_extended_grapheme_cluster(Node** node, ScanEnv* env)  { @@ -2838,7 +3816,7 @@ is_invalid_quantifier_target(Node* node)  /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */  static int -popular_quantifier_num(QuantNode* q) +quantifier_type_num(QuantNode* q)  {    if (q->greedy) {      if (q->lower == 0) { @@ -2889,9 +3867,22 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)    p = QUANT_(pnode);    c = QUANT_(cnode); -  pnum = popular_quantifier_num(p); -  cnum = popular_quantifier_num(c); -  if (pnum < 0 || cnum < 0) return ; +  pnum = quantifier_type_num(p); +  cnum = quantifier_type_num(c); +  if (pnum < 0 || cnum < 0) { +    if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) { +      if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) { +        int n = positive_int_multiply(p->lower, c->lower); +        if (n >= 0) { +          p->lower = p->upper = n; +          NODE_BODY(pnode) = NODE_BODY(cnode); +          goto remove_cnode; +        } +      } +    } + +    return ; +  }    switch(ReduceTypeTable[cnum][pnum]) {    case RQ_DEL: @@ -2927,6 +3918,7 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)      break;    } + remove_cnode:    NODE_BODY(cnode) = NULL_NODE;    onig_node_free(cnode);  } @@ -5508,6 +6500,452 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en  static int parse_subexp(Node** top, OnigToken* tok, int term,                          UChar** src, UChar* end, ScanEnv* env); +#ifdef USE_CALLOUT + +/* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */ +static int +parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +{ +  int r; +  int i; +  int in; +  int num; +  OnigCodePoint c; +  UChar* code_start; +  UChar* code_end; +  UChar* contents; +  UChar* tag_start; +  UChar* tag_end; +  int brace_nest; +  CalloutListEntry* e; +  RegexExt* ext; +  OnigEncoding enc = env->enc; +  UChar* p = *src; + +  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + +  brace_nest = 0; +  while (PPEEK_IS('{')) { +    brace_nest++; +    PINC_S; +    if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; +  } + +  in = ONIG_CALLOUT_IN_PROGRESS; +  code_start = p; +  while (1) { +    if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + +    code_end = p; +    PFETCH_S(c); +    if (c == '}') { +      i = brace_nest; +      while (i > 0) { +        if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; +        PFETCH_S(c); +        if (c == '}') i--; +        else break; +      } +      if (i == 0) break; +    } +  } + +  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + +  PFETCH_S(c); +  if (c == '[') { +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    tag_start = p; +    while (! PEND) { +      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +      tag_end = p; +      PFETCH_S(c); +      if (c == ']') break; +    } +    if (! is_allowed_callout_tag_name(enc, tag_start, tag_end)) +      return ONIGERR_INVALID_CALLOUT_TAG_NAME; + +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    PFETCH_S(c); +  } +  else { +    tag_start = tag_end = 0; +  } + +  if (c == 'X') { +    in |= ONIG_CALLOUT_IN_RETRACTION; +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    PFETCH_S(c); +  } +  else if (c == '<') { +    in = ONIG_CALLOUT_IN_RETRACTION; +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    PFETCH_S(c); +  } +  else if (c == '>') { /* no needs (default) */ +    //in = ONIG_CALLOUT_IN_PROGRESS; +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    PFETCH_S(c); +  } + +  if (c != cterm) +    return ONIGERR_INVALID_CALLOUT_PATTERN; + +  r = reg_callout_list_entry(env, &num); +  if (r != 0) return r; + +  ext = onig_get_regex_ext(env->reg); +  if (IS_NULL(ext->pattern)) { +    r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); +    if (r != ONIG_NORMAL) return r; +  } + +  if (tag_start != tag_end) { +    r = callout_tag_entry(env->reg, tag_start, tag_end, num); +    if (r != ONIG_NORMAL) return r; +  } + +  contents = onigenc_strdup(enc, code_start, code_end); +  CHECK_NULL_RETURN_MEMERR(contents); + +  r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); +  if (r != 0) { +    xfree(contents); +    return r; +  } + +  e = onig_reg_callout_list_at(env->reg, num); +  e->of      = ONIG_CALLOUT_OF_CONTENTS; +  e->in      = in; +  e->name_id = ONIG_NON_NAME_ID; +  e->u.content.start = contents; +  e->u.content.end   = contents + (code_end - code_start); + +  *src = p; +  return 0; +} + +static long +parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl) +{ +  long v; +  long d; +  int flag; +  UChar* p; +  OnigCodePoint c; + +  if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG; + +  flag = 1; +  v = 0; +  p = s; +  while (p < end) { +    c = ONIGENC_MBC_TO_CODE(enc, p, end); +    p += ONIGENC_MBC_ENC_LEN(enc, p); +    if (c >= '0' && c <= '9') { +      d = (long )(c - '0'); +      if (v > (max - d) / 10) +        return ONIGERR_INVALID_CALLOUT_ARG; + +      v = v * 10 + d; +    } +    else if (sign_on != 0 && (c == '-' || c == '+')) { +      if (c == '-') flag = -1; +    } +    else +      return ONIGERR_INVALID_CALLOUT_ARG; + +    sign_on = 0; +  } + +  *rl = flag * v; +  return ONIG_NORMAL; +} + +static int +parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, +                   unsigned int types[], OnigValue vals[], ScanEnv* env) +{ +#define MAX_CALLOUT_ARG_BYTE_LENGTH   128 + +  int r; +  int n; +  int esc; +  int cn; +  UChar* s; +  UChar* e; +  UChar* eesc; +  OnigCodePoint c; +  UChar* bufend; +  UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH]; +  OnigEncoding enc = env->enc; +  UChar* p = *src; + +  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + +  n = 0; +  while (n < ONIG_CALLOUT_MAX_ARGS_NUM) { +    c   = 0; +    cn  = 0; +    esc = 0; +    eesc = 0; +    bufend = buf; +    s = e = p; +    while (1) { +      if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + +      e = p; +      PFETCH_S(c); +      if (esc != 0) { +        esc = 0; +        if (c == '\\' || c == cterm || c == ',') { +          /* */ +        } +        else { +          e = eesc; +          cn++; +        } +        goto add_char; +      } +      else { +        if (c == '\\') { +          esc = 1; +          eesc = e; +        } +        else if (c == cterm || c == ',') +          break; +        else { +          size_t clen; + +        add_char: +          if (skip_mode == 0) { +            clen = p - e; +            if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) +              return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */ + +            xmemcpy(bufend, e, clen); +            bufend += clen; +          } +          cn++; +        } +      } +    } + +    if (cn != 0) { +      if (skip_mode == 0) { +        if ((types[n] & ONIG_TYPE_LONG) != 0) { +          int fixed = 0; +          if (cn > 0) { +            long rl; +            r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl); +            if (r == ONIG_NORMAL) { +              vals[n].l = rl; +              fixed = 1; +              types[n] = ONIG_TYPE_LONG; +            } +          } + +          if (fixed == 0) { +            types[n] = (types[n] & ~ONIG_TYPE_LONG); +            if (types[n] == ONIG_TYPE_VOID) +              return ONIGERR_INVALID_CALLOUT_ARG; +          } +        } + +        switch (types[n]) { +        case ONIG_TYPE_LONG: +          break; + +        case ONIG_TYPE_CHAR: +          if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG; +          vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend); +          break; + +        case ONIG_TYPE_STRING: +          { +            UChar* rs = onigenc_strdup(enc, buf, bufend); +            CHECK_NULL_RETURN_MEMERR(rs); +            vals[n].s.start = rs; +            vals[n].s.end   = rs + (e - s); +          } +          break; + +        case ONIG_TYPE_TAG: +          if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) +            return ONIGERR_INVALID_CALLOUT_TAG_NAME; + +          vals[n].s.start = s; +          vals[n].s.end   = e; +          break; + +        case ONIG_TYPE_VOID: +        case ONIG_TYPE_POINTER: +          return ONIGERR_PARSER_BUG; +          break; +        } +      } + +      n++; +    } + +    if (c == cterm) break; +  } + +  if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN; + +  *src = p; +  return n; +} + +/* (*name[TAG]) (*name[TAG]{a,b,..}) */ +static int +parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +{ +  int r; +  int i; +  int in; +  int num; +  int name_id; +  int arg_num; +  int max_arg_num; +  int opt_arg_num; +  int is_not_single; +  OnigCodePoint c; +  UChar* name_start; +  UChar* name_end; +  UChar* tag_start; +  UChar* tag_end; +  Node*  node; +  CalloutListEntry* e; +  RegexExt* ext; +  unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM]; +  OnigValue    vals[ONIG_CALLOUT_MAX_ARGS_NUM]; +  OnigEncoding enc = env->enc; +  UChar* p = *src; + +  //PFETCH_READY; +  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + +  node = 0; +  name_start = p; +  while (1) { +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    name_end = p; +    PFETCH_S(c); +    if (c == cterm || c == '[' || c == '{') break; +  } + +  if (! is_allowed_callout_name(enc, name_start, name_end)) +    return ONIGERR_INVALID_CALLOUT_NAME; + +  if (c == '[') { +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    tag_start = p; +    while (! PEND) { +      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +      tag_end = p; +      PFETCH_S(c); +      if (c == ']') break; +    } +    if (! is_allowed_callout_tag_name(enc, tag_start, tag_end)) +      return ONIGERR_INVALID_CALLOUT_TAG_NAME; + +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    PFETCH_S(c); +  } +  else { +    tag_start = tag_end = 0; +  } + +  if (c == '{') { +    UChar* save; + +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + +    /* read for single check only */ +    save = p; +    arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env); +    if (arg_num < 0) return arg_num; + +    is_not_single = PPEEK_IS(cterm) ?  0 : 1; +    p = save; +    r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end, +                                    &name_id); +    if (r != ONIG_NORMAL) return r; + +    max_arg_num = get_callout_arg_num_by_name_id(name_id); +    for (i = 0; i < max_arg_num; i++) { +      types[i] = get_callout_arg_type_by_name_id(name_id, i); +    } + +    arg_num = parse_callout_args(0, '}', &p, end, types, vals, env); +    if (arg_num < 0) return arg_num; + +    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    PFETCH_S(c); +  } +  else { +    arg_num = 0; + +    is_not_single = 0; +    r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end, +                                      &name_id); +    if (r != ONIG_NORMAL) return r; + +    max_arg_num = get_callout_arg_num_by_name_id(name_id); +    for (i = 0; i < max_arg_num; i++) { +      types[i] = get_callout_arg_type_by_name_id(name_id, i); +    } +  } + +  in = onig_get_callout_in_by_name_id(name_id); +  opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id); +  if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) +    return ONIGERR_INVALID_CALLOUT_ARG; + +  if (c != cterm) +    return ONIGERR_INVALID_CALLOUT_PATTERN; + +  r = reg_callout_list_entry(env, &num); +  if (r != 0) return r; + +  ext = onig_get_regex_ext(env->reg); +  if (IS_NULL(ext->pattern)) { +    r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); +    if (r != ONIG_NORMAL) return r; +  } + +  if (tag_start != tag_end) { +    r = callout_tag_entry(env->reg, tag_start, tag_end, num); +    if (r != ONIG_NORMAL) return r; +  } + +  r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); +  if (r != ONIG_NORMAL) return r; + +  e = onig_reg_callout_list_at(env->reg, num); +  e->of         = ONIG_CALLOUT_OF_NAME; +  e->in         = in; +  e->name_id    = name_id; +  e->type       = onig_get_callout_type_by_name_id(name_id); +  e->start_func = onig_get_callout_start_func_by_name_id(name_id); +  e->end_func   = onig_get_callout_end_func_by_name_id(name_id); +  e->u.arg.num        = max_arg_num; +  e->u.arg.passed_num = arg_num; +  for (i = 0; i < max_arg_num; i++) { +    e->u.arg.types[i] = types[i]; +    if (i < arg_num) +      e->u.arg.vals[i] = vals[i]; +    else +      e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i); +  } + +  *np = node; +  *src = p; +  return 0; +} +#endif +  static int  parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,                  ScanEnv* env) @@ -5526,8 +6964,8 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,    if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;    option = env->options; -  if (PPEEK_IS('?') && -      IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { +  c = PPEEK; +  if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {      PINC;      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; @@ -5673,6 +7111,18 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,        }        break; +#ifdef USE_CALLOUT +    case '{': +      if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) +        return ONIGERR_UNDEFINED_GROUP_OPTION; + +      r = parse_callout_of_contents(np, ')', &p, end, env); +      if (r != 0) return r; + +      goto end; +      break; +#endif +      case '(':        /* (?()...) */        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) { @@ -5769,6 +7219,29 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,              if (c != ')') goto err_if_else;            }          } +#ifdef USE_CALLOUT +        else if (c == '?') { +          if (IS_SYNTAX_OP2(env->syntax, +                            ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) { +            if (! PEND && PPEEK_IS('{')) { +              /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */ +              condition_is_checker = 0; +              PFETCH(c); +              r = parse_callout_of_contents(&condition, ')', &p, end, env); +              if (r != 0) return r; +              goto end_condition; +            } +          } +          goto any_condition; +        } +        else if (c == '*' && +                 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) { +          condition_is_checker = 0; +          r = parse_callout_of_name(&condition, ')', &p, end, env); +          if (r != 0) return r; +          goto end_condition; +        } +#endif          else {          any_condition:            PUNFETCH; @@ -5782,6 +7255,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,            }          } +      end_condition:          CHECK_NULL_RETURN_MEMERR(condition);          if (PEND) { @@ -5970,6 +7444,16 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,        return ONIGERR_UNDEFINED_GROUP_OPTION;      }    } +#ifdef USE_CALLOUT +  else if (c == '*' && +           IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) { +    PINC; +    r = parse_callout_of_name(np, ')', &p, end, env); +    if (r != 0) return r; + +    goto end; +  } +#endif    else {      if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))        goto group; @@ -6040,11 +7524,11 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)      { /* check redundant double repeat. */        /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */        QuantNode* qnt   = QUANT_(target); -      int nestq_num   = popular_quantifier_num(qn); -      int targetq_num = popular_quantifier_num(qnt); +      int nestq_num   = quantifier_type_num(qn); +      int targetq_num = quantifier_type_num(qnt);  #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR -      if (! NODE_IS_BY_NUMBER(qnode) && ! NODE_IS_BY_NUMBER(target) && +      if (targetq_num >= 0 && nestq_num >= 0 &&            IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {          UChar buf[WARN_BUFSIZE]; @@ -6078,18 +7562,19 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)      warn_exit:  #endif -      if (targetq_num >= 0) { -        if (nestq_num >= 0) { -          onig_reduce_nested_quantifier(qnode, target); -          goto q_exit; -        } -        else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ +      if (targetq_num >= 0 && nestq_num < 0) { +        if (targetq_num == 1 || targetq_num == 2) { /* * or + */            /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */            if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {              qn->upper = (qn->lower == 0 ? 1 : qn->lower);            }          }        } +      else { +        NODE_BODY(qnode) = target; +        onig_reduce_nested_quantifier(qnode, target); +        goto q_exit; +      }      }      break; @@ -6717,6 +8202,9 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,  {    int r;    UChar* p; +#ifdef USE_CALLOUT +  RegexExt* ext; +#endif    names_clear(reg); @@ -6750,6 +8238,14 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,  #endif    reg->num_mem = env->num_mem; + +#ifdef USE_CALLOUT +  ext = REG_EXTP(reg); +  if (IS_NOT_NULL(ext) && ext->callout_num > 0) { +    r = setup_ext_callout_list_values(reg); +  } +#endif +    return r;  } diff --git a/src/regparse.h b/src/regparse.h index 99fe7c9..3ffbea4 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -4,7 +4,7 @@    regparse.h -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -51,8 +51,12 @@ enum GimmickType {    GIMMICK_KEEP = 1,    GIMMICK_SAVE = 2,    GIMMICK_UPDATE_VAR = 3, +#ifdef USE_CALLOUT +  GIMMICK_CALLOUT = 4, +#endif  }; +  /* node type bit */  #define NODE_TYPE2BIT(type)      (1<<(type)) @@ -97,7 +101,7 @@ enum GimmickType {    (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) -#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) +#define ANCHOR_ANYCHAR_INF_MASK  (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML)  #define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)  enum EnclosureType { @@ -129,10 +133,12 @@ enum EnclosureType {  #define BACKREFS_P(br) \    (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) -#define QUANT_BODY_IS_NOT_EMPTY    0 -#define QUANT_BODY_IS_EMPTY        1 -#define QUANT_BODY_IS_EMPTY_MEM    2 -#define QUANT_BODY_IS_EMPTY_REC    3 +enum QuantBodyEmpty { +  QUANT_BODY_IS_NOT_EMPTY = 0, +  QUANT_BODY_IS_EMPTY     = 1, +  QUANT_BODY_IS_EMPTY_MEM = 2, +  QUANT_BODY_IS_EMPTY_REC = 3 +};  /* node status bits */  #define NST_MIN_FIXED             (1<<0) @@ -221,13 +227,10 @@ typedef struct {    int lower;    int upper;    int greedy; -  int body_empty_info; +  enum QuantBodyEmpty body_empty_info;    struct _Node* head_exact;    struct _Node* next_head_exact;    int is_refered;     /* include called node. don't eliminate even if {0} */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  int comb_exp_check_num;  /* 1,2,3...: check,  0: no check  */ -#endif  } QuantNode;  typedef struct { @@ -330,6 +333,7 @@ typedef struct {    enum GimmickType type;    int  detail_type; +  int  num;    int  id;  } GimmickNode; @@ -398,15 +402,9 @@ typedef struct {    int              num_mem;    int              num_named;    int              mem_alloc; -  MemEnv            mem_env_static[SCANENV_MEMENV_SIZE]; -  MemEnv*           mem_env_dynamic; -#ifdef USE_COMBINATION_EXPLOSION_CHECK -  int num_comb_exp_check; -  int comb_exp_max_regnum; -  int curr_max_regnum; -  int has_recursion; -#endif -  unsigned int parse_depth; +  MemEnv           mem_env_static[SCANENV_MEMENV_SIZE]; +  MemEnv*          mem_env_dynamic; +  unsigned int     parse_depth;    int keep_num;    int save_num; @@ -447,6 +445,10 @@ extern int    onig_free_shared_cclass_table P_((void));  extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));  extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node); +#ifdef USE_CALLOUT +extern int onig_global_callout_names_free(void); +#endif +  #ifdef ONIG_DEBUG  extern int onig_print_names(FILE*, regex_t*);  #endif diff --git a/src/regposerr.c b/src/regposerr.c index fc71eee..2e2a8e2 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -2,7 +2,7 @@    regposerr.c - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -27,6 +27,13 @@   * SUCH DAMAGE.   */ +/* Can't include regint.h etc.. for conflict of regex_t. +   Define ONIGURUMA_EXPORT here for onigposix.h. + */ +#ifndef ONIGURUMA_EXPORT +#define ONIGURUMA_EXPORT +#endif +  #include "config.h"  #include "onigposix.h" diff --git a/src/regposix.c b/src/regposix.c index 0fdbcbb..895cf29 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -2,7 +2,7 @@    regposix.c - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,10 @@ onig2posix_error_code(int code)    static const O2PERR o2p[] = {      { ONIG_MISMATCH,                                      REG_NOMATCH },      { ONIG_NO_SUPPORT_CONFIG,                             REG_EONIG_INTERNAL }, +    { ONIG_ABORT,                                         REG_EONIG_INTERNAL },      { ONIGERR_MEMORY,                                     REG_ESPACE  },      { ONIGERR_MATCH_STACK_LIMIT_OVER,                     REG_EONIG_INTERNAL }, +    { ONIGERR_RETRY_LIMIT_IN_MATCH_OVER,                  REG_EONIG_INTERNAL },      { ONIGERR_TYPE_BUG,                                   REG_EONIG_INTERNAL },      { ONIGERR_PARSER_BUG,                                 REG_EONIG_INTERNAL },      { ONIGERR_STACK_BUG,                                  REG_EONIG_INTERNAL }, @@ -117,6 +119,12 @@ onig2posix_error_code(int code)      { ONIGERR_INVALID_IF_ELSE_SYNTAX,                     REG_BADPAT },      { ONIGERR_INVALID_ABSENT_GROUP_PATTERN,               REG_BADPAT },      { ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN,     REG_BADPAT }, +    { ONIGERR_INVALID_CALLOUT_PATTERN,                    REG_BADPAT }, +    { ONIGERR_INVALID_CALLOUT_NAME,                       REG_BADPAT }, +    { ONIGERR_UNDEFINED_CALLOUT_NAME,                     REG_BADPAT }, +    { ONIGERR_INVALID_CALLOUT_BODY,                       REG_BADPAT }, +    { ONIGERR_INVALID_CALLOUT_TAG_NAME,                   REG_BADPAT }, +    { ONIGERR_INVALID_CALLOUT_ARG,                        REG_BADPAT },      { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION,         REG_EONIG_BADARG },      { ONIGERR_LIBRARY_IS_NOT_INITIALIZED,                 REG_EONIG_INTERNAL }    }; @@ -260,8 +268,7 @@ reg_set_encoding(int mb_code)      break;    } -  onig_initialize(0, 0); -  onig_initialize_encoding(enc); +  onig_initialize(&enc, 1);    onigenc_set_default_encoding(enc);  } diff --git a/src/regsyntax.c b/src/regsyntax.c index 3817d38..aa95479 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -2,7 +2,7 @@    regsyntax.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -176,6 +176,8 @@ OnigSyntaxType OnigSyntaxPerl = {        ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |        ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |        ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | +      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | +      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME    |        ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |        ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |        ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | @@ -207,6 +209,8 @@ OnigSyntaxType OnigSyntaxPerl_NG = {        ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |        ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |        ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | +      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | +      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME    |        ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |        ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |        ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | diff --git a/src/regversion.c b/src/regversion.c index 245a001..594a52c 100644 --- a/src/regversion.c +++ b/src/regversion.c @@ -2,7 +2,7 @@    regversion.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,6 @@   * SUCH DAMAGE.   */ -#include "config.h"  #include "regint.h"  #include <stdio.h> @@ -49,7 +48,7 @@ onig_copyright(void)    static char s[58];    xsnprintf(s, sizeof(s), -            "Oniguruma %d.%d.%d : Copyright (C) 2002-2016 K.Kosako", +            "Oniguruma %d.%d.%d : Copyright (C) 2002-2018 K.Kosako",              ONIGURUMA_VERSION_MAJOR,              ONIGURUMA_VERSION_MINOR,              ONIGURUMA_VERSION_TEENY); @@ -2,7 +2,7 @@    sjis.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -321,8 +321,8 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,  OnigEncodingType OnigEncodingSJIS = {    mbc_enc_len,    "Shift_JIS",   /* name */ -  2,             /* max byte length */ -  1,             /* min byte length */ +  2,             /* max enc length */ +  1,             /* min enc length */    onigenc_is_mbc_newline_0x0a,    mbc_to_code,    code_to_mbclen, diff --git a/src/utf16_be.c b/src/utf16_be.c index f220cca..098ab54 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@    utf16_be.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,50 @@   * SUCH DAMAGE.   */ -#include "regenc.h" +#include "regint.h"  /* for USE_CALLOUT */ + + +static int +init(void) +{ +#ifdef USE_CALLOUT + +    int id; +    OnigEncoding enc; +    char* name; +    unsigned int t_long; +    unsigned int args[4]; +    OnigValue opts[4]; + +    enc = ONIG_ENCODING_UTF16_BE; +    t_long = ONIG_TYPE_LONG; + +    name = "\000F\000A\000I\000L\000\000";            BC0_P(name, fail); +    name = "\000M\000I\000S\000M\000A\000T\000C\000H\000\000"; BC0_P(name, mismatch); +    name = "\000M\000A\000X\000\000";                 BC_B(name, max, 1, &t_long); + +    name = "\000E\000R\000R\000O\000R\000\000"; +    args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; +    BC_P_O(name, error, 1, args, 1, opts); + +    name = "\000C\000O\000U\000N\000T\000\000"; +    args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; +    BC_B_O(name, count, 1, args, 1, opts); + +    name = "\000T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000"; +    args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; +    BC_B_O(name, total_count, 1, args, 1, opts); + +    name = "\000C\000M\000P\000\000"; +    args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; +    args[1] = ONIG_TYPE_STRING; +    args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; +    BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + +  return ONIG_NORMAL; +}  static const int EncLen_UTF16[] = {    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -215,8 +258,8 @@ utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,  OnigEncodingType OnigEncodingUTF16_BE = {    utf16be_mbc_enc_len,    "UTF-16BE",   /* name */ -  4,            /* max byte length */ -  2,            /* min byte length */ +  4,            /* max enc length */ +  2,            /* min enc length */    utf16be_is_mbc_newline,    utf16be_mbc_to_code,    utf16be_code_to_mbclen, @@ -229,7 +272,7 @@ OnigEncodingType OnigEncodingUTF16_BE = {    onigenc_utf16_32_get_ctype_code_range,    utf16be_left_adjust_char_head,    onigenc_always_false_is_allowed_reverse_match, -  NULL, /* init */ -  NULL, /* is_initialized */ +  init, +  0, /* is_initialized */    is_valid_mbc_string  }; diff --git a/src/utf16_le.c b/src/utf16_le.c index 89bc72f..dc0d3f1 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -2,7 +2,7 @@    utf16_le.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -26,8 +26,49 @@   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF   * SUCH DAMAGE.   */ +#include "regint.h"  /* for USE_CALLOUT */ -#include "regenc.h" +static int +init(void) +{ +#ifdef USE_CALLOUT + +    int id; +    OnigEncoding enc; +    char* name; +    unsigned int t_long; +    unsigned int args[4]; +    OnigValue    opts[4]; + +    enc = ONIG_ENCODING_UTF16_LE; +    t_long = ONIG_TYPE_LONG; + +    name = "F\000A\000I\000L\000\000\000";            BC0_P(name, fail); +    name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch); +    name = "M\000A\000X\000\000\000";                 BC_B(name, max,   1, &t_long); + +    name = "E\000R\000R\000O\000R\000\000\000"; +    args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; +    BC_P_O(name, error, 1, args, 1, opts); + +    name = "C\000O\000U\000N\000T\000\000\000"; +    args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; +    BC_B_O(name, count, 1, args, 1, opts); + +    name = "T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000\000"; +    args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; +    BC_B_O(name, total_count, 1, args, 1, opts); + +    name = "C\000M\000P\000\000\000"; +    args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; +    args[1] = ONIG_TYPE_STRING; +    args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; +    BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + +  return ONIG_NORMAL; +}  static const int EncLen_UTF16[] = {    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -225,8 +266,8 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,  OnigEncodingType OnigEncodingUTF16_LE = {    utf16le_mbc_enc_len,    "UTF-16LE",   /* name */ -  4,            /* max byte length */ -  2,            /* min byte length */ +  4,            /* max enc length */ +  2,            /* min enc length */    utf16le_is_mbc_newline,    utf16le_mbc_to_code,    utf16le_code_to_mbclen, @@ -239,7 +280,7 @@ OnigEncodingType OnigEncodingUTF16_LE = {    onigenc_utf16_32_get_ctype_code_range,    utf16le_left_adjust_char_head,    onigenc_always_false_is_allowed_reverse_match, -  NULL, /* init */ -  NULL, /* is_initialized */ +  init, +  0, /* is_initialized */    is_valid_mbc_string  }; diff --git a/src/utf32_be.c b/src/utf32_be.c index d0c7f39..68760bb 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -2,7 +2,7 @@    utf32_be.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -174,8 +174,8 @@ utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,  OnigEncodingType OnigEncodingUTF32_BE = {    utf32be_mbc_enc_len,    "UTF-32BE",   /* name */ -  4,            /* max byte length */ -  4,            /* min byte length */ +  4,            /* max enc length */ +  4,            /* min enc length */    utf32be_is_mbc_newline,    utf32be_mbc_to_code,    utf32be_code_to_mbclen, diff --git a/src/utf32_le.c b/src/utf32_le.c index 33200d1..8208cd0 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -2,7 +2,7 @@    utf32_le.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -174,8 +174,8 @@ utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,  OnigEncodingType OnigEncodingUTF32_LE = {    utf32le_mbc_enc_len,    "UTF-32LE",   /* name */ -  4,            /* max byte length */ -  4,            /* min byte length */ +  4,            /* max enc length */ +  4,            /* min enc length */    utf32le_is_mbc_newline,    utf32le_mbc_to_code,    utf32le_code_to_mbclen, @@ -2,7 +2,7 @@    utf8.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -280,8 +280,8 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,  OnigEncodingType OnigEncodingUTF8 = {    mbc_enc_len,    "UTF-8",     /* name */ -  6,           /* max byte length */ -  1,           /* min byte length */ +  6,           /* max enc length */ +  1,           /* min enc length */    onigenc_is_mbc_newline_0x0a,    mbc_to_code,    code_to_mbclen, | 
