diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2020-11-08 10:59:08 +0100 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2020-11-08 10:59:08 +0100 | 
| commit | 22bb4b319b3d722ac7bf041a6374cd40afdc4d53 (patch) | |
| tree | a07d7d0764a8488f4b5ebef1561e2f3d8caacc05 /src | |
| parent | 0f259c3073f341c48468e80e93731daa31698030 (diff) | |
| parent | b4158caf13bc715096c1df7c040c9d25995d76f2 (diff) | |
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile.am | 25 | ||||
| -rw-r--r-- | src/Makefile.windows | 2 | ||||
| -rw-r--r-- | src/big5.c | 13 | ||||
| -rw-r--r-- | src/config.h.cmake.in | 6 | ||||
| -rw-r--r-- | src/euc_jp.c | 29 | ||||
| -rw-r--r-- | src/euc_kr.c | 13 | ||||
| -rw-r--r-- | src/euc_tw.c | 19 | ||||
| -rw-r--r-- | src/gb18030.c | 24 | ||||
| -rwxr-xr-x | src/make_property.sh | 2 | ||||
| -rwxr-xr-x | src/make_unicode_property.sh | 2 | ||||
| -rwxr-xr-x | src/make_unicode_property_data.py | 4 | ||||
| -rw-r--r-- | src/onigposix.h | 41 | ||||
| -rw-r--r-- | src/oniguruma.h | 15 | ||||
| -rw-r--r-- | src/regcomp.c | 310 | ||||
| -rw-r--r-- | src/regenc.c | 6 | ||||
| -rw-r--r-- | src/regerror.c | 2 | ||||
| -rw-r--r-- | src/regexec.c | 592 | ||||
| -rw-r--r-- | src/regint.h | 42 | ||||
| -rw-r--r-- | src/regparse.c | 315 | ||||
| -rw-r--r-- | src/regparse.h | 4 | ||||
| -rw-r--r-- | src/regposerr.c | 28 | ||||
| -rw-r--r-- | src/regposix.c | 94 | ||||
| -rw-r--r-- | src/sjis.c | 14 | ||||
| -rw-r--r-- | src/st.c | 8 | ||||
| -rw-r--r-- | src/unicode.c | 13 | ||||
| -rw-r--r-- | src/utf16_be.c | 2 | ||||
| -rw-r--r-- | src/utf16_le.c | 2 | ||||
| -rw-r--r-- | src/utf32_be.c | 7 | ||||
| -rw-r--r-- | src/utf32_le.c | 7 | 
29 files changed, 1106 insertions, 535 deletions
| diff --git a/src/Makefile.am b/src/Makefile.am index 36c2222..44a4167 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -11,10 +11,15 @@ posix_headers = onigposix.h  if ENABLE_POSIX_API  posix_sources = regposix.c regposerr.c  include_HEADERS += $(posix_headers) +AM_CFLAGS += -DUSE_POSIX_API  else  posix_sources =  endif +if ENABLE_BINARY_COMPATIBLE_POSIX_API +AM_CFLAGS += -DUSE_BINARY_COMPATIBLE_POSIX_API +endif +  lib_LTLIBRARIES = $(libname) @@ -45,13 +50,29 @@ libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \  	gb18030.c koi8_r.c cp1251.c \  	onig_init.c -libonig_la_LDFLAGS = -version-info $(LTVERSION) -  EXTRA_DIST = koi8.c mktable.c \  	unicode_fold_data.c unicode_property_data.c \  	unicode_property_data_posix.c \  	unicode_egcb_data.c unicode_wb_data.c + +libonig_la_LDFLAGS = $(EXTRA_LIBONIG_LDFLAGS) -version-info $(LTVERSION) + +if USE_LIBONIG_DEF_FILE + +libonig_la_LDFLAGS +=  -Wl,--output-def,$(LIBONIG_DEF_FILE) + +install-data-hook: +	echo "$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_DATA) $(LIBONIG_DEF_FILE) $(DESTDIR)$(libdir)"; \ +	$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_DATA) $(LIBONIG_DEF_FILE) $(DESTDIR)$(libdir) || exit 1 + +uninstall-hook: +	echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$(LIBONIG_DEF_FILE)'"; \ +	$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$(LIBONIG_DEF_FILE)" + +endif + +  dll:  	$(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \  	$(LIBS) diff --git a/src/Makefile.windows b/src/Makefile.windows index 90ebf28..11d6fd8 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -18,7 +18,7 @@ LINKFLAGS = -link -incremental:no -pdb:none  INSTALL = install -c  CP      = copy  CC = cl -DEFS = -DHAVE_CONFIG_H +DEFS = -DHAVE_CONFIG_H -DUSE_POSIX_API -DUSE_BINARY_COMPATIBLE_POSIX_API  subdirs = @@ -2,7 +2,7 @@    big5.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,15 @@ static int  big5_code_to_mbclen(OnigCodePoint code)  {    if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; -  if ((code &    0xff00) != 0) return 2; -  if (EncLen_BIG5[(int )(code & 0xff)] == 1) return 1; + +  if ((code & 0xff00) != 0) { +    if (EncLen_BIG5[(int )(code >> 8) & 0xff] == 2) +      return 2; +  } +  else { +    if (EncLen_BIG5[(int )(code & 0xff)] == 1) +      return 1; +  }    return ONIGERR_INVALID_CODE_POINT_VALUE;  } diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index 60db86c..c213a09 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -43,6 +43,12 @@  /* The size of `long', as computed by sizeof. */  #cmakedefine SIZEOF_LONG  ${SIZEOF_LONG} +/* The size of `long long', as computed by sizeof. */ +#cmakedefine SIZEOF_LONG_LONG  ${SIZEOF_LONG_LONG} + +/* The size of `void*', as computed by sizeof. */ +#cmakedefine SIZEOF_VOIDP  ${SIZEOF_VOIDP} +  /* Define if enable CR+NL as line terminator */  #cmakedefine USE_CRNL_AS_LINE_TERMINATOR  ${USE_CRNL_AS_LINE_TERMINATOR} diff --git a/src/euc_jp.c b/src/euc_jp.c index 640b3e3..bfe91bf 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -2,7 +2,7 @@    euc_jp.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -114,10 +114,20 @@ static int  code_to_mbclen(OnigCodePoint code)  {    if (ONIGENC_IS_CODE_ASCII(code)) return 1; -  else if ((code & 0xff0000) != 0) return 3; -  else if ((code &   0xff00) != 0) return 2; -  else -    return ONIGERR_INVALID_CODE_POINT_VALUE; +  else if ((code & 0xff0000) != 0) { +    if (EncLen_EUCJP[(int )(code >> 16) & 0xff] == 3) +      return 3; +  } +  else if ((code & 0xff00) != 0) { +    if (EncLen_EUCJP[(int )(code >> 8) & 0xff] == 2) +      return 2; +  } +  else if (code < 256) { +    if (EncLen_EUCJP[(int )(code & 0xff)] == 1) +      return 1; +  } + +  return ONIGERR_INVALID_CODE_POINT_VALUE;  }  static int @@ -125,8 +135,13 @@ code_to_mbc(OnigCodePoint code, UChar *buf)  {    UChar *p = buf; -  if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); -  if ((code &   0xff00) != 0) *p++ = (UChar )(((code >>  8) & 0xff)); +  if ((code & 0xff0000) != 0) { +    *p++ = (UChar )(((code >> 16) & 0xff)); +    *p++ = (UChar )(((code >>  8) & 0xff)); +  } +  else if ((code & 0xff00) != 0) +    *p++ = (UChar )(((code >>  8) & 0xff)); +    *p++ = (UChar )(code & 0xff);  #if 1 diff --git a/src/euc_kr.c b/src/euc_kr.c index 7fa50af..b0e9fbf 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -2,7 +2,7 @@    euc_kr.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,15 @@ static int  euckr_code_to_mbclen(OnigCodePoint code)  {    if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; -  if ((code &    0xff00) != 0) return 2; -  if (EncLen_EUCKR[(int )(code & 0xff)] == 1) return 1; + +  if ((code & 0xff00) != 0) { +    if (EncLen_EUCKR[(int )(code >> 8) & 0xff] == 2) +      return 2; +  } +  else { +    if (EncLen_EUCKR[(int )(code & 0xff)] == 1) +      return 1; +  }    return ONIGERR_INVALID_CODE_POINT_VALUE;  } diff --git a/src/euc_tw.c b/src/euc_tw.c index 8e72b97..99dc5ec 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -2,7 +2,7 @@    euc_tw.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -57,15 +57,22 @@ euctw_mbc_enc_len(const UChar* p)  static int  euctw_code_to_mbclen(OnigCodePoint code)  { -       if ((code & 0xff000000) != 0) return 4; -  else if ((code &   0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; -  else if ((code &     0xff00) != 0) return 2; +  if ((code & 0xff000000) != 0) { +    if (EncLen_EUCTW[(int )(code >> 24) & 0xff] == 4) +      return 4; +  } +  else if ((code & 0xff0000) != 0) +    return ONIGERR_INVALID_CODE_POINT_VALUE; +  else if ((code & 0xff00) != 0) { +    if (EncLen_EUCTW[(int )(code >> 8) & 0xff] == 2) +      return 2; +  }    else {      if (EncLen_EUCTW[(int )(code & 0xff)] == 1)        return 1; - -    return ONIGERR_INVALID_CODE_POINT_VALUE;    } + +  return ONIGERR_INVALID_CODE_POINT_VALUE;  }  static int diff --git a/src/gb18030.c b/src/gb18030.c index 1385a7f..7409d3e 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -89,15 +89,25 @@ gb18030_mbc_enc_len(const UChar* p)  static int  gb18030_code_to_mbclen(OnigCodePoint code)  { -       if ((code & 0xff000000) != 0) return 4; -  else if ((code &   0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; -  else if ((code &     0xff00) != 0) return 2; +  if ((code & 0xff000000) != 0) { +    if (GB18030_MAP[(int )(code >> 24) & 0xff] == CM) +      if (GB18030_MAP[(int )(code >> 16) & 0xff] == C4) +        return 4; +  } +  else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; +  else if ((code & 0xff00) != 0) { +    if (GB18030_MAP[(int )(code >> 8) & 0xff] == CM) { +      char c = GB18030_MAP[(int )code & 0xff]; +      if (c == CM || c == C2) +        return 2; +    } +  }    else { -    if (GB18030_MAP[(int )(code & 0xff)] == CM) -      return ONIGERR_INVALID_CODE_POINT_VALUE; - -    return 1; +    if (GB18030_MAP[(int )(code & 0xff)] != CM) +      return 1;    } + +  return ONIGERR_INVALID_CODE_POINT_VALUE;  }  static int diff --git a/src/make_property.sh b/src/make_property.sh index e5f1244..1c5e0f5 100755 --- a/src/make_property.sh +++ b/src/make_property.sh @@ -1,7 +1,7 @@  #!/bin/sh  GPERF=gperf -SED=gsed +SED=sed  TMP1=gperf1.tmp  TMP2=gperf2.tmp diff --git a/src/make_unicode_property.sh b/src/make_unicode_property.sh index 5129376..ff7dc62 100755 --- a/src/make_unicode_property.sh +++ b/src/make_unicode_property.sh @@ -1,7 +1,7 @@  #!/bin/sh  GPERF=gperf -SED=gsed +SED=sed  NAME=unicode_property_data  TMP1=gperf1.tmp diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py index 285c462..d1b3377 100755 --- a/src/make_unicode_property_data.py +++ b/src/make_unicode_property_data.py @@ -1,7 +1,7 @@  #!/usr/bin/python  # -*- coding: utf-8 -*-  # make_unicode_property_data.py -# Copyright (c) 2016-2019  K.Kosako +# Copyright (c) 2016-2020  K.Kosako  import sys  import re @@ -405,7 +405,7 @@ def set_max_prop_name(name):  def entry_prop_name(name, index):    set_max_prop_name(name)    if OUTPUT_LIST_MODE and index >= len(POSIX_LIST): -    print >> UPF, "%3d: %s" % (index, name) +    print >> UPF, "%s" % (name)  def entry_and_print_prop_and_index(name, index):    entry_prop_name(name, index) diff --git a/src/onigposix.h b/src/onigposix.h index 37e09ea..3514f80 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -74,19 +74,19 @@ extern "C" {  #define REG_POSIX_ENCODING_UTF16_LE  5 -typedef int regoff_t; +typedef int onig_posix_regoff_t;  typedef struct { -  regoff_t  rm_so; -  regoff_t  rm_eo; -} regmatch_t; +  onig_posix_regoff_t  rm_so; +  onig_posix_regoff_t  rm_eo; +} onig_posix_regmatch_t;  /* POSIX regex_t */  typedef struct {    void*   onig;          /* Oniguruma regex_t*  */    size_t  re_nsub;    int     comp_options; -} regex_t; +} onig_posix_regex_t;  #ifndef P_ @@ -160,16 +160,31 @@ ONIG_EXTERN int onig_end P_((void));  #endif /* ONIGURUMA_H */ -ONIG_EXTERN int    regcomp P_((regex_t* reg, const char* pat, int options)); -ONIG_EXTERN int    regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); -ONIG_EXTERN void   regfree P_((regex_t* reg)); -ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size)); +ONIG_EXTERN int    onig_posix_regcomp P_((onig_posix_regex_t* reg, const char* pat, int options)); +ONIG_EXTERN int    onig_posix_regexec P_((onig_posix_regex_t* reg, const char* str, size_t nmatch, onig_posix_regmatch_t* matches, int options)); +ONIG_EXTERN void   onig_posix_regfree P_((onig_posix_regex_t* reg)); +ONIG_EXTERN size_t onig_posix_regerror P_((int code, const onig_posix_regex_t* reg, char* buf, size_t size));  /* extended API */ -ONIG_EXTERN void reg_set_encoding P_((int enc)); -ONIG_EXTERN int  reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); -ONIG_EXTERN int  reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg)); -ONIG_EXTERN int  reg_number_of_names P_((regex_t* reg)); +ONIG_EXTERN void onig_posix_reg_set_encoding P_((int enc)); +ONIG_EXTERN int  onig_posix_reg_name_to_group_numbers P_((onig_posix_regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); +ONIG_EXTERN int  onig_posix_reg_foreach_name P_((onig_posix_regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*), void* arg)); +ONIG_EXTERN int  onig_posix_reg_number_of_names P_((onig_posix_regex_t* reg)); + + +/* aliases */ +#define regex_t    onig_posix_regex_t +#define regmatch_t onig_posix_regmatch_t +#define regoff_t   onig_posix_regoff_t + +#define regcomp  onig_posix_regcomp +#define regexec  onig_posix_regexec +#define regfree  onig_posix_regfree +#define regerror onig_posix_regerror +#define reg_set_encoding          onig_posix_reg_set_encoding +#define reg_name_to_group_numbers onig_posix_reg_name_to_group_numbers +#define reg_foreach_name          onig_posix_reg_foreach_name +#define reg_number_of_names       onig_posix_reg_number_of_names  #ifdef __cplusplus  } diff --git a/src/oniguruma.h b/src/oniguruma.h index 15f6ef0..d983fc9 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -36,9 +36,9 @@ extern "C" {  #define ONIGURUMA  #define ONIGURUMA_VERSION_MAJOR   6  #define ONIGURUMA_VERSION_MINOR   9 -#define ONIGURUMA_VERSION_TEENY   5 +#define ONIGURUMA_VERSION_TEENY   6 -#define ONIGURUMA_VERSION_INT     60905 +#define ONIGURUMA_VERSION_INT     60906  #ifndef P_  #if defined(__STDC__) || defined(_WIN32) @@ -395,8 +395,12 @@ typedef unsigned int        OnigOptionType;  #define ONIG_OPTION_POSIX_IS_ASCII       (ONIG_OPTION_SPACE_IS_ASCII << 1)  #define ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER  (ONIG_OPTION_POSIX_IS_ASCII << 1)  #define ONIG_OPTION_TEXT_SEGMENT_WORD    (ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER << 1) +/* options (search time) */ +#define ONIG_OPTION_NOT_BEGIN_STRING     (ONIG_OPTION_TEXT_SEGMENT_WORD << 1) +#define ONIG_OPTION_NOT_END_STRING       (ONIG_OPTION_NOT_BEGIN_STRING << 1) +#define ONIG_OPTION_NOT_BEGIN_POSITION   (ONIG_OPTION_NOT_END_STRING << 1) -#define ONIG_OPTION_MAXBIT               ONIG_OPTION_TEXT_SEGMENT_WORD  /* limit */ +#define ONIG_OPTION_MAXBIT               ONIG_OPTION_NOT_BEGIN_POSITION  #define ONIG_OPTION_ON(options,regopt)      ((options) |= (regopt))  #define ONIG_OPTION_OFF(options,regopt)     ((options) &= ~(regopt)) @@ -561,6 +565,7 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  #define ONIGERR_PARSE_DEPTH_LIMIT_OVER                        -16  #define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER                     -17  #define ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER                    -18 +#define ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER              -19  #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED                -21  #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR  -22  #define ONIGERR_FAIL_TO_INITIALIZE                            -23 @@ -919,6 +924,10 @@ int onig_set_capture_num_limit P_((int num));  ONIG_EXTERN  int onig_set_parse_depth_limit P_((unsigned int depth));  ONIG_EXTERN +unsigned long onig_get_subexp_call_limit_in_search P_((void)); +ONIG_EXTERN +int onig_set_subexp_call_limit_in_search P_((unsigned long n)); +ONIG_EXTERN  int onig_get_subexp_call_max_nest_level P_((void));  ONIG_EXTERN  int onig_set_subexp_call_max_nest_level P_((int level)); diff --git a/src/regcomp.c b/src/regcomp.c index 4d5b78f..dd2b328 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -133,6 +133,7 @@ ops_init(regex_t* reg, int init_alloc_size)      size = sizeof(Operation) * init_alloc_size;      p = (Operation* )xrealloc(reg->ops, size);      CHECK_NULL_RETURN_MEMERR(p); +    reg->ops = p;  #ifdef USE_DIRECT_THREADED_CODE      {        enum OpCode* cp; @@ -144,13 +145,12 @@ ops_init(regex_t* reg, int init_alloc_size)  #endif    }    else { -    p  = (Operation* )0; +    reg->ops = (Operation* )0;  #ifdef USE_DIRECT_THREADED_CODE      reg->ocs = (enum OpCode* )0;  #endif    } -  reg->ops = p;    reg->ops_curr  = 0; /* !!! not yet done ops_new() */    reg->ops_alloc = init_alloc_size;    reg->ops_used  = 0; @@ -176,6 +176,7 @@ ops_expand(regex_t* reg, int n)    size = sizeof(Operation) * n;    p = (Operation* )xrealloc(reg->ops, size);    CHECK_NULL_RETURN_MEMERR(p); +  reg->ops = p;  #ifdef USE_DIRECT_THREADED_CODE    size = sizeof(enum OpCode) * n; @@ -184,7 +185,6 @@ ops_expand(regex_t* reg, int n)    reg->ocs = cp;  #endif -  reg->ops = p;    reg->ops_alloc = n;    if (reg->ops_used == 0)      reg->ops_curr = 0; @@ -265,10 +265,12 @@ ops_free(regex_t* reg)      case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:        break;      case OP_BACKREF_MULTI:      case OP_BACKREF_MULTI_IC: +    case OP_BACKREF_CHECK: +#ifdef USE_BACKREF_WITH_LEVEL      case OP_BACKREF_WITH_LEVEL:      case OP_BACKREF_WITH_LEVEL_IC: -    case OP_BACKREF_CHECK:      case OP_BACKREF_CHECK_WITH_LEVEL: +#endif        if (op->backref_general.num != 1)          xfree(op->backref_general.ns);        break; @@ -631,7 +633,7 @@ mmcl_add(MinMaxCharLen* to, MinMaxCharLen* add)    to->min = distance_add(to->min, add->min);    to->max = distance_add(to->max, add->max); -  to->min_is_sure = add->min_is_sure != 0 && to->min_is_sure != 0; +  to->min_is_sure = add->min_is_sure != FALSE && to->min_is_sure != FALSE;  }  static void @@ -656,8 +658,11 @@ static void  mmcl_alt_merge(MinMaxCharLen* to, MinMaxCharLen* alt)  {    if (to->min > alt->min) { -    to->min = alt->min; -    if (alt->min_is_sure != 0) +    to->min         = alt->min; +    to->min_is_sure = alt->min_is_sure; +  } +  else if (to->min == alt->min) { +    if (alt->min_is_sure != FALSE)        to->min_is_sure = TRUE;    } @@ -840,7 +845,7 @@ node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env,              en->min_char_len = ci->min;              en->max_char_len = ci->max;              NODE_STATUS_ADD(node, FIXED_CLEN); -            if (ci->min_is_sure != 0) +            if (ci->min_is_sure != FALSE)                NODE_STATUS_ADD(node, FIXED_CLEN_MIN_SURE);            }          } @@ -882,15 +887,15 @@ node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env,      }      break; -  case NODE_ANCHOR: +  case NODE_GIMMICK:      mmcl_set(ci, 0); -    /* can't optimize look-behind if anchor exists. */ -    ci->min_is_sure = FALSE;      break; -  case NODE_GIMMICK: +  case NODE_ANCHOR:    zero:      mmcl_set(ci, 0); +    /* can't optimize look-behind if anchor exists. */ +    ci->min_is_sure = FALSE;      break;    case NODE_BACKREF: @@ -1082,6 +1087,9 @@ compile_call(CallNode* node, regex_t* reg, ScanEnv* env)    if (r != 0) return r;    COP(reg)->call.addr = 0; /* dummy addr. */ +#ifdef ONIG_DEBUG_MATCH_COUNTER +  COP(reg)->call.called_mem = node->called_gnum; +#endif    offset = COP_CURR_OFFSET_BYTES(reg, call.addr);    r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node)); @@ -1822,7 +1830,6 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)    COP(reg)->memory_end.num = node->m.regnum;    if (NODE_IS_CALLED(node)) { -    if (r != 0) return r;      r = add_op(reg, OP_RETURN);    }  #else @@ -2764,7 +2771,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)  static int  make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)  { -  int r = 0; +  int r;    Node* node = *plink;    switch (NODE_TYPE(node)) { @@ -2772,17 +2779,17 @@ make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)    case NODE_ALT:      do {        r = make_named_capture_number_map(&(NODE_CAR(node)), map, counter); -    } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); +    } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); +    if (r < 0) return r;      break;    case NODE_QUANT:      {        Node** ptarget = &(NODE_BODY(node)); -      Node*  old = *ptarget;        r = make_named_capture_number_map(ptarget, map, counter); -      if (r != 0) return r; -      if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) { -        r = onig_reduce_nested_quantifier(node); +      if (r < 0) return r; +      if (r == 1 && NODE_TYPE(*ptarget) == NODE_QUANT) { +        return onig_reduce_nested_quantifier(node);        }      }      break; @@ -2796,41 +2803,48 @@ make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)            map[en->m.regnum].new_val = *counter;            en->m.regnum = *counter;            r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); +          if (r < 0) return r;          }          else {            *plink = NODE_BODY(node);            NODE_BODY(node) = NULL_NODE;            onig_node_free(node);            r = make_named_capture_number_map(plink, map, counter); +          if (r < 0) return r; +          return 1;          }        }        else if (en->type == BAG_IF_ELSE) {          r = make_named_capture_number_map(&(NODE_BAG_BODY(en)), map, counter); -        if (r != 0) return r; +        if (r < 0) return r;          if (IS_NOT_NULL(en->te.Then)) {            r = make_named_capture_number_map(&(en->te.Then), map, counter); -          if (r != 0) return r; +          if (r < 0) return r;          }          if (IS_NOT_NULL(en->te.Else)) {            r = make_named_capture_number_map(&(en->te.Else), map, counter); -          if (r != 0) return r; +          if (r < 0) return r;          }        } -      else +      else {          r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); +        if (r < 0) return r; +      }      }      break;    case NODE_ANCHOR: -    if (IS_NOT_NULL(NODE_BODY(node))) +    if (IS_NOT_NULL(NODE_BODY(node))) {        r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); +      if (r < 0) return r; +    }      break;    default:      break;    } -  return r; +  return 0;  }  static int @@ -2982,7 +2996,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)    }    counter = 0;    r = make_named_capture_number_map(root, map, &counter); -  if (r != 0) return r; +  if (r < 0) return r;    r = renumber_backref_traverse(*root, map);    if (r != 0) return r; @@ -3546,7 +3560,9 @@ check_node_in_look_behind(Node* node, int not, int* used)        if (r != 0) break;        if (en->type == BAG_MEMORY) { -        if (NODE_IS_BACKREF(node) || NODE_IS_CALLED(node)) *used = TRUE; +        if (NODE_IS_BACKREF(node) || NODE_IS_CALLED(node) +         || NODE_IS_REFERENCED(node)) +          *used = TRUE;        }        else if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) { @@ -3978,6 +3994,7 @@ set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env)      {        BagNode* en = BAG_(node); +      r = 0;        if (en->type == BAG_MEMORY) {          if (NODE_IS_BACKREF(node)) {            if (IS_NOT_NULL(empty)) @@ -4484,7 +4501,7 @@ remove_from_list(Node* prev, Node* a)  }  static int -reduce_string_list(Node* node) +reduce_string_list(Node* node, OnigEncoding enc)  {    int r = 0; @@ -4515,43 +4532,70 @@ reduce_string_list(Node* node)            }          }          else { -          prev = NULL_NODE; +          if (IS_NOT_NULL(prev)) { +#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE +            StrNode* sn = STR_(prev); +            if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end)) +              return ONIGERR_INVALID_WIDE_CHAR_VALUE; +#endif +            prev = NULL_NODE; +          } +          r = reduce_string_list(curr, enc); +          if (r != 0) return r;            prev_node = node;          }          node = next_node;        } while (r == 0 && IS_NOT_NULL(node)); + +#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE +      if (IS_NOT_NULL(prev)) { +        StrNode* sn = STR_(prev); +        if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end)) +          return ONIGERR_INVALID_WIDE_CHAR_VALUE; +      } +#endif      }      break;    case NODE_ALT:      do { -      r = reduce_string_list(NODE_CAR(node)); +      r = reduce_string_list(NODE_CAR(node), enc);      } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));      break; +#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE +  case NODE_STRING: +    { +      StrNode* sn = STR_(node); +      if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end)) +        return ONIGERR_INVALID_WIDE_CHAR_VALUE; +    } +    break; +#endif +    case NODE_ANCHOR:      if (IS_NULL(NODE_BODY(node)))        break;      /* fall */    case NODE_QUANT: -    r = reduce_string_list(NODE_BODY(node)); +    r = reduce_string_list(NODE_BODY(node), enc);      break;    case NODE_BAG:      {        BagNode* en = BAG_(node); -      r = reduce_string_list(NODE_BODY(node)); +      r = reduce_string_list(NODE_BODY(node), enc);        if (r != 0) return r;        if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) { -          r = reduce_string_list(en->te.Then); +          r = reduce_string_list(en->te.Then, enc);            if (r != 0) return r;          }          if (IS_NOT_NULL(en->te.Else)) { -          r = reduce_string_list(en->te.Else); +          r = reduce_string_list(en->te.Else, enc);            if (r != 0) return r;          }        } @@ -4723,7 +4767,7 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env)        return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;      } -    if (ci.min == 0 && ci.min_is_sure != 0 && used == FALSE) { +    if (ci.min == 0 && ci.min_is_sure != FALSE && used == FALSE) {        if (an->type == ANCR_LOOK_BEHIND_NOT)          r = onig_node_reset_fail(node);        else @@ -4779,18 +4823,23 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env)  static int  tune_next(Node* node, Node* next_node, regex_t* reg)  { +  int called;    NodeType type; +  called = FALSE; +   retry:    type = NODE_TYPE(node);    if (type == NODE_QUANT) {      QuantNode* qn = QUANT_(node);      if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {  #ifdef USE_QUANT_PEEK_NEXT -      Node* n = get_tree_head_literal(next_node, 1, reg); -      /* '\0': for UTF-16BE etc... */ -      if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') { -        qn->next_head_exact = n; +      if (called == FALSE) { +        Node* n = get_tree_head_literal(next_node, 1, reg); +        /* '\0': for UTF-16BE etc... */ +        if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') { +          qn->next_head_exact = n; +        }        }  #endif        /* automatic posseivation a*b ==> (?>a*)b */ @@ -4815,6 +4864,8 @@ tune_next(Node* node, Node* next_node, regex_t* reg)    else if (type == NODE_BAG) {      BagNode* en = BAG_(node);      if (en->type == BAG_MEMORY) { +      if (NODE_IS_CALLED(node)) +        called = TRUE;        node = NODE_BODY(node);        goto retry;      } @@ -4999,17 +5050,18 @@ unravel_cf_look_behind_add(Node** rlist, Node** rsn,  {    int r, i, found; -  found = 0; +  found = FALSE;    for (i = 0; i < n; i++) {      OnigCaseFoldCodeItem* item = items + i;      if (item->byte_len == one_len) {        if (item->code_len == 1) { -        found = 1; +        found = TRUE; +        break;        }      }    } -  if (found == 0) { +  if (found == FALSE) {      r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */);    }    else { @@ -5073,6 +5125,7 @@ unravel_case_fold_string(Node* node, regex_t* reg, int state)      one_len = (OnigLen )enclen(enc, p);      if (n == 0) {        q = p + one_len; +      if (q > end) q = end;        r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */);        if (r != 0) goto err;      } @@ -5221,12 +5274,12 @@ quantifiers_memory_node_info(Node* node)  __inline  #endif  static int -tune_call_node_call(CallNode* cn, ScanEnv* env, int state) +check_call_reference(CallNode* cn, ScanEnv* env, int state)  {    MemEnv* mem_env = SCANENV_MEMENV(env);    if (cn->by_number != 0) { -    int gnum = cn->group_num; +    int gnum = cn->called_gnum;      if (env->num_named > 0 &&          IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && @@ -5241,12 +5294,14 @@ tune_call_node_call(CallNode* cn, ScanEnv* env, int state)      }    set_call_attr: -    NODE_CALL_BODY(cn) = mem_env[cn->group_num].mem_node; +    NODE_CALL_BODY(cn) = mem_env[cn->called_gnum].mem_node;      if (IS_NULL(NODE_CALL_BODY(cn))) {        onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,                                       cn->name, cn->name_end);        return ONIGERR_UNDEFINED_NAME_REFERENCE;      } + +    NODE_STATUS_ADD(NODE_CALL_BODY(cn), REFERENCED);    }    else {      int *refs; @@ -5263,7 +5318,7 @@ tune_call_node_call(CallNode* cn, ScanEnv* env, int state)        return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;      }      else { -      cn->group_num = refs[0]; +      cn->called_gnum = refs[0];        goto set_call_attr;      }    } @@ -5396,7 +5451,7 @@ tune_call(Node* node, ScanEnv* env, int state)        CALL_(node)->entry_count--;      } -    r = tune_call_node_call(CALL_(node), env, state); +    r = check_call_reference(CALL_(node), env, state);      break;    default: @@ -6187,8 +6242,10 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)        r = 1; /* 1:full */        break;      } -    for (j = 0; j < len && p < end; j++) +    for (j = 0; j < len && p < end; j++) { +      /* coverity[overrun-local] */        to->s[i++] = *p++; +    }    }    to->len = i; @@ -6210,8 +6267,10 @@ concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)    for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {      len = enclen(enc, p);      if (i + len > OPT_EXACT_MAXLEN) break; -    for (j = 0; j < len && p < end; j++) +    for (j = 0; j < len && p < end; j++) { +      /* coverity[overrun-local] */        to->s[i++] = *p++; +    }    }    to->len = i; @@ -7229,19 +7288,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    else      reg->ops_used = 0; -  reg->string_pool        = 0; -  reg->string_pool_end    = 0; -  reg->num_mem            = 0; -  reg->num_repeat         = 0; -  reg->num_empty_check    = 0; -  reg->repeat_range_alloc = 0; -  reg->repeat_range       = (RepeatRange* )NULL; -  reg->empty_status_mem   = 0; -    r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);    if (r != 0) goto err; -  r = reduce_string_list(root); +  r = reduce_string_list(root, reg->enc);    if (r != 0) goto err;    /* mixed use named group and no-named group */ @@ -7653,6 +7703,134 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)    return onig_is_code_in_cc_len(len, code, cc);  } +typedef struct { +  int prec_read; +  int look_behind; +  int backref_with_level; +  int call; +} SlowElementCount; + +static int +node_detect_can_be_slow(Node* node, SlowElementCount* ct) +{ +  int r; + +  r = 0; +  switch (NODE_TYPE(node)) { +  case NODE_LIST: +  case NODE_ALT: +    do { +      r = node_detect_can_be_slow(NODE_CAR(node), ct); +      if (r != 0) return r; +    } while (IS_NOT_NULL(node = NODE_CDR(node))); +    break; + +  case NODE_QUANT: +    r = node_detect_can_be_slow(NODE_BODY(node), ct); +    break; + +  case NODE_ANCHOR: +    switch (ANCHOR_(node)->type) { +    case ANCR_PREC_READ: +    case ANCR_PREC_READ_NOT: +      ct->prec_read++; +      break; +    case ANCR_LOOK_BEHIND: +    case ANCR_LOOK_BEHIND_NOT: +      ct->look_behind++; +      break; +    default: +      break; +    } + +    if (ANCHOR_HAS_BODY(ANCHOR_(node))) +      r = node_detect_can_be_slow(NODE_BODY(node), ct); +    break; + +  case NODE_BAG: +    { +      BagNode* en = BAG_(node); + +      r = node_detect_can_be_slow(NODE_BODY(node), ct); +      if (r != 0) return r; + +      if (en->type == BAG_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          r = node_detect_can_be_slow(en->te.Then, ct); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = node_detect_can_be_slow(en->te.Else, ct); +          if (r != 0) return r; +        } +      } +    } +    break; + +#ifdef USE_BACKREF_WITH_LEVEL +  case NODE_BACKREF: +    if (NODE_IS_NEST_LEVEL(node)) +      ct->backref_with_level++; +    break; +#endif + +#ifdef USE_CALL +  case NODE_CALL: +    ct->call++; +    break; +#endif + +  default: +    break; +  } + +  return r; +} + +extern int +onig_detect_can_be_slow_pattern(const UChar* pattern, +  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, +  OnigSyntaxType* syntax) +{ +  int r; +  regex_t* reg; +  Node* root; +  ScanEnv scan_env; +  SlowElementCount count; + +  reg = (regex_t* )xmalloc(sizeof(regex_t)); +  if (IS_NULL(reg)) return ONIGERR_MEMORY; + +  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); +  if (r != 0) { +    xfree(reg); +    return r; +  } + +  root = 0; +  r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env); +  if (r == 0) { +    count.prec_read          = 0; +    count.look_behind        = 0; +    count.backref_with_level = 0; +    count.call               = 0; + +    r = node_detect_can_be_slow(root, &count); +    if (r == 0) { +      int n = count.prec_read + count.look_behind +            + count.backref_with_level + count.call; +      r = n; +    } +  } + +  if (IS_NOT_NULL(scan_env.mem_env_dynamic)) +    xfree(scan_env.mem_env_dynamic); + +  onig_node_free(root); +  onig_free(reg); +  return r; +} +  #ifdef ONIG_DEBUG_PARSE @@ -7734,14 +7912,18 @@ print_indent_tree(FILE* f, Node* node, int indent)      break;    case NODE_CCLASS: +#define CCLASS_MBUF_MAX_OUTPUT_NUM   10 +      fprintf(f, "<cclass:%p>", node);      if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);      if (CCLASS_(node)->mbuf) {        BBuf* bbuf = CCLASS_(node)->mbuf; -      for (i = 0; i < bbuf->used; i++) { +      fprintf(f, " mbuf(%u) ", bbuf->used); +      for (i = 0; i < bbuf->used && i < CCLASS_MBUF_MAX_OUTPUT_NUM; i++) {          if (i > 0) fprintf(f, ",");          fprintf(f, "%0x", bbuf->p[i]);        } +      if (i < bbuf->used) fprintf(f, "...");      }      break; @@ -7822,6 +8004,11 @@ print_indent_tree(FILE* f, Node* node, int indent)          if (i > 0) fputs(", ", f);          fprintf(f, "%d", p[i]);        } +#ifdef USE_BACKREF_WITH_LEVEL +      if (NODE_IS_NEST_LEVEL(node)) { +        fprintf(f, ", level: %d", br->nest_level); +      } +#endif      }      break; @@ -7830,6 +8017,7 @@ print_indent_tree(FILE* f, Node* node, int indent)      {        CallNode* cn = CALL_(node);        fprintf(f, "<call:%p>", node); +      fprintf(f, " num: %d, name", cn->called_gnum);        p_string(f, cn->name_end - cn->name, cn->name);      }      break; @@ -7881,6 +8069,8 @@ print_indent_tree(FILE* f, Node* node, int indent)        fprintf(f, "memory:%d", BAG_(node)->m.regnum);        if (NODE_IS_CALLED(node))          fprintf(f, ", called"); +      else if (NODE_IS_REFERENCED(node)) +        fprintf(f, ", referenced");        if (NODE_IS_FIXED_ADDR(node))          fprintf(f, ", fixed-addr");        break; diff --git a/src/regenc.c b/src/regenc.c index dbfbc89..27e4549 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -263,12 +263,12 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)  extern int  onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)  { -  UChar* start = (UChar* )s; -  UChar* p = (UChar* )s; +  const UChar* start = s; +  const UChar* p = s;    while (1) {      if (*p == '\0') { -      UChar* q; +      const UChar* q;        int len = ONIGENC_MBC_MINLEN(enc);        if (len == 1) return (int )(p - start); diff --git a/src/regerror.c b/src/regerror.c index 58bc7fd..dc1c8b6 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -56,6 +56,8 @@ onig_error_code_to_format(int code)      p = "retry-limit-in-match over"; break;    case ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER:      p = "retry-limit-in-search over"; break; +  case ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER: +    p = "subexp-call-limit-in-search over"; break;    case ONIGERR_TYPE_BUG:      p = "undefined type (bug)"; break;    case ONIGERR_PARSER_BUG: diff --git a/src/regexec.c b/src/regexec.c index 1b6895d..bb6b474 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -46,15 +46,15 @@  #define CHECK_INTERRUPT_IN_MATCH -#define STACK_MEM_START(reg, i) \ -  (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \ -   STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i]))) +#define STACK_MEM_START(reg, idx) \ +  (MEM_STATUS_AT((reg)->push_mem_start, (idx)) != 0 ? \ +   STACK_AT(mem_start_stk[idx].i)->u.mem.pstr : mem_start_stk[idx].s) -#define STACK_MEM_END(reg, i) \ -  (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \ -   STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i]))) +#define STACK_MEM_END(reg, idx) \ +  (MEM_STATUS_AT((reg)->push_mem_end, (idx)) != 0 ? \ +   STACK_AT(mem_end_stk[idx].i)->u.mem.pstr : mem_end_stk[idx].s) -static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev); +static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high);  static int  search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp); @@ -170,6 +170,9 @@ typedef struct {    int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */    UChar* best_s;  #endif +#ifdef USE_CALL +  unsigned long  subexp_call_in_search_counter; +#endif  } MatchArg; @@ -1057,8 +1060,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)  /** stack **/ -#define INVALID_STACK_INDEX   -1 -  #define STK_ALT_FLAG               0x0001  /* stack type */ @@ -1099,7 +1100,15 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)  #define STK_MASK_TO_VOID_TARGET    0x100e  #define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */ -typedef intptr_t StackIndex; +typedef ptrdiff_t StackIndex; + +#define INVALID_STACK_INDEX   ((StackIndex )-1) + +typedef union { +  StackIndex i; +  UChar*     s; +} StkPtrType; +  typedef struct _StackType {    unsigned int type; @@ -1108,7 +1117,6 @@ typedef struct _StackType {      struct {        Operation* pcode;     /* byte code position */        UChar*     pstr;      /* string position */ -      UChar*     pstr_prev; /* previous char position of pstr */      } state;      struct {        int        count; @@ -1119,8 +1127,8 @@ typedef struct _StackType {      struct {        UChar *pstr;       /* start/end position */        /* Following information is set, if this stack type is MEM-START */ -      StackIndex prev_start;  /* prev. info (for backtrack  "(...)*" ) */ -      StackIndex prev_end;    /* prev. info (for backtrack  "(...)*" ) */ +      StkPtrType prev_start;  /* prev. info (for backtrack  "(...)*" ) */ +      StkPtrType prev_end;    /* prev. info (for backtrack  "(...)*" ) */      } mem;      struct {        UChar *pstr;            /* start position */ @@ -1166,8 +1174,8 @@ struct OnigCalloutArgsStruct {    MatchArg*   msa;    StackType*  stk_base;    StackType*  stk; -  StackIndex* mem_start_stk; -  StackIndex* mem_end_stk; +  StkPtrType* mem_start_stk; +  StkPtrType* mem_end_stk;  };  #endif @@ -1178,7 +1186,7 @@ struct OnigCalloutArgsStruct {  #define UPDATE_FOR_STACK_REALLOC do{\    repeat_stk      = (StackIndex* )alloc_base;\    empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ -  mem_start_stk   = (StackIndex* )(empty_check_stk + reg->num_empty_check);\ +  mem_start_stk   = (StkPtrType* )(empty_check_stk + reg->num_empty_check);\    mem_end_stk     = mem_start_stk + num_mem + 1;\  } while(0) @@ -1194,7 +1202,7 @@ struct OnigCalloutArgsStruct {  #define PTR_NUM_SIZE(reg)  (((reg)->num_mem + 1) * 2)  #define UPDATE_FOR_STACK_REALLOC do{\ -  mem_start_stk = (StackIndex* )alloc_base;\ +  mem_start_stk = (StkPtrType* )alloc_base;\    mem_end_stk   = mem_start_stk + num_mem + 1;\  } while(0) @@ -1218,8 +1226,12 @@ struct OnigCalloutArgsStruct {  #endif  #if defined(USE_CALL) +#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) \ +  (msa).subexp_call_in_search_counter = 0; +  #define POP_CALL  else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}  #else +#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)  #define POP_CALL  #endif @@ -1231,6 +1243,7 @@ struct OnigCalloutArgsStruct {    (msa).start    = (arg_start);\    (msa).match_stack_limit  = (mpv)->match_stack_limit;\    RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ +  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\    (msa).mp = mpv;\    (msa).best_len = ONIG_MISMATCH;\    (msa).ptr_num  = PTR_NUM_SIZE(reg);\ @@ -1243,6 +1256,7 @@ struct OnigCalloutArgsStruct {    (msa).start    = (arg_start);\    (msa).match_stack_limit  = (mpv)->match_stack_limit;\    RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ +  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\    (msa).mp = mpv;\    (msa).ptr_num  = PTR_NUM_SIZE(reg);\  } while(0) @@ -1258,27 +1272,27 @@ struct OnigCalloutArgsStruct {      is_alloca  = 0;\      alloc_base = msa->stack_p;\      stk_base   = (StackType* )(alloc_base\ -                 + (sizeof(StackIndex) * msa->ptr_num));\ +                 + (sizeof(StkPtrType) * msa->ptr_num));\      stk        = stk_base;\      stk_end    = stk_base + msa->stack_n;\    }\    else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\      is_alloca  = 0;\ -    alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ +    alloc_base = (char* )xmalloc(sizeof(StkPtrType) * msa->ptr_num\                    + sizeof(StackType) * (stack_num));\      CHECK_NULL_RETURN_MEMERR(alloc_base);\      stk_base   = (StackType* )(alloc_base\ -                 + (sizeof(StackIndex) * msa->ptr_num));\ +                 + (sizeof(StkPtrType) * msa->ptr_num));\      stk        = stk_base;\      stk_end    = stk_base + (stack_num);\    }\    else {\      is_alloca  = 1;\ -    alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\ +    alloc_base = (char* )xalloca(sizeof(StkPtrType) * msa->ptr_num\                   + sizeof(StackType) * (stack_num));\      CHECK_NULL_RETURN_MEMERR(alloc_base);\      stk_base   = (StackType* )(alloc_base\ -                 + (sizeof(StackIndex) * msa->ptr_num));\ +                 + (sizeof(StkPtrType) * msa->ptr_num));\      stk        = stk_base;\      stk_end    = stk_base + (stack_num);\    }\ @@ -1288,7 +1302,7 @@ struct OnigCalloutArgsStruct {  #define STACK_SAVE(msa,is_alloca,alloc_base) do{\    (msa)->stack_n = (int )(stk_end - stk_base);\    if ((is_alloca) != 0) {\ -    size_t size = sizeof(StackIndex) * (msa)->ptr_num\ +    size_t size = sizeof(StkPtrType) * (msa)->ptr_num\                  + sizeof(StackType) * (msa)->stack_n;\      (msa)->stack_p = xmalloc(size);\      CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\ @@ -1373,6 +1387,24 @@ onig_set_retry_limit_in_search(unsigned long n)  #endif  } +#ifdef USE_CALL +static unsigned long SubexpCallLimitInSearch = DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH; + +extern unsigned long +onig_get_subexp_call_limit_in_search(void) +{ +  return SubexpCallLimitInSearch; +} + +extern int +onig_set_subexp_call_limit_in_search(unsigned long n) +{ +  SubexpCallLimitInSearch = n; +  return 0; +} + +#endif +  #ifdef USE_CALLOUT  static OnigCalloutFunc DefaultProgressCallout;  static OnigCalloutFunc DefaultRetractionCallout; @@ -1637,9 +1669,9 @@ stack_double(int* is_alloca, char** arg_alloc_base,    stk      = *arg_stk;    n = (unsigned int )(stk_end - stk_base); -  size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; +  size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;    n *= 2; -  new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; +  new_size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;    if (*is_alloca != 0) {      new_alloc_base = (char* )xmalloc(new_size);      if (IS_NULL(new_alloc_base)) { @@ -1669,7 +1701,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,    used = (int )(stk - stk_base);    *arg_alloc_base = alloc_base;    *arg_stk_base   = (StackType* )(alloc_base -                                  + (sizeof(StackIndex) * msa->ptr_num)); +                                  + (sizeof(StkPtrType) * msa->ptr_num));    *arg_stk      = *arg_stk_base + used;    *arg_stk_end  = *arg_stk_base + n;    return 0; @@ -1694,22 +1726,20 @@ stack_double(int* is_alloca, char** arg_alloc_base,  #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ +#define STACK_PUSH(stack_type,pat,s) do {\    STACK_ENSURE(1);\    stk->type = (stack_type);\    stk->u.state.pcode     = (pat);\    stk->u.state.pstr      = (s);\ -  stk->u.state.pstr_prev = (sprev);\    STACK_INC;\  } while(0) -#define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\ +#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\    STACK_ENSURE(1);\    stk->type = (stack_type);\    stk->zid  = (int )(id);\    stk->u.state.pcode     = (pat);\    stk->u.state.pstr      = (s);\ -  stk->u.state.pstr_prev = (sprev);\    STACK_INC;\  } while(0) @@ -1724,7 +1754,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,    stk->type = (stack_type);\    stk->u.state.pcode = (pat);\    stk->u.state.pstr      = s;\ -  stk->u.state.pstr_prev = sprev;\    STACK_INC;\  } while (0)  #else @@ -1735,10 +1764,9 @@ stack_double(int* is_alloca, char** arg_alloc_base,  } while (0)  #endif -#define STACK_PUSH_ALT(pat,s,sprev)       STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) -#define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \ -  STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id) +#define STACK_PUSH_ALT(pat,s)       STACK_PUSH(STK_ALT,pat,s) +#define STACK_PUSH_SUPER_ALT(pat,s) STACK_PUSH(STK_SUPER_ALT,pat,s) +#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id)  #if 0  #define STACK_PUSH_REPEAT(sid, pat) do {\ @@ -1767,8 +1795,8 @@ stack_double(int* is_alloca, char** arg_alloc_base,    stk->u.mem.pstr       = (s);\    stk->u.mem.prev_start = mem_start_stk[mnum];\    stk->u.mem.prev_end   = mem_end_stk[mnum];\ -  mem_start_stk[mnum]   = GET_STACK_INDEX(stk);\ -  mem_end_stk[mnum]     = INVALID_STACK_INDEX;\ +  mem_start_stk[mnum].i = GET_STACK_INDEX(stk);\ +  mem_end_stk[mnum].i   = INVALID_STACK_INDEX;\    STACK_INC;\  } while(0) @@ -1779,7 +1807,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,    stk->u.mem.pstr       = (s);\    stk->u.mem.prev_start = mem_start_stk[mnum];\    stk->u.mem.prev_end   = mem_end_stk[mnum];\ -  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ +  mem_end_stk[mnum].i   = GET_STACK_INDEX(stk);\    STACK_INC;\  } while(0) @@ -1861,12 +1889,11 @@ stack_double(int* is_alloca, char** arg_alloc_base,    STACK_INC;\  } while(0) -#define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\ +#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\    STACK_ENSURE(1);\    stk->type = STK_MARK;\    stk->zid  = (sid);\    stk->u.val.v  = (UChar* )(s);\ -  stk->u.val.v2 = (sprev);\    STACK_INC;\  } while(0) @@ -1885,7 +1912,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,    stk->zid  = (sid);\    stk->u.val.type = (stype);\    stk->u.val.v    = (UChar* )(sval);\ -  stk->u.val.v2   = sprev;\    STACK_INC;\  } while(0) @@ -1932,7 +1958,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,          && k->zid == (sid)) {\        if (level == 0) {\          (sval) = k->u.val.v;\ -        sprev  = k->u.val.v2;\          break;\        }\      }\ @@ -2135,14 +2160,14 @@ stack_double(int* is_alloca, char** arg_alloc_base,  } while(0)  #define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\ -  if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\ +  if (k->u.mem.prev_end.i == INVALID_STACK_INDEX) {\      (addr) = 0;\    }\    else {\      if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\ -      (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\ +      (addr) = STACK_AT(k->u.mem.prev_end.i)->u.mem.pstr;\      else\ -      (addr) = (UChar* )k->u.mem.prev_end;\ +      (addr) = k->u.mem.prev_end.s;\    }\  } while (0) @@ -2163,7 +2188,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,          if (endp == 0) {\            (isnull) = 0; break;\          }\ -        else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\ +        else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) {\            (isnull) = 0; break;\          }\          else if (endp != s) {\ @@ -2199,7 +2224,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,                    if (endp == 0) {\                      (isnull) = 0; break;\                    }\ -                  else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \ +                  else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) { \                      (isnull) = 0; break;\                    }\                    else if (endp != s) {\ @@ -2362,6 +2387,10 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,        p1++;        p2++;      } +    if (s2 >= end2) { +      if (s1 < end1) return 0; +      else           break; +    }    }    *ps2 = s2; @@ -2390,7 +2419,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,  #define ON_STR_END(s)          ((s) == end)  #define DATA_ENSURE_CHECK1     (s < right_range)  #define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range) -#define DATA_ENSURE(n)         if (s + (n) > right_range) goto fail +#define DATA_ENSURE(n)         if (right_range - s < (n)) goto fail  #define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range @@ -2632,9 +2661,9 @@ typedef struct {  #define BYTECODE_INTERPRETER_START      GOTO_OP;  #define BYTECODE_INTERPRETER_END -#define CASE_OP(x)   L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0) +#define CASE_OP(x)   L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0)  #define DEFAULT_OP   /* L_DEFAULT: */ -#define NEXT_OP      sprev = sbegin; JUMP_OP +#define NEXT_OP      JUMP_OP  #define JUMP_OP      GOTO_OP  #ifdef USE_DIRECT_THREADED_CODE  #define GOTO_OP      goto *(p->opaddr) @@ -2648,9 +2677,8 @@ typedef struct {  #define BYTECODE_INTERPRETER_START \    while (1) {\    MATCH_DEBUG_OUT(0)\ -  sbegin = s;\    switch (p->opcode) { -#define BYTECODE_INTERPRETER_END  } sprev = sbegin; } +#define BYTECODE_INTERPRETER_END  } }  #define CASE_OP(x)   case OP_##x: SOP_IN(OP_##x);  #define DEFAULT_OP   default:  #define NEXT_OP      break @@ -2718,12 +2746,22 @@ typedef struct {    best_len = err_code; goto match_at_end;\  } while(0) +#define MATCH_COUNTER_OUT(title) do {\ +  int i;\ +  fprintf(DBGFP, "%s (%ld): retry limit: %8lu, subexp_call: %8lu\n", (title), (sstart - str), retry_in_match_counter, msa->subexp_call_in_search_counter); \ +  fprintf(DBGFP, "      ");\ +  for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {\ +    fprintf(DBGFP, " %6lu", subexp_call_counters[i]);\ +  }\ +  fprintf(DBGFP, "\n");\ +  fflush(DBGFP);\ +} while (0) +  /* match data(str - end) from position (sstart). */ -/* if sstart == str then set sprev to NULL. */  static int  match_at(regex_t* reg, const UChar* str, const UChar* end, -         const UChar* in_right_range, const UChar* sstart, UChar* sprev, +         const UChar* in_right_range, const UChar* sstart,           MatchArg* msa)  { @@ -2782,10 +2820,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    &&L_BACKREF_N_IC,    &&L_BACKREF_MULTI,    &&L_BACKREF_MULTI_IC, +#ifdef USE_BACKREF_WITH_LEVEL    &&L_BACKREF_WITH_LEVEL,    &&L_BACKREF_WITH_LEVEL_IC, +#endif    &&L_BACKREF_CHECK, +#ifdef USE_BACKREF_WITH_LEVEL    &&L_BACKREF_CHECK_WITH_LEVEL, +#endif    &&L_MEM_START,    &&L_MEM_START_PUSH,    &&L_MEM_END_PUSH, @@ -2838,13 +2880,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    LengthType tlen, tlen2;    MemNumType mem;    RelAddrType addr; -  UChar *s, *ps, *sbegin; +  UChar *s, *ps;    UChar *right_range;    int is_alloca;    char *alloc_base;    StackType *stk_base, *stk, *stk_end;    StackType *stkp; /* used as any purpose. */ -  StackIndex *mem_start_stk, *mem_end_stk; +  StkPtrType *mem_start_stk, *mem_end_stk;    UChar* keep;  #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR @@ -2858,6 +2900,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,  #ifdef USE_CALLOUT    int of;  #endif +#ifdef ONIG_DEBUG_MATCH_COUNTER +#define MAX_SUBEXP_CALL_COUNTERS  9 +  unsigned long subexp_call_counters[MAX_SUBEXP_CALL_COUNTERS]; +#endif    Operation* p = reg->ops;    OnigOptionType option = reg->options; @@ -2872,6 +2918,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    static unsigned int counter = 1;  #endif +#ifdef ONIG_DEBUG_MATCH_COUNTER +  for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) { +    subexp_call_counters[i] = 0; +  } +#endif +  #ifdef USE_DIRECT_THREADED_CODE    if (IS_NULL(msa)) {      for (i = 0; i < reg->ops_used; i++) { @@ -2903,12 +2955,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    STACK_INIT(INIT_MATCH_STACK_SIZE);    UPDATE_FOR_STACK_REALLOC;    for (i = 1; i <= num_mem; i++) { -    mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; +    mem_start_stk[i].i = mem_end_stk[i].i = INVALID_STACK_INDEX;    }  #ifdef ONIG_DEBUG_MATCH -  fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", -          str, end, sstart, sprev); +  fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart);    fprintf(DBGFP, "size: %d, start offset: %d\n",            (int )(end - str), (int )(sstart - str));  #endif @@ -2932,24 +2983,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            if (n > msa->best_len) {              msa->best_len = n;              msa->best_s   = (UChar* )sstart; +            goto set_region;            }            else              goto end_best_len;          }  #endif          best_len = n; + +      set_region:          region = msa->region;          if (region) {            if (keep > s) keep = s; -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API            if (OPTON_POSIX_REGION(msa->options)) {              posix_regmatch_t* rmt = (posix_regmatch_t* )region;              rmt[0].rm_so = (regoff_t )(keep - str);              rmt[0].rm_eo = (regoff_t )(s    - str);              for (i = 1; i <= num_mem; i++) { -              if (mem_end_stk[i] != INVALID_STACK_INDEX) { +              if (mem_end_stk[i].i != INVALID_STACK_INDEX) {                  rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);                  rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i)   - str);                } @@ -2959,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,              }            }            else { -#endif /* USE_POSIX_API_REGION_OPTION */ +#endif /* USE_POSIX_API */              region->beg[0] = (int )(keep - str);              region->end[0] = (int )(s    - str);              for (i = 1; i <= num_mem; i++) { -              if (mem_end_stk[i] != INVALID_STACK_INDEX) { +              if (mem_end_stk[i].i != INVALID_STACK_INDEX) {                  region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);                  region->end[i] = (int )(STACK_MEM_END(reg, i)   - str);                } @@ -2996,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,                if (r < 0) MATCH_AT_ERROR_RETURN(r);              }  #endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API            } /* else OPTON_POSIX_REGION() */  #endif          } /* if (region) */ @@ -3012,8 +3066,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            best_len = ONIG_MISMATCH;            goto fail; /* for retry */          } -        if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { -          goto fail; /* for retry */ +        if (OPTON_FIND_LONGEST(option)) { +          if (s >= in_right_range && msa->best_s == sstart) +            best_len = msa->best_len; +          else +            goto fail; /* for retry */          }        } @@ -3034,7 +3091,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*ps != *s) goto fail;        ps++; s++;        if (*ps != *s) goto fail; -      sprev = s;        s++;        INC_OP;        JUMP_OUT; @@ -3047,7 +3103,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*ps != *s) goto fail;        ps++; s++;        if (*ps != *s) goto fail; -      sprev = s;        s++;        INC_OP;        JUMP_OUT; @@ -3062,7 +3117,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*ps != *s) goto fail;        ps++; s++;        if (*ps != *s) goto fail; -      sprev = s;        s++;        INC_OP;        JUMP_OUT; @@ -3079,7 +3133,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*ps != *s) goto fail;        ps++; s++;        if (*ps != *s) goto fail; -      sprev = s;        s++;        INC_OP;        JUMP_OUT; @@ -3091,7 +3144,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        while (tlen-- > 0) {          if (*ps++ != *s++) goto fail;        } -      sprev = s - 1;        INC_OP;        JUMP_OUT; @@ -3112,7 +3164,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        ps++; s++;        if (*ps != *s) goto fail;        ps++; s++; -      sprev = s;        if (*ps != *s) goto fail;        ps++; s++;        if (*ps != *s) goto fail; @@ -3131,7 +3182,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        ps++; s++;        if (*ps != *s) goto fail;        ps++; s++; -      sprev = s;        if (*ps != *s) goto fail;        ps++; s++;        if (*ps != *s) goto fail; @@ -3149,7 +3199,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          if (*ps != *s) goto fail;          ps++; s++;        } -      sprev = s - 2;        INC_OP;        JUMP_OUT; @@ -3165,7 +3214,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          if (*ps != *s) goto fail;          ps++; s++;        } -      sprev = s - 3;        INC_OP;        JUMP_OUT; @@ -3179,7 +3227,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          if (*ps != *s) goto fail;          ps++; s++;        } -      sprev = s - tlen;        INC_OP;        JUMP_OUT; @@ -3295,11 +3342,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(ANYCHAR_STAR)        INC_OP;        while (DATA_ENSURE_CHECK1) { -        STACK_PUSH_ALT(p, s, sprev); +        STACK_PUSH_ALT(p, s);          n = enclen(encode, s);          DATA_ENSURE(n);          if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail; -        sprev = s;          s += n;        }        JUMP_OUT; @@ -3307,15 +3353,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(ANYCHAR_ML_STAR)        INC_OP;        while (DATA_ENSURE_CHECK1) { -        STACK_PUSH_ALT(p, s, sprev); +        STACK_PUSH_ALT(p, s);          n = enclen(encode, s);          if (n > 1) {            DATA_ENSURE(n); -          sprev = s;            s += n;          }          else { -          sprev = s;            s++;          }        } @@ -3329,12 +3373,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          INC_OP;          while (DATA_ENSURE_CHECK1) {            if (c == *s) { -            STACK_PUSH_ALT(p, s, sprev); +            STACK_PUSH_ALT(p, s);            }            n = enclen(encode, s);            DATA_ENSURE(n);            if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail; -          sprev = s;            s += n;          }        } @@ -3348,16 +3391,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          INC_OP;          while (DATA_ENSURE_CHECK1) {            if (c == *s) { -            STACK_PUSH_ALT(p, s, sprev); +            STACK_PUSH_ALT(p, s);            }            n = enclen(encode, s);            if (n > 1) {              DATA_ENSURE(n); -            sprev = s;              s += n;            }            else { -            sprev = s;              s++;            }          } @@ -3410,14 +3451,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))              goto fail;          } -        else if (ON_STR_END(s)) { -          if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) -            goto fail; -        }          else { -          if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) -              == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) -            goto fail; +          UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); +          if (ON_STR_END(s)) { +            if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) +              goto fail; +          } +          else { +            if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) +                == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) +              goto fail; +          }          }        }        INC_OP; @@ -3432,14 +3476,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))              goto fail;          } -        else if (ON_STR_END(s)) { -          if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) -            goto fail; -        }          else { -          if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) -              != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) -            goto fail; +          UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); +          if (ON_STR_END(s)) { +            if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) +              goto fail; +          } +          else { +            if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) +                != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) +              goto fail; +          }          }        }        INC_OP; @@ -3452,7 +3499,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          mode = p->word_boundary.mode;          if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { -          if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { +          UChar* sprev; +          if (ON_STR_BEGIN(s)) { +            INC_OP; +            JUMP_OUT; +          } +          sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); +          if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {              INC_OP;              JUMP_OUT;            } @@ -3465,10 +3518,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          ModeType mode;          mode = p->word_boundary.mode; -        if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { -          if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { -            INC_OP; -            JUMP_OUT; +        if (! ON_STR_BEGIN(s)) { +          UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); +          if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { +            if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { +              INC_OP; +              JUMP_OUT; +            }            }          }        } @@ -3478,6 +3534,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(TEXT_SEGMENT_BOUNDARY)        {          int is_break; +        UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);          switch (p->text_segment_boundary.type) {          case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: @@ -3507,12 +3564,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(BEGIN_BUF)        if (! ON_STR_BEGIN(s)) goto fail; +      if (OPTON_NOTBOL(msa->options)) goto fail; +      if (OPTON_NOT_BEGIN_STRING(msa->options)) goto fail;        INC_OP;        JUMP_OUT;      CASE_OP(END_BUF)        if (! ON_STR_END(s)) goto fail; +      if (OPTON_NOTEOL(msa->options)) goto fail; +      if (OPTON_NOT_END_STRING(msa->options)) goto fail;        INC_OP;        JUMP_OUT; @@ -3523,15 +3584,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          INC_OP;          JUMP_OUT;        } -      else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { -        INC_OP; -        JUMP_OUT; +      else if (! ON_STR_END(s)) { +        UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); +        if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +          INC_OP; +          JUMP_OUT; +        }        }        goto fail;      CASE_OP(END_LINE)        if (ON_STR_END(s)) {  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE +        UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);          if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {  #endif            if (OPTON_NOTEOL(msa->options)) goto fail; @@ -3556,9 +3621,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(SEMI_END_BUF)        if (ON_STR_END(s)) {  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE +        UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);          if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {  #endif            if (OPTON_NOTEOL(msa->options)) goto fail; +          if (OPTON_NOT_END_STRING(msa->options)) goto fail;            INC_OP;            JUMP_OUT;  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE @@ -3567,6 +3634,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        }        else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&                 ON_STR_END(s + enclen(encode, s))) { +        if (OPTON_NOTEOL(msa->options)) goto fail; +        if (OPTON_NOT_END_STRING(msa->options)) goto fail;          INC_OP;          JUMP_OUT;        } @@ -3575,6 +3644,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          UChar* ss = s + enclen(encode, s);          ss += enclen(encode, ss);          if (ON_STR_END(ss)) { +          if (OPTON_NOTEOL(msa->options)) goto fail; +          if (OPTON_NOT_END_STRING(msa->options)) goto fail;            INC_OP;            JUMP_OUT;          } @@ -3586,6 +3657,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        switch (p->check_position.type) {        case CHECK_POSITION_SEARCH_START:          if (s != msa->start) goto fail; +        if (OPTON_NOT_BEGIN_POSITION(msa->options)) goto fail;          break;        case CHECK_POSITION_CURRENT_RIGHT_RANGE:          if (s != right_range) goto fail; @@ -3604,7 +3676,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(MEM_START)        mem = p->memory_start.num; -      mem_start_stk[mem] = (StackIndex )((void* )s); +      mem_start_stk[mem].s = s;        INC_OP;        JUMP_OUT; @@ -3616,7 +3688,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(MEM_END)        mem = p->memory_end.num; -      mem_end_stk[mem] = (StackIndex )((void* )s); +      mem_end_stk[mem].s = s;        INC_OP;        JUMP_OUT; @@ -3629,20 +3701,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */          si = GET_STACK_INDEX(stkp);          STACK_PUSH_MEM_END(mem, s); -        mem_start_stk[mem] = si; +        mem_start_stk[mem].i = si;          INC_OP;          JUMP_OUT;        }      CASE_OP(MEM_END_REC)        mem = p->memory_end.num; -      mem_end_stk[mem] = (StackIndex )((void* )s); +      mem_end_stk[mem].s = s;        STACK_GET_MEM_START(mem, stkp);        if (MEM_STATUS_AT(reg->push_mem_start, mem)) -        mem_start_stk[mem] = GET_STACK_INDEX(stkp); +        mem_start_stk[mem].i = GET_STACK_INDEX(stkp);        else -        mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); +        mem_start_stk[mem].s = stkp->u.mem.pstr;        STACK_PUSH_MEM_END_MARK(mem);        INC_OP; @@ -3661,21 +3733,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        mem = p->backref_n.n1;      backref:        { -        int len;          UChar *pstart, *pend; -        if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail; -        if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; +        if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) goto fail; +        if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;          pstart = STACK_MEM_START(reg, mem);          pend   = STACK_MEM_END(reg, mem);          n = (int )(pend - pstart);          if (n != 0) {            DATA_ENSURE(n); -          sprev = s;            STRING_CMP(s, pstart, n); -          while (sprev + (len = enclen(encode, sprev)) < s) -            sprev += len;          }        }        INC_OP; @@ -3684,21 +3752,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(BACKREF_N_IC)        mem = p->backref_n.n1;        { -        int len;          UChar *pstart, *pend; -        if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail; -        if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; +        if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) goto fail; +        if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;          pstart = STACK_MEM_START(reg, mem);          pend   = STACK_MEM_END(reg, mem);          n = (int )(pend - pstart);          if (n != 0) {            DATA_ENSURE(n); -          sprev = s;            STRING_CMP_IC(case_fold_flag, pstart, &s, n); -          while (sprev + (len = enclen(encode, sprev)) < s) -            sprev += len;          }        }        INC_OP; @@ -3706,28 +3770,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(BACKREF_MULTI)        { -        int len, is_fail; +        int is_fail;          UChar *pstart, *pend, *swork;          tlen = p->backref_general.num;          for (i = 0; i < tlen; i++) {            mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i]; -          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue; -          if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; +          if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) continue; +          if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;            pstart = STACK_MEM_START(reg, mem);            pend   = STACK_MEM_END(reg, mem);            n = (int )(pend - pstart);            if (n != 0) {              DATA_ENSURE(n); -            sprev = s;              swork = s;              STRING_CMP_VALUE(swork, pstart, n, is_fail);              if (is_fail) continue;              s = swork; -            while (sprev + (len = enclen(encode, sprev)) < s) -              sprev += len;            }            break; /* success */          } @@ -3738,28 +3799,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(BACKREF_MULTI_IC)        { -        int len, is_fail; +        int is_fail;          UChar *pstart, *pend, *swork;          tlen = p->backref_general.num;          for (i = 0; i < tlen; i++) {            mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i]; -          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue; -          if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; +          if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) continue; +          if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;            pstart = STACK_MEM_START(reg, mem);            pend   = STACK_MEM_END(reg, mem);            n = (int )(pend - pstart);            if (n != 0) {              DATA_ENSURE(n); -            sprev = s;              swork = s;              STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);              if (is_fail) continue;              s = swork; -            while (sprev + (len = enclen(encode, sprev)) < s) -              sprev += len;            }            break; /* success */          } @@ -3774,10 +3832,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        goto backref_with_level;      CASE_OP(BACKREF_WITH_LEVEL)        { -        int len;          int level;          MemNumType* mems; -        UChar* ssave;          n = 0;        backref_with_level: @@ -3785,17 +3841,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          tlen  = p->backref_general.num;          mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns; -        ssave = s; -        if (backref_match_at_nested_level(reg, stk, stk_base, n, -                    case_fold_flag, level, (int )tlen, mems, &s, end)) { -          if (ssave != s) { -            sprev = ssave; -            while (sprev + (len = enclen(encode, sprev)) < s) -              sprev += len; -          } -        } -        else +        if (! backref_match_at_nested_level(reg, stk, stk_base, n, +                      case_fold_flag, level, (int )tlen, mems, &s, end)) {            goto fail; +        }        }        INC_OP;        JUMP_OUT; @@ -3810,8 +3859,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          for (i = 0; i < tlen; i++) {            mem = mems[i]; -          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue; -          if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; +          if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) continue; +          if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;            break; /* success */          }          if (i == tlen) goto fail; @@ -3928,13 +3977,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(PUSH)        addr = p->push.addr; -      STACK_PUSH_ALT(p + addr, s, sprev); +      STACK_PUSH_ALT(p + addr, s);        INC_OP;        JUMP_OUT;      CASE_OP(PUSH_SUPER)        addr = p->push.addr; -      STACK_PUSH_SUPER_ALT(p + addr, s, sprev); +      STACK_PUSH_SUPER_ALT(p + addr, s);        INC_OP;        JUMP_OUT; @@ -3956,7 +4005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          addr = p->push_or_jump_exact1.addr;          c    = p->push_or_jump_exact1.c;          if (DATA_ENSURE_CHECK1 && c == *s) { -          STACK_PUSH_ALT(p + addr, s, sprev); +          STACK_PUSH_ALT(p + addr, s);            INC_OP;            JUMP_OUT;          } @@ -3972,9 +4021,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          addr = p->push_if_peek_next.addr;          c    = p->push_if_peek_next.c;          if (DATA_ENSURE_CHECK1 && c == *s) { -          STACK_PUSH_ALT(p + addr, s, sprev); -          INC_OP; -          JUMP_OUT; +          STACK_PUSH_ALT(p + addr, s);          }        }        INC_OP; @@ -3986,7 +4033,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        STACK_PUSH_REPEAT_INC(mem, 0);        if (reg->repeat_range[mem].lower == 0) { -        STACK_PUSH_ALT(p + addr, s, sprev); +        STACK_PUSH_ALT(p + addr, s);        }        INC_OP;        JUMP_OUT; @@ -3997,7 +4044,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        STACK_PUSH_REPEAT_INC(mem, 0);        if (reg->repeat_range[mem].lower == 0) { -        STACK_PUSH_ALT(p + 1, s, sprev); +        STACK_PUSH_ALT(p + 1, s);          p += addr;        }        else @@ -4014,7 +4061,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        }        else if (n >= reg->repeat_range[mem].lower) {          INC_OP; -        STACK_PUSH_ALT(p, s, sprev); +        STACK_PUSH_ALT(p, s);          p = reg->repeat_range[mem].u.pcode;        }        else { @@ -4033,7 +4080,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        }        else {          if (n >= reg->repeat_range[mem].lower) { -          STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev); +          STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s);            INC_OP;          }          else { @@ -4047,6 +4094,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (subexp_call_nest_counter == SubexpCallMaxNestLevel)          goto fail;        subexp_call_nest_counter++; + +      if (SubexpCallLimitInSearch != 0) { +        msa->subexp_call_in_search_counter++; +#ifdef ONIG_DEBUG_MATCH_COUNTER +        if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS) +          subexp_call_counters[p->call.called_mem]++; +        if (msa->subexp_call_in_search_counter % 1000 == 0) +          MATCH_COUNTER_OUT("CALL"); +#endif +        if (msa->subexp_call_in_search_counter > +            SubexpCallLimitInSearch) { +          MATCH_AT_ERROR_RETURN(ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER); +        } +      } +        addr = p->call.addr;        INC_OP; STACK_PUSH_CALL_FRAME(p);        p = reg->ops + addr; @@ -4070,7 +4132,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          for (tlen = p->move.n; tlen > 0; tlen--) {            len = enclen(encode, s); -          sprev = s;            s += len;            if (s > end) goto fail;            if (s == end) { @@ -4079,7 +4140,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            }          }        } -      sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);        INC_OP;        JUMP_OUT; @@ -4088,10 +4148,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (tlen != 0) {          s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);          if (IS_NULL(s)) goto fail; -        sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);        }        if (p->step_back_start.remaining != 0) { -        STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining); +        STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining);          p += p->step_back_start.addr;        }        else @@ -4103,9 +4162,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (tlen != INFINITE_LEN) tlen--;        s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1);        if (IS_NULL(s)) goto fail; -      sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);        if (tlen != 0) { -        STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen); +        STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen);        }        INC_OP;        JUMP_OUT; @@ -4114,8 +4172,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        mem  = p->cut_to_mark.id; /* mem: mark id */        STACK_TO_VOID_TO_MARK(stkp, mem);        if (p->cut_to_mark.restore_pos != 0) { -        s     = stkp->u.val.v; -        sprev = stkp->u.val.v2; +        s = stkp->u.val.v;        }        INC_OP;        JUMP_OUT; @@ -4123,7 +4180,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(MARK)        mem  = p->mark.id; /* mem: mark id */        if (p->mark.save_pos != 0) -        STACK_PUSH_MARK_WITH_POS(mem, s, sprev); +        STACK_PUSH_MARK_WITH_POS(mem, s);        else          STACK_PUSH_MARK(mem); @@ -4275,9 +4332,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      fail:  #endif        STACK_POP; -      p     = stk->u.state.pcode; -      s     = stk->u.state.pstr; -      sprev = stk->u.state.pstr_prev; +      p = stk->u.state.pcode; +      s = stk->u.state.pstr;        CHECK_RETRY_LIMIT_IN_MATCH;        JUMP_OUT; @@ -4290,6 +4346,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    if (msa->retry_limit_in_search != 0) {      msa->retry_limit_in_search_counter += retry_in_match_counter;    } + +#ifdef ONIG_DEBUG_MATCH_COUNTER +  MATCH_COUNTER_OUT("END"); +#endif +    STACK_SAVE(msa, is_alloca, alloc_base);    return best_len;  } @@ -4324,12 +4385,11 @@ typedef struct {    int    state;  /* value of enum SearchRangeStatus */    UChar* low;    UChar* high; -  UChar* low_prev;    UChar* sch_range;  } SearchRange;  #define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \ -  r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \ +  r = match_at(reg, str, end, (upper_range), s, msas + i); \    if (r != ONIG_MISMATCH) {\      if (r >= 0) {\        goto match;\ @@ -4345,8 +4405,8 @@ regset_search_body_position_lead(OnigRegSet* set,             OnigOptionType option, MatchArg* msas, int* rmatch_pos)  {    int r, n, i; -  UChar *s, *prev; -  UChar *low, *high, *low_prev; +  UChar *s; +  UChar *low, *high;    UChar* sch_range;    regex_t* reg;    OnigEncoding enc; @@ -4354,12 +4414,7 @@ regset_search_body_position_lead(OnigRegSet* set,    n   = set->n;    enc = set->enc; -    s = (UChar* )start; -  if (s > str) -    prev = onigenc_get_prev_char_head(enc, str, s); -  else -    prev = (UChar* )NULL;    sr = (SearchRange* )xmalloc(sizeof(*sr) * n);    CHECK_NULL_RETURN_MEMERR(sr); @@ -4375,18 +4430,16 @@ regset_search_body_position_lead(OnigRegSet* set,          else            sch_range = (UChar* )end; -        if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) { +        if (forward_search(reg, str, end, s, sch_range, &low, &high)) {            sr[i].state = SRS_LOW_HIGH;            sr[i].low  = low;            sr[i].high = high; -          sr[i].low_prev = low_prev;            sr[i].sch_range = sch_range;          }        }        else {          sch_range = (UChar* )end; -        if (forward_search(reg, str, end, s, sch_range, -                           &low, &high, (UChar** )NULL)) { +        if (forward_search(reg, str, end, s, sch_range, &low, &high)) {            goto total_active;          }        } @@ -4396,7 +4449,6 @@ regset_search_body_position_lead(OnigRegSet* set,        sr[i].state    = SRS_ALL_RANGE;        sr[i].low      = s;        sr[i].high     = (UChar* )range; -      sr[i].low_prev = prev;      }    } @@ -4412,10 +4464,9 @@ regset_search_body_position_lead(OnigRegSet* set,          if (s <  sr[i].low) continue;          if (s >= sr[i].high) {            if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range, -                             &low, &high, &low_prev) != 0) { +                             &low, &high) != 0) {              sr[i].low      = low;              sr[i].high     = high; -            sr[i].low_prev = low_prev;              if (s < low) continue;            }            else { @@ -4436,16 +4487,13 @@ regset_search_body_position_lead(OnigRegSet* set,          for (i = 0; i < n; i++) {            if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {              low = sr[i].low; -            low_prev = sr[i].low_prev;            }          }          if (low == range) break;          s = low; -        prev = low_prev;        }        else { -        prev = s;          s += enclen(enc, s);        }      } while (1); @@ -4459,10 +4507,9 @@ regset_search_body_position_lead(OnigRegSet* set,            if (s <  sr[i].low) continue;            if (s >= sr[i].high) {              if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range, -                               &low, &high, &low_prev) != 0) { +                               &low, &high) != 0) {                sr[i].low      = low;                sr[i].high     = high; -              /* sr[i].low_prev = low_prev; */                if (s < low) continue;              }              else { @@ -4483,7 +4530,6 @@ regset_search_body_position_lead(OnigRegSet* set,        if (set->anychar_inf != 0)          prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end); -      prev = s;        s += enclen(enc, s);      } while (1);    } @@ -4552,7 +4598,7 @@ onig_regset_search_with_param(OnigRegSet* set,  {    int r;    int i; -  UChar *s, *prev; +  UChar *s;    regex_t* reg;    OnigEncoding enc;    OnigRegion* region; @@ -4654,7 +4700,6 @@ onig_regset_search_with_param(OnigRegSet* set,    else if (str == end) { /* empty string */      start = end = str;      s = (UChar* )start; -    prev = (UChar* )NULL;      msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);      CHECK_NULL_RETURN_MEMERR(msas); @@ -4669,7 +4714,7 @@ onig_regset_search_with_param(OnigRegSet* set,          /* Can't use REGSET_MATCH_AND_RETURN_CHECK()             because r must be set regex index (i)          */ -        r = match_at(reg, str, end, end, s, prev, msas + i); +        r = match_at(reg, str, end, end, s, msas + i);          if (r != ONIG_MISMATCH) {            if (r >= 0) {              r = i; @@ -4814,7 +4859,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,    else      s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); -  while (s >= text) { +  while (PTR_GE(s, text)) {      if (*s == *target) {        p = s + 1;        t = target + 1; @@ -4855,7 +4900,7 @@ sunday_quick_search_step_forward(regex_t* reg,    tail = target_end - 1;    tlen1 = (int )(tail - target);    end = text_range; -  if (end + tlen1 > text_end) +  if (tlen1 > text_end - end)      end = text_end - tlen1;    map_offset = reg->map_offset; @@ -4893,15 +4938,38 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,    const UChar *s, *t, *p, *end;    const UChar *tail;    int map_offset; - -  end = text_range + (target_end - target); -  if (end > text_end) -    end = text_end; +  ptrdiff_t target_len;    map_offset = reg->map_offset;    tail = target_end - 1; -  s = text + (tail - target); +  target_len = target_end - target; +  if (target_len > text_end - text_range) { +    end = text_end; +    if (target_len > text_end - text) +      return (UChar* )NULL; +  } +  else { +    end = text_range + target_len; +  } + +  s = text + target_len - 1; + +#ifdef USE_STRICT_POINTER_ADDRESS +  if (s < end) { +    while (TRUE) { +      p = s; +      t = tail; +      while (*p == *t) { +        if (t == target) return (UChar* )p; +        p--; t--; +      } +      if (text_end - s <= map_offset) break; +      if (reg->map[*(s + map_offset)] >= end - s) break; +      s += reg->map[*(s + map_offset)]; +    } +  } +#else    while (s < end) {      p = s;      t = tail; @@ -4909,9 +4977,10 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,        if (t == target) return (UChar* )p;        p--; t--;      } -    if (s + map_offset >= text_end) break; +    if (text_end - s <= map_offset) break;      s += reg->map[*(s + map_offset)];    } +#endif    return (UChar* )NULL;  } @@ -4937,7 +5006,7 @@ map_search_backward(OnigEncoding enc, UChar map[],  {    const UChar *s = text_start; -  while (s >= text) { +  while (PTR_GE(s, text)) {      if (map[*s]) return (UChar* )s;      s = onigenc_get_prev_char_head(enc, adjust_text, s); @@ -4963,13 +5032,16 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,                        OnigMatchParam* mp)  {    int r; -  UChar *prev;    MatchArg msa; +#ifndef USE_POSIX_API +  if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT; +#endif +    ADJUST_MATCH_PARAM(reg, mp);    MATCH_ARG_INIT(msa, reg, option, region, at, mp);    if (region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API        && !OPTON_POSIX_REGION(option)  #endif        ) { @@ -4986,8 +5058,14 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,        }      } -    prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); -    r = match_at(reg, str, end, end, at, prev, &msa); +    r = match_at(reg, str, end, end, at, &msa); +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +    if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) { +      if (msa.best_len >= 0) { +        r = msa.best_len; +      } +    } +#endif    }   end: @@ -4997,7 +5075,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,  static int  forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, -               UChar* range, UChar** low, UChar** high, UChar** low_prev) +               UChar* range, UChar** low, UChar** high)  {    UChar *p, *pprev = (UChar* )NULL; @@ -5081,33 +5159,18 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,      }      if (reg->dist_max == 0) { -      *low = p; -      if (low_prev) { -        if (*low > start) -          *low_prev = onigenc_get_prev_char_head(reg->enc, start, p); -        else -          *low_prev = onigenc_get_prev_char_head(reg->enc, -                                                 (pprev ? pprev : str), p); -      } +      *low  = p;        *high = p;      }      else {        if (reg->dist_max != INFINITE_LEN) {          if (p - str < reg->dist_max) {            *low = (UChar* )str; -          if (low_prev) -            *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);          }          else {            *low = p - reg->dist_max;            if (*low > start) { -            *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start, -                                                 *low, (const UChar** )low_prev); -          } -          else { -            if (low_prev) -              *low_prev = onigenc_get_prev_char_head(reg->enc, -                                                     (pprev ? pprev : str), *low); +            *low = onigenc_get_right_adjust_char_head(reg->enc, start, *low);            }          }        } @@ -5263,7 +5326,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,                  OnigOptionType option, OnigMatchParam* mp)  {    int r; -  UChar *s, *prev; +  UChar *s;    MatchArg msa;    const UChar *orig_start = start; @@ -5275,8 +5338,15 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,    ADJUST_MATCH_PARAM(reg, mp); +#ifndef USE_POSIX_API +  if (OPTON_POSIX_REGION(option)) { +    r = ONIGERR_INVALID_ARGUMENT; +    goto finish_no_msa; +  } +#endif +    if (region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API        && ! OPTON_POSIX_REGION(option)  #endif        ) { @@ -5294,27 +5364,14 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,    } -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE  #define MATCH_AND_RETURN_CHECK(upper_range) \ -  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ -  if (r != ONIG_MISMATCH) {\ -    if (r >= 0) {\ -      if (! OPTON_FIND_LONGEST(reg->options)) {\ -        goto match;\ -      }\ -    }\ -    else goto finish; /* error */ \ -  } -#else -#define MATCH_AND_RETURN_CHECK(upper_range) \ -  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ +  r = match_at(reg, str, end, (upper_range), s, &msa);\    if (r != ONIG_MISMATCH) {\      if (r >= 0) {\        goto match;\      }\      else goto finish; /* error */ \    } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */    /* anchor optimize: resume search range */ @@ -5422,7 +5479,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,      if (reg->threshold_len == 0) {        start = end = str = address_for_empty_string;        s = (UChar* )start; -      prev = (UChar* )NULL;        MATCH_ARG_INIT(msa, reg, option, region, start, mp);        MATCH_AND_RETURN_CHECK(end); @@ -5440,13 +5496,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,    s = (UChar* )start;    if (range > start) {   /* forward search */ -    if (s > str) -      prev = onigenc_get_prev_char_head(reg->enc, str, s); -    else -      prev = (UChar* )NULL; -      if (reg->optimize != OPTIMIZE_NONE) { -      UChar *sch_range, *low, *high, *low_prev; +      UChar *sch_range, *low, *high;        if (reg->dist_max != 0) {          if (reg->dist_max == INFINITE_LEN) @@ -5467,27 +5518,27 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,        if (reg->dist_max != INFINITE_LEN) {          do { -          if (! forward_search(reg, str, end, s, sch_range, &low, &high, -                               &low_prev)) goto mismatch; +          if (! forward_search(reg, str, end, s, sch_range, &low, &high)) +            goto mismatch;            if (s < low) {              s    = low; -            prev = low_prev;            }            while (s <= high) {              MATCH_AND_RETURN_CHECK(data_range); -            prev = s;              s += enclen(reg->enc, s);            }          } while (s < range);          goto mismatch;        }        else { /* check only. */ -        if (! forward_search(reg, str, end, s, sch_range, &low, &high, -                             (UChar** )NULL)) goto mismatch; +        if (! forward_search(reg, str, end, s, sch_range, &low, &high)) +          goto mismatch;          if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 &&              (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {            do { +            UChar* prev; +              MATCH_AND_RETURN_CHECK(data_range);              prev = s;              s += enclen(reg->enc, s); @@ -5504,7 +5555,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,      do {        MATCH_AND_RETURN_CHECK(data_range); -      prev = s;        s += enclen(reg->enc, s);      } while (s < range); @@ -5549,12 +5599,11 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,            if (s > high)              s = high; -          while (s >= low) { -            prev = onigenc_get_prev_char_head(reg->enc, str, s); +          while (PTR_GE(s, low)) {              MATCH_AND_RETURN_CHECK(orig_start); -            s = prev; +            s = onigenc_get_prev_char_head(reg->enc, str, s);            } -        } while (s >= range); +        } while (PTR_GE(s, range));          goto mismatch;        }        else { /* check only. */ @@ -5566,10 +5615,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,      }      do { -      prev = onigenc_get_prev_char_head(reg->enc, str, s);        MATCH_AND_RETURN_CHECK(orig_start); -      s = prev; -    } while (s >= range); +      s = onigenc_get_prev_char_head(reg->enc, str, s); +    } while (PTR_GE(s, range));    }   mismatch: @@ -5589,7 +5637,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,    /* If result is mismatch and no FIND_NOT_EMPTY option,       then the region is not set in match_at(). */    if (OPTON_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API        && !OPTON_POSIX_REGION(option)  #endif        ) { @@ -5952,7 +6000,7 @@ extern int  onig_init_for_match_at(regex_t* reg)  {    return match_at(reg, (const UChar* )NULL, (const UChar* )NULL, -                  (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL, +                  (const UChar* )NULL, (const UChar* )NULL,                    (MatchArg* )NULL);  }  #endif @@ -6139,8 +6187,8 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i    const UChar* str;    StackType*   stk_base;    int i; -  StackIndex* mem_start_stk; -  StackIndex* mem_end_stk; +  StkPtrType* mem_start_stk; +  StkPtrType* mem_end_stk;    i = mem_num;    reg = a->regex; @@ -6150,7 +6198,7 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i    mem_end_stk   = a->mem_end_stk;    if (i > 0) { -    if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { +    if (a->mem_end_stk[i].i != INVALID_STACK_INDEX) {        *begin = (int )(STACK_MEM_START(reg, i) - str);        *end   = (int )(STACK_MEM_END(reg, i)   - str);      } diff --git a/src/regint.h b/src/regint.h index 04ebe0a..74a5c61 100644 --- a/src/regint.h +++ b/src/regint.h @@ -34,6 +34,7 @@  /* #define ONIG_DEBUG_COMPILE */  /* #define ONIG_DEBUG_SEARCH */  /* #define ONIG_DEBUG_MATCH */ +/* #define ONIG_DEBUG_MATCH_COUNTER */  /* #define ONIG_DONT_OPTIMIZE */  /* for byte-code statistical data. */ @@ -41,7 +42,7 @@  #if defined(ONIG_DEBUG_PARSE) || defined(ONIG_DEBUG_MATCH) || \      defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ -    defined(ONIG_DEBUG_STATISTICS) +    defined(ONIG_DEBUG_MATCH_COUNTER) || defined(ONIG_DEBUG_STATISTICS)  #ifndef ONIG_DEBUG  #define ONIG_DEBUG  #define DBGFP   stderr @@ -70,23 +71,29 @@  #endif  /* internal config */ +#define USE_CHECK_VALIDITY_OF_STRING_IN_TREE  #define USE_OP_PUSH_OR_JUMP_EXACT  #define USE_QUANT_PEEK_NEXT  #define USE_ST_LIBRARY  #define USE_TIMEOFDAY +#define USE_STRICT_POINTER_ADDRESS +#define USE_STRICT_POINTER_COMPARISON  #define USE_WORD_BEGIN_END   /* "\<", "\>" */  #define USE_CAPTURE_HISTORY  #define USE_VARIABLE_META_CHARS -#define USE_POSIX_API_REGION_OPTION  #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE  /* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */ +/* enabled by configure --enable-posix-api=yes */ +/* #define USE_POSIX_API */ + +#define DEFAULT_PARSE_DEPTH_LIMIT           4096  #define INIT_MATCH_STACK_SIZE                160  #define DEFAULT_MATCH_STACK_LIMIT_SIZE         0 /* unlimited */  #define DEFAULT_RETRY_LIMIT_IN_MATCH    10000000  #define DEFAULT_RETRY_LIMIT_IN_SEARCH          0 /* unlimited */ -#define DEFAULT_PARSE_DEPTH_LIMIT           4096 +#define DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH    0 /* unlimited */  #define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL    20 @@ -181,6 +188,12 @@  #define CHECK_NULL_RETURN_MEMERR(p)   if (IS_NULL(p)) return ONIGERR_MEMORY  #define NULL_UCHARP                   ((UChar* )0) +#ifdef USE_STRICT_POINTER_COMPARISON +#define PTR_GE(p,q)   ((p) != NULL && (p) >= (q)) +#else +#define PTR_GE(p,q)   (p) >= (q) +#endif +  #ifndef ONIG_INT_MAX  #define ONIG_INT_MAX    INT_MAX  #endif @@ -255,11 +268,22 @@  #ifdef _WIN32 -#if defined(_MSC_VER) && (_MSC_VER < 1300) +#ifdef _MSC_VER + +#if _MSC_VER < 1300  typedef int           intptr_t;  typedef unsigned int  uintptr_t;  #endif + +#if _MSC_VER < 1600 +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#endif +  #endif +#endif /* _WIN32 */  #if SIZEOF_VOIDP == SIZEOF_LONG  typedef unsigned long hash_data_type; @@ -378,6 +402,9 @@ typedef unsigned int  MemStatusType;  #define OPTON_POSIX_REGION(option)   ((option) & ONIG_OPTION_POSIX_REGION)  #define OPTON_CHECK_VALIDITY_OF_STRING(option)  ((option) & \                                        ONIG_OPTION_CHECK_VALIDITY_OF_STRING) +#define OPTON_NOT_BEGIN_STRING(option)    ((option) & ONIG_OPTION_NOT_BEGIN_STRING) +#define OPTON_NOT_END_STRING(option)      ((option) & ONIG_OPTION_NOT_END_STRING) +#define OPTON_NOT_BEGIN_POSITION(option)  ((option) & ONIG_OPTION_NOT_BEGIN_POSITION)  #define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \    ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) @@ -562,10 +589,14 @@ enum OpCode {    OP_BACKREF_N_IC,    OP_BACKREF_MULTI,    OP_BACKREF_MULTI_IC, +#ifdef USE_BACKREF_WITH_LEVEL    OP_BACKREF_WITH_LEVEL,        /* \k<xxx+n>, \k<xxx-n> */    OP_BACKREF_WITH_LEVEL_IC,     /* \k<xxx+n>, \k<xxx-n> */ +#endif    OP_BACKREF_CHECK,             /* (?(n)), (?('name')) */ +#ifdef USE_BACKREF_WITH_LEVEL    OP_BACKREF_CHECK_WITH_LEVEL,  /* (?(n-level)), (?('name-level')) */ +#endif    OP_MEM_START,    OP_MEM_START_PUSH,     /* push back-tracker to stack */    OP_MEM_END_PUSH,       /* push back-tracker to stack */ @@ -891,6 +922,9 @@ typedef struct {      } update_var;      struct {        AbsAddrType addr; +#ifdef ONIG_DEBUG_MATCH_COUNTER +      MemNumType called_mem; +#endif      } call;  #ifdef USE_CALLOUT      struct { diff --git a/src/regparse.c b/src/regparse.c index cc015a7..dd2824b 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -290,7 +290,7 @@ bbuf_clone(BBuf** rto, BBuf* from)    CHECK_NULL_RETURN_MEMERR(to);    r = BB_INIT(to, from->alloc);    if (r != 0) { -    xfree(to->p); +    bbuf_free(to);      *rto = 0;      return r;    } @@ -303,6 +303,8 @@ static int  backref_rel_to_abs(int rel_no, ScanEnv* env)  {    if (rel_no > 0) { +    if (rel_no > ONIG_INT_MAX - env->num_mem) +      return ONIGERR_INVALID_BACKREF;      return env->num_mem + rel_no;    }    else { @@ -437,6 +439,7 @@ strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end              int capa)  {    UChar* r; +  ptrdiff_t dest_delta = dest_end - dest;    if (dest)      r = (UChar* )xrealloc(dest, capa + 1); @@ -444,7 +447,7 @@ strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end      r = (UChar* )xmalloc(capa + 1);    CHECK_NULL_RETURN(r); -  onig_strcpy(r + (dest_end - dest), src, src_end); +  onig_strcpy(r + dest_delta, src, src_end);    return r;  } @@ -1294,7 +1297,9 @@ static int  i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,                            void* arg ARG_UNUSED)  { -  xfree(e->name); +  if (IS_NOT_NULL(e)) { +    xfree(e->name); +  }    /*xfree(key->s); */ /* is same as e->name */    xfree(key);    xfree(e); @@ -2502,7 +2507,7 @@ node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)    CALL_(node)->by_number   = by_number;    CALL_(node)->name        = name;    CALL_(node)->name_end    = name_end; -  CALL_(node)->group_num   = gnum; +  CALL_(node)->called_gnum = gnum;    CALL_(node)->entry_count = 1;    return node;  } @@ -3135,7 +3140,6 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua    lower = QUANT_(quant)->lower;    upper = QUANT_(quant)->upper; -  onig_node_free(quant);    r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);    if (r != 0) goto err; @@ -3202,9 +3206,9 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,        simple:          r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,                                                          body, possessive, env); +        onig_node_free(quant);          if (r != 0) {            ns[4] = NULL_NODE; -          onig_node_free(quant);            onig_node_free(body);            goto err;          } @@ -3708,21 +3712,24 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_    while (! PEND) {      PFETCH(c); -    if (! IS_CODE_POINT_DIVIDE(c)) break; -  } -  if (IS_CODE_POINT_DIVIDE(c)) -    return ONIGERR_INVALID_CODE_POINT_VALUE; - -  if (c == '}') { -    *src = p; -    return 1; /* end of sequence */ -  } -  else if (c == '-' && in_cc == TRUE) { -    *src = p; -    return 2; /* range */ +    if (! IS_CODE_POINT_DIVIDE(c)) { +      if (c == '}') { +        *src = p; +        return 1; /* end of sequence */ +      } +      else if (c == '-' && in_cc == TRUE) { +        *src = p; +        return 2; /* range */ +      } +      PUNFETCH; +      break; +    } +    else { +      if (PEND) +        return ONIGERR_INVALID_CODE_POINT_VALUE; +    }    } -  PUNFETCH;    r = scan_number_of_base(&p, end, 1, enc, rcode, base);    if (r != 0) return r; @@ -3873,13 +3880,17 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)      to   = data[i*2+1];      if (pre <= from - 1) {        r = add_code_range_to_buf(pbuf, pre, from - 1); -      if (r != 0) return r; +      if (r != 0) { +        bbuf_free(*pbuf); +        return r; +      }      }      if (to == ~((OnigCodePoint )0)) break;      pre = to + 1;    }    if (to < ~((OnigCodePoint )0)) {      r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); +    if (r != 0) bbuf_free(*pbuf);    }    return r;  } @@ -4564,7 +4575,7 @@ fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env)  /* \M-, \C-, \c, or \... */  static int -fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) +fetch_escaped_value_raw(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)  {    int v;    OnigCodePoint c; @@ -4583,7 +4594,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)        if (PEND) return ONIGERR_END_PATTERN_AT_META;        PFETCH_S(c);        if (c == MC_ESC(env->syntax)) { -        v = fetch_escaped_value(&p, end, env, &c); +        v = fetch_escaped_value_raw(&p, end, env, &c);          if (v < 0) return v;        }        c = ((c & 0xff) | 0x80); @@ -4612,7 +4623,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)        }        else {          if (c == MC_ESC(env->syntax)) { -          v = fetch_escaped_value(&p, end, env, &c); +          v = fetch_escaped_value_raw(&p, end, env, &c);            if (v < 0) return v;          }          c &= 0x9f; @@ -4634,6 +4645,21 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)    return 0;  } +static int +fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) +{ +  int r; +  int len; + +  r = fetch_escaped_value_raw(src, end, env, val); +  if (r != 0) return r; + +  len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val); +  if (len < 0) return len; + +  return 0; +} +  static int fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env);  static OnigCodePoint @@ -5192,7 +5218,7 @@ fetch_token_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env, int state)            else {              int curr_state; -	    curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START; +            curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START;              r = check_code_point_sequence_cc(p, end, tok->base_num, enc,                                               curr_state);              if (r < 0) return r; @@ -6372,7 +6398,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)  }  static int -parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) +prs_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)  {  #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20  #define POSIX_BRACKET_NAME_MIN_LEN         4 @@ -6481,7 +6507,7 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)  }  static int -parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) +prs_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)  {    int r, ctype;    CClassNode* cc; @@ -6617,7 +6643,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,  }  static int -parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) +prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)  {    int r, neg, len, fetched, and_start;    OnigCodePoint in_code, curr_code; @@ -6715,6 +6741,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)            p = psave;            for (i = 1; i < len; i++) {              r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE); +            if (r < 0) goto err;            }            fetched = 0;          } @@ -6759,7 +6786,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)        break;      case TK_CC_POSIX_BRACKET_OPEN: -      r = parse_posix_bracket(cc, &p, end, env); +      r = prs_posix_bracket(cc, &p, end, env);        if (r < 0) goto err;        if (r == 1) {  /* is not POSIX bracket */          CC_ESC_WARN(env, (UChar* )"["); @@ -6869,7 +6896,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)          }          state = CS_COMPLETE; -        r = parse_cc(&anode, tok, &p, end, env); +        r = prs_cc(&anode, tok, &p, end, env);          if (r != 0) {            onig_node_free(anode);            goto cc_open_err; @@ -6967,14 +6994,14 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)    return r;  } -static int parse_alts(Node** top, PToken* tok, int term, -                      UChar** src, UChar* end, ScanEnv* env, int group_head); +static int prs_alts(Node** top, PToken* tok, int term, +                    UChar** src, UChar* end, ScanEnv* env, int group_head);  #ifdef USE_CALLOUT  /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */  static int -parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)  {    int r;    int i; @@ -7078,18 +7105,18 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv    contents = onigenc_strdup(enc, code_start, code_end);    CHECK_NULL_RETURN_MEMERR(contents); -  r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); -  if (r != 0) { -    xfree(contents); -    return r; -  } -    e = onig_reg_callout_list_at(env->reg, num);    if (IS_NULL(e)) {      xfree(contents);      return ONIGERR_MEMORY;    } +  r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); +  if (r != 0) { +    xfree(contents); +    return r; +  } +    e->of      = ONIG_CALLOUT_OF_CONTENTS;    e->in      = in;    e->name_id = ONIG_NON_NAME_ID; @@ -7101,7 +7128,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv  }  static long -parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl) +prs_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)  {    long v;    long d; @@ -7137,10 +7164,27 @@ parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long*    return ONIG_NORMAL;  } +static void +clear_callout_args(int n, unsigned int types[], OnigValue vals[]) +{ +  int i; + +  for (i = 0; i < n; i++) { +    switch (types[i]) { +    case ONIG_TYPE_STRING: +      if (IS_NOT_NULL(vals[i].s.start)) +        xfree(vals[i].s.start); +      break; +    default: +      break; +    } +  } +} +  static int -parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, -                   int max_arg_num, unsigned int types[], OnigValue vals[], -                   ScanEnv* env) +prs_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, +                 int max_arg_num, unsigned int types[], OnigValue vals[], +                 ScanEnv* env)  {  #define MAX_CALLOUT_ARG_BYTE_LENGTH   128 @@ -7168,7 +7212,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,      bufend = buf;      s = e = p;      while (1) { -      if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; +      if (PEND) { +        r = ONIGERR_INVALID_CALLOUT_PATTERN; +        goto err_clear; +      }        e = p;        PFETCH_S(c); @@ -7196,8 +7243,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,          add_char:            if (skip_mode == FALSE) {              clen = p - e; -            if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) -              return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */ +            if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) { +              r = ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */ +              goto err_clear; +            }              xmemcpy(bufend, e, clen);              bufend += clen; @@ -7208,15 +7257,17 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,      }      if (cn != 0) { -      if (max_arg_num >= 0 && n >= max_arg_num) -        return ONIGERR_INVALID_CALLOUT_ARG; +      if (max_arg_num >= 0 && n >= max_arg_num) { +        r = ONIGERR_INVALID_CALLOUT_ARG; +        goto err_clear; +      }        if (skip_mode == FALSE) {          if ((types[n] & ONIG_TYPE_LONG) != 0) {            int fixed = 0;            if (cn > 0) {              long rl; -            r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl); +            r = prs_long(enc, buf, bufend, 1, LONG_MAX, &rl);              if (r == ONIG_NORMAL) {                vals[n].l = rl;                fixed = 1; @@ -7226,8 +7277,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,            if (fixed == 0) {              types[n] = (types[n] & ~ONIG_TYPE_LONG); -            if (types[n] == ONIG_TYPE_VOID) -              return ONIGERR_INVALID_CALLOUT_ARG; +            if (types[n] == ONIG_TYPE_VOID) { +              r = ONIGERR_INVALID_CALLOUT_ARG; +              goto err_clear; +            }            }          } @@ -7236,22 +7289,29 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,            break;          case ONIG_TYPE_CHAR: -          if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG; +          if (cn != 1) { +            r = ONIGERR_INVALID_CALLOUT_ARG; +            goto err_clear; +          }            vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);            break;          case ONIG_TYPE_STRING:            {              UChar* rs = onigenc_strdup(enc, buf, bufend); -            CHECK_NULL_RETURN_MEMERR(rs); +            if (IS_NULL(rs)) { +              r = ONIGERR_MEMORY; goto err_clear; +            }              vals[n].s.start = rs;              vals[n].s.end   = rs + (e - s);            }            break;          case ONIG_TYPE_TAG: -          if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) -            return ONIGERR_INVALID_CALLOUT_TAG_NAME; +          if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) { +            r = ONIGERR_INVALID_CALLOUT_TAG_NAME; +            goto err_clear; +          }            vals[n].s.start = s;            vals[n].s.end   = e; @@ -7259,7 +7319,8 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,          case ONIG_TYPE_VOID:          case ONIG_TYPE_POINTER: -          return ONIGERR_PARSER_BUG; +          r = ONIGERR_PARSER_BUG; +          goto err_clear;            break;          }        } @@ -7270,15 +7331,23 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,      if (c == cterm) break;    } -  if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN; +  if (c != cterm) { +    r = ONIGERR_INVALID_CALLOUT_PATTERN; +    goto err_clear; +  }    *src = p;    return n; + + err_clear: +  if (skip_mode == FALSE) +    clear_callout_args(n, types, vals); +  return r;  }  /* (*name[TAG]) (*name[TAG]{a,b,..}) */  static int -parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)  {    int r;    int i; @@ -7343,7 +7412,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en      /* read for single check only */      save = p; -    arg_num = parse_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env); +    arg_num = prs_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);      if (arg_num < 0) return arg_num;      is_not_single = PPEEK_IS(cterm) ?  0 : 1; @@ -7357,10 +7426,13 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en        types[i] = get_callout_arg_type_by_name_id(name_id, i);      } -    arg_num = parse_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env); +    arg_num = prs_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);      if (arg_num < 0) return arg_num; -    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +    if (PEND) { +      r = ONIGERR_END_PATTERN_IN_GROUP; +      goto err_clear; +    }      PFETCH_S(c);    }    else { @@ -7379,32 +7451,40 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en    in = onig_get_callout_in_by_name_id(name_id);    opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id); -  if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) -    return ONIGERR_INVALID_CALLOUT_ARG; +  if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) { +    r = ONIGERR_INVALID_CALLOUT_ARG; +    goto err_clear; +  } -  if (c != cterm) -    return ONIGERR_INVALID_CALLOUT_PATTERN; +  if (c != cterm) { +    r = ONIGERR_INVALID_CALLOUT_PATTERN; +    goto err_clear; +  }    r = reg_callout_list_entry(env, &num); -  if (r != 0) return r; +  if (r != 0) goto err_clear;    ext = onig_get_regex_ext(env->reg); -  CHECK_NULL_RETURN_MEMERR(ext); +  if (IS_NULL(ext)) { +    r = ONIGERR_MEMORY; goto err_clear; +  }    if (IS_NULL(ext->pattern)) {      r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); -    if (r != ONIG_NORMAL) return r; +    if (r != ONIG_NORMAL) goto err_clear;    }    if (tag_start != tag_end) {      r = callout_tag_entry(env, env->reg, tag_start, tag_end, num); -    if (r != ONIG_NORMAL) return r; +    if (r != ONIG_NORMAL) goto err_clear;    } -  r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); -  if (r != ONIG_NORMAL) return r; -    e = onig_reg_callout_list_at(env->reg, num); -  CHECK_NULL_RETURN_MEMERR(e); +  if (IS_NULL(e)) { +    r = ONIGERR_MEMORY; goto err_clear; +  } + +  r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); +  if (r != ONIG_NORMAL) goto err_clear;    e->of         = ONIG_CALLOUT_OF_NAME;    e->in         = in; @@ -7425,12 +7505,16 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en    *np = node;    *src = p;    return 0; + + err_clear: +  clear_callout_args(arg_num, types, vals); +  return r;  }  #endif  static int -parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, -          ScanEnv* env) +prs_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, +        ScanEnv* env)  {    int r, num;    Node *target; @@ -7457,7 +7541,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,      group:        r = fetch_token(tok, &p, end, env);        if (r < 0) return r; -      r = parse_alts(np, tok, term, &p, end, env, FALSE); +      r = prs_alts(np, tok, term, &p, end, env, FALSE);        if (r < 0) return r;        *src = p;        return 1; /* group */ @@ -7554,7 +7638,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,          r = fetch_token(tok, &p, end, env);          if (r < 0) return r; -        r = parse_alts(&absent, tok, term, &p, end, env, TRUE); +        r = prs_alts(&absent, tok, term, &p, end, env, TRUE);          if (r < 0) {            onig_node_free(absent);            return r; @@ -7600,7 +7684,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,        if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))          return ONIGERR_UNDEFINED_GROUP_OPTION; -      r = parse_callout_of_contents(np, ')', &p, end, env); +      r = prs_callout_of_contents(np, ')', &p, end, env);        if (r != 0) return r;        goto end; @@ -7620,10 +7704,12 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,          if (IS_CODE_DIGIT_ASCII(enc, c)              || c == '-' || c == '+' || c == '<' || c == '\'') { -          UChar* name_end; -          int back_num; +#ifdef USE_BACKREF_WITH_LEVEL            int exist_level;            int level; +#endif +          UChar* name_end; +          int back_num;            enum REF_NUM num_type;            int is_enclosed; @@ -7631,8 +7717,8 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,            if (! is_enclosed)              PUNFETCH;            prev = p; -          exist_level = 0;  #ifdef USE_BACKREF_WITH_LEVEL +          exist_level = 0;            name_end = NULL_UCHARP; /* no need. escape gcc warning. */            r = fetch_name_with_level(                      (OnigCodePoint )(is_enclosed != 0 ? c : '('), @@ -7709,7 +7795,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,                /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */                condition_is_checker = 0;                PFETCH(c); -              r = parse_callout_of_contents(&condition, ')', &p, end, env); +              r = prs_callout_of_contents(&condition, ')', &p, end, env);                if (r != 0) return r;                goto end_condition;              } @@ -7719,7 +7805,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,          else if (c == '*' &&                   IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {            condition_is_checker = 0; -          r = parse_callout_of_name(&condition, ')', &p, end, env); +          r = prs_callout_of_name(&condition, ')', &p, end, env);            if (r != 0) return r;            goto end_condition;          } @@ -7730,7 +7816,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,            condition_is_checker = 0;            r = fetch_token(tok, &p, end, env);            if (r < 0) return r; -          r = parse_alts(&condition, tok, term, &p, end, env, FALSE); +          r = prs_alts(&condition, tok, term, &p, end, env, FALSE);            if (r < 0) {              onig_node_free(condition);              return r; @@ -7773,7 +7859,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,              onig_node_free(condition);              return r;            } -          r = parse_alts(&target, tok, term, &p, end, env, TRUE); +          r = prs_alts(&target, tok, term, &p, end, env, TRUE);            if (r < 0) {              onig_node_free(condition);              onig_node_free(target); @@ -7949,7 +8035,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,              env->options = option;              r = fetch_token(tok, &p, end, env);              if (r < 0) return r; -            r = parse_alts(&target, tok, term, &p, end, env, FALSE); +            r = prs_alts(&target, tok, term, &p, end, env, FALSE);              env->options = prev;              if (r < 0) {                onig_node_free(target); @@ -7976,7 +8062,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,    else if (c == '*' &&             IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {      PINC; -    r = parse_callout_of_name(np, ')', &p, end, env); +    r = prs_callout_of_name(np, ')', &p, end, env);      if (r != 0) return r;      goto end; @@ -7996,7 +8082,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,    CHECK_NULL_RETURN_MEMERR(*np);    r = fetch_token(tok, &p, end, env);    if (r < 0) return r; -  r = parse_alts(&target, tok, term, &p, end, env, FALSE); +  r = prs_alts(&target, tok, term, &p, end, env, FALSE);    if (r < 0) {      onig_node_free(target);      return r; @@ -8006,7 +8092,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,    if (NODE_TYPE(*np) == NODE_BAG) {      if (BAG_(*np)->type == BAG_MEMORY) { -      /* Don't move this to previous of parse_alts() */ +      /* Don't move this to previous of prs_alts() */        r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);        if (r != 0) return r;      } @@ -8285,8 +8371,8 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)  }  static int -parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, -          ScanEnv* env, int group_head) +prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, +        ScanEnv* env, int group_head)  {    int r, len, group;    Node* qn; @@ -8311,7 +8397,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,    break;    case TK_SUBEXP_OPEN: -    r = parse_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env); +    r = prs_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);      if (r < 0) return r;      if (r == 1) { /* group */        if (group_head == 0) @@ -8341,7 +8427,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,          env->options = BAG_(*np)->o.options;          r = fetch_token(tok, src, end, env);          if (r < 0) return r; -        r = parse_alts(&target, tok, term, src, end, env, FALSE); +        r = prs_alts(&target, tok, term, src, end, env, FALSE);          env->options = prev;          if (r < 0) {            onig_node_free(target); @@ -8419,8 +8505,9 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,    case TK_CODE_POINT:      {        UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; -      len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); +      len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);        if (len < 0) return len; +      len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);  #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG        *np = node_new_str_crude(buf, buf + len, env->options);  #else @@ -8465,7 +8552,12 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,            *np = node_new_cclass();            CHECK_NULL_RETURN_MEMERR(*np);            cc = CCLASS_(*np); -          add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env); +          r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env); +          if (r != 0) { +            onig_node_free(*np); +            *np = NULL_NODE; +            return r; +          }            if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);          }          break; @@ -8478,7 +8570,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,      break;    case TK_CHAR_PROPERTY: -    r = parse_char_property(np, tok, src, end, env); +    r = prs_char_property(np, tok, src, end, env);      if (r != 0) return r;      break; @@ -8486,7 +8578,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,      {        CClassNode* cc; -      r = parse_cc(np, tok, src, end, env); +      r = prs_cc(np, tok, src, end, env);        if (r != 0) return r;        cc = CCLASS_(*np); @@ -8685,8 +8777,8 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,  }  static int -parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end, -             ScanEnv* env, int group_head) +prs_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end, +           ScanEnv* env, int group_head)  {    int r;    Node *node, **headp; @@ -8694,7 +8786,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,    *top = NULL;    INC_PARSE_DEPTH(env->parse_depth); -  r = parse_exp(&node, tok, term, src, end, env, group_head); +  r = prs_exp(&node, tok, term, src, end, env, group_head);    if (r < 0) {      onig_node_free(node);      return r; @@ -8712,7 +8804,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,      headp = &(NODE_CDR(*top));      while (r != TK_EOT && r != term && r != TK_ALT) { -      r = parse_exp(&node, tok, term, src, end, env, FALSE); +      r = prs_exp(&node, tok, term, src, end, env, FALSE);        if (r < 0) {          onig_node_free(node);          return r; @@ -8736,8 +8828,8 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,  /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */  static int -parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, -           ScanEnv* env, int group_head) +prs_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, +         ScanEnv* env, int group_head)  {    int r;    Node *node, **headp; @@ -8747,7 +8839,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,    INC_PARSE_DEPTH(env->parse_depth);    save_options = env->options; -  r = parse_branch(&node, tok, term, src, end, env, group_head); +  r = prs_branch(&node, tok, term, src, end, env, group_head);    if (r < 0) {      onig_node_free(node);      return r; @@ -8767,7 +8859,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,      while (r == TK_ALT) {        r = fetch_token(tok, src, end, env);        if (r < 0) return r; -      r = parse_branch(&node, tok, term, src, end, env, FALSE); +      r = prs_branch(&node, tok, term, src, end, env, FALSE);        if (r < 0) {          onig_node_free(node);          return r; @@ -8800,7 +8892,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,  }  static int -parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) +prs_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)  {    int r;    PToken tok; @@ -8808,7 +8900,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)    ptoken_init(&tok);    r = fetch_token(&tok, src, end, env);    if (r < 0) return r; -  r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE); +  r = prs_alts(top, &tok, TK_EOT, src, end, env, FALSE);    if (r < 0) return r;    return 0; @@ -8846,6 +8938,15 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,    RegexExt* ext;  #endif +  reg->string_pool        = 0; +  reg->string_pool_end    = 0; +  reg->num_mem            = 0; +  reg->num_repeat         = 0; +  reg->num_empty_check    = 0; +  reg->repeat_range_alloc = 0; +  reg->repeat_range       = (RepeatRange* )NULL; +  reg->empty_status_mem   = 0; +    names_clear(reg);    scan_env_clear(env); @@ -8863,7 +8964,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,      return ONIGERR_INVALID_WIDE_CHAR_VALUE;    p = (UChar* )pattern; -  r = parse_regexp(root, &p, (UChar* )end, env); +  r = prs_regexp(root, &p, (UChar* )end, env);    if (r != 0) return r;  #ifdef USE_CALL diff --git a/src/regparse.h b/src/regparse.h index 979e982..c60a42d 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -163,7 +163,7 @@ typedef struct {    struct _Node* body; /* to BagNode : BAG_MEMORY */    int     by_number; -  int     group_num; +  int     called_gnum;    UChar*  name;    UChar*  name_end;    int     entry_count; @@ -339,6 +339,7 @@ typedef struct {  #define NODE_ST_TEXT_SEGMENT_WORD   (1<<23)  #define NODE_ST_ABSENT_WITH_SIDE_EFFECTS (1<<24)  /* stopper or clear */  #define NODE_ST_FIXED_CLEN_MIN_SURE (1<<25) +#define NODE_ST_REFERENCED          (1<<26)  #define NODE_STATUS(node)           (((Node* )node)->u.base.status) @@ -374,6 +375,7 @@ typedef struct {  #define NODE_IS_TEXT_SEGMENT_WORD(node)  ((NODE_STATUS(node) & NODE_ST_TEXT_SEGMENT_WORD) != 0)  #define NODE_IS_ABSENT_WITH_SIDE_EFFECTS(node)  ((NODE_STATUS(node) & NODE_ST_ABSENT_WITH_SIDE_EFFECTS) != 0)  #define NODE_IS_FIXED_CLEN_MIN_SURE(node)  ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN_MIN_SURE) != 0) +#define NODE_IS_REFERENCED(node)      ((NODE_STATUS(node) & NODE_ST_REFERENCED) != 0)  #define NODE_PARENT(node)         ((node)->u.base.parent)  #define NODE_BODY(node)           ((node)->u.base.body) diff --git a/src/regposerr.c b/src/regposerr.c index 12d95a9..e5b7899 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -37,6 +37,18 @@  #include "config.h"  #include "onigposix.h" +#undef regex_t +#undef regmatch_t +#undef regoff_t +#undef regcomp +#undef regexec +#undef regfree +#undef regerror +#undef reg_set_encoding +#undef reg_name_to_group_numbers +#undef reg_foreach_name +#undef reg_number_of_names +  #ifndef ONIG_NO_STANDARD_C_HEADERS  #include <string.h>  #include <stdio.h> @@ -92,10 +104,9 @@ static char* ESTRING[] = {  }; -  extern size_t -regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf, -         size_t size) +onig_posix_regerror(int posix_ecode, const onig_posix_regex_t* reg ARG_UNUSED, +                    char* buf, size_t size)  {    char* s;    char tbuf[35]; @@ -121,3 +132,14 @@ regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,    }    return len;  } + +#ifdef USE_BINARY_COMPATIBLE_POSIX_API + +extern size_t +regerror(int posix_ecode, const onig_posix_regex_t* reg ARG_UNUSED, +         char* buf, size_t size) +{ +  return onig_posix_regerror(posix_ecode, reg, buf, size); +} + +#endif diff --git a/src/regposix.c b/src/regposix.c index 4e523a4..497ba02 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -33,6 +33,18 @@  #include "onigposix.h" +#undef regex_t +#undef regmatch_t +#undef regoff_t +#undef regcomp +#undef regexec +#undef regfree +#undef regerror +#undef reg_set_encoding +#undef reg_name_to_group_numbers +#undef reg_foreach_name +#undef reg_number_of_names +  #define ONIG_C(reg)    ((onig_regex_t* )((reg)->onig))  #define PONIG_C(reg)   ((onig_regex_t** )(&(reg)->onig)) @@ -64,6 +76,7 @@ onig2posix_error_code(int code)      { ONIGERR_MATCH_STACK_LIMIT_OVER,                     REG_EONIG_INTERNAL },      { ONIGERR_RETRY_LIMIT_IN_MATCH_OVER,                  REG_EONIG_INTERNAL },      { ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER,                 REG_EONIG_INTERNAL }, +    { ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER,           REG_EONIG_INTERNAL },      { ONIGERR_TYPE_BUG,                                   REG_EONIG_INTERNAL },      { ONIGERR_PARSER_BUG,                                 REG_EONIG_INTERNAL },      { ONIGERR_STACK_BUG,                                  REG_EONIG_INTERNAL }, @@ -144,7 +157,7 @@ onig2posix_error_code(int code)  }  extern int -regcomp(regex_t* reg, const char* pattern, int posix_options) +onig_posix_regcomp(onig_posix_regex_t* reg, const char* pattern, int posix_options)  {    int r, len;    OnigSyntaxType* syntax = OnigDefaultSyntax; @@ -178,12 +191,12 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)  }  extern int -regexec(regex_t* reg, const char* str, size_t nmatch, -        regmatch_t pmatch[], int posix_options) +onig_posix_regexec(onig_posix_regex_t* reg, const char* str, size_t nmatch, +                   onig_posix_regmatch_t pmatch[], int posix_options)  {    int r, i, len;    UChar* end; -  regmatch_t* pm; +  onig_posix_regmatch_t* pm;    OnigOptionType options;    options = ONIG_OPTION_POSIX_REGION; @@ -191,11 +204,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,    if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;    if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) { -    pm = (regmatch_t* )NULL; +    pm = (onig_posix_regmatch_t* )NULL;      nmatch = 0;    }    else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) { -    pm = (regmatch_t* )xmalloc(sizeof(regmatch_t) +    pm = (onig_posix_regmatch_t* )xmalloc(sizeof(onig_posix_regmatch_t)                                 * (ONIG_C(reg)->num_mem + 1));      if (pm == NULL)        return REG_ESPACE; @@ -212,7 +225,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,    if (r >= 0) {      r = 0; /* Match */      if (pm != pmatch && pm != NULL) { -      xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch); +      xmemcpy(pmatch, pm, sizeof(onig_posix_regmatch_t) * nmatch);      }    }    else if (r == ONIG_MISMATCH) { @@ -236,7 +249,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,  }  extern void -regfree(regex_t* reg) +onig_posix_regfree(onig_posix_regex_t* reg)  {    onig_free(ONIG_C(reg));    reg->onig = (void* )0; @@ -244,7 +257,7 @@ regfree(regex_t* reg)  extern void -reg_set_encoding(int mb_code) +onig_posix_reg_set_encoding(int mb_code)  {    OnigEncoding enc; @@ -279,15 +292,15 @@ reg_set_encoding(int mb_code)  }  extern int -reg_name_to_group_numbers(regex_t* reg, +onig_posix_reg_name_to_group_numbers(onig_posix_regex_t* reg,    const unsigned char* name, const unsigned char* name_end, int** nums)  {    return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);  }  typedef struct { -  int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*); -  regex_t* reg; +  int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*); +  onig_posix_regex_t* reg;    void* arg;  } i_wrap; @@ -301,8 +314,8 @@ i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,  }  extern int -reg_foreach_name(regex_t* reg, - int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), +onig_posix_reg_foreach_name(onig_posix_regex_t* reg, + int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*),   void* arg)  {    i_wrap warg; @@ -315,7 +328,58 @@ reg_foreach_name(regex_t* reg,  }  extern int -reg_number_of_names(regex_t* reg) +onig_posix_reg_number_of_names(onig_posix_regex_t* reg)  {    return onig_number_of_names(ONIG_C(reg));  } + + +#ifdef USE_BINARY_COMPATIBLE_POSIX_API + +extern int +regcomp(onig_posix_regex_t* reg, const char* pattern, int posix_options) +{ +  return onig_posix_regcomp(reg, pattern, posix_options); +} + +extern int +regexec(onig_posix_regex_t* reg, const char* str, size_t nmatch, +        onig_posix_regmatch_t pmatch[], int posix_options) +{ +  return onig_posix_regexec(reg, str, nmatch, pmatch, posix_options); +} + +extern void +regfree(onig_posix_regex_t* reg) +{ +  onig_posix_regfree(reg); +} + +extern void +reg_set_encoding(int mb_code) +{ +  onig_posix_reg_set_encoding(mb_code); +} + +extern int +reg_name_to_group_numbers(onig_posix_regex_t* reg, +  const unsigned char* name, const unsigned char* name_end, int** nums) +{ +  return onig_posix_reg_name_to_group_numbers(reg, name, name_end, nums); +} + +extern int +reg_foreach_name(onig_posix_regex_t* reg, +  int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*), +  void* arg) +{ +  return onig_posix_reg_foreach_name(reg, func, arg); +} + +extern int +reg_number_of_names(onig_posix_regex_t* reg) +{ +  return onig_posix_reg_number_of_names(reg); +} + +#endif /* USE_BINARY_COMPATIBLE_POSIX_API */ @@ -2,7 +2,7 @@    sjis.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -113,13 +113,15 @@ static int  code_to_mbclen(OnigCodePoint code)  {    if (code < 256) { -    return EncLen_SJIS[(int )code] == 1; +    if (EncLen_SJIS[(int )code] == 1) +      return 1;    } -  else if (code <= 0xffff) { -    return 2; +  else if (code < 0x10000) { +    if (EncLen_SJIS[(int )(code >>  8) & 0xff] == 2) +      return 2;    } -  else -    return ONIGERR_INVALID_CODE_POINT_VALUE; + +  return ONIGERR_INVALID_CODE_POINT_VALUE;  }  static OnigCodePoint @@ -151,6 +151,7 @@ st_init_table_with_size(type, size)  #endif    size = new_size(size);	/* round up to prime number */ +  if (size <= 0) return 0;    tbl = alloc(st_table);    if (tbl == 0) return 0; @@ -318,10 +319,13 @@ rehash(table)       register st_table *table;  {    register st_table_entry *ptr, *next, **new_bins; -  int i, old_num_bins = table->num_bins, new_num_bins; +  int i, new_num_bins, old_num_bins;    unsigned int hash_val; -  new_num_bins = new_size(old_num_bins+1); +  old_num_bins = table->num_bins; +  new_num_bins = new_size(old_num_bins + 1); +  if (new_num_bins <= 0) return ; +    new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));    if (new_bins == 0) {      return ; diff --git a/src/unicode.c b/src/unicode.c index 080da74..6703d4b 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -387,15 +387,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,          for (i = 0; i < ncs[0]; i++) {            for (j = 0; j < ncs[1]; j++) {              for (k = 0; k < ncs[2]; k++) { +              if (cs[0][i] == orig_codes[0] && cs[1][j] == orig_codes[1] && +                  cs[2][k] == orig_codes[2]) +                continue; +                items[n].byte_len = lens[2];                items[n].code_len = 3;                items[n].code[0]  = cs[0][i];                items[n].code[1]  = cs[1][j];                items[n].code[2]  = cs[2][k]; -              if (items[n].code[0] == orig_codes[0] && -                  items[n].code[1] == orig_codes[1] && -                  items[n].code[2] == orig_codes[2]) -                continue;                n++;              }            } @@ -431,13 +431,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,        for (i = 0; i < ncs[0]; i++) {          for (j = 0; j < ncs[1]; j++) { +          if (cs[0][i] == orig_codes[0] && cs[1][j] == orig_codes[1]) +            continue;            items[n].byte_len = lens[1];            items[n].code_len = 2;            items[n].code[0]  = cs[0][i];            items[n].code[1]  = cs[1][j]; -          if (items[n].code[0] == orig_codes[0] && -              items[n].code[1] == orig_codes[1]) -            continue;            n++;          }        } diff --git a/src/utf16_be.c b/src/utf16_be.c index d237b93..5014e18 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@    utf16_be.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/utf16_le.c b/src/utf16_le.c index f14d263..35ceb3c 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -2,7 +2,7 @@    utf16_le.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/utf32_be.c b/src/utf32_be.c index bdd3db7..31bd98b 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -2,7 +2,7 @@    utf32_be.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -67,7 +67,10 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)  static OnigCodePoint  utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)  { -  return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); +  OnigCodePoint code; + +  code = (OnigCodePoint )((((p[0] & 0x7f) * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); +  return code;  }  static int diff --git a/src/utf32_le.c b/src/utf32_le.c index 473ab74..f50cab7 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -2,7 +2,7 @@    utf32_le.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako + * Copyright (c) 2002-2020  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -67,7 +67,10 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)  static OnigCodePoint  utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)  { -  return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); +  OnigCodePoint code; + +  code = (OnigCodePoint )((((p[3] & 0x7f) * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); +  return code;  }  static int | 
