diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2019-11-29 11:26:35 +0100 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2019-11-29 11:26:35 +0100 | 
| commit | 4216de6a3336cbc6dddb572cb7e6ab6193bf3729 (patch) | |
| tree | 327a40dae71db474527a1281a205cc2ebddb2ce6 /src | |
| parent | 40f3d0030e6e98bcb02d6523e5ee48497dec49a6 (diff) | |
New upstream version 6.9.4upstream/6.9.4
Diffstat (limited to 'src')
75 files changed, 3350 insertions, 2775 deletions
| diff --git a/src/Makefile.windows b/src/Makefile.windows index 762cf07..1e87504 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -2,6 +2,9 @@  product_name = oniguruma +TEST_DIR = $(ONIG_DIR)/../test +WIN_DIR  = $(ONIG_DIR)/../windows +  CPPFLAGS =  CFLAGS = -O2 -nologo /W3  LDFLAGS = @@ -152,25 +155,24 @@ $(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/  $(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h  $(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h -# C library test -ctest: $(testc) -	.\$(testc) -# POSIX C library test -ptest: $(testp) -	.\$(testp) +test_regset: $(TEST_DIR)/test_regset.c $(libname) +	$(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern /utf-8 $(TEST_DIR)/test_regset.c $(libname) + +test_utf8: $(TEST_DIR)/test_utf8.c $(libname) +	$(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern /utf-8 $(TEST_DIR)/test_utf8.c $(libname) -$(testc): $(testc).c $(libname) -	$(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname) +testc: $(WIN_DIR)/testc.c $(libname) +	$(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern $(WIN_DIR)/testc.c $(libname) -$(testp): $(testc).c $(dlllib) -	$(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib) +testp: $(WIN_DIR)/testc.c $(libname) +	$(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern /DPOSIX_TEST $(WIN_DIR)/testc.c $(libname) -$(testc)u: $(testc)u.c $(libname) -	$(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) +testu: $(TEST_DIR)/testu.c $(libname) +	$(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern $(TEST_DIR)/testu.c $(libname)  clean: -	del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj +	del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\test_regset.exe $(BUILD_DIR)\test_utf8.exe $(BUILD_DIR)\testp.exe $(BUILD_DIR)\testc.exe $(BUILD_DIR)\testu.exe  samples: all diff --git a/src/ascii.c b/src/ascii.c index e83e4d6..f2dc0d3 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -2,7 +2,7 @@    ascii.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -2,7 +2,7 @@    big5.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,16 @@ big5_mbc_enc_len(const UChar* p)  }  static int +big5_code_to_mbclen(OnigCodePoint code) +{ +  if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; +  if ((code &    0xff00) != 0) return 2; +  if (EncLen_BIG5[(int )(code & 0xff)] == 1) return 1; + +  return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static int  is_valid_mbc_string(const UChar* p, const UChar* end)  {    while (p < end) { @@ -99,15 +109,6 @@ big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,                                     pp, end, lower);  } -#if 0 -static int -big5_is_mbc_ambiguous(OnigCaseFoldType flag, -                      const UChar** pp, const UChar* end) -{ -  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end); -} -#endif -  static int  big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)  { @@ -174,7 +175,7 @@ OnigEncodingType OnigEncodingBIG5 = {    1,          /* min enc length */    onigenc_is_mbc_newline_0x0a,    big5_mbc_to_code, -  onigenc_mb2_code_to_mbclen, +  big5_code_to_mbclen,    big5_code_to_mbc,    big5_mbc_case_fold,    onigenc_ascii_apply_all_case_fold, diff --git a/src/config.h.win32 b/src/config.h.win32 index 1f848e2..82a35b9 100644 --- a/src/config.h.win32 +++ b/src/config.h.win32 @@ -1,3 +1,9 @@ +#if defined(__MINGW32__) || _MSC_VER >= 1600 +#define HAVE_STDINT_H 1 +#endif +#if defined(__MINGW32__) || _MSC_VER >= 1800 +#define HAVE_INTTYPES_H 1 +#endif  #define HAVE_SYS_TYPES_H 1  #define HAVE_SYS_STAT_H 1  #define HAVE_MEMORY_H 1 diff --git a/src/config.h.win64 b/src/config.h.win64 index f72671b..7f19699 100644 --- a/src/config.h.win64 +++ b/src/config.h.win64 @@ -1,3 +1,9 @@ +#if defined(__MINGW32__) || _MSC_VER >= 1600 +#define HAVE_STDINT_H 1 +#endif +#if defined(__MINGW32__) || _MSC_VER >= 1800 +#define HAVE_INTTYPES_H 1 +#endif  #define HAVE_SYS_TYPES_H 1  #define HAVE_SYS_STAT_H 1  #define HAVE_MEMORY_H 1 diff --git a/src/config.h.windows.in b/src/config.h.windows.in index d8de1dd..d4f73d7 100644 --- a/src/config.h.windows.in +++ b/src/config.h.windows.in @@ -1,7 +1,14 @@ +#if defined(__MINGW32__) || _MSC_VER >= 1600 +#define HAVE_STDINT_H 1 +#endif +#if defined(__MINGW32__) || _MSC_VER >= 1800 +#define HAVE_INTTYPES_H 1 +#endif  #define HAVE_SYS_TYPES_H 1  #define HAVE_SYS_STAT_H 1  #define HAVE_MEMORY_H 1  #define HAVE_OFF_T 1 +  #define SIZEOF_INT 4  #define SIZEOF_LONG 4  #define SIZEOF_LONG_LONG 8 diff --git a/src/cp1251.c b/src/cp1251.c index b4ce4d8..fa20780 100644 --- a/src/cp1251.c +++ b/src/cp1251.c @@ -2,8 +2,8 @@    cp1251.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2006-2018  Byte      <byte AT mail DOT kna DOT ru> - *                          K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2006-2019  Byte      <byte AT mail DOT kna DOT ru> + *                          K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/euc_jp.c b/src/euc_jp.c index d17386d..640b3e3 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -2,7 +2,7 @@    euc_jp.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -120,25 +120,6 @@ code_to_mbclen(OnigCodePoint code)      return ONIGERR_INVALID_CODE_POINT_VALUE;  } -#if 0 -static int -code_to_mbc_first(OnigCodePoint code) -{ -  int first; - -  if ((code & 0xff0000) != 0) { -    first = (code >> 16) & 0xff; -  } -  else if ((code & 0xff00) != 0) { -    first = (code >> 8) & 0xff; -  } -  else { -    return (int )code; -  } -  return first; -} -#endif -  static int  code_to_mbc(OnigCodePoint code, UChar *buf)  { diff --git a/src/euc_jp_prop.c b/src/euc_jp_prop.c index be719cf..a816f48 100644 --- a/src/euc_jp_prop.c +++ b/src/euc_jp_prop.c @@ -1,5 +1,5 @@  /* ANSI-C code produced by gperf version 3.1 */ -/* Command-line: /usr/local/bin/gperf -pt -T -L ANSI-C -N onigenc_euc_jp_lookup_property_name --output-file gperf1.tmp euc_jp_prop.gperf  */ +/* Command-line: gperf -pt -T -L ANSI-C -N onigenc_euc_jp_lookup_property_name --output-file gperf1.tmp euc_jp_prop.gperf  */  /* Computed positions: -k'1,3' */  #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ diff --git a/src/euc_kr.c b/src/euc_kr.c index bb968b0..7fa50af 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -2,7 +2,7 @@    euc_kr.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,16 @@ euckr_mbc_enc_len(const UChar* p)  }  static int +euckr_code_to_mbclen(OnigCodePoint code) +{ +  if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; +  if ((code &    0xff00) != 0) return 2; +  if (EncLen_EUCKR[(int )(code & 0xff)] == 1) return 1; + +  return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static int  is_valid_mbc_string(const UChar* p, const UChar* end)  {    while (p < end) { @@ -98,15 +108,6 @@ euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,                                     pp, end, lower);  } -#if 0 -static int -euckr_is_mbc_ambiguous(OnigCaseFoldType flag, -                       const UChar** pp, const UChar* end) -{ -  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end); -} -#endif -  static int  euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)  { @@ -149,7 +150,7 @@ OnigEncodingType OnigEncodingEUC_KR = {    1,          /* min enc length */    onigenc_is_mbc_newline_0x0a,    euckr_mbc_to_code, -  onigenc_mb2_code_to_mbclen, +  euckr_code_to_mbclen,    euckr_code_to_mbc,    euckr_mbc_case_fold,    onigenc_ascii_apply_all_case_fold, @@ -174,7 +175,7 @@ OnigEncodingType OnigEncodingEUC_CN = {    1,          /* min enc length */    onigenc_is_mbc_newline_0x0a,    euckr_mbc_to_code, -  onigenc_mb2_code_to_mbclen, +  euckr_code_to_mbclen,    euckr_code_to_mbc,    euckr_mbc_case_fold,    onigenc_ascii_apply_all_case_fold, diff --git a/src/euc_tw.c b/src/euc_tw.c index c9acaf1..8e72b97 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -2,7 +2,7 @@    euc_tw.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,20 @@ euctw_mbc_enc_len(const UChar* p)  }  static int +euctw_code_to_mbclen(OnigCodePoint code) +{ +       if ((code & 0xff000000) != 0) return 4; +  else if ((code &   0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; +  else if ((code &     0xff00) != 0) return 2; +  else { +    if (EncLen_EUCTW[(int )(code & 0xff)] == 1) +      return 1; + +    return ONIGERR_INVALID_CODE_POINT_VALUE; +  } +} + +static int  is_valid_mbc_string(const UChar* p, const UChar* end)  {    while (p < end) { @@ -155,7 +169,7 @@ OnigEncodingType OnigEncodingEUC_TW = {    1,          /* min enc length */    onigenc_is_mbc_newline_0x0a,    euctw_mbc_to_code, -  onigenc_mb4_code_to_mbclen, +  euctw_code_to_mbclen,    euctw_code_to_mbc,    euctw_mbc_case_fold,    onigenc_ascii_apply_all_case_fold, diff --git a/src/gb18030.c b/src/gb18030.c index 8d415b0..50898eb 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -3,7 +3,7 @@  **********************************************************************/  /*-   * Copyright (c) 2005-2019  KUBO Takehiro <kubo AT jiubao DOT org> - *                          K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + *                          K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -33,6 +33,7 @@  #if 1  #define DEBUG_GB18030(arg)  #else +#include <stdio.h>  #define DEBUG_GB18030(arg) printf arg  #endif @@ -76,6 +77,20 @@ gb18030_mbc_enc_len(const UChar* p)  }  static int +gb18030_code_to_mbclen(OnigCodePoint code) +{ +       if ((code & 0xff000000) != 0) return 4; +  else if ((code &   0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; +  else if ((code &     0xff00) != 0) return 2; +  else { +    if (GB18030_MAP[(int )(code & 0xff)] == CM) +      return ONIGERR_INVALID_CODE_POINT_VALUE; + +    return 1; +  } +} + +static int  is_valid_mbc_string(const UChar* p, const UChar* end)  {    while (p < end) { @@ -135,15 +150,6 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,                                     pp, end, lower);  } -#if 0 -static int -gb18030_is_mbc_ambiguous(OnigCaseFoldType flag, -                         const UChar** pp, const UChar* end) -{ -  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end); -} -#endif -  static int  gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)  { @@ -522,7 +528,7 @@ OnigEncodingType OnigEncodingGB18030 = {    1,          /* min enc length */    onigenc_is_mbc_newline_0x0a,    gb18030_mbc_to_code, -  onigenc_mb4_code_to_mbclen, +  gb18030_code_to_mbclen,    gb18030_code_to_mbc,    gb18030_mbc_case_fold,    onigenc_ascii_apply_all_case_fold, diff --git a/src/gperf_fold_key_conv.py b/src/gperf_fold_key_conv.py index f453186..c633100 100755 --- a/src/gperf_fold_key_conv.py +++ b/src/gperf_fold_key_conv.py @@ -12,7 +12,7 @@ REG_STR_AT = re.compile('str\[(\d+)\]')  REG_RETURN_TYPE = re.compile('^const\s+short\s+int\s*\*')  REG_FOLD_KEY = re.compile('unicode_fold(\d)_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+size_t\s+len\)')  REG_ENTRY = re.compile('\{".*?",\s*(-?\d+)\s*\}') -REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+') +REG_IF_LEN = re.compile('\s*if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')  REG_GET_HASH = re.compile('(?:register\s+)?(?:unsigned\s+)?int\s+key\s*=\s*hash\s*\(str,\s*len\);')  REG_GET_CODE = re.compile('(?:register\s+)?const\s+char\s*\*\s*s\s*=\s*wordlist\[key\]\.name;')  REG_CODE_CHECK = re.compile('if\s*\(\*str\s*==\s*\*s\s*&&\s*!strncmp.+\)') @@ -34,7 +34,7 @@ def parse_line(s, key_len):      if r != s: return r      r = re.sub(REG_ENTRY, '\\1', s)      if r != s: return r -    r = re.sub(REG_IF_LEN, 'if (0 == 0)', s) +    r = re.sub(REG_IF_LEN, '', s)      if r != s: return r      r = re.sub(REG_GET_HASH, 'int key = hash(codes);', s)      if r != s: return r diff --git a/src/gperf_unfold_key_conv.py b/src/gperf_unfold_key_conv.py index 3cf4836..d999d4e 100755 --- a/src/gperf_unfold_key_conv.py +++ b/src/gperf_unfold_key_conv.py @@ -12,7 +12,7 @@ REG_STR_AT = re.compile('str\[(\d+)\]')  REG_UNFOLD_KEY = re.compile('onigenc_unicode_unfold_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+size_t\s+len\)')  REG_ENTRY = re.compile('\{".+?",\s*/\*(.+?)\*/\s*(-?\d+),\s*(\d)\}')  REG_EMPTY_ENTRY = re.compile('\{"",\s*(-?\d+),\s*(\d)\}') -REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+') +REG_IF_LEN = re.compile('\s*if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')  REG_GET_HASH = re.compile('(?:register\s+)?(?:unsigned\s+)?int\s+key\s*=\s*hash\s*\(str,\s*len\);')  REG_GET_CODE = re.compile('(?:register\s+)?const\s+char\s*\*\s*s\s*=\s*wordlist\[key\]\.name;')  REG_CODE_CHECK = re.compile('if\s*\(\*str\s*==\s*\*s\s*&&\s*!strncmp.+\)') @@ -32,7 +32,7 @@ def parse_line(s):      if r != s: return r      r = re.sub(REG_EMPTY_ENTRY, '{0xffffffff, \\1, \\2}', s)      if r != s: return r -    r = re.sub(REG_IF_LEN, 'if (0 == 0)', s) +    r = re.sub(REG_IF_LEN, '', s)      if r != s: return r      r = re.sub(REG_GET_HASH, 'int key = hash(&code);', s)      if r != s: return r diff --git a/src/iso8859_1.c b/src/iso8859_1.c index 3b64942..e681c2a 100644 --- a/src/iso8859_1.c +++ b/src/iso8859_1.c @@ -2,7 +2,7 @@    iso8859_1.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -216,32 +216,6 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,    return 1;  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ -    if (*p >= 0xaa && *p <= 0xba) -      return FALSE; -    else -      return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_10.c b/src/iso8859_10.c index f5882bc..e98cffb 100644 --- a/src/iso8859_10.c +++ b/src/iso8859_10.c @@ -2,7 +2,7 @@    iso8859_10.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,28 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,    return 1;  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_11.c b/src/iso8859_11.c index da8fda0..8639ce2 100644 --- a/src/iso8859_11.c +++ b/src/iso8859_11.c @@ -2,7 +2,7 @@    iso8859_11.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/iso8859_13.c b/src/iso8859_13.c index 0cf251c..2bd460f 100644 --- a/src/iso8859_13.c +++ b/src/iso8859_13.c @@ -2,7 +2,7 @@    iso8859_13.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,32 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,    return 1;  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    /* 0xdf, 0xb5 are lower case letter, but can't convert. */ -    if (*p == 0xb5) -      return FALSE; -    else -      return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_14.c b/src/iso8859_14.c index 030e9f5..5030b55 100644 --- a/src/iso8859_14.c +++ b/src/iso8859_14.c @@ -2,7 +2,7 @@    iso8859_14.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,29 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,    return 1; /* return byte length of converted char to lower */  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, -                 const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_15.c b/src/iso8859_15.c index 859d727..f32c3de 100644 --- a/src/iso8859_15.c +++ b/src/iso8859_15.c @@ -2,7 +2,7 @@    iso8859_15.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,32 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,    return 1; /* return byte length of converted char to lower */  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    /* 0xdf etc.. are lower case letter, but can't convert. */ -    if (*p == 0xaa || *p == 0xb5 || *p == 0xba) -      return FALSE; -    else -      return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_16.c b/src/iso8859_16.c index 2614e56..22a653a 100644 --- a/src/iso8859_16.c +++ b/src/iso8859_16.c @@ -2,7 +2,7 @@    iso8859_16.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,28 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,    return 1; /* return byte length of converted char to lower */  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_2.c b/src/iso8859_2.c index ba030d5..dc3d0a1 100644 --- a/src/iso8859_2.c +++ b/src/iso8859_2.c @@ -2,7 +2,7 @@    iso8859_2.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,28 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,    return 1; /* return byte length of converted char to lower */  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static const OnigPairCaseFoldCodes CaseFoldMap[] = {   { 0xa1, 0xb1 },   { 0xa3, 0xb3 }, diff --git a/src/iso8859_3.c b/src/iso8859_3.c index f090d0b..49dc6b2 100644 --- a/src/iso8859_3.c +++ b/src/iso8859_3.c @@ -2,7 +2,7 @@    iso8859_3.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,32 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,    return 1;  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ -    if (*p == 0xb5) -      return FALSE; -    else -      return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_4.c b/src/iso8859_4.c index 57dc9fe..f3f6ba9 100644 --- a/src/iso8859_4.c +++ b/src/iso8859_4.c @@ -2,7 +2,7 @@    iso8859_4.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,31 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,    return 1; /* return byte length of converted char to lower */  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    if (*p == 0xa2) -      return FALSE; -    else -      return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_5.c b/src/iso8859_5.c index a090d25..a5f587c 100644 --- a/src/iso8859_5.c +++ b/src/iso8859_5.c @@ -2,7 +2,7 @@    iso8859_5.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -114,19 +114,6 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,    return 1;  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  (*pp)++; -  v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_6.c b/src/iso8859_6.c index 1c16c79..fb72442 100644 --- a/src/iso8859_6.c +++ b/src/iso8859_6.c @@ -2,7 +2,7 @@    iso8859_6.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/iso8859_7.c b/src/iso8859_7.c index 8c88351..018efac 100644 --- a/src/iso8859_7.c +++ b/src/iso8859_7.c @@ -2,7 +2,7 @@    iso8859_7.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -114,26 +114,6 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,    return 1;  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  (*pp)++; -  v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    if (*p == 0xc0 || *p == 0xe0) -      return FALSE; -    else -      return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/iso8859_8.c b/src/iso8859_8.c index bd3e94d..92a5eb1 100644 --- a/src/iso8859_8.c +++ b/src/iso8859_8.c @@ -2,7 +2,7 @@    iso8859_8.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/iso8859_9.c b/src/iso8859_9.c index 1d291d5..1f9bdea 100644 --- a/src/iso8859_9.c +++ b/src/iso8859_9.c @@ -2,7 +2,7 @@    iso8859_9.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,32 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,    return 1;  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -    (*pp)++; -    return TRUE; -  } - -  (*pp)++; -  v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  if ((v | BIT_CTYPE_LOWER) != 0) { -    /* 0xdf etc..  are lower case letter, but can't convert. */ -    if (*p >= 0xaa && *p <= 0xba) -      return FALSE; -    else -      return TRUE; -  } - -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  is_code_ctype(OnigCodePoint code, unsigned int ctype)  { @@ -2,7 +2,7 @@    koi8.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -115,25 +115,6 @@ koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,    return 1;  } -#if 0 -static int -koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end) -{ -  const OnigUChar* p = *pp; - -  (*pp)++; -  if (((flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0 && -       ONIGENC_IS_MBC_ASCII(p)) || -      ((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0 && -       !ONIGENC_IS_MBC_ASCII(p))) { -    int v = (EncKOI8_CtypeTable[*p] & -             (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -    return (v != 0 ? TRUE : FALSE); -  } -  return FALSE; -} -#endif -  static int  koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/koi8_r.c b/src/koi8_r.c index 1284f7f..c77302f 100644 --- a/src/koi8_r.c +++ b/src/koi8_r.c @@ -2,7 +2,7 @@    koi8_r.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -114,19 +114,6 @@ koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,    return 1;  } -#if 0 -static int -koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  int v; -  const UChar* p = *pp; - -  (*pp)++; -  v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -  return (v != 0 ? TRUE : FALSE); -} -#endif -  static int  koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)  { diff --git a/src/make_property.sh b/src/make_property.sh index bc5cf98..cef0a96 100755 --- a/src/make_property.sh +++ b/src/make_property.sh @@ -1,8 +1,9 @@  #!/bin/sh +GPERF=gperf +  TMP1=gperf1.tmp  TMP2=gperf2.tmp -GPERF=/usr/local/bin/gperf  GPERF_OPT='-pt -T -L ANSI-C' diff --git a/src/make_unicode_egcb_data.py b/src/make_unicode_egcb_data.py index 0f63f97..9c71796 100755 --- a/src/make_unicode_egcb_data.py +++ b/src/make_unicode_egcb_data.py @@ -1,7 +1,7 @@  #!/usr/bin/python  # -*- coding: utf-8 -*-  # make_unicode_egcb_data.py -# Copyright (c) 2017-2018  K.Kosako +# Copyright (c) 2017-2019  K.Kosako  import sys  import re @@ -13,18 +13,19 @@ PR_LINE_REG  = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")  PA_LINE_REG  = re.compile("(\w+)\s*;\s*(\w+)")  PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")  BL_LINE_REG  = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)") -VERSION_REG  = re.compile("#\s*.*-(\d+\.\d+\.\d+)\.txt") +VERSION_REG  = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt") -VERSION_INFO = None +VERSION_INFO = [-1, -1, -1]  DIC  = { }  PROPS = []  PropIndex = { }  def check_version_info(s): -  global VERSION_INFO    m = VERSION_REG.match(s)    if m is not None: -    VERSION_INFO = m.group(1) +    VERSION_INFO[0] = int(m.group(1)) +    VERSION_INFO[1] = int(m.group(2)) +    VERSION_INFO[2] = int(m.group(3))  def print_ranges(ranges):    for (start, end) in ranges: @@ -160,7 +161,7 @@ def parse_properties(path):          continue        if s[0] == '#': -        if VERSION_INFO is None: +        if VERSION_INFO[0] < 0:            check_version_info(s)        m = PR_LINE_REG.match(s) @@ -194,7 +195,7 @@ PROPS = sorted(PROPS)  print '/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */'  COPYRIGHT = '''  /*- - * Copyright (c) 2017-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2017-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -222,9 +223,11 @@ COPYRIGHT = '''  print COPYRIGHT  print '' -if VERSION_INFO is not None: -  print "#define GRAPHEME_BREAK_PROPERTY_VERSION  %s" % re.sub(r'[\.-]', '_', VERSION_INFO) -  print '' +if VERSION_INFO[0] < 0: +  raise RuntimeError("Version is not found") + +print "#define GRAPHEME_BREAK_PROPERTY_VERSION  %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]) +print ''  ranges = []  for prop in PROPS: diff --git a/src/make_unicode_fold.sh b/src/make_unicode_fold.sh index 35ce974..1d5cc1e 100755 --- a/src/make_unicode_fold.sh +++ b/src/make_unicode_fold.sh @@ -1,6 +1,6 @@  #!/bin/sh -GPERF=/usr/local/bin/gperf +GPERF=gperf  TMP0=gperf0.tmp  TMP1=gperf1.tmp diff --git a/src/make_unicode_fold_data.py b/src/make_unicode_fold_data.py index 783988c..55d5b88 100755 --- a/src/make_unicode_fold_data.py +++ b/src/make_unicode_fold_data.py @@ -1,7 +1,7 @@  #!/usr/bin/python  # -*- coding: utf-8 -*-  # make_unicode_fold_data.py -# Copyright (c) 2016-2018  K.Kosako +# Copyright (c) 2016-2019  K.Kosako  import sys  import re @@ -16,9 +16,9 @@ DataName = 'OnigUnicodeFolds'  ENCODING = 'utf-8'  LINE_REG = re.compile("([0-9A-F]{1,6}); (.); ([0-9A-F]{1,6})(?: ([0-9A-F]{1,6}))?(?: ([0-9A-F]{1,6}))?;(?:\s*#\s*)(.*)") -VERSION_REG  = re.compile("#.*-(\d+\.\d+\.\d+)\.txt") +VERSION_REG  = re.compile("#.*-(\d+)\.(\d+)\.(\d+)\.txt") -VERSION_INFO = None +VERSION_INFO = [-1, -1, -1]  FOLDS = {}  TURKISH_FOLDS = {} @@ -56,18 +56,19 @@ def form3bytes(x):      return "\\x%02x\\x%02x\\x%02x" % (x2, x1, x0)  def check_version_info(s): -    global VERSION_INFO -    if VERSION_INFO is None: -        m = VERSION_REG.match(s) -        if m is not None: -            VERSION_INFO = m.group(1) +  m = VERSION_REG.match(s) +  if m is not None: +    VERSION_INFO[0] = int(m.group(1)) +    VERSION_INFO[1] = int(m.group(2)) +    VERSION_INFO[2] = int(m.group(3))  def parse_line(s):      if len(s) == 0: -        return False +      return False      if s[0] == '#': +      if VERSION_INFO[0] < 0:          check_version_info(s) -        return False +      return False      m = LINE_REG.match(s)      if m is None: @@ -232,9 +233,11 @@ def output_fold_source(f, out_comment):      print >> f, "/* This file was generated by make_unicode_fold_data.py. */"      print >> f, '#include "regenc.h"'      print >> f, '' -    if VERSION_INFO is not None: -        print "#define UNICODE_CASEFOLD_VERSION  %s" % re.sub(r'[\.-]', '_', VERSION_INFO) -        print '' +    if VERSION_INFO[0] < 0: +      raise RuntimeError("Version is not found") + +    print "#define UNICODE_CASEFOLD_VERSION  %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]) +    print ''      #output_macros(f, DataName)      print >> f, ''      #output_typedef(f) @@ -246,7 +249,7 @@ HEAD = '''  /* This gperf source file was generated by make_unicode_fold_data.py */  /*- - * Copyright (c) 2017-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2017-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/make_unicode_property.sh b/src/make_unicode_property.sh index 124d76a..51c8951 100755 --- a/src/make_unicode_property.sh +++ b/src/make_unicode_property.sh @@ -1,10 +1,11 @@  #!/bin/sh +GPERF=gperf +  NAME=unicode_property_data  TMP1=gperf1.tmp  TMP2=gperf2.tmp  TMP= -GPERF=/usr/local/bin/gperf  GPERF_OPT='-T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool'  POOL_CAST='s/\(int *\)\(size_t *\)&\(\(struct +unicode_prop_name_pool_t *\* *\) *0\)->unicode_prop_name_pool_str([^,]+)/pool_offset(\1)/g' diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py index dc3071a..9776628 100755 --- a/src/make_unicode_property_data.py +++ b/src/make_unicode_property_data.py @@ -1,7 +1,7 @@  #!/usr/bin/python  # -*- coding: utf-8 -*-  # make_unicode_property_data.py -# Copyright (c) 2016-2018  K.Kosako +# Copyright (c) 2016-2019  K.Kosako  import sys  import re @@ -22,9 +22,12 @@ PR_LINE_REG  = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")  PA_LINE_REG  = re.compile("(\w+)\s*;\s*(\w+)")  PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")  BL_LINE_REG  = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)") -VERSION_REG  = re.compile("#\s*.*-(\d+\.\d+\.\d+)\.txt") +UNICODE_VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt") +EMOJI_VERSION_REG   = re.compile("(?i)#\s*Version:\s*(\d+)\.(\d+)") + +VERSION_INFO = [-1, -1, -1] +EMOJI_VERSION_INFO = [-1, -1] -VERSION_INFO = None  DIC  = { }  KDIC = { }  PropIndex = { } @@ -40,13 +43,6 @@ def fix_block_name(name):    s = re.sub(r'[- ]+', '_', name)    return 'In_' + s -def check_version_info(s): -  global VERSION_INFO -  m = VERSION_REG.match(s) -  if m is not None: -    VERSION_INFO = m.group(1) - -  def print_ranges(ranges):    for (start, end) in ranges:      print "0x%06x, 0x%06x" % (start, end) @@ -233,7 +229,8 @@ def parse_unicode_data_file(f):    normalize_ranges_in_dic(dic)    return dic, assigned -def parse_properties(path, klass, prop_prefix = None): +def parse_properties(path, klass, prop_prefix = None, version_reg = None): +  version_match = None    with open(path, 'r') as f:      dic = { }      prop = None @@ -243,9 +240,10 @@ def parse_properties(path, klass, prop_prefix = None):        if len(s) == 0:          continue -      if s[0] == '#': -        if VERSION_INFO is None: -          check_version_info(s) +      if s[0] == '#' and version_reg is not None and version_match is None: +        version_match = version_reg.match(s) +        if version_match is not None: +          continue        m = PR_LINE_REG.match(s)        if m: @@ -266,7 +264,7 @@ def parse_properties(path, klass, prop_prefix = None):          props.append(prop)    normalize_ranges_in_dic(dic) -  return (dic, props) +  return (dic, props, version_match)  def parse_property_aliases(path):    a = { } @@ -414,11 +412,11 @@ def entry_and_print_prop_and_index(name, index):    nname = normalize_prop_name(name)    print_prop_and_index(nname, index) -def parse_and_merge_properties(path, klass): -  dic, props = parse_properties(path, klass) +def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = None): +  dic, props, ver_m = parse_properties(path, klass, prop_prefix, version_reg)    merge_dic(DIC, dic)    merge_props(PROPS, props) -  return dic, props +  return dic, props, ver_m  ### main ###  argv = sys.argv @@ -447,11 +445,21 @@ with open('UnicodeData.txt', 'r') as f:  PROPS = DIC.keys()  PROPS = list_sub(PROPS, POSIX_LIST) -parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property') -dic, props = parse_and_merge_properties('Scripts.txt', 'Script') +_, _, ver_m = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property', None, UNICODE_VERSION_REG) +if ver_m is not None: +  VERSION_INFO[0] = int(ver_m.group(1)) +  VERSION_INFO[1] = int(ver_m.group(2)) +  VERSION_INFO[2] = int(ver_m.group(3)) + +dic, props, _ = parse_and_merge_properties('Scripts.txt', 'Script')  DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic)) +  parse_and_merge_properties('PropList.txt',   'Binary Property') -parse_and_merge_properties('emoji-data.txt', 'Emoji Property') + +_, _, ver_m = parse_and_merge_properties('emoji-data.txt', 'Emoji Property', None, EMOJI_VERSION_REG) +if ver_m is not None: +  EMOJI_VERSION_INFO[0] = int(ver_m.group(1)) +  EMOJI_VERSION_INFO[1] = int(ver_m.group(2))  PROPS.append('Unknown')  KDIC['Unknown'] = 'Script' @@ -464,9 +472,9 @@ dic, BLOCKS = parse_blocks('Blocks.txt')  merge_dic(DIC, dic)  if INCLUDE_GRAPHEME_CLUSTER_DATA: -  dic, props = parse_properties('GraphemeBreakProperty.txt', -                                'GraphemeBreak Property', -                                GRAPHEME_CLUSTER_BREAK_NAME_PREFIX) +  dic, props, _ = parse_properties('GraphemeBreakProperty.txt', +                                   'GraphemeBreak Property', +                                   GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)    merge_dic(DIC, dic)    merge_props(PROPS, props)    #prop = GRAPHEME_CLUSTER_BREAK_NAME_PREFIX + 'Other' @@ -533,9 +541,13 @@ sys.stdout.write(s)  if OUTPUT_LIST_MODE:    UPF = open("UNICODE_PROPERTIES", "w") -  if VERSION_INFO is not None: -    print >> UPF, "Unicode Properties (from Unicode Version: %s)" % VERSION_INFO -    print >> UPF, '' +  if VERSION_INFO[0] < 0: +    raise RuntimeError("Unicode Version is not found") +  if EMOJI_VERSION_INFO[0] < 0: +    raise RuntimeError("Emoji Version is not found") + +  print >> UPF, "Unicode Properties (Unicode Version: %d.%d.%d,  Emoji: %d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2], EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1]) +  print >> UPF, ''  index = -1  for prop in POSIX_LIST: @@ -569,9 +581,14 @@ if not(POSIX_ONLY):  print '%%'  print ''  if not(POSIX_ONLY): -  if VERSION_INFO is not None: -    print "#define UNICODE_PROPERTY_VERSION  %s" % re.sub(r'[\.-]', '_', VERSION_INFO) -    print '' +  if VERSION_INFO[0] < 0: +    raise RuntimeError("Unicode Version is not found") +  if EMOJI_VERSION_INFO[0] < 0: +    raise RuntimeError("Emoji Version is not found") + +  print "#define UNICODE_PROPERTY_VERSION  %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]) +  print "#define UNICODE_EMOJI_VERSION     %02d%02d" % (EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1]) +  print ''  print "#define PROPERTY_NAME_MAX_SIZE  %d" % (PROPERTY_NAME_MAX_LEN + 10)  print "#define CODE_RANGES_NUM         %d" % (index + 1) diff --git a/src/make_unicode_wb_data.py b/src/make_unicode_wb_data.py index 624fa7e..ddedd5d 100755 --- a/src/make_unicode_wb_data.py +++ b/src/make_unicode_wb_data.py @@ -13,18 +13,19 @@ PR_LINE_REG  = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")  PA_LINE_REG  = re.compile("(\w+)\s*;\s*(\w+)")  PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")  BL_LINE_REG  = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)") -VERSION_REG  = re.compile("#\s*.*-(\d+\.\d+\.\d+)\.txt") +VERSION_REG  = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt") -VERSION_INFO = None +VERSION_INFO = [-1, -1, -1]  DIC  = { }  PROPS = []  PropIndex = { }  def check_version_info(s): -  global VERSION_INFO    m = VERSION_REG.match(s)    if m is not None: -    VERSION_INFO = m.group(1) +    VERSION_INFO[0] = int(m.group(1)) +    VERSION_INFO[1] = int(m.group(2)) +    VERSION_INFO[2] = int(m.group(3))  def print_ranges(ranges):    for (start, end) in ranges: @@ -160,7 +161,7 @@ def parse_properties(path):          continue        if s[0] == '#': -        if VERSION_INFO is None: +        if VERSION_INFO[0] < 0:            check_version_info(s)        m = PR_LINE_REG.match(s) @@ -194,7 +195,7 @@ PROPS = sorted(PROPS)  print '/* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */'  COPYRIGHT = '''  /*- - * Copyright (c) 2019  K.Kosako  <kkosako0 AT gmail DOT com> + * Copyright (c) 2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -222,9 +223,11 @@ COPYRIGHT = '''  print COPYRIGHT  print '' -if VERSION_INFO is not None: -  print "#define WORD_BREAK_PROPERTY_VERSION  %s" % re.sub(r'[\.-]', '_', VERSION_INFO) -  print '' +if VERSION_INFO[0] < 0: +  raise RuntimeError("Version is not found.") + +print "#define WORD_BREAK_PROPERTY_VERSION  %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]) +print ''  ranges = []  for prop in PROPS: diff --git a/src/mktable.c b/src/mktable.c index 80ac08a..318bac0 100644 --- a/src/mktable.c +++ b/src/mktable.c @@ -2,7 +2,7 @@    mktable.c  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/onig_init.c b/src/onig_init.c index 7ad98b7..c660e7d 100644 --- a/src/onig_init.c +++ b/src/onig_init.c @@ -2,7 +2,7 @@    onig_init.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2016-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2016-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/oniggnu.h b/src/oniggnu.h index d688883..96d9085 100644 --- a/src/oniggnu.h +++ b/src/oniggnu.h @@ -4,7 +4,7 @@    oniggnu.h - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/onigposix.h b/src/onigposix.h index da0f919..5ff779f 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -4,7 +4,7 @@    onigposix.h - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -95,6 +95,7 @@ typedef struct {  #endif  #endif +#ifndef ONIG_STATIC  #ifndef ONIG_EXTERN  #if defined(_WIN32) && !defined(__GNUC__)  #if defined(ONIGURUMA_EXPORT) @@ -108,6 +109,9 @@ typedef struct {  #ifndef ONIG_EXTERN  #define ONIG_EXTERN   extern  #endif +#else +#define ONIG_EXTERN   extern +#endif  #ifndef ONIGURUMA_H  typedef unsigned int        OnigOptionType; diff --git a/src/oniguruma.h b/src/oniguruma.h index 90cf2d9..08ac6f7 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -4,7 +4,7 @@    oniguruma.h - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -36,9 +36,9 @@ extern "C" {  #define ONIGURUMA  #define ONIGURUMA_VERSION_MAJOR   6  #define ONIGURUMA_VERSION_MINOR   9 -#define ONIGURUMA_VERSION_TEENY   3 +#define ONIGURUMA_VERSION_TEENY   4 -#define ONIGURUMA_VERSION_INT     60903 +#define ONIGURUMA_VERSION_INT     60904  #ifndef P_  #if defined(__STDC__) || defined(_WIN32) @@ -687,6 +687,14 @@ typedef OnigRegexType*  OnigRegex;    typedef OnigRegexType  regex_t;  #endif +struct OnigRegSetStruct; +typedef struct OnigRegSetStruct OnigRegSet; + +typedef enum { +  ONIG_REGSET_POSITION_LEAD = 0, +  ONIG_REGSET_REGEX_LEAD    = 1, +  ONIG_REGSET_PRIORITY_TO_REGEX_ORDER = 2 +} OnigRegSetLead;  typedef struct {    int             num_of_elements; @@ -797,6 +805,26 @@ ONIG_EXTERN  int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));  ONIG_EXTERN  int onig_match_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp)); + +ONIG_EXTERN +int onig_regset_new P_((OnigRegSet** rset, int n, regex_t* regs[])); +ONIG_EXTERN +int onig_regset_add P_((OnigRegSet* set, regex_t* reg)); +ONIG_EXTERN +int onig_regset_replace P_((OnigRegSet* set, int at, regex_t* reg)); +ONIG_EXTERN +void onig_regset_free P_((OnigRegSet* set)); +ONIG_EXTERN +int onig_regset_number_of_regex P_((OnigRegSet* set)); +ONIG_EXTERN +regex_t* onig_regset_get_regex P_((OnigRegSet* set, int at)); +ONIG_EXTERN +OnigRegion* onig_regset_get_region P_((OnigRegSet* set, int at)); +ONIG_EXTERN +int onig_regset_search P_((OnigRegSet* set, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)); +ONIG_EXTERN +int onig_regset_search_with_param P_((OnigRegSet* set, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range,  OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)); +  ONIG_EXTERN  OnigRegion* onig_region_new P_((void));  ONIG_EXTERN diff --git a/src/regcomp.c b/src/regcomp.c index b96c793..69d4b95 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -2,7 +2,7 @@    regcomp.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -224,17 +224,17 @@ ops_free(regex_t* reg)  #endif      switch (opcode) { -    case OP_EXACTMBN: +    case OP_STR_MBN:        if (! is_in_string_pool(reg, op->exact_len_n.s))          xfree(op->exact_len_n.s);        break; -    case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC: +    case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N: case OP_STR_N_IC:        if (! is_in_string_pool(reg, op->exact_n.s))          xfree(op->exact_n.s);        break; -    case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4: -    case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2: -    case OP_EXACTMB2N3: case OP_EXACT1_IC: +    case OP_STR_1: case OP_STR_2: case OP_STR_3: case OP_STR_4: +    case OP_STR_5: case OP_STR_MB2N1: case OP_STR_MB2N2: +    case OP_STR_MB2N3: case OP_STR_1_IC:        break;      case OP_CCLASS_NOT: case OP_CCLASS: @@ -298,17 +298,17 @@ ops_calc_size_of_string_pool(regex_t* reg)  #endif      switch (opcode) { -    case OP_EXACTMBN: +    case OP_STR_MBN:        total += op->exact_len_n.len * op->exact_len_n.n;        break; -    case OP_EXACTN: -    case OP_EXACTN_IC: +    case OP_STR_N: +    case OP_STR_N_IC:        total += op->exact_n.n;        break; -    case OP_EXACTMB2N: +    case OP_STR_MB2N:        total += op->exact_n.n * 2;        break; -    case OP_EXACTMB3N: +    case OP_STR_MB3N:        total += op->exact_n.n * 3;        break; @@ -349,15 +349,15 @@ ops_make_string_pool(regex_t* reg)  #endif      switch (opcode) { -    case OP_EXACTMBN: +    case OP_STR_MBN:        len = op->exact_len_n.len * op->exact_len_n.n;        xmemcpy(curr, op->exact_len_n.s, len);        xfree(op->exact_len_n.s);        op->exact_len_n.s = curr;        curr += len;        break; -    case OP_EXACTN: -    case OP_EXACTN_IC: +    case OP_STR_N: +    case OP_STR_N_IC:        len = op->exact_n.n;      copy:        xmemcpy(curr, op->exact_n.s, len); @@ -365,11 +365,11 @@ ops_make_string_pool(regex_t* reg)        op->exact_n.s = curr;        curr += len;        break; -    case OP_EXACTMB2N: +    case OP_STR_MB2N:        len = op->exact_n.n * 2;        goto copy;        break; -    case OP_EXACTMB3N: +    case OP_STR_MB3N:        len = op->exact_n.n * 3;        goto copy;        break; @@ -427,7 +427,7 @@ onig_positive_int_multiply(int x, int y)  static void -swap_node(Node* a, Node* b) +node_swap(Node* a, Node* b)  {    Node c; @@ -452,6 +452,81 @@ swap_node(Node* a, Node* b)    }  } +static int +node_list_len(Node* list) +{ +  int len; + +  len = 1; +  while (IS_NOT_NULL(NODE_CDR(list))) { +    list = NODE_CDR(list); +    len++; +  } + +  return len; +} + +static Node* +node_list_add(Node* list, Node* x) +{ +  Node *n; + +  n = onig_node_new_list(x, NULL); +  if (IS_NULL(n)) return NULL_NODE; + +  if (IS_NOT_NULL(list)) { +    while (IS_NOT_NULL(NODE_CDR(list))) +      list = NODE_CDR(list); + +    NODE_CDR(list) = n; +  } + +  return n; +} + +static int +node_str_node_cat(Node* node, Node* add) +{ +  int r; + +  if (STR_(node)->flag != STR_(add)->flag) +    return ONIGERR_TYPE_BUG; + +  r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end); +  if (r != 0) return r; + +  if (NODE_STRING_IS_CASE_FOLD_MATCH(node)) +    STR_(node)->case_min_len += STR_(add)->case_min_len; + +  return 0; +} + +static int +node_str_cat_case_fold(Node* node, const UChar* s, const UChar* end, int case_min_len) +{ +  int r; + +  if (! NODE_STRING_IS_CASE_FOLD_MATCH(node)) +    return ONIGERR_TYPE_BUG; + +  r = onig_node_str_cat(node, s, end); +  if (r != 0) return r; + +  STR_(node)->case_min_len += case_min_len; +  return 0; +} + +static void +node_conv_to_str_node(Node* node, int flag) +{ +  NODE_SET_TYPE(node, NODE_STRING); +  STR_(node)->flag     = flag; +  STR_(node)->s        = STR_(node)->buf; +  STR_(node)->end      = STR_(node)->buf; +  STR_(node)->capacity = 0; +  STR_(node)->case_min_len = 0; +} +  static OnigLen  distance_add(OnigLen d1, OnigLen d2)  { @@ -549,52 +624,45 @@ static int compile_length_tree(Node* node, regex_t* reg);  static int compile_tree(Node* node, regex_t* reg, ScanEnv* env); -#define IS_NEED_STR_LEN_OP_EXACT(op) \ -   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\ -    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC) +#define IS_NEED_STR_LEN_OP(op) \ +   ((op) == OP_STR_N    || (op) == OP_STR_MB2N ||\ +    (op) == OP_STR_MB3N || (op) == OP_STR_MBN  || (op) == OP_STR_N_IC)  static int -select_str_opcode(int mb_len, int str_len, int ignore_case) +select_str_opcode(int mb_len, int str_len)  {    int op; -  if (ignore_case) { +  switch (mb_len) { +  case 1:      switch (str_len) { -    case 1:  op = OP_EXACT1_IC; break; -    default: op = OP_EXACTN_IC; break; +    case 1:  op = OP_STR_1; break; +    case 2:  op = OP_STR_2; break; +    case 3:  op = OP_STR_3; break; +    case 4:  op = OP_STR_4; break; +    case 5:  op = OP_STR_5; break; +    default: op = OP_STR_N; break;      } -  } -  else { -    switch (mb_len) { -    case 1: -      switch (str_len) { -      case 1:  op = OP_EXACT1; break; -      case 2:  op = OP_EXACT2; break; -      case 3:  op = OP_EXACT3; break; -      case 4:  op = OP_EXACT4; break; -      case 5:  op = OP_EXACT5; break; -      default: op = OP_EXACTN; break; -      } -      break; +    break; -    case 2: -      switch (str_len) { -      case 1:  op = OP_EXACTMB2N1; break; -      case 2:  op = OP_EXACTMB2N2; break; -      case 3:  op = OP_EXACTMB2N3; break; -      default: op = OP_EXACTMB2N;  break; -      } -      break; +  case 2: +    switch (str_len) { +    case 1:  op = OP_STR_MB2N1; break; +    case 2:  op = OP_STR_MB2N2; break; +    case 3:  op = OP_STR_MB2N3; break; +    default: op = OP_STR_MB2N;  break; +    } +    break; -    case 3: -      op = OP_EXACTMB3N; -      break; +  case 3: +    op = OP_STR_MB3N; +    break; -    default: -      op = OP_EXACTMBN; -      break; -    } +  default: +    op = OP_STR_MBN; +    break;    } +    return op;  } @@ -621,31 +689,43 @@ is_strict_real_node(Node* node)  }  static int -compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env) +compile_quant_body_with_empty_check(QuantNode* qn, regex_t* reg, ScanEnv* env)  {    int r; -  int saved_num_null_check = reg->num_null_check; +  int saved_num_empty_check; +  int emptiness; +  Node* body; + +  body = NODE_BODY((Node* )qn); +  emptiness = qn->emptiness; +  saved_num_empty_check = reg->num_empty_check;    if (emptiness != BODY_IS_NOT_EMPTY) {      r = add_op(reg, OP_EMPTY_CHECK_START);      if (r != 0) return r; -    COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */ -    reg->num_null_check++; +    COP(reg)->empty_check_start.mem = reg->num_empty_check; /* NULL CHECK ID */ +    reg->num_empty_check++;    } -  r = compile_tree(node, reg, env); +  r = compile_tree(body, reg, env);    if (r != 0) return r;    if (emptiness != BODY_IS_NOT_EMPTY) {      if (emptiness == BODY_IS_EMPTY_POSSIBILITY)        r = add_op(reg, OP_EMPTY_CHECK_END); -    else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM) -      r = add_op(reg, OP_EMPTY_CHECK_END_MEMST); +    else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM) { +      if (NODE_IS_EMPTY_STATUS_CHECK(qn) != 0) +        r = add_op(reg, OP_EMPTY_CHECK_END_MEMST); +      else +        r = add_op(reg, OP_EMPTY_CHECK_END); +    } +#ifdef USE_CALL      else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)        r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH); +#endif      if (r != 0) return r; -    COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */ +    COP(reg)->empty_check_end.mem = saved_num_empty_check; /* NULL CHECK ID */    }    return r;  } @@ -682,14 +762,13 @@ compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)  static int  add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, -                          regex_t* reg ARG_UNUSED, int ignore_case) +                          regex_t* reg ARG_UNUSED)  {    return 1;  }  static int -add_compile_string(UChar* s, int mb_len, int str_len, -                   regex_t* reg, int ignore_case) +add_compile_string(UChar* s, int mb_len, int str_len, regex_t* reg)  {    int op;    int r; @@ -697,14 +776,14 @@ add_compile_string(UChar* s, int mb_len, int str_len,    UChar* p;    UChar* end; -  op = select_str_opcode(mb_len, str_len, ignore_case); +  op = select_str_opcode(mb_len, str_len);    r = add_op(reg, op);    if (r != 0) return r;    byte_len = mb_len * str_len;    end = s + byte_len; -  if (op == OP_EXACTMBN) { +  if (op == OP_STR_MBN) {      p = onigenc_strdup(reg->enc, s, end);      CHECK_NULL_RETURN_MEMERR(p); @@ -712,11 +791,11 @@ add_compile_string(UChar* s, int mb_len, int str_len,      COP(reg)->exact_len_n.n   = str_len;      COP(reg)->exact_len_n.s   = p;    } -  else if (IS_NEED_STR_LEN_OP_EXACT(op)) { +  else if (IS_NEED_STR_LEN_OP(op)) {      p = onigenc_strdup(reg->enc, s, end);      CHECK_NULL_RETURN_MEMERR(p); -    if (op == OP_EXACTN_IC) +    if (op == OP_STR_N_IC)        COP(reg)->exact_n.n = byte_len;      else        COP(reg)->exact_n.n = str_len; @@ -724,8 +803,8 @@ add_compile_string(UChar* s, int mb_len, int str_len,      COP(reg)->exact_n.s = p;    }    else { +    xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));      xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len); -    COP(reg)->exact.s[byte_len] = '\0';    }    return 0; @@ -734,7 +813,7 @@ add_compile_string(UChar* s, int mb_len, int str_len,  static int  compile_length_string_node(Node* node, regex_t* reg)  { -  int rlen, r, len, prev_len, slen, ambig; +  int rlen, r, len, prev_len, slen;    UChar *p, *prev;    StrNode* sn;    OnigEncoding enc = reg->enc; @@ -743,7 +822,7 @@ compile_length_string_node(Node* node, regex_t* reg)    if (sn->end <= sn->s)      return 0; -  ambig = NODE_STRING_IS_AMBIG(node); +  if (NODE_STRING_IS_CASE_FOLD_MATCH(node) != 0) return 1;    p = prev = sn->s;    prev_len = enclen(enc, p); @@ -757,7 +836,7 @@ compile_length_string_node(Node* node, regex_t* reg)        slen++;      }      else { -      r = add_compile_string_length(prev, prev_len, slen, reg, ambig); +      r = add_compile_string_length(prev, prev_len, slen, reg);        rlen += r;        prev = p;        slen = 1; @@ -766,25 +845,59 @@ compile_length_string_node(Node* node, regex_t* reg)      p += len;    } -  r = add_compile_string_length(prev, prev_len, slen, reg, ambig); +  r = add_compile_string_length(prev, prev_len, slen, reg);    rlen += r;    return rlen;  }  static int -compile_length_string_raw_node(StrNode* sn, regex_t* reg) +compile_length_string_crude_node(StrNode* sn, regex_t* reg)  {    if (sn->end <= sn->s)      return 0;    return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s), -                                   reg, 0); +                                   reg); +} + +static int +compile_ambig_string_node(Node* node, regex_t* reg) +{ +  int r; +  int len; +  int byte_len; +  UChar* p; +  StrNode* sn; +  OnigEncoding enc = reg->enc; + +  sn = STR_(node); +  len = enclen(enc, sn->s); +  byte_len = (int )(sn->end - sn->s); +  if (len == byte_len) { +    r = add_op(reg, OP_STR_1_IC); +    if (r != 0) return r; + +    xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s)); +    xmemcpy(COP(reg)->exact.s, sn->s, (size_t )byte_len); +  } +  else { +    r = add_op(reg, OP_STR_N_IC); +    if (r != 0) return r; + +    p = onigenc_strdup(enc, sn->s, sn->end); +    CHECK_NULL_RETURN_MEMERR(p); + +    COP(reg)->exact_n.s = p; +    COP(reg)->exact_n.n = byte_len; +  } + +  return 0;  }  static int  compile_string_node(Node* node, regex_t* reg)  { -  int r, len, prev_len, slen, ambig; +  int r, len, prev_len, slen;    UChar *p, *prev, *end;    StrNode* sn;    OnigEncoding enc = reg->enc; @@ -794,7 +907,9 @@ compile_string_node(Node* node, regex_t* reg)      return 0;    end = sn->end; -  ambig = NODE_STRING_IS_AMBIG(node); +  if (NODE_STRING_IS_CASE_FOLD_MATCH(node) != 0) { +    return compile_ambig_string_node(node, reg); +  }    p = prev = sn->s;    prev_len = enclen(enc, p); @@ -807,7 +922,7 @@ compile_string_node(Node* node, regex_t* reg)        slen++;      }      else { -      r = add_compile_string(prev, prev_len, slen, reg, ambig); +      r = add_compile_string(prev, prev_len, slen, reg);        if (r != 0) return r;        prev  = p; @@ -818,16 +933,16 @@ compile_string_node(Node* node, regex_t* reg)      p += len;    } -  return add_compile_string(prev, prev_len, slen, reg, ambig); +  return add_compile_string(prev, prev_len, slen, reg);  }  static int -compile_string_raw_node(StrNode* sn, regex_t* reg) +compile_string_crude_node(StrNode* sn, regex_t* reg)  {    if (sn->end <= sn->s)      return 0; -  return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0); +  return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg);  }  static void* @@ -891,15 +1006,27 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)    return 0;  } +static void +set_addr_in_repeat_range(regex_t* reg) +{ +  int i; + +  for (i = 0; i < reg->num_repeat; i++) { +    RepeatRange* p = reg->repeat_range + i; +    int offset = p->u.offset; +    p->u.pcode = reg->ops + offset; +  } +} +  static int -entry_repeat_range(regex_t* reg, int id, int lower, int upper) +entry_repeat_range(regex_t* reg, int id, int lower, int upper, int ops_index)  {  #define REPEAT_RANGE_ALLOC  4 -  OnigRepeatRange* p; +  RepeatRange* p;    if (reg->repeat_range_alloc == 0) { -    p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); +    p = (RepeatRange* )xmalloc(sizeof(RepeatRange) * REPEAT_RANGE_ALLOC);      CHECK_NULL_RETURN_MEMERR(p);      reg->repeat_range = p;      reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; @@ -907,7 +1034,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)    else if (reg->repeat_range_alloc <= id) {      int n;      n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; -    p = (OnigRepeatRange* )xrealloc(reg->repeat_range, sizeof(OnigRepeatRange) * n); +    p = (RepeatRange* )xrealloc(reg->repeat_range, sizeof(RepeatRange) * n);      CHECK_NULL_RETURN_MEMERR(p);      reg->repeat_range = p;      reg->repeat_range_alloc = n; @@ -916,8 +1043,9 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)      p = reg->repeat_range;    } -  p[id].lower = lower; -  p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper); +  p[id].lower    = lower; +  p[id].upper    = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper); +  p[id].u.offset = ops_index;    return 0;  } @@ -932,24 +1060,16 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,    if (r != 0) return r;    COP(reg)->repeat.id   = num_repeat; -  COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC; +  COP(reg)->repeat.addr = SIZE_INC + target_len + OPSIZE_REPEAT_INC; -  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); +  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper, +                         COP_CURR_OFFSET(reg) + OPSIZE_REPEAT);    if (r != 0) return r; -  r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); +  r = compile_quant_body_with_empty_check(qn, reg, env);    if (r != 0) return r; -  if ( -#ifdef USE_CALL -      NODE_IS_IN_MULTI_ENTRY(qn) || -#endif -      NODE_IS_IN_REAL_REPEAT(qn)) { -    r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); -  } -  else { -    r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); -  } +  r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);    if (r != 0) return r;    COP(reg)->repeat_inc.id = num_repeat; @@ -985,21 +1105,21 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)      if (qn->lower <= 1 ||          int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {        if (IS_NOT_NULL(qn->next_head_exact)) -        return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; +        return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;        else -        return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; +        return OPSIZE_ANYCHAR_STAR + tlen * qn->lower;      }    }    mod_tlen = tlen;    if (emptiness != BODY_IS_NOT_EMPTY) -    mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END; +    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;    if (infinite &&        (qn->lower <= 1 ||         int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {      if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { -      len = SIZE_OP_JUMP; +      len = OPSIZE_JUMP;      }      else {        len = tlen * qn->lower; @@ -1008,36 +1128,36 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)      if (qn->greedy) {  #ifdef USE_OP_PUSH_OR_JUMP_EXACT        if (IS_NOT_NULL(qn->head_exact)) -        len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; +        len += OPSIZE_PUSH_OR_JUMP_EXACT1 + mod_tlen + OPSIZE_JUMP;        else  #endif        if (IS_NOT_NULL(qn->next_head_exact)) -        len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; +        len += OPSIZE_PUSH_IF_PEEK_NEXT + mod_tlen + OPSIZE_JUMP;        else -        len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; +        len += OPSIZE_PUSH + mod_tlen + OPSIZE_JUMP;      }      else -      len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; +      len += OPSIZE_JUMP + mod_tlen + OPSIZE_PUSH;    }    else if (qn->upper == 0) { -    if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ -      len = SIZE_OP_JUMP + tlen; +    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */ +      len = OPSIZE_JUMP + tlen;      }      else        len = 0;    }    else if (!infinite && qn->greedy &&             (qn->upper == 1 || -            int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper, +            int_multiply_cmp(tlen + OPSIZE_PUSH, qn->upper,                               QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {      len = tlen * qn->lower; -    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); +    len += (OPSIZE_PUSH + tlen) * (qn->upper - qn->lower);    }    else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ -    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; +    len = OPSIZE_PUSH + OPSIZE_JUMP + tlen;    }    else { -    len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT; +    len = OPSIZE_REPEAT_INC + mod_tlen + OPSIZE_REPEAT;    }    return len; @@ -1078,7 +1198,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)    mod_tlen = tlen;    if (emptiness != BODY_IS_NOT_EMPTY) -    mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END; +    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;    if (infinite &&        (qn->lower <= 1 || @@ -1091,16 +1211,16 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)        if (qn->greedy) {  #ifdef USE_OP_PUSH_OR_JUMP_EXACT          if (IS_NOT_NULL(qn->head_exact)) -          COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP; +          COP(reg)->jump.addr = OPSIZE_PUSH_OR_JUMP_EXACT1 + SIZE_INC;          else  #endif          if (IS_NOT_NULL(qn->next_head_exact)) -          COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP; +          COP(reg)->jump.addr = OPSIZE_PUSH_IF_PEEK_NEXT + SIZE_INC;          else -          COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP; +          COP(reg)->jump.addr = OPSIZE_PUSH + SIZE_INC;        }        else { -        COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP; +        COP(reg)->jump.addr = OPSIZE_JUMP + SIZE_INC;        }      }      else { @@ -1113,36 +1233,36 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)        if (IS_NOT_NULL(qn->head_exact)) {          r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);          if (r != 0) return r; -        COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP; +        COP(reg)->push_or_jump_exact1.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;          COP(reg)->push_or_jump_exact1.c    = STR_(qn->head_exact)->s[0]; -        r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); +        r = compile_quant_body_with_empty_check(qn, reg, env);          if (r != 0) return r; -        addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1); +        addr = -(mod_tlen + (int )OPSIZE_PUSH_OR_JUMP_EXACT1);        }        else  #endif        if (IS_NOT_NULL(qn->next_head_exact)) {          r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);          if (r != 0) return r; -        COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP; +        COP(reg)->push_if_peek_next.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;          COP(reg)->push_if_peek_next.c    = STR_(qn->next_head_exact)->s[0]; -        r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); +        r = compile_quant_body_with_empty_check(qn, reg, env);          if (r != 0) return r; -        addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT); +        addr = -(mod_tlen + (int )OPSIZE_PUSH_IF_PEEK_NEXT);        }        else {          r = add_op(reg, OP_PUSH);          if (r != 0) return r; -        COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP; +        COP(reg)->push.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP; -        r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); +        r = compile_quant_body_with_empty_check(qn, reg, env);          if (r != 0) return r; -        addr = -(mod_tlen + (int )SIZE_OP_PUSH); +        addr = -(mod_tlen + (int )OPSIZE_PUSH);        }        r = add_op(reg, OP_JUMP); @@ -1152,9 +1272,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)      else {        r = add_op(reg, OP_JUMP);        if (r != 0) return r; -      COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP; +      COP(reg)->jump.addr = mod_tlen + SIZE_INC; -      r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); +      r = compile_quant_body_with_empty_check(qn, reg, env);        if (r != 0) return r;        r = add_op(reg, OP_PUSH); @@ -1163,10 +1283,10 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)      }    }    else if (qn->upper == 0) { -    if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ +    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */        r = add_op(reg, OP_JUMP);        if (r != 0) return r; -      COP(reg)->jump.addr = tlen + SIZE_INC_OP; +      COP(reg)->jump.addr = tlen + SIZE_INC;        r = compile_tree(NODE_QUANT_BODY(qn), reg, env);      } @@ -1177,7 +1297,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)    }    else if (! infinite && qn->greedy &&             (qn->upper == 1 || -            int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper, +            int_multiply_cmp(tlen + OPSIZE_PUSH, qn->upper,                               QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {      int n = qn->upper - qn->lower; @@ -1185,7 +1305,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)      if (r != 0) return r;      for (i = 0; i < n; i++) { -      int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH); +      int v = onig_positive_int_multiply(n - i, tlen + OPSIZE_PUSH);        if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;        r = add_op(reg, OP_PUSH); @@ -1199,11 +1319,11 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)    else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */      r = add_op(reg, OP_PUSH);      if (r != 0) return r; -    COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP; +    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;      r = add_op(reg, OP_JUMP);      if (r != 0) return r; -    COP(reg)->jump.addr = tlen + SIZE_INC_OP; +    COP(reg)->jump.addr = tlen + SIZE_INC;      r = compile_tree(NODE_QUANT_BODY(qn), reg, env);    } @@ -1260,35 +1380,35 @@ compile_length_bag_node(BagNode* node, regex_t* reg)  #ifdef USE_CALL      if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { -      len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; +      len = tlen + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;        return len;      }      if (NODE_IS_CALLED(node)) { -      len = SIZE_OP_MEMORY_START_PUSH + tlen -        + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; -      if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) +      len = OPSIZE_MEM_START_PUSH + tlen +        + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN; +      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))          len += (NODE_IS_RECURSION(node) -                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); +                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);        else          len += (NODE_IS_RECURSION(node) -                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); +                ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);      }      else if (NODE_IS_RECURSION(node)) { -      len = SIZE_OP_MEMORY_START_PUSH; -      len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum) -                     ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC); +      len = OPSIZE_MEM_START_PUSH; +      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum) +                     ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_REC);      }      else  #endif      { -      if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum)) -        len = SIZE_OP_MEMORY_START_PUSH; +      if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum)) +        len = OPSIZE_MEM_START_PUSH;        else -        len = SIZE_OP_MEMORY_START; +        len = OPSIZE_MEM_START; -      len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum) -                     ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); +      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum) +                     ? OPSIZE_MEM_END_PUSH : OPSIZE_MEM_END);      }      break; @@ -1303,10 +1423,10 @@ compile_length_bag_node(BagNode* node, regex_t* reg)        v = onig_positive_int_multiply(qn->lower, tlen);        if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; -      len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP; +      len = v + OPSIZE_PUSH + tlen + OPSIZE_POP_OUT + OPSIZE_JUMP;      }      else { -      len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END; +      len = OPSIZE_ATOMIC_START + tlen + OPSIZE_ATOMIC_END;      }      break; @@ -1318,8 +1438,8 @@ compile_length_bag_node(BagNode* node, regex_t* reg)        len = compile_length_tree(cond, reg);        if (len < 0) return len; -      len += SIZE_OP_PUSH; -      len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END; +      len += OPSIZE_PUSH; +      len += OPSIZE_ATOMIC_START + OPSIZE_ATOMIC_END;        if (IS_NOT_NULL(Then)) {          tlen = compile_length_tree(Then, reg); @@ -1327,7 +1447,7 @@ compile_length_bag_node(BagNode* node, regex_t* reg)          len += tlen;        } -      len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END; +      len += OPSIZE_JUMP + OPSIZE_ATOMIC_END;        if (IS_NOT_NULL(Else)) {          tlen = compile_length_tree(Else, reg); @@ -1352,24 +1472,25 @@ static int  compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)  {    int r; -  int len;  #ifdef USE_CALL    if (NODE_IS_CALLED(node)) { +    int len; +      r = add_op(reg, OP_CALL);      if (r != 0) return r; -    node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP; +    node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + OPSIZE_JUMP;      NODE_STATUS_ADD(node, ADDR_FIXED);      COP(reg)->call.addr = (int )node->m.called_addr;      if (node->m.regnum == 0) {        len = compile_length_tree(NODE_BAG_BODY(node), reg); -      len += SIZE_OP_RETURN; +      len += OPSIZE_RETURN;        r = add_op(reg, OP_JUMP);        if (r != 0) return r; -      COP(reg)->jump.addr = len + SIZE_INC_OP; +      COP(reg)->jump.addr = len + SIZE_INC;        r = compile_tree(NODE_BAG_BODY(node), reg, env);        if (r != 0) return r; @@ -1379,25 +1500,24 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)      }      else {        len = compile_length_tree(NODE_BAG_BODY(node), reg); -      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); -      if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) +      len += (OPSIZE_MEM_START_PUSH + OPSIZE_RETURN); +      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))          len += (NODE_IS_RECURSION(node) -                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); +                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);        else -        len += (NODE_IS_RECURSION(node) -                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); +        len += (NODE_IS_RECURSION(node) ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);        r = add_op(reg, OP_JUMP);        if (r != 0) return r; -      COP(reg)->jump.addr = len + SIZE_INC_OP; +      COP(reg)->jump.addr = len + SIZE_INC;      }    }  #endif -  if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum)) -    r = add_op(reg, OP_MEMORY_START_PUSH); +  if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum)) +    r = add_op(reg, OP_MEM_START_PUSH);    else -    r = add_op(reg, OP_MEMORY_START); +    r = add_op(reg, OP_MEM_START);    if (r != 0) return r;    COP(reg)->memory_start.num = node->m.regnum; @@ -1405,11 +1525,11 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)    if (r != 0) return r;  #ifdef USE_CALL -  if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) +  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))      r = add_op(reg, (NODE_IS_RECURSION(node) -                     ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); +                     ? OP_MEM_END_PUSH_REC : OP_MEM_END_PUSH));    else -    r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END)); +    r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEM_END_REC : OP_MEM_END));    if (r != 0) return r;    COP(reg)->memory_end.num = node->m.regnum; @@ -1418,10 +1538,10 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)      r = add_op(reg, OP_RETURN);    }  #else -  if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) -    r = add_op(reg, OP_MEMORY_END_PUSH); +  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)) +    r = add_op(reg, OP_MEM_END_PUSH);    else -    r = add_op(reg, OP_MEMORY_END); +    r = add_op(reg, OP_MEM_END);    if (r != 0) return r;    COP(reg)->memory_end.num = node->m.regnum;  #endif @@ -1454,7 +1574,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)        r = add_op(reg, OP_PUSH);        if (r != 0) return r; -      COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP; +      COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP_OUT + OPSIZE_JUMP;        r = compile_tree(NODE_QUANT_BODY(qn), reg, env);        if (r != 0) return r; @@ -1463,7 +1583,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)        r = add_op(reg, OP_JUMP);        if (r != 0) return r; -      COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT); +      COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP_OUT);      }      else {        r = add_op(reg, OP_ATOMIC_START); @@ -1493,11 +1613,11 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)        else          then_len = 0; -      jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP; +      jump_len = cond_len + then_len + OPSIZE_ATOMIC_END + OPSIZE_JUMP;        r = add_op(reg, OP_PUSH);        if (r != 0) return r; -      COP(reg)->push.addr = SIZE_INC_OP + jump_len; +      COP(reg)->push.addr = SIZE_INC + jump_len;        r = compile_tree(cond, reg, env);        if (r != 0) return r; @@ -1518,7 +1638,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)        r = add_op(reg, OP_JUMP);        if (r != 0) return r; -      COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP; +      COP(reg)->jump.addr = OPSIZE_ATOMIC_END + else_len + SIZE_INC;        r = add_op(reg, OP_ATOMIC_END);        if (r != 0) return r; @@ -1546,16 +1666,16 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)    switch (node->type) {    case ANCR_PREC_READ: -    len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END; +    len = OPSIZE_PREC_READ_START + tlen + OPSIZE_PREC_READ_END;      break;    case ANCR_PREC_READ_NOT: -    len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END; +    len = OPSIZE_PREC_READ_NOT_START + tlen + OPSIZE_PREC_READ_NOT_END;      break;    case ANCR_LOOK_BEHIND: -    len = SIZE_OP_LOOK_BEHIND + tlen; +    len = OPSIZE_LOOK_BEHIND + tlen;      break;    case ANCR_LOOK_BEHIND_NOT: -    len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END; +    len = OPSIZE_LOOK_BEHIND_NOT_START + tlen + OPSIZE_LOOK_BEHIND_NOT_END;      break;    case ANCR_WORD_BOUNDARY: @@ -1564,7 +1684,7 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)    case ANCR_WORD_BEGIN:    case ANCR_WORD_END:  #endif -    len = SIZE_OP_WORD_BOUNDARY; +    len = OPSIZE_WORD_BOUNDARY;      break;    case ANCR_TEXT_SEGMENT_BOUNDARY: @@ -1648,7 +1768,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)      r = add_op(reg, OP_PREC_READ_NOT_START);      if (r != 0) return r; -    COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END; +    COP(reg)->prec_read_not_start.addr = SIZE_INC + len + OPSIZE_PREC_READ_NOT_END;      r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);      if (r != 0) return r;      r = add_op(reg, OP_PREC_READ_NOT_END); @@ -1678,7 +1798,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)        len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);        r = add_op(reg, OP_LOOK_BEHIND_NOT_START);        if (r != 0) return r; -      COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END; +      COP(reg)->look_behind_not_start.addr = SIZE_INC + len + OPSIZE_LOOK_BEHIND_NOT_END;        if (node->char_len < 0) {          r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n); @@ -1764,25 +1884,25 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg)    switch (node->type) {    case GIMMICK_FAIL: -    len = SIZE_OP_FAIL; +    len = OPSIZE_FAIL;      break;    case GIMMICK_SAVE: -    len = SIZE_OP_PUSH_SAVE_VAL; +    len = OPSIZE_PUSH_SAVE_VAL;      break;    case GIMMICK_UPDATE_VAR: -    len = SIZE_OP_UPDATE_VAR; +    len = OPSIZE_UPDATE_VAR;      break;  #ifdef USE_CALLOUT    case GIMMICK_CALLOUT:      switch (node->detail_type) {      case ONIG_CALLOUT_OF_CONTENTS: -      len = SIZE_OP_CALLOUT_CONTENTS; +      len = OPSIZE_CALLOUT_CONTENTS;        break;      case ONIG_CALLOUT_OF_NAME: -      len = SIZE_OP_CALLOUT_NAME; +      len = OPSIZE_CALLOUT_NAME;        break;      default: @@ -1821,13 +1941,13 @@ compile_length_tree(Node* node, regex_t* reg)          r += compile_length_tree(NODE_CAR(node), reg);          n++;        } while (IS_NOT_NULL(node = NODE_CDR(node))); -      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); +      r += (OPSIZE_PUSH + OPSIZE_JUMP) * (n - 1);      }      break;    case NODE_STRING: -    if (NODE_STRING_IS_RAW(node)) -      r = compile_length_string_raw_node(STR_(node), reg); +    if (NODE_STRING_IS_CRUDE(node)) +      r = compile_length_string_crude_node(STR_(node), reg);      else        r = compile_length_string_node(node, reg);      break; @@ -1841,12 +1961,12 @@ compile_length_tree(Node* node, regex_t* reg)      break;    case NODE_BACKREF: -    r = SIZE_OP_BACKREF; +    r = OPSIZE_BACKREF;      break;  #ifdef USE_CALL    case NODE_CALL: -    r = SIZE_OP_CALL; +    r = OPSIZE_CALL;      break;  #endif @@ -1893,7 +2013,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)        do {          len += compile_length_tree(NODE_CAR(x), reg);          if (IS_NOT_NULL(NODE_CDR(x))) { -          len += SIZE_OP_PUSH + SIZE_OP_JUMP; +          len += OPSIZE_PUSH + OPSIZE_JUMP;          }        } while (IS_NOT_NULL(x = NODE_CDR(x)));        pos = COP_CURR_OFFSET(reg) + 1 + len;  /* goal position */ @@ -1904,7 +2024,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)            enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;            r = add_op(reg, push);            if (r != 0) break; -          COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP; +          COP(reg)->push.addr = SIZE_INC + len + OPSIZE_JUMP;          }          r = compile_tree(NODE_CAR(node), reg, env);          if (r != 0) break; @@ -1919,8 +2039,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)      break;    case NODE_STRING: -    if (NODE_STRING_IS_RAW(node)) -      r = compile_string_raw_node(STR_(node), reg); +    if (NODE_STRING_IS_CRUDE(node)) +      r = compile_string_crude_node(STR_(node), reg);      else        r = compile_string_node(node, reg);      break; @@ -2090,8 +2210,9 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)        Node** ptarget = &(NODE_BODY(node));        Node*  old = *ptarget;        r = noname_disable_map(ptarget, map, counter); +      if (r != 0) return r;        if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) { -        onig_reduce_nested_quantifier(node, *ptarget); +        r = onig_reduce_nested_quantifier(node);        }      }      break; @@ -2303,11 +2424,11 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)      }    } -  loc = env->capture_history; -  MEM_STATUS_CLEAR(env->capture_history); +  loc = env->cap_history; +  MEM_STATUS_CLEAR(env->cap_history);    for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {      if (MEM_STATUS_AT(loc, i)) { -      MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val); +      MEM_STATUS_ON_SIMPLE(env->cap_history, map[i].new_val);      }    } @@ -2683,7 +2804,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)            len = NODE_STRING_LEN(x);            if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y); -          if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) { +          if (NODE_STRING_IS_CASE_FOLD_MATCH(x) || NODE_STRING_IS_CASE_FOLD_MATCH(y)) {              /* tiny version */              return 0;            } @@ -2743,7 +2864,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)          break;        if (exact == 0 || -          ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) { +          ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_CRUDE(node)) {          n = node;        }      } @@ -2871,9 +2992,9 @@ tree_min_len(Node* node, ScanEnv* env)        if (NODE_IS_RECURSION(node)) break;        backs = BACKREFS_P(br); -      len = tree_min_len(mem_env[backs[0]].node, env); +      len = tree_min_len(mem_env[backs[0]].mem_node, env);        for (i = 1; i < br->back_num; i++) { -        tmin = tree_min_len(mem_env[backs[i]].node, env); +        tmin = tree_min_len(mem_env[backs[i]].mem_node, env);          if (len > tmin) len = tmin;        }      } @@ -3042,7 +3163,7 @@ tree_max_len(Node* node, ScanEnv* env)        }        backs = BACKREFS_P(br);        for (i = 0; i < br->back_num; i++) { -        tmax = tree_max_len(mem_env[backs[i]].node, env); +        tmax = tree_max_len(mem_env[backs[i]].mem_node, env);          if (len < tmax) len = tmax;        }      } @@ -3179,7 +3300,7 @@ check_backrefs(Node* node, ScanEnv* env)          if (backs[i] > env->num_mem)            return ONIGERR_INVALID_BACKREF; -        NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF); +        NODE_STATUS_ADD(mem_env[backs[i]].mem_node, BACKREF);        }        r = 0;      } @@ -3193,6 +3314,204 @@ check_backrefs(Node* node, ScanEnv* env)    return r;  } +static int +set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env) +{ +  int r; + +  switch (NODE_TYPE(node)) { +  case NODE_LIST: +  case NODE_ALT: +    do { +      r = set_empty_repeat_node_trav(NODE_CAR(node), empty, env); +    } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); +    break; + +  case NODE_ANCHOR: +    { +      AnchorNode* an = ANCHOR_(node); + +      if (! ANCHOR_HAS_BODY(an)) { +        r = 0; +        break; +      } + +      switch (an->type) { +      case ANCR_PREC_READ: +      case ANCR_LOOK_BEHIND: +        empty = NULL_NODE; +        break; +      default: +        break; +      } +      r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env); +    } +    break; + +  case NODE_QUANT: +    { +      QuantNode* qn = QUANT_(node); + +      if (qn->emptiness != BODY_IS_NOT_EMPTY) empty = node; +      r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env); +    } +    break; + +  case NODE_BAG: +    if (IS_NOT_NULL(NODE_BODY(node))) { +      r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env); +      if (r != 0) return r; +    } +    { +      BagNode* en = BAG_(node); + +      if (en->type == BAG_MEMORY) { +        if (NODE_IS_BACKREF(node)) { +          if (IS_NOT_NULL(empty)) +            SCANENV_MEMENV(env)[en->m.regnum].empty_repeat_node = empty; +        } +      } +      else if (en->type == BAG_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          r = set_empty_repeat_node_trav(en->te.Then, empty, env); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = set_empty_repeat_node_trav(en->te.Else, empty, env); +        } +      } +    } +    break; + +  default: +    r = 0; +    break; +  } + +  return r; +} + +static int +is_ancestor_node(Node* node, Node* me) +{ +  Node* parent; + +  while ((parent = NODE_PARENT(me)) != NULL_NODE) { +    if (parent == node) return 1; +    me = parent; +  } +  return 0; +} + +static void +set_empty_status_check_trav(Node* node, ScanEnv* env) +{ +  switch (NODE_TYPE(node)) { +  case NODE_LIST: +  case NODE_ALT: +    do { +      set_empty_status_check_trav(NODE_CAR(node), env); +    } while (IS_NOT_NULL(node = NODE_CDR(node))); +    break; + +  case NODE_ANCHOR: +    { +      AnchorNode* an = ANCHOR_(node); + +      if (! ANCHOR_HAS_BODY(an)) break; +      set_empty_status_check_trav(NODE_BODY(node), env); +    } +    break; + +  case NODE_QUANT: +    set_empty_status_check_trav(NODE_BODY(node), env); +    break; + +  case NODE_BAG: +    if (IS_NOT_NULL(NODE_BODY(node))) +      set_empty_status_check_trav(NODE_BODY(node), env); +    { +      BagNode* en = BAG_(node); + +      if (en->type == BAG_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          set_empty_status_check_trav(en->te.Then, env); +        } +        if (IS_NOT_NULL(en->te.Else)) { +          set_empty_status_check_trav(en->te.Else, env); +        } +      } +    } +    break; + +  case NODE_BACKREF: +    { +      int i; +      int* backs; +      MemEnv* mem_env = SCANENV_MEMENV(env); +      BackRefNode* br = BACKREF_(node); +      backs = BACKREFS_P(br); +      for (i = 0; i < br->back_num; i++) { +        Node* ernode = mem_env[backs[i]].empty_repeat_node; +        if (IS_NOT_NULL(ernode)) { +          if (! is_ancestor_node(ernode, node)) { +            MEM_STATUS_LIMIT_ON(env->reg->empty_status_mem, backs[i]); +            NODE_STATUS_ADD(ernode, EMPTY_STATUS_CHECK); +            NODE_STATUS_ADD(mem_env[backs[i]].mem_node, EMPTY_STATUS_CHECK); +          } +        } +      } +    } +    break; + +  default: +    break; +  } +} + +static void +set_parent_node_trav(Node* node, Node* parent) +{ +  NODE_PARENT(node) = parent; + +  switch (NODE_TYPE(node)) { +  case NODE_LIST: +  case NODE_ALT: +    do { +      set_parent_node_trav(NODE_CAR(node), node); +    } while (IS_NOT_NULL(node = NODE_CDR(node))); +    break; + +  case NODE_ANCHOR: +    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) break; +    set_parent_node_trav(NODE_BODY(node), node); +    break; + +  case NODE_QUANT: +    set_parent_node_trav(NODE_BODY(node), node); +    break; + +  case NODE_BAG: +    if (IS_NOT_NULL(NODE_BODY(node))) +      set_parent_node_trav(NODE_BODY(node), node); +    { +      BagNode* en = BAG_(node); + +      if (en->type == BAG_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) +          set_parent_node_trav(en->te.Then, node); +        if (IS_NOT_NULL(en->te.Else)) { +          set_parent_node_trav(en->te.Else, node); +        } +      } +    } +    break; + +  default: +    break; +  } +} +  #ifdef USE_CALL @@ -3298,6 +3617,9 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)            if ((eret & RECURSION_MUST) == 0)              r &= ~RECURSION_MUST;          } +        else { +          r &= ~RECURSION_MUST; +        }        }        else {          r = infinite_recursive_call_check(NODE_BODY(node), env, head); @@ -3472,7 +3794,7 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)      r = recursive_call_check_trav(NODE_BODY(node), env, state);      if (QUANT_(node)->upper == 0) {        if (r == FOUND_CALLED_NODE) -        QUANT_(node)->is_refered = 1; +        QUANT_(node)->include_referred = 1;      }      break; @@ -3495,8 +3817,10 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)            if (! NODE_IS_RECURSION(node)) {              NODE_STATUS_ADD(node, MARK1);              r = recursive_call_check(NODE_BODY(node)); -            if (r != 0) +            if (r != 0) {                NODE_STATUS_ADD(node, RECURSION); +              MEM_STATUS_ON(env->backtrack_mem, en->m.regnum); +            }              NODE_STATUS_REMOVE(node, MARK1);            } @@ -3537,6 +3861,96 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)  #endif +static void +remove_from_list(Node* prev, Node* a) +{ +  if (NODE_CDR(prev) != a) return ; + +  NODE_CDR(prev) = NODE_CDR(a); +  NODE_CDR(a) = NULL_NODE; +} + +static int +reduce_string_list(Node* node) +{ +  int r = 0; + +  switch (NODE_TYPE(node)) { +  case NODE_LIST: +    { +      Node* prev; +      Node* curr; +      Node* prev_node; +      Node* next_node; + +      prev = NULL_NODE; +      do { +        next_node = NODE_CDR(node); +        curr = NODE_CAR(node); +        if (NODE_TYPE(curr) == NODE_STRING) { +          if (IS_NULL(prev) || STR_(curr)->flag != STR_(prev)->flag) { +            prev = curr; +            prev_node = node; +          } +          else { +            r = node_str_node_cat(prev, curr); +            if (r != 0) return r; +            remove_from_list(prev_node, node); +            onig_node_free(node); +          } +        } +        else { +          prev = NULL_NODE; +          prev_node = node; +        } + +        node = next_node; +      } while (r == 0 && IS_NOT_NULL(node)); +    } +    break; + +  case NODE_ALT: +    do { +      r = reduce_string_list(NODE_CAR(node)); +    } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); +    break; + +  case NODE_ANCHOR: +    if (IS_NULL(NODE_BODY(node))) +      break; +    /* fall */ +  case NODE_QUANT: +    r = reduce_string_list(NODE_BODY(node)); +    break; + +  case NODE_BAG: +    { +      BagNode* en = BAG_(node); + +      r = reduce_string_list(NODE_BODY(node)); +      if (r != 0) return r; + +      if (en->type == BAG_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          r = reduce_string_list(en->te.Then); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = reduce_string_list(en->te.Else); +          if (r != 0) return r; +        } +      } +    } +    break; + +  default: +    break; +  } + +  return r; +} + +  #define IN_ALT          (1<<0)  #define IN_NOT          (1<<1)  #define IN_REAL_REPEAT  (1<<2) @@ -3559,7 +3973,7 @@ divide_look_behind_alternatives(Node* node)    head = NODE_ANCHOR_BODY(an);    np = NODE_CAR(head); -  swap_node(node, head); +  node_swap(node, head);    NODE_CAR(node) = head;    NODE_BODY(head) = np; @@ -3581,7 +3995,7 @@ divide_look_behind_alternatives(Node* node)  }  static int -setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) +tune_look_behind(Node* node, regex_t* reg, ScanEnv* env)  {    int r, len;    AnchorNode* an = ANCHOR_(node); @@ -3602,7 +4016,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)  }  static int -next_setup(Node* node, Node* next_node, regex_t* reg) +tune_next(Node* node, Node* next_node, regex_t* reg)  {    NodeType type; @@ -3629,7 +4043,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)                Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);                CHECK_NULL_RETURN_MEMERR(en);                NODE_STATUS_ADD(en, STRICT_REAL_REPEAT); -              swap_node(node, en); +              node_swap(node, en);                NODE_BODY(node) = en;              }            } @@ -3649,23 +4063,57 @@ next_setup(Node* node, Node* next_node, regex_t* reg)  static int -update_string_node_case_fold(regex_t* reg, Node *node) +is_all_code_len_1_items(int n, OnigCaseFoldCodeItem items[])  { -  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; +  int i; + +  for (i = 0; i < n; i++) { +    OnigCaseFoldCodeItem* item = items + i; +    if (item->code_len != 1) return 0; +  } + +  return 1; +} + +static int +get_min_max_byte_len_case_fold_items(int n, OnigCaseFoldCodeItem items[], int* rmin, int* rmax) +{ +  int i, len, minlen, maxlen; + +  minlen = INT_MAX; +  maxlen = 0; +  for (i = 0; i < n; i++) { +    OnigCaseFoldCodeItem* item = items + i; + +    len = item->byte_len; +    if (len < minlen) minlen = len; +    if (len > maxlen) maxlen = len; +  } + +  *rmin = minlen; +  *rmax = maxlen; +  return 0; +} + +static int +conv_string_case_fold(OnigEncoding enc, OnigCaseFoldType case_fold_flag, +           UChar* s, UChar* end, UChar** rs, UChar** rend, int* rcase_min_len) +{ +  UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];    UChar *sbuf, *ebuf, *sp; -  int r, i, len, sbuf_size; -  StrNode* sn = STR_(node); +  int i, n, len, sbuf_size; -  end = sn->end; -  sbuf_size = (int )(end - sn->s) * 2; +  *rs = NULL; +  sbuf_size = (int )(end - s) * 2;    sbuf = (UChar* )xmalloc(sbuf_size);    CHECK_NULL_RETURN_MEMERR(sbuf);    ebuf = sbuf + sbuf_size; +  n = 0;    sp = sbuf; -  p = sn->s; +  p = s;    while (p < end) { -    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); +    len = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, buf);      for (i = 0; i < len; i++) {        if (sp >= ebuf) {          sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); @@ -3677,356 +4125,302 @@ update_string_node_case_fold(regex_t* reg, Node *node)        *sp++ = buf[i];      } +    n++;    } -  r = onig_node_str_set(node, sbuf, sp); -  if (r != 0) { -    xfree(sbuf); -    return r; -  } - -  xfree(sbuf); +  *rs = sbuf; +  *rend = sp; +  *rcase_min_len = n;    return 0;  }  static int -expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg) +make_code_list_to_string(Node** rnode, OnigEncoding enc, +                         int n, OnigCodePoint codes[])  { -  int r; -  Node *node; +  int r, i, len; +  Node* node; +  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; -  node = onig_node_new_str(s, end); -  if (IS_NULL(node)) return ONIGERR_MEMORY; +  *rnode = NULL_NODE; +  node = onig_node_new_str(NULL, NULL); +  CHECK_NULL_RETURN_MEMERR(node); -  r = update_string_node_case_fold(reg, node); -  if (r != 0) { -    onig_node_free(node); -    return r; +  for (i = 0; i < n; i++) { +    len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf); +    if (len < 0) { +      r = len; +      goto err; +    } + +    r = onig_node_str_cat(node, buf, buf + len); +    if (r != 0) goto err;    } -  NODE_STRING_SET_AMBIG(node); -  NODE_STRING_SET_DONT_GET_OPT_INFO(node);    *rnode = node;    return 0; + + err: +  onig_node_free(node); +  return r;  }  static int -expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, -                            int slen, UChar *end, regex_t* reg, Node **rnode) +unravel_cf_node_add(Node** rlist, Node* add)  { -  int r, i, j; -  int len; -  int varlen; -  Node *anode, *var_anode, *snode, *xnode, *an; -  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - -  *rnode = var_anode = NULL_NODE; +  Node *list; -  varlen = 0; -  for (i = 0; i < item_num; i++) { -    if (items[i].byte_len != slen) { -      varlen = 1; -      break; -    } +  list = *rlist; +  if (IS_NULL(list)) { +    list = onig_node_new_list(add, NULL); +    CHECK_NULL_RETURN_MEMERR(list); +    *rlist = list;    } +  else { +    Node* r = node_list_add(list, add); +    CHECK_NULL_RETURN_MEMERR(r); +  } + +  return 0; +} -  if (varlen != 0) { -    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); -    if (IS_NULL(var_anode)) return ONIGERR_MEMORY; +static int +unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end, +                      unsigned int flag, int case_min_len) +{ +  int r; +  Node *sn, *list; -    xnode = onig_node_new_list(NULL, NULL); -    if (IS_NULL(xnode)) goto mem_err; -    NODE_CAR(var_anode) = xnode; +  list = *rlist; +  sn   = *rsn; -    anode = onig_node_new_alt(NULL_NODE, NULL_NODE); -    if (IS_NULL(anode)) goto mem_err; -    NODE_CAR(xnode) = anode; +  if (IS_NOT_NULL(sn) && STR_(sn)->flag == flag) { +    if (NODE_STRING_IS_CASE_FOLD_MATCH(sn)) +      r = node_str_cat_case_fold(sn, s, end, case_min_len); +    else +      r = onig_node_str_cat(sn, s, end);    }    else { -    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); -    if (IS_NULL(anode)) return ONIGERR_MEMORY; +    sn = onig_node_new_str(s, end); +    CHECK_NULL_RETURN_MEMERR(sn); + +    STR_(sn)->flag = flag; +    STR_(sn)->case_min_len = case_min_len; +    r = unravel_cf_node_add(&list, sn);    } -  snode = onig_node_new_str(p, p + slen); -  if (IS_NULL(snode)) goto mem_err; +  if (r == 0) { +    *rlist = list; +    *rsn = sn; +  } +  return r; +} -  NODE_CAR(anode) = snode; +static int +unravel_cf_string_fold_add(Node** rlist, Node** rsn, OnigEncoding enc, +                      OnigCaseFoldType case_fold_flag, UChar* s, UChar* end) +{ +  int r; +  int case_min_len; +  UChar *rs, *rend; -  for (i = 0; i < item_num; i++) { -    snode = onig_node_new_str(NULL, NULL); -    if (IS_NULL(snode)) goto mem_err; +  r = conv_string_case_fold(enc, case_fold_flag, s, end, +                            &rs, &rend, &case_min_len); +  if (r != 0) return r; -    for (j = 0; j < items[i].code_len; j++) { -      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); -      if (len < 0) { -        r = len; -        goto mem_err2; -      } +  r = unravel_cf_string_add(rlist, rsn, rs, rend, +                            NODE_STRING_CASE_FOLD_MATCH, case_min_len); +  xfree(rs); -      r = onig_node_str_cat(snode, buf, buf + len); -      if (r != 0) goto mem_err2; -    } +  return r; +} -    an = onig_node_new_alt(NULL_NODE, NULL_NODE); -    if (IS_NULL(an)) { -      goto mem_err2; -    } +static int +unravel_cf_string_alt_or_cc_add(Node** rlist, int n, +            OnigCaseFoldCodeItem items[], int byte_len, OnigEncoding enc, +            OnigCaseFoldType case_fold_flag, UChar* s, UChar* end) +{ +  int r, i; +  Node* node; -    if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) { -      Node *rem; -      UChar *q = p + items[i].byte_len; +  if (is_all_code_len_1_items(n, items)) { +    OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */ -      if (q < end) { -        r = expand_case_fold_make_rem_string(&rem, q, end, reg); -        if (r != 0) { -          onig_node_free(an); -          goto mem_err2; -        } +    codes[0] = ONIGENC_MBC_TO_CODE(enc, s, end); +    for (i = 0; i < n; i++) { +      OnigCaseFoldCodeItem* item = items + i; +      codes[i+1] = item->code[0]; +    } +    r = onig_new_cclass_with_code_list(&node, enc, n + 1, codes); +    if (r != 0) return r; +  } +  else { +    Node *snode, *alt, *curr; -        xnode = onig_node_list_add(NULL_NODE, snode); -        if (IS_NULL(xnode)) { -          onig_node_free(an); -          onig_node_free(rem); -          goto mem_err2; -        } -        if (IS_NULL(onig_node_list_add(xnode, rem))) { -          onig_node_free(an); -          onig_node_free(xnode); -          onig_node_free(rem); -          goto mem_err; -        } +    snode = onig_node_new_str(s, end); +    CHECK_NULL_RETURN_MEMERR(snode); +    node = curr = onig_node_new_alt(snode, NULL_NODE); +    if (IS_NULL(curr)) { +      onig_node_free(snode); +      return ONIGERR_MEMORY; +    } -        NODE_CAR(an) = xnode; +    r = 0; +    for (i = 0; i < n; i++) { +      OnigCaseFoldCodeItem* item = items + i; +      r = make_code_list_to_string(&snode, enc, item->code_len, item->code); +      if (r != 0) { +        onig_node_free(node); +        return r;        } -      else { -        NODE_CAR(an) = snode; + +      alt = onig_node_new_alt(snode, NULL_NODE); +      if (IS_NULL(alt)) { +        onig_node_free(snode); +        onig_node_free(node); +        return ONIGERR_MEMORY;        } -      NODE_CDR(var_anode) = an; -      var_anode = an; -    } -    else { -      NODE_CAR(an)     = snode; -      NODE_CDR(anode) = an; -      anode = an; +      NODE_CDR(curr) = alt; +      curr = alt;      }    } -  return varlen; - - mem_err2: -  onig_node_free(snode); - - mem_err: -  onig_node_free(*rnode); - -  return ONIGERR_MEMORY; +  r = unravel_cf_node_add(rlist, node); +  if (r != 0) onig_node_free(node); +  return r;  }  static int -is_good_case_fold_items_for_search(OnigEncoding enc, int slen, -                                   int n, OnigCaseFoldCodeItem items[]) +unravel_cf_look_behind_add(Node** rlist, Node** rsn, +                int n, OnigCaseFoldCodeItem items[], OnigEncoding enc, +                UChar* s, int one_len)  { -  int i, len; -  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; +  int r, i, found; +  found = 0;    for (i = 0; i < n; i++) {      OnigCaseFoldCodeItem* item = items + i; +    if (item->byte_len == one_len) { +      if (item->code_len == 1) { +        found = 1; +      } +    } +  } -    if (item->code_len != 1)    return 0; -    if (item->byte_len != slen) return 0; -    len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf); -    if (len != slen) return 0; +  if (found == 0) { +    r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */, 0);    } +  else { +    Node* node; +    OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */ -  return 1; -} +    found = 0; +    codes[found++] = ONIGENC_MBC_TO_CODE(enc, s, s + one_len); +    for (i = 0; i < n; i++) { +      OnigCaseFoldCodeItem* item = items + i; +      if (item->byte_len == one_len) { +        if (item->code_len == 1) { +          codes[found++] = item->code[0]; +        } +      } +    } +    r = onig_new_cclass_with_code_list(&node, enc, found, codes); +    if (r != 0) return r; -#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8 +    r = unravel_cf_node_add(rlist, node); +    if (r != 0) onig_node_free(node); + +    *rsn = NULL_NODE; +  } + +  return r; +}  static int -expand_case_fold_string(Node* node, regex_t* reg, int state) -{ -  int r, n, len, alt_num; -  int fold_len; -  int prev_is_ambig, prev_is_good, is_good, is_in_look_behind; -  UChar *start, *end, *p; -  UChar* foldp; -  Node *top_root, *root, *snode, *prev_node; +unravel_case_fold_string(Node* node, regex_t* reg, int state) +{ +  int r, n, one_len, min_len, max_len, in_look_behind; +  UChar *start, *end, *p, *q; +  StrNode* snode; +  Node *sn, *list; +  OnigEncoding enc;    OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; -  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; -  StrNode* sn; -  if (NODE_STRING_IS_AMBIG(node)) return 0; +  if (NODE_STRING_IS_CASE_EXPANDED(node)) return 0; -  sn = STR_(node); +  snode = STR_(node); -  start = sn->s; -  end   = sn->end; +  start = snode->s; +  end   = snode->end;    if (start >= end) return 0; -  is_in_look_behind = (state & IN_LOOK_BEHIND) != 0; +  in_look_behind = (state & IN_LOOK_BEHIND) != 0; +  enc = reg->enc; -  r = 0; -  top_root = root = prev_node = snode = NULL_NODE; -  alt_num = 1; +  list = sn = NULL_NODE;    p = start;    while (p < end) { -    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, -                                           p, end, items); +    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, end, +                                           items);      if (n < 0) {        r = n;        goto err;      } -    len = enclen(reg->enc, p); -    is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items); - -    if (is_in_look_behind || -        (IS_NOT_NULL(snode) || -         (is_good -          /* expand single char case: ex. /(?i:a)/ */ -          && !(p == start && p + len >= end)))) { -      if (IS_NULL(snode)) { -        if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { -          top_root = root = onig_node_list_add(NULL_NODE, prev_node); -          if (IS_NULL(root)) { -            onig_node_free(prev_node); -            goto mem_err; -          } -        } - -        prev_node = snode = onig_node_new_str(NULL, NULL); -        if (IS_NULL(snode)) goto mem_err; -        if (IS_NOT_NULL(root)) { -          if (IS_NULL(onig_node_list_add(root, snode))) { -            onig_node_free(snode); -            goto mem_err; -          } -        } - -        prev_is_ambig = -1; /* -1: new */ -        prev_is_good  =  0; /* escape compiler warning */ -      } -      else { -        prev_is_ambig = NODE_STRING_IS_AMBIG(snode); -        prev_is_good  = NODE_STRING_IS_GOOD_AMBIG(snode); -      } - -      if (n != 0) { -        foldp = p; -        fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, -                                         &foldp, end, buf); -        foldp = buf; -      } -      else { -        foldp = p; fold_len = len; -      } - -      if ((prev_is_ambig == 0 && n != 0) || -          (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) { -        if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) { -          top_root = root = onig_node_list_add(NULL_NODE, prev_node); -          if (IS_NULL(root)) { -            onig_node_free(prev_node); -            goto mem_err; -          } -        } - -        prev_node = snode = onig_node_new_str(foldp, foldp + fold_len); -        if (IS_NULL(snode)) goto mem_err; -        if (IS_NULL(onig_node_list_add(root, snode))) { -          onig_node_free(snode); -          goto mem_err; -        } -      } -      else { -        r = onig_node_str_cat(snode, foldp, foldp + fold_len); -        if (r != 0) goto err; -      } - -      if (n != 0) NODE_STRING_SET_AMBIG(snode); -      if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode); +    one_len = enclen(enc, p); +    if (n == 0) { +      q = p + one_len; +      r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */, 0); +      if (r != 0) goto err;      }      else { -      alt_num *= (n + 1); -      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; - -      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { -        top_root = root = onig_node_list_add(NULL_NODE, prev_node); -        if (IS_NULL(root)) { -          onig_node_free(prev_node); -          goto mem_err; -        } +      if (in_look_behind != 0) { +        q = p + one_len; +        r = unravel_cf_look_behind_add(&list, &sn, n, items, enc, p, one_len); +        if (r != 0) goto err;        } - -      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); -      if (r < 0) goto mem_err; -      if (r == 1) { -        if (IS_NULL(root)) { -          top_root = prev_node; +      else { +        get_min_max_byte_len_case_fold_items(n, items, &min_len, &max_len); +        q = p + max_len; +        if (one_len == max_len && min_len == max_len) { +          r = unravel_cf_string_alt_or_cc_add(&list, n, items, max_len, enc, +                                              reg->case_fold_flag, p, q); +          if (r != 0) goto err; +          sn = NULL_NODE;          }          else { -          if (IS_NULL(onig_node_list_add(root, prev_node))) { -            onig_node_free(prev_node); -            goto mem_err; -          } -        } - -        root = NODE_CAR(prev_node); -      } -      else { /* r == 0 */ -        if (IS_NOT_NULL(root)) { -          if (IS_NULL(onig_node_list_add(root, prev_node))) { -            onig_node_free(prev_node); -            goto mem_err; -          } +          r = unravel_cf_string_fold_add(&list, &sn, enc, reg->case_fold_flag, +                                         p, q); +          if (r != 0) goto err;          }        } - -      snode = NULL_NODE;      } -    p += len; +    p = q;    } -  if (p < end) { -    Node *srem; - -    r = expand_case_fold_make_rem_string(&srem, p, end, reg); -    if (r != 0) goto mem_err; - -    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { -      top_root = root = onig_node_list_add(NULL_NODE, prev_node); -      if (IS_NULL(root)) { -        onig_node_free(srem); -        onig_node_free(prev_node); -        goto mem_err; -      } -    } - -    if (IS_NULL(root)) { -      prev_node = srem; +  if (IS_NOT_NULL(list)) { +    if (node_list_len(list) == 1) { +      node_swap(node, NODE_CAR(list));      }      else { -      if (IS_NULL(onig_node_list_add(root, srem))) { -        onig_node_free(srem); -        goto mem_err; -      } +      node_swap(node, list);      } +    onig_node_free(list); +  } +  else { +    node_swap(node, sn); +    onig_node_free(sn);    } - -  /* ending */ -  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); -  swap_node(node, top_root); -  onig_node_free(top_root);    return 0; - mem_err: -  r = ONIGERR_MEMORY; -   err: -  onig_node_free(top_root); +  if (IS_NOT_NULL(list)) +    onig_node_free(list); +  else if (IS_NOT_NULL(sn)) +    onig_node_free(sn); +    return r;  } @@ -4121,7 +4515,7 @@ quantifiers_memory_node_info(Node* node)  __inline  #endif  static int -setup_call_node_call(CallNode* cn, ScanEnv* env, int state) +tune_call_node_call(CallNode* cn, ScanEnv* env, int state)  {    MemEnv* mem_env = SCANENV_MEMENV(env); @@ -4141,7 +4535,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)      }    set_call_attr: -    NODE_CALL_BODY(cn) = mem_env[cn->group_num].node; +    NODE_CALL_BODY(cn) = mem_env[cn->group_num].mem_node;      if (IS_NULL(NODE_CALL_BODY(cn))) {        onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,                                       cn->name, cn->name_end); @@ -4172,23 +4566,23 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)  }  static void -setup_call2_call(Node* node) +tune_call2_call(Node* node)  {    switch (NODE_TYPE(node)) {    case NODE_LIST:    case NODE_ALT:      do { -      setup_call2_call(NODE_CAR(node)); +      tune_call2_call(NODE_CAR(node));      } while (IS_NOT_NULL(node = NODE_CDR(node)));      break;    case NODE_QUANT: -    setup_call2_call(NODE_BODY(node)); +    tune_call2_call(NODE_BODY(node));      break;    case NODE_ANCHOR:      if (ANCHOR_HAS_BODY(ANCHOR_(node))) -      setup_call2_call(NODE_BODY(node)); +      tune_call2_call(NODE_BODY(node));      break;    case NODE_BAG: @@ -4198,19 +4592,19 @@ setup_call2_call(Node* node)        if (en->type == BAG_MEMORY) {          if (! NODE_IS_MARK1(node)) {            NODE_STATUS_ADD(node, MARK1); -          setup_call2_call(NODE_BODY(node)); +          tune_call2_call(NODE_BODY(node));            NODE_STATUS_REMOVE(node, MARK1);          }        }        else if (en->type == BAG_IF_ELSE) { -        setup_call2_call(NODE_BODY(node)); +        tune_call2_call(NODE_BODY(node));          if (IS_NOT_NULL(en->te.Then)) -          setup_call2_call(en->te.Then); +          tune_call2_call(en->te.Then);          if (IS_NOT_NULL(en->te.Else)) -          setup_call2_call(en->te.Else); +          tune_call2_call(en->te.Else);        }        else { -        setup_call2_call(NODE_BODY(node)); +        tune_call2_call(NODE_BODY(node));        }      }      break; @@ -4226,7 +4620,7 @@ setup_call2_call(Node* node)          NODE_STATUS_ADD(called, CALLED);          BAG_(called)->m.entry_count++; -        setup_call2_call(called); +        tune_call2_call(called);        }        NODE_STATUS_REMOVE(node, MARK1);      } @@ -4238,7 +4632,7 @@ setup_call2_call(Node* node)  }  static int -setup_call(Node* node, ScanEnv* env, int state) +tune_call(Node* node, ScanEnv* env, int state)  {    int r; @@ -4246,7 +4640,7 @@ setup_call(Node* node, ScanEnv* env, int state)    case NODE_LIST:    case NODE_ALT:      do { -      r = setup_call(NODE_CAR(node), env, state); +      r = tune_call(NODE_CAR(node), env, state);      } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));      break; @@ -4254,12 +4648,12 @@ setup_call(Node* node, ScanEnv* env, int state)      if (QUANT_(node)->upper == 0)        state |= IN_ZERO_REPEAT; -    r = setup_call(NODE_BODY(node), env, state); +    r = tune_call(NODE_BODY(node), env, state);      break;    case NODE_ANCHOR:      if (ANCHOR_HAS_BODY(ANCHOR_(node))) -      r = setup_call(NODE_BODY(node), env, state); +      r = tune_call(NODE_BODY(node), env, state);      else        r = 0;      break; @@ -4273,20 +4667,20 @@ setup_call(Node* node, ScanEnv* env, int state)            NODE_STATUS_ADD(node, IN_ZERO_REPEAT);            BAG_(node)->m.entry_count--;          } -        r = setup_call(NODE_BODY(node), env, state); +        r = tune_call(NODE_BODY(node), env, state);        }        else if (en->type == BAG_IF_ELSE) { -        r = setup_call(NODE_BODY(node), env, state); +        r = tune_call(NODE_BODY(node), env, state);          if (r != 0) return r;          if (IS_NOT_NULL(en->te.Then)) { -          r = setup_call(en->te.Then, env, state); +          r = tune_call(en->te.Then, env, state);            if (r != 0) return r;          }          if (IS_NOT_NULL(en->te.Else)) -          r = setup_call(en->te.Else, env, state); +          r = tune_call(en->te.Else, env, state);        }        else -        r = setup_call(NODE_BODY(node), env, state); +        r = tune_call(NODE_BODY(node), env, state);      }      break; @@ -4296,7 +4690,7 @@ setup_call(Node* node, ScanEnv* env, int state)        CALL_(node)->entry_count--;      } -    r = setup_call_node_call(CALL_(node), env, state); +    r = tune_call_node_call(CALL_(node), env, state);      break;    default: @@ -4308,7 +4702,7 @@ setup_call(Node* node, ScanEnv* env, int state)  }  static int -setup_call2(Node* node) +tune_call2(Node* node)  {    int r = 0; @@ -4316,23 +4710,23 @@ setup_call2(Node* node)    case NODE_LIST:    case NODE_ALT:      do { -      r = setup_call2(NODE_CAR(node)); +      r = tune_call2(NODE_CAR(node));      } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));      break;    case NODE_QUANT:      if (QUANT_(node)->upper != 0) -      r = setup_call2(NODE_BODY(node)); +      r = tune_call2(NODE_BODY(node));      break;    case NODE_ANCHOR:      if (ANCHOR_HAS_BODY(ANCHOR_(node))) -      r = setup_call2(NODE_BODY(node)); +      r = tune_call2(NODE_BODY(node));      break;    case NODE_BAG:      if (! NODE_IS_IN_ZERO_REPEAT(node)) -      r = setup_call2(NODE_BODY(node)); +      r = tune_call2(NODE_BODY(node));      {        BagNode* en = BAG_(node); @@ -4340,18 +4734,18 @@ setup_call2(Node* node)        if (r != 0) return r;        if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) { -          r = setup_call2(en->te.Then); +          r = tune_call2(en->te.Then);            if (r != 0) return r;          }          if (IS_NOT_NULL(en->te.Else)) -          r = setup_call2(en->te.Else); +          r = tune_call2(en->te.Else);        }      }      break;    case NODE_CALL:      if (! NODE_IS_IN_ZERO_REPEAT(node)) { -      setup_call2_call(node); +      tune_call2_call(node);      }      break; @@ -4364,7 +4758,7 @@ setup_call2(Node* node)  static void -setup_called_state_call(Node* node, int state) +tune_called_state_call(Node* node, int state)  {    switch (NODE_TYPE(node)) {    case NODE_ALT: @@ -4372,7 +4766,7 @@ setup_called_state_call(Node* node, int state)      /* fall */    case NODE_LIST:      do { -      setup_called_state_call(NODE_CAR(node), state); +      tune_called_state_call(NODE_CAR(node), state);      } while (IS_NOT_NULL(node = NODE_CDR(node)));      break; @@ -4385,7 +4779,7 @@ setup_called_state_call(Node* node, int state)        if (qn->lower != qn->upper)          state |= IN_VAR_REPEAT; -      setup_called_state_call(NODE_QUANT_BODY(qn), state); +      tune_called_state_call(NODE_QUANT_BODY(qn), state);      }      break; @@ -4400,7 +4794,7 @@ setup_called_state_call(Node* node, int state)          /* fall */        case ANCR_PREC_READ:        case ANCR_LOOK_BEHIND: -        setup_called_state_call(NODE_ANCHOR_BODY(an), state); +        tune_called_state_call(NODE_ANCHOR_BODY(an), state);          break;        default:          break; @@ -4416,31 +4810,33 @@ setup_called_state_call(Node* node, int state)          if (NODE_IS_MARK1(node)) {            if ((~en->m.called_state & state) != 0) {              en->m.called_state |= state; -            setup_called_state_call(NODE_BODY(node), state); +            tune_called_state_call(NODE_BODY(node), state);            }          }          else {            NODE_STATUS_ADD(node, MARK1);            en->m.called_state |= state; -          setup_called_state_call(NODE_BODY(node), state); +          tune_called_state_call(NODE_BODY(node), state);            NODE_STATUS_REMOVE(node, MARK1);          }        }        else if (en->type == BAG_IF_ELSE) { +        state |= IN_ALT; +        tune_called_state_call(NODE_BODY(node), state);          if (IS_NOT_NULL(en->te.Then)) { -          setup_called_state_call(en->te.Then, state); +          tune_called_state_call(en->te.Then, state);          }          if (IS_NOT_NULL(en->te.Else)) -          setup_called_state_call(en->te.Else, state); +          tune_called_state_call(en->te.Else, state);        }        else { -        setup_called_state_call(NODE_BODY(node), state); +        tune_called_state_call(NODE_BODY(node), state);        }      }      break;    case NODE_CALL: -    setup_called_state_call(NODE_BODY(node), state); +    tune_called_state_call(NODE_BODY(node), state);      break;    default: @@ -4449,7 +4845,7 @@ setup_called_state_call(Node* node, int state)  }  static void -setup_called_state(Node* node, int state) +tune_called_state(Node* node, int state)  {    switch (NODE_TYPE(node)) {    case NODE_ALT: @@ -4457,13 +4853,13 @@ setup_called_state(Node* node, int state)      /* fall */    case NODE_LIST:      do { -      setup_called_state(NODE_CAR(node), state); +      tune_called_state(NODE_CAR(node), state);      } while (IS_NOT_NULL(node = NODE_CDR(node)));      break;  #ifdef USE_CALL    case NODE_CALL: -    setup_called_state_call(node, state); +    tune_called_state_call(node, state);      break;  #endif @@ -4480,14 +4876,15 @@ setup_called_state(Node* node, int state)          /* fall */        case BAG_OPTION:        case BAG_STOP_BACKTRACK: -        setup_called_state(NODE_BODY(node), state); +        tune_called_state(NODE_BODY(node), state);          break;        case BAG_IF_ELSE: -        setup_called_state(NODE_BODY(node), state); +        state |= IN_ALT; +        tune_called_state(NODE_BODY(node), state);          if (IS_NOT_NULL(en->te.Then)) -          setup_called_state(en->te.Then, state); +          tune_called_state(en->te.Then, state);          if (IS_NOT_NULL(en->te.Else)) -          setup_called_state(en->te.Else, state); +          tune_called_state(en->te.Else, state);          break;        }      } @@ -4502,7 +4899,7 @@ setup_called_state(Node* node, int state)        if (qn->lower != qn->upper)          state |= IN_VAR_REPEAT; -      setup_called_state(NODE_QUANT_BODY(qn), state); +      tune_called_state(NODE_QUANT_BODY(qn), state);      }      break; @@ -4517,7 +4914,7 @@ setup_called_state(Node* node, int state)          /* fall */        case ANCR_PREC_READ:        case ANCR_LOOK_BEHIND: -        setup_called_state(NODE_ANCHOR_BODY(an), state); +        tune_called_state(NODE_ANCHOR_BODY(an), state);          break;        default:          break; @@ -4538,13 +4935,13 @@ setup_called_state(Node* node, int state)  #endif  /* USE_CALL */ -static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env); +static int tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env);  #ifdef __GNUC__  __inline  #endif  static int -setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) +tune_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)  {  /* allowed node types in look-behind */  #define ALLOWED_TYPE_IN_LB \ @@ -4572,10 +4969,10 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)    switch (an->type) {    case ANCR_PREC_READ: -    r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env); +    r = tune_tree(NODE_ANCHOR_BODY(an), reg, state, env);      break;    case ANCR_PREC_READ_NOT: -    r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env); +    r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);      break;    case ANCR_LOOK_BEHIND: @@ -4584,9 +4981,9 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)                            ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);        if (r < 0) return r;        if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; -      r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env); +      r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);        if (r != 0) return r; -      r = setup_look_behind(node, reg, env); +      r = tune_look_behind(node, reg, env);      }      break; @@ -4596,10 +4993,10 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)                            ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);        if (r < 0) return r;        if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; -      r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND), -                     env); +      r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND), +                    env);        if (r != 0) return r; -      r = setup_look_behind(node, reg, env); +      r = tune_look_behind(node, reg, env);      }      break; @@ -4615,7 +5012,7 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)  __inline  #endif  static int -setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) +tune_quant(Node* node, regex_t* reg, int state, ScanEnv* env)  {    int r;    OnigLen d; @@ -4634,12 +5031,6 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)      if (d == 0) {  #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT        qn->emptiness = quantifiers_memory_node_info(body); -      if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) { -        if (NODE_TYPE(body) == NODE_BAG && -            BAG_(body)->type == BAG_MEMORY) { -          MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum); -        } -      }  #else        qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;  #endif @@ -4651,7 +5042,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)    if (qn->lower != qn->upper)      state |= IN_VAR_REPEAT; -  r = setup_tree(body, reg, state, env); +  r = tune_tree(body, reg, state, env);    if (r != 0) return r;    /* expand string */ @@ -4660,13 +5051,12 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)      if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&          qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {        int len = NODE_STRING_LEN(body); -      StrNode* sn = STR_(body);        if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {          int i, n = qn->lower; -        onig_node_conv_to_str_node(node, STR_(body)->flag); +        node_conv_to_str_node(node, STR_(body)->flag);          for (i = 0; i < n; i++) { -          r = onig_node_str_cat(node, sn->s, sn->end); +          r = node_str_node_cat(node, body);            if (r != 0) return r;          }          onig_node_free(body); @@ -4691,7 +5081,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)    return r;  } -/* setup_tree does the following work. +/* tune_tree does the following work.   1. check empty loop. (set qn->emptiness)   2. expand ignore-case in char class.   3. set memory status bit flags. (reg->mem_stats) @@ -4700,7 +5090,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)   6. expand repeated string.   */  static int -setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env)  {    int r = 0; @@ -4709,9 +5099,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)      {        Node* prev = NULL_NODE;        do { -        r = setup_tree(NODE_CAR(node), reg, state, env); +        r = tune_tree(NODE_CAR(node), reg, state, env);          if (IS_NOT_NULL(prev) && r == 0) { -          r = next_setup(prev, NODE_CAR(node), reg); +          r = tune_next(prev, NODE_CAR(node), reg);          }          prev = NODE_CAR(node);        } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); @@ -4720,13 +5110,13 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)    case NODE_ALT:      do { -      r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env); +      r = tune_tree(NODE_CAR(node), reg, (state | IN_ALT), env);      } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));      break;    case NODE_STRING: -    if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) { -      r = expand_case_fold_string(node, reg, state); +    if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_CRUDE(node)) { +      r = unravel_case_fold_string(node, reg, state);      }      break; @@ -4739,12 +5129,18 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)        for (i = 0; i < br->back_num; i++) {          if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;          MEM_STATUS_ON(env->backrefed_mem, p[i]); -        MEM_STATUS_ON(env->bt_mem_start, p[i]); +#if 0  #ifdef USE_BACKREF_WITH_LEVEL          if (NODE_IS_NEST_LEVEL(node)) { -          MEM_STATUS_ON(env->bt_mem_end, p[i]); +          MEM_STATUS_ON(env->backtrack_mem, p[i]);          }  #endif +#else +        /* More precisely, it should be checked whether alt/repeat exists before +           the subject capture node, and then this backreference position +           exists before (or in) the capture node. */ +        MEM_STATUS_ON(env->backtrack_mem, p[i]); +#endif        }      }      break; @@ -4758,7 +5154,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)          {            OnigOptionType options = reg->options;            reg->options = BAG_(node)->o.options; -          r = setup_tree(NODE_BODY(node), reg, state, env); +          r = tune_tree(NODE_BODY(node), reg, state, env);            reg->options = options;          }          break; @@ -4770,15 +5166,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)          if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0              || NODE_IS_RECURSION(node)) { -          MEM_STATUS_ON(env->bt_mem_start, en->m.regnum); +          MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);          } -        r = setup_tree(NODE_BODY(node), reg, state, env); +        r = tune_tree(NODE_BODY(node), reg, state, env);          break;        case BAG_STOP_BACKTRACK:          {            Node* target = NODE_BODY(node); -          r = setup_tree(target, reg, state, env); +          r = tune_tree(target, reg, state, env);            if (NODE_TYPE(target) == NODE_QUANT) {              QuantNode* tqn = QUANT_(target);              if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 && @@ -4791,25 +5187,25 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)          break;        case BAG_IF_ELSE: -        r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env); +        r = tune_tree(NODE_BODY(node), reg, (state | IN_ALT), env);          if (r != 0) return r;          if (IS_NOT_NULL(en->te.Then)) { -          r = setup_tree(en->te.Then, reg, (state | IN_ALT), env); +          r = tune_tree(en->te.Then, reg, (state | IN_ALT), env);            if (r != 0) return r;          }          if (IS_NOT_NULL(en->te.Else)) -          r = setup_tree(en->te.Else, reg, (state | IN_ALT), env); +          r = tune_tree(en->te.Else, reg, (state | IN_ALT), env);          break;        }      }      break;    case NODE_QUANT: -    r = setup_quant(node, reg, state, env); +    r = tune_quant(node, reg, state, env);      break;    case NODE_ANCHOR: -    r = setup_anchor(node, reg, state, env); +    r = tune_anchor(node, reg, state, env);      break;  #ifdef USE_CALL @@ -4908,7 +5304,7 @@ typedef struct {  } MinMax;  typedef struct { -  MinMax           mmd; +  MinMax           mm;    OnigEncoding     enc;    OnigOptionType   options;    OnigCaseFoldType case_fold_flag; @@ -4921,17 +5317,16 @@ typedef struct {  } OptAnc;  typedef struct { -  MinMax     mmd;   /* position */ +  MinMax     mm;   /* position */    OptAnc     anc;    int        reach_end;    int        case_fold; -  int        good_case_fold;    int        len;    UChar      s[OPT_EXACT_MAXLEN];  } OptStr;  typedef struct { -  MinMax    mmd;    /* position */ +  MinMax    mm;     /* position */    OptAnc    anc;    int       value;  /* weighted value */    UChar     map[CHAR_MAP_SIZE]; @@ -5148,11 +5543,10 @@ is_full_opt_exact(OptStr* e)  static void  clear_opt_exact(OptStr* e)  { -  clear_mml(&e->mmd); +  clear_mml(&e->mm);    clear_opt_anc_info(&e->anc);    e->reach_end      = 0;    e->case_fold      = 0; -  e->good_case_fold = 0;    e->len            = 0;    e->s[0]           = '\0';  } @@ -5176,11 +5570,6 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)        to->case_fold = 1;      } -    else { -      if (to->good_case_fold != 0) { -        if (add->good_case_fold == 0) return 0; -      } -    }    }    r = 0; @@ -5235,7 +5624,7 @@ alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)      return ;    } -  if (! is_equal_mml(&to->mmd, &add->mmd)) { +  if (! is_equal_mml(&to->mm, &add->mm)) {      clear_opt_exact(to);      return ;    } @@ -5257,8 +5646,6 @@ alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)    to->len = i;    if (add->case_fold != 0)      to->case_fold = 1; -  if (add->good_case_fold == 0) -    to->good_case_fold = 0;    alt_merge_opt_anc_info(&to->anc, &add->anc);    if (! to->reach_end) to->anc.right = 0; @@ -5291,10 +5678,7 @@ select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)    if (now->case_fold == 0) vn *= 2;    if (alt->case_fold == 0) va *= 2; -  if (now->good_case_fold != 0) vn *= 4; -  if (alt->good_case_fold != 0) va *= 4; - -  if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0) +  if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)      copy_opt_exact(now, alt);  } @@ -5378,7 +5762,7 @@ select_opt_map(OptMap* now, OptMap* alt)    vn = z / now->value;    va = z / alt->value; -  if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0) +  if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)      copy_opt_map(now, alt);  } @@ -5392,17 +5776,14 @@ comp_opt_exact_or_map(OptStr* e, OptMap* m)    if (m->value <= 0) return -1;    if (e->case_fold != 0) { -    if (e->good_case_fold != 0) -      case_value = 2; -    else -      case_value = 1; +    case_value = 1;    }    else      case_value = 3;    ae = COMP_EM_BASE * e->len * case_value;    am = COMP_EM_BASE * 5 * 2 / m->value; -  return comp_distance_value(&e->mmd, &m->mmd, ae, am); +  return comp_distance_value(&e->mm, &m->mm, ae, am);  }  static void @@ -5410,14 +5791,14 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)  {    int i, val; -  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ +  /* if (! is_equal_mml(&to->mm, &add->mm)) return ; */    if (to->value == 0) return ; -  if (add->value == 0 || to->mmd.max < add->mmd.min) { +  if (add->value == 0 || to->mm.max < add->mm.min) {      clear_opt_map(to);      return ;    } -  alt_merge_mml(&to->mmd, &add->mmd); +  alt_merge_mml(&to->mm, &add->mm);    val = 0;    for (i = 0; i < CHAR_MAP_SIZE; i++) { @@ -5435,9 +5816,9 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)  static void  set_bound_node_opt_info(OptNode* opt, MinMax* plen)  { -  copy_mml(&(opt->sb.mmd),  plen); -  copy_mml(&(opt->spr.mmd), plen); -  copy_mml(&(opt->map.mmd), plen); +  copy_mml(&(opt->sb.mm),  plen); +  copy_mml(&(opt->spr.mm), plen); +  copy_mml(&(opt->map.mm), plen);  }  static void @@ -5472,7 +5853,7 @@ concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)    }    if (add->map.value > 0 && to->len.max == 0) { -    if (add->map.mmd.max == 0) +    if (add->map.mm.max == 0)        add->map.anc.left |= to->anc.left;    } @@ -5497,10 +5878,7 @@ concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)    if (to->spr.len > 0) {      if (add->len.max > 0) { -      if (to->spr.len > (int )add->len.max) -        to->spr.len = add->len.max; - -      if (to->spr.mmd.max == 0) +      if (to->spr.mm.max == 0)          select_opt_exact(enc, &to->sb, &to->spr);        else          select_opt_exact(enc, &to->sm, &to->spr); @@ -5540,7 +5918,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)    r = 0;    enc = env->enc;    clear_node_opt_info(opt); -  set_bound_node_opt_info(opt, &env->mmd); +  set_bound_node_opt_info(opt, &env->mm);    switch (NODE_TYPE(node)) {    case NODE_LIST: @@ -5552,7 +5930,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)        do {          r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);          if (r == 0) { -          add_mml(&nenv.mmd, &xo.len); +          add_mml(&nenv.mm, &xo.len);            concat_left_node_opt_info(enc, opt, &xo);          }        } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd))); @@ -5577,9 +5955,8 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)      {        StrNode* sn = STR_(node);        int slen = (int )(sn->end - sn->s); -      /* int is_raw = NODE_STRING_IS_RAW(node); */ -      if (! NODE_STRING_IS_AMBIG(node)) { +      if (! NODE_STRING_IS_CASE_FOLD_MATCH(node)) {          concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);          if (slen > 0) {            add_char_opt_map(&opt->map, *(sn->s), enc); @@ -5587,28 +5964,20 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)          set_mml(&opt->len, slen, slen);        }        else { -        int max; +        int max, min; -        if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) { -          int n = onigenc_strlen(enc, sn->s, sn->end); -          max = ONIGENC_MBC_MAXLEN_DIST(enc) * n; -        } -        else { -          concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc); -          opt->sb.case_fold = 1; -          if (NODE_STRING_IS_GOOD_AMBIG(node)) -            opt->sb.good_case_fold = 1; - -          if (slen > 0) { -            r = add_char_amb_opt_map(&opt->map, sn->s, sn->end, -                                     enc, env->case_fold_flag); -            if (r != 0) break; -          } +        concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc); +        opt->sb.case_fold = 1; -          max = slen; +        if (slen > 0) { +          r = add_char_amb_opt_map(&opt->map, sn->s, sn->end, +                                   enc, env->case_fold_flag); +          if (r != 0) break;          } -        set_mml(&opt->len, slen, max); +        max = slen; +        min = sn->case_min_len * ONIGENC_MBC_MINLEN(enc); +        set_mml(&opt->len, min, max);        }      }      break; @@ -5618,7 +5987,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)        int z;        CClassNode* cc = CCLASS_(node); -      /* no need to check ignore case. (set in setup_tree()) */ +      /* no need to check ignore case. (set in tune_tree()) */        if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {          OnigLen min = ONIGENC_MBC_MINLEN(enc); @@ -5728,11 +6097,11 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)          break;        }        backs = BACKREFS_P(br); -      min = tree_min_len(mem_env[backs[0]].node, env->scan_env); -      max = tree_max_len(mem_env[backs[0]].node, env->scan_env); +      min = tree_min_len(mem_env[backs[0]].mem_node, env->scan_env); +      max = tree_max_len(mem_env[backs[0]].mem_node, env->scan_env);        for (i = 1; i < br->back_num; i++) { -        tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env); -        tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env); +        tmin = tree_min_len(mem_env[backs[i]].mem_node, env->scan_env); +        tmax = tree_max_len(mem_env[backs[i]].mem_node, env->scan_env);          if (min > tmin) min = tmin;          if (max < tmax) max = tmax;        } @@ -5782,7 +6151,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)        }        if (IS_INFINITE_REPEAT(qn->upper)) { -        if (env->mmd.max == 0 && +        if (env->mm.max == 0 &&              NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {            if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))              add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML); @@ -5850,7 +6219,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)            copy_opt_env(&nenv, env);            r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);            if (r == 0) { -            add_mml(&nenv.mmd, &xo.len); +            add_mml(&nenv.mm, &xo.len);              concat_left_node_opt_info(enc, opt, &xo);              if (IS_NOT_NULL(en->te.Then)) {                r = optimize_nodes(en->te.Then, &xo, &nenv); @@ -5899,15 +6268,6 @@ set_optimize_exact(regex_t* reg, OptStr* e)    if (e->case_fold) {      reg->optimize = OPTIMIZE_STR_CASE_FOLD; -    if (e->good_case_fold != 0) { -      if (e->len >= 2) { -        r = set_sunday_quick_search_or_bmh_skip_table(reg, 1, -                             reg->exact, reg->exact_end, -                             reg->map, &(reg->map_offset)); -        if (r != 0) return r; -        reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST; -      } -    }    }    else {      int allow_reverse; @@ -5930,11 +6290,17 @@ set_optimize_exact(regex_t* reg, OptStr* e)      }    } -  reg->dmin = e->mmd.min; -  reg->dmax = e->mmd.max; +  reg->dist_min = e->mm.min; +  reg->dist_max = e->mm.max; -  if (reg->dmin != INFINITE_LEN) { -    reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact); +  if (reg->dist_min != INFINITE_LEN) { +    int n; +    if (e->case_fold != 0) +      n = 1; +    else +      n = (int )(reg->exact_end - reg->exact); + +    reg->threshold_len = reg->dist_min + n;    }    return 0; @@ -5949,11 +6315,11 @@ set_optimize_map(regex_t* reg, OptMap* m)      reg->map[i] = m->map[i];    reg->optimize   = OPTIMIZE_MAP; -  reg->dmin       = m->mmd.min; -  reg->dmax       = m->mmd.max; +  reg->dist_min   = m->mm.min; +  reg->dist_max   = m->mm.max; -  if (reg->dmin != INFINITE_LEN) { -    reg->threshold_len = reg->dmin + 1; +  if (reg->dist_min != INFINITE_LEN) { +    reg->threshold_len = reg->dist_min + 1;    }  } @@ -5979,7 +6345,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)    env.options        = reg->options;    env.case_fold_flag = reg->case_fold_flag;    env.scan_env       = scan_env; -  clear_mml(&env.mmd); +  clear_mml(&env.mm);    r = optimize_nodes(node, &opt, &env);    if (r != 0) return r; @@ -5995,8 +6361,8 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)                                    ANCR_PREC_READ_NOT);    if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) { -    reg->anchor_dmin = opt.len.min; -    reg->anchor_dmax = opt.len.max; +    reg->anc_dist_min = opt.len.min; +    reg->anc_dist_max = opt.len.max;    }    if (opt.sb.len > 0 || opt.sm.len > 0) { @@ -6031,8 +6397,8 @@ clear_optimize_info(regex_t* reg)  {    reg->optimize      = OPTIMIZE_NONE;    reg->anchor        = 0; -  reg->anchor_dmin   = 0; -  reg->anchor_dmax   = 0; +  reg->anc_dist_min  = 0; +  reg->anc_dist_max  = 0;    reg->sub_anchor    = 0;    reg->exact_end     = (UChar* )NULL;    reg->map_offset    = 0; @@ -6151,12 +6517,12 @@ print_optimize_info(FILE* f, regex_t* reg)  {    static const char* on[] = { "NONE", "STR",                                "STR_FAST", "STR_FAST_STEP_FORWARD", -                              "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" }; +                              "STR_CASE_FOLD", "MAP" };    fprintf(f, "optimize: %s\n", on[reg->optimize]);    fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);    if ((reg->anchor & ANCR_END_BUF_MASK) != 0) -    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); +    print_distance_range(f, reg->anc_dist_min, reg->anc_dist_max);    fprintf(f, "\n");    if (reg->optimize) { @@ -6304,7 +6670,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    Node*  root;    ScanEnv  scan_env;  #ifdef USE_CALL -  UnsetAddrList  uslist; +  UnsetAddrList  uslist = {0};  #endif    root = 0; @@ -6328,13 +6694,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    reg->string_pool_end    = 0;    reg->num_mem            = 0;    reg->num_repeat         = 0; -  reg->num_null_check     = 0; +  reg->num_empty_check    = 0;    reg->repeat_range_alloc = 0; -  reg->repeat_range       = (OnigRepeatRange* )NULL; +  reg->repeat_range       = (RepeatRange* )NULL; +  reg->empty_status_mem   = 0;    r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);    if (r != 0) goto err; +  r = reduce_string_list(root); +  if (r != 0) goto err; +    /* mixed use named group and no-named group */    if (scan_env.num_named > 0 &&        IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && @@ -6355,38 +6725,65 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,      r = unset_addr_list_init(&uslist, scan_env.num_call);      if (r != 0) goto err;      scan_env.unset_addr_list = &uslist; -    r = setup_call(root, &scan_env, 0); +    r = tune_call(root, &scan_env, 0);      if (r != 0) goto err_unset; -    r = setup_call2(root); +    r = tune_call2(root);      if (r != 0) goto err_unset;      r = recursive_call_check_trav(root, &scan_env, 0);      if (r  < 0) goto err_unset;      r = infinite_recursive_call_check_trav(root, &scan_env);      if (r != 0) goto err_unset; -    setup_called_state(root, 0); +    tune_called_state(root, 0);    }    reg->num_call = scan_env.num_call;  #endif -  r = setup_tree(root, reg, 0, &scan_env); +#ifdef ONIG_DEBUG_PARSE +  fprintf(stderr, "MAX PARSE DEPTH: %d\n", scan_env.max_parse_depth); +  fprintf(stderr, "TREE (parsed)\n"); +  print_tree(stderr, root); +  fprintf(stderr, "\n"); +#endif + +  r = tune_tree(root, reg, 0, &scan_env);    if (r != 0) goto err_unset; +  if (scan_env.backref_num != 0) { +    set_parent_node_trav(root, NULL_NODE); +    r = set_empty_repeat_node_trav(root, NULL_NODE, &scan_env); +    if (r != 0) goto err_unset; +    set_empty_status_check_trav(root, &scan_env); +  } +  #ifdef ONIG_DEBUG_PARSE +  fprintf(stderr, "TREE (after tune)\n");    print_tree(stderr, root); +  fprintf(stderr, "\n");  #endif -  reg->capture_history  = scan_env.capture_history; -  reg->bt_mem_start     = scan_env.bt_mem_start; -  reg->bt_mem_start    |= reg->capture_history; -  if (IS_FIND_CONDITION(reg->options)) -    MEM_STATUS_ON_ALL(reg->bt_mem_end); +  reg->capture_history  = scan_env.cap_history; +  reg->push_mem_start   = scan_env.backtrack_mem | scan_env.cap_history; + +#ifdef USE_CALLOUT +  if (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) { +    reg->push_mem_end = reg->push_mem_start; +  }    else { -    reg->bt_mem_end  = scan_env.bt_mem_end; -    reg->bt_mem_end |= reg->capture_history; +    if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start)) +      reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history; +    else +      reg->push_mem_end = reg->push_mem_start & +                        (scan_env.backrefed_mem | scan_env.cap_history);    } -  reg->bt_mem_start |= reg->bt_mem_end; +#else +  if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start)) +    reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history; +  else +    reg->push_mem_end = reg->push_mem_start & +                      (scan_env.backrefed_mem | scan_env.cap_history); +#endif    clear_optimize_info(reg);  #ifndef ONIG_DONT_OPTIMIZE @@ -6420,14 +6817,20 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,      }  #endif -    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0) +    set_addr_in_repeat_range(reg); + +    if ((reg->push_mem_end != 0) +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR +        || (reg->num_repeat      != 0) +        || (reg->num_empty_check != 0) +#endif  #ifdef USE_CALLOUT          || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)  #endif          )        reg->stack_pop_level = STACK_POP_LEVEL_ALL;      else { -      if (reg->bt_mem_start != 0) +      if (reg->push_mem_start != 0)          reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;        else          reg->stack_pop_level = STACK_POP_LEVEL_FREE; @@ -6560,11 +6963,14 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,    if (IS_NULL(*reg)) return ONIGERR_MEMORY;    r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); -  if (r != 0) goto err; +  if (r != 0) { +    xfree(*reg); +    *reg = NULL; +    return r; +  }    r = onig_compile(*reg, pattern, pattern_end, einfo);    if (r != 0) { -  err:      onig_free(*reg);      *reg = NULL;    } @@ -6709,12 +7115,14 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)  #ifdef ONIG_DEBUG_PARSE +#ifdef USE_CALL  static void  p_string(FILE* f, int len, UChar* s)  {    fputs(":", f);    while (len-- > 0) { fputc(*s++, f); }  } +#endif  static void  Indent(FILE* f, int indent) @@ -6734,7 +7142,7 @@ print_indent_tree(FILE* f, Node* node, int indent)    Indent(f, indent);    if (IS_NULL(node)) {      fprintf(f, "ERROR: null node!!!\n"); -    exit (0); +    exit(0);    }    type = NODE_TYPE(node); @@ -6758,28 +7166,22 @@ print_indent_tree(FILE* f, Node* node, int indent)    case NODE_STRING:      { +      char* str;        char* mode; -      char* dont; -      char* good; -      if (NODE_STRING_IS_RAW(node)) -        mode = "-raw"; -      else if (NODE_STRING_IS_AMBIG(node)) -        mode = "-ambig"; +      if (NODE_STRING_IS_CRUDE(node)) +        mode = "-crude"; +      else if (NODE_STRING_IS_CASE_FOLD_MATCH(node)) +        mode = "-case_fold_match";        else          mode = ""; -      if (NODE_STRING_IS_GOOD_AMBIG(node)) -        good = "-good"; +      if (STR_(node)->s == STR_(node)->end) +        str = "empty-string";        else -        good = ""; +        str = "string"; -      if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) -        dont = " (dont-opt)"; -      else -        dont = ""; - -      fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node); +      fprintf(f, "<%s%s:%p>", str, mode, node);        for (p = STR_(node)->s; p < STR_(node)->end; p++) {          if (*p >= 0x20 && *p < 0x7f)            fputc(*p, f); @@ -6901,6 +7303,34 @@ print_indent_tree(FILE* f, Node* node, int indent)    case NODE_BAG:      fprintf(f, "<bag:%p> ", node); +    if (BAG_(node)->type == BAG_IF_ELSE) { +      Node* Then; +      Node* Else; +      BagNode* bn; + +      bn = BAG_(node); +      fprintf(f, "if-else\n"); +      print_indent_tree(f, NODE_BODY(node), indent + add); + +      Then = bn->te.Then; +      Else = bn->te.Else; +      if (IS_NULL(Then)) { +        Indent(f, indent + add); +        fprintf(f, "THEN empty\n"); +      } +      else +        print_indent_tree(f, Then, indent + add); + +      if (IS_NULL(Else)) { +        Indent(f, indent + add); +        fprintf(f, "ELSE empty\n"); +      } +      else +        print_indent_tree(f, Else, indent + add); + +      break; +    } +      switch (BAG_(node)->type) {      case BAG_OPTION:        fprintf(f, "option:%d", BAG_(node)->o.options); @@ -6911,8 +7341,7 @@ print_indent_tree(FILE* f, Node* node, int indent)      case BAG_STOP_BACKTRACK:        fprintf(f, "stop-bt");        break; -    case BAG_IF_ELSE: -      fprintf(f, "if-else"); +    default:        break;      }      fprintf(f, "\n"); diff --git a/src/regenc.c b/src/regenc.c index 9fab721..16ac313 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -2,7 +2,7 @@    regenc.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -182,7 +182,8 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,      p += enclen(enc, p);    }    else { -    if (prev) *prev = (const UChar* )NULL; /* Sorry */ +    if (prev) +      *prev = onigenc_get_prev_char_head(enc, start, p);    }    return p;  } @@ -208,20 +209,6 @@ onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)    return (UChar* )s;  } -#if 0 -extern int -onigenc_mbc_enc_len_end(OnigEncoding enc, const UChar* p, const UChar* end) -{ -  int len; -  int n; - -  len = ONIGENC_MBC_ENC_LEN(enc, p); -  n = (int )(end - p); - -  return (n < len ? n : len); -} -#endif -  extern UChar*  onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)  { @@ -705,18 +692,6 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,    return 1; /* return byte length of converted char to lower */  } -#if 0 -extern int -onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag, -                               const UChar** pp, const UChar* end) -{ -  const UChar* p = *pp; - -  (*pp)++; -  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); -} -#endif -  extern int  onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)  { @@ -833,41 +808,6 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,    }  } -#if 0 -extern int -onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, -                             const UChar** pp, const UChar* end) -{ -  const UChar* p = *pp; - -  if (ONIGENC_IS_MBC_ASCII(p)) { -    (*pp)++; -    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); -  } - -  (*pp) += enclen(enc, p); -  return FALSE; -} -#endif - -extern int -onigenc_mb2_code_to_mbclen(OnigCodePoint code) -{ -  if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - -  if ((code & 0xff00) != 0) return 2; -  else return 1; -} - -extern int -onigenc_mb4_code_to_mbclen(OnigCodePoint code) -{ -       if ((code & 0xff000000) != 0) return 4; -  else if ((code & 0xff0000) != 0) return 3; -  else if ((code & 0xff00) != 0) return 2; -  else return 1; -} -  extern int  onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)  { diff --git a/src/regenc.h b/src/regenc.h index bd2819e..db35841 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -4,7 +4,7 @@    regenc.h -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -163,13 +163,11 @@ extern int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const  /* methods for multi byte encoding */  extern OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));  extern int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -extern int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));  extern int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));  extern int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));  extern int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));  extern int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end));  extern int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -extern int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));  extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));  extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));  extern struct PropertyNameCtype* onigenc_euc_jp_lookup_property_name P_((register const char *str, register size_t len)); diff --git a/src/regerror.c b/src/regerror.c index e6d1806..b57a276 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -2,7 +2,7 @@    regerror.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/regexec.c b/src/regexec.c index f957b75..ce498c6 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@    regexec.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -39,6 +39,20 @@  #define CHECK_INTERRUPT_IN_MATCH +#define STACK_MEM_START(reg, i) \ +  (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \ +   STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i]))) + +#define STACK_MEM_END(reg, i) \ +  (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \ +   STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i]))) + +static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev); + +static int +search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp); + +  #ifdef USE_CALLOUT  typedef struct {    int last_match_at_call_counter; @@ -129,7 +143,7 @@ typedef struct {  } MatchArg; -#ifdef ONIG_DEBUG +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)  /* arguments type */  typedef enum { @@ -149,102 +163,108 @@ typedef struct {  } OpInfoType;  static OpInfoType OpInfo[] = { -  { OP_FINISH,            "finish" }, -  { OP_END,               "end"    }, -  { OP_EXACT1,            "exact1" }, -  { OP_EXACT2,            "exact2" }, -  { OP_EXACT3,            "exact3" }, -  { OP_EXACT4,            "exact4" }, -  { OP_EXACT5,            "exact5" }, -  { OP_EXACTN,            "exactn" }, -  { OP_EXACTMB2N1,        "exactmb2-n1" }, -  { OP_EXACTMB2N2,        "exactmb2-n2" }, -  { OP_EXACTMB2N3,        "exactmb2-n3" }, -  { OP_EXACTMB2N,         "exactmb2-n"  }, -  { OP_EXACTMB3N,         "exactmb3n"   }, -  { OP_EXACTMBN,          "exactmbn"    }, -  { OP_EXACT1_IC,         "exact1-ic"   }, -  { OP_EXACTN_IC,         "exactn-ic"   }, -  { OP_CCLASS,            "cclass"      }, -  { OP_CCLASS_MB,         "cclass-mb"   }, -  { OP_CCLASS_MIX,        "cclass-mix"  }, -  { OP_CCLASS_NOT,        "cclass-not"  }, -  { OP_CCLASS_MB_NOT,     "cclass-mb-not"  }, -  { OP_CCLASS_MIX_NOT,    "cclass-mix-not" }, -  { OP_ANYCHAR,               "anychar"     }, -  { OP_ANYCHAR_ML,            "anychar-ml"  }, -  { OP_ANYCHAR_STAR,          "anychar*"    }, -  { OP_ANYCHAR_ML_STAR,       "anychar-ml*" }, -  { OP_ANYCHAR_STAR_PEEK_NEXT,    "anychar*-peek-next" }, -  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next" }, -  { OP_WORD,                  "word"       }, -  { OP_WORD_ASCII,            "word-ascii" }, -  { OP_NO_WORD,               "not-word"   }, -  { OP_NO_WORD_ASCII,         "not-word-ascii" }, -  { OP_WORD_BOUNDARY,         "word-boundary"  }, -  { OP_NO_WORD_BOUNDARY,      "not-word-boundary" }, -  { OP_WORD_BEGIN,            "word-begin" }, -  { OP_WORD_END,              "word-end"   }, -  { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary" }, -  { OP_BEGIN_BUF,             "begin-buf"  }, -  { OP_END_BUF,               "end-buf"    }, -  { OP_BEGIN_LINE,            "begin-line" }, -  { OP_END_LINE,              "end-line"   }, -  { OP_SEMI_END_BUF,          "semi-end-buf"   }, -  { OP_BEGIN_POSITION,        "begin-position" }, -  { OP_BACKREF1,              "backref1"       }, -  { OP_BACKREF2,              "backref2"       }, -  { OP_BACKREF_N,             "backref-n"      }, -  { OP_BACKREF_N_IC,          "backref-n-ic"   }, -  { OP_BACKREF_MULTI,         "backref_multi"  }, -  { OP_BACKREF_MULTI_IC,      "backref_multi-ic"     }, -  { OP_BACKREF_WITH_LEVEL,    "backref_with_level"   }, -  { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c" }, -  { OP_BACKREF_CHECK,         "backref_check"        }, -  { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level" }, -  { OP_MEMORY_START_PUSH,     "mem-start-push"        }, -  { OP_MEMORY_START,          "mem-start"             }, -  { OP_MEMORY_END_PUSH,       "mem-end-push"          }, -  { OP_MEMORY_END_PUSH_REC,   "mem-end-push-rec"      }, -  { OP_MEMORY_END,            "mem-end"               }, -  { OP_MEMORY_END_REC,        "mem-end-rec"           }, -  { OP_FAIL,                  "fail"                  }, -  { OP_JUMP,                  "jump"                  }, -  { OP_PUSH,                  "push"                  }, -  { OP_PUSH_SUPER,            "push-super"            }, -  { OP_POP_OUT,               "pop-out"               }, +  { OP_FINISH,         "finish"}, +  { OP_END,            "end"}, +  { OP_STR_1,          "str_1"}, +  { OP_STR_2,          "str_2"}, +  { OP_STR_3,          "str_3"}, +  { OP_STR_4,          "str_4"}, +  { OP_STR_5,          "str_5"}, +  { OP_STR_N,          "str_n"}, +  { OP_STR_MB2N1,      "str_mb2-n1"}, +  { OP_STR_MB2N2,      "str_mb2-n2"}, +  { OP_STR_MB2N3,      "str_mb2-n3"}, +  { OP_STR_MB2N,       "str_mb2-n"}, +  { OP_STR_MB3N,       "str_mb3n"}, +  { OP_STR_MBN,        "str_mbn"}, +  { OP_STR_1_IC,       "str_1-ic"}, +  { OP_STR_N_IC,       "str_n-ic"}, +  { OP_CCLASS,         "cclass"}, +  { OP_CCLASS_MB,      "cclass-mb"}, +  { OP_CCLASS_MIX,     "cclass-mix"}, +  { OP_CCLASS_NOT,     "cclass-not"}, +  { OP_CCLASS_MB_NOT,  "cclass-mb-not"}, +  { OP_CCLASS_MIX_NOT, "cclass-mix-not"}, +  { OP_ANYCHAR,               "anychar"}, +  { OP_ANYCHAR_ML,            "anychar-ml"}, +  { OP_ANYCHAR_STAR,          "anychar*"}, +  { OP_ANYCHAR_ML_STAR,       "anychar-ml*"}, +  { OP_ANYCHAR_STAR_PEEK_NEXT,    "anychar*-peek-next"}, +  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"}, +  { OP_WORD,                  "word"}, +  { OP_WORD_ASCII,            "word-ascii"}, +  { OP_NO_WORD,               "not-word"}, +  { OP_NO_WORD_ASCII,         "not-word-ascii"}, +  { OP_WORD_BOUNDARY,         "word-boundary"}, +  { OP_NO_WORD_BOUNDARY,      "not-word-boundary"}, +  { OP_WORD_BEGIN,            "word-begin"}, +  { OP_WORD_END,              "word-end"}, +  { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"}, +  { OP_BEGIN_BUF,             "begin-buf"}, +  { OP_END_BUF,               "end-buf"}, +  { OP_BEGIN_LINE,            "begin-line"}, +  { OP_END_LINE,              "end-line"}, +  { OP_SEMI_END_BUF,          "semi-end-buf"}, +  { OP_BEGIN_POSITION,        "begin-position"}, +  { OP_BACKREF1,              "backref1"}, +  { OP_BACKREF2,              "backref2"}, +  { OP_BACKREF_N,             "backref-n"}, +  { OP_BACKREF_N_IC,          "backref-n-ic"}, +  { OP_BACKREF_MULTI,         "backref_multi"}, +  { OP_BACKREF_MULTI_IC,      "backref_multi-ic"}, +  { OP_BACKREF_WITH_LEVEL,    "backref_with_level"}, +  { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"}, +  { OP_BACKREF_CHECK,         "backref_check"}, +  { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"}, +  { OP_MEM_START_PUSH,        "mem-start-push"}, +  { OP_MEM_START,             "mem-start"}, +  { OP_MEM_END_PUSH,          "mem-end-push"}, +#ifdef USE_CALL +  { OP_MEM_END_PUSH_REC,      "mem-end-push-rec"}, +#endif +  { OP_MEM_END,               "mem-end"}, +#ifdef USE_CALL +  { OP_MEM_END_REC,           "mem-end-rec"}, +#endif +  { OP_FAIL,                  "fail"}, +  { OP_JUMP,                  "jump"}, +  { OP_PUSH,                  "push"}, +  { OP_PUSH_SUPER,            "push-super"}, +  { OP_POP_OUT,               "pop-out"},  #ifdef USE_OP_PUSH_OR_JUMP_EXACT -  { OP_PUSH_OR_JUMP_EXACT1,   "push-or-jump-e1"       }, +  { OP_PUSH_OR_JUMP_EXACT1,   "push-or-jump-e1"}, +#endif +  { OP_PUSH_IF_PEEK_NEXT,     "push-if-peek-next"}, +  { OP_REPEAT,                "repeat"}, +  { OP_REPEAT_NG,             "repeat-ng"}, +  { OP_REPEAT_INC,            "repeat-inc"}, +  { OP_REPEAT_INC_NG,         "repeat-inc-ng"}, +  { OP_EMPTY_CHECK_START,     "empty-check-start"}, +  { OP_EMPTY_CHECK_END,       "empty-check-end"}, +  { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"}, +#ifdef USE_CALL +  { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"}, +#endif +  { OP_PREC_READ_START,       "push-pos"}, +  { OP_PREC_READ_END,         "pop-pos"}, +  { OP_PREC_READ_NOT_START,   "prec-read-not-start"}, +  { OP_PREC_READ_NOT_END,     "prec-read-not-end"}, +  { OP_ATOMIC_START,          "atomic-start"}, +  { OP_ATOMIC_END,            "atomic-end"}, +  { OP_LOOK_BEHIND,           "look-behind"}, +  { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start"}, +  { OP_LOOK_BEHIND_NOT_END,   "look-behind-not-end"}, +  { OP_PUSH_SAVE_VAL,         "push-save-val"}, +  { OP_UPDATE_VAR,            "update-var"}, +#ifdef USE_CALL +  { OP_CALL,                  "call"}, +  { OP_RETURN,                "return"},  #endif -  { OP_PUSH_IF_PEEK_NEXT,     "push-if-peek-next"     }, -  { OP_REPEAT,                "repeat"                }, -  { OP_REPEAT_NG,             "repeat-ng"             }, -  { OP_REPEAT_INC,            "repeat-inc"            }, -  { OP_REPEAT_INC_NG,         "repeat-inc-ng"         }, -  { OP_REPEAT_INC_SG,         "repeat-inc-sg"         }, -  { OP_REPEAT_INC_NG_SG,      "repeat-inc-ng-sg"      }, -  { OP_EMPTY_CHECK_START,     "empty-check-start"     }, -  { OP_EMPTY_CHECK_END,       "empty-check-end"       }, -  { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst" }, -  { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push" }, -  { OP_PREC_READ_START,       "push-pos"              }, -  { OP_PREC_READ_END,         "pop-pos"               }, -  { OP_PREC_READ_NOT_START,   "prec-read-not-start"   }, -  { OP_PREC_READ_NOT_END,     "prec-read-not-end"     }, -  { OP_ATOMIC_START,          "atomic-start"          }, -  { OP_ATOMIC_END,            "atomic-end"            }, -  { OP_LOOK_BEHIND,           "look-behind"           }, -  { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start" }, -  { OP_LOOK_BEHIND_NOT_END,   "look-behind-not-end"   }, -  { OP_CALL,                  "call"                  }, -  { OP_RETURN,                "return"                }, -  { OP_PUSH_SAVE_VAL,         "push-save-val"         }, -  { OP_UPDATE_VAR,            "update-var"            },  #ifdef USE_CALLOUT -  { OP_CALLOUT_CONTENTS,      "callout-contents"      }, -  { OP_CALLOUT_NAME,          "callout-name"          }, +  { OP_CALLOUT_CONTENTS,      "callout-contents"}, +  { OP_CALLOUT_NAME,          "callout-name"},  #endif -  { -1, "" } +  { -1, ""}  };  static char* @@ -320,32 +340,32 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,    fprintf(f, "%s", op2name(opcode));    switch (opcode) { -  case OP_EXACT1: +  case OP_STR_1:      p_string(f, 1, p->exact.s); break; -  case OP_EXACT2: +  case OP_STR_2:      p_string(f, 2, p->exact.s); break; -  case OP_EXACT3: +  case OP_STR_3:      p_string(f, 3, p->exact.s); break; -  case OP_EXACT4: +  case OP_STR_4:      p_string(f, 4, p->exact.s); break; -  case OP_EXACT5: +  case OP_STR_5:      p_string(f, 5, p->exact.s); break; -  case OP_EXACTN: +  case OP_STR_N:      len = p->exact_n.n;      p_string(f, len, p->exact_n.s); break; -  case OP_EXACTMB2N1: +  case OP_STR_MB2N1:      p_string(f, 2, p->exact.s); break; -  case OP_EXACTMB2N2: +  case OP_STR_MB2N2:      p_string(f, 4, p->exact.s); break; -  case OP_EXACTMB2N3: +  case OP_STR_MB2N3:      p_string(f, 3, p->exact.s); break; -  case OP_EXACTMB2N: +  case OP_STR_MB2N:      len = p->exact_n.n;      p_len_string(f, len, 2, p->exact_n.s); break; -  case OP_EXACTMB3N: +  case OP_STR_MB3N:      len = p->exact_n.n;      p_len_string(f, len, 3, p->exact_n.s); break; -  case OP_EXACTMBN: +  case OP_STR_MBN:      {        int mb_len; @@ -357,11 +377,11 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,        while (n-- > 0) { fputc(*q++, f); }      }      break; -  case OP_EXACT1_IC: +  case OP_STR_1_IC:      len = enclen(enc, p->exact.s);      p_string(f, len, p->exact.s);      break; -  case OP_EXACTN_IC: +  case OP_STR_N_IC:      len = p->exact_n.n;      p_len_string(f, len, 1, p->exact_n.s);      break; @@ -375,13 +395,13 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,    case OP_CCLASS_MB_NOT:      {        OnigCodePoint ncode; -      OnigCodePoint* codes;       +      OnigCodePoint* codes;        codes = (OnigCodePoint* )p->cclass_mb.mb;        GET_CODE_POINT(ncode, codes);        codes++;        GET_CODE_POINT(code, codes); -      fprintf(f, ":%u:%u", code, ncode); +      fprintf(f, ":%d:0x%x", ncode, code);      }      break;    case OP_CCLASS_MIX: @@ -447,15 +467,18 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,      }      break; -  case OP_MEMORY_START: -  case OP_MEMORY_START_PUSH: +  case OP_MEM_START: +  case OP_MEM_START_PUSH:      mem = p->memory_start.num;      fprintf(f, ":%d", mem);      break; -  case OP_MEMORY_END_PUSH: -  case OP_MEMORY_END_PUSH_REC: -  case OP_MEMORY_END: -  case OP_MEMORY_END_REC: + +  case OP_MEM_END: +  case OP_MEM_END_PUSH: +#ifdef USE_CALL +  case OP_MEM_END_REC: +  case OP_MEM_END_PUSH_REC: +#endif      mem = p->memory_end.num;      fprintf(f, ":%d", mem);      break; @@ -499,8 +522,6 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,    case OP_REPEAT_INC:    case OP_REPEAT_INC_NG: -  case OP_REPEAT_INC_SG: -  case OP_REPEAT_INC_NG_SG:      mem = p->repeat.id;      fprintf(f, ":%d", mem);      break; @@ -511,7 +532,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,      break;    case OP_EMPTY_CHECK_END:    case OP_EMPTY_CHECK_END_MEMST: +#ifdef USE_CALL    case OP_EMPTY_CHECK_END_MEMST_PUSH: +#endif      mem = p->empty_check_end.mem;      fprintf(f, ":%d", mem);      break; @@ -534,10 +557,12 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,      p_rel_addr(f, addr, p, start);      break; +#ifdef USE_CALL    case OP_CALL:      addr = p->call.addr;      fprintf(f, ":{/%d}", addr);      break; +#endif    case OP_PUSH_SAVE_VAL:      { @@ -607,7 +632,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,    case OP_ATOMIC_START:    case OP_ATOMIC_END:    case OP_LOOK_BEHIND_NOT_END: +#ifdef USE_CALL    case OP_RETURN: +#endif      break;    default: @@ -615,7 +642,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,      break;    }  } -#endif /* ONIG_DEBUG */ +#endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */  #ifdef ONIG_DEBUG_COMPILE  extern void @@ -625,8 +652,8 @@ onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)    Operation* start = reg->ops;    Operation* end   = reg->ops + reg->ops_used; -  fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", -          reg->bt_mem_start, reg->bt_mem_end); +  fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n", +          reg->push_mem_start, reg->push_mem_end);    fprintf(f, "code-length: %d\n", reg->ops_used);    bp = start; @@ -943,7 +970,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)        result = ONIGERR_INVALID_ARGUMENT;\      }\      best_len = result;\ -    goto finish;\ +    goto match_at_end;\      break;\    }\  } while(0) @@ -965,18 +992,26 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)  /* handled by normal-POP */  #define STK_MEM_START              0x0010  #define STK_MEM_END                0x8030 -#define STK_REPEAT_INC             0x0050 +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR +#define STK_REPEAT_INC             (0x0040 | STK_MASK_POP_HANDLED) +#else +#define STK_REPEAT_INC             0x0040 +#endif  #ifdef USE_CALLOUT  #define STK_CALLOUT                0x0070  #endif  /* avoided by normal-POP */  #define STK_VOID                   0x0000  /* for fill a blank */ +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR +#define STK_EMPTY_CHECK_START      (0x3000 | STK_MASK_POP_HANDLED) +#else  #define STK_EMPTY_CHECK_START      0x3000 +#endif  #define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */  #define STK_MEM_END_MARK           0x8100  #define STK_TO_VOID_START          0x1200  /* mark for "(?>...)" */ -#define STK_REPEAT                 0x0300 +/* #define STK_REPEAT                 0x0300 */  #define STK_CALL_FRAME             0x0400  #define STK_RETURN                 0x0500  #define STK_SAVE_VAL               0x0600 @@ -1002,11 +1037,10 @@ typedef struct _StackType {        UChar*     pstr_prev; /* previous char position of pstr */      } state;      struct { -      int        count;  /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ -      Operation* pcode;  /* byte code position (head of repeated target) */ -    } repeat; -    struct { -      StackIndex si;     /* index of stack */ +      int        count; +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR +      StackIndex prev_index;  /* index of stack */ +#endif      } repeat_inc;      struct {        UChar *pstr;       /* start/end position */ @@ -1015,7 +1049,10 @@ typedef struct _StackType {        StackIndex prev_end;    /* prev. info (for backtrack  "(...)*" ) */      } mem;      struct { -      UChar *pstr;       /* start position */ +      UChar *pstr;            /* start position */ +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR +      StackIndex prev_index;  /* index of stack */ +#endif      } empty_check;  #ifdef USE_CALL      struct { @@ -1061,29 +1098,64 @@ struct OnigCalloutArgsStruct {  #endif +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR + +#define PTR_NUM_SIZE(reg)  ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2) +#define UPDATE_FOR_STACK_REALLOC do{\ +  repeat_stk      = (StackIndex* )alloc_base;\ +  empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ +  mem_start_stk   = (StackIndex* )(empty_check_stk + reg->num_empty_check);\ +  mem_end_stk     = mem_start_stk + num_mem + 1;\ +} while(0) + +#define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid] +#define LOAD_TO_REPEAT_STK_VAR(sid)  repeat_stk[sid] = GET_STACK_INDEX(stk) +#define POP_REPEAT_INC  else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;} + +#define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid] +#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)  empty_check_stk[sid] = GET_STACK_INDEX(stk) +#define POP_EMPTY_CHECK_START  else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;} + +#else + +#define PTR_NUM_SIZE(reg)  (((reg)->num_mem + 1) * 2) +#define UPDATE_FOR_STACK_REALLOC do{\ +  mem_start_stk = (StackIndex* )alloc_base;\ +  mem_end_stk   = mem_start_stk + num_mem + 1;\ +} while(0) + +#define SAVE_REPEAT_STK_VAR(sid) +#define LOAD_TO_REPEAT_STK_VAR(sid) +#define POP_REPEAT_INC + +#define SAVE_EMPTY_CHECK_STK_VAR(sid) +#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid) +#define POP_EMPTY_CHECK_START + +#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */  #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \    (msa).stack_p  = (void* )0;\    (msa).options  = (arg_option);\    (msa).region   = (arg_region);\    (msa).start    = (arg_start);\ -  (msa).match_stack_limit  = (mp)->match_stack_limit;\ -  (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ -  (msa).mp = mp;\ +  (msa).match_stack_limit  = (mpv)->match_stack_limit;\ +  (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\ +  (msa).mp = mpv;\    (msa).best_len = ONIG_MISMATCH;\ -  (msa).ptr_num  = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ +  (msa).ptr_num  = PTR_NUM_SIZE(reg);\  } while(0)  #else -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \    (msa).stack_p  = (void* )0;\    (msa).options  = (arg_option);\    (msa).region   = (arg_region);\    (msa).start    = (arg_start);\ -  (msa).match_stack_limit  = (mp)->match_stack_limit;\ -  (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ -  (msa).mp = mp;\ -  (msa).ptr_num  = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ +  (msa).match_stack_limit  = (mpv)->match_stack_limit;\ +  (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\ +  (msa).mp = mpv;\ +  (msa).ptr_num  = PTR_NUM_SIZE(reg);\  } while(0)  #endif @@ -1138,12 +1210,6 @@ struct OnigCalloutArgsStruct {    };\  } while(0) -#define UPDATE_FOR_STACK_REALLOC do{\ -  repeat_stk    = (StackIndex* )alloc_base;\ -  mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ -  mem_end_stk   = mem_start_stk + num_mem + 1;\ -} while(0) -  static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;  extern unsigned int @@ -1164,7 +1230,9 @@ onig_set_match_stack_limit_size(unsigned int size)  static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;  #define CHECK_RETRY_LIMIT_IN_MATCH  do {\ -  if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\ +  if (retry_in_match_counter++ > retry_limit_in_match) {\ +    MATCH_AT_ERROR_RETURN(ONIGERR_RETRY_LIMIT_IN_MATCH_OVER);\ +  }\  } while (0)  #else @@ -1554,19 +1622,23 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \    STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev) +#if 0  #define STACK_PUSH_REPEAT(sid, pat) do {\    STACK_ENSURE(1);\    stk->type = STK_REPEAT;\    stk->zid  = (sid);\ -  stk->u.repeat.pcode  = (pat);\ -  stk->u.repeat.count  = 0;\ +  stk->u.repeat.pcode = (pat);\    STACK_INC;\  } while(0) +#endif -#define STACK_PUSH_REPEAT_INC(sindex) do {\ +#define STACK_PUSH_REPEAT_INC(sid, ct) do {\    STACK_ENSURE(1);\    stk->type = STK_REPEAT_INC;\ -  stk->u.repeat_inc.si  = (sindex);\ +  stk->zid  = (sid);\ +  stk->u.repeat_inc.count = (ct);\ +  SAVE_REPEAT_STK_VAR(sid);\ +  LOAD_TO_REPEAT_STK_VAR(sid);\    STACK_INC;\  } while(0) @@ -1639,6 +1711,8 @@ stack_double(int is_alloca, char** arg_alloc_base,    stk->type = STK_EMPTY_CHECK_START;\    stk->zid  = (cnum);\    stk->u.empty_check.pstr = (s);\ +  SAVE_EMPTY_CHECK_STK_VAR(cnum);\ +  LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\    STACK_INC;\  } while(0) @@ -1776,7 +1850,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_BASE_CHECK(p, at) \    if ((p) < stk_base) {\      fprintf(stderr, "at %s\n", at);\ -    goto stack_error;\ +    MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\    }  #else  #define STACK_BASE_CHECK(p, at) @@ -1827,13 +1901,12 @@ stack_double(int is_alloca, char** arg_alloc_base,            mem_start_stk[stk->zid] = stk->u.mem.prev_start;\            mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\          }\ -        else if (stk->type == STK_REPEAT_INC) {\ -          STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ -        }\          else if (stk->type == STK_MEM_END) {\            mem_start_stk[stk->zid] = stk->u.mem.prev_start;\            mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\          }\ +        POP_REPEAT_INC \ +        POP_EMPTY_CHECK_START \          POP_CALLOUT_CASE\        }\      }\ @@ -1852,13 +1925,12 @@ stack_double(int is_alloca, char** arg_alloc_base,            mem_start_stk[stk->zid] = stk->u.mem.prev_start;\            mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\          }\ -        else if (stk->type == STK_REPEAT_INC) {\ -          STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ -        }\          else if (stk->type == STK_MEM_END) {\            mem_start_stk[stk->zid] = stk->u.mem.prev_start;\            mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\          }\ +        POP_REPEAT_INC \ +        POP_EMPTY_CHECK_START \          /* Don't call callout here because negation of total success by (?!..) (?<!..) */\        }\      }\ @@ -1910,26 +1982,47 @@ stack_double(int is_alloca, char** arg_alloc_base,    }\  } while(0) -#define STACK_EMPTY_CHECK(isnull,sid,s) do {\ -  StackType* k = stk;\ + +#define EMPTY_CHECK_START_SEARCH(sid, k) do {\ +  k = stk;\    while (1) {\      k--;\ -    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ +    STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \      if (k->type == STK_EMPTY_CHECK_START) {\ -      if (k->zid == (sid)) {\ -        (isnull) = (k->u.empty_check.pstr == (s));\ -        break;\ -      }\ +      if (k->zid == (sid)) break;\      }\    }\  } while(0) +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR + +#define GET_EMPTY_CHECK_START(sid, k) do {\ +  if (reg->num_call == 0) {\ +    k = STACK_AT(empty_check_stk[sid]);\ +  }\ +  else {\ +    EMPTY_CHECK_START_SEARCH(sid, k);\ +  }\ +} while(0) +#else + +#define GET_EMPTY_CHECK_START(sid, k)  EMPTY_CHECK_START_SEARCH(sid, k) + +#endif + + +#define STACK_EMPTY_CHECK(isnull, sid, s) do {\ +  StackType* k;\ +  GET_EMPTY_CHECK_START(sid, k);\ +  (isnull) = (k->u.empty_check.pstr == (s));\ +} while(0) +  #define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\    if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\      (addr) = 0;\    }\    else {\ -    if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\ +    if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\        (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\      else\        (addr) = (UChar* )k->u.mem.prev_end;\ @@ -1937,45 +2030,30 @@ stack_double(int is_alloca, char** arg_alloc_base,  } while (0)  #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT -#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\ -  StackType* k = stk;\ -  while (1) {\ -    k--;\ -    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \ -    if (k->type == STK_EMPTY_CHECK_START) {\ -      if (k->zid == (sid)) {\ -        if (k->u.empty_check.pstr != (s)) {\ -          (isnull) = 0;\ -          break;\ +#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\ +  StackType* k;\ +  GET_EMPTY_CHECK_START(sid, k);\ +  if (k->u.empty_check.pstr != (s)) {\ +    (isnull) = 0;\ +  }\ +  else {\ +    UChar* endp;\ +    (isnull) = 1;\ +    while (k < stk) {\ +      if (k->type == STK_MEM_START &&\ +        MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\ +        STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\ +        if (endp == 0) {\ +          (isnull) = 0; break;\          }\ -        else {\ -          UChar* endp;\ -          int level = 0;\ -          (isnull) = 1;\ -          while (k < stk) {\ -            if (k->type == STK_MEM_START && level == 0) {\ -              STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\ -              if (endp == 0) {\ -                (isnull) = 0; break;\ -              }\ -              else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\ -                (isnull) = 0; break;\ -              }\ -              else if (endp != s) {\ -                (isnull) = -1; /* empty, but position changed */ \ -              }\ -            }\ -            else if (k->type == STK_PREC_READ_START) {\ -              level++;\ -            }\ -            else if (k->type == STK_PREC_READ_END) {\ -              level--;\ -            }\ -            k++;\ -          }\ -          break;\ +        else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\ +          (isnull) = 0; break;\ +        }\ +        else if (endp != s) {\ +          (isnull) = -1; /* empty, but position changed */ \          }\        }\ +      k++;\      }\    }\  } while(0) @@ -1995,11 +2073,11 @@ stack_double(int is_alloca, char** arg_alloc_base,            }\            else {\              UChar* endp;\ -            int prec_level = 0;\              (isnull) = 1;\              while (k < stk) {\                if (k->type == STK_MEM_START) {\ -                if (level == 0 && prec_level == 0) {\ +                if (level == 0 && \ +                  MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\                    STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\                    if (endp == 0) {\                      (isnull) = 0; break;\ @@ -2018,12 +2096,6 @@ stack_double(int is_alloca, char** arg_alloc_base,                else if (k->type == STK_EMPTY_CHECK_END) {\                  if (k->zid == (sid)) level--;\                }\ -              else if (k->type == STK_PREC_READ_START) {\ -                prec_level++;\ -              }\ -              else if (k->type == STK_PREC_READ_END) {\ -                prec_level--;\ -              }\                k++;\              }\              break;\ @@ -2062,24 +2134,45 @@ stack_double(int is_alloca, char** arg_alloc_base,  } while(0)  #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */ -#define STACK_GET_REPEAT(sid, k) do {\ -  int level = 0;\ -  k = stk;\ +#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\ +  StackType* k = stk;\    while (1) {\ -    k--;\ -    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ -    if (k->type == STK_REPEAT) {\ -      if (level == 0) {\ -        if (k->zid == (sid)) {\ -          break;\ +    (k)--;\ +    STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\ +    if ((k)->type == STK_REPEAT_INC) {\ +      if ((k)->zid == (sid)) {\ +        (c) = (k)->u.repeat_inc.count;\ +        break;\ +      }\ +    }\ +    else if ((k)->type == STK_RETURN) {\ +      int level = -1;\ +      while (1) {\ +        (k)--;\ +        if ((k)->type == STK_CALL_FRAME) {\ +          level++;\ +          if (level == 0) break;\          }\ +        else if ((k)->type == STK_RETURN) level--;\        }\      }\ -    else if (k->type == STK_CALL_FRAME) level--;\ -    else if (k->type == STK_RETURN)     level++;\    }\  } while(0) +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR + +#define STACK_GET_REPEAT_COUNT(sid, c) do {\ +  if (reg->num_call == 0) {\ +    (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\ +  }\ +  else {\ +    STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\ +  }\ +} while(0) +#else +#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c) +#endif +  #define STACK_RETURN(addr)  do {\    int level = 0;\    StackType* k = stk;\ @@ -2481,6 +2574,8 @@ typedef struct {  #define MATCH_DEBUG_OUT(offset)  #endif +#define MATCH_AT_ERROR_RETURN(err_code)  best_len = err_code; goto match_at_end +  /* match data(str - end) from position (sstart). */  /* if sstart == str then set sprev to NULL. */ @@ -2500,20 +2595,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    static const void *opcode_to_label[] = {    &&L_FINISH,    &&L_END, -  &&L_EXACT1, -  &&L_EXACT2, -  &&L_EXACT3, -  &&L_EXACT4, -  &&L_EXACT5, -  &&L_EXACTN, -  &&L_EXACTMB2N1, -  &&L_EXACTMB2N2, -  &&L_EXACTMB2N3, -  &&L_EXACTMB2N, -  &&L_EXACTMB3N, -  &&L_EXACTMBN, -  &&L_EXACT1_IC, -  &&L_EXACTN_IC, +  &&L_STR_1, +  &&L_STR_2, +  &&L_STR_3, +  &&L_STR_4, +  &&L_STR_5, +  &&L_STR_N, +  &&L_STR_MB2N1, +  &&L_STR_MB2N2, +  &&L_STR_MB2N3, +  &&L_STR_MB2N, +  &&L_STR_MB3N, +  &&L_STR_MBN, +  &&L_STR_1_IC, +  &&L_STR_N_IC,    &&L_CCLASS,    &&L_CCLASS_MB,    &&L_CCLASS_MIX, @@ -2551,12 +2646,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    &&L_BACKREF_WITH_LEVEL_IC,    &&L_BACKREF_CHECK,    &&L_BACKREF_CHECK_WITH_LEVEL, -  &&L_MEMORY_START, -  &&L_MEMORY_START_PUSH, -  &&L_MEMORY_END_PUSH, -  &&L_MEMORY_END_PUSH_REC, -  &&L_MEMORY_END, -  &&L_MEMORY_END_REC, +  &&L_MEM_START, +  &&L_MEM_START_PUSH, +  &&L_MEM_END_PUSH, +#ifdef USE_CALL +  &&L_MEM_END_PUSH_REC, +#endif +  &&L_MEM_END, +#ifdef USE_CALL +  &&L_MEM_END_REC, +#endif    &&L_FAIL,    &&L_JUMP,    &&L_PUSH, @@ -2570,12 +2669,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    &&L_REPEAT_NG,    &&L_REPEAT_INC,    &&L_REPEAT_INC_NG, -  &&L_REPEAT_INC_SG, -  &&L_REPEAT_INC_NG_SG,    &&L_EMPTY_CHECK_START,    &&L_EMPTY_CHECK_END,    &&L_EMPTY_CHECK_END_MEMST, +#ifdef USE_CALL    &&L_EMPTY_CHECK_END_MEMST_PUSH, +#endif    &&L_PREC_READ_START,    &&L_PREC_READ_END,    &&L_PREC_READ_NOT_START, @@ -2585,10 +2684,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    &&L_LOOK_BEHIND,    &&L_LOOK_BEHIND_NOT_START,    &&L_LOOK_BEHIND_NOT_END, -  &&L_CALL, -  &&L_RETURN,    &&L_PUSH_SAVE_VAL,    &&L_UPDATE_VAR, +#ifdef USE_CALL +  &&L_CALL, +  &&L_RETURN, +#endif  #ifdef USE_CALLOUT    &&L_CALLOUT_CONTENTS,    &&L_CALLOUT_NAME, @@ -2606,15 +2707,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    char *alloc_base;    StackType *stk_base, *stk, *stk_end;    StackType *stkp; /* used as any purpose. */ -  StackIndex si; -  StackIndex *repeat_stk;    StackIndex *mem_start_stk, *mem_end_stk;    UChar* keep; + +#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR +  StackIndex *repeat_stk; +  StackIndex *empty_check_stk; +#endif  #ifdef USE_RETRY_LIMIT_IN_MATCH    unsigned long retry_limit_in_match;    unsigned long retry_in_match_counter;  #endif -  #ifdef USE_CALLOUT    int of;  #endif @@ -2700,15 +2803,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,              rmt[0].rm_eo = (regoff_t )(s    - str);              for (i = 1; i <= num_mem; i++) {                if (mem_end_stk[i] != INVALID_STACK_INDEX) { -                if (MEM_STATUS_AT(reg->bt_mem_start, i)) -                  rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); -                else -                  rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str); - -                rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i) -                                            ? STACK_AT(mem_end_stk[i])->u.mem.pstr -                                            : (UChar* )((void* )mem_end_stk[i])) -                                           - str); +                rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str); +                rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i)   - str);                }                else {                  rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; @@ -2721,14 +2817,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,              region->end[0] = (int )(s    - str);              for (i = 1; i <= num_mem; i++) {                if (mem_end_stk[i] != INVALID_STACK_INDEX) { -                if (MEM_STATUS_AT(reg->bt_mem_start, i)) -                  region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); -                else -                  region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str); - -                region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) -                                         ? STACK_AT(mem_end_stk[i])->u.mem.pstr -                                         : (UChar* )((void* )mem_end_stk[i])) - str); +                region->beg[i] = (int )(STACK_MEM_START(reg, i) - str); +                region->end[i] = (int )(STACK_MEM_END(reg, i)   - str);                }                else {                  region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; @@ -2756,10 +2846,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,                stkp = stk_base;                r = make_capture_history_tree(region->history_root, &stkp,                                              stk, (UChar* )str, reg); -              if (r < 0) { -                best_len = r; /* error code */ -                goto finish; -              } +              if (r < 0) MATCH_AT_ERROR_RETURN(r);              }  #endif /* USE_CAPTURE_HISTORY */  #ifdef USE_POSIX_API_REGION_OPTION @@ -2784,9 +2871,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        }        /* default behavior: return first-matching result. */ -      goto finish; +      goto match_at_end; -    CASE_OP(EXACT1) +    CASE_OP(STR_1)        DATA_ENSURE(1);        ps = p->exact.s;        if (*ps != *s) goto fail; @@ -2794,7 +2881,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        NEXT_OUT; -    CASE_OP(EXACT1_IC) +    CASE_OP(STR_1_IC)        {          int len;          UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2815,7 +2902,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        NEXT_OUT; -    CASE_OP(EXACT2) +    CASE_OP(STR_2)        DATA_ENSURE(2);        ps = p->exact.s;        if (*ps != *s) goto fail; @@ -2826,7 +2913,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACT3) +    CASE_OP(STR_3)        DATA_ENSURE(3);        ps = p->exact.s;        if (*ps != *s) goto fail; @@ -2839,7 +2926,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACT4) +    CASE_OP(STR_4)        DATA_ENSURE(4);        ps = p->exact.s;        if (*ps != *s) goto fail; @@ -2854,7 +2941,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACT5) +    CASE_OP(STR_5)        DATA_ENSURE(5);        ps = p->exact.s;        if (*ps != *s) goto fail; @@ -2871,7 +2958,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACTN) +    CASE_OP(STR_N)        tlen = p->exact_n.n;        DATA_ENSURE(tlen);        ps = p->exact_n.s; @@ -2882,7 +2969,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACTN_IC) +    CASE_OP(STR_N_IC)        {          int len;          UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2900,6 +2987,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            DATA_ENSURE(0);            q = lowbuf;            while (len-- > 0) { +            if (ps >= endp) goto fail;              if (*ps != *q) goto fail;              ps++; q++;            } @@ -2909,7 +2997,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACTMB2N1) +    CASE_OP(STR_MB2N1)        DATA_ENSURE(2);        ps = p->exact.s;        if (*ps != *s) goto fail; @@ -2919,7 +3007,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        NEXT_OUT; -    CASE_OP(EXACTMB2N2) +    CASE_OP(STR_MB2N2)        DATA_ENSURE(4);        ps = p->exact.s;        if (*ps != *s) goto fail; @@ -2934,7 +3022,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACTMB2N3) +    CASE_OP(STR_MB2N3)        DATA_ENSURE(6);        ps = p->exact.s;        if (*ps != *s) goto fail; @@ -2953,7 +3041,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACTMB2N) +    CASE_OP(STR_MB2N)        tlen = p->exact_n.n;        DATA_ENSURE(tlen * 2);        ps = p->exact_n.s; @@ -2967,7 +3055,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACTMB3N) +    CASE_OP(STR_MB3N)        tlen = p->exact_n.n;        DATA_ENSURE(tlen * 3);        ps = p->exact_n.s; @@ -2983,7 +3071,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(EXACTMBN) +    CASE_OP(STR_MBN)        tlen  = p->exact_len_n.len; /* mb byte len */        tlen2 = p->exact_len_n.n;   /* number of chars */        tlen2 *= tlen; @@ -3014,7 +3102,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          UChar *ss;          int mb_len; -        DATA_ENSURE(1);          mb_len = enclen(encode, s);          DATA_ENSURE(mb_len);          ss = s; @@ -3303,7 +3390,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break;  #endif          default: -          goto bytecode_error; +          MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);            break;          } @@ -3403,46 +3490,50 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        INC_OP;        JUMP_OUT; -    CASE_OP(MEMORY_START_PUSH) +    CASE_OP(MEM_START_PUSH)        mem = p->memory_start.num;        STACK_PUSH_MEM_START(mem, s);        INC_OP;        JUMP_OUT; -    CASE_OP(MEMORY_START) +    CASE_OP(MEM_START)        mem = p->memory_start.num;        mem_start_stk[mem] = (StackIndex )((void* )s);        INC_OP;        JUMP_OUT; -    CASE_OP(MEMORY_END_PUSH) +    CASE_OP(MEM_END_PUSH)        mem = p->memory_end.num;        STACK_PUSH_MEM_END(mem, s);        INC_OP;        JUMP_OUT; -    CASE_OP(MEMORY_END) +    CASE_OP(MEM_END)        mem = p->memory_end.num;        mem_end_stk[mem] = (StackIndex )((void* )s);        INC_OP;        JUMP_OUT;  #ifdef USE_CALL -    CASE_OP(MEMORY_END_PUSH_REC) -      mem = p->memory_end.num; -      STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ -      si = GET_STACK_INDEX(stkp); -      STACK_PUSH_MEM_END(mem, s); -      mem_start_stk[mem] = si; -      INC_OP; -      JUMP_OUT; +    CASE_OP(MEM_END_PUSH_REC) +      { +        StackIndex si; + +        mem = p->memory_end.num; +        STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ +        si = GET_STACK_INDEX(stkp); +        STACK_PUSH_MEM_END(mem, s); +        mem_start_stk[mem] = si; +        INC_OP; +        JUMP_OUT; +      } -    CASE_OP(MEMORY_END_REC) +    CASE_OP(MEM_END_REC)        mem = p->memory_end.num;        mem_end_stk[mem] = (StackIndex )((void* )s);        STACK_GET_MEM_START(mem, stkp); -      if (MEM_STATUS_AT(reg->bt_mem_start, mem)) +      if (MEM_STATUS_AT(reg->push_mem_start, mem))          mem_start_stk[mem] = GET_STACK_INDEX(stkp);        else          mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); @@ -3470,14 +3561,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;          if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; -        if (MEM_STATUS_AT(reg->bt_mem_start, mem)) -          pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; -        else -          pstart = (UChar* )((void* )mem_start_stk[mem]); - -        pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) -                ? STACK_AT(mem_end_stk[mem])->u.mem.pstr -                : (UChar* )((void* )mem_end_stk[mem])); +        pstart = STACK_MEM_START(reg, mem); +        pend   = STACK_MEM_END(reg, mem);          n = (int )(pend - pstart);          if (n != 0) {            DATA_ENSURE(n); @@ -3499,14 +3584,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;          if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; -        if (MEM_STATUS_AT(reg->bt_mem_start, mem)) -          pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; -        else -          pstart = (UChar* )((void* )mem_start_stk[mem]); - -        pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) -                ? STACK_AT(mem_end_stk[mem])->u.mem.pstr -                : (UChar* )((void* )mem_end_stk[mem])); +        pstart = STACK_MEM_START(reg, mem); +        pend   = STACK_MEM_END(reg, mem);          n = (int )(pend - pstart);          if (n != 0) {            DATA_ENSURE(n); @@ -3531,14 +3610,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;            if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; -          if (MEM_STATUS_AT(reg->bt_mem_start, mem)) -            pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; -          else -            pstart = (UChar* )((void* )mem_start_stk[mem]); - -          pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) -                  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr -                  : (UChar* )((void* )mem_end_stk[mem])); +          pstart = STACK_MEM_START(reg, mem); +          pend   = STACK_MEM_END(reg, mem);            n = (int )(pend - pstart);            if (n != 0) {              DATA_ENSURE(n); @@ -3569,14 +3642,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;            if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; -          if (MEM_STATUS_AT(reg->bt_mem_start, mem)) -            pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; -          else -            pstart = (UChar* )((void* )mem_start_stk[mem]); - -          pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) -                  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr -                  : (UChar* )((void* )mem_end_stk[mem])); +          pstart = STACK_MEM_START(reg, mem); +          pend   = STACK_MEM_END(reg, mem);            n = (int )(pend - pstart);            if (n != 0) {              DATA_ENSURE(n); @@ -3689,12 +3756,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            case OP_PUSH:            case OP_REPEAT_INC:            case OP_REPEAT_INC_NG: -          case OP_REPEAT_INC_SG: -          case OP_REPEAT_INC_NG_SG:              INC_OP;              break;            default: -            goto unexpected_bytecode_error; +            MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);              break;            }  #else @@ -3797,7 +3862,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          addr = p->push_if_peek_next.addr;          c    = p->push_if_peek_next.c; -        if (c == *s) { +        if (DATA_ENSURE_CHECK1 && c == *s) {            STACK_PUSH_ALT(p + addr, s, sprev);            INC_OP;            JUMP_OUT; @@ -3810,10 +3875,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        mem  = p->repeat.id;  /* mem: OP_REPEAT ID */        addr = p->repeat.addr; -      STACK_ENSURE(1); -      repeat_stk[mem] = GET_STACK_INDEX(stk); -      STACK_PUSH_REPEAT(mem, p + 1); - +      STACK_PUSH_REPEAT_INC(mem, 0);        if (reg->repeat_range[mem].lower == 0) {          STACK_PUSH_ALT(p + addr, s, sprev);        } @@ -3824,10 +3886,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        mem  = p->repeat.id;  /* mem: OP_REPEAT ID */        addr = p->repeat.addr; -      STACK_ENSURE(1); -      repeat_stk[mem] = GET_STACK_INDEX(stk); -      STACK_PUSH_REPEAT(mem, p + 1); - +      STACK_PUSH_REPEAT_INC(mem, 0);        if (reg->repeat_range[mem].lower == 0) {          STACK_PUSH_ALT(p + 1, s, sprev);          p += addr; @@ -3838,64 +3897,42 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      CASE_OP(REPEAT_INC)        mem  = p->repeat_inc.id;  /* mem: OP_REPEAT ID */ -      si   = repeat_stk[mem]; -      stkp = STACK_AT(si); - -    repeat_inc: -      stkp->u.repeat.count++; -      if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { +      STACK_GET_REPEAT_COUNT(mem, n); +      n++; +      if (n >= reg->repeat_range[mem].upper) {          /* end of repeat. Nothing to do. */          INC_OP;        } -      else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { +      else if (n >= reg->repeat_range[mem].lower) {          INC_OP;          STACK_PUSH_ALT(p, s, sprev); -        p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ +        p = reg->repeat_range[mem].u.pcode;        }        else { -        p = stkp->u.repeat.pcode; +        p = reg->repeat_range[mem].u.pcode;        } -      STACK_PUSH_REPEAT_INC(si); +      STACK_PUSH_REPEAT_INC(mem, n);        CHECK_INTERRUPT_JUMP_OUT; -    CASE_OP(REPEAT_INC_SG) -      mem = p->repeat_inc.id;  /* mem: OP_REPEAT ID */ -      STACK_GET_REPEAT(mem, stkp); -      si = GET_STACK_INDEX(stkp); -      goto repeat_inc; -      CASE_OP(REPEAT_INC_NG)        mem = p->repeat_inc.id;  /* mem: OP_REPEAT ID */ -      si = repeat_stk[mem]; -      stkp = STACK_AT(si); - -    repeat_inc_ng: -      stkp->u.repeat.count++; -      if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { -        if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { -          Operation* pcode = stkp->u.repeat.pcode; - -          STACK_PUSH_REPEAT_INC(si); -          STACK_PUSH_ALT(pcode, s, sprev); +      STACK_GET_REPEAT_COUNT(mem, n); +      n++; +      STACK_PUSH_REPEAT_INC(mem, n); +      if (n == reg->repeat_range[mem].upper) { +        INC_OP; +      } +      else { +        if (n >= reg->repeat_range[mem].lower) { +          STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);            INC_OP;          }          else { -          p = stkp->u.repeat.pcode; -          STACK_PUSH_REPEAT_INC(si); +          p = reg->repeat_range[mem].u.pcode;          }        } -      else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { -        STACK_PUSH_REPEAT_INC(si); -        INC_OP; -      }        CHECK_INTERRUPT_JUMP_OUT; -    CASE_OP(REPEAT_INC_NG_SG) -      mem = p->repeat_inc.id;  /* mem: OP_REPEAT ID */ -      STACK_GET_REPEAT(mem, stkp); -      si = GET_STACK_INDEX(stkp); -      goto repeat_inc_ng; -      CASE_OP(PREC_READ_START)        STACK_PUSH_PREC_READ_START(s, sprev);        INC_OP; @@ -4044,14 +4081,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          OnigCalloutFunc func;          OnigCalloutArgs args; -        of = ONIG_CALLOUT_OF_NAME; -        name_id = p->callout_name.id; -        mem     = p->callout_name.num; +        of  = ONIG_CALLOUT_OF_NAME; +        mem = p->callout_name.num;        callout_common_entry:          e = onig_reg_callout_list_at(reg, mem);          in = e->in;          if (of == ONIG_CALLOUT_OF_NAME) { +          name_id = p->callout_name.id;            func = onig_get_callout_start_func(reg, mem);          }          else { @@ -4074,7 +4111,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,                call_result = ONIGERR_INVALID_ARGUMENT;              }              best_len = call_result; -            goto finish; +            goto match_at_end;              break;            }          } @@ -4100,7 +4137,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,  #endif      CASE_OP(FINISH) -      goto finish; +      goto match_at_end;  #ifdef ONIG_DEBUG_STATISTICS      fail: @@ -4121,37 +4158,472 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        JUMP_OUT;      DEFAULT_OP -      goto bytecode_error; +      MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);    } BYTECODE_INTERPRETER_END; - finish: + match_at_end:    STACK_SAVE;    return best_len; +} -#ifdef ONIG_DEBUG - stack_error: -  STACK_SAVE; -  return ONIGERR_STACK_BUG; -#endif +typedef struct { +  regex_t*    reg; +  OnigRegion* region; +} RR; + +struct OnigRegSetStruct { +  RR*          rs; +  int          n; +  int          alloc; +  OnigEncoding enc; +  int          anchor;      /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ +  OnigLen      anc_dmin;    /* (SEMI_)END_BUF anchor distance */ +  OnigLen      anc_dmax;    /* (SEMI_)END_BUF anchor distance */ +  int          all_low_high; +  int          anychar_inf; +}; - bytecode_error: -  STACK_SAVE; -  return ONIGERR_UNDEFINED_BYTECODE; +enum SearchRangeStatus { +  SRS_DEAD      = 0, +  SRS_LOW_HIGH  = 1, +  SRS_ALL_RANGE = 2 +}; -#if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE) - unexpected_bytecode_error: -  STACK_SAVE; -  return ONIGERR_UNEXPECTED_BYTECODE; -#endif +typedef struct { +  int    state;  /* value of enum SearchRangeStatus */ +  UChar* low; +  UChar* high; +  UChar* low_prev; +  UChar* sch_range; +} SearchRange; + +#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \ +  r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \ +  if (r != ONIG_MISMATCH) {\ +    if (r >= 0) {\ +      goto match;\ +    }\ +    else goto finish; /* error */ \ +  } -#ifdef USE_RETRY_LIMIT_IN_MATCH - retry_limit_in_match_over: -  STACK_SAVE; -  return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER; +static inline int +regset_search_body_position_lead(OnigRegSet* set, +           const UChar* str, const UChar* end, +           const UChar* start, const UChar* range, /* match start range */ +           const UChar* orig_range, /* data range */ +           OnigOptionType option, MatchArg* msas, int* rmatch_pos) +{ +  int r, n, i; +  UChar *s, *prev; +  UChar *low, *high, *low_prev; +  UChar* sch_range; +  regex_t* reg; +  OnigEncoding enc; +  SearchRange* sr; + +  n   = set->n; +  enc = set->enc; + +  s = (UChar* )start; +  if (s > str) +    prev = onigenc_get_prev_char_head(enc, str, s); +  else +    prev = (UChar* )NULL; + +  sr = (SearchRange* )xmalloc(sizeof(*sr) * n); +  CHECK_NULL_RETURN_MEMERR(sr); + +  for (i = 0; i < n; i++) { +    reg = set->rs[i].reg; + +    sr[i].state = SRS_DEAD; +    if (reg->optimize != OPTIMIZE_NONE) { +      if (reg->dist_max != INFINITE_LEN) { +        if (end - range > reg->dist_max) +          sch_range = (UChar* )range + reg->dist_max; +        else +          sch_range = (UChar* )end; + +        if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) { +          sr[i].state = SRS_LOW_HIGH; +          sr[i].low  = low; +          sr[i].high = high; +          sr[i].low_prev = low_prev; +          sr[i].sch_range = sch_range; +        } +      } +      else { +        sch_range = (UChar* )end; +        if (forward_search(reg, str, end, s, sch_range, +                           &low, &high, (UChar** )NULL)) { +          goto total_active; +        } +      } +    } +    else { +    total_active: +      sr[i].state    = SRS_ALL_RANGE; +      sr[i].low      = s; +      sr[i].high     = (UChar* )range; +      sr[i].low_prev = prev; +    } +  } + +#define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN   500 + +  if (set->all_low_high != 0 +      && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) { +    do { +      int try_count = 0; +      for (i = 0; i < n; i++) { +        if (sr[i].state == SRS_DEAD) continue; + +        if (s <  sr[i].low) continue; +        if (s >= sr[i].high) { +          if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range, +                             &low, &high, &low_prev) != 0) { +            sr[i].low      = low; +            sr[i].high     = high; +            sr[i].low_prev = low_prev; +            if (s < low) continue; +          } +          else { +            sr[i].state = SRS_DEAD; +            continue; +          } +        } + +        reg = set->rs[i].reg; +        REGSET_MATCH_AND_RETURN_CHECK(orig_range); +        try_count++; +      } /* for (i) */ + +      if (s >= range) break; + +      if (try_count == 0) { +        low = (UChar* )range; +        for (i = 0; i < n; i++) { +          if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) { +            low = sr[i].low; +            low_prev = sr[i].low_prev; +          } +        } +        if (low == range) break; + +        s = low; +        prev = low_prev; +      } +      else { +        prev = s; +        s += enclen(enc, s); +      } +    } while (1); +  } +  else { +    int prev_is_newline = 1; +    do { +      for (i = 0; i < n; i++) { +        if (sr[i].state == SRS_DEAD) continue; +        if (sr[i].state == SRS_LOW_HIGH) { +          if (s <  sr[i].low) continue; +          if (s >= sr[i].high) { +            if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range, +                               &low, &high, &low_prev) != 0) { +              sr[i].low      = low; +              sr[i].high     = high; +              /* sr[i].low_prev = low_prev; */ +              if (s < low) continue; +            } +            else { +              sr[i].state = SRS_DEAD; +              continue; +            } +          } +        } + +        reg = set->rs[i].reg; +        if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) { +          REGSET_MATCH_AND_RETURN_CHECK(orig_range); +        } +      } + +      if (s >= range) break; + +      if (set->anychar_inf != 0) +        prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end); + +      prev = s; +      s += enclen(enc, s); +    } while (1); +  } + +  xfree(sr); +  return ONIG_MISMATCH; + + finish: +  xfree(sr); +  return r; + + match: +  xfree(sr); +  *rmatch_pos = (int )(s - str); +  return i; +} + +static inline int +regset_search_body_regex_lead(OnigRegSet* set, +              const UChar* str, const UChar* end, +              const UChar* start, const UChar* orig_range, OnigRegSetLead lead, +              OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos) +{ +  int r; +  int i; +  int n; +  int match_index; +  const UChar* ep; +  regex_t* reg; +  OnigRegion* region; + +  n = set->n; + +  match_index = ONIG_MISMATCH; +  ep = orig_range; +  for (i = 0; i < n; i++) { +    reg    = set->rs[i].reg; +    region = set->rs[i].region; +    r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]); +    if (r > 0) { +      if (str + r < ep) { +        match_index = i; +        *rmatch_pos = r; +        if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER) +          break; + +        ep = str + r; +      } +    } +    else if (r == 0) { +      match_index = i; +      *rmatch_pos = r; +      break; +    } +  } + +  return match_index; +} + +extern int +onig_regset_search_with_param(OnigRegSet* set, +           const UChar* str, const UChar* end, +           const UChar* start, const UChar* range, +           OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[], +           int* rmatch_pos) +{ +  int r; +  int i; +  UChar *s, *prev; +  regex_t* reg; +  OnigEncoding enc; +  OnigRegion* region; +  MatchArg* msas; +  const UChar *orig_start = start; +  const UChar *orig_range = range; + +  if (set->n == 0) +    return ONIG_MISMATCH; + +  if (IS_POSIX_REGION(option)) +    return ONIGERR_INVALID_ARGUMENT; + +  r = 0; +  enc = set->enc; +  msas = (MatchArg* )NULL; + +  for (i = 0; i < set->n; i++) { +    reg    = set->rs[i].reg; +    region = set->rs[i].region; +    ADJUST_MATCH_PARAM(reg, mps[i]); +    if (IS_NOT_NULL(region)) { +      r = onig_region_resize_clear(region, reg->num_mem + 1); +      if (r != 0) goto finish_no_msa; +    } +  } + +  if (start > end || start < str) goto mismatch_no_msa; +  if (str < end) { +    /* forward search only */ +    if (range <= start) +      return ONIGERR_INVALID_ARGUMENT; +  } + +  if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { +    if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) { +      r = ONIGERR_INVALID_WIDE_CHAR_VALUE; +      goto finish_no_msa; +    } +  } + +  if (set->anchor != OPTIMIZE_NONE && str < end) { +    UChar *min_semi_end, *max_semi_end; + +    if ((set->anchor & ANCR_BEGIN_POSITION) != 0) { +      /* search start-position only */ +    begin_position: +      range = start + 1; +    } +    else if ((set->anchor & ANCR_BEGIN_BUF) != 0) { +      /* search str-position only */ +      if (start != str) goto mismatch_no_msa; +      range = str + 1; +    } +    else if ((set->anchor & ANCR_END_BUF) != 0) { +      min_semi_end = max_semi_end = (UChar* )end; + +    end_buf: +      if ((OnigLen )(max_semi_end - str) < set->anc_dmin) +        goto mismatch_no_msa; + +      if ((OnigLen )(min_semi_end - start) > set->anc_dmax) { +        start = min_semi_end - set->anc_dmax; +        if (start < end) +          start = onigenc_get_right_adjust_char_head(enc, str, start); +      } +      if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) { +        range = max_semi_end - set->anc_dmin + 1; +      } +      if (start > range) goto mismatch_no_msa; +    } +    else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) { +      UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1); + +      max_semi_end = (UChar* )end; +      if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) { +        min_semi_end = pre_end; + +#ifdef USE_CRNL_AS_LINE_TERMINATOR +        pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1); +        if (IS_NOT_NULL(pre_end) && +            ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) { +          min_semi_end = pre_end; +        }  #endif +        if (min_semi_end > str && start <= min_semi_end) { +          goto end_buf; +        } +      } +      else { +        min_semi_end = (UChar* )end; +        goto end_buf; +      } +    } +    else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) { +      goto begin_position; +    } +  } +  else if (str == end) { /* empty string */ +    start = end = str; +    s = (UChar* )start; +    prev = (UChar* )NULL; + +    msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n); +    CHECK_NULL_RETURN_MEMERR(msas); +    for (i = 0; i < set->n; i++) { +      reg = set->rs[i].reg; +      MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]); +    } +    for (i = 0; i < set->n; i++) { +      reg = set->rs[i].reg; +      if (reg->threshold_len == 0) { +        REGSET_MATCH_AND_RETURN_CHECK(end); +      } +    } + +    goto mismatch; +  } + +  if (lead == ONIG_REGSET_POSITION_LEAD) { +    msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n); +    CHECK_NULL_RETURN_MEMERR(msas); + +    for (i = 0; i < set->n; i++) { +      MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region, +                     orig_start, mps[i]); +    } + +    r = regset_search_body_position_lead(set, str, end, start, range, +                                         orig_range, option, msas, rmatch_pos); +  } +  else { +    r = regset_search_body_regex_lead(set, str, end, start, orig_range, +                                      lead, option, mps, rmatch_pos); +  } +  if (r < 0) goto finish; +  else       goto match2; + + mismatch: +  r = ONIG_MISMATCH; + finish: +  for (i = 0; i < set->n; i++) { +    if (IS_NOT_NULL(msas)) +      MATCH_ARG_FREE(msas[i]); +    if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) && +        IS_NOT_NULL(set->rs[i].region)) { +      onig_region_clear(set->rs[i].region); +    } +  } +  if (IS_NOT_NULL(msas)) xfree(msas); +  return r; + + mismatch_no_msa: +  r = ONIG_MISMATCH; + finish_no_msa: +  return r; + + match: +  *rmatch_pos = (int )(s - str); + match2: +  for (i = 0; i < set->n; i++) { +    if (IS_NOT_NULL(msas)) +      MATCH_ARG_FREE(msas[i]); +    if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) && +        IS_NOT_NULL(set->rs[i].region)) { +      onig_region_clear(set->rs[i].region); +    } +  } +  if (IS_NOT_NULL(msas)) xfree(msas); +  return r; /* regex index */  } +extern int +onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end, +                   const UChar* start, const UChar* range, +                   OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos) +{ +  int r; +  int i; +  OnigMatchParam* mp; +  OnigMatchParam** mps; + +  mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n); +  CHECK_NULL_RETURN_MEMERR(mps); + +  mp = (OnigMatchParam* )(mps + set->n); + +  for (i = 0; i < set->n; i++) { +    onig_initialize_match_param(mp + i); +    mps[i] = mp + i; +  } + +  r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps, +                                    rmatch_pos); +  for (i = 0; i < set->n; i++) +    onig_free_match_param_content(mp + i); + +  xfree(mps); + +  return r; +}  static UChar*  slow_search(OnigEncoding enc, UChar* target, UChar* target_end, @@ -4193,9 +4665,11 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag,    UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];    while (t < tend) { +    if (p >= end) return 0;      lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);      q = lowbuf;      while (lowlen > 0) { +      if (t >= tend)    return 0;        if (*t++ != *q++) return 0;        lowlen--;      } @@ -4209,16 +4683,11 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag,                 UChar* target, UChar* target_end,                 const UChar* text, const UChar* text_end, UChar* text_range)  { -  UChar *s, *end; - -  end = (UChar* )text_end; -  end -= target_end - target - 1; -  if (end > text_range) -    end = text_range; +  UChar *s;    s = (UChar* )text; -  while (s < end) { +  while (s < text_range) {      if (str_lower_case_match(enc, case_fold_flag, target, target_end,                               s, text_end))        return s; @@ -4372,60 +4841,6 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,  }  static UChar* -sunday_quick_search_case_fold(regex_t* reg, -                              const UChar* target, const UChar* target_end, -                              const UChar* text,   const UChar* text_end, -                              const UChar* text_range) -{ -  const UChar *s, *se, *end; -  const UChar *tail; -  int skip, tlen1; -  int map_offset; -  int case_fold_flag; -  OnigEncoding enc; - -#ifdef ONIG_DEBUG_SEARCH -  fprintf(stderr, -          "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range); -#endif - -  enc = reg->enc; -  case_fold_flag = reg->case_fold_flag; - -  tail = target_end - 1; -  tlen1 = (int )(tail - target); -  end = text_range; -  if (end + tlen1 > text_end) -    end = text_end - tlen1; - -  map_offset = reg->map_offset; -  s = text; - -  while (s < end) { -    if (str_lower_case_match(enc, case_fold_flag, target, target_end, -                             s, text_end)) -      return (UChar* )s; - -    se = s + tlen1; -    if (se + map_offset >= text_end) break; -    skip = reg->map[*(se + map_offset)]; -#if 0 -    p = s; -    do { -      s += enclen(enc, s); -    } while ((s - p) < skip && s < end); -#else -    /* This is faster than prev code for long text.  ex: /(?i)Twain/  */ -    s += skip; -    if (s < end) -      s = onigenc_get_right_adjust_char_head(enc, text, s); -#endif -  } - -  return (UChar* )NULL; -} - -static UChar*  map_search(OnigEncoding enc, UChar map[],             const UChar* text, const UChar* text_range)  { @@ -4505,25 +4920,26 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,  }  static int -forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, -                     UChar* range, UChar** low, UChar** high, UChar** low_prev) +forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, +               UChar* range, UChar** low, UChar** high, UChar** low_prev)  {    UChar *p, *pprev = (UChar* )NULL;  #ifdef ONIG_DEBUG_SEARCH -  fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n", -          str, end, s, range); +  fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n", +          str, end, start, range);  #endif -  p = s; -  if (reg->dmin > 0) { +  p = start; +  if (reg->dist_min != 0) { +    if (end - p <= reg->dist_min) +      return 0; /* fail */ +      if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { -      p += reg->dmin; +      p += reg->dist_min;      }      else { -      UChar *q = p + reg->dmin; - -      if (q >= end) return 0; /* fail */ +      UChar *q = p + reg->dist_min;        while (p < q) p += enclen(reg->enc, p);      }    } @@ -4538,11 +4954,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,                         reg->exact, reg->exact_end, p, end, range);      break; -  case OPTIMIZE_STR_CASE_FOLD_FAST: -    p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end, -                                      range); -    break; -    case OPTIMIZE_STR_FAST:      p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);      break; @@ -4558,7 +4969,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,    }    if (p && p < range) { -    if (p - reg->dmin < s) { +    if (p - start < reg->dist_min) {      retry_gate:        pprev = p;        p += enclen(reg->enc, p); @@ -4571,8 +4982,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,        switch (reg->sub_anchor) {        case ANCR_BEGIN_LINE:          if (!ON_STR_BEGIN(p)) { -          prev = onigenc_get_prev_char_head(reg->enc, -                                            (pprev ? pprev : str), p); +          prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);            if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))              goto retry_gate;          } @@ -4593,35 +5003,34 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,  #endif                   )            goto retry_gate; +          break;        }      } -    if (reg->dmax == 0) { +    if (reg->dist_max == 0) {        *low = p;        if (low_prev) { -        if (*low > s) -          *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); +        if (*low > start) +          *low_prev = onigenc_get_prev_char_head(reg->enc, start, p);          else            *low_prev = onigenc_get_prev_char_head(reg->enc,                                                   (pprev ? pprev : str), p);        } +      *high = p;      }      else { -      if (reg->dmax != INFINITE_LEN) { -        if (p - str < reg->dmax) { +      if (reg->dist_max != INFINITE_LEN) { +        if (p - str < reg->dist_max) {            *low = (UChar* )str;            if (low_prev)              *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);          }          else { -          *low = p - reg->dmax; -          if (*low > s) { -            *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, +          *low = p - reg->dist_max; +          if (*low > start) { +            *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start,                                                   *low, (const UChar** )low_prev); -            if (low_prev && IS_NULL(*low_prev)) -              *low_prev = onigenc_get_prev_char_head(reg->enc, -                                                     (pprev ? pprev : s), *low);            }            else {              if (low_prev) @@ -4630,14 +5039,18 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,            }          }        } +      /* no needs to adjust *high, *high is used as range check only */ +      if (p - str < reg->dist_min) +        *high = (UChar* )str; +      else +        *high = p - reg->dist_min;      } -    /* no needs to adjust *high, *high is used as range check only */ -    *high = p - reg->dmin;  #ifdef ONIG_DEBUG_SEARCH      fprintf(stderr, -            "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", -            (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); +            "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n", +            (int )(*low - str), (int )(*high - str), +            reg->dist_min, reg->dist_max);  #endif      return 1; /* success */    } @@ -4647,15 +5060,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,  static int -backward_search_range(regex_t* reg, const UChar* str, const UChar* end, -                      UChar* s, const UChar* range, UChar* adjrange, -                      UChar** low, UChar** high) +backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, +                const UChar* range, UChar* adjrange, UChar** low, UChar** high)  {    UChar *p; -  if (range == 0) goto fail; - -  range += reg->dmin;    p = s;   retry: @@ -4667,7 +5076,6 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,      break;    case OPTIMIZE_STR_CASE_FOLD: -  case OPTIMIZE_STR_CASE_FOLD_FAST:      p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,                                  reg->exact, reg->exact_end,                                  range, adjrange, end, p); @@ -4722,15 +5130,27 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,        }      } -    /* no needs to adjust *high, *high is used as range check only */ -    if (reg->dmax != INFINITE_LEN) { -      *low  = p - reg->dmax; -      *high = p - reg->dmin; +    if (reg->dist_max != INFINITE_LEN) { +      if (p - str < reg->dist_max) +        *low = (UChar* )str; +      else +        *low = p - reg->dist_max; + +      if (reg->dist_min != 0) { +        if (p - str < reg->dist_min) +          *high = (UChar* )str; +        else +          *high = p - reg->dist_min; +      } +      else { +        *high = p; +      } +        *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);      }  #ifdef ONIG_DEBUG_SEARCH -    fprintf(stderr, "backward_search_range: low: %d, high: %d\n", +    fprintf(stderr, "backward_search: low: %d, high: %d\n",              (int )(*low - str), (int )(*high - str));  #endif      return 1; /* success */ @@ -4738,7 +5158,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,   fail:  #ifdef ONIG_DEBUG_SEARCH -  fprintf(stderr, "backward_search_range: fail.\n"); +  fprintf(stderr, "backward_search: fail.\n");  #endif    return 0; /* fail */  } @@ -4751,24 +5171,35 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,  {    int r;    OnigMatchParam mp; +  const UChar* data_range;    onig_initialize_match_param(&mp); -  r = onig_search_with_param(reg, str, end, start, range, region, option, &mp); + +  /* The following is an expanded code of onig_search_with_param()  */ +  if (range > start) +    data_range = range; +  else +    data_range = end; + +  r = search_in_range(reg, str, end, start, range, data_range, region, +                      option, &mp); +    onig_free_match_param_content(&mp);    return r;  } -extern int -onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, -                       const UChar* start, const UChar* range, OnigRegion* region, -                       OnigOptionType option, OnigMatchParam* mp) +static int +search_in_range(regex_t* reg, const UChar* str, const UChar* end, +                const UChar* start, const UChar* range, /* match start range */ +                const UChar* data_range, /* subject string range */ +                OnigRegion* region, +                OnigOptionType option, OnigMatchParam* mp)  {    int r;    UChar *s, *prev;    MatchArg msa;    const UChar *orig_start = start; -  const UChar *orig_range = range;  #ifdef ONIG_DEBUG_SEARCH    fprintf(stderr, @@ -4851,17 +5282,21 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,        min_semi_end = max_semi_end = (UChar* )end;      end_buf: -      if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin) +      if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)          goto mismatch_no_msa;        if (range > start) { -        if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) { -          start = min_semi_end - reg->anchor_dmax; +        if (reg->anc_dist_max != INFINITE_LEN && +            min_semi_end - start > reg->anc_dist_max) { +          start = min_semi_end - reg->anc_dist_max;            if (start < end)              start = onigenc_get_right_adjust_char_head(reg->enc, str, start);          } -        if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) { -          range = max_semi_end - reg->anchor_dmin + 1; +        if (max_semi_end - (range - 1) < reg->anc_dist_min) { +          if (max_semi_end - str + 1 < reg->anc_dist_min) +            goto mismatch_no_msa; +          else +            range = max_semi_end - reg->anc_dist_min + 1;          }          if (start > range) goto mismatch_no_msa; @@ -4869,12 +5304,17 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,             Backward search is used. */        }        else { -        if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) { -          range = min_semi_end - reg->anchor_dmax; +        if (reg->anc_dist_max != INFINITE_LEN && +            min_semi_end - range > reg->anc_dist_max) { +          range = min_semi_end - reg->anc_dist_max;          } -        if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) { -          start = max_semi_end - reg->anchor_dmin; -          start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); +        if (max_semi_end - start < reg->anc_dist_min) { +          if (max_semi_end - str < reg->anc_dist_min) +            goto mismatch_no_msa; +          else { +            start = max_semi_end - reg->anc_dist_min; +            start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); +          }          }          if (range > start) goto mismatch_no_msa;        } @@ -4942,29 +5382,33 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,      if (reg->optimize != OPTIMIZE_NONE) {        UChar *sch_range, *low, *high, *low_prev; -      sch_range = (UChar* )range; -      if (reg->dmax != 0) { -        if (reg->dmax == INFINITE_LEN) +      if (reg->dist_max != 0) { +        if (reg->dist_max == INFINITE_LEN)            sch_range = (UChar* )end;          else { -          sch_range += reg->dmax; -          if (sch_range > end) sch_range = (UChar* )end; +          if ((end - range) < reg->dist_max) +            sch_range = (UChar* )end; +          else { +            sch_range = (UChar* )range + reg->dist_max; +          }          }        } +      else +        sch_range = (UChar* )range;        if ((end - start) < reg->threshold_len)          goto mismatch; -      if (reg->dmax != INFINITE_LEN) { +      if (reg->dist_max != INFINITE_LEN) {          do { -          if (! forward_search_range(reg, str, end, s, sch_range, -                                     &low, &high, &low_prev)) goto mismatch; +          if (! forward_search(reg, str, end, s, sch_range, &low, &high, +                               &low_prev)) goto mismatch;            if (s < low) {              s    = low;              prev = low_prev;            }            while (s <= high) { -            MATCH_AND_RETURN_CHECK(orig_range); +            MATCH_AND_RETURN_CHECK(data_range);              prev = s;              s += enclen(reg->enc, s);            } @@ -4972,12 +5416,12 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,          goto mismatch;        }        else { /* check only. */ -        if (! forward_search_range(reg, str, end, s, sch_range, -                                   &low, &high, (UChar** )NULL)) goto mismatch; +        if (! forward_search(reg, str, end, s, sch_range, &low, &high, +                             (UChar** )NULL)) goto mismatch;          if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {            do { -            MATCH_AND_RETURN_CHECK(orig_range); +            MATCH_AND_RETURN_CHECK(data_range);              prev = s;              s += enclen(reg->enc, s); @@ -4994,13 +5438,13 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,      }      do { -      MATCH_AND_RETURN_CHECK(orig_range); +      MATCH_AND_RETURN_CHECK(data_range);        prev = s;        s += enclen(reg->enc, s);      } while (s < range);      if (s == range) { /* because empty match with /$/. */ -      MATCH_AND_RETURN_CHECK(orig_range); +      MATCH_AND_RETURN_CHECK(data_range);      }    }    else {  /* backward search */ @@ -5011,19 +5455,30 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,      if (reg->optimize != OPTIMIZE_NONE) {        UChar *low, *high, *adjrange, *sch_start; +      const UChar *min_range; + +      if ((end - range) < reg->threshold_len) goto mismatch;        if (range < end)          adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);        else          adjrange = (UChar* )end; -      if (reg->dmax != INFINITE_LEN && -          (end - range) >= reg->threshold_len) { +      if (end - range > reg->dist_min) +        min_range = range + reg->dist_min; +      else +        min_range = end; + +      if (reg->dist_max != INFINITE_LEN) {          do { -          sch_start = s + reg->dmax; -          if (sch_start > end) sch_start = (UChar* )end; -          if (backward_search_range(reg, str, end, sch_start, range, adjrange, -                                    &low, &high) <= 0) +          if (end - s > reg->dist_max) +            sch_start = s + reg->dist_max; +          else { +            sch_start = onigenc_get_prev_char_head(reg->enc, str, end); +          } + +          if (backward_search(reg, str, end, sch_start, min_range, adjrange, +                              &low, &high) <= 0)              goto mismatch;            if (s > high) @@ -5038,22 +5493,10 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,          goto mismatch;        }        else { /* check only. */ -        if ((end - range) < reg->threshold_len) goto mismatch; +        sch_start = onigenc_get_prev_char_head(reg->enc, str, end); -        sch_start = s; -        if (reg->dmax != 0) { -          if (reg->dmax == INFINITE_LEN) -            sch_start = (UChar* )end; -          else { -            sch_start += reg->dmax; -            if (sch_start > end) sch_start = (UChar* )end; -            else -              sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, -                                                        start, sch_start); -          } -        } -        if (backward_search_range(reg, str, end, sch_start, range, adjrange, -                                  &low, &high) <= 0) goto mismatch; +        if (backward_search(reg, str, end, sch_start, min_range, adjrange, +                            &low, &high) <= 0) goto mismatch;        }      } @@ -5109,6 +5552,22 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,  }  extern int +onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, +                       const UChar* start, const UChar* range, OnigRegion* region, +                       OnigOptionType option, OnigMatchParam* mp) +{ +  const UChar* data_range; + +  if (range > start) +    data_range = range; +  else +    data_range = end; + +  return search_in_range(reg, str, end, start, range, data_range, region, +                         option, mp); +} + +extern int  onig_scan(regex_t* reg, const UChar* str, const UChar* end,            OnigRegion* region, OnigOptionType option,            int (*scan_callback)(int, int, OnigRegion*, void*), @@ -5210,6 +5669,202 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from)    *to = *from;  } +extern int +onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[]) +{ +#define REGSET_INITIAL_ALLOC_SIZE   10 + +  int i; +  int r; +  int alloc; +  OnigRegSet* set; +  RR* rs; + +  *rset = 0; + +  set = (OnigRegSet* )xmalloc(sizeof(*set)); +  CHECK_NULL_RETURN_MEMERR(set); + +  alloc = n > REGSET_INITIAL_ALLOC_SIZE ? n : REGSET_INITIAL_ALLOC_SIZE; +  rs = (RR* )xmalloc(sizeof(set->rs[0]) * alloc); +  if (IS_NULL(rs)) { +    xfree(set); +    return ONIGERR_MEMORY; +  } + +  set->rs    = rs; +  set->n     = 0; +  set->alloc = alloc; + +  for (i = 0; i < n; i++) { +    regex_t* reg = regs[i]; + +    r = onig_regset_add(set, reg); +    if (r != 0) { +      for (i = 0; i < set->n; i++) { +        OnigRegion* region = set->rs[i].region; +        if (IS_NOT_NULL(region)) +          onig_region_free(region, 1); +      } +      xfree(set->rs); +      xfree(set); +      return r; +    } +  } + +  *rset = set; +  return 0; +} + +static void +update_regset_by_reg(OnigRegSet* set, regex_t* reg) +{ +  if (set->n == 1) { +    set->enc          = reg->enc; +    set->anchor       = reg->anchor; +    set->anc_dmin     = reg->anc_dist_min; +    set->anc_dmax     = reg->anc_dist_max; +    set->all_low_high = +      (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) ? 0 : 1; +    set->anychar_inf  = (reg->anchor & ANCR_ANYCHAR_INF) != 0 ? 1 : 0; +  } +  else { +    int anchor; + +    anchor = set->anchor & reg->anchor; +    if (anchor != 0) { +      OnigLen anc_dmin; +      OnigLen anc_dmax; + +      anc_dmin = set->anc_dmin; +      anc_dmax = set->anc_dmax; +      if (anc_dmin > reg->anc_dist_min) anc_dmin = reg->anc_dist_min; +      if (anc_dmax < reg->anc_dist_max) anc_dmax = reg->anc_dist_max; +      set->anc_dmin = anc_dmin; +      set->anc_dmax = anc_dmax; +    } + +    set->anchor = anchor; + +    if (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) +      set->all_low_high = 0; + +    if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) +      set->anychar_inf = 1; +  } +} + +extern int +onig_regset_add(OnigRegSet* set, regex_t* reg) +{ +  OnigRegion* region; + +  if (IS_FIND_LONGEST(reg->options)) +    return ONIGERR_INVALID_ARGUMENT; + +  if (set->n != 0 && reg->enc != set->enc) +    return ONIGERR_INVALID_ARGUMENT; + +  if (set->n >= set->alloc) { +    RR* nrs; +    int new_alloc; + +    new_alloc = set->alloc * 2; +    nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc); +    CHECK_NULL_RETURN_MEMERR(nrs); + +    set->rs    = nrs; +    set->alloc = new_alloc; +  } + +  region = onig_region_new(); +  CHECK_NULL_RETURN_MEMERR(region); + +  set->rs[set->n].reg    = reg; +  set->rs[set->n].region = region; +  set->n++; + +  update_regset_by_reg(set, reg); +  return 0; +} + +extern int +onig_regset_replace(OnigRegSet* set, int at, regex_t* reg) +{ +  int i; + +  if (at < 0 || at >= set->n) +    return ONIGERR_INVALID_ARGUMENT; + +  if (IS_NULL(reg)) { +    onig_region_free(set->rs[at].region, 1); +    for (i = at; i < set->n - 1; i++) { +      set->rs[i].reg    = set->rs[i+1].reg; +      set->rs[i].region = set->rs[i+1].region; +    } +    set->n--; +  } +  else { +    if (IS_FIND_LONGEST(reg->options)) +      return ONIGERR_INVALID_ARGUMENT; + +    if (set->n > 1 && reg->enc != set->enc) +      return ONIGERR_INVALID_ARGUMENT; + +    set->rs[at].reg = reg; +  } + +  for (i = 0; i < set->n; i++) +    update_regset_by_reg(set, set->rs[i].reg); + +  return 0; +} + +extern void +onig_regset_free(OnigRegSet* set) +{ +  int i; + +  for (i = 0; i < set->n; i++) { +    regex_t* reg; +    OnigRegion* region; + +    reg    = set->rs[i].reg; +    region = set->rs[i].region; +    onig_free(reg); +    if (IS_NOT_NULL(region)) +      onig_region_free(region, 1); +  } + +  xfree(set->rs); +  xfree(set); +} + +extern int +onig_regset_number_of_regex(OnigRegSet* set) +{ +  return set->n; +} + +extern regex_t* +onig_regset_get_regex(OnigRegSet* set, int at) +{ +  if (at < 0 || at >= set->n) +    return (regex_t* )0; + +  return set->rs[at].reg; +} + +extern OnigRegion* +onig_regset_get_region(OnigRegSet* set, int at) +{ +  if (at < 0 || at >= set->n) +    return (OnigRegion* )0; + +  return set->rs[at].region; +} + +  #ifdef USE_DIRECT_THREADED_CODE  extern int  onig_init_for_match_at(regex_t* reg) @@ -5402,35 +6057,25 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i    const UChar* str;    StackType*   stk_base;    int i; +  StackIndex* mem_start_stk; +  StackIndex* mem_end_stk;    i = mem_num;    reg = a->regex;    str = a->string;    stk_base = a->stk_base; +  mem_start_stk = a->mem_start_stk; +  mem_end_stk   = a->mem_end_stk;    if (i > 0) {      if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { -      if (MEM_STATUS_AT(reg->bt_mem_start, i)) -        *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str); -      else -        *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str); - -      *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) -                     ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr -                     : (UChar* )((void* )a->mem_end_stk[i])) - str); +      *begin = (int )(STACK_MEM_START(reg, i) - str); +      *end   = (int )(STACK_MEM_END(reg, i)   - str);      }      else {        *begin = *end = ONIG_REGION_NOTPOS;      }    } -  else if (i == 0) { -#if 0 -    *begin = a->start   - str; -    *end   = a->current - str; -#else -    return ONIGERR_INVALID_ARGUMENT; -#endif -  }    else      return ONIGERR_INVALID_ARGUMENT; @@ -5468,14 +6113,6 @@ onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUS    return ONIG_MISMATCH;  } -#if 0 -extern int -onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) -{ -  return ONIG_CALLOUT_SUCCESS; -} -#endif -  extern int  onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)  { diff --git a/src/regext.c b/src/regext.c index 965c793..c46f630 100644 --- a/src/regext.c +++ b/src/regext.c @@ -2,7 +2,7 @@    regext.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/reggnu.c b/src/reggnu.c index a124ae8..8a45078 100644 --- a/src/reggnu.c +++ b/src/reggnu.c @@ -2,7 +2,7 @@    reggnu.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/regint.h b/src/regint.h index 38389a1..cc540da 100644 --- a/src/regint.h +++ b/src/regint.h @@ -4,7 +4,7 @@    regint.h -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -47,16 +47,11 @@  #endif  #endif -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ -    (defined(__ppc__) && defined(__APPLE__)) || \ -    defined(__x86_64) || defined(__x86_64__) || \ -    defined(__mc68020__) -#define PLATFORM_UNALIGNED_WORD_ACCESS -#endif - +#ifndef ONIG_DISABLE_DIRECT_THREADING  #ifdef __GNUC__  #define USE_GOTO_LABELS_AS_VALUES  #endif +#endif  /* config */  /* spec. config */ @@ -82,6 +77,8 @@  #define USE_VARIABLE_META_CHARS  #define USE_POSIX_API_REGION_OPTION  #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +/* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */ +  #include "regenc.h" @@ -197,49 +194,16 @@ typedef unsigned int  uintptr_t;  #define CHAR_MAP_SIZE       256  #define INFINITE_LEN        ONIG_INFINITE_DISTANCE -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - -#define PLATFORM_GET_INC(val,p,type) do{\ -  val  = *(type* )p;\ -  (p) += sizeof(type);\ -} while(0) - -#else - -#define PLATFORM_GET_INC(val,p,type) do{\ -  xmemcpy(&val, (p), sizeof(type));\ -  (p) += sizeof(type);\ -} while(0) - -/* sizeof(OnigCodePoint) */ -#ifdef SIZEOF_SIZE_T -# define WORD_ALIGNMENT_SIZE     SIZEOF_SIZE_T -#else -# define WORD_ALIGNMENT_SIZE     SIZEOF_LONG -#endif - -#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ -  (pad_size) = WORD_ALIGNMENT_SIZE - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ -  if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ -} while (0) - -#define ALIGNMENT_RIGHT(addr) do {\ -  (addr) += (WORD_ALIGNMENT_SIZE - 1);\ -  (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ -} while (0) - -#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ -  #ifdef USE_CALLOUT  typedef struct { -  int           flag; -  OnigCalloutOf of; -  int           in; -  int           name_id; -  const UChar*  tag_start; -  const UChar*  tag_end; +  int             flag; +  OnigCalloutOf   of; +  int             in; +  int             name_id; +  const UChar*    tag_start; +  const UChar*    tag_end;    OnigCalloutType type;    OnigCalloutFunc start_func;    OnigCalloutFunc end_func; @@ -272,7 +236,6 @@ enum OptimizeType {    OPTIMIZE_STR,                   /* Slow Search */    OPTIMIZE_STR_FAST,              /* Sunday quick search / BMH */    OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */ -  OPTIMIZE_STR_CASE_FOLD_FAST,    /* Sunday quick search / BMH (ignore case) */    OPTIMIZE_STR_CASE_FOLD,         /* Slow Search (ignore case) */    OPTIMIZE_MAP                    /* char map */  }; @@ -288,6 +251,8 @@ typedef unsigned int  MemStatusType;  #define MEM_STATUS_AT0(stats,n) \    ((n) > 0 && (n) < (int )MEM_STATUS_BITS_NUM  ?  ((stats) & ((MemStatusType )1 << n)) : ((stats) & 1)) +#define MEM_STATUS_IS_ALL_ON(stats)  (((stats) & 1) != 0) +  #define MEM_STATUS_ON(stats,n) do {\    if ((n) < (int )MEM_STATUS_BITS_NUM) {\      if ((n) != 0)\ @@ -302,8 +267,14 @@ typedef unsigned int  MemStatusType;      (stats) |= ((MemStatusType )1 << (n));\  } while (0) +#define MEM_STATUS_LIMIT_AT(stats,n) \ +  ((n) < (int )MEM_STATUS_BITS_NUM  ?  ((stats) & ((MemStatusType )1 << n)) : 0) +#define MEM_STATUS_LIMIT_ON(stats,n) do {\ +  if ((n) < (int )MEM_STATUS_BITS_NUM && (n) != 0) {\ +    (stats) |= ((MemStatusType )1 << (n));\ +  }\ +} while (0) -#define INT_MAX_LIMIT           ((1UL << (SIZEOF_INT * 8 - 1)) - 1)  #define IS_CODE_WORD_ASCII(enc,code) \    (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) @@ -354,16 +325,12 @@ typedef unsigned int  MemStatusType;  /* bitset */  #define BITS_PER_BYTE      8  #define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE) -#define BITS_IN_ROOM       (sizeof(Bits) * BITS_PER_BYTE) +#define BITS_IN_ROOM       32   /* 4 * BITS_PER_BYTE */  #define BITSET_SIZE        (SINGLE_BYTE_SIZE / BITS_IN_ROOM) -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS -typedef unsigned int   Bits; -#else -typedef unsigned char  Bits; -#endif -typedef Bits           BitSet[BITSET_SIZE]; -typedef Bits*          BitSetRef; +typedef uint32_t  Bits; +typedef Bits      BitSet[BITSET_SIZE]; +typedef Bits*     BitSetRef;  #define SIZE_BITSET        sizeof(BitSet) @@ -372,8 +339,8 @@ typedef Bits*          BitSetRef;    for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \  } while (0) -#define BS_ROOM(bs,pos)            (bs)[pos / BITS_IN_ROOM] -#define BS_BIT(pos)                (1 << (pos % BITS_IN_ROOM)) +#define BS_ROOM(bs,pos)            (bs)[(unsigned int )(pos) >> 5] +#define BS_BIT(pos)                (1u << ((unsigned int )(pos) & 0x1f))  #define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))  #define BITSET_SET_BIT(bs, pos)     BS_ROOM(bs,pos) |= BS_BIT(pos) @@ -389,11 +356,13 @@ typedef struct _BBuf {  #define BB_INIT(buf,size)    bbuf_init((BBuf* )(buf), (size)) +/*  #define BB_SIZE_INC(buf,inc) do{\    (buf)->alloc += (inc);\    (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\    if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\  } while (0) +*/  #define BB_EXPAND(buf,low) do{\    do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ @@ -491,39 +460,34 @@ typedef struct _BBuf {  /* operation code */  enum OpCode { -  OP_FINISH = 0,        /* matching process terminator (no more alternative) */ -  OP_END    = 1,        /* pattern code terminator (success end) */ - -  OP_EXACT1 = 2,        /* single byte, N = 1 */ -  OP_EXACT2,            /* single byte, N = 2 */ -  OP_EXACT3,            /* single byte, N = 3 */ -  OP_EXACT4,            /* single byte, N = 4 */ -  OP_EXACT5,            /* single byte, N = 5 */ -  OP_EXACTN,            /* single byte */ -  OP_EXACTMB2N1,        /* mb-length = 2 N = 1 */ -  OP_EXACTMB2N2,        /* mb-length = 2 N = 2 */ -  OP_EXACTMB2N3,        /* mb-length = 2 N = 3 */ -  OP_EXACTMB2N,         /* mb-length = 2 */ -  OP_EXACTMB3N,         /* mb-length = 3 */ -  OP_EXACTMBN,          /* other length */ - -  OP_EXACT1_IC,         /* single byte, N = 1, ignore case */ -  OP_EXACTN_IC,         /* single byte,        ignore case */ - +  OP_FINISH = 0,       /* matching process terminator (no more alternative) */ +  OP_END    = 1,       /* pattern code terminator (success end) */ +  OP_STR_1 = 2,        /* single byte, N = 1 */ +  OP_STR_2,            /* single byte, N = 2 */ +  OP_STR_3,            /* single byte, N = 3 */ +  OP_STR_4,            /* single byte, N = 4 */ +  OP_STR_5,            /* single byte, N = 5 */ +  OP_STR_N,            /* single byte */ +  OP_STR_MB2N1,        /* mb-length = 2 N = 1 */ +  OP_STR_MB2N2,        /* mb-length = 2 N = 2 */ +  OP_STR_MB2N3,        /* mb-length = 2 N = 3 */ +  OP_STR_MB2N,         /* mb-length = 2 */ +  OP_STR_MB3N,         /* mb-length = 3 */ +  OP_STR_MBN,          /* other length */ +  OP_STR_1_IC,         /* single byte, N = 1, ignore case */ +  OP_STR_N_IC,         /* single byte,        ignore case */    OP_CCLASS,    OP_CCLASS_MB,    OP_CCLASS_MIX,    OP_CCLASS_NOT,    OP_CCLASS_MB_NOT,    OP_CCLASS_MIX_NOT, -    OP_ANYCHAR,                 /* "."  */    OP_ANYCHAR_ML,              /* "."  multi-line */    OP_ANYCHAR_STAR,            /* ".*" */    OP_ANYCHAR_ML_STAR,         /* ".*" multi-line */    OP_ANYCHAR_STAR_PEEK_NEXT,    OP_ANYCHAR_ML_STAR_PEEK_NEXT, -    OP_WORD,    OP_WORD_ASCII,    OP_NO_WORD, @@ -532,16 +496,13 @@ enum OpCode {    OP_NO_WORD_BOUNDARY,    OP_WORD_BEGIN,    OP_WORD_END, -    OP_TEXT_SEGMENT_BOUNDARY, -    OP_BEGIN_BUF,    OP_END_BUF,    OP_BEGIN_LINE,    OP_END_LINE,    OP_SEMI_END_BUF,    OP_BEGIN_POSITION, -    OP_BACKREF1,    OP_BACKREF2,    OP_BACKREF_N, @@ -552,34 +513,35 @@ enum OpCode {    OP_BACKREF_WITH_LEVEL_IC,     /* \k<xxx+n>, \k<xxx-n> */    OP_BACKREF_CHECK,             /* (?(n)), (?('name')) */    OP_BACKREF_CHECK_WITH_LEVEL,  /* (?(n-level)), (?('name-level')) */ - -  OP_MEMORY_START, -  OP_MEMORY_START_PUSH,   /* push back-tracker to stack */ -  OP_MEMORY_END_PUSH,     /* push back-tracker to stack */ -  OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ -  OP_MEMORY_END, -  OP_MEMORY_END_REC,      /* push marker to stack */ - +  OP_MEM_START, +  OP_MEM_START_PUSH,     /* push back-tracker to stack */ +  OP_MEM_END_PUSH,       /* push back-tracker to stack */ +#ifdef USE_CALL +  OP_MEM_END_PUSH_REC,   /* push back-tracker to stack */ +#endif +  OP_MEM_END, +#ifdef USE_CALL +  OP_MEM_END_REC,        /* push marker to stack */ +#endif    OP_FAIL,               /* pop stack and move */    OP_JUMP,    OP_PUSH,    OP_PUSH_SUPER,    OP_POP_OUT,  #ifdef USE_OP_PUSH_OR_JUMP_EXACT -  OP_PUSH_OR_JUMP_EXACT1,  /* if match exact then push, else jump. */ +  OP_PUSH_OR_JUMP_EXACT1,   /* if match exact then push, else jump. */  #endif -  OP_PUSH_IF_PEEK_NEXT,    /* if match exact then push, else none. */ -  OP_REPEAT,               /* {n,m} */ -  OP_REPEAT_NG,            /* {n,m}? (non greedy) */ +  OP_PUSH_IF_PEEK_NEXT,     /* if match exact then push, else none. */ +  OP_REPEAT,                /* {n,m} */ +  OP_REPEAT_NG,             /* {n,m}? (non greedy) */    OP_REPEAT_INC, -  OP_REPEAT_INC_NG,        /* non greedy */ -  OP_REPEAT_INC_SG,        /* search and get in stack */ -  OP_REPEAT_INC_NG_SG,     /* search and get in stack (non greedy) */ +  OP_REPEAT_INC_NG,         /* non greedy */    OP_EMPTY_CHECK_START,     /* null loop checker start */    OP_EMPTY_CHECK_END,       /* null loop checker end   */    OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */ +#ifdef USE_CALL    OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ - +#endif    OP_PREC_READ_START,       /* (?=...)  start */    OP_PREC_READ_END,         /* (?=...)  end   */    OP_PREC_READ_NOT_START,   /* (?!...)  start */ @@ -589,11 +551,12 @@ enum OpCode {    OP_LOOK_BEHIND,           /* (?<=...) start (no needs end opcode) */    OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */    OP_LOOK_BEHIND_NOT_END,   /* (?<!...) end   */ - -  OP_CALL,                  /* \g<name> */ -  OP_RETURN,    OP_PUSH_SAVE_VAL,    OP_UPDATE_VAR, +#ifdef USE_CALL +  OP_CALL,                  /* \g<name> */ +  OP_RETURN, +#endif  #ifdef USE_CALLOUT    OP_CALLOUT_CONTENTS,      /* (?{...}) (?{{...}}) */    OP_CALLOUT_NAME,          /* (*name) (*name[tag](args...)) */ @@ -601,8 +564,8 @@ enum OpCode {  };  enum SaveType { -  SAVE_KEEP = 0, /* SAVE S */ -  SAVE_S = 1, +  SAVE_KEEP        = 0, /* SAVE S */ +  SAVE_S           = 1,    SAVE_RIGHT_RANGE = 2,  }; @@ -642,116 +605,57 @@ typedef int ModeType;  #define SIZE_UPDATE_VAR_TYPE  sizeof(UpdateVarType)  #define SIZE_MODE             sizeof(ModeType) -#define GET_RELADDR_INC(addr,p)    PLATFORM_GET_INC(addr,   p, RelAddrType) -#define GET_ABSADDR_INC(addr,p)    PLATFORM_GET_INC(addr,   p, AbsAddrType) -#define GET_LENGTH_INC(len,p)      PLATFORM_GET_INC(len,    p, LengthType) -#define GET_MEMNUM_INC(num,p)      PLATFORM_GET_INC(num,    p, MemNumType) -#define GET_REPEATNUM_INC(num,p)   PLATFORM_GET_INC(num,    p, RepeatNumType) -#define GET_OPTION_INC(option,p)   PLATFORM_GET_INC(option, p, OnigOptionType) -#define GET_POINTER_INC(ptr,p)     PLATFORM_GET_INC(ptr,    p, PointerType) -#define GET_SAVE_TYPE_INC(type,p)       PLATFORM_GET_INC(type, p, SaveType) -#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType) -#define GET_MODE_INC(mode,p)            PLATFORM_GET_INC(mode, p, ModeType) -  /* code point's address must be aligned address. */  #define GET_CODE_POINT(code,p)   code = *((OnigCodePoint* )(p)) -#define GET_BYTE_INC(byte,p) do{\ -  byte = *(p);\ -  (p)++;\ -} while(0)  /* op-code + arg size */ -#if 0 -#define SIZE_OP_ANYCHAR_STAR            SIZE_OPCODE -#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) -#define SIZE_OP_JUMP                   (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_PUSH                   (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_PUSH_SUPER             (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP_OUT                 SIZE_OPCODE -#ifdef USE_OP_PUSH_OR_JUMP_EXACT -#define SIZE_OP_PUSH_OR_JUMP_EXACT1    (SIZE_OPCODE + SIZE_RELADDR + 1) -#endif -#define SIZE_OP_PUSH_IF_PEEK_NEXT      (SIZE_OPCODE + SIZE_RELADDR + 1) -#define SIZE_OP_REPEAT_INC             (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_REPEAT_INC_NG          (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_WORD_BOUNDARY          (SIZE_OPCODE + SIZE_MODE) -#define SIZE_OP_PREC_READ_START         SIZE_OPCODE -#define SIZE_OP_PREC_READ_NOT_START    (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_PREC_READ_END           SIZE_OPCODE -#define SIZE_OP_PREC_READ_NOT_END       SIZE_OPCODE -#define SIZE_OP_FAIL                    SIZE_OPCODE -#define SIZE_OP_MEMORY_START           (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_START_PUSH      (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_PUSH        (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_PUSH_REC    (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END             (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_REC         (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_ATOMIC_START            SIZE_OPCODE -#define SIZE_OP_ATOMIC_END              SIZE_OPCODE -#define SIZE_OP_EMPTY_CHECK_START       (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_EMPTY_CHECK_END         (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_LOOK_BEHIND            (SIZE_OPCODE + SIZE_LENGTH) -#define SIZE_OP_LOOK_BEHIND_NOT_START  (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) -#define SIZE_OP_LOOK_BEHIND_NOT_END     SIZE_OPCODE -#define SIZE_OP_CALL                   (SIZE_OPCODE + SIZE_ABSADDR) -#define SIZE_OP_RETURN                  SIZE_OPCODE -#define SIZE_OP_PUSH_SAVE_VAL          (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM) -#define SIZE_OP_UPDATE_VAR             (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM) - -#ifdef USE_CALLOUT -#define SIZE_OP_CALLOUT_CONTENTS       (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_CALLOUT_NAME           (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM) -#endif - -#else  /* if 0 */  /* for relative address increment to go next op. */ -#define SIZE_INC_OP                     1 - -#define SIZE_OP_ANYCHAR_STAR            1 -#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT  1 -#define SIZE_OP_JUMP                    1 -#define SIZE_OP_PUSH                    1 -#define SIZE_OP_PUSH_SUPER              1 -#define SIZE_OP_POP_OUT                 1 +#define SIZE_INC                       1 + +#define OPSIZE_ANYCHAR_STAR            1 +#define OPSIZE_ANYCHAR_STAR_PEEK_NEXT  1 +#define OPSIZE_JUMP                    1 +#define OPSIZE_PUSH                    1 +#define OPSIZE_PUSH_SUPER              1 +#define OPSIZE_POP_OUT                 1  #ifdef USE_OP_PUSH_OR_JUMP_EXACT -#define SIZE_OP_PUSH_OR_JUMP_EXACT1     1 -#endif -#define SIZE_OP_PUSH_IF_PEEK_NEXT       1 -#define SIZE_OP_REPEAT                  1 -#define SIZE_OP_REPEAT_INC              1 -#define SIZE_OP_REPEAT_INC_NG           1 -#define SIZE_OP_WORD_BOUNDARY           1 -#define SIZE_OP_PREC_READ_START         1 -#define SIZE_OP_PREC_READ_NOT_START     1 -#define SIZE_OP_PREC_READ_END           1 -#define SIZE_OP_PREC_READ_NOT_END       1 -#define SIZE_OP_BACKREF                 1 -#define SIZE_OP_FAIL                    1 -#define SIZE_OP_MEMORY_START            1 -#define SIZE_OP_MEMORY_START_PUSH       1 -#define SIZE_OP_MEMORY_END_PUSH         1 -#define SIZE_OP_MEMORY_END_PUSH_REC     1 -#define SIZE_OP_MEMORY_END              1 -#define SIZE_OP_MEMORY_END_REC          1 -#define SIZE_OP_ATOMIC_START            1 -#define SIZE_OP_ATOMIC_END              1 -#define SIZE_OP_EMPTY_CHECK_START       1 -#define SIZE_OP_EMPTY_CHECK_END         1 -#define SIZE_OP_LOOK_BEHIND             1 -#define SIZE_OP_LOOK_BEHIND_NOT_START   1 -#define SIZE_OP_LOOK_BEHIND_NOT_END     1 -#define SIZE_OP_CALL                    1 -#define SIZE_OP_RETURN                  1 -#define SIZE_OP_PUSH_SAVE_VAL           1 -#define SIZE_OP_UPDATE_VAR              1 +#define OPSIZE_PUSH_OR_JUMP_EXACT1     1 +#endif +#define OPSIZE_PUSH_IF_PEEK_NEXT       1 +#define OPSIZE_REPEAT                  1 +#define OPSIZE_REPEAT_INC              1 +#define OPSIZE_REPEAT_INC_NG           1 +#define OPSIZE_WORD_BOUNDARY           1 +#define OPSIZE_PREC_READ_START         1 +#define OPSIZE_PREC_READ_NOT_START     1 +#define OPSIZE_PREC_READ_END           1 +#define OPSIZE_PREC_READ_NOT_END       1 +#define OPSIZE_BACKREF                 1 +#define OPSIZE_FAIL                    1 +#define OPSIZE_MEM_START               1 +#define OPSIZE_MEM_START_PUSH          1 +#define OPSIZE_MEM_END_PUSH            1 +#define OPSIZE_MEM_END_PUSH_REC        1 +#define OPSIZE_MEM_END                 1 +#define OPSIZE_MEM_END_REC             1 +#define OPSIZE_ATOMIC_START            1 +#define OPSIZE_ATOMIC_END              1 +#define OPSIZE_EMPTY_CHECK_START       1 +#define OPSIZE_EMPTY_CHECK_END         1 +#define OPSIZE_LOOK_BEHIND             1 +#define OPSIZE_LOOK_BEHIND_NOT_START   1 +#define OPSIZE_LOOK_BEHIND_NOT_END     1 +#define OPSIZE_CALL                    1 +#define OPSIZE_RETURN                  1 +#define OPSIZE_PUSH_SAVE_VAL           1 +#define OPSIZE_UPDATE_VAR              1  #ifdef USE_CALLOUT -#define SIZE_OP_CALLOUT_CONTENTS        1 -#define SIZE_OP_CALLOUT_NAME            1 +#define OPSIZE_CALLOUT_CONTENTS        1 +#define OPSIZE_CALLOUT_NAME            1  #endif -#endif /* if 0 */  #define MC_ESC(syn)               (syn)->meta_char_table.esc @@ -882,7 +786,7 @@ typedef struct {      } repeat; /* REPEAT, REPEAT_NG */      struct {        MemNumType  id; -    } repeat_inc; /* REPEAT_INC, REPEAT_INC_SG, REPEAT_INC_NG, REPEAT_INC_NG_SG */ +    } repeat_inc; /* REPEAT_INC, REPEAT_INC_NG */      struct {        MemNumType mem;      } empty_check_start; @@ -933,48 +837,58 @@ typedef struct {  #endif  } RegexExt; +typedef struct { +  int lower; +  int upper; +  union { +    Operation* pcode; /* address of repeated body */ +    int offset; +  } u; +} RepeatRange; +  struct re_pattern_buffer {    /* common members of BBuf(bytes-buffer) */    Operation*   ops;  #ifdef USE_DIRECT_THREADED_CODE    enum OpCode* ocs;  #endif -  Operation*   ops_curr; -  unsigned int ops_used;    /* used space for ops */ -  unsigned int ops_alloc;   /* allocated space for ops */ +  Operation*     ops_curr; +  unsigned int   ops_used;    /* used space for ops */ +  unsigned int   ops_alloc;   /* allocated space for ops */    unsigned char* string_pool;    unsigned char* string_pool_end; -  int num_mem;                   /* used memory(...) num counted from 1 */ -  int num_repeat;                /* OP_REPEAT/OP_REPEAT_NG id-counter */ -  int num_null_check;            /* OP_EMPTY_CHECK_START/END id counter */ -  int num_call;                  /* number of subexp call */ -  unsigned int capture_history;  /* (?@...) flag (1-31) */ -  unsigned int bt_mem_start;     /* need backtrack flag */ -  unsigned int bt_mem_end;       /* need backtrack flag */ -  int stack_pop_level; -  int repeat_range_alloc; -  OnigRepeatRange* repeat_range; - -  OnigEncoding      enc; -  OnigOptionType    options; -  OnigSyntaxType*   syntax; -  OnigCaseFoldType  case_fold_flag; -  void*             name_table; +  int            num_mem;          /* used memory(...) num counted from 1 */ +  int            num_repeat;       /* OP_REPEAT/OP_REPEAT_NG id-counter */ +  int            num_empty_check;  /* OP_EMPTY_CHECK_START/END id counter */ +  int            num_call;         /* number of subexp call */ +  MemStatusType  capture_history;  /* (?@...) flag (1-31) */ +  MemStatusType  push_mem_start;   /* need backtrack flag */ +  MemStatusType  push_mem_end;     /* need backtrack flag */ +  MemStatusType  empty_status_mem; +  int            stack_pop_level; +  int            repeat_range_alloc; +  RepeatRange*   repeat_range; + +  OnigEncoding     enc; +  OnigOptionType   options; +  OnigSyntaxType*  syntax; +  OnigCaseFoldType case_fold_flag; +  void*            name_table;    /* optimization info (string search, char-map and anchors) */    int            optimize;          /* optimize flag */    int            threshold_len;     /* search str-length for apply optimize */    int            anchor;            /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ -  OnigLen        anchor_dmin;       /* (SEMI_)END_BUF anchor distance */ -  OnigLen        anchor_dmax;       /* (SEMI_)END_BUF anchor distance */ +  OnigLen        anc_dist_min;      /* (SEMI_)END_BUF anchor distance */ +  OnigLen        anc_dist_max;      /* (SEMI_)END_BUF anchor distance */    int            sub_anchor;        /* start-anchor for exact or map */    unsigned char *exact;    unsigned char *exact_end;    unsigned char  map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */    int            map_offset; -  OnigLen        dmin;                      /* min-distance of exact or map */ -  OnigLen        dmax;                      /* max-distance of exact or map */ +  OnigLen        dist_min;           /* min-distance of exact or map */ +  OnigLen        dist_max;           /* max-distance of exact or map */    RegexExt*      extp;  }; diff --git a/src/regparse.c b/src/regparse.c index 7f8b1a9..fed53f7 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -2,7 +2,7 @@    regparse.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -199,6 +199,24 @@ onig_set_parse_depth_limit(unsigned int depth)    return 0;  } +#ifdef ONIG_DEBUG_PARSE +#define INC_PARSE_DEPTH(d) do {\ +  (d)++;\ +  if (env->max_parse_depth < (d)) env->max_parse_depth = d;\ +  if ((d) > ParseDepthLimit) \ +    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\ +} while (0) +#else +#define INC_PARSE_DEPTH(d) do {\ +  (d)++;\ +  if ((d) > ParseDepthLimit) \ +    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\ +} while (0) +#endif + +#define DEC_PARSE_DEPTH(d)  (d)-- + +  static int  bbuf_init(BBuf* buf, int size)  { @@ -244,7 +262,8 @@ bbuf_clone(BBuf** rto, BBuf* from)    return 0;  } -static int backref_rel_to_abs(int rel_no, ScanEnv* env) +static int +backref_rel_to_abs(int rel_no, ScanEnv* env)  {    if (rel_no > 0) {      return env->num_mem + rel_no; @@ -292,15 +311,6 @@ bitset_set_range(BitSetRef bs, int from, int to)    }  } -#if 0 -static void -bitset_set_all(BitSetRef bs) -{ -  int i; -  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); } -} -#endif -  static void  bitset_invert(BitSetRef bs)  { @@ -363,24 +373,6 @@ save_entry(ScanEnv* env, enum SaveType type, int* id)  {    int nid = env->save_num; -#if 0 -  if (IS_NULL(env->saves)) { -    int n = 10; -    env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n); -    CHECK_NULL_RETURN_MEMERR(env->saves); -    env->save_alloc_num = n; -  } -  else if (env->save_alloc_num <= nid) { -    int n = env->save_alloc_num * 2; -    SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n); -    CHECK_NULL_RETURN_MEMERR(p); -    env->saves = p; -    env->save_alloc_num = n; -  } - -  env->saves[nid].type = type; -#endif -    env->save_num++;    *id = nid;    return 0; @@ -476,14 +468,14 @@ static int  str_end_hash(st_str_end_key* x)  {    UChar *p; -  int val = 0; +  unsigned val = 0;    p = x->s;    while (p < x->end) { -    val = val * 997 + (int )*p++; +    val = val * 997 + (unsigned )*p++;    } -  return val + (val >> 5); +  return (int) (val + (val >> 5));  }  extern hash_table_type* @@ -566,15 +558,15 @@ static int  callout_name_table_hash(st_callout_name_key* x)  {    UChar *p; -  int val = 0; +  unsigned int val = 0;    p = x->s;    while (p < x->end) { -    val = val * 997 + (int )*p++; +    val = val * 997 + (unsigned int )*p++;    }    /* use intptr_t for escape warning in Windows */ -  return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type; +  return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type);  }  extern hash_table_type* @@ -1972,9 +1964,8 @@ callout_tag_entry(ScanEnv* env, regex_t* reg, UChar* name, UChar* name_end,  static void  scan_env_clear(ScanEnv* env)  { -  MEM_STATUS_CLEAR(env->capture_history); -  MEM_STATUS_CLEAR(env->bt_mem_start); -  MEM_STATUS_CLEAR(env->bt_mem_end); +  MEM_STATUS_CLEAR(env->cap_history); +  MEM_STATUS_CLEAR(env->backtrack_mem);    MEM_STATUS_CLEAR(env->backrefed_mem);    env->error      = (UChar* )NULL;    env->error_end  = (UChar* )NULL; @@ -1993,6 +1984,10 @@ scan_env_clear(ScanEnv* env)    xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));    env->parse_depth         = 0; +#ifdef ONIG_DEBUG_PARSE +  env->max_parse_depth     = 0; +#endif +  env->backref_num         = 0;    env->keep_num            = 0;    env->save_num            = 0;    env->save_alloc_num      = 0; @@ -2024,11 +2019,8 @@ scan_env_add_mem_entry(ScanEnv* env)        }        for (i = env->num_mem + 1; i < alloc; i++) { -        p[i].node = NULL_NODE; -#if 0 -        p[i].in   = 0; -        p[i].recursion = 0; -#endif +        p[i].mem_node = NULL_NODE; +        p[i].empty_repeat_node = NULL_NODE;        }        env->mem_env_dynamic = p; @@ -2044,7 +2036,7 @@ static int  scan_env_set_mem_node(ScanEnv* env, int num, Node* node)  {    if (env->num_mem >= num) -    SCANENV_MEMENV(env)[num].node = node; +    SCANENV_MEMENV(env)[num].mem_node = node;    else      return ONIGERR_PARSER_BUG;    return 0; @@ -2182,7 +2174,7 @@ node_new_ctype(int type, int not, OnigOptionType options)  static Node*  node_new_anychar(void)  { -  Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE); +  Node* node = node_new_ctype(CTYPE_ANYCHAR, FALSE, ONIG_OPTION_NONE);    return node;  } @@ -2242,24 +2234,6 @@ onig_node_new_list(Node* left, Node* right)  }  extern Node* -onig_node_list_add(Node* list, Node* x) -{ -  Node *n; - -  n = onig_node_new_list(x, NULL); -  if (IS_NULL(n)) return NULL_NODE; - -  if (IS_NOT_NULL(list)) { -    while (IS_NOT_NULL(NODE_CDR(list))) -      list = NODE_CDR(list); - -    NODE_CDR(list) = n; -  } - -  return n; -} - -extern Node*  onig_node_new_alt(Node* left, Node* right)  {    Node* node = node_new(); @@ -2357,7 +2331,7 @@ node_new_backref(int back_num, int* backrefs, int by_name,    for (i = 0; i < back_num; i++) {      if (backrefs[i] <= env->num_mem && -        IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) { +        IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].mem_node)) {        NODE_STATUS_ADD(node, RECURSION);   /* /...(\1).../ */        break;      } @@ -2377,6 +2351,8 @@ node_new_backref(int back_num, int* backrefs, int by_name,      for (i = 0; i < back_num; i++)        p[i] = backrefs[i];    } + +  env->backref_num++;    return node;  } @@ -2424,13 +2400,13 @@ node_new_quantifier(int lower, int upper, int by_number)    CHECK_NULL_RETURN(node);    NODE_SET_TYPE(node, NODE_QUANT); -  QUANT_(node)->lower           = lower; -  QUANT_(node)->upper           = upper; -  QUANT_(node)->greedy          = 1; -  QUANT_(node)->emptiness       = BODY_IS_NOT_EMPTY; -  QUANT_(node)->head_exact      = NULL_NODE; -  QUANT_(node)->next_head_exact = NULL_NODE; -  QUANT_(node)->is_refered      = 0; +  QUANT_(node)->lower            = lower; +  QUANT_(node)->upper            = upper; +  QUANT_(node)->greedy           = 1; +  QUANT_(node)->emptiness        = BODY_IS_NOT_EMPTY; +  QUANT_(node)->head_exact       = NULL_NODE; +  QUANT_(node)->next_head_exact  = NULL_NODE; +  QUANT_(node)->include_referred = 0;    if (by_number != 0)      NODE_STATUS_ADD(node, BY_NUMBER); @@ -2716,7 +2692,7 @@ make_text_segment(Node** node, ScanEnv* env)    ns[1] = NULL_NODE;    r = ONIGERR_MEMORY; -  ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, 0); +  ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, FALSE);    if (IS_NULL(ns[0])) goto err;    r = node_new_true_anychar(&ns[1], env); @@ -2727,7 +2703,7 @@ make_text_segment(Node** node, ScanEnv* env)    ns[0] = x;    ns[1] = NULL_NODE; -  x = node_new_quantifier(0, INFINITE_REPEAT, 1); +  x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);    if (IS_NULL(x)) goto err;    NODE_BODY(x) = ns[0]; @@ -2796,7 +2772,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,    ns[0] = x; -  x = node_new_quantifier(lower, upper, 0); +  x = node_new_quantifier(lower, upper, FALSE);    if (IS_NULL(x)) goto err0;    NODE_BODY(x) = ns[0]; @@ -2825,7 +2801,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,    x = make_alt(2, ns);    if (IS_NULL(x)) goto err0; -  if (is_range_cutter != 0) +  if (is_range_cutter != FALSE)      NODE_STATUS_ADD(x, SUPER);    *node = x; @@ -2915,7 +2891,10 @@ make_range_clear(Node** node, ScanEnv* env)    ns[0] = NULL_NODE; ns[1] = x; -  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env); +#define ID_NOT_USED_DONT_CARE_ME   0 + +  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, +                                  ID_NOT_USED_DONT_CARE_ME, env);    if (r != 0) goto err;    x = make_alt(2, ns); @@ -3034,7 +3013,7 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua    id1 = GIMMICK_(ns[0])->id;    r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive, -                         0, env); +                         FALSE, env);    if (r != 0) goto err;    ns[2] = ns[3] = NULL_NODE; @@ -3077,7 +3056,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,      if (expr == NULL_NODE) {        /* default expr \O* */ -      quant = node_new_quantifier(0, INFINITE_REPEAT, 0); +      quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);        if (IS_NULL(quant)) goto err0;        r = node_new_true_anychar(&body, env); @@ -3204,16 +3183,6 @@ node_str_cat_char(Node* node, UChar c)  }  extern void -onig_node_conv_to_str_node(Node* node, int flag) -{ -  NODE_SET_TYPE(node, NODE_STRING); -  STR_(node)->flag     = flag; -  STR_(node)->capacity = 0; -  STR_(node)->s        = STR_(node)->buf; -  STR_(node)->end      = STR_(node)->buf; -} - -extern void  onig_node_str_clear(Node* node)  {    if (STR_(node)->capacity != 0 && @@ -3221,10 +3190,11 @@ onig_node_str_clear(Node* node)      xfree(STR_(node)->s);    } -  STR_(node)->capacity = 0;    STR_(node)->flag     = 0;    STR_(node)->s        = STR_(node)->buf;    STR_(node)->end      = STR_(node)->buf; +  STR_(node)->capacity = 0; +  STR_(node)->case_min_len = 0;  }  static Node* @@ -3234,10 +3204,12 @@ node_new_str(const UChar* s, const UChar* end)    CHECK_NULL_RETURN(node);    NODE_SET_TYPE(node, NODE_STRING); -  STR_(node)->capacity = 0;    STR_(node)->flag     = 0;    STR_(node)->s        = STR_(node)->buf;    STR_(node)->end      = STR_(node)->buf; +  STR_(node)->capacity = 0; +  STR_(node)->case_min_len = 0; +    if (onig_node_str_cat(node, s, end)) {      onig_node_free(node);      return NULL; @@ -3252,11 +3224,11 @@ onig_node_new_str(const UChar* s, const UChar* end)  }  static Node* -node_new_str_raw(UChar* s, UChar* end) +node_new_str_crude(UChar* s, UChar* end)  {    Node* node = node_new_str(s, end);    CHECK_NULL_RETURN(node); -  NODE_STRING_SET_RAW(node); +  NODE_STRING_SET_CRUDE(node);    return node;  } @@ -3267,14 +3239,14 @@ node_new_empty(void)  }  static Node* -node_new_str_raw_char(UChar c) +node_new_str_crude_char(UChar c)  {    int i;    UChar p[1];    Node* node;    p[0] = c; -  node = node_new_str_raw(p, p + 1); +  node = node_new_str_crude(p, p + 1);    /* clear buf tail */    for (i = 1; i < NODE_STRING_BUF_SIZE; i++) @@ -3297,8 +3269,8 @@ str_node_split_last_char(Node* node, OnigEncoding enc)      if (p && p > sn->s) { /* can be split. */        rn = node_new_str(p, sn->end);        CHECK_NULL_RETURN(rn); -      if (NODE_STRING_IS_RAW(node)) -        NODE_STRING_SET_RAW(rn); +      if (NODE_STRING_IS_CRUDE(node)) +        NODE_STRING_SET_CRUDE(rn);        sn->end = (UChar* )p;      } @@ -3316,10 +3288,10 @@ str_node_can_be_split(Node* node, OnigEncoding enc)    return 0;  } -extern int -onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) +static int +scan_number(UChar** src, const UChar* end, OnigEncoding enc)  { -  unsigned int num, val; +  int num, val;    OnigCodePoint c;    UChar* p = *src;    PFETCH_READY; @@ -3328,8 +3300,8 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)    while (! PEND) {      PFETCH(c);      if (IS_CODE_DIGIT_ASCII(enc, c)) { -      val = (unsigned int )DIGITVAL(c); -      if ((INT_MAX_LIMIT - val) / 10UL < num) +      val = (int )DIGITVAL(c); +      if ((INT_MAX - val) / 10 < num)          return -1;  /* overflow */        num = num * 10 + val; @@ -3344,26 +3316,27 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)  }  static int -scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen, -                                 int maxlen, OnigEncoding enc) +scan_hexadecimal_number(UChar** src, UChar* end, int minlen, int maxlen, +                        OnigEncoding enc, OnigCodePoint* rcode)  { +  OnigCodePoint code;    OnigCodePoint c; -  unsigned int num, val; +  unsigned int val;    int n;    UChar* p = *src;    PFETCH_READY; -  num = 0; +  code = 0;    n = 0;    while (! PEND && n < maxlen) {      PFETCH(c);      if (IS_CODE_XDIGIT_ASCII(enc, c)) {        n++; -      val = (unsigned int )XDIGITVAL(enc,c); -      if ((INT_MAX_LIMIT - val) / 16UL < num) +      val = (unsigned int )XDIGITVAL(enc, c); +      if ((UINT_MAX - val) / 16UL < code)          return ONIGERR_TOO_BIG_NUMBER; /* overflow */ -      num = (num << 4) + XDIGITVAL(enc,c); +      code = (code << 4) + val;      }      else {        PUNFETCH; @@ -3374,36 +3347,46 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,    if (n < minlen)      return ONIGERR_INVALID_CODE_POINT_VALUE; +  *rcode = code;    *src = p; -  return num; +  return ONIG_NORMAL;  }  static int -scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, -                           OnigEncoding enc) +scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen, +                  OnigEncoding enc, OnigCodePoint* rcode)  { +  OnigCodePoint code;    OnigCodePoint c; -  unsigned int num, val; +  unsigned int val; +  int n;    UChar* p = *src;    PFETCH_READY; -  num = 0; -  while (! PEND && maxlen-- != 0) { +  code = 0; +  n = 0; +  while (! PEND && n < maxlen) {      PFETCH(c);      if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') { -      val = ODIGITVAL(c); -      if ((INT_MAX_LIMIT - val) / 8UL < num) -        return -1;  /* overflow */ +      n++; +      val = (unsigned int )ODIGITVAL(c); +      if ((UINT_MAX - val) / 8UL < code) +        return ONIGERR_TOO_BIG_NUMBER; /* overflow */ -      num = (num << 3) + val; +      code = (code << 3) + val;      }      else {        PUNFETCH;        break;      }    } + +  if (n < minlen) +    return ONIGERR_INVALID_CODE_POINT_VALUE; + +  *rcode = code;    *src = p; -  return num; +  return ONIG_NORMAL;  } @@ -3938,68 +3921,70 @@ static enum ReduceType ReduceTypeTable[6][6] = {    {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */  }; -extern void -onig_reduce_nested_quantifier(Node* pnode, Node* cnode) +extern int +onig_reduce_nested_quantifier(Node* pnode)  {    int pnum, cnum;    QuantNode *p, *c; +  Node* cnode; + +  cnode = NODE_BODY(pnode);    p = QUANT_(pnode);    c = QUANT_(cnode);    pnum = quantifier_type_num(p);    cnum = quantifier_type_num(c);    if (pnum < 0 || cnum < 0) { -    if ((p->lower == p->upper) && ! IS_INFINITE_REPEAT(p->upper)) { -      if ((c->lower == c->upper) && ! IS_INFINITE_REPEAT(c->upper)) { -        int n = onig_positive_int_multiply(p->lower, c->lower); -        if (n >= 0) { -          p->lower = p->upper = n; -          NODE_BODY(pnode) = NODE_BODY(cnode); -          goto remove_cnode; -        } -      } +    if (p->lower == p->upper && c->lower == c->upper) { +      int n = onig_positive_int_multiply(p->lower, c->lower); +      if (n < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + +      p->lower = p->upper = n; +      NODE_BODY(pnode) = NODE_BODY(cnode); +      goto remove_cnode;      } -    return ; +    return 0;    }    switch(ReduceTypeTable[cnum][pnum]) {    case RQ_DEL:      *pnode = *cnode; +    goto remove_cnode;      break;    case RQ_A:      NODE_BODY(pnode) = NODE_BODY(cnode);      p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 1; +    goto remove_cnode;      break;    case RQ_AQ:      NODE_BODY(pnode) = NODE_BODY(cnode);      p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 0; +    goto remove_cnode;      break;    case RQ_QQ:      NODE_BODY(pnode) = NODE_BODY(cnode);      p->lower  = 0;  p->upper = 1;  p->greedy = 0; +    goto remove_cnode;      break;    case RQ_P_QQ: -    NODE_BODY(pnode) = cnode;      p->lower  = 0;  p->upper = 1;  p->greedy = 0;      c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 1; -    return ;      break;    case RQ_PQ_Q: -    NODE_BODY(pnode) = cnode;      p->lower  = 0;  p->upper = 1;  p->greedy = 1;      c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 0; -    return ;      break;    case RQ_ASIS: -    NODE_BODY(pnode) = cnode; -    return ;      break;    } +  return 0; +   remove_cnode:    NODE_BODY(cnode) = NULL_NODE;    onig_node_free(cnode); +  return 0;  }  static int @@ -4018,7 +4003,7 @@ node_new_general_newline(Node** node, ScanEnv* env)    alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);    if (alen < 0) return alen; -  crnl = node_new_str_raw(buf, buf + dlen + alen); +  crnl = node_new_str_crude(buf, buf + dlen + alen);    CHECK_NULL_RETURN_MEMERR(crnl);    ncc = node_new_cclass(); @@ -4046,7 +4031,7 @@ node_new_general_newline(Node** node, ScanEnv* env)      if (r != 0) goto err1;    } -  x = node_new_bag_if_else(crnl, 0, ncc); +  x = node_new_bag_if_else(crnl, NULL_NODE, ncc);    if (IS_NULL(x)) goto err1;    *node = x; @@ -4055,7 +4040,7 @@ node_new_general_newline(Node** node, ScanEnv* env)  enum TokenSyms {    TK_EOT      = 0,   /* end of token */ -  TK_RAW_BYTE = 1, +  TK_CRUDE_BYTE = 1,    TK_CHAR,    TK_STRING,    TK_CODE_POINT, @@ -4070,7 +4055,7 @@ enum TokenSyms {    TK_ALT,    TK_SUBEXP_OPEN,    TK_SUBEXP_CLOSE, -  TK_CC_OPEN, +  TK_OPEN_CC,    TK_QUOTE_OPEN,    TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */    TK_KEEP,             /* \K */ @@ -4082,9 +4067,9 @@ enum TokenSyms {    /* in cc */    TK_CC_CLOSE,    TK_CC_RANGE, -  TK_POSIX_BRACKET_OPEN, -  TK_CC_AND,             /* && */ -  TK_CC_CC_OPEN          /* [ */ +  TK_CC_POSIX_BRACKET_OPEN, +  TK_CC_AND,           /* && */ +  TK_CC_OPEN_CC        /* [ */  };  typedef struct { @@ -4094,7 +4079,7 @@ typedef struct {    UChar* backp;    union {      UChar* s; -    int   c; +    UChar byte;      OnigCodePoint code;      int   anchor;      int   subtype; @@ -4129,7 +4114,7 @@ typedef struct {  static int -fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env) +fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env)  {    int low, up, syn_allow, non_low = 0;    int r = 0; @@ -4154,7 +4139,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)      }    } -  low = onig_scan_unsigned_number(&p, end, env->enc); +  low = scan_number(&p, end, env->enc);    if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;    if (low > ONIG_MAX_REPEAT_NUM)      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; @@ -4173,7 +4158,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)    PFETCH(c);    if (c == ',') {      UChar* prev = p; -    up = onig_scan_unsigned_number(&p, end, env->enc); +    up = scan_number(&p, end, env->enc);      if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;      if (up > ONIG_MAX_REPEAT_NUM)        return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; @@ -4196,7 +4181,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)    if (PEND) goto invalid;    PFETCH(c);    if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { -    if (c != MC_ESC(env->syntax)) goto invalid; +    if (c != MC_ESC(env->syntax) || PEND) goto invalid;      PFETCH(c);    }    if (c != '}') goto invalid; @@ -4419,7 +4404,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,        PFETCH(c);        if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;        PUNFETCH; -      level = onig_scan_unsigned_number(&p, end, enc); +      level = scan_number(&p, end, enc);        if (level < 0) return ONIGERR_TOO_BIG_NUMBER;        *rlevel = (level * flag);        exist_level = 1; @@ -4440,7 +4425,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,   end:    if (r == 0) {      if (*num_type != IS_NOT_NUM) { -      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); +      *rback_num = scan_number(&pnum_head, name_end, enc);        if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;        else if (*rback_num == 0) {          if (*num_type == IS_REL_NUM) @@ -4468,7 +4453,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,  static int  fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,             UChar** rname_end, ScanEnv* env, int* rback_num, -           enum REF_NUM* num_type, int ref) +           enum REF_NUM* num_type, int is_ref)  {    int r, sign;    int digit_count; @@ -4498,7 +4483,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,        return ONIGERR_EMPTY_GROUP_NAME;      if (IS_CODE_DIGIT_ASCII(enc, c)) { -      if (ref == 1) +      if (is_ref == TRUE)          *num_type = IS_ABS_NUM;        else {          r = ONIGERR_INVALID_GROUP_NAME; @@ -4506,7 +4491,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,        digit_count++;      }      else if (c == '-') { -      if (ref == 1) { +      if (is_ref == TRUE) {          *num_type = IS_REL_NUM;          sign = -1;          pnum_head = p; @@ -4516,7 +4501,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,        }      }      else if (c == '+') { -      if (ref == 1) { +      if (is_ref == TRUE) {          *num_type = IS_REL_NUM;          sign = 1;          pnum_head = p; @@ -4566,7 +4551,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,      }      if (*num_type != IS_NOT_NUM) { -      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); +      *rback_num = scan_number(&pnum_head, name_end, enc);        if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;        else if (*rback_num == 0) {          if (*num_type == IS_REL_NUM) { @@ -4698,7 +4683,8 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,  static int  fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)  { -  int num; +  int r; +  OnigCodePoint code;    OnigCodePoint c, c2;    OnigSyntaxType* syn = env->syntax;    OnigEncoding enc = env->enc; @@ -4714,7 +4700,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)    PFETCH(c);    tok->type = TK_CHAR;    tok->base = 0; -  tok->u.c  = c; +  tok->u.code = c;    tok->escaped = 0;    if (c == ']') { @@ -4731,7 +4717,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      PFETCH(c);      tok->escaped = 1; -    tok->u.c = c; +    tok->u.code = c;      switch (c) {      case 'w':        tok->type = TK_CHAR_TYPE; @@ -4804,8 +4790,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        prev = p;        if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {          PINC; -        num = scan_unsigned_octal_number(&p, end, 11, enc); -        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; +        r = scan_octal_number(&p, end, 0, 11, enc, &code); +        if (r < 0) return r;          if (!PEND) {            c2 = PPEEK;            if (IS_CODE_DIGIT_ASCII(enc, c2)) @@ -4816,7 +4802,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)            PINC;            tok->type   = TK_CODE_POINT;            tok->base   = 8; -          tok->u.code = (OnigCodePoint )num; +          tok->u.code = code;          }          else {            /* can't read nothing or invalid format */ @@ -4831,13 +4817,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        prev = p;        if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {          PINC; -        num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); -        if (num < 0) { -          if (num == ONIGERR_TOO_BIG_NUMBER) -            return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; -          else -            return num; -        } +        r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code); +        if (r < 0) return r;          if (!PEND) {            c2 = PPEEK;            if (IS_CODE_XDIGIT_ASCII(enc, c2)) @@ -4848,7 +4829,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)            PINC;            tok->type   = TK_CODE_POINT;            tok->base   = 16; -          tok->u.code = (OnigCodePoint )num; +          tok->u.code = code;          }          else {            /* can't read nothing or invalid format */ @@ -4856,14 +4837,14 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)          }        }        else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { -        num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); -        if (num < 0) return num; +        r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code); +        if (r < 0) return r;          if (p == prev) {  /* can't read nothing. */ -          num = 0; /* but, it's not error */ +          code = 0; /* but, it's not error */          } -        tok->type = TK_RAW_BYTE; +        tok->type = TK_CRUDE_BYTE;          tok->base = 16; -        tok->u.c  = num; +        tok->u.byte = (UChar )code;        }        break; @@ -4872,14 +4853,14 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        prev = p;        if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { -        num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); -        if (num < 0) return num; +        r = scan_hexadecimal_number(&p, end, 4, 4, enc, &code); +        if (r < 0) return r;          if (p == prev) {  /* can't read nothing. */ -          num = 0; /* but, it's not error */ +          code = 0; /* but, it's not error */          }          tok->type   = TK_CODE_POINT;          tok->base   = 16; -        tok->u.code = (OnigCodePoint )num; +        tok->u.code = code;        }        break; @@ -4888,22 +4869,23 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {          PUNFETCH;          prev = p; -        num = scan_unsigned_octal_number(&p, end, 3, enc); -        if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER; +        r = scan_octal_number(&p, end, 0, 3, enc, &code); +        if (r < 0) return r; +        if (code >= 256) return ONIGERR_TOO_BIG_NUMBER;          if (p == prev) {  /* can't read nothing. */ -          num = 0; /* but, it's not error */ +          code = 0; /* but, it's not error */          } -        tok->type = TK_RAW_BYTE; +        tok->type = TK_CRUDE_BYTE;          tok->base = 8; -        tok->u.c  = num; +        tok->u.byte = (UChar )code;        }        break;      default:        PUNFETCH; -      num = fetch_escaped_value(&p, end, env, &c2); -      if (num < 0) return num; -      if (tok->u.c != c2) { +      r = fetch_escaped_value(&p, end, env, &c2); +      if (r < 0) return r; +      if (tok->u.code != c2) {          tok->u.code = c2;          tok->type   = TK_CODE_POINT;        } @@ -4917,7 +4899,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        PINC;        if (str_exist_check_with_esc(send, 2, p, end,                                     (OnigCodePoint )']', enc, syn)) { -        tok->type = TK_POSIX_BRACKET_OPEN; +        tok->type = TK_CC_POSIX_BRACKET_OPEN;        }        else {          PUNFETCH; @@ -4927,7 +4909,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      else {      cc_in_cc:        if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { -        tok->type = TK_CC_CC_OPEN; +        tok->type = TK_CC_OPEN_CC;        }        else {          CC_ESC_WARN(env, (UChar* )"["); @@ -4950,7 +4932,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)  static int  fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)  { -  int r, num; +  int r; +  OnigCodePoint code;    OnigCodePoint c;    OnigEncoding enc = env->enc;    OnigSyntaxType* syn = env->syntax; @@ -4975,7 +4958,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      tok->backp = p;      PFETCH(c); -    tok->u.c = c; +    tok->u.code = c;      tok->escaped = 1;      switch (c) {      case '*': @@ -5026,7 +5009,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      case '{':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; -      r = fetch_interval_quantifier(&p, end, tok, env); +      r = fetch_interval(&p, end, tok, env);        if (r < 0) return r;  /* error */        if (r == 0) goto greedy_check2;        else if (r == 2) { /* {n} */ @@ -5214,8 +5197,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        prev = p;        if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {          PINC; -        num = scan_unsigned_octal_number(&p, end, 11, enc); -        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; +        r = scan_octal_number(&p, end, 0, 11, enc, &code); +        if (r < 0) return r;          if (!PEND) {            if (IS_CODE_DIGIT_ASCII(enc, PPEEK))              return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; @@ -5224,7 +5207,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)          if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {            PINC;            tok->type   = TK_CODE_POINT; -          tok->u.code = (OnigCodePoint )num; +          tok->u.code = code;          }          else {            /* can't read nothing or invalid format */ @@ -5239,13 +5222,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        prev = p;        if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {          PINC; -        num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); -        if (num < 0) { -          if (num == ONIGERR_TOO_BIG_NUMBER) -            return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; -          else -            return num; -        } +        r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code); +        if (r < 0) return r;          if (!PEND) {            if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))              return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; @@ -5254,7 +5232,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)          if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {            PINC;            tok->type   = TK_CODE_POINT; -          tok->u.code = (OnigCodePoint )num; +          tok->u.code = code;          }          else {            /* can't read nothing or invalid format */ @@ -5262,14 +5240,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)          }        }        else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { -        num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); -        if (num < 0) return num; +        r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code); +        if (r < 0) return r;          if (p == prev) {  /* can't read nothing. */ -          num = 0; /* but, it's not error */ +          code = 0; /* but, it's not error */          } -        tok->type = TK_RAW_BYTE; +        tok->type = TK_CRUDE_BYTE;          tok->base = 16; -        tok->u.c  = num; +        tok->u.byte = (UChar )code;        }        break; @@ -5278,14 +5256,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        prev = p;        if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { -        num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); -        if (num < 0) return num; +        r = scan_hexadecimal_number(&p, end, 4, 4, enc, &code); +        if (r < 0) return r;          if (p == prev) {  /* can't read nothing. */ -          num = 0; /* but, it's not error */ +          code = 0; /* but, it's not error */          }          tok->type   = TK_CODE_POINT;          tok->base   = 16; -        tok->u.code = (OnigCodePoint )num; +        tok->u.code = code;        }        break; @@ -5293,21 +5271,21 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      case '5': case '6': case '7': case '8': case '9':        PUNFETCH;        prev = p; -      num = onig_scan_unsigned_number(&p, end, enc); -      if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { +      r = scan_number(&p, end, enc); +      if (r < 0 || r > ONIG_MAX_BACKREF_NUM) {          goto skip_backref;        }        if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && -          (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ +          (r <= env->num_mem || r <= 9)) { /* This spec. from GNU regex */          if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { -          if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node)) +          if (r > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[r].mem_node))              return ONIGERR_INVALID_BACKREF;          }          tok->type = TK_BACKREF;          tok->u.backref.num     = 1; -        tok->u.backref.ref1    = num; +        tok->u.backref.ref1    = r;          tok->u.backref.by_name = 0;  #ifdef USE_BACKREF_WITH_LEVEL          tok->u.backref.exist_level = 0; @@ -5327,14 +5305,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      case '0':        if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {          prev = p; -        num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); -        if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER; +        r = scan_octal_number(&p, end, 0, (c == '0' ? 2:3), enc, &code); +        if (r < 0 || r >= 256) return ONIGERR_TOO_BIG_NUMBER;          if (p == prev) {  /* can't read nothing. */ -          num = 0; /* but, it's not error */ +          code = 0; /* but, it's not error */          } -        tok->type = TK_RAW_BYTE; +        tok->type = TK_CRUDE_BYTE;          tok->base = 8; -        tok->u.c  = num; +        tok->u.byte = (UChar )code;        }        else if (c != '0') {          PINC; @@ -5359,7 +5337,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)            if (r == 1) tok->u.backref.exist_level = 1;            else        tok->u.backref.exist_level = 0;  #else -          r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1); +          r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, TRUE);  #endif            if (r < 0) return r; @@ -5372,7 +5350,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)              if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {                if (back_num > env->num_mem || -                  IS_NULL(SCANENV_MEMENV(env)[back_num].node)) +                  IS_NULL(SCANENV_MEMENV(env)[back_num].mem_node))                  return ONIGERR_INVALID_BACKREF;              }              tok->type = TK_BACKREF; @@ -5381,7 +5359,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)              tok->u.backref.ref1 = back_num;            }            else { -            num = name_to_group_numbers(env, prev, name_end, &backs); +            int num = name_to_group_numbers(env, prev, name_end, &backs);              if (num <= 0) {                return ONIGERR_UNDEFINED_NAME_REFERENCE;              } @@ -5389,7 +5367,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)                int i;                for (i = 0; i < num; i++) {                  if (backs[i] > env->num_mem || -                    IS_NULL(SCANENV_MEMENV(env)[backs[i]].node)) +                    IS_NULL(SCANENV_MEMENV(env)[backs[i]].mem_node))                    return ONIGERR_INVALID_BACKREF;                }              } @@ -5422,7 +5400,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)            prev = p;            r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, -                         &gnum, &num_type, 1); +                         &gnum, &num_type, TRUE);            if (r < 0) return r;            if (num_type != IS_NOT_NUM) { @@ -5483,10 +5461,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)          OnigCodePoint c2;          PUNFETCH; -        num = fetch_escaped_value(&p, end, env, &c2); -        if (num < 0) return num; -        /* set_raw: */ -        if (tok->u.c != c2) { +        r = fetch_escaped_value(&p, end, env, &c2); +        if (r < 0) return r; +        if (tok->u.code != c2) {            tok->type = TK_CODE_POINT;            tok->u.code = c2;          } @@ -5498,7 +5475,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      }    }    else { -    tok->u.c = c; +    tok->u.code = c;      tok->escaped = 0;  #ifdef USE_VARIABLE_META_CHARS @@ -5563,7 +5540,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      case '{':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; -      r = fetch_interval_quantifier(&p, end, tok, env); +      r = fetch_interval(&p, end, tok, env);        if (r < 0) return r;  /* error */        if (r == 0) goto greedy_check2;        else if (r == 2) { /* {n} */ @@ -5611,8 +5588,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)                {                  PINC;                  name = p; -                r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, -                               &num_type, 0); +                r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, +                               &gnum, &num_type, FALSE);                  if (r < 0) return r;                  tok->type = TK_CALL; @@ -5644,7 +5621,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)                {                  name = p;                  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, -                               &gnum, &num_type, 1); +                               &gnum, &num_type, TRUE);                  if (r < 0) return r;                  if (num_type == IS_NOT_NUM) { @@ -5700,7 +5677,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)      case '[':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; -      tok->type = TK_CC_OPEN; +      tok->type = TK_OPEN_CC;        break;      case ']': @@ -5911,6 +5888,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)    int c, r;    int ascii_mode; +  int is_single;    const OnigCodePoint *ranges;    OnigCodePoint limit;    OnigCodePoint sb_out; @@ -5932,6 +5910,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)    }    r = 0; +  is_single = ONIGENC_IS_SINGLEBYTE(enc);    limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;    switch (ctype) { @@ -5948,19 +5927,25 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)    case ONIGENC_CTYPE_ALNUM:      if (not != 0) {        for (c = 0; c < (int )limit; c++) { -        if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) -          BITSET_SET_BIT(cc->bs, c); +        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) { +          if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) +            BITSET_SET_BIT(cc->bs, c); +        }        }        for (c = limit; c < SINGLE_BYTE_SIZE; c++) { -        BITSET_SET_BIT(cc->bs, c); +        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) +          BITSET_SET_BIT(cc->bs, c);        } -      ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); +      if (is_single == 0) +        ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);      }      else {        for (c = 0; c < (int )limit; c++) { -        if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) -          BITSET_SET_BIT(cc->bs, c); +        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) { +          if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) +            BITSET_SET_BIT(cc->bs, c); +        }        }      }      break; @@ -5970,21 +5955,25 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)    case ONIGENC_CTYPE_WORD:      if (not != 0) {        for (c = 0; c < (int )limit; c++) { -        if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */ +        /* check invalid code point */ +        if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)              && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))            BITSET_SET_BIT(cc->bs, c);        }        for (c = limit; c < SINGLE_BYTE_SIZE; c++) { -        if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) +        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)            BITSET_SET_BIT(cc->bs, c);        } +      if (ascii_mode != 0 && is_single == 0) +        ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);      }      else {        for (c = 0; c < (int )limit; c++) { -        if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) +        if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) +            && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))            BITSET_SET_BIT(cc->bs, c);        } -      if (ascii_mode == 0) +      if (ascii_mode == 0 && is_single == 0)          ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);      }      break; @@ -6076,10 +6065,12 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)  {    int r;    OnigCodePoint c; -  OnigEncoding enc = env->enc; -  UChar *prev, *start, *p = *src; +  OnigEncoding enc; +  UChar *prev, *start, *p; -  r = 0; +  p = *src; +  enc = env->enc; +  r = ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;    start = prev = p;    while (!PEND) { @@ -6087,18 +6078,20 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)      PFETCH_S(c);      if (c == '}') {        r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); -      if (r < 0) break; +      if (r >= 0) { +        *src = p; +      } +      else { +        onig_scan_env_set_error_string(env, r, *src, prev); +      } -      *src = p;        return r;      }      else if (c == '(' || c == ')' || c == '{' || c == '|') { -      r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;        break;      }    } -  onig_scan_env_set_error_string(env, r, *src, prev);    return r;  } @@ -6114,7 +6107,7 @@ parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* en    *np = node_new_cclass();    CHECK_NULL_RETURN_MEMERR(*np);    cc = CCLASS_(*np); -  r = add_ctype_to_cc(cc, ctype, 0, env); +  r = add_ctype_to_cc(cc, ctype, FALSE, env);    if (r != 0) return r;    if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); @@ -6122,67 +6115,67 @@ parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* en  } -enum CCSTATE { -  CCS_VALUE, -  CCS_RANGE, -  CCS_COMPLETE, -  CCS_START -}; +typedef enum { +  CS_VALUE, +  CS_RANGE, +  CS_COMPLETE, +  CS_START +} CSTATE; -enum CCVALTYPE { -  CCV_SB, -  CCV_CODE_POINT, -  CCV_CLASS -}; +typedef enum { +  CV_UNDEF, +  CV_SB, +  CV_MB, +  CV_CPROP +} CVAL;  static int -next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, -                 enum CCSTATE* state, ScanEnv* env) +cc_cprop_next(CClassNode* cc, OnigCodePoint* pcode, CVAL* val, CSTATE* state, +              ScanEnv* env)  {    int r; -  if (*state == CCS_RANGE) +  if (*state == CS_RANGE)      return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; -  if (*state == CCS_VALUE && *type != CCV_CLASS) { -    if (*type == CCV_SB) -      BITSET_SET_BIT(cc->bs, (int )(*vs)); -    else if (*type == CCV_CODE_POINT) { -      r = add_code_range(&(cc->mbuf), env, *vs, *vs); +  if (*state == CS_VALUE) { +    if (*val == CV_SB) +      BITSET_SET_BIT(cc->bs, (int )(*pcode)); +    else if (*val == CV_MB) { +      r = add_code_range(&(cc->mbuf), env, *pcode, *pcode);        if (r < 0) return r;      }    } -  *state = CCS_VALUE; -  *type  = CCV_CLASS; +  *state = CS_VALUE; +  *val   = CV_CPROP;    return 0;  }  static int -next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to, -               int* from_israw, int to_israw, -               enum CCVALTYPE intype, enum CCVALTYPE* type, -               enum CCSTATE* state, ScanEnv* env) +cc_char_next(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to, +             int* from_raw, int to_raw, CVAL intype, CVAL* type, +             CSTATE* state, ScanEnv* env)  {    int r;    switch (*state) { -  case CCS_VALUE: -    if (*type == CCV_SB) { +  case CS_VALUE: +    if (*type == CV_SB) {        if (*from > 0xff)            return ONIGERR_INVALID_CODE_POINT_VALUE;        BITSET_SET_BIT(cc->bs, (int )(*from));      } -    else if (*type == CCV_CODE_POINT) { +    else if (*type == CV_MB) {        r = add_code_range(&(cc->mbuf), env, *from, *from);        if (r < 0) return r;      }      break; -  case CCS_RANGE: +  case CS_RANGE:      if (intype == *type) { -      if (intype == CCV_SB) { +      if (intype == CV_SB) {          if (*from > 0xff || to > 0xff)            return ONIGERR_INVALID_CODE_POINT_VALUE; @@ -6211,21 +6204,21 @@ next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,        if (r < 0) return r;      }    ccs_range_end: -    *state = CCS_COMPLETE; +    *state = CS_COMPLETE;      break; -  case CCS_COMPLETE: -  case CCS_START: -    *state = CCS_VALUE; +  case CS_COMPLETE: +  case CS_START: +    *state = CS_VALUE;      break;    default:      break;    } -  *from_israw = to_israw; -  *from       = to; -  *type       = intype; +  *from_raw = to_raw; +  *from     = to; +  *type     = intype;    return 0;  } @@ -6253,27 +6246,25 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,  }  static int -parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) +parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)  {    int r, neg, len, fetched, and_start; -  OnigCodePoint v, vs; +  OnigCodePoint in_code, curr_code;    UChar *p;    Node* node;    CClassNode *cc, *prev_cc;    CClassNode work_cc; - -  enum CCSTATE state; -  enum CCVALTYPE val_type, in_type; -  int val_israw, in_israw; +  int curr_raw, in_raw; +  CSTATE state; +  CVAL in_type; +  CVAL curr_type;    *np = NULL_NODE; -  env->parse_depth++; -  if (env->parse_depth > ParseDepthLimit) -    return ONIGERR_PARSE_DEPTH_LIMIT_OVER; +  INC_PARSE_DEPTH(env->parse_depth);    prev_cc = (CClassNode* )NULL;    r = fetch_token_in_cc(tok, src, end, env); -  if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { +  if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) {      neg = 1;      r = fetch_token_in_cc(tok, src, end, env);    } @@ -6296,31 +6287,27 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)    cc = CCLASS_(node);    and_start = 0; -  state = CCS_START; +  state = CS_START; +  curr_type = CV_UNDEF; +    p = *src;    while (r != TK_CC_CLOSE) {      fetched = 0;      switch (r) {      case TK_CHAR:      any_char_in: -      len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c); -      if (len > 1) { -        in_type = CCV_CODE_POINT; -      } -      else if (len < 0) { +      len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code); +      if (len < 0) {          r = len;          goto err;        } -      else { -        /* sb_char: */ -        in_type = CCV_SB; -      } -      v = (OnigCodePoint )tok->u.c; -      in_israw = 0; +      in_type = (len == 1) ? CV_SB : CV_MB; +      in_code = tok->u.code; +      in_raw = 0;        goto val_entry2;        break; -    case TK_RAW_BYTE: +    case TK_CRUDE_BYTE:        /* tok->base != 0 : octal or hexadec. */        if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {          int i, j; @@ -6329,15 +6316,15 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)          UChar* psave = p;          int base = tok->base; -        buf[0] = tok->u.c; +        buf[0] = tok->u.byte;          for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {            r = fetch_token_in_cc(tok, &p, end, env);            if (r < 0) goto err; -          if (r != TK_RAW_BYTE || tok->base != base) { +          if (r != TK_CRUDE_BYTE || tok->base != base) {              fetched = 1;              break;            } -          buf[i] = tok->u.c; +          buf[i] = tok->u.byte;          }          if (i < ONIGENC_MBC_MINLEN(env->enc)) { @@ -6362,63 +6349,63 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)          }          if (i == 1) { -          v = (OnigCodePoint )buf[0]; -          goto raw_single; +          in_code = (OnigCodePoint )buf[0]; +          goto crude_single;          }          else { -          v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); -          in_type = CCV_CODE_POINT; +          in_code = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); +          in_type = CV_MB;          }        }        else { -        v = (OnigCodePoint )tok->u.c; -      raw_single: -        in_type = CCV_SB; +        in_code = (OnigCodePoint )tok->u.byte; +      crude_single: +        in_type = CV_SB;        } -      in_israw = 1; +      in_raw = 1;        goto val_entry2;        break;      case TK_CODE_POINT: -      v = tok->u.code; -      in_israw = 1; +      in_code = tok->u.code; +      in_raw  = 1;      val_entry: -      len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); +      len = ONIGENC_CODE_TO_MBCLEN(env->enc, in_code);        if (len < 0) { -        if (state != CCS_RANGE || +        if (state != CS_RANGE ||              ! IS_SYNTAX_BV(env->syntax,                             ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) || -            v < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) { +            in_code < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {            r = len;            goto err;          }        } -      in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT); +      in_type = (len == 1 ? CV_SB : CV_MB);      val_entry2: -      r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, -                         &state, env); +      r = cc_char_next(cc, &curr_code, in_code, &curr_raw, in_raw, in_type, +                       &curr_type, &state, env);        if (r != 0) goto err;        break; -    case TK_POSIX_BRACKET_OPEN: +    case TK_CC_POSIX_BRACKET_OPEN:        r = parse_posix_bracket(cc, &p, end, env);        if (r < 0) goto err;        if (r == 1) {  /* is not POSIX bracket */          CC_ESC_WARN(env, (UChar* )"[");          p = tok->backp; -        v = (OnigCodePoint )tok->u.c; -        in_israw = 0; +        in_code = tok->u.code; +        in_raw = 0;          goto val_entry;        } -      goto next_class; +      goto next_cprop;        break;      case TK_CHAR_TYPE:        r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);        if (r != 0) goto err; -    next_class: -      r = next_state_class(cc, &vs, &val_type, &state, env); +    next_cprop: +      r = cc_cprop_next(cc, &curr_code, &curr_type, &state, env);        if (r != 0) goto err;        break; @@ -6431,19 +6418,20 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)          }          r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);          if (r != 0) goto err; -        goto next_class; +        goto next_cprop;        }        break;      case TK_CC_RANGE: -      if (state == CCS_VALUE) { +      if (state == CS_VALUE) {          r = fetch_token_in_cc(tok, &p, end, env);          if (r < 0) goto err; +          fetched = 1;          if (r == TK_CC_CLOSE) { /* allow [x-] */          range_end_val: -          v = (OnigCodePoint )'-'; -          in_israw = 0; +          in_code = (OnigCodePoint )'-'; +          in_raw = 0;            goto val_entry;          }          else if (r == TK_CC_AND) { @@ -6451,20 +6439,21 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)            goto range_end_val;          } -        if (val_type == CCV_CLASS) { +        if (curr_type == CV_CPROP) {            r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;            goto err;          } -        state = CCS_RANGE; +        state = CS_RANGE;        } -      else if (state == CCS_START) { +      else if (state == CS_START) {          /* [-xa] is allowed */ -        v = (OnigCodePoint )tok->u.c; -        in_israw = 0; +        in_code = tok->u.code; +        in_raw = 0;          r = fetch_token_in_cc(tok, &p, end, env);          if (r < 0) goto err; +          fetched = 1;          /* [--x] or [a&&-x] is warned. */          if (r == TK_CC_RANGE || and_start != 0) @@ -6472,15 +6461,17 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)          goto val_entry;        } -      else if (state == CCS_RANGE) { +      else if (state == CS_RANGE) {          CC_ESC_WARN(env, (UChar* )"-"); -        goto any_char_in;  /* [!--x] is allowed */ +        goto any_char_in;  /* [!--] is allowed */        } -      else { /* CCS_COMPLETE */ +      else { /* CS_COMPLETE */          r = fetch_token_in_cc(tok, &p, end, env);          if (r < 0) goto err; +          fetched = 1; -        if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ +        if (r == TK_CC_CLOSE) +          goto range_end_val; /* allow [a-b-] */          else if (r == TK_CC_AND) {            CC_ESC_WARN(env, (UChar* )"-");            goto range_end_val; @@ -6495,12 +6486,19 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)        }        break; -    case TK_CC_CC_OPEN: /* [ */ +    case TK_CC_OPEN_CC: /* [ */        {          Node *anode;          CClassNode* acc; -        r = parse_char_class(&anode, tok, &p, end, env); +        if (state == CS_VALUE) { +          r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type, +                           &state, env); +          if (r != 0) goto err; +        } +        state = CS_COMPLETE; + +        r = parse_cc(&anode, tok, &p, end, env);          if (r != 0) {            onig_node_free(anode);            goto cc_open_err; @@ -6516,14 +6514,14 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)      case TK_CC_AND: /* && */        { -        if (state == CCS_VALUE) { -          r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, -                             &val_type, &state, env); +        if (state == CS_VALUE) { +          r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type, +                           &state, env);            if (r != 0) goto err;          }          /* initialize local variables */          and_start = 1; -        state = CCS_START; +        state = CS_START;          if (IS_NOT_NULL(prev_cc)) {            r = and_cclass(prev_cc, cc, env->enc); @@ -6556,9 +6554,9 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)      }    } -  if (state == CCS_VALUE) { -    r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, -                       &val_type, &state, env); +  if (state == CS_VALUE) { +    r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type, +                     &state, env);      if (r != 0) goto err;    } @@ -6591,7 +6589,7 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)      }    }    *src = p; -  env->parse_depth--; +  DEC_PARSE_DEPTH(env->parse_depth);    return 0;   err: @@ -6600,8 +6598,8 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)    return r;  } -static int parse_subexp(Node** top, PToken* tok, int term, -                        UChar** src, UChar* end, ScanEnv* env, int group_head); +static int parse_alts(Node** top, PToken* tok, int term, +                      UChar** src, UChar* end, ScanEnv* env, int group_head);  #ifdef USE_CALLOUT @@ -6772,7 +6770,8 @@ parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long*  static int  parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, -                   unsigned int types[], OnigValue vals[], ScanEnv* env) +                   int max_arg_num, unsigned int types[], OnigValue vals[], +                   ScanEnv* env)  {  #define MAX_CALLOUT_ARG_BYTE_LENGTH   128 @@ -6791,9 +6790,9 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,    if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; +  c = 0;    n = 0;    while (n < ONIG_CALLOUT_MAX_ARGS_NUM) { -    c   = 0;      cn  = 0;      esc = 0;      eesc = 0; @@ -6826,7 +6825,7 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,            size_t clen;          add_char: -          if (skip_mode == 0) { +          if (skip_mode == FALSE) {              clen = p - e;              if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)                return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */ @@ -6840,7 +6839,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,      }      if (cn != 0) { -      if (skip_mode == 0) { +      if (max_arg_num >= 0 && n >= max_arg_num) +        return ONIGERR_INVALID_CALLOUT_ARG; + +      if (skip_mode == FALSE) {          if ((types[n] & ONIG_TYPE_LONG) != 0) {            int fixed = 0;            if (cn > 0) { @@ -6972,7 +6974,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en      /* read for single check only */      save = p; -    arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env); +    arg_num = parse_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);      if (arg_num < 0) return arg_num;      is_not_single = PPEEK_IS(cterm) ?  0 : 1; @@ -6986,7 +6988,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en        types[i] = get_callout_arg_type_by_name_id(name_id, i);      } -    arg_num = parse_callout_args(0, '}', &p, end, types, vals, env); +    arg_num = parse_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);      if (arg_num < 0) return arg_num;      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; @@ -7086,17 +7088,17 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,      group:        r = fetch_token(tok, &p, end, env);        if (r < 0) return r; -      r = parse_subexp(np, tok, term, &p, end, env, 0); +      r = parse_alts(np, tok, term, &p, end, env, FALSE);        if (r < 0) return r;        *src = p;        return 1; /* group */        break;      case '=': -      *np = onig_node_new_anchor(ANCR_PREC_READ, 0); +      *np = onig_node_new_anchor(ANCR_PREC_READ, FALSE);        break;      case '!':  /*         preceding read */ -      *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0); +      *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, FALSE);        break;      case '>':            /* (?>...) stop backtrack */        *np = node_new_bag(BAG_STOP_BACKTRACK); @@ -7114,9 +7116,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,        if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;        PFETCH(c);        if (c == '=') -        *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0); +        *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, FALSE);        else if (c == '!') -        *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0); +        *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, FALSE);        else {          if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {            UChar *name; @@ -7132,7 +7134,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,          named_group2:            name = p;            r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, -                         &num_type, 0); +                         &num_type, FALSE);            if (r < 0) return r;            num = scan_env_add_mem_entry(env); @@ -7146,7 +7148,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,            CHECK_NULL_RETURN_MEMERR(*np);            BAG_(*np)->m.regnum = num;            if (list_capture != 0) -            MEM_STATUS_ON_SIMPLE(env->capture_history, num); +            MEM_STATUS_ON_SIMPLE(env->cap_history, num);            env->num_named++;          }          else { @@ -7181,7 +7183,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,          r = fetch_token(tok, &p, end, env);          if (r < 0) return r; -        r = parse_subexp(&absent, tok, term, &p, end, env, 1); +        r = parse_alts(&absent, tok, term, &p, end, env, TRUE);          if (r < 0) {            onig_node_free(absent);            return r; @@ -7268,7 +7270,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,            if (r == 1) exist_level = 1;  #else            r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('), -                         &p, end, &name_end, env, &back_num, &num_type, 1); +                         &p, end, &name_end, env, &back_num, &num_type, TRUE);  #endif            if (r < 0) {              if (is_enclosed == 0) { @@ -7288,11 +7290,11 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,              if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {                if (back_num > env->num_mem || -                  IS_NULL(SCANENV_MEMENV(env)[back_num].node)) +                  IS_NULL(SCANENV_MEMENV(env)[back_num].mem_node))                  return ONIGERR_INVALID_BACKREF;              } -            condition = node_new_backref_checker(1, &back_num, 0, +            condition = node_new_backref_checker(1, &back_num, FALSE,  #ifdef USE_BACKREF_WITH_LEVEL                                                   exist_level, level,  #endif @@ -7310,12 +7312,12 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,                int i;                for (i = 0; i < num; i++) {                  if (backs[i] > env->num_mem || -                    IS_NULL(SCANENV_MEMENV(env)[backs[i]].node)) +                    IS_NULL(SCANENV_MEMENV(env)[backs[i]].mem_node))                    return ONIGERR_INVALID_BACKREF;                }              } -            condition = node_new_backref_checker(num, backs, 1, +            condition = node_new_backref_checker(num, backs, TRUE,  #ifdef USE_BACKREF_WITH_LEVEL                                                   exist_level, level,  #endif @@ -7357,7 +7359,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,            condition_is_checker = 0;            r = fetch_token(tok, &p, end, env);            if (r < 0) return r; -          r = parse_subexp(&condition, tok, term, &p, end, env, 0); +          r = parse_alts(&condition, tok, term, &p, end, env, FALSE);            if (r < 0) {              onig_node_free(condition);              return r; @@ -7400,7 +7402,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,              onig_node_free(condition);              return r;            } -          r = parse_subexp(&target, tok, term, &p, end, env, 1); +          r = parse_alts(&target, tok, term, &p, end, env, TRUE);            if (r < 0) {              onig_node_free(condition);              onig_node_free(target); @@ -7465,7 +7467,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,            return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;          }          BAG_(*np)->m.regnum = num; -        MEM_STATUS_ON_SIMPLE(env->capture_history, num); +        MEM_STATUS_ON_SIMPLE(env->cap_history, num);        }        else {          return ONIGERR_UNDEFINED_GROUP_OPTION; @@ -7501,7 +7503,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,            case 'm':              if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { -              OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0)); +              OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? TRUE : FALSE));              }              else if (IS_SYNTAX_OP2(env->syntax,                          ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) { @@ -7537,16 +7539,16 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,                  if (! ONIGENC_IS_UNICODE_ENCODING(enc))                    return ONIGERR_UNDEFINED_GROUP_OPTION; -                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 0); -                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 1); +                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, FALSE); +                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, TRUE);                  break;  #ifdef USE_UNICODE_WORD_BREAK                case 'w':                  if (! ONIGENC_IS_UNICODE_ENCODING(enc))                    return ONIGERR_UNDEFINED_GROUP_OPTION; -                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 0); -                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 1); +                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, FALSE); +                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, TRUE);                  break;  #endif                default: @@ -7576,7 +7578,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,              env->options = option;              r = fetch_token(tok, &p, end, env);              if (r < 0) return r; -            r = parse_subexp(&target, tok, term, &p, end, env, 0); +            r = parse_alts(&target, tok, term, &p, end, env, FALSE);              env->options = prev;              if (r < 0) {                onig_node_free(target); @@ -7623,7 +7625,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,    CHECK_NULL_RETURN_MEMERR(*np);    r = fetch_token(tok, &p, end, env);    if (r < 0) return r; -  r = parse_subexp(&target, tok, term, &p, end, env, 0); +  r = parse_alts(&target, tok, term, &p, end, env, FALSE);    if (r < 0) {      onig_node_free(target);      return r; @@ -7633,7 +7635,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,    if (NODE_TYPE(*np) == NODE_BAG) {      if (BAG_(*np)->type == BAG_MEMORY) { -      /* Don't move this to previous of parse_subexp() */ +      /* Don't move this to previous of parse_alts() */        r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);        if (r != 0) return r;      } @@ -7653,7 +7655,7 @@ static const char* ReduceQStr[] = {  };  static int -set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) +assign_quantifier_body(Node* qnode, Node* target, int group, ScanEnv* env)  {    QuantNode* qn; @@ -7725,9 +7727,11 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)          }        }        else { +        int r; +          NODE_BODY(qnode) = target; -        onig_reduce_nested_quantifier(qnode, target); -        goto q_exit; +        r = onig_reduce_nested_quantifier(qnode); +        return r;        }      }      break; @@ -7737,7 +7741,6 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)    }    NODE_BODY(qnode) = target; - q_exit:    return 0;  } @@ -7767,6 +7770,38 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)  }  #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ +#define ADD_CODE_INTO_CC(cc, code, enc) do {\ +  if (ONIGENC_MBC_MINLEN(enc) > 1 || ONIGENC_CODE_TO_MBCLEN(enc, code) != 1) {\ +    add_code_range_to_buf(&((cc)->mbuf), code, code);\ +  }\ +  else {\ +    BITSET_SET_BIT((cc)->bs, code);\ +  }\ +} while (0) + +extern int +onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, +                               int n, OnigCodePoint codes[]) +{ +  int i; +  Node* node; +  CClassNode* cc; + +  *rnode = NULL_NODE; + +  node = node_new_cclass(); +  CHECK_NULL_RETURN_MEMERR(node); + +  cc = CCLASS_(node); + +  for (i = 0; i < n; i++) { +    ADD_CODE_INTO_CC(cc, codes[i], enc); +  } + +  *rnode = node; +  return 0; +} +  typedef struct {    ScanEnv*    env;    CClassNode* cc; @@ -7780,37 +7815,31 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)    IApplyCaseFoldArg* iarg;    ScanEnv* env;    CClassNode* cc; -  BitSetRef bs;    iarg = (IApplyCaseFoldArg* )arg;    env = iarg->env;    cc  = iarg->cc; -  bs = cc->bs;    if (to_len == 1) {      int is_in = onig_is_code_in_cc(env->enc, from, cc);  #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS      if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||          (is_in == 0 &&  IS_NCCLASS_NOT(cc))) { -      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { -        add_code_range(&(cc->mbuf), env, *to, *to); -      } -      else { -        BITSET_SET_BIT(bs, *to); -      } +      ADD_CODE_INTO_CC(cc, *to, env->enc);      }  #else      if (is_in != 0) { -      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { +      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || +          ONIGENC_CODE_TO_MBCLEN(env->enc, *to) != 1) {          if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);          add_code_range(&(cc->mbuf), env, *to, *to);        }        else {          if (IS_NCCLASS_NOT(cc)) { -          BITSET_CLEAR_BIT(bs, *to); +          BITSET_CLEAR_BIT(cc->bs, *to);          }          else -          BITSET_SET_BIT(bs, *to); +          BITSET_SET_BIT(cc->bs, *to);        }      }  #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ @@ -7818,34 +7847,65 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)    else {      int r, i, len;      UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; -    Node *snode = NULL_NODE;      if (onig_is_code_in_cc(env->enc, from, cc)  #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS          && !IS_NCCLASS_NOT(cc)  #endif          ) { +      int n, j, m, index; +      Node* list_node; +      Node* ns[3]; + +      n = 0;        for (i = 0; i < to_len; i++) { -        len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); -        if (i == 0) { -          snode = onig_node_new_str(buf, buf + len); -          CHECK_NULL_RETURN_MEMERR(snode); - -          /* char-class expanded multi-char only -             compare with string folded at match time. */ -          NODE_STRING_SET_AMBIG(snode); +        OnigCodePoint code; +        Node* csnode; +        CClassNode* cs_cc; + +        index = onigenc_unicode_fold1_key(&to[i]); +        if (index >= 0) { +          csnode = node_new_cclass(); +          cs_cc = CCLASS_(csnode); +          if (IS_NULL(csnode)) { +          err_free_ns: +            for (j = 0; j < n; j++) onig_node_free(ns[j]); +            return ONIGERR_MEMORY; +          } +          m = FOLDS1_UNFOLDS_NUM(index); +          for (j = 0; j < m; j++) { +            code = FOLDS1_UNFOLDS(index)[j]; +            ADD_CODE_INTO_CC(cs_cc, code, env->enc); +          } +          ADD_CODE_INTO_CC(cs_cc, to[i], env->enc); +          ns[n++] = csnode;          }          else { -          r = onig_node_str_cat(snode, buf, buf + len); -          if (r < 0) { -            onig_node_free(snode); -            return r; +          len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); +          if (n == 0 || NODE_TYPE(ns[n-1]) != NODE_STRING) { +            csnode = onig_node_new_str(buf, buf + len); +            if (IS_NULL(csnode)) goto err_free_ns; + +            NODE_STRING_SET_CASE_EXPANDED(csnode); +            ns[n++] = csnode; +          } +          else { +            r = onig_node_str_cat(ns[n-1], buf, buf + len); +            if (r < 0) goto err_free_ns;            }          }        } -      *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); -      CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); +      if (n == 1) +        list_node = ns[0]; +      else +        list_node = make_list(n, ns); + +      *(iarg->ptail) = onig_node_new_alt(list_node, NULL_NODE); +      if (IS_NULL(*(iarg->ptail))) { +        onig_node_free(list_node); +        return ONIGERR_MEMORY; +      }        iarg->ptail = &(NODE_CDR((*(iarg->ptail))));      }    } @@ -7901,7 +7961,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,        env->options = BAG_(*np)->o.options;        r = fetch_token(tok, src, end, env);        if (r < 0) return r; -      r = parse_subexp(&target, tok, term, src, end, env, 0); +      r = parse_alts(&target, tok, term, src, end, env, FALSE);        env->options = prev;        if (r < 0) {          onig_node_free(target); @@ -7916,7 +7976,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,      if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))        return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; -    if (tok->escaped) goto tk_raw_byte; +    if (tok->escaped) goto tk_crude_byte;      else goto tk_byte;      break; @@ -7941,36 +8001,36 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,      }      break; -  case TK_RAW_BYTE: -  tk_raw_byte: +  case TK_CRUDE_BYTE: +  tk_crude_byte:      { -      *np = node_new_str_raw_char((UChar )tok->u.c); +      *np = node_new_str_crude_char(tok->u.byte);        CHECK_NULL_RETURN_MEMERR(*np);        len = 1;        while (1) {          if (len >= ONIGENC_MBC_MINLEN(env->enc)) {            if (len == enclen(env->enc, STR_(*np)->s)) {              r = fetch_token(tok, src, end, env); -            goto tk_raw_byte_end; +            goto tk_crude_byte_end;            }          }          r = fetch_token(tok, src, end, env);          if (r < 0) return r; -        if (r != TK_RAW_BYTE) +        if (r != TK_CRUDE_BYTE)            return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; -        r = node_str_cat_char(*np, (UChar )tok->u.c); +        r = node_str_cat_char(*np, tok->u.byte);          if (r < 0) return r;          len++;        } -    tk_raw_byte_end: +    tk_crude_byte_end:        if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))          return ONIGERR_INVALID_WIDE_CHAR_VALUE; -      NODE_STRING_CLEAR_RAW(*np); +      NODE_STRING_CLEAR_CRUDE(*np);        goto string_end;      }      break; @@ -7981,7 +8041,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,        len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);        if (len < 0) return len;  #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG -      *np = node_new_str_raw(buf, buf + len); +      *np = node_new_str_crude(buf, buf + len);  #else        *np = node_new_str(buf, buf + len);  #endif @@ -8024,7 +8084,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,            *np = node_new_cclass();            CHECK_NULL_RETURN_MEMERR(*np);            cc = CCLASS_(*np); -          add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); +          add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);            if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);          }          break; @@ -8041,11 +8101,11 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,      if (r != 0) return r;      break; -  case TK_CC_OPEN: +  case TK_OPEN_CC:      {        CClassNode* cc; -      r = parse_char_class(np, tok, src, end, env); +      r = parse_cc(np, tok, src, end, env);        if (r != 0) return r;        cc = CCLASS_(*np); @@ -8083,7 +8143,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,    case TK_ANYCHAR_ANYTIME:      *np = node_new_anychar();      CHECK_NULL_RETURN_MEMERR(*np); -    qn = node_new_quantifier(0, INFINITE_REPEAT, 0); +    qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);      CHECK_NULL_RETURN_MEMERR(qn);      NODE_BODY(qn) = *np;      *np = qn; @@ -8186,9 +8246,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,        if (is_invalid_quantifier_target(*tp))          return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; -      parse_depth++; -      if (parse_depth > ParseDepthLimit) -        return ONIGERR_PARSE_DEPTH_LIMIT_OVER; +      INC_PARSE_DEPTH(parse_depth);        qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,                                 r == TK_INTERVAL); @@ -8201,9 +8259,10 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,        else {          target = *tp;        } -      r = set_quantifier(qn, target, group, env); +      r = assign_quantifier_body(qn, target, group, env);        if (r < 0) {          onig_node_free(qn); +        *tp = NULL_NODE;          return r;        } @@ -8256,6 +8315,8 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,    Node *node, **headp;    *top = NULL; +  INC_PARSE_DEPTH(env->parse_depth); +    r = parse_exp(&node, tok, term, src, end, env, group_head);    if (r < 0) {      onig_node_free(node); @@ -8266,7 +8327,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,      *top = node;    }    else { -    *top  = node_new_list(node, NULL); +    *top = node_new_list(node, NULL);      if (IS_NULL(*top)) {        onig_node_free(node);        return ONIGERR_MEMORY; @@ -8274,7 +8335,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,      headp = &(NODE_CDR(*top));      while (r != TK_EOT && r != term && r != TK_ALT) { -      r = parse_exp(&node, tok, term, src, end, env, 0); +      r = parse_exp(&node, tok, term, src, end, env, FALSE);        if (r < 0) {          onig_node_free(node);          return r; @@ -8292,21 +8353,20 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,      }    } +  DEC_PARSE_DEPTH(env->parse_depth);    return r;  }  /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */  static int -parse_subexp(Node** top, PToken* tok, int term, UChar** src, UChar* end, -             ScanEnv* env, int group_head) +parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, +           ScanEnv* env, int group_head)  {    int r;    Node *node, **headp;    *top = NULL; -  env->parse_depth++; -  if (env->parse_depth > ParseDepthLimit) -    return ONIGERR_PARSE_DEPTH_LIMIT_OVER; +  INC_PARSE_DEPTH(env->parse_depth);    r = parse_branch(&node, tok, term, src, end, env, group_head);    if (r < 0) { @@ -8328,7 +8388,7 @@ parse_subexp(Node** top, PToken* tok, int term, UChar** src, UChar* end,      while (r == TK_ALT) {        r = fetch_token(tok, src, end, env);        if (r < 0) return r; -      r = parse_branch(&node, tok, term, src, end, env, 0); +      r = parse_branch(&node, tok, term, src, end, env, FALSE);        if (r < 0) {          onig_node_free(node);          return r; @@ -8355,7 +8415,7 @@ parse_subexp(Node** top, PToken* tok, int term, UChar** src, UChar* end,        return ONIGERR_PARSER_BUG;    } -  env->parse_depth--; +  DEC_PARSE_DEPTH(env->parse_depth);    return r;  } @@ -8367,7 +8427,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)    r = fetch_token(&tok, src, end, env);    if (r < 0) return r; -  r = parse_subexp(top, &tok, TK_EOT, src, end, env, 0); +  r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE);    if (r < 0) return r;    return 0; diff --git a/src/regparse.h b/src/regparse.h index 231f7b5..1525ccb 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -4,7 +4,7 @@    regparse.h -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@  #include "regint.h"  #define NODE_STRING_MARGIN         16 -#define NODE_STRING_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_STRING_BUF_SIZE       20  /* sizeof(CClassNode) - sizeof(int)*4 */  #define NODE_BACKREFS_SIZE          6  /* node type */ @@ -73,20 +73,25 @@ enum BodyEmptyType {    BODY_IS_EMPTY_POSSIBILITY_REC = 3  }; +struct _Node; +  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    UChar* s;    UChar* end;    unsigned int flag; -  int    capacity;    /* (allocated size - 1) or 0: use buf[] */    UChar  buf[NODE_STRING_BUF_SIZE]; +  int    capacity;    /* (allocated size - 1) or 0: use buf[] */ +  int    case_min_len;  } StrNode;  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    unsigned int flags;    BitSet bs; @@ -96,6 +101,7 @@ typedef struct {  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    struct _Node* body;    int lower; @@ -104,12 +110,13 @@ typedef struct {    enum BodyEmptyType emptiness;    struct _Node* head_exact;    struct _Node* next_head_exact; -  int is_refered;     /* include called node. don't eliminate even if {0} */ +  int include_referred;   /* include called node. don't eliminate even if {0} */  } QuantNode;  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    struct _Node* body;    enum BagType type; @@ -152,6 +159,7 @@ typedef struct {  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    struct _Node* body; /* to BagNode : BAG_MEMORY */    int     by_number; @@ -166,6 +174,7 @@ typedef struct {  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    int  back_num;    int  back_static[NODE_BACKREFS_SIZE]; @@ -176,6 +185,7 @@ typedef struct {  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    struct _Node* body;    int type; @@ -186,6 +196,7 @@ typedef struct {  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    struct _Node* car;    struct _Node* cdr; @@ -194,6 +205,7 @@ typedef struct {  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    int ctype;    int not; @@ -204,6 +216,7 @@ typedef struct {  typedef struct {    NodeType node_type;    int status; +  struct _Node* parent;    enum GimmickType type;    int  detail_type; @@ -216,6 +229,7 @@ typedef struct _Node {      struct {        NodeType node_type;        int status; +      struct _Node* parent;        struct _Node* body;      } base; @@ -280,26 +294,21 @@ typedef struct _Node {  #define ANCR_ANYCHAR_INF_MASK  (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)  #define ANCR_END_BUF_MASK      (ANCR_END_BUF | ANCR_SEMI_END_BUF) -#define NODE_STRING_RAW                (1<<0) /* by backslashed number */ -#define NODE_STRING_AMBIG              (1<<1) -#define NODE_STRING_GOOD_AMBIG         (1<<2) -#define NODE_STRING_DONT_GET_OPT_INFO  (1<<3) +#define NODE_STRING_CRUDE              (1<<0) +#define NODE_STRING_CASE_EXPANDED      (1<<1) +#define NODE_STRING_CASE_FOLD_MATCH    (1<<2)  #define NODE_STRING_LEN(node)            (int )((node)->u.str.end - (node)->u.str.s) -#define NODE_STRING_SET_RAW(node)        (node)->u.str.flag |= NODE_STRING_RAW -#define NODE_STRING_CLEAR_RAW(node)      (node)->u.str.flag &= ~NODE_STRING_RAW -#define NODE_STRING_SET_AMBIG(node)      (node)->u.str.flag |= NODE_STRING_AMBIG -#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG -#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \ -  (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO -#define NODE_STRING_IS_RAW(node) \ -  (((node)->u.str.flag & NODE_STRING_RAW) != 0) -#define NODE_STRING_IS_AMBIG(node) \ -  (((node)->u.str.flag & NODE_STRING_AMBIG) != 0) -#define NODE_STRING_IS_GOOD_AMBIG(node) \ -  (((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0) -#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \ -  (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0) +#define NODE_STRING_SET_CRUDE(node)         (node)->u.str.flag |= NODE_STRING_CRUDE +#define NODE_STRING_CLEAR_CRUDE(node)       (node)->u.str.flag &= ~NODE_STRING_CRUDE +#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED +#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH +#define NODE_STRING_IS_CRUDE(node) \ +  (((node)->u.str.flag & NODE_STRING_CRUDE) != 0) +#define NODE_STRING_IS_CASE_EXPANDED(node) \ +  (((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0) +#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \ +  (((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0)  #define BACKREFS_P(br) \    (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) @@ -326,6 +335,7 @@ typedef struct _Node {  #define NODE_ST_FIXED_OPTION          (1<<18)  #define NODE_ST_PROHIBIT_RECURSION    (1<<19)  #define NODE_ST_SUPER                 (1<<20) +#define NODE_ST_EMPTY_STATUS_CHECK    (1<<21)  #define NODE_STATUS(node)           (((Node* )node)->u.base.status) @@ -355,7 +365,10 @@ typedef struct _Node {      ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)  #define NODE_IS_STRICT_REAL_REPEAT(node) \      ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0) +#define NODE_IS_EMPTY_STATUS_CHECK(node) \ +    ((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0) +#define NODE_PARENT(node)         ((node)->u.base.parent)  #define NODE_BODY(node)           ((node)->u.base.body)  #define NODE_QUANT_BODY(node)     ((node)->body)  #define NODE_BAG_BODY(node)       ((node)->body) @@ -368,11 +381,8 @@ typedef struct _Node {      (senv)->mem_env_dynamic : (senv)->mem_env_static)  typedef struct { -  Node* node; -#if 0 -  int in; -  int recursion; -#endif +  Node* mem_node; +  Node* empty_repeat_node;  } MemEnv;  typedef struct { @@ -384,9 +394,8 @@ typedef struct {    OnigCaseFoldType case_fold_flag;    OnigEncoding     enc;    OnigSyntaxType*  syntax; -  MemStatusType    capture_history; -  MemStatusType    bt_mem_start; -  MemStatusType    bt_mem_end; +  MemStatusType    cap_history; +  MemStatusType    backtrack_mem; /* backtrack/recursion */    MemStatusType    backrefed_mem;    UChar*           pattern;    UChar*           pattern_end; @@ -404,7 +413,10 @@ typedef struct {    MemEnv           mem_env_static[SCANENV_MEMENV_SIZE];    MemEnv*          mem_env_dynamic;    unsigned int     parse_depth; - +#ifdef ONIG_DEBUG_PARSE +  unsigned int     max_parse_depth; +#endif +  int backref_num;    int keep_num;    int save_num;    int save_alloc_num; @@ -425,9 +437,7 @@ extern int    onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));  extern int    onig_strncmp P_((const UChar* s1, const UChar* s2, int n));  extern void   onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));  extern void   onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); -extern int    onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); -extern void   onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); -extern void   onig_node_conv_to_str_node P_((Node* node, int raw)); +extern int    onig_reduce_nested_quantifier P_((Node* pnode));  extern int    onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));  extern int    onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));  extern void   onig_node_free P_((Node* node)); @@ -435,13 +445,13 @@ extern Node*  onig_node_new_bag P_((enum BagType type));  extern Node*  onig_node_new_anchor P_((int type, int ascii_mode));  extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));  extern Node*  onig_node_new_list P_((Node* left, Node* right)); -extern Node*  onig_node_list_add P_((Node* list, Node* x));  extern Node*  onig_node_new_alt P_((Node* left, Node* right));  extern void   onig_node_str_clear P_((Node* node));  extern int    onig_names_free P_((regex_t* reg));  extern int    onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));  extern int    onig_free_shared_cclass_table P_((void));  extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern int    onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]);  extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);  #ifdef USE_CALLOUT diff --git a/src/regposerr.c b/src/regposerr.c index e389531..e1747c5 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -2,7 +2,7 @@    regposerr.c - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/regposix.c b/src/regposix.c index 09e16ac..b3e78ff 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -2,7 +2,7 @@    regposix.c - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/regsyntax.c b/src/regsyntax.c index d4420cc..513c7f7 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -2,7 +2,7 @@    regsyntax.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/regtrav.c b/src/regtrav.c index 58a17f5..8307695 100644 --- a/src/regtrav.c +++ b/src/regtrav.c @@ -2,7 +2,7 @@    regtrav.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without diff --git a/src/regversion.c b/src/regversion.c index 594a52c..de993d3 100644 --- a/src/regversion.c +++ b/src/regversion.c @@ -2,7 +2,7 @@    regversion.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -2,7 +2,7 @@    sjis.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -149,10 +149,6 @@ code_to_mbc(OnigCodePoint code, UChar *buf)    if ((code & 0xff00) != 0) *p++ = (UChar )(((code >>  8) & 0xff));    *p++ = (UChar )(code & 0xff); -#if 0 -  if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf)) -    return REGERR_INVALID_CODE_POINT_VALUE; -#endif    return (int )(p - buf);  } @@ -179,31 +175,6 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,    }  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); - -} -#endif - -#if 0 -static int -is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ -  if (code < 128) -    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); -  else { -    if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { -      return (code_to_mbclen(code) > 1 ? TRUE : FALSE); -    } -  } - -  return FALSE; -} -#endif -  static UChar*  left_adjust_char_head(const UChar* start, const UChar* s)  { diff --git a/src/sjis_prop.c b/src/sjis_prop.c index 3a88a38..e33fbb2 100644 --- a/src/sjis_prop.c +++ b/src/sjis_prop.c @@ -1,5 +1,5 @@  /* ANSI-C code produced by gperf version 3.1 */ -/* Command-line: /usr/local/bin/gperf -pt -T -L ANSI-C -N onigenc_sjis_lookup_property_name --output-file gperf2.tmp sjis_prop.gperf  */ +/* Command-line: gperf -pt -T -L ANSI-C -N onigenc_sjis_lookup_property_name --output-file gperf2.tmp sjis_prop.gperf  */  /* Computed positions: -k'1,3' */  #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ diff --git a/src/unicode.c b/src/unicode.c index 5820319..474436a 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -2,7 +2,7 @@    unicode.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -356,16 +356,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,          for (fn = 0; fn < 2; fn++) {            int index;            cs[fn][0] = FOLDS2_FOLD(buk->index)[fn]; +          ncs[fn] = 1;            index = onigenc_unicode_fold1_key(&cs[fn][0]);            if (index >= 0) {              int m = FOLDS1_UNFOLDS_NUM(index);              for (i = 0; i < m; i++) {                cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];              } -            ncs[fn] = m + 1; +            ncs[fn] += m;            } -          else -            ncs[fn] = 1;          }          for (i = 0; i < ncs[0]; i++) { @@ -393,16 +392,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,          for (fn = 0; fn < 3; fn++) {            int index;            cs[fn][0] = FOLDS3_FOLD(buk->index)[fn]; +          ncs[fn] = 1;            index = onigenc_unicode_fold1_key(&cs[fn][0]);            if (index >= 0) {              int m = FOLDS1_UNFOLDS_NUM(index);              for (i = 0; i < m; i++) {                cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];              } -            ncs[fn] = m + 1; +            ncs[fn] += m;            } -          else -            ncs[fn] = 1;          }          for (i = 0; i < ncs[0]; i++) { diff --git a/src/unicode_egcb_data.c b/src/unicode_egcb_data.c index 6a74c77..3c49422 100644 --- a/src/unicode_egcb_data.c +++ b/src/unicode_egcb_data.c @@ -1,6 +1,6 @@  /* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */  /*- - * Copyright (c) 2017-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2017-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -25,7 +25,7 @@   * SUCH DAMAGE.   */ -#define GRAPHEME_BREAK_PROPERTY_VERSION  12_1_0 +#define GRAPHEME_BREAK_PROPERTY_VERSION  120100  /*  CR diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c index b84b528..171a0fa 100644 --- a/src/unicode_fold1_key.c +++ b/src/unicode_fold1_key.c @@ -1,7 +1,7 @@  /* This file was converted by gperf_fold_key_conv.py        from gperf output file. */  /* ANSI-C code produced by gperf version 3.1 */ -/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf  */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf  */  /* Computed positions: -k'1-3' */ @@ -9,7 +9,7 @@  /* This gperf source file was generated by make_unicode_fold_data.py */  /*- - * Copyright (c) 2017-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2017-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -2983,7 +2983,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[])        4026      }; -  if (0 == 0) +      {        int key = hash(codes); diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c index 2310f0a..c39b19d 100644 --- a/src/unicode_fold2_key.c +++ b/src/unicode_fold2_key.c @@ -1,7 +1,7 @@  /* This file was converted by gperf_fold_key_conv.py        from gperf output file. */  /* ANSI-C code produced by gperf version 3.1 */ -/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf  */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf  */  /* Computed positions: -k'3,6' */ @@ -9,7 +9,7 @@  /* This gperf source file was generated by make_unicode_fold_data.py */  /*- - * Copyright (c) 2017-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2017-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -211,7 +211,7 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[])        129      }; -  if (0 == 0) +      {        int key = hash(codes); diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c index 0e02a62..295c447 100644 --- a/src/unicode_fold3_key.c +++ b/src/unicode_fold3_key.c @@ -1,7 +1,7 @@  /* This file was converted by gperf_fold_key_conv.py        from gperf output file. */  /* ANSI-C code produced by gperf version 3.1 */ -/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf  */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf  */  /* Computed positions: -k'3,6,9' */ @@ -9,7 +9,7 @@  /* This gperf source file was generated by make_unicode_fold_data.py */  /*- - * Copyright (c) 2017-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2017-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -121,7 +121,7 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[])        0      }; -  if (0 == 0) +      {        int key = hash(codes); diff --git a/src/unicode_fold_data.c b/src/unicode_fold_data.c index 0dbf9ae..68694b0 100644 --- a/src/unicode_fold_data.c +++ b/src/unicode_fold_data.c @@ -1,7 +1,7 @@  /* This file was generated by make_unicode_fold_data.py. */  #include "regenc.h" -#define UNICODE_CASEFOLD_VERSION  12_1_0 +#define UNICODE_CASEFOLD_VERSION  120100  OnigCodePoint OnigUnicodeFolds1[] = { diff --git a/src/unicode_property_data.c b/src/unicode_property_data.c index 5c1c8a9..0083dd6 100644 --- a/src/unicode_property_data.c +++ b/src/unicode_property_data.c @@ -1,5 +1,5 @@  /* ANSI-C code produced by gperf version 3.1 */ -/* Command-line: /usr/local/bin/gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf1.tmp unicode_property_data.gperf  */ +/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf1.tmp unicode_property_data.gperf  */  /* Computed positions: -k'1-3,5-6,12,16,$' */  #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ @@ -29580,7 +29580,8 @@ unicode_lookup_property_name (register const char *str, register size_t len) -#define UNICODE_PROPERTY_VERSION  12_1_0 +#define UNICODE_PROPERTY_VERSION  120100 +#define UNICODE_EMOJI_VERSION     1201  #define PROPERTY_NAME_MAX_SIZE  59  #define CODE_RANGES_NUM         568 diff --git a/src/unicode_property_data_posix.c b/src/unicode_property_data_posix.c index eddc108..e299e85 100644 --- a/src/unicode_property_data_posix.c +++ b/src/unicode_property_data_posix.c @@ -1,5 +1,5 @@  /* ANSI-C code produced by gperf version 3.1 */ -/* Command-line: /usr/local/bin/gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf2.tmp unicode_property_data_posix.gperf  */ +/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf2.tmp unicode_property_data_posix.gperf  */  /* Computed positions: -k'1,3' */  #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c index b2228e0..51a037b 100644 --- a/src/unicode_unfold_key.c +++ b/src/unicode_unfold_key.c @@ -1,7 +1,7 @@  /* This file was converted by gperf_unfold_key_conv.py        from gperf output file. */  /* ANSI-C code produced by gperf version 3.1 */ -/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf  */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf  */  /* Computed positions: -k'1-3' */ @@ -9,7 +9,7 @@  /* This gperf source file was generated by make_unicode_fold_data.py */  /*- - * Copyright (c) 2017-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2017-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -3288,7 +3288,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code)        {0x1e907, 4005, 1}      }; -  if (0 == 0) +      {        int key = hash(&code); diff --git a/src/unicode_wb_data.c b/src/unicode_wb_data.c index 7778157..8e1a267 100644 --- a/src/unicode_wb_data.c +++ b/src/unicode_wb_data.c @@ -1,6 +1,6 @@  /* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */  /*- - * Copyright (c) 2019  K.Kosako  <kkosako0 AT gmail DOT com> + * Copyright (c) 2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -25,7 +25,7 @@   * SUCH DAMAGE.   */ -#define WORD_BREAK_PROPERTY_VERSION  12_1_0 +#define WORD_BREAK_PROPERTY_VERSION  120100  /*  ALetter diff --git a/src/utf16_be.c b/src/utf16_be.c index b66d868..d99af71 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@    utf16_be.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -227,39 +227,6 @@ utf16be_mbc_case_fold(OnigCaseFoldType flag,                                           pp, end, fold);  } -#if 0 -static int -utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  const UChar* p = *pp; - -  (*pp) += EncLen_UTF16[*p]; - -  if (*p == 0) { -    int c, v; - -    p++; -    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -      return TRUE; -    } - -    c = *p; -    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); - -    if ((v | BIT_CTYPE_LOWER) != 0) { -      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ -      if (c >= 0xaa && c <= 0xba) -        return FALSE; -      else -        return TRUE; -    } -    return (v != 0 ? TRUE : FALSE); -  } - -  return FALSE; -} -#endif -  static UChar*  utf16be_left_adjust_char_head(const UChar* start, const UChar* s)  { diff --git a/src/utf16_le.c b/src/utf16_le.c index cdc74b0..c6edd94 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -2,7 +2,7 @@    utf16_le.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -227,39 +227,6 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag,                                           fold);  } -#if 0 -static int -utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, -                         const UChar* end) -{ -  const UChar* p = *pp; - -  (*pp) += EncLen_UTF16[*(p+1)]; - -  if (*(p+1) == 0) { -    int c, v; - -    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -      return TRUE; -    } - -    c = *p; -    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, -                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -    if ((v | BIT_CTYPE_LOWER) != 0) { -      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ -      if (c >= 0xaa && c <= 0xba) -        return FALSE; -      else -        return TRUE; -    } -    return (v != 0 ? TRUE : FALSE); -  } - -  return FALSE; -} -#endif -  static UChar*  utf16le_left_adjust_char_head(const UChar* start, const UChar* s)  { diff --git a/src/utf32_be.c b/src/utf32_be.c index dd17d3b..67e50a2 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -2,7 +2,7 @@    utf32_be.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -119,39 +119,6 @@ utf32be_mbc_case_fold(OnigCaseFoldType flag,                                           fold);  } -#if 0 -static int -utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  const UChar* p = *pp; - -  (*pp) += 4; - -  if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { -    int c, v; - -    p += 3; -    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -      return TRUE; -    } - -    c = *p; -    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, -                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -    if ((v | BIT_CTYPE_LOWER) != 0) { -      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ -      if (c >= 0xaa && c <= 0xba) -        return FALSE; -      else -        return TRUE; -    } -    return (v != 0 ? TRUE : FALSE); -  } - -  return FALSE; -} -#endif -  static UChar*  utf32be_left_adjust_char_head(const UChar* start, const UChar* s)  { diff --git a/src/utf32_le.c b/src/utf32_le.c index d9fe3c6..2ae2275 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -2,7 +2,7 @@    utf32_le.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -120,38 +120,6 @@ utf32le_mbc_case_fold(OnigCaseFoldType flag,                                           fold);  } -#if 0 -static int -utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  const UChar* p = *pp; - -  (*pp) += 4; - -  if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { -    int c, v; - -    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -      return TRUE; -    } - -    c = *p; -    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, -                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); -    if ((v | BIT_CTYPE_LOWER) != 0) { -      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ -      if (c >= 0xaa && c <= 0xba) -        return FALSE; -      else -        return TRUE; -    } -    return (v != 0 ? TRUE : FALSE); -  } - -  return FALSE; -} -#endif -  static UChar*  utf32le_left_adjust_char_head(const UChar* start, const UChar* s)  { @@ -2,7 +2,7 @@    utf8.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019  K.Kosako   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -97,33 +97,6 @@ is_valid_mbc_string(const UChar* p, const UChar* end)    return TRUE;  } -#if 0 -static int -is_mbc_newline(const UChar* p, const UChar* end) -{ -  if (p < end) { -    if (*p == 0x0a) return 1; - -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS -#ifndef USE_CRNL_AS_LINE_TERMINATOR -    if (*p == 0x0d) return 1; -#endif -    if (p + 1 < end) { -      if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ -        return 1; -      if (p + 2 < end) { -        if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) -            && *(p+1) == 0x80 && *p == 0xe2)  /* U+2028, U+2029 */ -          return 1; -      } -    } -#endif -  } - -  return 0; -} -#endif -  static OnigCodePoint  mbc_to_code(const UChar* p, const UChar* end)  { | 
