diff options
| author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-04-09 19:36:01 +0200 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-04-09 19:36:01 +0200 | 
| commit | f42b61800a1a1b360c5ac915ec1caf34bf1fb86b (patch) | |
| tree | 89be88e0c37aeaa53e55de643586f9d5d4a3d5d2 /src | |
| parent | cce716909e5236f1aa2971a4db45a007c7efe416 (diff) | |
New upstream version 6.2.0 (tag: upstream/6.2.0)
Diffstat (limited to 'src')
| -rw-r--r-- | src/oniguruma.h | 15 | ||||
| -rw-r--r-- | src/regcomp.c | 33 | ||||
| -rw-r--r-- | src/regerror.c | 24 | ||||
| -rw-r--r-- | src/regexec.c | 41 | ||||
| -rw-r--r-- | src/regint.h | 5 | ||||
| -rw-r--r-- | src/regparse.c | 52 | ||||
| -rw-r--r-- | src/regparse.h | 1 | 
7 files changed, 131 insertions, 40 deletions
diff --git a/src/oniguruma.h b/src/oniguruma.h index 090b809..33e2a0a 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -4,7 +4,7 @@    oniguruma.h - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ extern "C" {  #define ONIGURUMA  #define ONIGURUMA_VERSION_MAJOR   6 -#define ONIGURUMA_VERSION_MINOR   1 -#define ONIGURUMA_VERSION_TEENY   3 +#define ONIGURUMA_VERSION_MINOR   2 +#define ONIGURUMA_VERSION_TEENY   0  #ifdef __cplusplus  # ifndef  HAVE_PROTOTYPES @@ -372,7 +372,7 @@ int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const  /* config parameters */  #define ONIG_NREGION                          10 -#define ONIG_MAX_CAPTURE_NUM               32767 +#define ONIG_MAX_CAPTURE_NUM          2147483647  /* 2**31 - 1 */  #define ONIG_MAX_BACKREF_NUM                1000  #define ONIG_MAX_REPEAT_NUM               100000  #define ONIG_MAX_MULTI_BYTE_RANGES_NUM     10000 @@ -543,6 +543,7 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  #define ONIGERR_UNDEFINED_BYTECODE                            -13  #define ONIGERR_UNEXPECTED_BYTECODE                           -14  #define ONIGERR_MATCH_STACK_LIMIT_OVER                        -15 +#define ONIGERR_PARSE_DEPTH_LIMIT_OVER                        -16  #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED                -21  #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR  -22  #define ONIGERR_FAIL_TO_INITIALIZE                            -23 @@ -821,6 +822,12 @@ unsigned int onig_get_match_stack_limit_size P_((void));  ONIG_EXTERN  int onig_set_match_stack_limit_size P_((unsigned int size));  ONIG_EXTERN +unsigned int onig_get_parse_depth_limit P_((void)); 
+ONIG_EXTERN +int onig_set_capture_num_limit P_((int num)); +ONIG_EXTERN +int onig_set_parse_depth_limit P_((unsigned int depth)); +ONIG_EXTERN  int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges));  ONIG_EXTERN  int onig_end P_((void)); diff --git a/src/regcomp.c b/src/regcomp.c index 11ba1e7..5c924b5 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -1230,6 +1230,11 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)          len += (IS_ENCLOSE_RECURSION(node)                  ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);      } +    else if (IS_ENCLOSE_RECURSION(node)) { +      len = SIZE_OP_MEMORY_START_PUSH; +      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) +                     ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC); +    }      else  #endif      { @@ -1321,6 +1326,14 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)        if (r) return r;        r = add_opcode(reg, OP_RETURN);      } +    else if (IS_ENCLOSE_RECURSION(node)) { +      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) +        r = add_opcode(reg, OP_MEMORY_END_PUSH_REC); +      else +        r = add_opcode(reg, OP_MEMORY_END_REC); +      if (r) return r; +      r = add_mem_num(reg, node->regnum); +    }      else  #endif      { @@ -2231,6 +2244,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)              return 0;            }            else { +            if (IS_NOT_NULL(xc->mbuf)) return 0;              for (i = 0; i < SINGLE_BYTE_SIZE; i++) {                if (! IS_CODE_SB_WORD(reg->enc, i)) {                  if (!IS_NCCLASS_NOT(xc)) { @@ -3673,6 +3687,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)  #define IN_NOT        (1<<1)  #define IN_REPEAT     (1<<2)  #define IN_VAR_REPEAT (1<<3) +#define IN_CALL       (1<<4) +#define IN_RECCALL    (1<<5)  /* setup_tree does the following work.   1. check empty loop. 
(set qn->target_empty_info) @@ -3843,10 +3859,16 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)          break;        case ENCLOSE_MEMORY: -        if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { +        if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {            BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);            /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */          } +        if (IS_ENCLOSE_CALLED(en)) +          state |= IN_CALL; +        if (IS_ENCLOSE_RECURSION(en)) +          state |= IN_RECCALL; +        else if ((state & IN_RECCALL) != 0) +          SET_CALL_RECURSION(node);          r = setup_tree(en->target, reg, state, env);          break; @@ -4160,6 +4182,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,    if (right_len == 0) {      to->right_anchor |= left->right_anchor;    } +  else { +    to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT); +  }  }  static int @@ -5003,12 +5028,14 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)    if (r) return r;    reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | -        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); +        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | +        ANCHOR_LOOK_BEHIND);    if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)      reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; -  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); +  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | +       ANCHOR_PREC_READ_NOT);    if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {      reg->anchor_dmin = opt.len.min; diff --git a/src/regerror.c b/src/regerror.c index 05fc9d8..ee35b36 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -54,6 +54,8 @@ onig_error_code_to_format(int code)      p = "fail to memory allocation"; break;    case 
ONIGERR_MATCH_STACK_LIMIT_OVER:      p = "match-stack limit over"; break; +  case ONIGERR_PARSE_DEPTH_LIMIT_OVER: +    p = "parse depth limit over"; break;    case ONIGERR_TYPE_BUG:      p = "undefined type (bug)"; break;    case ONIGERR_PARSER_BUG: @@ -348,21 +350,12 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)      p = pat;      while (p < pat_end) { -      if (*p == '\\') { -	*s++ = *p++; -	len = enclen(enc, p); -	while (len-- > 0) *s++ = *p++; -      } -      else if (*p == '/') { -	*s++ = (unsigned char )'\\'; -	*s++ = *p++; -      } -      else if (ONIGENC_IS_MBC_HEAD(enc, p)) { +      if (ONIGENC_IS_MBC_HEAD(enc, p)) {          len = enclen(enc, p);          if (ONIGENC_MBC_MINLEN(enc) == 1) {            while (len-- > 0) *s++ = *p++;          } -        else { /* for UTF16 */ +        else { /* for UTF16/32 */            int blen;            while (len-- > 0) { @@ -373,6 +366,15 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)            }          }        } +      else if (*p == '\\') { +	*s++ = *p++; +	len = enclen(enc, p); +	while (len-- > 0) *s++ = *p++; +      } +      else if (*p == '/') { +	*s++ = (unsigned char )'\\'; +	*s++ = *p++; +      }        else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&  	       !ONIGENC_IS_CODE_SPACE(enc, *p)) {  	sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); diff --git a/src/regexec.c b/src/regexec.c index 7e8d3d1..35fef11 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -462,6 +462,7 @@ stack_double(int is_alloca, char** arg_alloc_base,    unsigned int n;    int used;    size_t size; +  size_t new_size;    char* alloc_base;    char* new_alloc_base;    OnigStackType *stk_base, *stk_end, *stk; @@ -472,10 +473,11 @@ stack_double(int is_alloca, char** arg_alloc_base,    stk      = *arg_stk;    n = stk_end - stk_base; -  n *= 2;    size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; +  n *= 2; +  new_size = sizeof(OnigStackIndex) * 
msa->ptr_num + sizeof(OnigStackType) * n;    if (is_alloca != 0) { -    new_alloc_base = (char* )xmalloc(size); +    new_alloc_base = (char* )xmalloc(new_size);      if (IS_NULL(new_alloc_base)) {        STACK_SAVE;        return ONIGERR_MEMORY; @@ -489,7 +491,7 @@ stack_double(int is_alloca, char** arg_alloc_base,        else          n = MatchStackLimitSize;      } -    new_alloc_base = (char* )xrealloc(alloc_base, size); +    new_alloc_base = (char* )xrealloc(alloc_base, new_size);      if (IS_NULL(new_alloc_base)) {        STACK_SAVE;        return ONIGERR_MEMORY; @@ -1242,16 +1244,24 @@ onig_statistics_init(void)    MaxStackDepth = 0;  } -extern void +extern int  onig_print_statistics(FILE* f)  { +  int r;    int i; -  fprintf(f, "   count      prev        time\n"); + +  r = fprintf(f, "   count      prev        time\n"); +  if (r < 0) return -1; +    for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { -    fprintf(f, "%8d: %8d: %10ld: %s\n", -	    OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); +    r = fprintf(f, "%8d: %8d: %10ld: %s\n", +                OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); +    if (r < 0) return -1;    } -  fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); +  r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); +  if (r < 0) return -1; + +  return 0;  }  #define STACK_INC do {\ @@ -3493,15 +3503,14 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,            start = min_semi_end - reg->anchor_dmax;            if (start < end)              start = onigenc_get_right_adjust_char_head(reg->enc, str, start); -          else { /* match with empty at end */ -            start = onigenc_get_prev_char_head(reg->enc, str, end); -          }          }          if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {            range = max_semi_end - reg->anchor_dmin + 1;          } -        if (start >= range) goto mismatch_no_msa; +        if (start > range) goto mismatch_no_msa; + 
       /* If start == range, match with empty at end. +           Backward search is used. */        }        else {          if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) { @@ -3626,9 +3635,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,              prev = s;              s += enclen(reg->enc, s); -            while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { -              prev = s; -              s += enclen(reg->enc, s); +            if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { +              while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { +                prev = s; +                s += enclen(reg->enc, s); +              }              }            } while (s < range);            goto mismatch; diff --git a/src/regint.h b/src/regint.h index 7a3283d..9835143 100644 --- a/src/regint.h +++ b/src/regint.h @@ -71,6 +71,7 @@  #define INIT_MATCH_STACK_SIZE                     160  #define DEFAULT_MATCH_STACK_LIMIT_SIZE              0 /* unlimited */ +#define DEFAULT_PARSE_DEPTH_LIMIT                4096  #if defined(__GNUC__)  #  define ARG_UNUSED  __attribute__ ((unused)) @@ -522,7 +523,7 @@ typedef int RelAddrType;  typedef int AbsAddrType;  typedef int LengthType;  typedef int RepeatNumType; -typedef short int MemNumType; +typedef int MemNumType;  typedef short int StateCheckNumType;  typedef void* PointerType; @@ -747,7 +748,7 @@ extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp,  #ifdef ONIG_DEBUG_STATISTICS  extern void onig_statistics_init P_((void)); -extern void onig_print_statistics P_((FILE* f)); +extern int  onig_print_statistics P_((FILE* f));  #endif  #endif diff --git a/src/regparse.c b/src/regparse.c index 8f1d1cb..11f9e34 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -108,6 +108,38 @@ onig_warning(const char* s)    (*onig_warn)(s);  } +#define DEFAULT_MAX_CAPTURE_NUM   32767 + +static int MaxCaptureNum = 
DEFAULT_MAX_CAPTURE_NUM; + +extern int +onig_set_capture_num_limit(int num) +{ +  if (num < 0) return -1; + +  MaxCaptureNum = num; +  return 0; +} + +static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT; + +extern unsigned int +onig_get_parse_depth_limit(void) +{ +  return ParseDepthLimit; +} + +extern int +onig_set_parse_depth_limit(unsigned int depth) +{ +  if (depth == 0) +    ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT; +  else +    ParseDepthLimit = depth; +  return 0; +} + +  static void  bbuf_free(BBuf* bbuf)  { @@ -959,6 +991,7 @@ scan_env_clear(ScanEnv* env)    env->curr_max_regnum     = 0;    env->has_recursion       = 0;  #endif +  env->parse_depth         = 0;  }  static int @@ -968,7 +1001,7 @@ scan_env_add_mem_entry(ScanEnv* env)    Node** p;    need = env->num_mem + 1; -  if (need > ONIG_MAX_CAPTURE_NUM) +  if (need > MaxCaptureNum && MaxCaptureNum != 0)      return ONIGERR_TOO_MANY_CAPTURES;    if (need >= SCANENV_MEMNODES_SIZE) { @@ -1639,9 +1672,10 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)        bound = x;    } -  for (high = low, bound = n; high < bound; ) { +  high = (to == ~((OnigCodePoint )0)) ? 
n : low; +  for (bound = n; high < bound; ) {      x = (high + bound) >> 1; -    if (to >= data[x*2] - 1) +    if (to + 1 >= data[x*2])        high = x + 1;      else        bound = x; @@ -4113,8 +4147,11 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,    enum CCVALTYPE val_type, in_type;    int val_israw, in_israw; -  prev_cc = (CClassNode* )NULL;    *np = NULL_NODE; +  env->parse_depth++; +  if (env->parse_depth > ParseDepthLimit) +    return ONIGERR_PARSE_DEPTH_LIMIT_OVER; +  prev_cc = (CClassNode* )NULL;    r = fetch_token_in_cc(tok, src, end, env);    if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {      neg = 1; @@ -4315,7 +4352,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {            CC_ESC_WARN(env, (UChar* )"-"); -          goto any_char_in;   /* [0-9-a] is allowed as [0-9\-a] */ +          goto range_end_val;   /* [0-9-a] is allowed as [0-9\-a] */          }          r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;          goto err; @@ -4420,6 +4457,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,      }    }    *src = p; +  env->parse_depth--;    return 0;   err: @@ -5281,6 +5319,9 @@ parse_subexp(Node** top, OnigToken* tok, int term,    Node *node, **headp;    *top = NULL; +  env->parse_depth++; +  if (env->parse_depth > ParseDepthLimit) +    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;    r = parse_branch(&node, tok, term, src, end, env);    if (r < 0) {      onig_node_free(node); @@ -5317,6 +5358,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,        return ONIGERR_PARSER_BUG;    } +  env->parse_depth--;    return r;  } diff --git a/src/regparse.h b/src/regparse.h index 9e366fe..c9d1fe8 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -306,6 +306,7 @@ typedef struct {    int curr_max_regnum;    int has_recursion;  #endif +  unsigned int parse_depth;  } ScanEnv;  | 
