diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2019-08-07 09:32:48 +0200 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2019-08-07 09:32:48 +0200 | 
| commit | 40f3d0030e6e98bcb02d6523e5ee48497dec49a6 (patch) | |
| tree | a992f9a6acd3edc2c7bb6b1aba2e52084918c9ab /src/regparse.c | |
| parent | e25c754918ae26e8b9e68a47bc1af36248e91800 (diff) | |
New upstream version 6.9.3upstream/6.9.3
Diffstat (limited to 'src/regparse.c')
| -rw-r--r-- | src/regparse.c | 190 | 
1 files changed, 111 insertions, 79 deletions
diff --git a/src/regparse.c b/src/regparse.c index f1deea3..7f8b1a9 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -77,6 +77,7 @@ OnigSyntaxType OnigSyntaxOniguruma = {        ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |        ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |        ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | +      ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |        ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |        ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )    , ONIG_OPTION_NONE @@ -1093,6 +1094,35 @@ onig_name_to_group_numbers(regex_t* reg, const UChar* name,    return e->back_num;  } +static int +name_to_group_numbers(ScanEnv* env, const UChar* name, const UChar* name_end, +                      int** nums) +{ +  regex_t* reg; +  NameEntry* e; + +  reg = env->reg; +  e = name_find(reg, name, name_end); + +  if (IS_NULL(e)) { +    onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE, +                                   (UChar* )name, (UChar* )name_end); +    return ONIGERR_UNDEFINED_NAME_REFERENCE; +  } + +  switch (e->back_num) { +  case 0: +    break; +  case 1: +    *nums = &(e->back_ref1); +    break; +  default: +    *nums = e->back_refs; +    break; +  } +  return e->back_num; +} +  extern int  onig_name_to_backref_number(regex_t* reg, const UChar* name,                              const UChar* name_end, OnigRegion *region) @@ -1869,8 +1899,8 @@ callout_tag_table_new(CalloutTagTable** rt)  }  static int -callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end, -                      CalloutTagVal entry_val) +callout_tag_entry_raw(ScanEnv* env, CalloutTagTable* t, UChar* name, +                      UChar* name_end, CalloutTagVal entry_val)  {    int r;    CalloutTagVal val; @@ -1879,8 +1909,11 @@ callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,      return ONIGERR_INVALID_CALLOUT_TAG_NAME;    val = callout_tag_find(t, name, name_end); -  if (val >= 0) +  if (val >= 0) { +    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, +                                   name, name_end);      return ONIGERR_MULTIPLEX_DEFINED_NAME; +  }    r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);    if (r < 0) return r; @@ -1909,7 +1942,7 @@ ext_ensure_tag_table(regex_t* reg)  }  static int -callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end, +callout_tag_entry(ScanEnv* env, regex_t* reg, UChar* name, UChar* name_end,                    CalloutTagVal entry_val)  {    int r; @@ -1921,7 +1954,7 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,    ext = onig_get_regex_ext(reg);    CHECK_NULL_RETURN_MEMERR(ext); -  r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val); +  r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);    e = onig_reg_callout_list_at(reg, (int )entry_val);    CHECK_NULL_RETURN_MEMERR(e); @@ -2391,10 +2424,10 @@ node_new_quantifier(int lower, int upper, int by_number)    CHECK_NULL_RETURN(node);    NODE_SET_TYPE(node, NODE_QUANT); -  QUANT_(node)->lower      = lower; -  QUANT_(node)->upper      = upper; -  QUANT_(node)->greedy     = 1; -  QUANT_(node)->empty_info = BODY_IS_NOT_EMPTY; +  QUANT_(node)->lower           = lower; +  QUANT_(node)->upper           = upper; +  QUANT_(node)->greedy          = 1; +  QUANT_(node)->emptiness       = BODY_IS_NOT_EMPTY;    QUANT_(node)->head_exact      = NULL_NODE;    QUANT_(node)->next_head_exact = NULL_NODE;    QUANT_(node)->is_refered      = 0; @@ -2694,7 +2727,7 @@ make_text_segment(Node** node, ScanEnv* env)    ns[0] = x;    ns[1] = NULL_NODE; -  x = node_new_quantifier(0, REPEAT_INFINITE, 1); +  x = node_new_quantifier(0, INFINITE_REPEAT, 1);    if (IS_NULL(x)) goto err;    NODE_BODY(x) = ns[0]; @@ -3044,7 +3077,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,      if (expr == NULL_NODE) {        /* default expr \O* */ -      quant = node_new_quantifier(0, REPEAT_INFINITE, 0); +      quant = node_new_quantifier(0, INFINITE_REPEAT, 0);        if (IS_NULL(quant)) goto err0;        r = node_new_true_anychar(&body, env); @@ -3086,7 +3119,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,    if (r != 0) goto err;    possessive = 1; -  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE, +  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,                           possessive, is_range_cutter, env);    if (r != 0) goto err; @@ -3236,10 +3269,18 @@ node_new_empty(void)  static Node*  node_new_str_raw_char(UChar c)  { +  int i;    UChar p[1]; +  Node* node;    p[0] = c; -  return node_new_str_raw(p, p + 1); +  node = node_new_str_raw(p, p + 1); + +  /* clear buf tail */ +  for (i = 1; i < NODE_STRING_BUF_SIZE; i++) +    STR_(node)->buf[i] = '\0'; + +  return node;  }  static Node* @@ -3275,24 +3316,6 @@ str_node_can_be_split(Node* node, OnigEncoding enc)    return 0;  } -#ifdef USE_PAD_TO_SHORT_BYTE_CHAR -static int -node_str_head_pad(StrNode* sn, int num, UChar val) -{ -  UChar buf[NODE_STRING_BUF_SIZE]; -  int i, len; - -  len = sn->end - sn->s; -  onig_strcpy(buf, sn->s, sn->end); -  onig_strcpy(&(sn->s[num]), buf, buf + len); -  sn->end += num; - -  for (i = 0; i < num; i++) { -    sn->s[i] = val; -  } -} -#endif -  extern int  onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)  { @@ -3877,19 +3900,19 @@ quantifier_type_num(QuantNode* q)    if (q->greedy) {      if (q->lower == 0) {        if (q->upper == 1) return 0; -      else if (IS_REPEAT_INFINITE(q->upper)) return 1; +      else if (IS_INFINITE_REPEAT(q->upper)) return 1;      }      else if (q->lower == 1) { -      if (IS_REPEAT_INFINITE(q->upper)) return 2; +      if (IS_INFINITE_REPEAT(q->upper)) return 2;      }    }    else {      if (q->lower == 0) {        if (q->upper == 1) return 3; -      else if (IS_REPEAT_INFINITE(q->upper)) return 4; +      else if (IS_INFINITE_REPEAT(q->upper)) return 4;      }      else if (q->lower == 1) { -      if (IS_REPEAT_INFINITE(q->upper)) return 5; +      if (IS_INFINITE_REPEAT(q->upper)) return 5;      }    }    return -1; @@ -3926,8 +3949,8 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)    pnum = quantifier_type_num(p);    cnum = quantifier_type_num(c);    if (pnum < 0 || cnum < 0) { -    if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) { -      if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) { +    if ((p->lower == p->upper) && ! IS_INFINITE_REPEAT(p->upper)) { +      if ((c->lower == c->upper) && ! IS_INFINITE_REPEAT(c->upper)) {          int n = onig_positive_int_multiply(p->lower, c->lower);          if (n >= 0) {            p->lower = p->upper = n; @@ -3946,11 +3969,11 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)      break;    case RQ_A:      NODE_BODY(pnode) = NODE_BODY(cnode); -    p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1; +    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 1;      break;    case RQ_AQ:      NODE_BODY(pnode) = NODE_BODY(cnode); -    p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0; +    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 0;      break;    case RQ_QQ:      NODE_BODY(pnode) = NODE_BODY(cnode); @@ -3959,13 +3982,13 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)    case RQ_P_QQ:      NODE_BODY(pnode) = cnode;      p->lower  = 0;  p->upper = 1;  p->greedy = 0; -    c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1; +    c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 1;      return ;      break;    case RQ_PQ_Q:      NODE_BODY(pnode) = cnode;      p->lower  = 0;  p->upper = 1;  p->greedy = 1; -    c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0; +    c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 0;      return ;      break;    case RQ_ASIS: @@ -4158,7 +4181,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)      if (p == prev) {        if (non_low != 0)          goto invalid; -      up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */ +      up = INFINITE_REPEAT;  /* {n,} : {n,infinite} */      }    }    else { @@ -4178,7 +4201,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)    }    if (c != '}') goto invalid; -  if (!IS_REPEAT_INFINITE(up) && low > up) { +  if (!IS_INFINITE_REPEAT(up) && low > up) {      /* {n,m}+ supported case */      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))        return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; @@ -4959,7 +4982,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;        tok->type = TK_REPEAT;        tok->u.repeat.lower = 0; -      tok->u.repeat.upper = REPEAT_INFINITE; +      tok->u.repeat.upper = INFINITE_REPEAT;        goto greedy_check;        break; @@ -4967,7 +4990,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;        tok->type = TK_REPEAT;        tok->u.repeat.lower = 1; -      tok->u.repeat.upper = REPEAT_INFINITE; +      tok->u.repeat.upper = INFINITE_REPEAT;        goto greedy_check;        break; @@ -5358,10 +5381,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)              tok->u.backref.ref1 = back_num;            }            else { -            num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); +            num = name_to_group_numbers(env, prev, name_end, &backs);              if (num <= 0) { -              onig_scan_env_set_error_string(env, -                        ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);                return ONIGERR_UNDEFINED_NAME_REFERENCE;              }              if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { @@ -5514,7 +5535,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)  #endif        tok->type = TK_REPEAT;        tok->u.repeat.lower = 0; -      tok->u.repeat.upper = REPEAT_INFINITE; +      tok->u.repeat.upper = INFINITE_REPEAT;        goto greedy_check;        break; @@ -5525,7 +5546,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)  #endif        tok->type = TK_REPEAT;        tok->u.repeat.lower = 1; -      tok->u.repeat.upper = REPEAT_INFINITE; +      tok->u.repeat.upper = INFINITE_REPEAT;        goto greedy_check;        break; @@ -5608,7 +5629,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)                tok->u.call.gnum      = 0;                tok->u.call.name      = p;                PINC; -              if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME; +              if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;                tok->u.call.name_end  = p;                break; @@ -6249,6 +6270,7 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)    env->parse_depth++;    if (env->parse_depth > ParseDepthLimit)      return ONIGERR_PARSE_DEPTH_LIMIT_OVER; +    prev_cc = (CClassNode* )NULL;    r = fetch_token_in_cc(tok, src, end, env);    if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { @@ -6301,10 +6323,11 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)      case TK_RAW_BYTE:        /* tok->base != 0 : octal or hexadec. */        if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { +        int i, j;          UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];          UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;          UChar* psave = p; -        int i, base = tok->base; +        int base = tok->base;          buf[0] = tok->u.c;          for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { @@ -6322,6 +6345,9 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)            goto err;          } +        /* clear buf tail */ +        for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0'; +          len = enclen(env->enc, buf);          if (i < len) {            r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; @@ -6359,8 +6385,13 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)      val_entry:        len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);        if (len < 0) { -        r = len; -        goto err; +        if (state != CCS_RANGE || +            ! IS_SYNTAX_BV(env->syntax, +                           ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) || +            v < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) { +          r = len; +          goto err; +        }        }        in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);      val_entry2: @@ -6673,7 +6704,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv    }    if (tag_start != tag_end) { -    r = callout_tag_entry(env->reg, tag_start, tag_end, num); +    r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);      if (r != ONIG_NORMAL) return r;    } @@ -6994,7 +7025,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en    }    if (tag_start != tag_end) { -    r = callout_tag_entry(env->reg, tag_start, tag_end, num); +    r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);      if (r != ONIG_NORMAL) return r;    } @@ -7271,10 +7302,8 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,              int num;              int* backs; -            num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); +            num = name_to_group_numbers(env, prev, name_end, &backs);              if (num <= 0) { -              onig_scan_env_set_error_string(env, -                        ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);                return ONIGERR_UNDEFINED_NAME_REFERENCE;              }              if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { @@ -7414,6 +7443,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,        }        break; +#ifdef USE_CAPTURE_HISTORY      case '@':        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {          if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { @@ -7441,6 +7471,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,          return ONIGERR_UNDEFINED_GROUP_OPTION;        }        break; +#endif  #ifdef USE_POSIXLINE_OPTION      case 'p': @@ -7688,7 +7719,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)        if (targetq_num >= 0 && nestq_num < 0) {          if (targetq_num == 1 || targetq_num == 2) { /* * or + */            /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ -          if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { +          if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {              qn->upper = (qn->lower == 0 ? 1 : qn->lower);            }          } @@ -7826,14 +7857,18 @@ static int  parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,            ScanEnv* env, int group_head)  { -  int r, len, group = 0; +  int r, len, group;    Node* qn;    Node** tp; +  unsigned int parse_depth; +  group = 0;    *np = NULL;    if (tok->type == (enum TokenSyms )term)      goto end_of_token; +  parse_depth = env->parse_depth; +    switch (tok->type) {    case TK_ALT:    case TK_EOT: @@ -7914,36 +7949,29 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,        len = 1;        while (1) {          if (len >= ONIGENC_MBC_MINLEN(env->enc)) { -          if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */ +          if (len == enclen(env->enc, STR_(*np)->s)) {              r = fetch_token(tok, src, end, env); -            NODE_STRING_CLEAR_RAW(*np); -            goto string_end; +            goto tk_raw_byte_end;            }          }          r = fetch_token(tok, src, end, env);          if (r < 0) return r; -        if (r != TK_RAW_BYTE) { -          /* Don't use this, it is wrong for little endian encodings. */ -#ifdef USE_PAD_TO_SHORT_BYTE_CHAR -          int rem; -          if (len < ONIGENC_MBC_MINLEN(env->enc)) { -            rem = ONIGENC_MBC_MINLEN(env->enc) - len; -            (void )node_str_head_pad(STR_(*np), rem, (UChar )0); -            if (len + rem == enclen(env->enc, STR_(*np)->s)) { -              NODE_STRING_CLEAR_RAW(*np); -              goto string_end; -            } -          } -#endif +        if (r != TK_RAW_BYTE)            return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; -        }          r = node_str_cat_char(*np, (UChar )tok->u.c);          if (r < 0) return r;          len++;        } + +    tk_raw_byte_end: +      if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end)) +        return ONIGERR_INVALID_WIDE_CHAR_VALUE; + +      NODE_STRING_CLEAR_RAW(*np); +      goto string_end;      }      break; @@ -8055,7 +8083,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,    case TK_ANYCHAR_ANYTIME:      *np = node_new_anychar();      CHECK_NULL_RETURN_MEMERR(*np); -    qn = node_new_quantifier(0, REPEAT_INFINITE, 0); +    qn = node_new_quantifier(0, INFINITE_REPEAT, 0);      CHECK_NULL_RETURN_MEMERR(qn);      NODE_BODY(qn) = *np;      *np = qn; @@ -8158,6 +8186,10 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,        if (is_invalid_quantifier_target(*tp))          return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; +      parse_depth++; +      if (parse_depth > ParseDepthLimit) +        return ONIGERR_PARSE_DEPTH_LIMIT_OVER; +        qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,                                 r == TK_INTERVAL);        CHECK_NULL_RETURN_MEMERR(qn);  | 
