diff options
Diffstat (limited to 'src')
| -rwxr-xr-x | src/gperf_unfold_key_conv.py | 2 | ||||
| -rw-r--r-- | src/oniguruma.h | 3 | ||||
| -rw-r--r-- | src/regcomp.c | 98 | ||||
| -rw-r--r-- | src/regexec.c | 46 | ||||
| -rw-r--r-- | src/regparse.c | 179 | ||||
| -rw-r--r-- | src/regsyntax.c | 6 | ||||
| -rw-r--r-- | src/unicode_fold1_key.c | 12 | ||||
| -rw-r--r-- | src/unicode_fold2_key.c | 12 | ||||
| -rw-r--r-- | src/unicode_fold3_key.c | 12 | ||||
| -rw-r--r-- | src/unicode_unfold_key.c | 14 | 
10 files changed, 233 insertions, 151 deletions
| diff --git a/src/gperf_unfold_key_conv.py b/src/gperf_unfold_key_conv.py index dcd8587..34f9c2f 100755 --- a/src/gperf_unfold_key_conv.py +++ b/src/gperf_unfold_key_conv.py @@ -36,7 +36,7 @@ def parse_line(s):      if r != s: return r      r = re.sub(REG_GET_CODE, 'OnigCodePoint gcode = wordlist[key].code;', s)      if r != s: return r -    r = re.sub(REG_CODE_CHECK, 'if (code == gcode)', s) +    r = re.sub(REG_CODE_CHECK, 'if (code == gcode && wordlist[key].index >= 0)', s)      if r != s: return r      return s diff --git a/src/oniguruma.h b/src/oniguruma.h index 33e2a0a..02d4254 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -35,7 +35,7 @@ extern "C" {  #define ONIGURUMA  #define ONIGURUMA_VERSION_MAJOR   6 -#define ONIGURUMA_VERSION_MINOR   2 +#define ONIGURUMA_VERSION_MINOR   3  #define ONIGURUMA_VERSION_TEENY   0  #ifdef __cplusplus @@ -473,6 +473,7 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  #define ONIG_SYN_OP_ESC_OCTAL3                  (1U<<28)  /* \OOO */  #define ONIG_SYN_OP_ESC_X_HEX2                  (1U<<29)  /* \xHH */  #define ONIG_SYN_OP_ESC_X_BRACE_HEX8            (1U<<30)  /* \x{7HHHHHHH} */ +#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL           (1U<<31)  /* \o{1OOOOOOOOOO} */  #define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE        (1U<<0)  /* \Q...\E */  #define ONIG_SYN_OP2_QMARK_GROUP_EFFECT         (1U<<1)  /* (?...) */ diff --git a/src/regcomp.c b/src/regcomp.c index 5c924b5..0e9a9ab 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -2,7 +2,7 @@    regcomp.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -761,17 +761,17 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)    if (infinite && qn->lower <= 1) {      if (qn->greedy) {        if (qn->lower == 1) -	len = SIZE_OP_JUMP; +        len = SIZE_OP_JUMP;        else -	len = 0; +        len = 0;        len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;      }      else {        if (qn->lower == 0) -	len = SIZE_OP_JUMP; +        len = SIZE_OP_JUMP;        else -	len = 0; +        len = 0;        len += mod_tlen + SIZE_OP_PUSH + cklen;      } @@ -785,10 +785,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)    else if (qn->upper == 1 && qn->greedy) {      if (qn->lower == 0) {        if (CKN_ON) { -	len = SIZE_OP_STATE_CHECK_PUSH + tlen; +        len = SIZE_OP_STATE_CHECK_PUSH + tlen;        }        else { -	len = SIZE_OP_PUSH + tlen; +        len = SIZE_OP_PUSH + tlen;        }      }      else { @@ -1255,7 +1255,7 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)        if (tlen < 0) return tlen;        len = tlen * qn->lower -	  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; +        + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;      }      else {        len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; @@ -1362,7 +1362,7 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)        r = add_opcode(reg, OP_POP);        if (r) return r;        r = add_opcode_rel_addr(reg, OP_JUMP, -	 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); +             -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));      }      else {        r = add_opcode(reg, OP_PUSH_STOP_BT); @@ -2145,16 +2145,16 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)        switch (en->type) {        case ENCLOSE_MEMORY:  #ifdef USE_SUBEXP_CALL -	if (IS_ENCLOSE_CLEN_FIXED(en)) -	  *len = en->char_len; -	else { -	  r = get_char_length_tree1(en->target, reg, len, level); -	  if (r == 0) { -	    en->char_len = *len; -	    SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); -	  } -	} -	break; +        if (IS_ENCLOSE_CLEN_FIXED(en)) +          *len = en->char_len; +        else { +          r = get_char_length_tree1(en->target, reg, len, level); +          if (r == 0) { +            en->char_len = *len; +            SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); +          } +        } +        break;  #endif        case ENCLOSE_OPTION:        case ENCLOSE_STOP_BACKTRACK: @@ -2594,17 +2594,17 @@ get_min_len(Node* node, OnigLen *min, ScanEnv* env)          if (IS_ENCLOSE_MIN_FIXED(en))            *min = en->min_len;          else { -	  if (IS_ENCLOSE_MARK1(NENCLOSE(node))) -	    *min = 0;  // recursive -	  else { -	    SET_ENCLOSE_STATUS(node, NST_MARK1); -	    r = get_min_len(en->target, min, env); -	    CLEAR_ENCLOSE_STATUS(node, NST_MARK1); -	    if (r == 0) { -	      en->min_len = *min; -	      SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); -	    } -	  } +          if (IS_ENCLOSE_MARK1(NENCLOSE(node))) +            *min = 0;  // recursive +          else { +            SET_ENCLOSE_STATUS(node, NST_MARK1); +            r = get_min_len(en->target, min, env); +            CLEAR_ENCLOSE_STATUS(node, NST_MARK1); +            if (r == 0) { +              en->min_len = *min; +              SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); +            } +          }          }          break; @@ -2713,22 +2713,22 @@ get_max_len(Node* node, OnigLen *max, ScanEnv* env)        EncloseNode* en = NENCLOSE(node);        switch (en->type) {        case ENCLOSE_MEMORY: -	if (IS_ENCLOSE_MAX_FIXED(en)) -	  *max = en->max_len; -	else { -	  if (IS_ENCLOSE_MARK1(NENCLOSE(node))) -	    *max = ONIG_INFINITE_DISTANCE; -	  else { -	    SET_ENCLOSE_STATUS(node, NST_MARK1); -	    r = get_max_len(en->target, max, env); -	    CLEAR_ENCLOSE_STATUS(node, NST_MARK1); -	    if (r == 0) { -	      en->max_len = *max; -	      SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); -	    } -	  } -	} -	break; +        if (IS_ENCLOSE_MAX_FIXED(en)) +          *max = en->max_len; +        else { +          if (IS_ENCLOSE_MARK1(NENCLOSE(node))) +            *max = ONIG_INFINITE_DISTANCE; +          else { +            SET_ENCLOSE_STATUS(node, NST_MARK1); +            r = get_max_len(en->target, max, env); +            CLEAR_ENCLOSE_STATUS(node, NST_MARK1); +            if (r == 0) { +              en->max_len = *max; +              SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); +            } +          } +        } +        break;        case ENCLOSE_OPTION:        case ENCLOSE_STOP_BACKTRACK: @@ -4559,7 +4559,7 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)    if (to->expr.len > 0) {      if (add->len.max > 0) {        if (to->expr.len > (int )add->len.max) -	to->expr.len = add->len.max; +        to->expr.len = add->len.max;        if (to->expr.mmd.max == 0)          select_opt_exact_info(enc, &to->exb, &to->expr); @@ -4957,7 +4957,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)      reg->exact_end = reg->exact + e->len;      allow_reverse = -	ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); +      ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);      if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {        r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, @@ -5045,7 +5045,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)    if (opt.exb.len > 0 || opt.exm.len > 0) {      select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);      if (opt.map.value > 0 && -	comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { +        comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {        goto set_map;      }      else { diff --git a/src/regexec.c b/src/regexec.c index 35fef11..c0626ef 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@    regexec.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -1346,8 +1346,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        fprintf(stderr, "%4d> \"", (int )(s - str));        bp = buf;        for (i = 0, q = s; i < 7 && q < end; i++) { -	len = enclen(encode, q); -	while (len-- > 0) *bp++ = *q++; +        len = enclen(encode, q); +        while (len-- > 0) *bp++ = *q++;        }        if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }        else         { xmemcpy(bp, "\"",    1); bp += 1; } @@ -1473,14 +1473,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        break;      case OP_EXACT1:  MOP_IN(OP_EXACT1); -#if 0        DATA_ENSURE(1);        if (*p != *s) goto fail;        p++; s++; -#endif -      if (*p != *s++) goto fail; -      DATA_ENSURE(0); -      p++;        MOP_OUT;        break; @@ -3159,6 +3154,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,      }      else {        UChar *q = p + reg->dmin; + +      if (q >= end) return 0; /* fail */        while (p < q) p += enclen(reg->enc, p);      }    } @@ -3238,18 +3235,25 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,      }      else {        if (reg->dmax != ONIG_INFINITE_DISTANCE) { -        *low = p - reg->dmax; -        if (*low > s) { -          *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, -                                          *low, (const UChar** )low_prev); -          if (low_prev && IS_NULL(*low_prev)) -            *low_prev = onigenc_get_prev_char_head(reg->enc, -                                                   (pprev ? pprev : s), *low); +        if (p - str < reg->dmax) { +          *low = (UChar* )str; +          if (low_prev) +            *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);          }          else { -          if (low_prev) -            *low_prev = onigenc_get_prev_char_head(reg->enc, -                                                   (pprev ? pprev : str), *low); +          *low = p - reg->dmax; +          if (*low > s) { +            *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, +                                                 *low, (const UChar** )low_prev); +            if (low_prev && IS_NULL(*low_prev)) +              *low_prev = onigenc_get_prev_char_head(reg->enc, +                                                     (pprev ? pprev : s), *low); +          } +          else { +            if (low_prev) +              *low_prev = onigenc_get_prev_char_head(reg->enc, +                                                     (pprev ? pprev : str), *low); +          }          }        }      } @@ -3790,8 +3794,10 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,        if (rs != 0)          return rs; -      if (region->end[0] == start - str) -        start++; +      if (region->end[0] == start - str) { +        if (start >= end) break; +        start += enclen(reg->enc, start); +      }        else          start = str + region->end[0]; diff --git a/src/regparse.c b/src/regparse.c index 11f9e34..8153513 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -2,7 +2,7 @@    regparse.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -41,7 +41,8 @@  OnigSyntaxType OnigSyntaxRuby = {    (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |       ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | -     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | +     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | +     ONIG_SYN_OP_ESC_CONTROL_CHARS |       ONIG_SYN_OP_ESC_C_CONTROL )     & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )    , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | @@ -553,8 +554,8 @@ i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)    int r = (*(arg->func))(e->name,                           e->name + e->name_len,                           e->back_num, -			 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), -			 arg->reg, arg->arg); +                         (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), +                         arg->reg, arg->arg);    if (r != 0) {      arg->ret = r;      return ST_STOP; @@ -1053,7 +1054,7 @@ onig_node_free(Node* node)    switch (NTYPE(node)) {    case NT_STR:      if (NSTR(node)->capa != 0 && -	IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { +        IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {        xfree(NSTR(node)->s);      }      break; @@ -2519,8 +2520,8 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,        int flag = (c == '-' ? -1 : 1);        if (PEND) { -	r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; -	goto end; +        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; +        goto end;        }        PFETCH(c);        if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; @@ -2531,9 +2532,9 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,        exist_level = 1;        if (!PEND) { -	PFETCH(c); -	if (c == end_code) -	  goto end; +        PFETCH(c); +        if (c == end_code) +          goto end;        }      } @@ -2945,19 +2946,46 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        c2 = PPEEK;        if (c2 == '{' && -	  IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { -	PINC; -	tok->type = TK_CHAR_PROPERTY; -	tok->u.prop.not = (c == 'P' ? 1 : 0); - -	if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { -	  PFETCH(c2); -	  if (c2 == '^') { -	    tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); -	  } -	  else -	    PUNFETCH; -	} +          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { +        PINC; +        tok->type = TK_CHAR_PROPERTY; +        tok->u.prop.not = (c == 'P' ? 1 : 0); + +        if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { +          PFETCH(c2); +          if (c2 == '^') { +            tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); +          } +          else +            PUNFETCH; +        } +      } +      break; + +    case 'o': +      if (PEND) break; + +      prev = p; +      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { +        PINC; +        num = scan_unsigned_octal_number(&p, end, 11, enc); +        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; +        if (!PEND) { +          c2 = PPEEK; +          if (ONIGENC_IS_CODE_DIGIT(enc, c2)) +            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; +        } + +        if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { +          PINC; +          tok->type   = TK_CODE_POINT; +          tok->base   = 8; +          tok->u.code = (OnigCodePoint )num; +        } +        else { +          /* can't read nothing or invalid format */ +          p = prev; +        }        }        break; @@ -3020,7 +3048,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)          PUNFETCH;          prev = p;          num = scan_unsigned_octal_number(&p, end, 3, enc); -        if (num < 0) return ONIGERR_TOO_BIG_NUMBER; +        if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;          if (p == prev) {  /* can't read nothing. */            num = 0; /* but, it's not error */          } @@ -3132,7 +3160,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        tok->u.repeat.upper = 1;      greedy_check:        if (!PEND && PPEEK_IS('?') && -	  IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { +          IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {          PFETCH(c);          tok->u.repeat.greedy     = 0;          tok->u.repeat.possessive = 0; @@ -3302,6 +3330,31 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        goto end_buf;        break; +    case 'o': +      if (PEND) break; + +      prev = p; +      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { +        PINC; +        num = scan_unsigned_octal_number(&p, end, 11, enc); +        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; +        if (!PEND) { +          if (ONIGENC_IS_CODE_DIGIT(enc, PPEEK)) +            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; +        } + +        if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { +          PINC; +          tok->type   = TK_CODE_POINT; +          tok->u.code = (OnigCodePoint )num; +        } +        else { +          /* can't read nothing or invalid format */ +          p = prev; +        } +      } +      break; +      case 'x':        if (PEND) break; @@ -3392,7 +3445,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {          prev = p;          num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); -        if (num < 0) return ONIGERR_TOO_BIG_NUMBER; +        if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;          if (p == prev) {  /* can't read nothing. */            num = 0; /* but, it's not error */          } @@ -3541,7 +3594,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)          }          else { /* string */            p = tok->backp + enclen(enc, tok->backp); -	} +        }        }        break;      } @@ -3753,8 +3806,7 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,      OnigCodePoint prev = 0;      for (i = 0; i < n; i++) { -      for (j = prev; -	   j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { +      for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {          if (j >= sb_out) {            goto sb_end2;          } @@ -4028,14 +4080,16 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,      }    } -  *state = CCS_VALUE; +  if (*state != CCS_START) +    *state = CCS_VALUE; +    *type  = CCV_CLASS;    return 0;  }  static int -next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, -	       int* vs_israw, int v_israw, +next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to, +	       int* from_israw, int to_israw,  	       enum CCVALTYPE intype, enum CCVALTYPE* type,  	       enum CCSTATE* state, ScanEnv* env)  { @@ -4044,10 +4098,13 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,    switch (*state) {    case CCS_VALUE:      if (*type == CCV_SB) { -      BITSET_SET_BIT(cc->bs, (int )(*vs)); +      if (*from > 0xff) +          return ONIGERR_INVALID_CODE_POINT_VALUE; + +      BITSET_SET_BIT(cc->bs, (int )(*from));      }      else if (*type == CCV_CODE_POINT) { -      r = add_code_range(&(cc->mbuf), env, *vs, *vs); +      r = add_code_range(&(cc->mbuf), env, *from, *from);        if (r < 0) return r;      }      break; @@ -4055,40 +4112,32 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,    case CCS_RANGE:      if (intype == *type) {        if (intype == CCV_SB) { -        if (*vs > 0xff || v > 0xff) +        if (*from > 0xff || to > 0xff)            return ONIGERR_INVALID_CODE_POINT_VALUE; -        if (*vs > v) { +        if (*from > to) {            if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))              goto ccs_range_end;            else              return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;          } -        bitset_set_range(cc->bs, (int )*vs, (int )v); +        bitset_set_range(cc->bs, (int )*from, (int )to);        }        else { -        r = add_code_range(&(cc->mbuf), env, *vs, v); +        r = add_code_range(&(cc->mbuf), env, *from, to);          if (r < 0) return r;        }      }      else { -#if 0 -      if (intype == CCV_CODE_POINT && *type == CCV_SB) { -#endif -        if (*vs > v) { -          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) -            goto ccs_range_end; -          else -            return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; -        } -        bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); -        r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); -        if (r < 0) return r; -#if 0 +      if (*from > to) { +        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) +          goto ccs_range_end; +        else +          return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;        } -      else -        return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; -#endif +      bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff)); +      r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to); +      if (r < 0) return r;      }    ccs_range_end:      *state = CCS_COMPLETE; @@ -4103,9 +4152,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,      break;    } -  *vs_israw = v_israw; -  *vs       = v; -  *type     = intype; +  *from_israw = to_israw; +  *from       = to; +  *type       = intype;    return 0;  } @@ -4366,9 +4415,9 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,          r = parse_char_class(&anode, tok, &p, end, env);          if (r != 0) { -	  onig_node_free(anode); -	  goto cc_open_err; -	} +          onig_node_free(anode); +          goto cc_open_err; +        }          acc = NCCLASS(anode);          r = or_cclass(cc, acc, env->enc); @@ -4663,9 +4712,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,              r = parse_subexp(&target, tok, term, &p, end, env);              env->option = prev;              if (r < 0) { -	      onig_node_free(target); -	      return r; -	    } +              onig_node_free(target); +              return r; +            }              *np = node_new_option(option);              CHECK_NULL_RETURN_MEMERR(*np);              NENCLOSE(*np)->target = target; @@ -5291,8 +5340,8 @@ parse_branch(Node** top, OnigToken* tok, int term,      while (r != TK_EOT && r != term && r != TK_ALT) {        r = parse_exp(&node, tok, term, src, end, env);        if (r < 0) { -	onig_node_free(node); -	return r; +        onig_node_free(node); +        return r;        }        if (NTYPE(node) == NT_LIST) { diff --git a/src/regsyntax.c b/src/regsyntax.c index ade5b55..e751e24 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -168,7 +168,8 @@ OnigSyntaxType OnigSyntaxJava = {  OnigSyntaxType OnigSyntaxPerl = {    (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |       ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | -     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | +     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | +     ONIG_SYN_OP_ESC_CONTROL_CHARS |       ONIG_SYN_OP_ESC_C_CONTROL )     & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )    , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | @@ -192,7 +193,8 @@ OnigSyntaxType OnigSyntaxPerl = {  OnigSyntaxType OnigSyntaxPerl_NG = {    (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |       ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | -     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | +     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | +     ONIG_SYN_OP_ESC_CONTROL_CHARS |       ONIG_SYN_OP_ESC_C_CONTROL )     & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )    , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c index 6b390fc..2151211 100644 --- a/src/unicode_fold1_key.c +++ b/src/unicode_fold1_key.c @@ -1,7 +1,7 @@  /* This file was converted by gperf_fold_key_conv.py        from gperf output file. */ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold1_key unicode_fold1_key.gperf  */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold1_key unicode_fold1_key.gperf  */  /* Computed positions: -k'1-3' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[])    return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+3] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)];  } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif  int  unicode_fold1_key(OnigCodePoint codes[])  { @@ -2534,7 +2540,7 @@ unicode_fold1_key(OnigCodePoint codes[])      {        int key = hash(codes); -      if (key <= MAX_HASH_VALUE) +      if (key <= MAX_HASH_VALUE && key >= 0)          {            int index = wordlist[key]; diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c index 74e9876..07cfa4e 100644 --- a/src/unicode_fold2_key.c +++ b/src/unicode_fold2_key.c @@ -1,7 +1,7 @@  /* This file was converted by gperf_fold_key_conv.py        from gperf output file. */ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold2_key unicode_fold2_key.gperf  */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold2_key unicode_fold2_key.gperf  */  /* Computed positions: -k'3,6' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[])    return asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)];  } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif  int  unicode_fold2_key(OnigCodePoint codes[])  { @@ -189,7 +195,7 @@ unicode_fold2_key(OnigCodePoint codes[])      {        int key = hash(codes); -      if (key <= MAX_HASH_VALUE) +      if (key <= MAX_HASH_VALUE && key >= 0)          {            int index = wordlist[key]; diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c index 8095b1c..1b4d9d4 100644 --- a/src/unicode_fold3_key.c +++ b/src/unicode_fold3_key.c @@ -1,7 +1,7 @@  /* This file was converted by gperf_fold_key_conv.py        from gperf output file. */ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold3_key unicode_fold3_key.gperf  */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold3_key unicode_fold3_key.gperf  */  /* Computed positions: -k'3,6,9' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[])    return asso_values[(unsigned char)onig_codes_byte_at(codes, 8)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)];  } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif  int  unicode_fold3_key(OnigCodePoint codes[])  { @@ -99,7 +105,7 @@ unicode_fold3_key(OnigCodePoint codes[])      {        int key = hash(codes); -      if (key <= MAX_HASH_VALUE) +      if (key <= MAX_HASH_VALUE && key >= 0)          {            int index = wordlist[key]; diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c index c6261d2..15302ca 100644 --- a/src/unicode_unfold_key.c +++ b/src/unicode_unfold_key.c @@ -1,7 +1,7 @@  /* This file was converted by gperf_unfold_key_conv.py        from gperf output file. */ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N unicode_unfold_key unicode_unfold_key.gperf  */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N unicode_unfold_key unicode_unfold_key.gperf  */  /* Computed positions: -k'1-3' */ @@ -64,6 +64,12 @@ hash(OnigCodePoint codes[])    return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+35] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)+1] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)];  } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif  const struct ByUnfoldKey *  unicode_unfold_key(OnigCodePoint code)  { @@ -2840,11 +2846,11 @@ unicode_unfold_key(OnigCodePoint code)      {        int key = hash(&code); -      if (key <= MAX_HASH_VALUE) +      if (key <= MAX_HASH_VALUE && key >= 0)          {            OnigCodePoint gcode = wordlist[key].code; -          if (code == gcode) +          if (code == gcode && wordlist[key].index >= 0)              return &wordlist[key];          }      } | 
