diff options
| author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-12-14 14:22:27 +0100 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-12-14 14:22:27 +0100 | 
| commit | fa131e9ad996d099bb48a3b1302e4a3b3c0f03bd (patch) | |
| tree | 7b4963ddd27303d1a8e856acdd3a55ce9ed179f7 /src/regcomp.c | |
| parent | a89a4ac904bc93b1d93b410394fa05c23260351b (diff) | |
| parent | b7ad39361a923207113da1145f8655a1bc50c3aa (diff) | |
Merge branch 'release/6.6.1-1' (tag: 6.6.1-1)
Diffstat (limited to 'src/regcomp.c')
| -rw-r--r-- | src/regcomp.c | 253 | 
1 file changed, 162 insertions, 91 deletions
diff --git a/src/regcomp.c b/src/regcomp.c index 47023cb..ab5701c 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -277,7 +277,7 @@ unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)  static int  add_opcode(regex_t* reg, int opcode)  { -  BBUF_ADD1(reg, opcode); +  BB_ADD1(reg, opcode);    return 0;  } @@ -287,7 +287,7 @@ add_state_check_num(regex_t* reg, int num)  {    StateCheckNumType n = (StateCheckNumType )num; -  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); +  BB_ADD(reg, &n, SIZE_STATE_CHECK_NUM);    return 0;  }  #endif @@ -297,7 +297,7 @@ add_rel_addr(regex_t* reg, int addr)  {    RelAddrType ra = (RelAddrType )addr; -  BBUF_ADD(reg, &ra, SIZE_RELADDR); +  BB_ADD(reg, &ra, SIZE_RELADDR);    return 0;  } @@ -306,7 +306,7 @@ add_abs_addr(regex_t* reg, int addr)  {    AbsAddrType ra = (AbsAddrType )addr; -  BBUF_ADD(reg, &ra, SIZE_ABSADDR); +  BB_ADD(reg, &ra, SIZE_ABSADDR);    return 0;  } @@ -315,7 +315,7 @@ add_length(regex_t* reg, int len)  {    LengthType l = (LengthType )len; -  BBUF_ADD(reg, &l, SIZE_LENGTH); +  BB_ADD(reg, &l, SIZE_LENGTH);    return 0;  } @@ -324,7 +324,7 @@ add_mem_num(regex_t* reg, int num)  {    MemNumType n = (MemNumType )num; -  BBUF_ADD(reg, &n, SIZE_MEMNUM); +  BB_ADD(reg, &n, SIZE_MEMNUM);    return 0;  } @@ -334,7 +334,7 @@ add_pointer(regex_t* reg, void* addr)  {    PointerType ptr = (PointerType )addr; -  BBUF_ADD(reg, &ptr, SIZE_POINTER); +  BB_ADD(reg, &ptr, SIZE_POINTER);    return 0;  }  #endif @@ -342,7 +342,7 @@ add_pointer(regex_t* reg, void* addr)  static int  add_option(regex_t* reg, OnigOptionType option)  { -  BBUF_ADD(reg, &option, SIZE_OPTION); +  BB_ADD(reg, &option, SIZE_OPTION);    return 0;  } @@ -351,7 +351,7 @@ add_save_type(regex_t* reg, enum SaveType type)  {    SaveType t = (SaveType )type; -  BBUF_ADD(reg, &t, SIZE_SAVE_TYPE); +  BB_ADD(reg, &t, SIZE_SAVE_TYPE);    return 0;  } @@ -360,7 +360,14 @@ add_update_var_type(regex_t* reg, enum UpdateVarType type)  {    UpdateVarType t = 
(UpdateVarType )type; -  BBUF_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE); +  BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE); +  return 0; +} + +static int +add_mode(regex_t* reg, ModeType mode) +{ +  BB_ADD(reg, &mode, SIZE_MODE);    return 0;  } @@ -378,14 +385,14 @@ add_opcode_rel_addr(regex_t* reg, int opcode, int addr)  static int  add_bytes(regex_t* reg, UChar* bytes, int len)  { -  BBUF_ADD(reg, bytes, len); +  BB_ADD(reg, bytes, len);    return 0;  }  static int  add_bitset(regex_t* reg, BitSetRef bs)  { -  BBUF_ADD(reg, bs, SIZE_BITSET); +  BB_ADD(reg, bs, SIZE_BITSET);    return 0;  } @@ -492,7 +499,7 @@ compile_call(CallNode* node, regex_t* reg, ScanEnv* env)    r = add_opcode(reg, OP_CALL);    if (r != 0) return r; -  r = unset_addr_list_add(env->unset_addr_list, BBUF_GET_OFFSET_POS(reg), +  r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),                            NODE_CALL_BODY(node));    if (r != 0) return r;    r = add_abs_addr(reg, 0 /*dummy addr.*/); @@ -655,7 +662,7 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)    return add_bytes(reg, mbuf->p, mbuf->used);  #else    int r, pad_size; -  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; +  UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;    GET_ALIGNMENT_PAD_SIZE(p, pad_size);    add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); @@ -1400,7 +1407,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)    if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {      r = add_opcode(reg, OP_CALL);      if (r != 0) return r; -    node->m.called_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; +    node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;      NODE_STATUS_ADD(node, NST_ADDR_FIXED);      r = add_abs_addr(reg, (int )node->m.called_addr);      if (r != 0) return r; @@ -1418,7 +1425,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)    if (NODE_IS_CALLED(node)) {      r = 
add_opcode(reg, OP_CALL);      if (r != 0) return r; -    node->m.called_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; +    node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;      NODE_STATUS_ADD(node, NST_ADDR_FIXED);      r = add_abs_addr(reg, (int )node->m.called_addr);      if (r != 0) return r; @@ -1588,6 +1595,20 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)      len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;      break; +  case ANCHOR_WORD_BOUNDARY: +  case ANCHOR_NO_WORD_BOUNDARY: +#ifdef USE_WORD_BEGIN_END +  case ANCHOR_WORD_BEGIN: +  case ANCHOR_WORD_END: +#endif +    len = SIZE_OP_WORD_BOUNDARY; +    break; + +  case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +  case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +    len = SIZE_OPCODE; +    break; +    default:      len = SIZE_OPCODE;      break; @@ -1600,6 +1621,7 @@ static int  compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)  {    int r, len; +  enum OpCode op;    switch (node->type) {    case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break; @@ -1609,13 +1631,34 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)    case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;    case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; -  case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break; -  case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; +  case ANCHOR_WORD_BOUNDARY: +    op = OP_WORD_BOUNDARY; +  word: +    r = add_opcode(reg, op); +    if (r != 0) return r; +    r = add_mode(reg, (ModeType )node->ascii_mode); +    break; + +  case ANCHOR_NO_WORD_BOUNDARY: +    op = OP_NO_WORD_BOUNDARY; goto word; +    break;  #ifdef USE_WORD_BEGIN_END -  case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break; -  case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);      
 break; +  case ANCHOR_WORD_BEGIN: +    op = OP_WORD_BEGIN; goto word; +    break; +  case ANCHOR_WORD_END: +    op = OP_WORD_END; goto word; +    break;  #endif +  case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +    r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); +    break; + +  case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +    r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); +    break; +    case ANCHOR_PREC_READ:      r = add_opcode(reg, OP_PREC_READ_START);      if (r != 0) return r; @@ -1914,9 +1957,12 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)          break;        case ONIGENC_CTYPE_WORD: -        if (CTYPE_(node)->not != 0)  op = OP_NOT_WORD; -        else                         op = OP_WORD; - +        if (CTYPE_(node)->ascii_mode == 0) { +          op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD; +        } +        else { +          op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII; +        }          r = add_opcode(reg, op);          break; @@ -2038,8 +2084,6 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)    return r;  } -#ifdef USE_NAMED_GROUP -  static int  noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)  { @@ -2283,7 +2327,6 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)    return onig_renumber_name_table(reg, map);  } -#endif /* USE_NAMED_GROUP */  #ifdef USE_CALL  static int @@ -2301,7 +2344,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)      addr   = en->m.called_addr;      offset = uslist->us[i].offset; -    BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); +    BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);    }    return 0;  } @@ -2394,9 +2437,6 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)  #endif    case NODE_CTYPE: -    *len = 1; -    break; -    case NODE_CCLASS:      *len = 1;      break; @@ -2496,7 +2536,8 @@ is_exclusive(Node* x, Node* y, regex_t* reg)        switch (ytype) {        
case NODE_CTYPE:          if (CTYPE_(y)->ctype == CTYPE_(x)->ctype && -            CTYPE_(y)->not   != CTYPE_(x)->not) +            CTYPE_(y)->not   != CTYPE_(x)->not && +            CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)            return 1;          else            return 0; @@ -2523,6 +2564,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)    case NODE_CCLASS:      { +      int range;        CClassNode* xc = CCLASS_(x);        switch (ytype) {        case NODE_CTYPE: @@ -2534,9 +2576,10 @@ is_exclusive(Node* x, Node* y, regex_t* reg)          case ONIGENC_CTYPE_WORD:            if (CTYPE_(y)->not == 0) {              if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { -              for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +              range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE; +              for (i = 0; i < range; i++) {                  if (BITSET_AT(xc->bs, i)) { -                  if (IS_CODE_SB_WORD(reg->enc, i)) return 0; +                  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;                  }                }                return 1; @@ -2545,18 +2588,18 @@ is_exclusive(Node* x, Node* y, regex_t* reg)            }            else {              if (IS_NOT_NULL(xc->mbuf)) return 0; -            for (i = 0; i < SINGLE_BYTE_SIZE; i++) { -              if (! IS_CODE_SB_WORD(reg->enc, i)) { -                if (!IS_NCCLASS_NOT(xc)) { -                  if (BITSET_AT(xc->bs, i)) -                    return 0; -                } -                else { -                  if (! BITSET_AT(xc->bs, i)) -                    return 0; -                } +            if (IS_NCCLASS_NOT(xc)) return 0; + +            range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE; +            for (i = 0; i < range; i++) { +              if (! 
ONIGENC_IS_CODE_WORD(reg->enc, i)) { +                if (BITSET_AT(xc->bs, i)) +                  return 0;                }              } +            for (i = range; i < SINGLE_BYTE_SIZE; i++) { +              if (BITSET_AT(xc->bs, i)) return 0; +            }              return 1;            }            break; @@ -2612,10 +2655,18 @@ is_exclusive(Node* x, Node* y, regex_t* reg)            break;          case ONIGENC_CTYPE_WORD: -          if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) -            return CTYPE_(y)->not; -          else -            return !(CTYPE_(y)->not); +          if (CTYPE_(y)->ascii_mode == 0) { +            if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) +              return CTYPE_(y)->not; +            else +              return !(CTYPE_(y)->not); +          } +          else { +            if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end)) +              return CTYPE_(y)->not; +            else +              return !(CTYPE_(y)->not); +          }            break;          default:            break; @@ -2780,7 +2831,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)    case NODE_ENCLOSURE:      {        EnclosureNode* en = ENCLOSURE_(node); -      if ((en->type & enclosure_mask) == 0) +      if (((1<<en->type) & enclosure_mask) == 0)          return 1;        r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); @@ -3512,7 +3563,7 @@ divide_look_behind_alternatives(Node* node)    np = node;    while (IS_NOT_NULL(np = NODE_CDR(np))) { -    insert_node = onig_node_new_anchor(anc_type); +    insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);      CHECK_NULL_RETURN_MEMERR(insert_node);      NODE_BODY(insert_node) = NODE_CAR(np);      NODE_CAR(np) = insert_node; @@ -4150,22 +4201,19 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)    if (cn->by_number != 0) {      int gnum = cn->group_num; -#ifdef USE_NAMED_GROUP      if (env->num_named > 0 
&&          IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&          !ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {        return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;      } -#endif +      if (gnum > env->num_mem) {        onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,                                       cn->name, cn->name_end);        return ONIGERR_UNDEFINED_GROUP_REFERENCE;      } -#ifdef USE_NAMED_GROUP    set_call_attr: -#endif      NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;      if (IS_NULL(NODE_CALL_BODY(cn))) {        onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE, @@ -4173,7 +4221,6 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)        return ONIGERR_UNDEFINED_NAME_REFERENCE;      }    } -#ifdef USE_NAMED_GROUP    else {      int *refs; @@ -4193,7 +4240,6 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)        goto set_call_attr;      }    } -#endif    return 0;  } @@ -4579,18 +4625,22 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)    | BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \    | BIT_NODE_CALL ) -#define ALLOWED_ENCLOSURE_IN_LB       ( ENCLOSURE_MEMORY | ENCLOSURE_OPTION ) -#define ALLOWED_ENCLOSURE_IN_LB_NOT   ENCLOSURE_OPTION +#define ALLOWED_ENCLOSURE_IN_LB       ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION ) +#define ALLOWED_ENCLOSURE_IN_LB_NOT   (1<<ENCLOSURE_OPTION)  #define ALLOWED_ANCHOR_IN_LB \    ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \ -  | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND \ -  | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) +  | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \ +  | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \ +  | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ +  | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )  #define ALLOWED_ANCHOR_IN_LB_NOT \    ( ANCHOR_LOOK_BEHIND | 
ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \ -  | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND \ -  | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) +  | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \ +  | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \ +  | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ +  | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )    int r;    AnchorNode* an = ANCHOR_(node); @@ -5603,6 +5653,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)    case NODE_CTYPE:      {        int i, min, max; +      int range;        max = ONIGENC_MBC_MAXLEN_DIST(env->enc); @@ -5614,15 +5665,19 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)            break;          case ONIGENC_CTYPE_WORD: +          range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;            if (CTYPE_(node)->not != 0) { -            for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +            for (i = 0; i < range; i++) {                if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) {                  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);                }              } +            for (i = range; i < SINGLE_BYTE_SIZE; i++) { +              add_char_opt_map_info(&opt->map, (UChar )i, env->enc); +            }            }            else { -            for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +            for (i = 0; i < range; i++) {                if (ONIGENC_IS_CODE_WORD(env->enc, i)) {                  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);                } @@ -6171,9 +6226,7 @@ onig_free_body(regex_t* reg)      if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);      if (IS_NOT_NULL(REG_EXTP(reg)))         xfree(REG_EXTP(reg)); -#ifdef USE_NAMED_GROUP      onig_names_free(reg); -#endif    }  } @@ -6202,7 +6255,7 @@ onig_transfer(regex_t* to, regex_t* from)  #ifdef ONIG_DEBUG_COMPILE  static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));  #endif -#ifdef ONIG_DEBUG_PARSE_TREE +#ifdef ONIG_DEBUG_PARSE  static void print_tree P_((FILE* f, Node* node));  #endif @@ -6229,7 +6282,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    if (reg->alloc == 0) {      init_size = (pattern_end - pattern) * 2;      if (init_size <= 0) init_size = COMPILE_INIT_SIZE; -    r = BBUF_INIT(reg, init_size); +    r = BB_INIT(reg, init_size);      if (r != 0) goto end;    }    else @@ -6247,7 +6300,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);    if (r != 0) goto err; -#ifdef USE_NAMED_GROUP    /* mixed use named group and no-named group */    if (scan_env.num_named > 0 &&        IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && @@ -6259,7 +6311,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,      if (r != 0) goto err;    } -#endif    r = check_backrefs(root, &scan_env);    if (r != 0) 
goto err; @@ -6287,7 +6338,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    r = setup_tree(root, reg, 0, &scan_env);    if (r != 0) goto err_unset; -#ifdef ONIG_DEBUG_PARSE_TREE +#ifdef ONIG_DEBUG_PARSE    print_tree(stderr, root);  #endif @@ -6377,9 +6428,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    onig_node_free(root);  #ifdef ONIG_DEBUG_COMPILE -#ifdef USE_NAMED_GROUP    onig_print_names(stderr, reg); -#endif    print_compiled_byte_code_list(stderr, reg);  #endif @@ -6642,6 +6691,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)  #define ARG_MEMNUM       4  #define ARG_OPTION       5  #define ARG_STATE_CHECK  6 +#define ARG_MODE         7  OnigOpInfoType OnigOpInfo[] = {    { OP_FINISH,            "finish",          ARG_NON }, @@ -6666,7 +6716,9 @@ OnigOpInfoType OnigOpInfo[] = {    { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },    { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },    { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL }, +#ifdef USE_OP_CCLASS_NODE    { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL }, +#endif    { OP_ANYCHAR,           "anychar",         ARG_NON },    { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },    { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON }, @@ -6674,11 +6726,13 @@ OnigOpInfoType OnigOpInfo[] = {    { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },    { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },    { OP_WORD,                "word",            ARG_NON }, -  { OP_NOT_WORD,            "not-word",        ARG_NON }, -  { OP_WORD_BOUND,          "word-bound",      ARG_NON }, -  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON }, -  { OP_WORD_BEGIN,          "word-begin",      ARG_NON }, -  { OP_WORD_END,            "word-end",        ARG_NON }, +  { OP_WORD_ASCII,          "word-ascii",      ARG_NON }, +  { OP_NO_WORD,             "not-word",  
      ARG_NON }, +  { OP_NO_WORD_ASCII,       "not-word-ascii",  ARG_NON }, +  { OP_WORD_BOUNDARY,       "word-boundary",     ARG_MODE }, +  { OP_NO_WORD_BOUNDARY,    "not-word-boundary", ARG_MODE }, +  { OP_WORD_BEGIN,          "word-begin",      ARG_MODE }, +  { OP_WORD_END,            "word-end",        ARG_MODE },    { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },    { OP_END_BUF,             "end-buf",         ARG_NON },    { OP_BEGIN_LINE,          "begin-line",      ARG_NON }, @@ -6800,6 +6854,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,    StateCheckNumType scn;    OnigCodePoint code;    OnigOptionType option; +  ModeType mode;    UChar *q;    fprintf(f, "%s", op2name(*bp)); @@ -6840,6 +6895,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,        bp += SIZE_STATE_CHECK_NUM;        fprintf(f, ":%d", scn);        break; + +    case ARG_MODE: +      mode = *((ModeType* )bp); +      bp += SIZE_MODE; +      fprintf(f, ":%d", mode); +      break;      }    }    else { @@ -6939,6 +7000,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,        fprintf(f, ":%d:%d:%d", n, (int )code, len);        break; +#ifdef USE_OP_CCLASS_NODE      case OP_CCLASS_NODE:        {          CClassNode *cc; @@ -6948,6 +7010,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,          fprintf(f, ":%p:%d", cc, n);        }        break; +#endif      case OP_BACKREF_N_IC:        mem = *((MemNumType* )bp); @@ -7082,7 +7145,7 @@ print_compiled_byte_code_list(FILE* f, regex_t* reg)  }  #endif -#ifdef ONIG_DEBUG_PARSE_TREE +#ifdef ONIG_DEBUG_PARSE  static void  Indent(FILE* f, int indent) @@ -7157,9 +7220,13 @@ print_indent_tree(FILE* f, Node* node, int indent)      case ONIGENC_CTYPE_WORD:        if (CTYPE_(node)->not != 0) -        fputs("not word",       f); +        fputs("not word", f);        else -        fputs("word",           f); +       
 fputs("word",     f); + +      if (CTYPE_(node)->ascii_mode != 0) +        fputs(" (ascii)", f); +        break;      default: @@ -7171,19 +7238,23 @@ print_indent_tree(FILE* f, Node* node, int indent)    case NODE_ANCHOR:      fprintf(f, "<anchor:%p> ", node);      switch (ANCHOR_(node)->type) { -    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break; -    case ANCHOR_END_BUF:        fputs("end buf",        f); break; -    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break; -    case ANCHOR_END_LINE:       fputs("end line",       f); break; -    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break; -    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; - -    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break; -    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break; +    case ANCHOR_BEGIN_BUF:        fputs("begin buf",      f); break; +    case ANCHOR_END_BUF:          fputs("end buf",        f); break; +    case ANCHOR_BEGIN_LINE:       fputs("begin line",     f); break; +    case ANCHOR_END_LINE:         fputs("end line",       f); break; +    case ANCHOR_SEMI_END_BUF:     fputs("semi end buf",   f); break; +    case ANCHOR_BEGIN_POSITION:   fputs("begin position", f); break; + +    case ANCHOR_WORD_BOUNDARY:    fputs("word boundary",     f); break; +    case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;  #ifdef USE_WORD_BEGIN_END -    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break; -    case ANCHOR_WORD_END:        fputs("word end", f);       break; +    case ANCHOR_WORD_BEGIN:       fputs("word begin", f);     break; +    case ANCHOR_WORD_END:         fputs("word end", f);       break;  #endif +    case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +      fputs("extended-grapheme-cluster boundary", f); break; +    case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +      fputs("no-extended-grapheme-cluster boundary", f); break;      case ANCHOR_PREC_READ:        
fprintf(f, "prec read\n");        print_indent_tree(f, NODE_BODY(node), indent + add);  | 
