summary | refs | log | tree | commit | diff
path: root/src/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regcomp.c')
-rw-r--r-- src/regcomp.c 253
1 file changed, 162 insertions, 91 deletions
diff --git a/src/regcomp.c b/src/regcomp.c
index 47023cb..ab5701c 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -277,7 +277,7 @@ unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
static int
add_opcode(regex_t* reg, int opcode)
{
- BBUF_ADD1(reg, opcode);
+ BB_ADD1(reg, opcode);
return 0;
}
@@ -287,7 +287,7 @@ add_state_check_num(regex_t* reg, int num)
{
StateCheckNumType n = (StateCheckNumType )num;
- BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
+ BB_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
return 0;
}
#endif
@@ -297,7 +297,7 @@ add_rel_addr(regex_t* reg, int addr)
{
RelAddrType ra = (RelAddrType )addr;
- BBUF_ADD(reg, &ra, SIZE_RELADDR);
+ BB_ADD(reg, &ra, SIZE_RELADDR);
return 0;
}
@@ -306,7 +306,7 @@ add_abs_addr(regex_t* reg, int addr)
{
AbsAddrType ra = (AbsAddrType )addr;
- BBUF_ADD(reg, &ra, SIZE_ABSADDR);
+ BB_ADD(reg, &ra, SIZE_ABSADDR);
return 0;
}
@@ -315,7 +315,7 @@ add_length(regex_t* reg, int len)
{
LengthType l = (LengthType )len;
- BBUF_ADD(reg, &l, SIZE_LENGTH);
+ BB_ADD(reg, &l, SIZE_LENGTH);
return 0;
}
@@ -324,7 +324,7 @@ add_mem_num(regex_t* reg, int num)
{
MemNumType n = (MemNumType )num;
- BBUF_ADD(reg, &n, SIZE_MEMNUM);
+ BB_ADD(reg, &n, SIZE_MEMNUM);
return 0;
}
@@ -334,7 +334,7 @@ add_pointer(regex_t* reg, void* addr)
{
PointerType ptr = (PointerType )addr;
- BBUF_ADD(reg, &ptr, SIZE_POINTER);
+ BB_ADD(reg, &ptr, SIZE_POINTER);
return 0;
}
#endif
@@ -342,7 +342,7 @@ add_pointer(regex_t* reg, void* addr)
static int
add_option(regex_t* reg, OnigOptionType option)
{
- BBUF_ADD(reg, &option, SIZE_OPTION);
+ BB_ADD(reg, &option, SIZE_OPTION);
return 0;
}
@@ -351,7 +351,7 @@ add_save_type(regex_t* reg, enum SaveType type)
{
SaveType t = (SaveType )type;
- BBUF_ADD(reg, &t, SIZE_SAVE_TYPE);
+ BB_ADD(reg, &t, SIZE_SAVE_TYPE);
return 0;
}
@@ -360,7 +360,14 @@ add_update_var_type(regex_t* reg, enum UpdateVarType type)
{
UpdateVarType t = (UpdateVarType )type;
- BBUF_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);
+ BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);
+ return 0;
+}
+
+static int
+add_mode(regex_t* reg, ModeType mode)
+{
+ BB_ADD(reg, &mode, SIZE_MODE);
return 0;
}
@@ -378,14 +385,14 @@ add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
static int
add_bytes(regex_t* reg, UChar* bytes, int len)
{
- BBUF_ADD(reg, bytes, len);
+ BB_ADD(reg, bytes, len);
return 0;
}
static int
add_bitset(regex_t* reg, BitSetRef bs)
{
- BBUF_ADD(reg, bs, SIZE_BITSET);
+ BB_ADD(reg, bs, SIZE_BITSET);
return 0;
}
@@ -492,7 +499,7 @@ compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
r = add_opcode(reg, OP_CALL);
if (r != 0) return r;
- r = unset_addr_list_add(env->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
+ r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),
NODE_CALL_BODY(node));
if (r != 0) return r;
r = add_abs_addr(reg, 0 /*dummy addr.*/);
@@ -655,7 +662,7 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
return add_bytes(reg, mbuf->p, mbuf->used);
#else
int r, pad_size;
- UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
+ UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
GET_ALIGNMENT_PAD_SIZE(p, pad_size);
add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
@@ -1400,7 +1407,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
r = add_opcode(reg, OP_CALL);
if (r != 0) return r;
- node->m.called_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+ node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
NODE_STATUS_ADD(node, NST_ADDR_FIXED);
r = add_abs_addr(reg, (int )node->m.called_addr);
if (r != 0) return r;
@@ -1418,7 +1425,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
if (NODE_IS_CALLED(node)) {
r = add_opcode(reg, OP_CALL);
if (r != 0) return r;
- node->m.called_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+ node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
NODE_STATUS_ADD(node, NST_ADDR_FIXED);
r = add_abs_addr(reg, (int )node->m.called_addr);
if (r != 0) return r;
@@ -1588,6 +1595,20 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
break;
+ case ANCHOR_WORD_BOUNDARY:
+ case ANCHOR_NO_WORD_BOUNDARY:
+#ifdef USE_WORD_BEGIN_END
+ case ANCHOR_WORD_BEGIN:
+ case ANCHOR_WORD_END:
+#endif
+ len = SIZE_OP_WORD_BOUNDARY;
+ break;
+
+ case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ len = SIZE_OPCODE;
+ break;
+
default:
len = SIZE_OPCODE;
break;
@@ -1600,6 +1621,7 @@ static int
compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
{
int r, len;
+ enum OpCode op;
switch (node->type) {
case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
@@ -1609,13 +1631,34 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
- case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
- case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
+ case ANCHOR_WORD_BOUNDARY:
+ op = OP_WORD_BOUNDARY;
+ word:
+ r = add_opcode(reg, op);
+ if (r != 0) return r;
+ r = add_mode(reg, (ModeType )node->ascii_mode);
+ break;
+
+ case ANCHOR_NO_WORD_BOUNDARY:
+ op = OP_NO_WORD_BOUNDARY; goto word;
+ break;
#ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
- case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
+ case ANCHOR_WORD_BEGIN:
+ op = OP_WORD_BEGIN; goto word;
+ break;
+ case ANCHOR_WORD_END:
+ op = OP_WORD_END; goto word;
+ break;
#endif
+ case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ break;
+
+ case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ break;
+
case ANCHOR_PREC_READ:
r = add_opcode(reg, OP_PREC_READ_START);
if (r != 0) return r;
@@ -1914,9 +1957,12 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
break;
case ONIGENC_CTYPE_WORD:
- if (CTYPE_(node)->not != 0) op = OP_NOT_WORD;
- else op = OP_WORD;
-
+ if (CTYPE_(node)->ascii_mode == 0) {
+ op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
+ }
+ else {
+ op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
+ }
r = add_opcode(reg, op);
break;
@@ -2038,8 +2084,6 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
return r;
}
-#ifdef USE_NAMED_GROUP
-
static int
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
{
@@ -2283,7 +2327,6 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
return onig_renumber_name_table(reg, map);
}
-#endif /* USE_NAMED_GROUP */
#ifdef USE_CALL
static int
@@ -2301,7 +2344,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
addr = en->m.called_addr;
offset = uslist->us[i].offset;
- BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
+ BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);
}
return 0;
}
@@ -2394,9 +2437,6 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
#endif
case NODE_CTYPE:
- *len = 1;
- break;
-
case NODE_CCLASS:
*len = 1;
break;
@@ -2496,7 +2536,8 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
switch (ytype) {
case NODE_CTYPE:
if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
- CTYPE_(y)->not != CTYPE_(x)->not)
+ CTYPE_(y)->not != CTYPE_(x)->not &&
+ CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
return 1;
else
return 0;
@@ -2523,6 +2564,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
case NODE_CCLASS:
{
+ int range;
CClassNode* xc = CCLASS_(x);
switch (ytype) {
case NODE_CTYPE:
@@ -2534,9 +2576,10 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
case ONIGENC_CTYPE_WORD:
if (CTYPE_(y)->not == 0) {
if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
+ for (i = 0; i < range; i++) {
if (BITSET_AT(xc->bs, i)) {
- if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
+ if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
}
}
return 1;
@@ -2545,18 +2588,18 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
}
else {
if (IS_NOT_NULL(xc->mbuf)) return 0;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! IS_CODE_SB_WORD(reg->enc, i)) {
- if (!IS_NCCLASS_NOT(xc)) {
- if (BITSET_AT(xc->bs, i))
- return 0;
- }
- else {
- if (! BITSET_AT(xc->bs, i))
- return 0;
- }
+ if (IS_NCCLASS_NOT(xc)) return 0;
+
+ range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
+ for (i = 0; i < range; i++) {
+ if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
+ if (BITSET_AT(xc->bs, i))
+ return 0;
}
}
+ for (i = range; i < SINGLE_BYTE_SIZE; i++) {
+ if (BITSET_AT(xc->bs, i)) return 0;
+ }
return 1;
}
break;
@@ -2612,10 +2655,18 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
break;
case ONIGENC_CTYPE_WORD:
- if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
- return CTYPE_(y)->not;
- else
- return !(CTYPE_(y)->not);
+ if (CTYPE_(y)->ascii_mode == 0) {
+ if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
+ return CTYPE_(y)->not;
+ else
+ return !(CTYPE_(y)->not);
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
+ return CTYPE_(y)->not;
+ else
+ return !(CTYPE_(y)->not);
+ }
break;
default:
break;
@@ -2780,7 +2831,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
case NODE_ENCLOSURE:
{
EnclosureNode* en = ENCLOSURE_(node);
- if ((en->type & enclosure_mask) == 0)
+ if (((1<<en->type) & enclosure_mask) == 0)
return 1;
r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
@@ -3512,7 +3563,7 @@ divide_look_behind_alternatives(Node* node)
np = node;
while (IS_NOT_NULL(np = NODE_CDR(np))) {
- insert_node = onig_node_new_anchor(anc_type);
+ insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);
CHECK_NULL_RETURN_MEMERR(insert_node);
NODE_BODY(insert_node) = NODE_CAR(np);
NODE_CAR(np) = insert_node;
@@ -4150,22 +4201,19 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
if (cn->by_number != 0) {
int gnum = cn->group_num;
-#ifdef USE_NAMED_GROUP
if (env->num_named > 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
!ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
}
-#endif
+
if (gnum > env->num_mem) {
onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
cn->name, cn->name_end);
return ONIGERR_UNDEFINED_GROUP_REFERENCE;
}
-#ifdef USE_NAMED_GROUP
set_call_attr:
-#endif
NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
if (IS_NULL(NODE_CALL_BODY(cn))) {
onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
@@ -4173,7 +4221,6 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
}
-#ifdef USE_NAMED_GROUP
else {
int *refs;
@@ -4193,7 +4240,6 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
goto set_call_attr;
}
}
-#endif
return 0;
}
@@ -4579,18 +4625,22 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
| BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \
| BIT_NODE_CALL )
-#define ALLOWED_ENCLOSURE_IN_LB ( ENCLOSURE_MEMORY | ENCLOSURE_OPTION )
-#define ALLOWED_ENCLOSURE_IN_LB_NOT ENCLOSURE_OPTION
+#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )
+#define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)
#define ALLOWED_ANCHOR_IN_LB \
( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \
- | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND \
- | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
+ | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \
+ | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
+ | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
+ | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
#define ALLOWED_ANCHOR_IN_LB_NOT \
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \
- | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND \
- | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
+ | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \
+ | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
+ | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
+ | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
int r;
AnchorNode* an = ANCHOR_(node);
@@ -5603,6 +5653,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case NODE_CTYPE:
{
int i, min, max;
+ int range;
max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
@@ -5614,15 +5665,19 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
break;
case ONIGENC_CTYPE_WORD:
+ range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
if (CTYPE_(node)->not != 0) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ for (i = 0; i < range; i++) {
if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
}
+ for (i = range; i < SINGLE_BYTE_SIZE; i++) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
}
else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ for (i = 0; i < range; i++) {
if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
@@ -6171,9 +6226,7 @@ onig_free_body(regex_t* reg)
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
if (IS_NOT_NULL(REG_EXTP(reg))) xfree(REG_EXTP(reg));
-#ifdef USE_NAMED_GROUP
onig_names_free(reg);
-#endif
}
}
@@ -6202,7 +6255,7 @@ onig_transfer(regex_t* to, regex_t* from)
#ifdef ONIG_DEBUG_COMPILE
static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
#endif
-#ifdef ONIG_DEBUG_PARSE_TREE
+#ifdef ONIG_DEBUG_PARSE
static void print_tree P_((FILE* f, Node* node));
#endif
@@ -6229,7 +6282,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
if (reg->alloc == 0) {
init_size = (pattern_end - pattern) * 2;
if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
- r = BBUF_INIT(reg, init_size);
+ r = BB_INIT(reg, init_size);
if (r != 0) goto end;
}
else
@@ -6247,7 +6300,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
-#ifdef USE_NAMED_GROUP
/* mixed use named group and no-named group */
if (scan_env.num_named > 0 &&
IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
@@ -6259,7 +6311,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
if (r != 0) goto err;
}
-#endif
r = check_backrefs(root, &scan_env);
if (r != 0) goto err;
@@ -6287,7 +6338,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = setup_tree(root, reg, 0, &scan_env);
if (r != 0) goto err_unset;
-#ifdef ONIG_DEBUG_PARSE_TREE
+#ifdef ONIG_DEBUG_PARSE
print_tree(stderr, root);
#endif
@@ -6377,9 +6428,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
onig_node_free(root);
#ifdef ONIG_DEBUG_COMPILE
-#ifdef USE_NAMED_GROUP
onig_print_names(stderr, reg);
-#endif
print_compiled_byte_code_list(stderr, reg);
#endif
@@ -6642,6 +6691,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
#define ARG_MEMNUM 4
#define ARG_OPTION 5
#define ARG_STATE_CHECK 6
+#define ARG_MODE 7
OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON },
@@ -6666,7 +6716,9 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
{ OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
{ OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
+#ifdef USE_OP_CCLASS_NODE
{ OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
+#endif
{ OP_ANYCHAR, "anychar", ARG_NON },
{ OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
{ OP_ANYCHAR_STAR, "anychar*", ARG_NON },
@@ -6674,11 +6726,13 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
{ OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
{ OP_WORD, "word", ARG_NON },
- { OP_NOT_WORD, "not-word", ARG_NON },
- { OP_WORD_BOUND, "word-bound", ARG_NON },
- { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
- { OP_WORD_BEGIN, "word-begin", ARG_NON },
- { OP_WORD_END, "word-end", ARG_NON },
+ { OP_WORD_ASCII, "word-ascii", ARG_NON },
+ { OP_NO_WORD, "not-word", ARG_NON },
+ { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON },
+ { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE },
+ { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE },
+ { OP_WORD_BEGIN, "word-begin", ARG_MODE },
+ { OP_WORD_END, "word-end", ARG_MODE },
{ OP_BEGIN_BUF, "begin-buf", ARG_NON },
{ OP_END_BUF, "end-buf", ARG_NON },
{ OP_BEGIN_LINE, "begin-line", ARG_NON },
@@ -6800,6 +6854,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
StateCheckNumType scn;
OnigCodePoint code;
OnigOptionType option;
+ ModeType mode;
UChar *q;
fprintf(f, "%s", op2name(*bp));
@@ -6840,6 +6895,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
bp += SIZE_STATE_CHECK_NUM;
fprintf(f, ":%d", scn);
break;
+
+ case ARG_MODE:
+ mode = *((ModeType* )bp);
+ bp += SIZE_MODE;
+ fprintf(f, ":%d", mode);
+ break;
}
}
else {
@@ -6939,6 +7000,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
fprintf(f, ":%d:%d:%d", n, (int )code, len);
break;
+#ifdef USE_OP_CCLASS_NODE
case OP_CCLASS_NODE:
{
CClassNode *cc;
@@ -6948,6 +7010,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
fprintf(f, ":%p:%d", cc, n);
}
break;
+#endif
case OP_BACKREF_N_IC:
mem = *((MemNumType* )bp);
@@ -7082,7 +7145,7 @@ print_compiled_byte_code_list(FILE* f, regex_t* reg)
}
#endif
-#ifdef ONIG_DEBUG_PARSE_TREE
+#ifdef ONIG_DEBUG_PARSE
static void
Indent(FILE* f, int indent)
@@ -7157,9 +7220,13 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ONIGENC_CTYPE_WORD:
if (CTYPE_(node)->not != 0)
- fputs("not word", f);
+ fputs("not word", f);
else
- fputs("word", f);
+ fputs("word", f);
+
+ if (CTYPE_(node)->ascii_mode != 0)
+ fputs(" (ascii)", f);
+
break;
default:
@@ -7171,19 +7238,23 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NODE_ANCHOR:
fprintf(f, "<anchor:%p> ", node);
switch (ANCHOR_(node)->type) {
- case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
- case ANCHOR_END_BUF: fputs("end buf", f); break;
- case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
- case ANCHOR_END_LINE: fputs("end line", f); break;
- case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
- case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
-
- case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
- case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
+ case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
+ case ANCHOR_END_BUF: fputs("end buf", f); break;
+ case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
+ case ANCHOR_END_LINE: fputs("end line", f); break;
+ case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
+ case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
+
+ case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break;
+ case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
#ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
- case ANCHOR_WORD_END: fputs("word end", f); break;
+ case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
+ case ANCHOR_WORD_END: fputs("word end", f); break;
#endif
+ case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ fputs("extended-grapheme-cluster boundary", f); break;
+ case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ fputs("no-extended-grapheme-cluster boundary", f); break;
case ANCHOR_PREC_READ:
fprintf(f, "prec read\n");
print_indent_tree(f, NODE_BODY(node), indent + add);