diff options
Diffstat (limited to 'src/regexec.c')
-rw-r--r-- | src/regexec.c | 2553 |
1 files changed, 1997 insertions, 556 deletions
diff --git a/src/regexec.c b/src/regexec.c index 53f42ee..35e3698 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,11 +26,8 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ - #include "regint.h" -#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - #define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \ ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end)) @@ -40,6 +37,565 @@ ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) #endif +#define CHECK_INTERRUPT_IN_MATCH + +#ifdef USE_CALLOUT +typedef struct { + int last_match_at_call_counter; + struct { + OnigType type; + OnigValue val; + } slot[ONIG_CALLOUT_DATA_SLOT_NUM]; +} CalloutData; +#endif + +struct OnigMatchParamStruct { + unsigned int match_stack_limit; + unsigned long retry_limit_in_match; + OnigCalloutFunc progress_callout_of_contents; + OnigCalloutFunc retraction_callout_of_contents; +#ifdef USE_CALLOUT + int match_at_call_counter; + void* callout_user_data; + CalloutData* callout_data; + int callout_data_alloc_num; +#endif +}; + +extern int +onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param, + unsigned int limit) +{ + param->match_stack_limit = limit; + return ONIG_NORMAL; +} + +extern int +onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param, + unsigned long limit) +{ + param->retry_limit_in_match = limit; + return ONIG_NORMAL; +} + +extern int +onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ + param->progress_callout_of_contents = f; + return ONIG_NORMAL; +} + +extern int +onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ + param->retraction_callout_of_contents = f; + return ONIG_NORMAL; +} + + + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + int ptr_num; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ + unsigned int match_stack_limit; + unsigned long retry_limit_in_match; + OnigMatchParam* mp; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +} MatchArg; + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL -1 +#define ARG_NON 0 +#define ARG_RELADDR 1 +#define ARG_ABSADDR 2 +#define ARG_LENGTH 3 +#define ARG_MEMNUM 4 +#define ARG_OPTION 5 +#define ARG_MODE 6 + +typedef struct { + short int opcode; + char* name; + short int arg_type; +} OpInfoType; + +static OpInfoType OpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, +#ifdef USE_OP_CCLASS_NODE + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, +#endif + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { OP_WORD_ASCII, "word-ascii", ARG_NON }, + { OP_NO_WORD, "not-word", ARG_NON }, + { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON }, + { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE }, + { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE }, + { OP_WORD_BEGIN, "word-begin", ARG_MODE }, + { OP_WORD_END, "word-end", ARG_MODE }, + { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "extended-grapheme-cluster-boundary", ARG_NON }, + { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREF_N, "backref-n", ARG_MEMNUM }, + { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL }, + { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL }, + { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_PUSH_SUPER, "push-super", ARG_RELADDR }, + { OP_POP_OUT, "pop-out", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, + { OP_PREC_READ_START, "push-pos", ARG_NON }, + { OP_PREC_READ_END, "pop-pos", ARG_NON }, + { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR }, + { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON }, + { OP_ATOMIC_START, "atomic-start", ARG_NON }, + { OP_ATOMIC_END, "atomic-end", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, + { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, +#ifdef USE_CALLOUT + { OP_CALLOUT_CONTENTS, "callout-contents", ARG_SPECIAL }, + { OP_CALLOUT_NAME, "callout-name", ARG_SPECIAL }, +#endif + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + if (opcode == OpInfo[i].opcode) + return OpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + if (opcode == OpInfo[i].opcode) + return OpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +static void +p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) +{ + RelAddrType curr = (RelAddrType )(p - start); + + fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); +} + +static int +bitset_on_num(BitSetRef bs) +{ + int i, n; + + n = 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(bs, i)) n++; + } + return n; +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, + OnigEncoding enc) +{ + int i, n, arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + OnigCodePoint code; + OnigOptionType option; + ModeType mode; + UChar *q; + + fprintf(f, "%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fputc(':', f); + p_rel_addr(f, addr, bp, start); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":{/%d}", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + + case ARG_MODE: + mode = *((ModeType* )bp); + bp += SIZE_MODE; + fprintf(f, ":%d", mode); + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, bp); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + +#ifdef USE_OP_CCLASS_NODE + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%p:%d", cc, n); + } + break; +#endif + + case OP_BACKREF_N_IC: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + case OP_BACKREF_CHECK: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_BACKREF_WITH_LEVEL: + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + /* fall */ + case OP_BACKREF_CHECK_WITH_LEVEL: + { + LengthType level; + + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fputc(':', f); + p_rel_addr(f, addr, bp, start); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_LOOK_BEHIND_NOT_START: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:", len); + p_rel_addr(f, addr, bp, start); + break; + + case OP_PUSH_SAVE_VAL: + { + SaveType type; + GET_SAVE_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + + case OP_UPDATE_VAR: + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + +#ifdef USE_CALLOUT + case OP_CALLOUT_CONTENTS: + { + GET_MEMNUM_INC(mem, bp); // number + fprintf(f, ":%d", mem); + } + break; + + case OP_CALLOUT_NAME: + { + int id; + + GET_MEMNUM_INC(id, bp); // id + GET_MEMNUM_INC(mem, bp); // number + + fprintf(f, ":%d:%d", id, mem); + } + break; +#endif + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); + } + } + if (nextp) *nextp = bp; +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_COMPILE +extern void +onig_print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + UChar* bp; + UChar* start = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", + reg->bt_mem_start, reg->bt_mem_end); + fprintf(f, "code-length: %d\n", reg->used); + + bp = start; + while (bp < end) { + int pos = bp - start; + + fprintf(f, "%4d: ", pos); + onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); + fprintf(f, "\n"); + } + fprintf(f, "\n"); +} +#endif + + #ifdef USE_CAPTURE_HISTORY static void history_tree_free(OnigCaptureTreeNode* node); @@ -304,6 +860,45 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #endif } +#ifdef USE_CALLOUT +#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \ + args.in = (ain);\ + args.name_id = (aname_id);\ + args.num = anum;\ + args.regex = reg;\ + args.string = str;\ + args.string_end = end;\ + args.start = sstart;\ + args.right_range = right_range;\ + args.current = s;\ + args.retry_in_match_counter = retry_in_match_counter;\ + args.msa = msa;\ + args.stk_base = stk_base;\ + args.stk = stk;\ + args.mem_start_stk = mem_start_stk;\ + args.mem_end_stk = mem_end_stk;\ + result = (func)(&args, user);\ +} while (0) + +#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\ + int result;\ + OnigCalloutArgs args;\ + CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\ + switch (result) {\ + case ONIG_CALLOUT_FAIL:\ + case ONIG_CALLOUT_SUCCESS:\ + break;\ + default:\ + if (result > 0) {\ + result = ONIGERR_INVALID_ARGUMENT;\ + }\ + best_len = result;\ + goto finish;\ + break;\ + }\ +} while(0) +#endif + /** stack **/ #define INVALID_STACK_INDEX -1 @@ -316,40 +911,43 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_ALT (0x0002 | STK_ALT_FLAG) #define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG) #define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG) + /* handled by normal-POP */ -#define STK_MEM_START 0x0100 -#define STK_MEM_END 0x8200 -#define STK_REPEAT_INC 0x0300 -#define STK_STATE_CHECK_MARK 0x1000 +#define STK_MEM_START 0x0010 +#define STK_MEM_END 0x8030 +#define STK_REPEAT_INC 0x0050 +#ifdef USE_CALLOUT +#define STK_CALLOUT 0x0070 +#endif + /* avoided by normal-POP */ #define STK_VOID 0x0000 /* for fill a blank */ #define STK_EMPTY_CHECK_START 0x3000 #define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */ -#define STK_MEM_END_MARK 0x8400 -#define STK_TO_VOID_START 0x0500 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0600 -#define STK_CALL_FRAME 0x0700 -#define STK_RETURN 0x0800 -#define STK_SAVE_VAL 0x0900 +#define STK_MEM_END_MARK 0x8100 +#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0300 +#define STK_CALL_FRAME 0x0400 +#define STK_RETURN 0x0500 +#define STK_SAVE_VAL 0x0600 /* stack type check mask */ #define STK_MASK_POP_USED STK_ALT_FLAG -#define STK_MASK_TO_VOID_TARGET 0x10fe +#define STK_MASK_POP_HANDLED 0x0010 +#define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004) +#define STK_MASK_TO_VOID_TARGET 0x100e #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ typedef intptr_t StackIndex; typedef struct _StackType { unsigned int type; - int id; + int zid; union { struct { UChar *pcode; /* byte code position */ UChar *pstr; /* string position */ UChar *pstr_prev; /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif } state; struct { int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ @@ -378,67 +976,66 @@ typedef struct _StackType { UChar* v; UChar* v2; } val; +#ifdef USE_CALLOUT + struct { + int num; + OnigCalloutFunc func; + } callout; +#endif } u; } StackType; +#ifdef USE_CALLOUT + +struct OnigCalloutArgsStruct { + OnigCalloutIn in; + int name_id; /* name id or ONIG_NON_NAME_ID */ + int num; + OnigRegex regex; + const OnigUChar* string; + const OnigUChar* string_end; + const OnigUChar* start; + const OnigUChar* right_range; + const OnigUChar* current; // current matching position + unsigned long retry_in_match_counter; + + /* invisible to users */ + MatchArg* msa; + StackType* stk_base; + StackType* stk; + StackIndex* mem_start_stk; + StackIndex* mem_end_stk; +}; + +#endif + #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ + (msa).match_stack_limit = (mp)->match_stack_limit;\ + (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ + (msa).mp = mp;\ (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #else -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ + (msa).match_stack_limit = (mp)->match_stack_limit;\ + (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ + (msa).mp = mp;\ (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 - -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\ - if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ - unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ - offset = ((offset) * (state_num)) >> 3;\ - if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ - if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ - (msa).state_check_buff = (void* )xmalloc(size);\ - else \ - (msa).state_check_buff = (void* )xalloca(size);\ - xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ - (size_t )(size - (offset))); \ - (msa).state_check_buff_size = size;\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ -} while(0) - -#define MATCH_ARG_FREE(msa) do {\ - if ((msa).stack_p) xfree((msa).stack_p);\ - if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ - if ((msa).state_check_buff) xfree((msa).state_check_buff);\ - }\ -} while(0) -#else -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) -#endif #define ALLOCA_PTR_NUM_LIMIT 50 @@ -495,25 +1092,303 @@ typedef struct _StackType { mem_end_stk = mem_start_stk + num_mem + 1;\ } while(0) -static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; +static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE; extern unsigned int onig_get_match_stack_limit_size(void) { - return MatchStackLimitSize; + return MatchStackLimit; } extern int onig_set_match_stack_limit_size(unsigned int size) { - MatchStackLimitSize = size; + MatchStackLimit = size; return 0; } +#ifdef USE_RETRY_LIMIT_IN_MATCH + +static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH; + +#define CHECK_RETRY_LIMIT_IN_MATCH do {\ + if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\ +} while (0) + +#else + +#define CHECK_RETRY_LIMIT_IN_MATCH + +#endif /* USE_RETRY_LIMIT_IN_MATCH */ + +extern unsigned long +onig_get_retry_limit_in_match(void) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH + return RetryLimitInMatch; +#else + //return ONIG_NO_SUPPORT_CONFIG; + return 0; +#endif +} + +extern int +onig_set_retry_limit_in_match(unsigned long size) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH + RetryLimitInMatch = size; + return 0; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +static OnigCalloutFunc DefaultProgressCallout; +static OnigCalloutFunc DefaultRetractionCallout; + +extern OnigMatchParam* +onig_new_match_param(void) +{ + OnigMatchParam* p; + + p = (OnigMatchParam* )xmalloc(sizeof(*p)); + if (IS_NOT_NULL(p)) { + onig_initialize_match_param(p); + } + + return p; +} + +extern void +onig_free_match_param_content(OnigMatchParam* p) +{ +#ifdef USE_CALLOUT + if (IS_NOT_NULL(p->callout_data)) { + xfree(p->callout_data); + p->callout_data = 0; + } +#endif +} + +extern void +onig_free_match_param(OnigMatchParam* p) +{ + if (IS_NOT_NULL(p)) { + onig_free_match_param_content(p); + xfree(p); + } +} + +extern int +onig_initialize_match_param(OnigMatchParam* mp) +{ + mp->match_stack_limit = MatchStackLimit; +#ifdef USE_RETRY_LIMIT_IN_MATCH + mp->retry_limit_in_match = RetryLimitInMatch; +#endif + mp->progress_callout_of_contents = DefaultProgressCallout; + mp->retraction_callout_of_contents = DefaultRetractionCallout; + +#ifdef USE_CALLOUT + mp->match_at_call_counter = 0; + mp->callout_user_data = 0; + mp->callout_data = 0; + mp->callout_data_alloc_num = 0; +#endif + + return ONIG_NORMAL; +} + +#ifdef USE_CALLOUT + +static int +adjust_match_param(regex_t* reg, OnigMatchParam* mp) +{ + RegexExt* ext = REG_EXTP(reg); + + mp->match_at_call_counter = 0; + + if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL; + + if (ext->callout_num > mp->callout_data_alloc_num) { + CalloutData* d; + size_t n = ext->callout_num * sizeof(*d); + if (IS_NOT_NULL(mp->callout_data)) + d = (CalloutData* )xrealloc(mp->callout_data, n); + else + d = (CalloutData* )xmalloc(n); + CHECK_NULL_RETURN_MEMERR(d); + + mp->callout_data = d; + mp->callout_data_alloc_num = ext->callout_num; + } + + xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData)); + return ONIG_NORMAL; +} + +#define ADJUST_MATCH_PARAM(reg, mp) \ + r = adjust_match_param(reg, mp);\ + if (r != ONIG_NORMAL) return r; + +#define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1)) + +extern int +onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args) +{ + OnigMatchParam* mp; + int num; + CalloutData* d; + + mp = args->msa->mp; + num = args->num; + + d = CALLOUT_DATA_AT_NUM(mp, num); + if (d->last_match_at_call_counter != mp->match_at_call_counter) { + xmemset(d, 0, sizeof(*d)); + d->last_match_at_call_counter = mp->match_at_call_counter; + return d->last_match_at_call_counter; + } + + return 0; +} + +extern int +onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + OnigType t; + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + t = d->slot[slot].type; + if (IS_NOT_NULL(type)) *type = t; + if (IS_NOT_NULL(val)) *val = d->slot[slot].val; + return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args, + int slot, OnigType* type, + OnigValue* val) +{ + return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp, + args->num, slot, type, val); +} + +extern int +onig_get_callout_data(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + OnigType t; + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + if (d->last_match_at_call_counter != mp->match_at_call_counter) { + xmemset(d, 0, sizeof(*d)); + d->last_match_at_call_counter = mp->match_at_call_counter; + } + + t = d->slot[slot].type; + if (IS_NOT_NULL(type)) *type = t; + if (IS_NOT_NULL(val)) *val = d->slot[slot].val; + return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, + const UChar* tag, const UChar* tag_end, int slot, + OnigType* type, OnigValue* val) +{ + int num; + + num = onig_get_callout_num_by_tag(reg, tag, tag_end); + if (num < 0) return num; + if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + return onig_get_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_get_callout_data_by_callout_args(OnigCalloutArgs* args, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot, + type, val); +} + +extern int +onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args, + int slot, OnigType* type, OnigValue* val) +{ + return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot, + type, val); +} + +extern int +onig_set_callout_data(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType type, OnigValue* val) +{ + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + d->slot[slot].type = type; + d->slot[slot].val = *val; + d->last_match_at_call_counter = mp->match_at_call_counter; + + return ONIG_NORMAL; +} + +extern int +onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, + const UChar* tag, const UChar* tag_end, int slot, + OnigType type, OnigValue* val) +{ + int num; + + num = onig_get_callout_num_by_tag(reg, tag, tag_end); + if (num < 0) return num; + if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + return onig_set_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_set_callout_data_by_callout_args(OnigCalloutArgs* args, + int callout_num, int slot, + OnigType type, OnigValue* val) +{ + return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot, + type, val); +} + +extern int +onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args, + int slot, OnigType type, OnigValue* val) +{ + return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot, + type, val); +} + +#else +#define ADJUST_MATCH_PARAM(reg, mp) +#endif /* USE_CALLOUT */ + + static int stack_double(int is_alloca, char** arg_alloc_base, StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, - OnigMatchArg* msa) + MatchArg* msa) { unsigned int n; int used; @@ -541,11 +1416,11 @@ stack_double(int is_alloca, char** arg_alloc_base, xmemcpy(new_alloc_base, alloc_base, size); } else { - if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { - if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) { + if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) return ONIGERR_MATCH_STACK_LIMIT_OVER; else - n = MatchStackLimitSize; + n = msa->match_stack_limit; } new_alloc_base = (char* )xrealloc(alloc_base, new_size); if (IS_NULL(new_alloc_base)) { @@ -584,80 +1459,36 @@ stack_double(int is_alloca, char** arg_alloc_base, #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ - (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ - if (IS_NOT_NULL(state_check_buff)) {\ - int x = STATE_CHECK_POS(s,snum);\ - (v) = state_check_buff[x/8] & (1<<(x%8));\ - }\ - else (v) = 0;\ -} while(0) - - -#define ELSE_IF_STATE_CHECK_MARK(stk) \ - else if ((stk)->type == STK_STATE_CHECK_MARK) { \ - int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ - state_check_buff[x/8] |= (1<<(x%8));\ - } - #define STACK_PUSH(stack_type,pat,s,sprev) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = 0;\ STACK_INC;\ } while(0) #define STACK_PUSH_ENSURED(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_ALT;\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = (IS_NOT_NULL(state_check_buff) ? (snum) : 0);\ STACK_INC;\ } while(0) -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ - if (IS_NOT_NULL(state_check_buff)) { \ - STACK_ENSURE(1);\ - stk->type = STK_STATE_CHECK_MARK;\ - stk->u.state.pstr = (s);\ - stk->u.state.state_check = (snum);\ - STACK_INC;\ - }\ -} while(0) - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define ELSE_IF_STATE_CHECK_MARK(stk) - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ +#ifdef ONIG_DEBUG_MATCH +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\ stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = s;\ + stk->u.state.pstr_prev = sprev;\ STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ +} while (0) +#else +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ STACK_INC;\ -} while(0) -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ +} while (0) +#endif #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) @@ -672,7 +1503,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_REPEAT(sid, pat) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT;\ - stk->id = (sid);\ + stk->zid = (sid);\ stk->u.repeat.pcode = (pat);\ stk->u.repeat.count = 0;\ STACK_INC;\ @@ -688,7 +1519,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_START(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_START;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ stk->u.mem.end = mem_end_stk[mnum];\ @@ -700,7 +1531,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_END(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ stk->u.mem.end = mem_end_stk[mnum];\ @@ -711,7 +1542,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_END_MARK(mnum) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END_MARK;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ STACK_INC;\ } while(0) @@ -721,10 +1552,10 @@ stack_double(int is_alloca, char** arg_alloc_base, while (k > stk_base) {\ k--;\ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ - && k->id == (mnum)) {\ + && k->zid == (mnum)) {\ level++;\ }\ - else if (k->type == STK_MEM_START && k->id == (mnum)) {\ + else if (k->type == STK_MEM_START && k->zid == (mnum)) {\ if (level == 0) break;\ level--;\ }\ @@ -752,7 +1583,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_EMPTY_CHECK_START;\ - stk->id = (cnum);\ + stk->zid = (cnum);\ stk->u.empty_check.pstr = (s);\ STACK_INC;\ } while(0) @@ -760,7 +1591,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\ STACK_ENSURE(1);\ stk->type = STK_EMPTY_CHECK_END;\ - stk->id = (cnum);\ + stk->zid = (cnum);\ STACK_INC;\ } while(0) @@ -780,7 +1611,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ - stk->id = (sid);\ + stk->zid = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ STACK_INC;\ @@ -789,7 +1620,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ - stk->id = (sid);\ + stk->zid = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ stk->u.val.v2 = sprev;\ @@ -815,7 +1646,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->id == (sid)) {\ + && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ break;\ @@ -835,7 +1666,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->id == (sid)) {\ + && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ sprev = k->u.val.v2;\ @@ -869,6 +1700,24 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while (0) +#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALLOUT;\ + stk->zid = ONIG_NON_NAME_ID;\ + stk->u.callout.num = (anum);\ + stk->u.callout.func = (func);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALLOUT;\ + stk->zid = (aid);\ + stk->u.callout.num = (anum);\ + stk->u.callout.func = (func);\ + STACK_INC;\ +} while(0) + #ifdef ONIG_DEBUG #define STACK_BASE_CHECK(p, at) \ if ((p) < stk_base) {\ @@ -884,6 +1733,16 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ } while(0) + +#ifdef USE_CALLOUT +#define POP_CALLOUT_CASE \ + else if (stk->type == STK_CALLOUT) {\ + RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\ + } +#else +#define POP_CALLOUT_CASE +#endif + #define STACK_POP do {\ switch (pop_level) {\ case STACK_POP_LEVEL_FREE:\ @@ -891,7 +1750,6 @@ stack_double(int is_alloca, char** arg_alloc_base, stk--;\ STACK_BASE_CHECK(stk, "STACK_POP"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ case STACK_POP_LEVEL_MEM_START:\ @@ -900,10 +1758,9 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ default:\ @@ -911,75 +1768,70 @@ stack_double(int is_alloca, char** arg_alloc_base, stk--;\ STACK_BASE_CHECK(stk, "STACK_POP 3"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + POP_CALLOUT_CASE\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ }\ } while(0) -#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\ +#define POP_TIL_BODY(aname, til_type) do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \ - if (stk->type == STK_ALT_PREC_READ_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + STACK_BASE_CHECK(stk, (aname));\ + if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\ + if (stk->type == (til_type)) break;\ + else {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + /* Don't call callout here because negation of total success by (?!..) (?<!..) */\ + }\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ } while(0) +#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\ + POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\ +} while(0) + #define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \ - if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ + POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\ } while(0) + #define STACK_EXEC_TO_VOID(k) do {\ k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \ if (IS_TO_VOID_TARGET(k)) {\ + if (k->type == STK_TO_VOID_START) {\ + k->type = STK_VOID;\ + break;\ + }\ k->type = STK_VOID;\ }\ - else if (k->type == STK_TO_VOID_START) {\ - k->type = STK_VOID;\ - break;\ - }\ }\ } while(0) @@ -989,7 +1841,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ (isnull) = (k->u.empty_check.pstr == (s));\ break;\ }\ @@ -1004,7 +1856,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ break;\ @@ -1017,7 +1869,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -1045,7 +1897,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ if (level == 0) {\ if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ @@ -1059,7 +1911,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -1081,7 +1933,7 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ }\ else if (k->type == STK_EMPTY_CHECK_END) {\ - if (k->id == (sid)) level++;\ + if (k->zid == (sid)) level++;\ }\ }\ } while(0) @@ -1116,7 +1968,7 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ if (k->type == STK_REPEAT) {\ if (level == 0) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ break;\ }\ }\ @@ -1208,11 +2060,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range -#else -#define INIT_RIGHT_RANGE right_range = (UChar* )end -#endif #ifdef USE_CAPTURE_HISTORY static int @@ -1225,7 +2073,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, while (k < stk_top) { if (k->type == STK_MEM_START) { - n = k->id; + n = k->zid; if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && MEM_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); @@ -1243,7 +2091,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } } else if (k->type == STK_MEM_END) { - if (k->id == node->group) { + if (k->zid == node->group) { node->end = (int )(k->u.mem.pstr - str); *kp = k; return 0; @@ -1292,7 +2140,7 @@ backref_match_at_nested_level(regex_t* reg, } else if (level == nest) { if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { pstart = k->u.mem.pstr; if (IS_NOT_NULL(pend)) { if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ @@ -1316,7 +2164,7 @@ backref_match_at_nested_level(regex_t* reg, } } else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { pend = k->u.mem.pstr; } } @@ -1347,7 +2195,7 @@ backref_check_at_nested_level(regex_t* reg, } else if (level == nest) { if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { return 1; } } @@ -1391,14 +2239,14 @@ static int OpCurr = OP_FINISH; static int OpPrevTarget = OP_FAIL; static int MaxStackDepth = 0; -#define MOP_IN(opcode) do {\ +#define SOP_IN(opcode) do {\ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\ } while(0) -#define MOP_OUT do {\ +#define SOP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\ } while(0) @@ -1422,9 +2270,9 @@ onig_print_statistics(FILE* f) r = fprintf(f, " count prev time\n"); if (r < 0) return -1; - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + for (i = 0; OpInfo[i].opcode >= 0; i++) { r = fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name); if (r < 0) return -1; } r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); @@ -1442,8 +2290,8 @@ onig_print_statistics(FILE* f) #else #define STACK_INC stk++ -#define MOP_IN(opcode) -#define MOP_OUT +#define SOP_IN(opcode) +#define SOP_OUT #endif @@ -1459,10 +2307,8 @@ typedef struct { /* if sstart == str then set sprev to NULL. */ static int match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* in_right_range, -#endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) + const UChar* in_right_range, const UChar* sstart, UChar* sprev, + MatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1480,16 +2326,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, StackIndex *repeat_stk; StackIndex *mem_start_stk, *mem_end_stk; UChar* keep; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int scv; - unsigned char* state_check_buff = msa->state_check_buff; - int num_comb_exp_check = reg->num_comb_exp_check; +#ifdef USE_RETRY_LIMIT_IN_MATCH + unsigned long retry_limit_in_match; + unsigned long retry_in_match_counter; #endif + +#ifdef USE_CALLOUT + int of; +#endif + UChar *p = reg->p; OnigOptionType option = reg->options; OnigEncoding encode = reg->enc; OnigCaseFoldType case_fold_flag = reg->case_fold_flag; +#ifdef USE_CALLOUT + msa->mp->match_at_call_counter++; +#endif + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match = msa->retry_limit_in_match; +#endif + //n = reg->num_repeat + reg->num_mem * 2; pop_level = reg->stack_pop_level; num_mem = reg->num_mem; @@ -1506,11 +2364,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(sstart - str)); #endif - STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; keep = s = (UChar* )sstart; + STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */ INIT_RIGHT_RANGE; +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_in_match_counter = 0; +#endif + while (1) { #ifdef ONIG_DEBUG_MATCH { @@ -1533,7 +2395,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fputs((char* )buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - fprintf(stderr, "%4d: ", (int )(p - reg->p)); + if (p == FinishCode) + fprintf(stderr, "----: "); + else + fprintf(stderr, "%4d: ", (int )(p - reg->p)); onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode); fprintf(stderr, "\n"); } @@ -1541,7 +2406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sbegin = s; switch (*p++) { - case OP_END: MOP_IN(OP_END); + case OP_END: SOP_IN(OP_END); n = (int )(s - sstart); if (n > best_len) { OnigRegion* region; @@ -1639,7 +2504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE end_best_len: #endif - MOP_OUT; + SOP_OUT; if (IS_FIND_CONDITION(option)) { if (IS_FIND_NOT_EMPTY(option) && s == sstart) { @@ -1655,14 +2520,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto finish; break; - case OP_EXACT1: MOP_IN(OP_EXACT1); + case OP_EXACT1: SOP_IN(OP_EXACT1); DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; break; - case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); + case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC); { int len; UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1681,21 +2546,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; q++; } } - MOP_OUT; + SOP_OUT; break; - case OP_EXACT2: MOP_IN(OP_EXACT2); + case OP_EXACT2: SOP_IN(OP_EXACT2); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT3: MOP_IN(OP_EXACT3); + case OP_EXACT3: SOP_IN(OP_EXACT3); DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -1704,11 +2569,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT4: MOP_IN(OP_EXACT4); + case OP_EXACT4: SOP_IN(OP_EXACT4); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1719,11 +2584,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT5: MOP_IN(OP_EXACT5); + case OP_EXACT5: SOP_IN(OP_EXACT5); DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -1736,22 +2601,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTN: MOP_IN(OP_EXACTN); + case OP_EXACTN: SOP_IN(OP_EXACTN); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); + case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC); { int len; UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1775,20 +2640,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); + case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; break; - case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); + case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1799,11 +2664,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); + case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3); DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -1818,11 +2683,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); + case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { @@ -1832,11 +2697,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 2; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); + case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { @@ -1848,11 +2713,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 3; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); + case OP_EXACTMBN: SOP_IN(OP_EXACTMBN); GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; @@ -1862,19 +2727,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - tlen; - MOP_OUT; + SOP_OUT; continue; break; - case OP_CCLASS: MOP_IN(OP_CCLASS); + case OP_CCLASS: SOP_IN(OP_CCLASS); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); + case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB); if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: @@ -1900,10 +2765,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } p += tlen; - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); + case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1918,18 +2783,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); + case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); + case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; @@ -1967,10 +2832,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; cc_mb_not_success: - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); + case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1985,11 +2850,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - MOP_OUT; + SOP_OUT; break; #ifdef USE_OP_CCLASS_NODE - case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); + case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE); { OnigCodePoint code; void *node; @@ -2005,28 +2870,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, code = ONIGENC_MBC_TO_CODE(encode, ss, s); if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - MOP_OUT; + SOP_OUT; break; #endif - case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); + case OP_ANYCHAR: SOP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); + case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); s += n; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2035,11 +2900,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; s += n; } - MOP_OUT; + SOP_OUT; continue; break; - case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2053,11 +2918,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2069,10 +2934,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } p++; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2089,87 +2954,46 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } p++; - MOP_OUT; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; + SOP_OUT; break; - case OP_STATE_CHECK_ANYCHAR_ML_STAR: - MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); - - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_WORD: MOP_IN(OP_WORD); + case OP_WORD: SOP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_WORD_ASCII: MOP_IN(OP_WORD_ASCII); + case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_NO_WORD: MOP_IN(OP_NO_WORD); + case OP_NO_WORD: SOP_IN(OP_NO_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_NO_WORD_ASCII: MOP_IN(OP_NO_WORD_ASCII); + case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_WORD_BOUNDARY: MOP_IN(OP_WORD_BOUNDARY); + case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode @@ -2189,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_NO_WORD_BOUNDARY: MOP_IN(OP_NO_WORD_BOUNDARY); + case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode @@ -2212,20 +3036,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); + case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - if (ON_STR_BEGIN(s) || - ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { - MOP_OUT; + if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + SOP_OUT; continue; } } @@ -2233,14 +3056,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; break; - case OP_WORD_END: MOP_IN(OP_WORD_END); + case OP_WORD_END: SOP_IN(OP_WORD_END); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - MOP_OUT; + SOP_OUT; continue; } } @@ -2250,82 +3073,82 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { - MOP_OUT; + SOP_OUT; continue; } goto fail; break; case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); + case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_END_BUF: MOP_IN(OP_END_BUF); + case OP_END_BUF: SOP_IN(OP_END_BUF); if (! ON_STR_END(s)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); + case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - MOP_OUT; + SOP_OUT; continue; } goto fail; break; - case OP_END_LINE: MOP_IN(OP_END_LINE); + case OP_END_LINE: SOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - MOP_OUT; + SOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - MOP_OUT; + SOP_OUT; continue; } #endif goto fail; break; - case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); + case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } @@ -2333,7 +3156,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enclen(encode, s))) { - MOP_OUT; + SOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -2341,7 +3164,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, UChar* ss = s + enclen(encode, s); ss += enclen(encode, ss); if (ON_STR_END(ss)) { - MOP_OUT; + SOP_OUT; continue; } } @@ -2349,53 +3172,53 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; break; - case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); + case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION); if (s != msa->start) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); + case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); + case OP_MEMORY_START: SOP_IN(OP_MEMORY_START); GET_MEMNUM_INC(mem, p); mem_start_stk[mem] = (StackIndex )((void* )s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); + case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); + case OP_MEMORY_END: SOP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_CALL - case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); + case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); + case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); @@ -2406,22 +3229,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_BACKREF1: MOP_IN(OP_BACKREF1); + case OP_BACKREF1: SOP_IN(OP_BACKREF1); mem = 1; goto backref; break; - case OP_BACKREF2: MOP_IN(OP_BACKREF2); + case OP_BACKREF2: SOP_IN(OP_BACKREF2); mem = 2; goto backref; break; - case OP_BACKREF_N: MOP_IN(OP_BACKREF_N); + case OP_BACKREF_N: SOP_IN(OP_BACKREF_N); GET_MEMNUM_INC(mem, p); backref: { @@ -2446,12 +3269,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_N_IC: MOP_IN(OP_BACKREF_N_IC); + case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC); GET_MEMNUM_INC(mem, p); { int len; @@ -2475,12 +3298,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); + case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2514,12 +3337,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); + case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2553,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; @@ -2580,13 +3403,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else goto fail; - MOP_OUT; + SOP_OUT; continue; } break; #endif - case OP_BACKREF_CHECK: MOP_IN(OP_BACKREF_CHECK); + case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK); { GET_LENGTH_INC(tlen, p); for (i = 0; i < tlen; i++) { @@ -2599,7 +3422,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; @@ -2619,36 +3442,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else goto fail; - MOP_OUT; + SOP_OUT; continue; } break; #endif #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ - case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); + case OP_SET_OPTION_PUSH: SOP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); STACK_PUSH_ALT(p, s, sprev); p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - MOP_OUT; + SOP_OUT; continue; break; - case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); + case OP_SET_OPTION: SOP_IN(OP_SET_OPTION); GET_OPTION_INC(option, p); - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_EMPTY_CHECK_START: MOP_IN(OP_EMPTY_CHECK_START); + case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START); GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_PUSH_EMPTY_CHECK_START(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_EMPTY_CHECK_END: MOP_IN(OP_EMPTY_CHECK_END); + case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END); { int is_empty; @@ -2677,12 +3500,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT - case OP_EMPTY_CHECK_END_MEMST: MOP_IN(OP_EMPTY_CHECK_END_MEMST); + case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST); { int is_empty; @@ -2696,14 +3519,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto empty_check_found; } } - MOP_OUT; + SOP_OUT; continue; break; #endif #ifdef USE_CALL case OP_EMPTY_CHECK_END_MEMST_PUSH: - MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); + SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); { int is_empty; @@ -2725,103 +3548,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_EMPTY_CHECK_END(mem); } } - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_JUMP: MOP_IN(OP_JUMP); + case OP_JUMP: SOP_IN(OP_JUMP); GET_RELADDR_INC(addr, p); p += addr; - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_PUSH: MOP_IN(OP_PUSH); + case OP_PUSH: SOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_SUPER: MOP_IN(OP_PUSH_SUPER); + case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER); GET_RELADDR_INC(addr, p); STACK_PUSH_SUPER_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); - GET_STATE_CHECK_NUM_INC(mem, p); - GET_RELADDR_INC(addr, p); - STATE_CHECK_VAL(scv, mem); - if (scv) { - p += addr; - } - else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - } - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_STATE_CHECK(s, mem); - MOP_OUT; + SOP_OUT; continue; break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_POP: MOP_IN(OP_POP); + case OP_POP_OUT: SOP_IN(OP_POP_OUT); STACK_POP_ONE; - MOP_OUT; + // for stop backtrack + //CHECK_RETRY_LIMIT_IN_MATCH; + SOP_OUT; continue; break; - case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); + case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; } p += (addr + 1); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); + case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); if (*p == *s) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; } p++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT: MOP_IN(OP_REPEAT); + case OP_REPEAT: SOP_IN(OP_REPEAT); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2834,11 +3622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p + addr, s, sprev); } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); + case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2852,11 +3640,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += addr; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); + case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2874,19 +3662,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); + case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; break; - case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); + case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2908,68 +3696,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); + case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; break; - case OP_PREC_READ_START: MOP_IN(OP_PREC_READ_START); + case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START); STACK_PUSH_POS(s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_END: MOP_IN(OP_PREC_READ_END); + case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END); { STACK_EXEC_TO_VOID(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_NOT_START: MOP_IN(OP_PREC_READ_NOT_START); + case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_NOT_END: MOP_IN(OP_PREC_READ_NOT_END); + case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END); STACK_POP_TIL_ALT_PREC_READ_NOT; goto fail; break; - case OP_ATOMIC_START: MOP_IN(OP_ATOMIC_START); + case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START); STACK_PUSH_TO_VOID_START; - MOP_OUT; + SOP_OUT; continue; break; - case OP_ATOMIC_END: MOP_IN(OP_ATOMIC_END); + case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END); STACK_EXEC_TO_VOID(stkp); - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); + case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND_NOT_START: MOP_IN(OP_LOOK_BEHIND_NOT_START); + case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -2984,33 +3772,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND_NOT_END: MOP_IN(OP_LOOK_BEHIND_NOT_END); + case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END); STACK_POP_TIL_ALT_LOOK_BEHIND_NOT; goto fail; break; #ifdef USE_CALL - case OP_CALL: MOP_IN(OP_CALL); + case OP_CALL: SOP_IN(OP_CALL); GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); p = reg->p + addr; - MOP_OUT; + SOP_OUT; continue; break; - case OP_RETURN: MOP_IN(OP_RETURN); + case OP_RETURN: SOP_IN(OP_RETURN); STACK_RETURN(p); STACK_PUSH_RETURN; - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL); + case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL); { SaveType type; GET_SAVE_TYPE_INC(type, p); @@ -3029,11 +3817,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR); + case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR); { UpdateVarType type; enum SaveType save_type; @@ -3061,31 +3849,99 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - MOP_OUT; + SOP_OUT; + continue; + break; + +#ifdef USE_CALLOUT + case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS); + of = ONIG_CALLOUT_OF_CONTENTS; + goto callout_common_entry; + + SOP_OUT; continue; break; + case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME); + { + int call_result; + int name_id; + int num; + int in; + CalloutListEntry* e; + OnigCalloutFunc func; + OnigCalloutArgs args; + + of = ONIG_CALLOUT_OF_NAME; + GET_MEMNUM_INC(name_id, p); + + callout_common_entry: + GET_MEMNUM_INC(num, p); + e = onig_reg_callout_list_at(reg, num); + in = e->in; + if (of == ONIG_CALLOUT_OF_NAME) { + func = onig_get_callout_start_func(reg, num); + } + else { + name_id = ONIG_NON_NAME_ID; + func = msa->mp->progress_callout_of_contents; + } + + if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) { + CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id, + num, msa->mp->callout_user_data, args, call_result); + switch (call_result) { + case ONIG_CALLOUT_FAIL: + goto fail; + break; + case ONIG_CALLOUT_SUCCESS: + goto retraction_callout2; + break; + default: /* error code */ + if (call_result > 0) { + call_result = ONIGERR_INVALID_ARGUMENT; + } + best_len = call_result; + goto finish; + break; + } + } + else { + retraction_callout2: + if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) { + if (of == ONIG_CALLOUT_OF_NAME) { + if (IS_NOT_NULL(func)) { + STACK_PUSH_CALLOUT_NAME(name_id, num, func); + } + } + else { + func = msa->mp->retraction_callout_of_contents; + if (IS_NOT_NULL(func)) { + STACK_PUSH_CALLOUT_CONTENTS(num, func); + } + } + } + } + } + SOP_OUT; + continue; + break; +#endif + case OP_FINISH: goto finish; break; fail: - MOP_OUT; + SOP_OUT; /* fall */ - case OP_FAIL: MOP_IN(OP_FAIL); + case OP_FAIL: SOP_IN(OP_FAIL); STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; sprev = stk->u.state.pstr_prev; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; - } -#endif - - MOP_OUT; + CHECK_RETRY_LIMIT_IN_MATCH; + SOP_OUT; continue; break; @@ -3113,6 +3969,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, unexpected_bytecode_error: STACK_SAVE; return ONIGERR_UNEXPECTED_BYTECODE; + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match_over: + STACK_SAVE; + return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER; +#endif } @@ -3423,23 +4285,30 @@ map_search_backward(OnigEncoding enc, UChar map[], } return (UChar* )NULL; } - extern int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option) { int r; - UChar *prev; - OnigMatchArg msa; + OnigMatchParam mp; - MATCH_ARG_INIT(msa, reg, option, region, at); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = at - str; - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif + onig_initialize_match_param(&mp); + r = onig_match_with_param(reg, str, end, at, region, option, &mp); + onig_free_match_param_content(&mp); + return r; +} + +extern int +onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, OnigOptionType option, + OnigMatchParam* mp) +{ + int r; + UChar *prev; + MatchArg msa; + ADJUST_MATCH_PARAM(reg, mp); + MATCH_ARG_INIT(msa, reg, option, region, at, mp); if (region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3459,11 +4328,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, } prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, -#endif - at, prev, &msa); + r = match_at(reg, str, end, end, at, prev, &msa); } end: @@ -3497,23 +4362,23 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, retry: switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: + case OPTIMIZE_EXACT: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_IC: + case OPTIMIZE_EXACT_IC: p = slow_search_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_BM: + case OPTIMIZE_EXACT_BM: p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + case OPTIMIZE_EXACT_BM_NO_REV: p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_MAP: + case OPTIMIZE_MAP: p = map_search(reg->enc, reg->map, p, range); break; } @@ -3621,20 +4486,20 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, retry: switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: + case OPTIMIZE_EXACT: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case ONIG_OPTIMIZE_EXACT_IC: + case OPTIMIZE_EXACT_IC: p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case ONIG_OPTIMIZE_EXACT_BM: - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + case OPTIMIZE_EXACT_BM: + case OPTIMIZE_EXACT_BM_NO_REV: #ifdef USE_INT_MAP_BACKWARD if (IS_NULL(reg->int_map_backward)) { int r; @@ -3653,7 +4518,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, #endif break; - case ONIG_OPTIMIZE_MAP: + case OPTIMIZE_MAP: p = map_search_backward(reg->enc, reg->map, range, adjrange, p); break; } @@ -3725,12 +4590,25 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, OnigOptionType option) { int r; + OnigMatchParam mp; + + onig_initialize_match_param(&mp); + r = onig_search_with_param(reg, str, end, start, range, region, option, &mp); + onig_free_match_param_content(&mp); + return r; + +} + +extern int +onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) +{ + int r; UChar *s, *prev; - OnigMatchArg msa; + MatchArg msa; const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE const UChar *orig_range = range; -#endif #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, @@ -3738,6 +4616,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif + ADJUST_MATCH_PARAM(reg, mp); + if (region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3757,7 +4637,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ @@ -3779,29 +4658,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else goto finish; /* error */ \ } #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ /* anchor optimize: resume search range */ @@ -3886,7 +4742,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, goto end_buf; } } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { + else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) { goto begin_position; } } @@ -3902,11 +4758,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, s = (UChar* )start; prev = (UChar* )NULL; - MATCH_ARG_INIT(msa, reg, option, region, start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ -#endif + MATCH_ARG_INIT(msa, reg, option, region, start, mp); MATCH_AND_RETURN_CHECK(end); goto mismatch; } @@ -3918,13 +4770,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(start - str), (int )(range - str)); #endif - MATCH_ARG_INIT(msa, reg, option, region, orig_start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = (MIN(start, range) - str); - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif + MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp); s = (UChar* )start; if (range > start) { /* forward search */ @@ -3933,7 +4779,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else prev = (UChar* )NULL; - if (reg->optimize != ONIG_OPTIMIZE_NONE) { + if (reg->optimize != OPTIMIZE_NONE) { UChar *sch_range, *low, *high, *low_prev; sch_range = (UChar* )range; @@ -3969,7 +4815,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (! forward_search_range(reg, str, end, s, sch_range, &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) { do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; @@ -3998,12 +4844,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else { /* backward search */ -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE if (orig_start < end) orig_start += enclen(reg->enc, orig_start); /* is upper range */ -#endif - if (reg->optimize != ONIG_OPTIMIZE_NONE) { + if (reg->optimize != OPTIMIZE_NONE) { UChar *low, *high, *adjrange, *sch_start; if (range < end) @@ -4204,3 +5048,600 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from) *to = *from; } + +/* for callout functions */ + +#ifdef USE_CALLOUT + +extern OnigCalloutFunc +onig_get_progress_callout(void) +{ + return DefaultProgressCallout; +} + +extern int +onig_set_progress_callout(OnigCalloutFunc f) +{ + DefaultProgressCallout = f; + return ONIG_NORMAL; +} + +extern OnigCalloutFunc +onig_get_retraction_callout(void) +{ + return DefaultRetractionCallout; +} + +extern int +onig_set_retraction_callout(OnigCalloutFunc f) +{ + DefaultRetractionCallout = f; + return ONIG_NORMAL; +} + +extern int +onig_get_callout_num_by_callout_args(OnigCalloutArgs* args) +{ + return args->num; +} + +extern OnigCalloutIn +onig_get_callout_in_by_callout_args(OnigCalloutArgs* args) +{ + return args->in; +} + +extern int +onig_get_name_id_by_callout_args(OnigCalloutArgs* args) +{ + return args->name_id; +} + +extern const UChar* +onig_get_contents_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_CONTENTS) { + return e->u.content.start; + } + + return 0; +} + +extern const UChar* +onig_get_contents_end_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_CONTENTS) { + return e->u.content.end; + } + + return 0; +} + +extern int +onig_get_args_num_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + return e->u.arg.num; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + return e->u.arg.passed_num; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index, + OnigType* type, OnigValue* val) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + if (IS_NOT_NULL(type)) *type = e->u.arg.types[index]; + if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index]; + return ONIG_NORMAL; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern const UChar* +onig_get_string_by_callout_args(OnigCalloutArgs* args) +{ + return args->string; +} + +extern const UChar* +onig_get_string_end_by_callout_args(OnigCalloutArgs* args) +{ + return args->string_end; +} + +extern const UChar* +onig_get_start_by_callout_args(OnigCalloutArgs* args) +{ + return args->start; +} + +extern const UChar* +onig_get_right_range_by_callout_args(OnigCalloutArgs* args) +{ + return args->right_range; +} + +extern const UChar* +onig_get_current_by_callout_args(OnigCalloutArgs* args) +{ + return args->current; +} + +extern OnigRegex +onig_get_regex_by_callout_args(OnigCalloutArgs* args) +{ + return args->regex; +} + +extern unsigned long +onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args) +{ + return args->retry_in_match_counter; +} + + +extern int +onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end) +{ + OnigRegex reg; + const UChar* str; + StackType* stk_base; + int i; + + i = mem_num; + reg = a->regex; + str = a->string; + stk_base = a->stk_base; + + if (i > 0) { + if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { + if (MEM_STATUS_AT(reg->bt_mem_start, i)) + *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str); + else + *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str); + + *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )a->mem_end_stk[i])) - str); + } + else { + *begin = *end = ONIG_REGION_NOTPOS; + } + } + else if (i == 0) { +#if 0 + *begin = a->start - str; + *end = a->current - str; +#else + return ONIGERR_INVALID_ARGUMENT; +#endif + } + else + return ONIGERR_INVALID_ARGUMENT; + + return ONIG_NORMAL; +} + +extern int +onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes) +{ + int n; + + n = (int )(a->stk - a->stk_base); + + if (used_num != 0) + *used_num = n; + + if (used_bytes != 0) + *used_bytes = n * sizeof(StackType); + + return ONIG_NORMAL; +} + + +/* builtin callout functions */ + +extern int +onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_CALLOUT_FAIL; +} + +extern int +onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_MISMATCH; +} + +#if 0 +extern int +onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_CALLOUT_SUCCESS; +} +#endif + +extern int +onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int n; + OnigValue val; + + r = onig_get_arg_by_callout_args(args, 0, 0, &val); + if (r != ONIG_NORMAL) return r; + + n = (int )val.l; + if (n >= 0) { + n = ONIGERR_INVALID_CALLOUT_BODY; + } + + return n; +} + +extern int +onig_builtin_count(OnigCalloutArgs* args, void* user_data) +{ + (void )onig_check_callout_data_and_clear_old_values(args); + + return onig_builtin_total_count(args, user_data); +} + +extern int +onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + OnigType type; + OnigValue val; + OnigValue aval; + OnigCodePoint count_type; + + r = onig_get_arg_by_callout_args(args, 0, &type, &aval); + if (r != ONIG_NORMAL) return r; + + count_type = aval.c; + if (count_type != '>' && count_type != 'X' && count_type != '<') + return ONIGERR_INVALID_CALLOUT_ARG; + + r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0, + &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + val.l = 0; + } + + if (args->in == ONIG_CALLOUT_IN_RETRACTION) { + slot = 2; + if (count_type == '<') + val.l++; + else if (count_type == 'X') + val.l--; + } + else { + slot = 1; + if (count_type != '<') + val.l++; + } + + r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + /* slot 1: in progress counter, slot 2: in retraction counter */ + r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot, + &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + val.l = 0; + } + + val.l++; + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + OnigType type; + OnigValue val; + OnigValue aval; + + (void )onig_check_callout_data_and_clear_old_values(args); + + slot = 0; + r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + type = ONIG_TYPE_LONG; + val.l = 0; + } + + r = onig_get_arg_by_callout_args(args, 0, &type, &aval); + if (r != ONIG_NORMAL) return r; + + if (args->in == ONIG_CALLOUT_IN_RETRACTION) { + val.l--; + } + else { + if (val.l >= aval.l) return ONIG_CALLOUT_FAIL; + val.l++; + } + + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + return ONIG_CALLOUT_SUCCESS; +} + +enum OP_CMP { + OP_EQ, + OP_NE, + OP_LT, + OP_GT, + OP_LE, + OP_GE +}; + +extern int +onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + long lv; + long rv; + OnigType type; + OnigValue val; + regex_t* reg; + enum OP_CMP op; + + reg = args->regex; + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + lv = 0L; + else + lv = val.l; + } + else { /* ONIG_TYPE_LONG */ + lv = val.l; + } + + r = onig_get_arg_by_callout_args(args, 2, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + rv = 0L; + else + rv = val.l; + } + else { /* ONIG_TYPE_LONG */ + rv = val.l; + } + + slot = 0; + r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + OnigCodePoint c1, c2; + UChar* p; + + r = onig_get_arg_by_callout_args(args, 1, &type, &val); + if (r != ONIG_NORMAL) return r; + + p = val.s.start; + c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); + p += ONIGENC_MBC_ENC_LEN(reg->enc, p); + if (p < val.s.end) { + c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); + p += ONIGENC_MBC_ENC_LEN(reg->enc, p); + if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG; + } + else + c2 = 0; + + switch (c1) { + case '=': + if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; + op = OP_EQ; + break; + case '!': + if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; + op = OP_NE; + break; + case '<': + if (c2 == '=') op = OP_LE; + else if (c2 == 0) op = OP_LT; + else return ONIGERR_INVALID_CALLOUT_ARG; + break; + case '>': + if (c2 == '=') op = OP_GE; + else if (c2 == 0) op = OP_GT; + else return ONIGERR_INVALID_CALLOUT_ARG; + break; + default: + return ONIGERR_INVALID_CALLOUT_ARG; + break; + } + val.l = (long )op; + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + } + else { + op = (enum OP_CMP )val.l; + } + + switch (op) { + case OP_EQ: r = (lv == rv); break; + case OP_NE: r = (lv != rv); break; + case OP_LT: r = (lv < rv); break; + case OP_GT: r = (lv > rv); break; + case OP_LE: r = (lv <= rv); break; + case OP_GE: r = (lv >= rv); break; + } + + return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS; +} + + +#include <stdio.h> + +static FILE* OutFp; + +/* name start with "onig_" for macros. */ +static int +onig_builtin_monitor(OnigCalloutArgs* args, void* user_data) +{ + int r; + int num; + size_t tag_len; + const UChar* start; + const UChar* right; + const UChar* current; + const UChar* string; + const UChar* strend; + const UChar* tag_start; + const UChar* tag_end; + regex_t* reg; + OnigCalloutIn in; + OnigType type; + OnigValue val; + char buf[20]; + FILE* fp; + + fp = OutFp; + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + in = onig_get_callout_in_by_callout_args(args); + if (in == ONIG_CALLOUT_IN_PROGRESS) { + if (val.c == '<') + return ONIG_CALLOUT_SUCCESS; + } + else { + if (val.c != 'X' && val.c != '<') + return ONIG_CALLOUT_SUCCESS; + } + + num = onig_get_callout_num_by_callout_args(args); + start = onig_get_start_by_callout_args(args); + right = onig_get_right_range_by_callout_args(args); + current = onig_get_current_by_callout_args(args); + string = onig_get_string_by_callout_args(args); + strend = onig_get_string_end_by_callout_args(args); + reg = onig_get_regex_by_callout_args(args); + tag_start = onig_get_callout_tag_start(reg, num); + tag_end = onig_get_callout_tag_end(reg, num); + + if (tag_start == 0) + xsnprintf(buf, sizeof(buf), "#%d", num); + else { + /* CAUTION: tag string is not terminated with NULL. */ + int i; + + tag_len = tag_end - tag_start; + if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1; + for (i = 0; i < tag_len; i++) buf[i] = tag_start[i]; + buf[tag_len] = '\0'; + } + + fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n", + buf, + in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=", + (int )(current - string), + (int )(start - string), + (int )(right - string), + (int )(strend - string)); + fflush(fp); + + return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */) +{ + int id; + char* name; + OnigEncoding enc; + unsigned int ts[4]; + OnigValue opts[4]; + + if (IS_NOT_NULL(fp)) + OutFp = (FILE* )fp; + else + OutFp = stdout; + + enc = ONIG_ENCODING_ASCII; + + name = "MON"; + ts[0] = ONIG_TYPE_CHAR; + opts[0].c = '>'; + BC_B_O(name, monitor, 1, ts, 1, opts); + + return ONIG_NORMAL; +} + +#endif /* USE_CALLOUT */ |