summaryrefslogtreecommitdiff
path: root/src/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regexec.c')
-rw-r--r--src/regexec.c2553
1 files changed, 1997 insertions, 556 deletions
diff --git a/src/regexec.c b/src/regexec.c
index 53f42ee..35e3698 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,11 +26,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-
#include "regint.h"
-#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
-
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))
@@ -40,6 +37,565 @@
ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
#endif
+#define CHECK_INTERRUPT_IN_MATCH
+
+#ifdef USE_CALLOUT
+typedef struct {
+ int last_match_at_call_counter;
+ struct {
+ OnigType type;
+ OnigValue val;
+ } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
+} CalloutData;
+#endif
+
+struct OnigMatchParamStruct {
+ unsigned int match_stack_limit;
+ unsigned long retry_limit_in_match;
+ OnigCalloutFunc progress_callout_of_contents;
+ OnigCalloutFunc retraction_callout_of_contents;
+#ifdef USE_CALLOUT
+ int match_at_call_counter;
+ void* callout_user_data;
+ CalloutData* callout_data;
+ int callout_data_alloc_num;
+#endif
+};
+
+extern int
+onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
+ unsigned int limit)
+{
+ param->match_stack_limit = limit;
+ return ONIG_NORMAL;
+}
+
+extern int
+onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
+ unsigned long limit)
+{
+ param->retry_limit_in_match = limit;
+ return ONIG_NORMAL;
+}
+
+extern int
+onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
+{
+ param->progress_callout_of_contents = f;
+ return ONIG_NORMAL;
+}
+
+extern int
+onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
+{
+ param->retraction_callout_of_contents = f;
+ return ONIG_NORMAL;
+}
+
+
+
+typedef struct {
+ void* stack_p;
+ int stack_n;
+ OnigOptionType options;
+ OnigRegion* region;
+ int ptr_num;
+ const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+ unsigned int match_stack_limit;
+ unsigned long retry_limit_in_match;
+ OnigMatchParam* mp;
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ int best_len; /* for ONIG_OPTION_FIND_LONGEST */
+ UChar* best_s;
+#endif
+} MatchArg;
+
+
+#ifdef ONIG_DEBUG
+
+/* arguments type */
+#define ARG_SPECIAL -1
+#define ARG_NON 0
+#define ARG_RELADDR 1
+#define ARG_ABSADDR 2
+#define ARG_LENGTH 3
+#define ARG_MEMNUM 4
+#define ARG_OPTION 5
+#define ARG_MODE 6
+
+typedef struct {
+ short int opcode;
+ char* name;
+ short int arg_type;
+} OpInfoType;
+
+static OpInfoType OpInfo[] = {
+ { OP_FINISH, "finish", ARG_NON },
+ { OP_END, "end", ARG_NON },
+ { OP_EXACT1, "exact1", ARG_SPECIAL },
+ { OP_EXACT2, "exact2", ARG_SPECIAL },
+ { OP_EXACT3, "exact3", ARG_SPECIAL },
+ { OP_EXACT4, "exact4", ARG_SPECIAL },
+ { OP_EXACT5, "exact5", ARG_SPECIAL },
+ { OP_EXACTN, "exactn", ARG_SPECIAL },
+ { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
+ { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
+ { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
+ { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
+ { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
+ { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
+ { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
+ { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
+ { OP_CCLASS, "cclass", ARG_SPECIAL },
+ { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
+ { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
+ { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
+ { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
+ { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
+#ifdef USE_OP_CCLASS_NODE
+ { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
+#endif
+ { OP_ANYCHAR, "anychar", ARG_NON },
+ { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
+ { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
+ { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
+ { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
+ { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_WORD_ASCII, "word-ascii", ARG_NON },
+ { OP_NO_WORD, "not-word", ARG_NON },
+ { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON },
+ { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE },
+ { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE },
+ { OP_WORD_BEGIN, "word-begin", ARG_MODE },
+ { OP_WORD_END, "word-end", ARG_MODE },
+ { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "extended-grapheme-cluster-boundary", ARG_NON },
+ { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREF_N, "backref-n", ARG_MEMNUM },
+ { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
+ { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL },
+ { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL },
+ { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_PUSH_SUPER, "push-super", ARG_RELADDR },
+ { OP_POP_OUT, "pop-out", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM },
+ { OP_PREC_READ_START, "push-pos", ARG_NON },
+ { OP_PREC_READ_END, "pop-pos", ARG_NON },
+ { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR },
+ { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON },
+ { OP_ATOMIC_START, "atomic-start", ARG_NON },
+ { OP_ATOMIC_END, "atomic-end", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL },
+ { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL },
+ { OP_UPDATE_VAR, "update-var", ARG_SPECIAL },
+#ifdef USE_CALLOUT
+ { OP_CALLOUT_CONTENTS, "callout-contents", ARG_SPECIAL },
+ { OP_CALLOUT_NAME, "callout-name", ARG_SPECIAL },
+#endif
+ { -1, "", ARG_NON }
+};
+
+static char*
+op2name(int opcode)
+{
+ int i;
+
+ for (i = 0; OpInfo[i].opcode >= 0; i++) {
+ if (opcode == OpInfo[i].opcode)
+ return OpInfo[i].name;
+ }
+ return "";
+}
+
+static int
+op2arg_type(int opcode)
+{
+ int i;
+
+ for (i = 0; OpInfo[i].opcode >= 0; i++) {
+ if (opcode == OpInfo[i].opcode)
+ return OpInfo[i].arg_type;
+ }
+ return ARG_SPECIAL;
+}
+
+static void
+p_string(FILE* f, int len, UChar* s)
+{
+ fputs(":", f);
+ while (len-- > 0) { fputc(*s++, f); }
+}
+
+static void
+p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
+{
+ int x = len * mb_len;
+
+ fprintf(f, ":%d:", len);
+ while (x-- > 0) { fputc(*s++, f); }
+}
+
+static void
+p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start)
+{
+ RelAddrType curr = (RelAddrType )(p - start);
+
+ fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
+}
+
+static int
+bitset_on_num(BitSetRef bs)
+{
+ int i, n;
+
+ n = 0;
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (BITSET_AT(bs, i)) n++;
+ }
+ return n;
+}
+
+extern void
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
+ OnigEncoding enc)
+{
+ int i, n, arg_type;
+ RelAddrType addr;
+ LengthType len;
+ MemNumType mem;
+ OnigCodePoint code;
+ OnigOptionType option;
+ ModeType mode;
+ UChar *q;
+
+ fprintf(f, "%s", op2name(*bp));
+ arg_type = op2arg_type(*bp);
+ if (arg_type != ARG_SPECIAL) {
+ bp++;
+ switch (arg_type) {
+ case ARG_NON:
+ break;
+ case ARG_RELADDR:
+ GET_RELADDR_INC(addr, bp);
+ fputc(':', f);
+ p_rel_addr(f, addr, bp, start);
+ break;
+ case ARG_ABSADDR:
+ GET_ABSADDR_INC(addr, bp);
+ fprintf(f, ":{/%d}", addr);
+ break;
+ case ARG_LENGTH:
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d", len);
+ break;
+ case ARG_MEMNUM:
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ fprintf(f, ":%d", mem);
+ break;
+ case ARG_OPTION:
+ {
+ OnigOptionType option = *((OnigOptionType* )bp);
+ bp += SIZE_OPTION;
+ fprintf(f, ":%d", option);
+ }
+ break;
+
+ case ARG_MODE:
+ mode = *((ModeType* )bp);
+ bp += SIZE_MODE;
+ fprintf(f, ":%d", mode);
+ break;
+ }
+ }
+ else {
+ switch (*bp++) {
+ case OP_EXACT1:
+ case OP_ANYCHAR_STAR_PEEK_NEXT:
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
+ p_string(f, 1, bp++); break;
+ case OP_EXACT2:
+ p_string(f, 2, bp); bp += 2; break;
+ case OP_EXACT3:
+ p_string(f, 3, bp); bp += 3; break;
+ case OP_EXACT4:
+ p_string(f, 4, bp); bp += 4; break;
+ case OP_EXACT5:
+ p_string(f, 5, bp); bp += 5; break;
+ case OP_EXACTN:
+ GET_LENGTH_INC(len, bp);
+ p_len_string(f, len, 1, bp);
+ bp += len;
+ break;
+
+ case OP_EXACTMB2N1:
+ p_string(f, 2, bp); bp += 2; break;
+ case OP_EXACTMB2N2:
+ p_string(f, 4, bp); bp += 4; break;
+ case OP_EXACTMB2N3:
+ p_string(f, 6, bp); bp += 6; break;
+ case OP_EXACTMB2N:
+ GET_LENGTH_INC(len, bp);
+ p_len_string(f, len, 2, bp);
+ bp += len * 2;
+ break;
+ case OP_EXACTMB3N:
+ GET_LENGTH_INC(len, bp);
+ p_len_string(f, len, 3, bp);
+ bp += len * 3;
+ break;
+ case OP_EXACTMBN:
+ {
+ int mb_len;
+
+ GET_LENGTH_INC(mb_len, bp);
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d:%d:", mb_len, len);
+ n = len * mb_len;
+ while (n-- > 0) { fputc(*bp++, f); }
+ }
+ break;
+
+ case OP_EXACT1_IC:
+ len = enclen(enc, bp);
+ p_string(f, len, bp);
+ bp += len;
+ break;
+ case OP_EXACTN_IC:
+ GET_LENGTH_INC(len, bp);
+ p_len_string(f, len, 1, bp);
+ bp += len;
+ break;
+
+ case OP_CCLASS:
+ n = bitset_on_num((BitSetRef )bp);
+ bp += SIZE_BITSET;
+ fprintf(f, ":%d", n);
+ break;
+
+ case OP_CCLASS_NOT:
+ n = bitset_on_num((BitSetRef )bp);
+ bp += SIZE_BITSET;
+ fprintf(f, ":%d", n);
+ break;
+
+ case OP_CCLASS_MB:
+ case OP_CCLASS_MB_NOT:
+ GET_LENGTH_INC(len, bp);
+ q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+ ALIGNMENT_RIGHT(q);
+#endif
+ GET_CODE_POINT(code, q);
+ bp += len;
+ fprintf(f, ":%d:%d", (int )code, len);
+ break;
+
+ case OP_CCLASS_MIX:
+ case OP_CCLASS_MIX_NOT:
+ n = bitset_on_num((BitSetRef )bp);
+ bp += SIZE_BITSET;
+ GET_LENGTH_INC(len, bp);
+ q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+ ALIGNMENT_RIGHT(q);
+#endif
+ GET_CODE_POINT(code, q);
+ bp += len;
+ fprintf(f, ":%d:%d:%d", n, (int )code, len);
+ break;
+
+#ifdef USE_OP_CCLASS_NODE
+ case OP_CCLASS_NODE:
+ {
+ CClassNode *cc;
+
+ GET_POINTER_INC(cc, bp);
+ n = bitset_on_num(cc->bs);
+ fprintf(f, ":%p:%d", cc, n);
+ }
+ break;
+#endif
+
+ case OP_BACKREF_N_IC:
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ fprintf(f, ":%d", mem);
+ break;
+
+ case OP_BACKREF_MULTI_IC:
+ case OP_BACKREF_MULTI:
+ case OP_BACKREF_CHECK:
+ fputs(" ", f);
+ GET_LENGTH_INC(len, bp);
+ for (i = 0; i < len; i++) {
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
+ break;
+
+ case OP_BACKREF_WITH_LEVEL:
+ GET_OPTION_INC(option, bp);
+ fprintf(f, ":%d", option);
+ /* fall */
+ case OP_BACKREF_CHECK_WITH_LEVEL:
+ {
+ LengthType level;
+
+ GET_LENGTH_INC(level, bp);
+ fprintf(f, ":%d", level);
+
+ fputs(" ", f);
+ GET_LENGTH_INC(len, bp);
+ for (i = 0; i < len; i++) {
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
+ }
+ break;
+
+ case OP_REPEAT:
+ case OP_REPEAT_NG:
+ {
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:%d", mem, addr);
+ }
+ break;
+
+ case OP_PUSH_OR_JUMP_EXACT1:
+ case OP_PUSH_IF_PEEK_NEXT:
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fputc(':', f);
+ p_rel_addr(f, addr, bp, start);
+ p_string(f, 1, bp);
+ bp += 1;
+ break;
+
+ case OP_LOOK_BEHIND:
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d", len);
+ break;
+
+ case OP_LOOK_BEHIND_NOT_START:
+ GET_RELADDR_INC(addr, bp);
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d:", len);
+ p_rel_addr(f, addr, bp, start);
+ break;
+
+ case OP_PUSH_SAVE_VAL:
+ {
+ SaveType type;
+ GET_SAVE_TYPE_INC(type, bp);
+ GET_MEMNUM_INC(mem, bp);
+ fprintf(f, ":%d:%d", type, mem);
+ }
+ break;
+
+ case OP_UPDATE_VAR:
+ {
+ UpdateVarType type;
+ GET_UPDATE_VAR_TYPE_INC(type, bp);
+ GET_MEMNUM_INC(mem, bp);
+ fprintf(f, ":%d:%d", type, mem);
+ }
+ break;
+
+#ifdef USE_CALLOUT
+ case OP_CALLOUT_CONTENTS:
+ {
+ GET_MEMNUM_INC(mem, bp); // number
+ fprintf(f, ":%d", mem);
+ }
+ break;
+
+ case OP_CALLOUT_NAME:
+ {
+ int id;
+
+ GET_MEMNUM_INC(id, bp); // id
+ GET_MEMNUM_INC(mem, bp); // number
+
+ fprintf(f, ":%d:%d", id, mem);
+ }
+ break;
+#endif
+
+ default:
+ fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp);
+ }
+ }
+ if (nextp) *nextp = bp;
+}
+#endif /* ONIG_DEBUG */
+
+#ifdef ONIG_DEBUG_COMPILE
+extern void
+onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
+{
+ UChar* bp;
+ UChar* start = reg->p;
+ UChar* end = reg->p + reg->used;
+
+ fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
+ reg->bt_mem_start, reg->bt_mem_end);
+ fprintf(f, "code-length: %d\n", reg->used);
+
+ bp = start;
+ while (bp < end) {
+ int pos = bp - start;
+
+ fprintf(f, "%4d: ", pos);
+ onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc);
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n");
+}
+#endif
+
+
#ifdef USE_CAPTURE_HISTORY
static void history_tree_free(OnigCaptureTreeNode* node);
@@ -304,6 +860,45 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#endif
}
+#ifdef USE_CALLOUT
+#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
+ args.in = (ain);\
+ args.name_id = (aname_id);\
+ args.num = anum;\
+ args.regex = reg;\
+ args.string = str;\
+ args.string_end = end;\
+ args.start = sstart;\
+ args.right_range = right_range;\
+ args.current = s;\
+ args.retry_in_match_counter = retry_in_match_counter;\
+ args.msa = msa;\
+ args.stk_base = stk_base;\
+ args.stk = stk;\
+ args.mem_start_stk = mem_start_stk;\
+ args.mem_end_stk = mem_end_stk;\
+ result = (func)(&args, user);\
+} while (0)
+
+#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
+ int result;\
+ OnigCalloutArgs args;\
+ CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
+ switch (result) {\
+ case ONIG_CALLOUT_FAIL:\
+ case ONIG_CALLOUT_SUCCESS:\
+ break;\
+ default:\
+ if (result > 0) {\
+ result = ONIGERR_INVALID_ARGUMENT;\
+ }\
+ best_len = result;\
+ goto finish;\
+ break;\
+ }\
+} while(0)
+#endif
+
/** stack **/
#define INVALID_STACK_INDEX -1
@@ -316,40 +911,43 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_ALT (0x0002 | STK_ALT_FLAG)
#define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG)
#define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG)
+
/* handled by normal-POP */
-#define STK_MEM_START 0x0100
-#define STK_MEM_END 0x8200
-#define STK_REPEAT_INC 0x0300
-#define STK_STATE_CHECK_MARK 0x1000
+#define STK_MEM_START 0x0010
+#define STK_MEM_END 0x8030
+#define STK_REPEAT_INC 0x0050
+#ifdef USE_CALLOUT
+#define STK_CALLOUT 0x0070
+#endif
+
/* avoided by normal-POP */
#define STK_VOID 0x0000 /* for fill a blank */
#define STK_EMPTY_CHECK_START 0x3000
#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
-#define STK_MEM_END_MARK 0x8400
-#define STK_TO_VOID_START 0x0500 /* mark for "(?>...)" */
-#define STK_REPEAT 0x0600
-#define STK_CALL_FRAME 0x0700
-#define STK_RETURN 0x0800
-#define STK_SAVE_VAL 0x0900
+#define STK_MEM_END_MARK 0x8100
+#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
+#define STK_REPEAT 0x0300
+#define STK_CALL_FRAME 0x0400
+#define STK_RETURN 0x0500
+#define STK_SAVE_VAL 0x0600
/* stack type check mask */
#define STK_MASK_POP_USED STK_ALT_FLAG
-#define STK_MASK_TO_VOID_TARGET 0x10fe
+#define STK_MASK_POP_HANDLED 0x0010
+#define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004)
+#define STK_MASK_TO_VOID_TARGET 0x100e
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
typedef intptr_t StackIndex;
typedef struct _StackType {
unsigned int type;
- int id;
+ int zid;
union {
struct {
UChar *pcode; /* byte code position */
UChar *pstr; /* string position */
UChar *pstr_prev; /* previous char position of pstr */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- unsigned int state_check;
-#endif
} state;
struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
@@ -378,67 +976,66 @@ typedef struct _StackType {
UChar* v;
UChar* v2;
} val;
+#ifdef USE_CALLOUT
+ struct {
+ int num;
+ OnigCalloutFunc func;
+ } callout;
+#endif
} u;
} StackType;
+#ifdef USE_CALLOUT
+
+struct OnigCalloutArgsStruct {
+ OnigCalloutIn in;
+ int name_id; /* name id or ONIG_NON_NAME_ID */
+ int num;
+ OnigRegex regex;
+ const OnigUChar* string;
+ const OnigUChar* string_end;
+ const OnigUChar* start;
+ const OnigUChar* right_range;
+ const OnigUChar* current; // current matching position
+ unsigned long retry_in_match_counter;
+
+ /* invisible to users */
+ MatchArg* msa;
+ StackType* stk_base;
+ StackType* stk;
+ StackIndex* mem_start_stk;
+ StackIndex* mem_end_stk;
+};
+
+#endif
+
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
+ (msa).match_stack_limit = (mp)->match_stack_limit;\
+ (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
+ (msa).mp = mp;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#else
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
+ (msa).match_stack_limit = (mp)->match_stack_limit;\
+ (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
+ (msa).mp = mp;\
(msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#endif
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-
-#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
-
-#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\
- if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
- unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
- offset = ((offset) * (state_num)) >> 3;\
- if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
- if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
- (msa).state_check_buff = (void* )xmalloc(size);\
- else \
- (msa).state_check_buff = (void* )xalloca(size);\
- xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
- (size_t )(size - (offset))); \
- (msa).state_check_buff_size = size;\
- }\
- else {\
- (msa).state_check_buff = (void* )0;\
- (msa).state_check_buff_size = 0;\
- }\
- }\
- else {\
- (msa).state_check_buff = (void* )0;\
- (msa).state_check_buff_size = 0;\
- }\
-} while(0)
-
-#define MATCH_ARG_FREE(msa) do {\
- if ((msa).stack_p) xfree((msa).stack_p);\
- if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
- if ((msa).state_check_buff) xfree((msa).state_check_buff);\
- }\
-} while(0)
-#else
-#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
-#endif
#define ALLOCA_PTR_NUM_LIMIT 50
@@ -495,25 +1092,303 @@ typedef struct _StackType {
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
-static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
+static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
extern unsigned int
onig_get_match_stack_limit_size(void)
{
- return MatchStackLimitSize;
+ return MatchStackLimit;
}
extern int
onig_set_match_stack_limit_size(unsigned int size)
{
- MatchStackLimitSize = size;
+ MatchStackLimit = size;
return 0;
}
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+
+static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;
+
+#define CHECK_RETRY_LIMIT_IN_MATCH do {\
+ if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
+} while (0)
+
+#else
+
+#define CHECK_RETRY_LIMIT_IN_MATCH
+
+#endif /* USE_RETRY_LIMIT_IN_MATCH */
+
+extern unsigned long
+onig_get_retry_limit_in_match(void)
+{
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ return RetryLimitInMatch;
+#else
+ //return ONIG_NO_SUPPORT_CONFIG;
+ return 0;
+#endif
+}
+
+extern int
+onig_set_retry_limit_in_match(unsigned long size)
+{
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ RetryLimitInMatch = size;
+ return 0;
+#else
+ return ONIG_NO_SUPPORT_CONFIG;
+#endif
+}
+
+static OnigCalloutFunc DefaultProgressCallout;
+static OnigCalloutFunc DefaultRetractionCallout;
+
+extern OnigMatchParam*
+onig_new_match_param(void)
+{
+ OnigMatchParam* p;
+
+ p = (OnigMatchParam* )xmalloc(sizeof(*p));
+ if (IS_NOT_NULL(p)) {
+ onig_initialize_match_param(p);
+ }
+
+ return p;
+}
+
+extern void
+onig_free_match_param_content(OnigMatchParam* p)
+{
+#ifdef USE_CALLOUT
+ if (IS_NOT_NULL(p->callout_data)) {
+ xfree(p->callout_data);
+ p->callout_data = 0;
+ }
+#endif
+}
+
+extern void
+onig_free_match_param(OnigMatchParam* p)
+{
+ if (IS_NOT_NULL(p)) {
+ onig_free_match_param_content(p);
+ xfree(p);
+ }
+}
+
+extern int
+onig_initialize_match_param(OnigMatchParam* mp)
+{
+ mp->match_stack_limit = MatchStackLimit;
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ mp->retry_limit_in_match = RetryLimitInMatch;
+#endif
+ mp->progress_callout_of_contents = DefaultProgressCallout;
+ mp->retraction_callout_of_contents = DefaultRetractionCallout;
+
+#ifdef USE_CALLOUT
+ mp->match_at_call_counter = 0;
+ mp->callout_user_data = 0;
+ mp->callout_data = 0;
+ mp->callout_data_alloc_num = 0;
+#endif
+
+ return ONIG_NORMAL;
+}
+
+#ifdef USE_CALLOUT
+
+static int
+adjust_match_param(regex_t* reg, OnigMatchParam* mp)
+{
+ RegexExt* ext = REG_EXTP(reg);
+
+ mp->match_at_call_counter = 0;
+
+ if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
+
+ if (ext->callout_num > mp->callout_data_alloc_num) {
+ CalloutData* d;
+ size_t n = ext->callout_num * sizeof(*d);
+ if (IS_NOT_NULL(mp->callout_data))
+ d = (CalloutData* )xrealloc(mp->callout_data, n);
+ else
+ d = (CalloutData* )xmalloc(n);
+ CHECK_NULL_RETURN_MEMERR(d);
+
+ mp->callout_data = d;
+ mp->callout_data_alloc_num = ext->callout_num;
+ }
+
+ xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
+ return ONIG_NORMAL;
+}
+
+#define ADJUST_MATCH_PARAM(reg, mp) \
+ r = adjust_match_param(reg, mp);\
+ if (r != ONIG_NORMAL) return r;
+
+#define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1))
+
+extern int
+onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
+{
+ OnigMatchParam* mp;
+ int num;
+ CalloutData* d;
+
+ mp = args->msa->mp;
+ num = args->num;
+
+ d = CALLOUT_DATA_AT_NUM(mp, num);
+ if (d->last_match_at_call_counter != mp->match_at_call_counter) {
+ xmemset(d, 0, sizeof(*d));
+ d->last_match_at_call_counter = mp->match_at_call_counter;
+ return d->last_match_at_call_counter;
+ }
+
+ return 0;
+}
+
+extern int
+onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
+ int callout_num, int slot,
+ OnigType* type, OnigValue* val)
+{
+ OnigType t;
+ CalloutData* d;
+
+ if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
+
+ d = CALLOUT_DATA_AT_NUM(mp, callout_num);
+ t = d->slot[slot].type;
+ if (IS_NOT_NULL(type)) *type = t;
+ if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
+ return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
+}
+
+extern int
+onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
+ int slot, OnigType* type,
+ OnigValue* val)
+{
+ return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
+ args->num, slot, type, val);
+}
+
+extern int
+onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
+ int callout_num, int slot,
+ OnigType* type, OnigValue* val)
+{
+ OnigType t;
+ CalloutData* d;
+
+ if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
+
+ d = CALLOUT_DATA_AT_NUM(mp, callout_num);
+ if (d->last_match_at_call_counter != mp->match_at_call_counter) {
+ xmemset(d, 0, sizeof(*d));
+ d->last_match_at_call_counter = mp->match_at_call_counter;
+ }
+
+ t = d->slot[slot].type;
+ if (IS_NOT_NULL(type)) *type = t;
+ if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
+ return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
+}
+
+extern int
+onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
+ const UChar* tag, const UChar* tag_end, int slot,
+ OnigType* type, OnigValue* val)
+{
+ int num;
+
+ num = onig_get_callout_num_by_tag(reg, tag, tag_end);
+ if (num < 0) return num;
+ if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+
+ return onig_get_callout_data(reg, mp, num, slot, type, val);
+}
+
+extern int
+onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
+ int callout_num, int slot,
+ OnigType* type, OnigValue* val)
+{
+ return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
+ type, val);
+}
+
+extern int
+onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
+ int slot, OnigType* type, OnigValue* val)
+{
+ return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
+ type, val);
+}
+
+extern int
+onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
+ int callout_num, int slot,
+ OnigType type, OnigValue* val)
+{
+ CalloutData* d;
+
+ if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
+
+ d = CALLOUT_DATA_AT_NUM(mp, callout_num);
+ d->slot[slot].type = type;
+ d->slot[slot].val = *val;
+ d->last_match_at_call_counter = mp->match_at_call_counter;
+
+ return ONIG_NORMAL;
+}
+
+extern int
+onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
+ const UChar* tag, const UChar* tag_end, int slot,
+ OnigType type, OnigValue* val)
+{
+ int num;
+
+ num = onig_get_callout_num_by_tag(reg, tag, tag_end);
+ if (num < 0) return num;
+ if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+
+ return onig_set_callout_data(reg, mp, num, slot, type, val);
+}
+
+extern int
+onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
+ int callout_num, int slot,
+ OnigType type, OnigValue* val)
+{
+ return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
+ type, val);
+}
+
+extern int
+onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
+ int slot, OnigType type, OnigValue* val)
+{
+ return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
+ type, val);
+}
+
+#else
+#define ADJUST_MATCH_PARAM(reg, mp)
+#endif /* USE_CALLOUT */
+
+
static int
stack_double(int is_alloca, char** arg_alloc_base,
StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk,
- OnigMatchArg* msa)
+ MatchArg* msa)
{
unsigned int n;
int used;
@@ -541,11 +1416,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
xmemcpy(new_alloc_base, alloc_base, size);
}
else {
- if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
- if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
+ if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
+ if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit)
return ONIGERR_MATCH_STACK_LIMIT_OVER;
else
- n = MatchStackLimitSize;
+ n = msa->match_stack_limit;
}
new_alloc_base = (char* )xrealloc(alloc_base, new_size);
if (IS_NULL(new_alloc_base)) {
@@ -584,80 +1459,36 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define STATE_CHECK_POS(s,snum) \
- (((s) - str) * num_comb_exp_check + ((snum) - 1))
-#define STATE_CHECK_VAL(v,snum) do {\
- if (IS_NOT_NULL(state_check_buff)) {\
- int x = STATE_CHECK_POS(s,snum);\
- (v) = state_check_buff[x/8] & (1<<(x%8));\
- }\
- else (v) = 0;\
-} while(0)
-
-
-#define ELSE_IF_STATE_CHECK_MARK(stk) \
- else if ((stk)->type == STK_STATE_CHECK_MARK) { \
- int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
- state_check_buff[x/8] |= (1<<(x%8));\
- }
-
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
- stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
- stk->u.state.state_check = 0;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
- STACK_ENSURE(1);\
- stk->type = STK_ALT;\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
- stk->u.state.state_check = (IS_NOT_NULL(state_check_buff) ? (snum) : 0);\
STACK_INC;\
} while(0)
-#define STACK_PUSH_STATE_CHECK(s,snum) do {\
- if (IS_NOT_NULL(state_check_buff)) { \
- STACK_ENSURE(1);\
- stk->type = STK_STATE_CHECK_MARK;\
- stk->u.state.pstr = (s);\
- stk->u.state.state_check = (snum);\
- STACK_INC;\
- }\
-} while(0)
-
-#else /* USE_COMBINATION_EXPLOSION_CHECK */
-
-#define ELSE_IF_STATE_CHECK_MARK(stk)
-
-#define STACK_PUSH(stack_type,pat,s,sprev) do {\
- STACK_ENSURE(1);\
+#ifdef ONIG_DEBUG_MATCH
+#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = s;\
+ stk->u.state.pstr_prev = sprev;\
STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+} while (0)
+#else
+#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
STACK_INC;\
-} while(0)
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+} while (0)
+#endif
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
@@ -672,7 +1503,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_REPEAT(sid, pat) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT;\
- stk->id = (sid);\
+ stk->zid = (sid);\
stk->u.repeat.pcode = (pat);\
stk->u.repeat.count = 0;\
STACK_INC;\
@@ -688,7 +1519,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_MEM_START(mnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_START;\
- stk->id = (mnum);\
+ stk->zid = (mnum);\
stk->u.mem.pstr = (s);\
stk->u.mem.start = mem_start_stk[mnum];\
stk->u.mem.end = mem_end_stk[mnum];\
@@ -700,7 +1531,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_MEM_END(mnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_END;\
- stk->id = (mnum);\
+ stk->zid = (mnum);\
stk->u.mem.pstr = (s);\
stk->u.mem.start = mem_start_stk[mnum];\
stk->u.mem.end = mem_end_stk[mnum];\
@@ -711,7 +1542,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_END_MARK;\
- stk->id = (mnum);\
+ stk->zid = (mnum);\
STACK_INC;\
} while(0)
@@ -721,10 +1552,10 @@ stack_double(int is_alloca, char** arg_alloc_base,
while (k > stk_base) {\
k--;\
if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
- && k->id == (mnum)) {\
+ && k->zid == (mnum)) {\
level++;\
}\
- else if (k->type == STK_MEM_START && k->id == (mnum)) {\
+ else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
if (level == 0) break;\
level--;\
}\
@@ -752,7 +1583,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_EMPTY_CHECK_START;\
- stk->id = (cnum);\
+ stk->zid = (cnum);\
stk->u.empty_check.pstr = (s);\
STACK_INC;\
} while(0)
@@ -760,7 +1591,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
STACK_ENSURE(1);\
stk->type = STK_EMPTY_CHECK_END;\
- stk->id = (cnum);\
+ stk->zid = (cnum);\
STACK_INC;\
} while(0)
@@ -780,7 +1611,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
STACK_ENSURE(1);\
stk->type = STK_SAVE_VAL;\
- stk->id = (sid);\
+ stk->zid = (sid);\
stk->u.val.type = (stype);\
stk->u.val.v = (UChar* )(sval);\
STACK_INC;\
@@ -789,7 +1620,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
STACK_ENSURE(1);\
stk->type = STK_SAVE_VAL;\
- stk->id = (sid);\
+ stk->zid = (sid);\
stk->u.val.type = (stype);\
stk->u.val.v = (UChar* )(sval);\
stk->u.val.v2 = sprev;\
@@ -815,7 +1646,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
- && k->id == (sid)) {\
+ && k->zid == (sid)) {\
if (level == 0) {\
(sval) = k->u.val.v;\
break;\
@@ -835,7 +1666,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
- && k->id == (sid)) {\
+ && k->zid == (sid)) {\
if (level == 0) {\
(sval) = k->u.val.v;\
sprev = k->u.val.v2;\
@@ -869,6 +1700,24 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while (0)
+#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_CALLOUT;\
+ stk->zid = ONIG_NON_NAME_ID;\
+ stk->u.callout.num = (anum);\
+ stk->u.callout.func = (func);\
+ STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_CALLOUT;\
+ stk->zid = (aid);\
+ stk->u.callout.num = (anum);\
+ stk->u.callout.func = (func);\
+ STACK_INC;\
+} while(0)
+
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
if ((p) < stk_base) {\
@@ -884,6 +1733,16 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
+
+#ifdef USE_CALLOUT
+#define POP_CALLOUT_CASE \
+ else if (stk->type == STK_CALLOUT) {\
+ RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
+ }
+#else
+#define POP_CALLOUT_CASE
+#endif
+
#define STACK_POP do {\
switch (pop_level) {\
case STACK_POP_LEVEL_FREE:\
@@ -891,7 +1750,6 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk--;\
STACK_BASE_CHECK(stk, "STACK_POP"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
case STACK_POP_LEVEL_MEM_START:\
@@ -900,10 +1758,9 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_BASE_CHECK(stk, "STACK_POP 2"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
}\
- ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
default:\
@@ -911,75 +1768,70 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk--;\
STACK_BASE_CHECK(stk, "STACK_POP 3"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
+ else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
+ if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
+ }\
+ POP_CALLOUT_CASE\
}\
- ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
}\
} while(0)
-#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
+#define POP_TIL_BODY(aname, til_type) do {\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \
- if (stk->type == STK_ALT_PREC_READ_NOT) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
+ STACK_BASE_CHECK(stk, (aname));\
+ if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
+ if (stk->type == (til_type)) break;\
+ else {\
+ if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
+ }\
+ /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
+ }\
}\
- ELSE_IF_STATE_CHECK_MARK(stk);\
}\
} while(0)
+#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
+ POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\
+} while(0)
+
#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \
- if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
- }\
- ELSE_IF_STATE_CHECK_MARK(stk);\
- }\
+ POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\
} while(0)
+
#define STACK_EXEC_TO_VOID(k) do {\
k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
if (IS_TO_VOID_TARGET(k)) {\
+ if (k->type == STK_TO_VOID_START) {\
+ k->type = STK_VOID;\
+ break;\
+ }\
k->type = STK_VOID;\
}\
- else if (k->type == STK_TO_VOID_START) {\
- k->type = STK_VOID;\
- break;\
- }\
}\
} while(0)
@@ -989,7 +1841,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->id == (sid)) {\
+ if (k->zid == (sid)) {\
(isnull) = (k->u.empty_check.pstr == (s));\
break;\
}\
@@ -1004,7 +1856,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \
if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->id == (sid)) {\
+ if (k->zid == (sid)) {\
if (k->u.empty_check.pstr != (s)) {\
(isnull) = 0;\
break;\
@@ -1017,7 +1869,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
if (k->u.mem.end == INVALID_STACK_INDEX) {\
(isnull) = 0; break;\
}\
- if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\
+ if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\
endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
else\
endp = (UChar* )k->u.mem.end;\
@@ -1045,7 +1897,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \
if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->id == (sid)) {\
+ if (k->zid == (sid)) {\
if (level == 0) {\
if (k->u.empty_check.pstr != (s)) {\
(isnull) = 0;\
@@ -1059,7 +1911,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
if (k->u.mem.end == INVALID_STACK_INDEX) {\
(isnull) = 0; break;\
}\
- if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\
+ if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\
endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
else\
endp = (UChar* )k->u.mem.end;\
@@ -1081,7 +1933,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
}\
else if (k->type == STK_EMPTY_CHECK_END) {\
- if (k->id == (sid)) level++;\
+ if (k->zid == (sid)) level++;\
}\
}\
} while(0)
@@ -1116,7 +1968,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
if (k->type == STK_REPEAT) {\
if (level == 0) {\
- if (k->id == (sid)) {\
+ if (k->zid == (sid)) {\
break;\
}\
}\
@@ -1208,11 +2060,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
-#else
-#define INIT_RIGHT_RANGE right_range = (UChar* )end
-#endif
#ifdef USE_CAPTURE_HISTORY
static int
@@ -1225,7 +2073,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
while (k < stk_top) {
if (k->type == STK_MEM_START) {
- n = k->id;
+ n = k->zid;
if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
MEM_STATUS_AT(reg->capture_history, n) != 0) {
child = history_node_new();
@@ -1243,7 +2091,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
}
}
else if (k->type == STK_MEM_END) {
- if (k->id == node->group) {
+ if (k->zid == node->group) {
node->end = (int )(k->u.mem.pstr - str);
*kp = k;
return 0;
@@ -1292,7 +2140,7 @@ backref_match_at_nested_level(regex_t* reg,
}
else if (level == nest) {
if (k->type == STK_MEM_START) {
- if (mem_is_in_memp(k->id, mem_num, memp)) {
+ if (mem_is_in_memp(k->zid, mem_num, memp)) {
pstart = k->u.mem.pstr;
if (IS_NOT_NULL(pend)) {
if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
@@ -1316,7 +2164,7 @@ backref_match_at_nested_level(regex_t* reg,
}
}
else if (k->type == STK_MEM_END) {
- if (mem_is_in_memp(k->id, mem_num, memp)) {
+ if (mem_is_in_memp(k->zid, mem_num, memp)) {
pend = k->u.mem.pstr;
}
}
@@ -1347,7 +2195,7 @@ backref_check_at_nested_level(regex_t* reg,
}
else if (level == nest) {
if (k->type == STK_MEM_END) {
- if (mem_is_in_memp(k->id, mem_num, memp)) {
+ if (mem_is_in_memp(k->zid, mem_num, memp)) {
return 1;
}
}
@@ -1391,14 +2239,14 @@ static int OpCurr = OP_FINISH;
static int OpPrevTarget = OP_FAIL;
static int MaxStackDepth = 0;
-#define MOP_IN(opcode) do {\
+#define SOP_IN(opcode) do {\
if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
OpCurr = opcode;\
OpCounter[opcode]++;\
GETTIME(ts);\
} while(0)
-#define MOP_OUT do {\
+#define SOP_OUT do {\
GETTIME(te);\
OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
@@ -1422,9 +2270,9 @@ onig_print_statistics(FILE* f)
r = fprintf(f, " count prev time\n");
if (r < 0) return -1;
- for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
+ for (i = 0; OpInfo[i].opcode >= 0; i++) {
r = fprintf(f, "%8d: %8d: %10ld: %s\n",
- OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
+ OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
if (r < 0) return -1;
}
r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
@@ -1442,8 +2290,8 @@ onig_print_statistics(FILE* f)
#else
#define STACK_INC stk++
-#define MOP_IN(opcode)
-#define MOP_OUT
+#define SOP_IN(opcode)
+#define SOP_OUT
#endif
@@ -1459,10 +2307,8 @@ typedef struct {
/* if sstart == str then set sprev to NULL. */
static int
match_at(regex_t* reg, const UChar* str, const UChar* end,
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- const UChar* in_right_range,
-#endif
- const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
+ const UChar* in_right_range, const UChar* sstart, UChar* sprev,
+ MatchArg* msa)
{
static UChar FinishCode[] = { OP_FINISH };
@@ -1480,16 +2326,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
UChar* keep;
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- int scv;
- unsigned char* state_check_buff = msa->state_check_buff;
- int num_comb_exp_check = reg->num_comb_exp_check;
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ unsigned long retry_limit_in_match;
+ unsigned long retry_in_match_counter;
#endif
+
+#ifdef USE_CALLOUT
+ int of;
+#endif
+
UChar *p = reg->p;
OnigOptionType option = reg->options;
OnigEncoding encode = reg->enc;
OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
+#ifdef USE_CALLOUT
+ msa->mp->match_at_call_counter++;
+#endif
+
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ retry_limit_in_match = msa->retry_limit_in_match;
+#endif
+
//n = reg->num_repeat + reg->num_mem * 2;
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
@@ -1506,11 +2364,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(sstart - str));
#endif
- STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
best_len = ONIG_MISMATCH;
keep = s = (UChar* )sstart;
+ STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */
INIT_RIGHT_RANGE;
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ retry_in_match_counter = 0;
+#endif
+
while (1) {
#ifdef ONIG_DEBUG_MATCH
{
@@ -1533,7 +2395,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
fputs((char* )buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
- fprintf(stderr, "%4d: ", (int )(p - reg->p));
+ if (p == FinishCode)
+ fprintf(stderr, "----: ");
+ else
+ fprintf(stderr, "%4d: ", (int )(p - reg->p));
onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode);
fprintf(stderr, "\n");
}
@@ -1541,7 +2406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sbegin = s;
switch (*p++) {
- case OP_END: MOP_IN(OP_END);
+ case OP_END: SOP_IN(OP_END);
n = (int )(s - sstart);
if (n > best_len) {
OnigRegion* region;
@@ -1639,7 +2504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
end_best_len:
#endif
- MOP_OUT;
+ SOP_OUT;
if (IS_FIND_CONDITION(option)) {
if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
@@ -1655,14 +2520,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto finish;
break;
- case OP_EXACT1: MOP_IN(OP_EXACT1);
+ case OP_EXACT1: SOP_IN(OP_EXACT1);
DATA_ENSURE(1);
if (*p != *s) goto fail;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
+ case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC);
{
int len;
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -1681,21 +2546,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; q++;
}
}
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_EXACT2: MOP_IN(OP_EXACT2);
+ case OP_EXACT2: SOP_IN(OP_EXACT2);
DATA_ENSURE(2);
if (*p != *s) goto fail;
p++; s++;
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACT3: MOP_IN(OP_EXACT3);
+ case OP_EXACT3: SOP_IN(OP_EXACT3);
DATA_ENSURE(3);
if (*p != *s) goto fail;
p++; s++;
@@ -1704,11 +2569,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACT4: MOP_IN(OP_EXACT4);
+ case OP_EXACT4: SOP_IN(OP_EXACT4);
DATA_ENSURE(4);
if (*p != *s) goto fail;
p++; s++;
@@ -1719,11 +2584,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACT5: MOP_IN(OP_EXACT5);
+ case OP_EXACT5: SOP_IN(OP_EXACT5);
DATA_ENSURE(5);
if (*p != *s) goto fail;
p++; s++;
@@ -1736,22 +2601,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTN: MOP_IN(OP_EXACTN);
+ case OP_EXACTN: SOP_IN(OP_EXACTN);
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen);
while (tlen-- > 0) {
if (*p++ != *s++) goto fail;
}
sprev = s - 1;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
+ case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC);
{
int len;
UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -1775,20 +2640,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
+ case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1);
DATA_ENSURE(2);
if (*p != *s) goto fail;
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
+ case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2);
DATA_ENSURE(4);
if (*p != *s) goto fail;
p++; s++;
@@ -1799,11 +2664,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
+ case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3);
DATA_ENSURE(6);
if (*p != *s) goto fail;
p++; s++;
@@ -1818,11 +2683,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
+ case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N);
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 2);
while (tlen-- > 0) {
@@ -1832,11 +2697,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - 2;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
+ case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N);
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 3);
while (tlen-- > 0) {
@@ -1848,11 +2713,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - 3;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
+ case OP_EXACTMBN: SOP_IN(OP_EXACTMBN);
GET_LENGTH_INC(tlen, p); /* mb-len */
GET_LENGTH_INC(tlen2, p); /* string len */
tlen2 *= tlen;
@@ -1862,19 +2727,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - tlen;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_CCLASS: MOP_IN(OP_CCLASS);
+ case OP_CCLASS: SOP_IN(OP_CCLASS);
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
p += SIZE_BITSET;
s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
+ case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
@@ -1900,10 +2765,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
}
p += tlen;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
+ case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
@@ -1918,18 +2783,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
s++;
}
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
+ case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT);
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
p += SIZE_BITSET;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
+ case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
s++;
@@ -1967,10 +2832,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
cc_mb_not_success:
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
+ case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
@@ -1985,11 +2850,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
s++;
}
- MOP_OUT;
+ SOP_OUT;
break;
#ifdef USE_OP_CCLASS_NODE
- case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
+ case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE);
{
OnigCodePoint code;
void *node;
@@ -2005,28 +2870,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
code = ONIGENC_MBC_TO_CODE(encode, ss, s);
if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
}
- MOP_OUT;
+ SOP_OUT;
break;
#endif
- case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
+ case OP_ANYCHAR: SOP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
s += n;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
+ case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML);
DATA_ENSURE(1);
n = enclen(encode, s);
DATA_ENSURE(n);
s += n;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
+ case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR);
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enclen(encode, s);
@@ -2035,11 +2900,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sprev = s;
s += n;
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
+ case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR);
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enclen(encode, s);
@@ -2053,11 +2918,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s++;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
+ case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
@@ -2069,10 +2934,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s += n;
}
p++;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
@@ -2089,87 +2954,46 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
p++;
- MOP_OUT;
- break;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
- GET_STATE_CHECK_NUM_INC(mem, p);
- while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enclen(encode, s);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
- s += n;
- }
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_STATE_CHECK_ANYCHAR_ML_STAR:
- MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
-
- GET_STATE_CHECK_NUM_INC(mem, p);
- while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enclen(encode, s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
- }
- MOP_OUT;
- break;
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
-
- case OP_WORD: MOP_IN(OP_WORD);
+ case OP_WORD: SOP_IN(OP_WORD);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_WORD_ASCII: MOP_IN(OP_WORD_ASCII);
+ case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
goto fail;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_NO_WORD: MOP_IN(OP_NO_WORD);
+ case OP_NO_WORD: SOP_IN(OP_NO_WORD);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_NO_WORD_ASCII: MOP_IN(OP_NO_WORD_ASCII);
+ case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
goto fail;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_WORD_BOUNDARY: MOP_IN(OP_WORD_BOUNDARY);
+ case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY);
{
ModeType mode;
GET_MODE_INC(mode, p); // ascii_mode
@@ -2189,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_NO_WORD_BOUNDARY: MOP_IN(OP_NO_WORD_BOUNDARY);
+ case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY);
{
ModeType mode;
GET_MODE_INC(mode, p); // ascii_mode
@@ -2212,20 +3036,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#ifdef USE_WORD_BEGIN_END
- case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
+ case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN);
{
ModeType mode;
GET_MODE_INC(mode, p); // ascii_mode
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- if (ON_STR_BEGIN(s) ||
- ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
- MOP_OUT;
+ if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+ SOP_OUT;
continue;
}
}
@@ -2233,14 +3056,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
break;
- case OP_WORD_END: MOP_IN(OP_WORD_END);
+ case OP_WORD_END: SOP_IN(OP_WORD_END);
{
ModeType mode;
GET_MODE_INC(mode, p); // ascii_mode
if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
}
@@ -2250,82 +3073,82 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
goto fail;
break;
case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))
goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
+ case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF);
if (! ON_STR_BEGIN(s)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_END_BUF: MOP_IN(OP_END_BUF);
+ case OP_END_BUF: SOP_IN(OP_END_BUF);
if (! ON_STR_END(s)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
+ case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE);
if (ON_STR_BEGIN(s)) {
if (IS_NOTBOL(msa->options)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
goto fail;
break;
- case OP_END_LINE: MOP_IN(OP_END_LINE);
+ case OP_END_LINE: SOP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
}
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
#endif
goto fail;
break;
- case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
+ case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
}
@@ -2333,7 +3156,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
ON_STR_END(s + enclen(encode, s))) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
@@ -2341,7 +3164,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar* ss = s + enclen(encode, s);
ss += enclen(encode, ss);
if (ON_STR_END(ss)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
}
@@ -2349,53 +3172,53 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
break;
- case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
+ case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION);
if (s != msa->start)
goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
+ case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH);
GET_MEMNUM_INC(mem, p);
STACK_PUSH_MEM_START(mem, s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
+ case OP_MEMORY_START: SOP_IN(OP_MEMORY_START);
GET_MEMNUM_INC(mem, p);
mem_start_stk[mem] = (StackIndex )((void* )s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
+ case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH);
GET_MEMNUM_INC(mem, p);
STACK_PUSH_MEM_END(mem, s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
+ case OP_MEMORY_END: SOP_IN(OP_MEMORY_END);
GET_MEMNUM_INC(mem, p);
mem_end_stk[mem] = (StackIndex )((void* )s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#ifdef USE_CALL
- case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
+ case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC);
GET_MEMNUM_INC(mem, p);
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
STACK_PUSH_MEM_END(mem, s);
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
+ case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC);
GET_MEMNUM_INC(mem, p);
mem_end_stk[mem] = (StackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
@@ -2406,22 +3229,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
STACK_PUSH_MEM_END_MARK(mem);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
- case OP_BACKREF1: MOP_IN(OP_BACKREF1);
+ case OP_BACKREF1: SOP_IN(OP_BACKREF1);
mem = 1;
goto backref;
break;
- case OP_BACKREF2: MOP_IN(OP_BACKREF2);
+ case OP_BACKREF2: SOP_IN(OP_BACKREF2);
mem = 2;
goto backref;
break;
- case OP_BACKREF_N: MOP_IN(OP_BACKREF_N);
+ case OP_BACKREF_N: SOP_IN(OP_BACKREF_N);
GET_MEMNUM_INC(mem, p);
backref:
{
@@ -2446,12 +3269,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
- case OP_BACKREF_N_IC: MOP_IN(OP_BACKREF_N_IC);
+ case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC);
GET_MEMNUM_INC(mem, p);
{
int len;
@@ -2475,12 +3298,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
- case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
+ case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI);
{
int len, is_fail;
UChar *pstart, *pend, *swork;
@@ -2514,12 +3337,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
- case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
+ case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC);
{
int len, is_fail;
UChar *pstart, *pend, *swork;
@@ -2553,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
@@ -2580,13 +3403,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else
goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
#endif
- case OP_BACKREF_CHECK: MOP_IN(OP_BACKREF_CHECK);
+ case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK);
{
GET_LENGTH_INC(tlen, p);
for (i = 0; i < tlen; i++) {
@@ -2599,7 +3422,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
@@ -2619,36 +3442,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else
goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
#endif
#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
- case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
+ case OP_SET_OPTION_PUSH: SOP_IN(OP_SET_OPTION_PUSH);
GET_OPTION_INC(option, p);
STACK_PUSH_ALT(p, s, sprev);
p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
+ case OP_SET_OPTION: SOP_IN(OP_SET_OPTION);
GET_OPTION_INC(option, p);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
- case OP_EMPTY_CHECK_START: MOP_IN(OP_EMPTY_CHECK_START);
+ case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START);
GET_MEMNUM_INC(mem, p); /* mem: null check id */
STACK_PUSH_EMPTY_CHECK_START(mem, s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EMPTY_CHECK_END: MOP_IN(OP_EMPTY_CHECK_END);
+ case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END);
{
int is_empty;
@@ -2677,12 +3500,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
- case OP_EMPTY_CHECK_END_MEMST: MOP_IN(OP_EMPTY_CHECK_END_MEMST);
+ case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST);
{
int is_empty;
@@ -2696,14 +3519,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto empty_check_found;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
#ifdef USE_CALL
case OP_EMPTY_CHECK_END_MEMST_PUSH:
- MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);
+ SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);
{
int is_empty;
@@ -2725,103 +3548,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_EMPTY_CHECK_END(mem);
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
- case OP_JUMP: MOP_IN(OP_JUMP);
+ case OP_JUMP: SOP_IN(OP_JUMP);
GET_RELADDR_INC(addr, p);
p += addr;
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
+ SOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH;
continue;
break;
- case OP_PUSH: MOP_IN(OP_PUSH);
+ case OP_PUSH: SOP_IN(OP_PUSH);
GET_RELADDR_INC(addr, p);
STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PUSH_SUPER: MOP_IN(OP_PUSH_SUPER);
+ case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER);
GET_RELADDR_INC(addr, p);
STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
- MOP_OUT;
- continue;
- break;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
- GET_STATE_CHECK_NUM_INC(mem, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- GET_RELADDR_INC(addr, p);
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
- MOP_OUT;
- continue;
- break;
-
- case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
- GET_STATE_CHECK_NUM_INC(mem, p);
- GET_RELADDR_INC(addr, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) {
- p += addr;
- }
- else {
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
- }
- MOP_OUT;
- continue;
- break;
-
- case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
- GET_STATE_CHECK_NUM_INC(mem, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_STATE_CHECK(s, mem);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
- case OP_POP: MOP_IN(OP_POP);
+ case OP_POP_OUT: SOP_IN(OP_POP_OUT);
STACK_POP_ONE;
- MOP_OUT;
+ // for stop backtrack
+ //CHECK_RETRY_LIMIT_IN_MATCH;
+ SOP_OUT;
continue;
break;
- case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
+ case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1);
GET_RELADDR_INC(addr, p);
if (*p == *s && DATA_ENSURE_CHECK1) {
p++;
STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
}
p += (addr + 1);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
+ case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT);
GET_RELADDR_INC(addr, p);
if (*p == *s) {
p++;
STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
}
p++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_REPEAT: MOP_IN(OP_REPEAT);
+ case OP_REPEAT: SOP_IN(OP_REPEAT);
{
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
GET_RELADDR_INC(addr, p);
@@ -2834,11 +3622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_ALT(p + addr, s, sprev);
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
+ case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG);
{
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
GET_RELADDR_INC(addr, p);
@@ -2852,11 +3640,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += addr;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
+ case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
si = repeat_stk[mem];
stkp = STACK_AT(si);
@@ -2874,19 +3662,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p = stkp->u.repeat.pcode;
}
STACK_PUSH_REPEAT_INC(si);
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
+ SOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH;
continue;
break;
- case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
+ case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc;
break;
- case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
+ case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
si = repeat_stk[mem];
stkp = STACK_AT(si);
@@ -2908,68 +3696,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
STACK_PUSH_REPEAT_INC(si);
}
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
+ SOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH;
continue;
break;
- case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
+ case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc_ng;
break;
- case OP_PREC_READ_START: MOP_IN(OP_PREC_READ_START);
+ case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START);
STACK_PUSH_POS(s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PREC_READ_END: MOP_IN(OP_PREC_READ_END);
+ case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END);
{
STACK_EXEC_TO_VOID(stkp);
s = stkp->u.state.pstr;
sprev = stkp->u.state.pstr_prev;
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PREC_READ_NOT_START: MOP_IN(OP_PREC_READ_NOT_START);
+ case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START);
GET_RELADDR_INC(addr, p);
STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PREC_READ_NOT_END: MOP_IN(OP_PREC_READ_NOT_END);
+ case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END);
STACK_POP_TIL_ALT_PREC_READ_NOT;
goto fail;
break;
- case OP_ATOMIC_START: MOP_IN(OP_ATOMIC_START);
+ case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START);
STACK_PUSH_TO_VOID_START;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_ATOMIC_END: MOP_IN(OP_ATOMIC_END);
+ case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END);
STACK_EXEC_TO_VOID(stkp);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
+ case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND);
GET_LENGTH_INC(tlen, p);
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_LOOK_BEHIND_NOT_START: MOP_IN(OP_LOOK_BEHIND_NOT_START);
+ case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START);
GET_RELADDR_INC(addr, p);
GET_LENGTH_INC(tlen, p);
q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
@@ -2984,33 +3772,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s = q;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_LOOK_BEHIND_NOT_END: MOP_IN(OP_LOOK_BEHIND_NOT_END);
+ case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END);
STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
goto fail;
break;
#ifdef USE_CALL
- case OP_CALL: MOP_IN(OP_CALL);
+ case OP_CALL: SOP_IN(OP_CALL);
GET_ABSADDR_INC(addr, p);
STACK_PUSH_CALL_FRAME(p);
p = reg->p + addr;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_RETURN: MOP_IN(OP_RETURN);
+ case OP_RETURN: SOP_IN(OP_RETURN);
STACK_RETURN(p);
STACK_PUSH_RETURN;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
- case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL);
+ case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL);
{
SaveType type;
GET_SAVE_TYPE_INC(type, p);
@@ -3029,11 +3817,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR);
+ case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR);
{
UpdateVarType type;
enum SaveType save_type;
@@ -3061,31 +3849,99 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
}
}
- MOP_OUT;
+ SOP_OUT;
+ continue;
+ break;
+
+#ifdef USE_CALLOUT
+ case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS);
+ of = ONIG_CALLOUT_OF_CONTENTS;
+ goto callout_common_entry;
+
+ SOP_OUT;
continue;
break;
+ case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME);
+ {
+ int call_result;
+ int name_id;
+ int num;
+ int in;
+ CalloutListEntry* e;
+ OnigCalloutFunc func;
+ OnigCalloutArgs args;
+
+ of = ONIG_CALLOUT_OF_NAME;
+ GET_MEMNUM_INC(name_id, p);
+
+ callout_common_entry:
+ GET_MEMNUM_INC(num, p);
+ e = onig_reg_callout_list_at(reg, num);
+ in = e->in;
+ if (of == ONIG_CALLOUT_OF_NAME) {
+ func = onig_get_callout_start_func(reg, num);
+ }
+ else {
+ name_id = ONIG_NON_NAME_ID;
+ func = msa->mp->progress_callout_of_contents;
+ }
+
+ if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
+ CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
+ num, msa->mp->callout_user_data, args, call_result);
+ switch (call_result) {
+ case ONIG_CALLOUT_FAIL:
+ goto fail;
+ break;
+ case ONIG_CALLOUT_SUCCESS:
+ goto retraction_callout2;
+ break;
+ default: /* error code */
+ if (call_result > 0) {
+ call_result = ONIGERR_INVALID_ARGUMENT;
+ }
+ best_len = call_result;
+ goto finish;
+ break;
+ }
+ }
+ else {
+ retraction_callout2:
+ if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
+ if (of == ONIG_CALLOUT_OF_NAME) {
+ if (IS_NOT_NULL(func)) {
+ STACK_PUSH_CALLOUT_NAME(name_id, num, func);
+ }
+ }
+ else {
+ func = msa->mp->retraction_callout_of_contents;
+ if (IS_NOT_NULL(func)) {
+ STACK_PUSH_CALLOUT_CONTENTS(num, func);
+ }
+ }
+ }
+ }
+ }
+ SOP_OUT;
+ continue;
+ break;
+#endif
+
case OP_FINISH:
goto finish;
break;
fail:
- MOP_OUT;
+ SOP_OUT;
/* fall */
- case OP_FAIL: MOP_IN(OP_FAIL);
+ case OP_FAIL: SOP_IN(OP_FAIL);
STACK_POP;
p = stk->u.state.pcode;
s = stk->u.state.pstr;
sprev = stk->u.state.pstr_prev;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- if (stk->u.state.state_check != 0) {
- stk->type = STK_STATE_CHECK_MARK;
- stk++;
- }
-#endif
-
- MOP_OUT;
+ CHECK_RETRY_LIMIT_IN_MATCH;
+ SOP_OUT;
continue;
break;
@@ -3113,6 +3969,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
unexpected_bytecode_error:
STACK_SAVE;
return ONIGERR_UNEXPECTED_BYTECODE;
+
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ retry_limit_in_match_over:
+ STACK_SAVE;
+ return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
+#endif
}
@@ -3423,23 +4285,30 @@ map_search_backward(OnigEncoding enc, UChar map[],
}
return (UChar* )NULL;
}
-
extern int
onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
OnigRegion* region, OnigOptionType option)
{
int r;
- UChar *prev;
- OnigMatchArg msa;
+ OnigMatchParam mp;
- MATCH_ARG_INIT(msa, reg, option, region, at);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- {
- int offset = at - str;
- STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
- }
-#endif
+ onig_initialize_match_param(&mp);
+ r = onig_match_with_param(reg, str, end, at, region, option, &mp);
+ onig_free_match_param_content(&mp);
+ return r;
+}
+
+extern int
+onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* at, OnigRegion* region, OnigOptionType option,
+ OnigMatchParam* mp)
+{
+ int r;
+ UChar *prev;
+ MatchArg msa;
+ ADJUST_MATCH_PARAM(reg, mp);
+ MATCH_ARG_INIT(msa, reg, option, region, at, mp);
if (region
#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
@@ -3459,11 +4328,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
}
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
- r = match_at(reg, str, end,
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- end,
-#endif
- at, prev, &msa);
+ r = match_at(reg, str, end, end, at, prev, &msa);
}
end:
@@ -3497,23 +4362,23 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
retry:
switch (reg->optimize) {
- case ONIG_OPTIMIZE_EXACT:
+ case OPTIMIZE_EXACT:
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
- case ONIG_OPTIMIZE_EXACT_IC:
+ case OPTIMIZE_EXACT_IC:
p = slow_search_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end, p, end, range);
break;
- case ONIG_OPTIMIZE_EXACT_BM:
+ case OPTIMIZE_EXACT_BM:
p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
break;
- case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+ case OPTIMIZE_EXACT_BM_NO_REV:
p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
break;
- case ONIG_OPTIMIZE_MAP:
+ case OPTIMIZE_MAP:
p = map_search(reg->enc, reg->map, p, range);
break;
}
@@ -3621,20 +4486,20 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
retry:
switch (reg->optimize) {
- case ONIG_OPTIMIZE_EXACT:
+ case OPTIMIZE_EXACT:
exact_method:
p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
- case ONIG_OPTIMIZE_EXACT_IC:
+ case OPTIMIZE_EXACT_IC:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
- case ONIG_OPTIMIZE_EXACT_BM:
- case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+ case OPTIMIZE_EXACT_BM:
+ case OPTIMIZE_EXACT_BM_NO_REV:
#ifdef USE_INT_MAP_BACKWARD
if (IS_NULL(reg->int_map_backward)) {
int r;
@@ -3653,7 +4518,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
#endif
break;
- case ONIG_OPTIMIZE_MAP:
+ case OPTIMIZE_MAP:
p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
break;
}
@@ -3725,12 +4590,25 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
OnigOptionType option)
{
int r;
+ OnigMatchParam mp;
+
+ onig_initialize_match_param(&mp);
+ r = onig_search_with_param(reg, str, end, start, range, region, option, &mp);
+ onig_free_match_param_content(&mp);
+ return r;
+
+}
+
+extern int
+onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, OnigRegion* region,
+ OnigOptionType option, OnigMatchParam* mp)
+{
+ int r;
UChar *s, *prev;
- OnigMatchArg msa;
+ MatchArg msa;
const UChar *orig_start = start;
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
const UChar *orig_range = range;
-#endif
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
@@ -3738,6 +4616,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif
+ ADJUST_MATCH_PARAM(reg, mp);
+
if (region
#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
@@ -3757,7 +4637,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
@@ -3779,29 +4658,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
else goto finish; /* error */ \
}
#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
-#else
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_AND_RETURN_CHECK(none) \
- r = match_at(reg, str, end, s, prev, &msa);\
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! IS_FIND_LONGEST(reg->options)) {\
- goto match;\
- }\
- }\
- else goto finish; /* error */ \
- }
-#else
-#define MATCH_AND_RETURN_CHECK(none) \
- r = match_at(reg, str, end, s, prev, &msa);\
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- goto match;\
- }\
- else goto finish; /* error */ \
- }
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
-#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
/* anchor optimize: resume search range */
@@ -3886,7 +4742,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
goto end_buf;
}
}
- else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
+ else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) {
goto begin_position;
}
}
@@ -3902,11 +4758,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
s = (UChar* )start;
prev = (UChar* )NULL;
- MATCH_ARG_INIT(msa, reg, option, region, start);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- msa.state_check_buff = (void* )0;
- msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
-#endif
+ MATCH_ARG_INIT(msa, reg, option, region, start, mp);
MATCH_AND_RETURN_CHECK(end);
goto mismatch;
}
@@ -3918,13 +4770,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(start - str), (int )(range - str));
#endif
- MATCH_ARG_INIT(msa, reg, option, region, orig_start);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- {
- int offset = (MIN(start, range) - str);
- STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
- }
-#endif
+ MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);
s = (UChar* )start;
if (range > start) { /* forward search */
@@ -3933,7 +4779,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
else
prev = (UChar* )NULL;
- if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ if (reg->optimize != OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
sch_range = (UChar* )range;
@@ -3969,7 +4815,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (! forward_search_range(reg, str, end, s, sch_range,
&low, &high, (UChar** )NULL)) goto mismatch;
- if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+ if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) {
do {
MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
@@ -3998,12 +4844,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
}
else { /* backward search */
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
if (orig_start < end)
orig_start += enclen(reg->enc, orig_start); /* is upper range */
-#endif
- if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ if (reg->optimize != OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
if (range < end)
@@ -4204,3 +5048,600 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from)
*to = *from;
}
+
+/* for callout functions */
+
+#ifdef USE_CALLOUT
+
+extern OnigCalloutFunc
+onig_get_progress_callout(void)
+{
+ return DefaultProgressCallout;
+}
+
+extern int
+onig_set_progress_callout(OnigCalloutFunc f)
+{
+ DefaultProgressCallout = f;
+ return ONIG_NORMAL;
+}
+
+extern OnigCalloutFunc
+onig_get_retraction_callout(void)
+{
+ return DefaultRetractionCallout;
+}
+
+extern int
+onig_set_retraction_callout(OnigCalloutFunc f)
+{
+ DefaultRetractionCallout = f;
+ return ONIG_NORMAL;
+}
+
+extern int
+onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->num;
+}
+
+extern OnigCalloutIn
+onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->in;
+}
+
+extern int
+onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->name_id;
+}
+
+extern const UChar*
+onig_get_contents_by_callout_args(OnigCalloutArgs* args)
+{
+ int num;
+ CalloutListEntry* e;
+
+ num = args->num;
+ e = onig_reg_callout_list_at(args->regex, num);
+ if (IS_NULL(e)) return 0;
+ if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
+ return e->u.content.start;
+ }
+
+ return 0;
+}
+
+extern const UChar*
+onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
+{
+ int num;
+ CalloutListEntry* e;
+
+ num = args->num;
+ e = onig_reg_callout_list_at(args->regex, num);
+ if (IS_NULL(e)) return 0;
+ if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
+ return e->u.content.end;
+ }
+
+ return 0;
+}
+
+extern int
+onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
+{
+ int num;
+ CalloutListEntry* e;
+
+ num = args->num;
+ e = onig_reg_callout_list_at(args->regex, num);
+ if (IS_NULL(e)) return 0;
+ if (e->of == ONIG_CALLOUT_OF_NAME) {
+ return e->u.arg.num;
+ }
+
+ return ONIGERR_INVALID_ARGUMENT;
+}
+
+extern int
+onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
+{
+ int num;
+ CalloutListEntry* e;
+
+ num = args->num;
+ e = onig_reg_callout_list_at(args->regex, num);
+ if (IS_NULL(e)) return 0;
+ if (e->of == ONIG_CALLOUT_OF_NAME) {
+ return e->u.arg.passed_num;
+ }
+
+ return ONIGERR_INVALID_ARGUMENT;
+}
+
+extern int
+onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
+ OnigType* type, OnigValue* val)
+{
+ int num;
+ CalloutListEntry* e;
+
+ num = args->num;
+ e = onig_reg_callout_list_at(args->regex, num);
+ if (IS_NULL(e)) return 0;
+ if (e->of == ONIG_CALLOUT_OF_NAME) {
+ if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
+ if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index];
+ return ONIG_NORMAL;
+ }
+
+ return ONIGERR_INVALID_ARGUMENT;
+}
+
+extern const UChar*
+onig_get_string_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->string;
+}
+
+extern const UChar*
+onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->string_end;
+}
+
+extern const UChar*
+onig_get_start_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->start;
+}
+
+extern const UChar*
+onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->right_range;
+}
+
+extern const UChar*
+onig_get_current_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->current;
+}
+
+extern OnigRegex
+onig_get_regex_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->regex;
+}
+
+extern unsigned long
+onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
+{
+ return args->retry_in_match_counter;
+}
+
+
+extern int
+onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
+{
+ OnigRegex reg;
+ const UChar* str;
+ StackType* stk_base;
+ int i;
+
+ i = mem_num;
+ reg = a->regex;
+ str = a->string;
+ stk_base = a->stk_base;
+
+ if (i > 0) {
+ if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (MEM_STATUS_AT(reg->bt_mem_start, i))
+ *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str);
+ else
+ *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str);
+
+ *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )a->mem_end_stk[i])) - str);
+ }
+ else {
+ *begin = *end = ONIG_REGION_NOTPOS;
+ }
+ }
+ else if (i == 0) {
+#if 0
+ *begin = a->start - str;
+ *end = a->current - str;
+#else
+ return ONIGERR_INVALID_ARGUMENT;
+#endif
+ }
+ else
+ return ONIGERR_INVALID_ARGUMENT;
+
+ return ONIG_NORMAL;
+}
+
+extern int
+onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
+{
+ int n;
+
+ n = (int )(a->stk - a->stk_base);
+
+ if (used_num != 0)
+ *used_num = n;
+
+ if (used_bytes != 0)
+ *used_bytes = n * sizeof(StackType);
+
+ return ONIG_NORMAL;
+}
+
+
+/* builtin callout functions */
+
+extern int
+onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
+{
+ return ONIG_CALLOUT_FAIL;
+}
+
+extern int
+onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
+{
+ return ONIG_MISMATCH;
+}
+
+#if 0
+extern int
+onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
+{
+ return ONIG_CALLOUT_SUCCESS;
+}
+#endif
+
+extern int
+onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{
+ int r;
+ int n;
+ OnigValue val;
+
+ r = onig_get_arg_by_callout_args(args, 0, 0, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ n = (int )val.l;
+ if (n >= 0) {
+ n = ONIGERR_INVALID_CALLOUT_BODY;
+ }
+
+ return n;
+}
+
+extern int
+onig_builtin_count(OnigCalloutArgs* args, void* user_data)
+{
+ (void )onig_check_callout_data_and_clear_old_values(args);
+
+ return onig_builtin_total_count(args, user_data);
+}
+
+extern int
+onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{
+ int r;
+ int slot;
+ OnigType type;
+ OnigValue val;
+ OnigValue aval;
+ OnigCodePoint count_type;
+
+ r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
+ if (r != ONIG_NORMAL) return r;
+
+ count_type = aval.c;
+ if (count_type != '>' && count_type != 'X' && count_type != '<')
+ return ONIGERR_INVALID_CALLOUT_ARG;
+
+ r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
+ &type, &val);
+ if (r < ONIG_NORMAL)
+ return r;
+ else if (r > ONIG_NORMAL) {
+ /* type == void: initial state */
+ val.l = 0;
+ }
+
+ if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
+ slot = 2;
+ if (count_type == '<')
+ val.l++;
+ else if (count_type == 'X')
+ val.l--;
+ }
+ else {
+ slot = 1;
+ if (count_type != '<')
+ val.l++;
+ }
+
+ r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ /* slot 1: in progress counter, slot 2: in retraction counter */
+ r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
+ &type, &val);
+ if (r < ONIG_NORMAL)
+ return r;
+ else if (r > ONIG_NORMAL) {
+ val.l = 0;
+ }
+
+ val.l++;
+ r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ return ONIG_CALLOUT_SUCCESS;
+}
+
+extern int
+onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{
+ int r;
+ int slot;
+ OnigType type;
+ OnigValue val;
+ OnigValue aval;
+
+ (void )onig_check_callout_data_and_clear_old_values(args);
+
+ slot = 0;
+ r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
+ if (r < ONIG_NORMAL)
+ return r;
+ else if (r > ONIG_NORMAL) {
+ /* type == void: initial state */
+ type = ONIG_TYPE_LONG;
+ val.l = 0;
+ }
+
+ r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
+ if (r != ONIG_NORMAL) return r;
+
+ if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
+ val.l--;
+ }
+ else {
+ if (val.l >= aval.l) return ONIG_CALLOUT_FAIL;
+ val.l++;
+ }
+
+ r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ return ONIG_CALLOUT_SUCCESS;
+}
+
+enum OP_CMP {
+ OP_EQ,
+ OP_NE,
+ OP_LT,
+ OP_GT,
+ OP_LE,
+ OP_GE
+};
+
+extern int
+onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{
+ int r;
+ int slot;
+ long lv;
+ long rv;
+ OnigType type;
+ OnigValue val;
+ regex_t* reg;
+ enum OP_CMP op;
+
+ reg = args->regex;
+
+ r = onig_get_arg_by_callout_args(args, 0, &type, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ if (type == ONIG_TYPE_TAG) {
+ r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
+ if (r < ONIG_NORMAL) return r;
+ else if (r > ONIG_NORMAL)
+ lv = 0L;
+ else
+ lv = val.l;
+ }
+ else { /* ONIG_TYPE_LONG */
+ lv = val.l;
+ }
+
+ r = onig_get_arg_by_callout_args(args, 2, &type, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ if (type == ONIG_TYPE_TAG) {
+ r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
+ if (r < ONIG_NORMAL) return r;
+ else if (r > ONIG_NORMAL)
+ rv = 0L;
+ else
+ rv = val.l;
+ }
+ else { /* ONIG_TYPE_LONG */
+ rv = val.l;
+ }
+
+ slot = 0;
+ r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
+ if (r < ONIG_NORMAL)
+ return r;
+ else if (r > ONIG_NORMAL) {
+ /* type == void: initial state */
+ OnigCodePoint c1, c2;
+ UChar* p;
+
+ r = onig_get_arg_by_callout_args(args, 1, &type, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ p = val.s.start;
+ c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
+ p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
+ if (p < val.s.end) {
+ c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
+ p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
+ if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
+ }
+ else
+ c2 = 0;
+
+ switch (c1) {
+ case '=':
+ if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
+ op = OP_EQ;
+ break;
+ case '!':
+ if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
+ op = OP_NE;
+ break;
+ case '<':
+ if (c2 == '=') op = OP_LE;
+ else if (c2 == 0) op = OP_LT;
+ else return ONIGERR_INVALID_CALLOUT_ARG;
+ break;
+ case '>':
+ if (c2 == '=') op = OP_GE;
+ else if (c2 == 0) op = OP_GT;
+ else return ONIGERR_INVALID_CALLOUT_ARG;
+ break;
+ default:
+ return ONIGERR_INVALID_CALLOUT_ARG;
+ break;
+ }
+ val.l = (long )op;
+ r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
+ if (r != ONIG_NORMAL) return r;
+ }
+ else {
+ op = (enum OP_CMP )val.l;
+ }
+
+ switch (op) {
+ case OP_EQ: r = (lv == rv); break;
+ case OP_NE: r = (lv != rv); break;
+ case OP_LT: r = (lv < rv); break;
+ case OP_GT: r = (lv > rv); break;
+ case OP_LE: r = (lv <= rv); break;
+ case OP_GE: r = (lv >= rv); break;
+ }
+
+ return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
+}
+
+
+#include <stdio.h>
+
+static FILE* OutFp;
+
+/* name start with "onig_" for macros. */
+static int
+onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
+{
+ int r;
+ int num;
+ size_t tag_len;
+ const UChar* start;
+ const UChar* right;
+ const UChar* current;
+ const UChar* string;
+ const UChar* strend;
+ const UChar* tag_start;
+ const UChar* tag_end;
+ regex_t* reg;
+ OnigCalloutIn in;
+ OnigType type;
+ OnigValue val;
+ char buf[20];
+ FILE* fp;
+
+ fp = OutFp;
+
+ r = onig_get_arg_by_callout_args(args, 0, &type, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ in = onig_get_callout_in_by_callout_args(args);
+ if (in == ONIG_CALLOUT_IN_PROGRESS) {
+ if (val.c == '<')
+ return ONIG_CALLOUT_SUCCESS;
+ }
+ else {
+ if (val.c != 'X' && val.c != '<')
+ return ONIG_CALLOUT_SUCCESS;
+ }
+
+ num = onig_get_callout_num_by_callout_args(args);
+ start = onig_get_start_by_callout_args(args);
+ right = onig_get_right_range_by_callout_args(args);
+ current = onig_get_current_by_callout_args(args);
+ string = onig_get_string_by_callout_args(args);
+ strend = onig_get_string_end_by_callout_args(args);
+ reg = onig_get_regex_by_callout_args(args);
+ tag_start = onig_get_callout_tag_start(reg, num);
+ tag_end = onig_get_callout_tag_end(reg, num);
+
+ if (tag_start == 0)
+ xsnprintf(buf, sizeof(buf), "#%d", num);
+ else {
+ /* CAUTION: tag string is not terminated with NULL. */
+ int i;
+
+ tag_len = tag_end - tag_start;
+ if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
+ for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
+ buf[tag_len] = '\0';
+ }
+
+ fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
+ buf,
+ in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
+ (int )(current - string),
+ (int )(start - string),
+ (int )(right - string),
+ (int )(strend - string));
+ fflush(fp);
+
+ return ONIG_CALLOUT_SUCCESS;
+}
+
+extern int
+onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
+{
+ int id;
+ char* name;
+ OnigEncoding enc;
+ unsigned int ts[4];
+ OnigValue opts[4];
+
+ if (IS_NOT_NULL(fp))
+ OutFp = (FILE* )fp;
+ else
+ OutFp = stdout;
+
+ enc = ONIG_ENCODING_ASCII;
+
+ name = "MON";
+ ts[0] = ONIG_TYPE_CHAR;
+ opts[0].c = '>';
+ BC_B_O(name, monitor, 1, ts, 1, opts);
+
+ return ONIG_NORMAL;
+}
+
+#endif /* USE_CALLOUT */