summaryrefslogtreecommitdiff
path: root/src/regexec.c
diff options
context:
space:
mode:
authorJörg Frings-Fürst <debian@jff.email>2020-11-08 10:59:08 +0100
committerJörg Frings-Fürst <debian@jff.email>2020-11-08 10:59:08 +0100
commit22bb4b319b3d722ac7bf041a6374cd40afdc4d53 (patch)
treea07d7d0764a8488f4b5ebef1561e2f3d8caacc05 /src/regexec.c
parent0f259c3073f341c48468e80e93731daa31698030 (diff)
parentb4158caf13bc715096c1df7c040c9d25995d76f2 (diff)
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'src/regexec.c')
-rw-r--r--src/regexec.c592
1 files changed, 320 insertions, 272 deletions
diff --git a/src/regexec.c b/src/regexec.c
index 1b6895d..bb6b474 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -46,15 +46,15 @@
#define CHECK_INTERRUPT_IN_MATCH
-#define STACK_MEM_START(reg, i) \
- (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \
- STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i])))
+#define STACK_MEM_START(reg, idx) \
+ (MEM_STATUS_AT((reg)->push_mem_start, (idx)) != 0 ? \
+ STACK_AT(mem_start_stk[idx].i)->u.mem.pstr : mem_start_stk[idx].s)
-#define STACK_MEM_END(reg, i) \
- (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \
- STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i])))
+#define STACK_MEM_END(reg, idx) \
+ (MEM_STATUS_AT((reg)->push_mem_end, (idx)) != 0 ? \
+ STACK_AT(mem_end_stk[idx].i)->u.mem.pstr : mem_end_stk[idx].s)
-static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev);
+static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high);
static int
search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
@@ -170,6 +170,9 @@ typedef struct {
int best_len; /* for ONIG_OPTION_FIND_LONGEST */
UChar* best_s;
#endif
+#ifdef USE_CALL
+ unsigned long subexp_call_in_search_counter;
+#endif
} MatchArg;
@@ -1057,8 +1060,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/** stack **/
-#define INVALID_STACK_INDEX -1
-
#define STK_ALT_FLAG 0x0001
/* stack type */
@@ -1099,7 +1100,15 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_MASK_TO_VOID_TARGET 0x100e
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
-typedef intptr_t StackIndex;
+typedef ptrdiff_t StackIndex;
+
+#define INVALID_STACK_INDEX ((StackIndex )-1)
+
+typedef union {
+ StackIndex i;
+ UChar* s;
+} StkPtrType;
+
typedef struct _StackType {
unsigned int type;
@@ -1108,7 +1117,6 @@ typedef struct _StackType {
struct {
Operation* pcode; /* byte code position */
UChar* pstr; /* string position */
- UChar* pstr_prev; /* previous char position of pstr */
} state;
struct {
int count;
@@ -1119,8 +1127,8 @@ typedef struct _StackType {
struct {
UChar *pstr; /* start/end position */
/* Following information is set, if this stack type is MEM-START */
- StackIndex prev_start; /* prev. info (for backtrack "(...)*" ) */
- StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */
+ StkPtrType prev_start; /* prev. info (for backtrack "(...)*" ) */
+ StkPtrType prev_end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
UChar *pstr; /* start position */
@@ -1166,8 +1174,8 @@ struct OnigCalloutArgsStruct {
MatchArg* msa;
StackType* stk_base;
StackType* stk;
- StackIndex* mem_start_stk;
- StackIndex* mem_end_stk;
+ StkPtrType* mem_start_stk;
+ StkPtrType* mem_end_stk;
};
#endif
@@ -1178,7 +1186,7 @@ struct OnigCalloutArgsStruct {
#define UPDATE_FOR_STACK_REALLOC do{\
repeat_stk = (StackIndex* )alloc_base;\
empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
- mem_start_stk = (StackIndex* )(empty_check_stk + reg->num_empty_check);\
+ mem_start_stk = (StkPtrType* )(empty_check_stk + reg->num_empty_check);\
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
@@ -1194,7 +1202,7 @@ struct OnigCalloutArgsStruct {
#define PTR_NUM_SIZE(reg) (((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
- mem_start_stk = (StackIndex* )alloc_base;\
+ mem_start_stk = (StkPtrType* )alloc_base;\
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
@@ -1218,8 +1226,12 @@ struct OnigCalloutArgsStruct {
#endif
#if defined(USE_CALL)
+#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) \
+ (msa).subexp_call_in_search_counter = 0;
+
#define POP_CALL else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}
#else
+#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)
#define POP_CALL
#endif
@@ -1231,6 +1243,7 @@ struct OnigCalloutArgsStruct {
(msa).start = (arg_start);\
(msa).match_stack_limit = (mpv)->match_stack_limit;\
RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
+ SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
(msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
@@ -1243,6 +1256,7 @@ struct OnigCalloutArgsStruct {
(msa).start = (arg_start);\
(msa).match_stack_limit = (mpv)->match_stack_limit;\
RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
+ SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
(msa).mp = mpv;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
@@ -1258,27 +1272,27 @@ struct OnigCalloutArgsStruct {
is_alloca = 0;\
alloc_base = msa->stack_p;\
stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
+ + (sizeof(StkPtrType) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + msa->stack_n;\
}\
else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
is_alloca = 0;\
- alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
+ alloc_base = (char* )xmalloc(sizeof(StkPtrType) * msa->ptr_num\
+ sizeof(StackType) * (stack_num));\
CHECK_NULL_RETURN_MEMERR(alloc_base);\
stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
+ + (sizeof(StkPtrType) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
else {\
is_alloca = 1;\
- alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\
+ alloc_base = (char* )xalloca(sizeof(StkPtrType) * msa->ptr_num\
+ sizeof(StackType) * (stack_num));\
CHECK_NULL_RETURN_MEMERR(alloc_base);\
stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
+ + (sizeof(StkPtrType) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
@@ -1288,7 +1302,7 @@ struct OnigCalloutArgsStruct {
#define STACK_SAVE(msa,is_alloca,alloc_base) do{\
(msa)->stack_n = (int )(stk_end - stk_base);\
if ((is_alloca) != 0) {\
- size_t size = sizeof(StackIndex) * (msa)->ptr_num\
+ size_t size = sizeof(StkPtrType) * (msa)->ptr_num\
+ sizeof(StackType) * (msa)->stack_n;\
(msa)->stack_p = xmalloc(size);\
CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\
@@ -1373,6 +1387,24 @@ onig_set_retry_limit_in_search(unsigned long n)
#endif
}
+#ifdef USE_CALL
+static unsigned long SubexpCallLimitInSearch = DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH;
+
+extern unsigned long
+onig_get_subexp_call_limit_in_search(void)
+{
+ return SubexpCallLimitInSearch;
+}
+
+extern int
+onig_set_subexp_call_limit_in_search(unsigned long n)
+{
+ SubexpCallLimitInSearch = n;
+ return 0;
+}
+
+#endif
+
#ifdef USE_CALLOUT
static OnigCalloutFunc DefaultProgressCallout;
static OnigCalloutFunc DefaultRetractionCallout;
@@ -1637,9 +1669,9 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk = *arg_stk;
n = (unsigned int )(stk_end - stk_base);
- size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
+ size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
n *= 2;
- new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
+ new_size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
if (*is_alloca != 0) {
new_alloc_base = (char* )xmalloc(new_size);
if (IS_NULL(new_alloc_base)) {
@@ -1669,7 +1701,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
used = (int )(stk - stk_base);
*arg_alloc_base = alloc_base;
*arg_stk_base = (StackType* )(alloc_base
- + (sizeof(StackIndex) * msa->ptr_num));
+ + (sizeof(StkPtrType) * msa->ptr_num));
*arg_stk = *arg_stk_base + used;
*arg_stk_end = *arg_stk_base + n;
return 0;
@@ -1694,22 +1726,20 @@ stack_double(int* is_alloca, char** arg_alloc_base,
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
-#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+#define STACK_PUSH(stack_type,pat,s) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
STACK_INC;\
} while(0)
-#define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\
+#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->zid = (int )(id);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
STACK_INC;\
} while(0)
@@ -1724,7 +1754,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = s;\
- stk->u.state.pstr_prev = sprev;\
STACK_INC;\
} while (0)
#else
@@ -1735,10 +1764,9 @@ stack_double(int* is_alloca, char** arg_alloc_base,
} while (0)
#endif
-#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
-#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
-#define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \
- STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id)
+#define STACK_PUSH_ALT(pat,s) STACK_PUSH(STK_ALT,pat,s)
+#define STACK_PUSH_SUPER_ALT(pat,s) STACK_PUSH(STK_SUPER_ALT,pat,s)
+#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id)
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
@@ -1767,8 +1795,8 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->u.mem.pstr = (s);\
stk->u.mem.prev_start = mem_start_stk[mnum];\
stk->u.mem.prev_end = mem_end_stk[mnum];\
- mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
- mem_end_stk[mnum] = INVALID_STACK_INDEX;\
+ mem_start_stk[mnum].i = GET_STACK_INDEX(stk);\
+ mem_end_stk[mnum].i = INVALID_STACK_INDEX;\
STACK_INC;\
} while(0)
@@ -1779,7 +1807,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->u.mem.pstr = (s);\
stk->u.mem.prev_start = mem_start_stk[mnum];\
stk->u.mem.prev_end = mem_end_stk[mnum];\
- mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
+ mem_end_stk[mnum].i = GET_STACK_INDEX(stk);\
STACK_INC;\
} while(0)
@@ -1861,12 +1889,11 @@ stack_double(int* is_alloca, char** arg_alloc_base,
STACK_INC;\
} while(0)
-#define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\
+#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MARK;\
stk->zid = (sid);\
stk->u.val.v = (UChar* )(s);\
- stk->u.val.v2 = (sprev);\
STACK_INC;\
} while(0)
@@ -1885,7 +1912,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->zid = (sid);\
stk->u.val.type = (stype);\
stk->u.val.v = (UChar* )(sval);\
- stk->u.val.v2 = sprev;\
STACK_INC;\
} while(0)
@@ -1932,7 +1958,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
&& k->zid == (sid)) {\
if (level == 0) {\
(sval) = k->u.val.v;\
- sprev = k->u.val.v2;\
break;\
}\
}\
@@ -2135,14 +2160,14 @@ stack_double(int* is_alloca, char** arg_alloc_base,
} while(0)
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
- if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
+ if (k->u.mem.prev_end.i == INVALID_STACK_INDEX) {\
(addr) = 0;\
}\
else {\
if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
- (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
+ (addr) = STACK_AT(k->u.mem.prev_end.i)->u.mem.pstr;\
else\
- (addr) = (UChar* )k->u.mem.prev_end;\
+ (addr) = k->u.mem.prev_end.s;\
}\
} while (0)
@@ -2163,7 +2188,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
if (endp == 0) {\
(isnull) = 0; break;\
}\
- else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
+ else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) {\
(isnull) = 0; break;\
}\
else if (endp != s) {\
@@ -2199,7 +2224,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
if (endp == 0) {\
(isnull) = 0; break;\
}\
- else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
+ else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) { \
(isnull) = 0; break;\
}\
else if (endp != s) {\
@@ -2362,6 +2387,10 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
p1++;
p2++;
}
+ if (s2 >= end2) {
+ if (s1 < end1) return 0;
+ else break;
+ }
}
*ps2 = s2;
@@ -2390,7 +2419,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
#define ON_STR_END(s) ((s) == end)
#define DATA_ENSURE_CHECK1 (s < right_range)
#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
-#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
+#define DATA_ENSURE(n) if (right_range - s < (n)) goto fail
#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
@@ -2632,9 +2661,9 @@ typedef struct {
#define BYTECODE_INTERPRETER_START GOTO_OP;
#define BYTECODE_INTERPRETER_END
-#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
+#define CASE_OP(x) L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0)
#define DEFAULT_OP /* L_DEFAULT: */
-#define NEXT_OP sprev = sbegin; JUMP_OP
+#define NEXT_OP JUMP_OP
#define JUMP_OP GOTO_OP
#ifdef USE_DIRECT_THREADED_CODE
#define GOTO_OP goto *(p->opaddr)
@@ -2648,9 +2677,8 @@ typedef struct {
#define BYTECODE_INTERPRETER_START \
while (1) {\
MATCH_DEBUG_OUT(0)\
- sbegin = s;\
switch (p->opcode) {
-#define BYTECODE_INTERPRETER_END } sprev = sbegin; }
+#define BYTECODE_INTERPRETER_END } }
#define CASE_OP(x) case OP_##x: SOP_IN(OP_##x);
#define DEFAULT_OP default:
#define NEXT_OP break
@@ -2718,12 +2746,22 @@ typedef struct {
best_len = err_code; goto match_at_end;\
} while(0)
+#define MATCH_COUNTER_OUT(title) do {\
+ int i;\
+ fprintf(DBGFP, "%s (%ld): retry limit: %8lu, subexp_call: %8lu\n", (title), (sstart - str), retry_in_match_counter, msa->subexp_call_in_search_counter); \
+ fprintf(DBGFP, " ");\
+ for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {\
+ fprintf(DBGFP, " %6lu", subexp_call_counters[i]);\
+ }\
+ fprintf(DBGFP, "\n");\
+ fflush(DBGFP);\
+} while (0)
+
/* match data(str - end) from position (sstart). */
-/* if sstart == str then set sprev to NULL. */
static int
match_at(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* in_right_range, const UChar* sstart, UChar* sprev,
+ const UChar* in_right_range, const UChar* sstart,
MatchArg* msa)
{
@@ -2782,10 +2820,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_BACKREF_N_IC,
&&L_BACKREF_MULTI,
&&L_BACKREF_MULTI_IC,
+#ifdef USE_BACKREF_WITH_LEVEL
&&L_BACKREF_WITH_LEVEL,
&&L_BACKREF_WITH_LEVEL_IC,
+#endif
&&L_BACKREF_CHECK,
+#ifdef USE_BACKREF_WITH_LEVEL
&&L_BACKREF_CHECK_WITH_LEVEL,
+#endif
&&L_MEM_START,
&&L_MEM_START_PUSH,
&&L_MEM_END_PUSH,
@@ -2838,13 +2880,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
LengthType tlen, tlen2;
MemNumType mem;
RelAddrType addr;
- UChar *s, *ps, *sbegin;
+ UChar *s, *ps;
UChar *right_range;
int is_alloca;
char *alloc_base;
StackType *stk_base, *stk, *stk_end;
StackType *stkp; /* used as any purpose. */
- StackIndex *mem_start_stk, *mem_end_stk;
+ StkPtrType *mem_start_stk, *mem_end_stk;
UChar* keep;
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
@@ -2858,6 +2900,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_CALLOUT
int of;
#endif
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+#define MAX_SUBEXP_CALL_COUNTERS 9
+ unsigned long subexp_call_counters[MAX_SUBEXP_CALL_COUNTERS];
+#endif
Operation* p = reg->ops;
OnigOptionType option = reg->options;
@@ -2872,6 +2918,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
static unsigned int counter = 1;
#endif
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+ for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {
+ subexp_call_counters[i] = 0;
+ }
+#endif
+
#ifdef USE_DIRECT_THREADED_CODE
if (IS_NULL(msa)) {
for (i = 0; i < reg->ops_used; i++) {
@@ -2903,12 +2955,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_INIT(INIT_MATCH_STACK_SIZE);
UPDATE_FOR_STACK_REALLOC;
for (i = 1; i <= num_mem; i++) {
- mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
+ mem_start_stk[i].i = mem_end_stk[i].i = INVALID_STACK_INDEX;
}
#ifdef ONIG_DEBUG_MATCH
- fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
- str, end, sstart, sprev);
+ fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart);
fprintf(DBGFP, "size: %d, start offset: %d\n",
(int )(end - str), (int )(sstart - str));
#endif
@@ -2932,24 +2983,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (n > msa->best_len) {
msa->best_len = n;
msa->best_s = (UChar* )sstart;
+ goto set_region;
}
else
goto end_best_len;
}
#endif
best_len = n;
+
+ set_region:
region = msa->region;
if (region) {
if (keep > s) keep = s;
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
if (OPTON_POSIX_REGION(msa->options)) {
posix_regmatch_t* rmt = (posix_regmatch_t* )region;
rmt[0].rm_so = (regoff_t )(keep - str);
rmt[0].rm_eo = (regoff_t )(s - str);
for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str);
}
@@ -2959,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
else {
-#endif /* USE_POSIX_API_REGION_OPTION */
+#endif /* USE_POSIX_API */
region->beg[0] = (int )(keep - str);
region->end[0] = (int )(s - str);
for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
region->end[i] = (int )(STACK_MEM_END(reg, i) - str);
}
@@ -2996,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (r < 0) MATCH_AT_ERROR_RETURN(r);
}
#endif /* USE_CAPTURE_HISTORY */
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
} /* else OPTON_POSIX_REGION() */
#endif
} /* if (region) */
@@ -3012,8 +3066,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
best_len = ONIG_MISMATCH;
goto fail; /* for retry */
}
- if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
- goto fail; /* for retry */
+ if (OPTON_FIND_LONGEST(option)) {
+ if (s >= in_right_range && msa->best_s == sstart)
+ best_len = msa->best_len;
+ else
+ goto fail; /* for retry */
}
}
@@ -3034,7 +3091,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
- sprev = s;
s++;
INC_OP;
JUMP_OUT;
@@ -3047,7 +3103,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
- sprev = s;
s++;
INC_OP;
JUMP_OUT;
@@ -3062,7 +3117,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
- sprev = s;
s++;
INC_OP;
JUMP_OUT;
@@ -3079,7 +3133,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
- sprev = s;
s++;
INC_OP;
JUMP_OUT;
@@ -3091,7 +3144,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (tlen-- > 0) {
if (*ps++ != *s++) goto fail;
}
- sprev = s - 1;
INC_OP;
JUMP_OUT;
@@ -3112,7 +3164,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ps++; s++;
if (*ps != *s) goto fail;
ps++; s++;
- sprev = s;
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
@@ -3131,7 +3182,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ps++; s++;
if (*ps != *s) goto fail;
ps++; s++;
- sprev = s;
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
@@ -3149,7 +3199,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
- sprev = s - 2;
INC_OP;
JUMP_OUT;
@@ -3165,7 +3214,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
- sprev = s - 3;
INC_OP;
JUMP_OUT;
@@ -3179,7 +3227,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
- sprev = s - tlen;
INC_OP;
JUMP_OUT;
@@ -3295,11 +3342,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(ANYCHAR_STAR)
INC_OP;
while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
s += n;
}
JUMP_OUT;
@@ -3307,15 +3353,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(ANYCHAR_ML_STAR)
INC_OP;
while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
n = enclen(encode, s);
if (n > 1) {
DATA_ENSURE(n);
- sprev = s;
s += n;
}
else {
- sprev = s;
s++;
}
}
@@ -3329,12 +3373,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
while (DATA_ENSURE_CHECK1) {
if (c == *s) {
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
}
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
s += n;
}
}
@@ -3348,16 +3391,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
while (DATA_ENSURE_CHECK1) {
if (c == *s) {
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
}
n = enclen(encode, s);
if (n > 1) {
DATA_ENSURE(n);
- sprev = s;
s += n;
}
else {
- sprev = s;
s++;
}
}
@@ -3410,14 +3451,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
goto fail;
}
- else if (ON_STR_END(s)) {
- if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
- }
else {
- if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
- == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (ON_STR_END(s)) {
+ if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
+ else {
+ if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
+ == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
}
}
INC_OP;
@@ -3432,14 +3476,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
goto fail;
}
- else if (ON_STR_END(s)) {
- if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
- }
else {
- if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
- != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (ON_STR_END(s)) {
+ if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
+ else {
+ if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
+ != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
}
}
INC_OP;
@@ -3452,7 +3499,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mode = p->word_boundary.mode;
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+ UChar* sprev;
+ if (ON_STR_BEGIN(s)) {
+ INC_OP;
+ JUMP_OUT;
+ }
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
INC_OP;
JUMP_OUT;
}
@@ -3465,10 +3518,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ModeType mode;
mode = p->word_boundary.mode;
- if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
- if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- INC_OP;
- JUMP_OUT;
+ if (! ON_STR_BEGIN(s)) {
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+ if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
+ INC_OP;
+ JUMP_OUT;
+ }
}
}
}
@@ -3478,6 +3534,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(TEXT_SEGMENT_BOUNDARY)
{
int is_break;
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
switch (p->text_segment_boundary.type) {
case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
@@ -3507,12 +3564,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BEGIN_BUF)
if (! ON_STR_BEGIN(s)) goto fail;
+ if (OPTON_NOTBOL(msa->options)) goto fail;
+ if (OPTON_NOT_BEGIN_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
CASE_OP(END_BUF)
if (! ON_STR_END(s)) goto fail;
+ if (OPTON_NOTEOL(msa->options)) goto fail;
+ if (OPTON_NOT_END_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
@@ -3523,15 +3584,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
}
- else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
- INC_OP;
- JUMP_OUT;
+ else if (! ON_STR_END(s)) {
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
+ INC_OP;
+ JUMP_OUT;
+ }
}
goto fail;
CASE_OP(END_LINE)
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (OPTON_NOTEOL(msa->options)) goto fail;
@@ -3556,9 +3621,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(SEMI_END_BUF)
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (OPTON_NOTEOL(msa->options)) goto fail;
+ if (OPTON_NOT_END_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
@@ -3567,6 +3634,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
ON_STR_END(s + enclen(encode, s))) {
+ if (OPTON_NOTEOL(msa->options)) goto fail;
+ if (OPTON_NOT_END_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
}
@@ -3575,6 +3644,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar* ss = s + enclen(encode, s);
ss += enclen(encode, ss);
if (ON_STR_END(ss)) {
+ if (OPTON_NOTEOL(msa->options)) goto fail;
+ if (OPTON_NOT_END_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
}
@@ -3586,6 +3657,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
switch (p->check_position.type) {
case CHECK_POSITION_SEARCH_START:
if (s != msa->start) goto fail;
+ if (OPTON_NOT_BEGIN_POSITION(msa->options)) goto fail;
break;
case CHECK_POSITION_CURRENT_RIGHT_RANGE:
if (s != right_range) goto fail;
@@ -3604,7 +3676,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(MEM_START)
mem = p->memory_start.num;
- mem_start_stk[mem] = (StackIndex )((void* )s);
+ mem_start_stk[mem].s = s;
INC_OP;
JUMP_OUT;
@@ -3616,7 +3688,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(MEM_END)
mem = p->memory_end.num;
- mem_end_stk[mem] = (StackIndex )((void* )s);
+ mem_end_stk[mem].s = s;
INC_OP;
JUMP_OUT;
@@ -3629,20 +3701,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
si = GET_STACK_INDEX(stkp);
STACK_PUSH_MEM_END(mem, s);
- mem_start_stk[mem] = si;
+ mem_start_stk[mem].i = si;
INC_OP;
JUMP_OUT;
}
CASE_OP(MEM_END_REC)
mem = p->memory_end.num;
- mem_end_stk[mem] = (StackIndex )((void* )s);
+ mem_end_stk[mem].s = s;
STACK_GET_MEM_START(mem, stkp);
if (MEM_STATUS_AT(reg->push_mem_start, mem))
- mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ mem_start_stk[mem].i = GET_STACK_INDEX(stkp);
else
- mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
+ mem_start_stk[mem].s = stkp->u.mem.pstr;
STACK_PUSH_MEM_END_MARK(mem);
INC_OP;
@@ -3661,21 +3733,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->backref_n.n1;
backref:
{
- int len;
UChar *pstart, *pend;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
pstart = STACK_MEM_START(reg, mem);
pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
- sprev = s;
STRING_CMP(s, pstart, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
}
}
INC_OP;
@@ -3684,21 +3752,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_N_IC)
mem = p->backref_n.n1;
{
- int len;
UChar *pstart, *pend;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
pstart = STACK_MEM_START(reg, mem);
pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
- sprev = s;
STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
}
}
INC_OP;
@@ -3706,28 +3770,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_MULTI)
{
- int len, is_fail;
+ int is_fail;
UChar *pstart, *pend, *swork;
tlen = p->backref_general.num;
for (i = 0; i < tlen; i++) {
mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
pstart = STACK_MEM_START(reg, mem);
pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
- sprev = s;
swork = s;
STRING_CMP_VALUE(swork, pstart, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
}
break; /* success */
}
@@ -3738,28 +3799,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_MULTI_IC)
{
- int len, is_fail;
+ int is_fail;
UChar *pstart, *pend, *swork;
tlen = p->backref_general.num;
for (i = 0; i < tlen; i++) {
mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
pstart = STACK_MEM_START(reg, mem);
pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
- sprev = s;
swork = s;
STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
}
break; /* success */
}
@@ -3774,10 +3832,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto backref_with_level;
CASE_OP(BACKREF_WITH_LEVEL)
{
- int len;
int level;
MemNumType* mems;
- UChar* ssave;
n = 0;
backref_with_level:
@@ -3785,17 +3841,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen = p->backref_general.num;
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
- ssave = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, n,
- case_fold_flag, level, (int )tlen, mems, &s, end)) {
- if (ssave != s) {
- sprev = ssave;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
- }
- }
- else
+ if (! backref_match_at_nested_level(reg, stk, stk_base, n,
+ case_fold_flag, level, (int )tlen, mems, &s, end)) {
goto fail;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3810,8 +3859,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
for (i = 0; i < tlen; i++) {
mem = mems[i];
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
break; /* success */
}
if (i == tlen) goto fail;
@@ -3928,13 +3977,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(PUSH)
addr = p->push.addr;
- STACK_PUSH_ALT(p + addr, s, sprev);
+ STACK_PUSH_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
CASE_OP(PUSH_SUPER)
addr = p->push.addr;
- STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
+ STACK_PUSH_SUPER_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
@@ -3956,7 +4005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
addr = p->push_or_jump_exact1.addr;
c = p->push_or_jump_exact1.c;
if (DATA_ENSURE_CHECK1 && c == *s) {
- STACK_PUSH_ALT(p + addr, s, sprev);
+ STACK_PUSH_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
}
@@ -3972,9 +4021,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
addr = p->push_if_peek_next.addr;
c = p->push_if_peek_next.c;
if (DATA_ENSURE_CHECK1 && c == *s) {
- STACK_PUSH_ALT(p + addr, s, sprev);
- INC_OP;
- JUMP_OUT;
+ STACK_PUSH_ALT(p + addr, s);
}
}
INC_OP;
@@ -3986,7 +4033,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + addr, s, sprev);
+ STACK_PUSH_ALT(p + addr, s);
}
INC_OP;
JUMP_OUT;
@@ -3997,7 +4044,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + 1, s, sprev);
+ STACK_PUSH_ALT(p + 1, s);
p += addr;
}
else
@@ -4014,7 +4061,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (n >= reg->repeat_range[mem].lower) {
INC_OP;
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
p = reg->repeat_range[mem].u.pcode;
}
else {
@@ -4033,7 +4080,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else {
if (n >= reg->repeat_range[mem].lower) {
- STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
+ STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s);
INC_OP;
}
else {
@@ -4047,6 +4094,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (subexp_call_nest_counter == SubexpCallMaxNestLevel)
goto fail;
subexp_call_nest_counter++;
+
+ if (SubexpCallLimitInSearch != 0) {
+ msa->subexp_call_in_search_counter++;
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+ if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
+ subexp_call_counters[p->call.called_mem]++;
+ if (msa->subexp_call_in_search_counter % 1000 == 0)
+ MATCH_COUNTER_OUT("CALL");
+#endif
+ if (msa->subexp_call_in_search_counter >
+ SubexpCallLimitInSearch) {
+ MATCH_AT_ERROR_RETURN(ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER);
+ }
+ }
+
addr = p->call.addr;
INC_OP; STACK_PUSH_CALL_FRAME(p);
p = reg->ops + addr;
@@ -4070,7 +4132,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
for (tlen = p->move.n; tlen > 0; tlen--) {
len = enclen(encode, s);
- sprev = s;
s += len;
if (s > end) goto fail;
if (s == end) {
@@ -4079,7 +4140,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
}
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
INC_OP;
JUMP_OUT;
@@ -4088,10 +4148,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (tlen != 0) {
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
if (p->step_back_start.remaining != 0) {
- STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining);
+ STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining);
p += p->step_back_start.addr;
}
else
@@ -4103,9 +4162,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (tlen != INFINITE_LEN) tlen--;
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1);
if (IS_NULL(s)) goto fail;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (tlen != 0) {
- STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen);
+ STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen);
}
INC_OP;
JUMP_OUT;
@@ -4114,8 +4172,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->cut_to_mark.id; /* mem: mark id */
STACK_TO_VOID_TO_MARK(stkp, mem);
if (p->cut_to_mark.restore_pos != 0) {
- s = stkp->u.val.v;
- sprev = stkp->u.val.v2;
+ s = stkp->u.val.v;
}
INC_OP;
JUMP_OUT;
@@ -4123,7 +4180,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(MARK)
mem = p->mark.id; /* mem: mark id */
if (p->mark.save_pos != 0)
- STACK_PUSH_MARK_WITH_POS(mem, s, sprev);
+ STACK_PUSH_MARK_WITH_POS(mem, s);
else
STACK_PUSH_MARK(mem);
@@ -4275,9 +4332,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
fail:
#endif
STACK_POP;
- p = stk->u.state.pcode;
- s = stk->u.state.pstr;
- sprev = stk->u.state.pstr_prev;
+ p = stk->u.state.pcode;
+ s = stk->u.state.pstr;
CHECK_RETRY_LIMIT_IN_MATCH;
JUMP_OUT;
@@ -4290,6 +4346,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (msa->retry_limit_in_search != 0) {
msa->retry_limit_in_search_counter += retry_in_match_counter;
}
+
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+ MATCH_COUNTER_OUT("END");
+#endif
+
STACK_SAVE(msa, is_alloca, alloc_base);
return best_len;
}
@@ -4324,12 +4385,11 @@ typedef struct {
int state; /* value of enum SearchRangeStatus */
UChar* low;
UChar* high;
- UChar* low_prev;
UChar* sch_range;
} SearchRange;
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \
+ r = match_at(reg, str, end, (upper_range), s, msas + i); \
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
goto match;\
@@ -4345,8 +4405,8 @@ regset_search_body_position_lead(OnigRegSet* set,
OnigOptionType option, MatchArg* msas, int* rmatch_pos)
{
int r, n, i;
- UChar *s, *prev;
- UChar *low, *high, *low_prev;
+ UChar *s;
+ UChar *low, *high;
UChar* sch_range;
regex_t* reg;
OnigEncoding enc;
@@ -4354,12 +4414,7 @@ regset_search_body_position_lead(OnigRegSet* set,
n = set->n;
enc = set->enc;
-
s = (UChar* )start;
- if (s > str)
- prev = onigenc_get_prev_char_head(enc, str, s);
- else
- prev = (UChar* )NULL;
sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
CHECK_NULL_RETURN_MEMERR(sr);
@@ -4375,18 +4430,16 @@ regset_search_body_position_lead(OnigRegSet* set,
else
sch_range = (UChar* )end;
- if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) {
+ if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
sr[i].state = SRS_LOW_HIGH;
sr[i].low = low;
sr[i].high = high;
- sr[i].low_prev = low_prev;
sr[i].sch_range = sch_range;
}
}
else {
sch_range = (UChar* )end;
- if (forward_search(reg, str, end, s, sch_range,
- &low, &high, (UChar** )NULL)) {
+ if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
goto total_active;
}
}
@@ -4396,7 +4449,6 @@ regset_search_body_position_lead(OnigRegSet* set,
sr[i].state = SRS_ALL_RANGE;
sr[i].low = s;
sr[i].high = (UChar* )range;
- sr[i].low_prev = prev;
}
}
@@ -4412,10 +4464,9 @@ regset_search_body_position_lead(OnigRegSet* set,
if (s < sr[i].low) continue;
if (s >= sr[i].high) {
if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
- &low, &high, &low_prev) != 0) {
+ &low, &high) != 0) {
sr[i].low = low;
sr[i].high = high;
- sr[i].low_prev = low_prev;
if (s < low) continue;
}
else {
@@ -4436,16 +4487,13 @@ regset_search_body_position_lead(OnigRegSet* set,
for (i = 0; i < n; i++) {
if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
low = sr[i].low;
- low_prev = sr[i].low_prev;
}
}
if (low == range) break;
s = low;
- prev = low_prev;
}
else {
- prev = s;
s += enclen(enc, s);
}
} while (1);
@@ -4459,10 +4507,9 @@ regset_search_body_position_lead(OnigRegSet* set,
if (s < sr[i].low) continue;
if (s >= sr[i].high) {
if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
- &low, &high, &low_prev) != 0) {
+ &low, &high) != 0) {
sr[i].low = low;
sr[i].high = high;
- /* sr[i].low_prev = low_prev; */
if (s < low) continue;
}
else {
@@ -4483,7 +4530,6 @@ regset_search_body_position_lead(OnigRegSet* set,
if (set->anychar_inf != 0)
prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
- prev = s;
s += enclen(enc, s);
} while (1);
}
@@ -4552,7 +4598,7 @@ onig_regset_search_with_param(OnigRegSet* set,
{
int r;
int i;
- UChar *s, *prev;
+ UChar *s;
regex_t* reg;
OnigEncoding enc;
OnigRegion* region;
@@ -4654,7 +4700,6 @@ onig_regset_search_with_param(OnigRegSet* set,
else if (str == end) { /* empty string */
start = end = str;
s = (UChar* )start;
- prev = (UChar* )NULL;
msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
CHECK_NULL_RETURN_MEMERR(msas);
@@ -4669,7 +4714,7 @@ onig_regset_search_with_param(OnigRegSet* set,
/* Can't use REGSET_MATCH_AND_RETURN_CHECK()
because r must be set regex index (i)
*/
- r = match_at(reg, str, end, end, s, prev, msas + i);
+ r = match_at(reg, str, end, end, s, msas + i);
if (r != ONIG_MISMATCH) {
if (r >= 0) {
r = i;
@@ -4814,7 +4859,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
else
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
- while (s >= text) {
+ while (PTR_GE(s, text)) {
if (*s == *target) {
p = s + 1;
t = target + 1;
@@ -4855,7 +4900,7 @@ sunday_quick_search_step_forward(regex_t* reg,
tail = target_end - 1;
tlen1 = (int )(tail - target);
end = text_range;
- if (end + tlen1 > text_end)
+ if (tlen1 > text_end - end)
end = text_end - tlen1;
map_offset = reg->map_offset;
@@ -4893,15 +4938,38 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar *s, *t, *p, *end;
const UChar *tail;
int map_offset;
-
- end = text_range + (target_end - target);
- if (end > text_end)
- end = text_end;
+ ptrdiff_t target_len;
map_offset = reg->map_offset;
tail = target_end - 1;
- s = text + (tail - target);
+ target_len = target_end - target;
+ if (target_len > text_end - text_range) {
+ end = text_end;
+ if (target_len > text_end - text)
+ return (UChar* )NULL;
+ }
+ else {
+ end = text_range + target_len;
+ }
+
+ s = text + target_len - 1;
+
+#ifdef USE_STRICT_POINTER_ADDRESS
+ if (s < end) {
+ while (TRUE) {
+ p = s;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )p;
+ p--; t--;
+ }
+ if (text_end - s <= map_offset) break;
+ if (reg->map[*(s + map_offset)] >= end - s) break;
+ s += reg->map[*(s + map_offset)];
+ }
+ }
+#else
while (s < end) {
p = s;
t = tail;
@@ -4909,9 +4977,10 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
if (t == target) return (UChar* )p;
p--; t--;
}
- if (s + map_offset >= text_end) break;
+ if (text_end - s <= map_offset) break;
s += reg->map[*(s + map_offset)];
}
+#endif
return (UChar* )NULL;
}
@@ -4937,7 +5006,7 @@ map_search_backward(OnigEncoding enc, UChar map[],
{
const UChar *s = text_start;
- while (s >= text) {
+ while (PTR_GE(s, text)) {
if (map[*s]) return (UChar* )s;
s = onigenc_get_prev_char_head(enc, adjust_text, s);
@@ -4963,13 +5032,16 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
OnigMatchParam* mp)
{
int r;
- UChar *prev;
MatchArg msa;
+#ifndef USE_POSIX_API
+ if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT;
+#endif
+
ADJUST_MATCH_PARAM(reg, mp);
MATCH_ARG_INIT(msa, reg, option, region, at, mp);
if (region
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
&& !OPTON_POSIX_REGION(option)
#endif
) {
@@ -4986,8 +5058,14 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
}
- prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
- r = match_at(reg, str, end, end, at, prev, &msa);
+ r = match_at(reg, str, end, end, at, &msa);
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) {
+ if (msa.best_len >= 0) {
+ r = msa.best_len;
+ }
+ }
+#endif
}
end:
@@ -4997,7 +5075,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
static int
forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
- UChar* range, UChar** low, UChar** high, UChar** low_prev)
+ UChar* range, UChar** low, UChar** high)
{
UChar *p, *pprev = (UChar* )NULL;
@@ -5081,33 +5159,18 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
}
if (reg->dist_max == 0) {
- *low = p;
- if (low_prev) {
- if (*low > start)
- *low_prev = onigenc_get_prev_char_head(reg->enc, start, p);
- else
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- }
+ *low = p;
*high = p;
}
else {
if (reg->dist_max != INFINITE_LEN) {
if (p - str < reg->dist_max) {
*low = (UChar* )str;
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
}
else {
*low = p - reg->dist_max;
if (*low > start) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start,
- *low, (const UChar** )low_prev);
- }
- else {
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), *low);
+ *low = onigenc_get_right_adjust_char_head(reg->enc, start, *low);
}
}
}
@@ -5263,7 +5326,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
OnigOptionType option, OnigMatchParam* mp)
{
int r;
- UChar *s, *prev;
+ UChar *s;
MatchArg msa;
const UChar *orig_start = start;
@@ -5275,8 +5338,15 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
ADJUST_MATCH_PARAM(reg, mp);
+#ifndef USE_POSIX_API
+ if (OPTON_POSIX_REGION(option)) {
+ r = ONIGERR_INVALID_ARGUMENT;
+ goto finish_no_msa;
+ }
+#endif
+
if (region
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
&& ! OPTON_POSIX_REGION(option)
#endif
) {
@@ -5294,27 +5364,14 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
}
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! OPTON_FIND_LONGEST(reg->options)) {\
- goto match;\
- }\
- }\
- else goto finish; /* error */ \
- }
-#else
-#define MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+ r = match_at(reg, str, end, (upper_range), s, &msa);\
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
goto match;\
}\
else goto finish; /* error */ \
}
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
/* anchor optimize: resume search range */
@@ -5422,7 +5479,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (reg->threshold_len == 0) {
start = end = str = address_for_empty_string;
s = (UChar* )start;
- prev = (UChar* )NULL;
MATCH_ARG_INIT(msa, reg, option, region, start, mp);
MATCH_AND_RETURN_CHECK(end);
@@ -5440,13 +5496,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
s = (UChar* )start;
if (range > start) { /* forward search */
- if (s > str)
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
- else
- prev = (UChar* )NULL;
-
if (reg->optimize != OPTIMIZE_NONE) {
- UChar *sch_range, *low, *high, *low_prev;
+ UChar *sch_range, *low, *high;
if (reg->dist_max != 0) {
if (reg->dist_max == INFINITE_LEN)
@@ -5467,27 +5518,27 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (reg->dist_max != INFINITE_LEN) {
do {
- if (! forward_search(reg, str, end, s, sch_range, &low, &high,
- &low_prev)) goto mismatch;
+ if (! forward_search(reg, str, end, s, sch_range, &low, &high))
+ goto mismatch;
if (s < low) {
s = low;
- prev = low_prev;
}
while (s <= high) {
MATCH_AND_RETURN_CHECK(data_range);
- prev = s;
s += enclen(reg->enc, s);
}
} while (s < range);
goto mismatch;
}
else { /* check only. */
- if (! forward_search(reg, str, end, s, sch_range, &low, &high,
- (UChar** )NULL)) goto mismatch;
+ if (! forward_search(reg, str, end, s, sch_range, &low, &high))
+ goto mismatch;
if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 &&
(reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
do {
+ UChar* prev;
+
MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
@@ -5504,7 +5555,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
do {
MATCH_AND_RETURN_CHECK(data_range);
- prev = s;
s += enclen(reg->enc, s);
} while (s < range);
@@ -5549,12 +5599,11 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (s > high)
s = high;
- while (s >= low) {
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ while (PTR_GE(s, low)) {
MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
+ s = onigenc_get_prev_char_head(reg->enc, str, s);
}
- } while (s >= range);
+ } while (PTR_GE(s, range));
goto mismatch;
}
else { /* check only. */
@@ -5566,10 +5615,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
}
do {
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
- } while (s >= range);
+ s = onigenc_get_prev_char_head(reg->enc, str, s);
+ } while (PTR_GE(s, range));
}
mismatch:
@@ -5589,7 +5637,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not set in match_at(). */
if (OPTON_FIND_NOT_EMPTY(reg->options) && region
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
&& !OPTON_POSIX_REGION(option)
#endif
) {
@@ -5952,7 +6000,7 @@ extern int
onig_init_for_match_at(regex_t* reg)
{
return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
- (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL,
+ (const UChar* )NULL, (const UChar* )NULL,
(MatchArg* )NULL);
}
#endif
@@ -6139,8 +6187,8 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i
const UChar* str;
StackType* stk_base;
int i;
- StackIndex* mem_start_stk;
- StackIndex* mem_end_stk;
+ StkPtrType* mem_start_stk;
+ StkPtrType* mem_end_stk;
i = mem_num;
reg = a->regex;
@@ -6150,7 +6198,7 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i
mem_end_stk = a->mem_end_stk;
if (i > 0) {
- if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (a->mem_end_stk[i].i != INVALID_STACK_INDEX) {
*begin = (int )(STACK_MEM_START(reg, i) - str);
*end = (int )(STACK_MEM_END(reg, i) - str);
}