summaryrefslogtreecommitdiff
path: root/src/regexec.c
diff options
context:
space:
mode:
authorJörg Frings-Fürst <debian@jff-webhosting.net>2016-08-31 03:42:07 +0200
committerJörg Frings-Fürst <debian@jff-webhosting.net>2016-08-31 03:42:07 +0200
commit75ed3a79bf4fdcb71dd709495de544cb6cef17b3 (patch)
tree67e4b404206c31d3d7d52673c12eb9f756db5c64 /src/regexec.c
parentc752981613de81bfa2723749b79e80bf0008f27c (diff)
parenta76fa337cc657dbe669ffb8dbdac606d4d6616f1 (diff)
Merge tag 'upstream/6.1.0'
Upstream version 6.1.0
Diffstat (limited to 'src/regexec.c')
-rw-r--r--src/regexec.c1560
1 files changed, 821 insertions, 739 deletions
diff --git a/src/regexec.c b/src/regexec.c
index 2c768e1..70ac89e 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -327,19 +327,21 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
(msa).best_len = ONIG_MISMATCH;\
+ (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\
} while(0)
#else
-#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
+ (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\
} while(0)
#endif
@@ -369,7 +371,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
(msa).state_check_buff = (void* )0;\
(msa).state_check_buff_size = 0;\
}\
- } while(0)
+} while(0)
#define MATCH_ARG_FREE(msa) do {\
if ((msa).stack_p) xfree((msa).stack_p);\
@@ -383,32 +385,59 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#endif
+#define ALLOCA_PTR_NUM_LIMIT 50
-#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
+#define STACK_INIT(stack_num) do {\
if (msa->stack_p) {\
- alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
- stk_alloc = (OnigStackType* )(msa->stack_p);\
- stk_base = stk_alloc;\
+ is_alloca = 0;\
+ alloc_base = msa->stack_p;\
+ stk_base = (OnigStackType* )(alloc_base\
+ + (sizeof(OnigStackIndex) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + msa->stack_n;\
}\
+ else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
+ is_alloca = 0;\
+ alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\
+ + sizeof(OnigStackType) * (stack_num));\
+ stk_base = (OnigStackType* )(alloc_base\
+ + (sizeof(OnigStackIndex) * msa->ptr_num));\
+ stk = stk_base;\
+ stk_end = stk_base + (stack_num);\
+ }\
else {\
- alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
- + sizeof(OnigStackType) * (stack_num));\
- stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
- stk_base = stk_alloc;\
+ is_alloca = 1;\
+ alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\
+ + sizeof(OnigStackType) * (stack_num));\
+ stk_base = (OnigStackType* )(alloc_base\
+ + (sizeof(OnigStackIndex) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
-} while(0)
+} while(0);
+
#define STACK_SAVE do{\
- if (stk_base != stk_alloc) {\
- msa->stack_p = stk_base;\
- msa->stack_n = stk_end - stk_base;\
+ msa->stack_n = stk_end - stk_base;\
+ if (is_alloca != 0) {\
+ size_t size = sizeof(OnigStackIndex) * msa->ptr_num \
+ + sizeof(OnigStackType) * msa->stack_n;\
+ msa->stack_p = xmalloc(size);\
+ xmemcpy(msa->stack_p, alloc_base, size);\
+ }\
+ else {\
+ msa->stack_p = alloc_base;\
};\
} while(0)
+#define UPDATE_FOR_STACK_REALLOC do{\
+ repeat_stk = (OnigStackIndex* )alloc_base;\
+ mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\
+ mem_end_stk = mem_start_stk + num_mem;\
+ mem_start_stk--; /* for index start from 1 */\
+ mem_end_stk--; /* for index start from 1 */\
+} while(0)
+
static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
extern unsigned int
@@ -425,50 +454,65 @@ onig_set_match_stack_limit_size(unsigned int size)
}
static int
-stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
- OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
+stack_double(int is_alloca, char** arg_alloc_base,
+ OnigStackType** arg_stk_base,
+ OnigStackType** arg_stk_end, OnigStackType** arg_stk,
+ OnigMatchArg* msa)
{
unsigned int n;
- OnigStackType *x, *stk_base, *stk_end, *stk;
+ int used;
+ size_t size;
+ char* alloc_base;
+ char* new_alloc_base;
+ OnigStackType *stk_base, *stk_end, *stk;
+ alloc_base = *arg_alloc_base;
stk_base = *arg_stk_base;
stk_end = *arg_stk_end;
stk = *arg_stk;
n = stk_end - stk_base;
- if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
- x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
- if (IS_NULL(x)) {
+ n *= 2;
+ size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
+ if (is_alloca != 0) {
+ new_alloc_base = (char* )xmalloc(size);
+ if (IS_NULL(new_alloc_base)) {
STACK_SAVE;
return ONIGERR_MEMORY;
}
- xmemcpy(x, stk_base, n * sizeof(OnigStackType));
- n *= 2;
+ xmemcpy(new_alloc_base, alloc_base, size);
}
else {
- n *= 2;
if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
return ONIGERR_MATCH_STACK_LIMIT_OVER;
else
n = MatchStackLimitSize;
}
- x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
- if (IS_NULL(x)) {
+ new_alloc_base = (char* )xrealloc(alloc_base, size);
+ if (IS_NULL(new_alloc_base)) {
STACK_SAVE;
return ONIGERR_MEMORY;
}
}
- *arg_stk = x + (stk - stk_base);
- *arg_stk_base = x;
- *arg_stk_end = x + n;
+
+ alloc_base = new_alloc_base;
+ used = stk - stk_base;
+ *arg_alloc_base = alloc_base;
+ *arg_stk_base = (OnigStackType* )(alloc_base
+ + (sizeof(OnigStackIndex) * msa->ptr_num));
+ *arg_stk = *arg_stk_base + used;
+ *arg_stk_end = *arg_stk_base + n;
return 0;
}
#define STACK_ENSURE(n) do {\
if (stk_end - stk < (n)) {\
- int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
+ int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk,\
+ msa);\
if (r != 0) { STACK_SAVE; return r; } \
+ is_alloca = 0;\
+ UPDATE_FOR_STACK_REALLOC;\
}\
} while(0)
@@ -1108,33 +1152,33 @@ static int backref_match_at_nested_level(regex_t* reg
}
else if (level == nest) {
if (k->type == STK_MEM_START) {
- if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
- pstart = k->u.mem.pstr;
- if (pend != NULL_UCHARP) {
- if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
- p = pstart;
- ss = *s;
-
- if (ignore_case != 0) {
- if (string_cmp_ic(reg->enc, case_fold_flag,
- pstart, &ss, (int )(pend - pstart)) == 0)
- return 0; /* or goto next_mem; */
- }
- else {
- while (p < pend) {
- if (*p++ != *ss++) return 0; /* or goto next_mem; */
- }
- }
-
- *s = ss;
- return 1;
- }
- }
+ if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+ pstart = k->u.mem.pstr;
+ if (pend != NULL_UCHARP) {
+ if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
+ p = pstart;
+ ss = *s;
+
+ if (ignore_case != 0) {
+ if (string_cmp_ic(reg->enc, case_fold_flag,
+ pstart, &ss, (int )(pend - pstart)) == 0)
+ return 0; /* or goto next_mem; */
+ }
+ else {
+ while (p < pend) {
+ if (*p++ != *ss++) return 0; /* or goto next_mem; */
+ }
+ }
+
+ *s = ss;
+ return 1;
+ }
+ }
}
else if (k->type == STK_MEM_END) {
- if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
- pend = k->u.mem.pstr;
- }
+ if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+ pend = k->u.mem.pstr;
+ }
}
}
k--;
@@ -1247,13 +1291,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
LengthType tlen, tlen2;
MemNumType mem;
RelAddrType addr;
- OnigOptionType option = reg->options;
- OnigEncoding encode = reg->enc;
- OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
UChar *s, *q, *sbegin;
- UChar *p = reg->p;
- char *alloca_base;
- OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
+ int is_alloca;
+ char *alloc_base;
+ OnigStackType *stk_base, *stk, *stk_end;
OnigStackType *stkp; /* used as any purpose. */
OnigStackIndex si;
OnigStackIndex *repeat_stk;
@@ -1263,19 +1304,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
unsigned char* state_check_buff = msa->state_check_buff;
int num_comb_exp_check = reg->num_comb_exp_check;
#endif
- n = reg->num_repeat + reg->num_mem * 2;
+ UChar *p = reg->p;
+ OnigOptionType option = reg->options;
+ OnigEncoding encode = reg->enc;
+ OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
- STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
+ //n = reg->num_repeat + reg->num_mem * 2;
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
- repeat_stk = (OnigStackIndex* )alloca_base;
-
- mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
- mem_end_stk = mem_start_stk + num_mem;
- mem_start_stk--; /* for index start from 1,
- mem_start_stk[1]..mem_start_stk[num_mem] */
- mem_end_stk--; /* for index start from 1,
- mem_end_stk[1]..mem_end_stk[num_mem] */
+ STACK_INIT(INIT_MATCH_STACK_SIZE);
+ UPDATE_FOR_STACK_REALLOC;
for (i = 1; i <= num_mem; i++) {
mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
}
@@ -1316,64 +1354,64 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_END: MOP_IN(OP_END);
n = s - sstart;
if (n > best_len) {
- OnigRegion* region;
+ OnigRegion* region;
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- if (IS_FIND_LONGEST(option)) {
- if (n > msa->best_len) {
- msa->best_len = n;
- msa->best_s = (UChar* )sstart;
- }
- else
- goto end_best_len;
+ if (IS_FIND_LONGEST(option)) {
+ if (n > msa->best_len) {
+ msa->best_len = n;
+ msa->best_s = (UChar* )sstart;
+ }
+ else
+ goto end_best_len;
}
#endif
- best_len = n;
- region = msa->region;
- if (region) {
+ best_len = n;
+ region = msa->region;
+ if (region) {
#ifdef USE_POSIX_API_REGION_OPTION
- if (IS_POSIX_REGION(msa->options)) {
- posix_regmatch_t* rmt = (posix_regmatch_t* )region;
-
- rmt[0].rm_so = sstart - str;
- rmt[0].rm_eo = s - str;
- for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->bt_mem_start, i))
- rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
- else
- rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
-
- rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str;
- }
- else {
- rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
- }
- }
- }
- else {
+ if (IS_POSIX_REGION(msa->options)) {
+ posix_regmatch_t* rmt = (posix_regmatch_t* )region;
+
+ rmt[0].rm_so = sstart - str;
+ rmt[0].rm_eo = s - str;
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
+
+ rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[i])) - str;
+ }
+ else {
+ rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
+ }
+ }
+ }
+ else {
#endif /* USE_POSIX_API_REGION_OPTION */
- region->beg[0] = sstart - str;
- region->end[0] = s - str;
- for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->bt_mem_start, i))
- region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
- else
- region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
-
- region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str;
- }
- else {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
- }
+ region->beg[0] = sstart - str;
+ region->end[0] = s - str;
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
+
+ region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[i])) - str;
+ }
+ else {
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+ }
+ }
#ifdef USE_CAPTURE_HISTORY
- if (reg->capture_history != 0) {
+ if (reg->capture_history != 0) {
int r;
OnigCaptureTreeNode* node;
@@ -1397,12 +1435,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
best_len = r; /* error code */
goto finish;
}
- }
+ }
#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_POSIX_API_REGION_OPTION
- } /* else IS_POSIX_REGION() */
+ } /* else IS_POSIX_REGION() */
#endif
- } /* if (region) */
+ } /* if (region) */
} /* n > best_len */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
@@ -1411,13 +1449,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MOP_OUT;
if (IS_FIND_CONDITION(option)) {
- if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
- best_len = ONIG_MISMATCH;
- goto fail; /* for retry */
- }
- if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
- goto fail; /* for retry */
- }
+ if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
+ best_len = ONIG_MISMATCH;
+ goto fail; /* for retry */
+ }
+ if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
+ goto fail; /* for retry */
+ }
}
/* default behavior: return first-matching result. */
@@ -1438,22 +1476,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
{
- int len;
- UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- DATA_ENSURE(1);
- len = ONIGENC_MBC_CASE_FOLD(encode,
- /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
- case_fold_flag,
- &s, end, lowbuf);
- DATA_ENSURE(0);
- q = lowbuf;
- while (len-- > 0) {
- if (*p != *q) {
+ int len;
+ UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+
+ DATA_ENSURE(1);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
+ DATA_ENSURE(0);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) {
goto fail;
}
- p++; q++;
- }
+ p++; q++;
+ }
}
MOP_OUT;
break;
@@ -1518,7 +1556,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen);
while (tlen-- > 0) {
- if (*p++ != *s++) goto fail;
+ if (*p++ != *s++) goto fail;
}
sprev = s - 1;
MOP_OUT;
@@ -1527,26 +1565,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
{
- int len;
- UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ int len;
+ UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- GET_LENGTH_INC(tlen, p);
- endp = p + tlen;
+ GET_LENGTH_INC(tlen, p);
+ endp = p + tlen;
- while (p < endp) {
- sprev = s;
- DATA_ENSURE(1);
- len = ONIGENC_MBC_CASE_FOLD(encode,
- /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
- case_fold_flag,
- &s, end, lowbuf);
- DATA_ENSURE(0);
- q = lowbuf;
- while (len-- > 0) {
- if (*p != *q) goto fail;
- p++; q++;
- }
- }
+ while (p < endp) {
+ sprev = s;
+ DATA_ENSURE(1);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
+ DATA_ENSURE(0);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) goto fail;
+ p++; q++;
+ }
+ }
}
MOP_OUT;
@@ -1600,10 +1638,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 2);
while (tlen-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
}
sprev = s - 2;
MOP_OUT;
@@ -1614,12 +1652,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 3);
while (tlen-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
}
sprev = s - 3;
MOP_OUT;
@@ -1632,8 +1670,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen2 *= tlen;
DATA_ENSURE(tlen2);
while (tlen2-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
}
sprev = s - tlen;
MOP_OUT;
@@ -1654,23 +1692,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
cclass_mb:
GET_LENGTH_INC(tlen, p);
{
- OnigCodePoint code;
- UChar *ss;
- int mb_len;
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len;
- DATA_ENSURE(1);
- mb_len = enclen(encode, s);
- DATA_ENSURE(mb_len);
- ss = s;
- s += mb_len;
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ DATA_ENSURE(1);
+ mb_len = enclen(encode, s);
+ DATA_ENSURE(mb_len);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- if (! onig_is_in_code_range(p, code)) goto fail;
+ if (! onig_is_in_code_range(p, code)) goto fail;
#else
- q = p;
- ALIGNMENT_RIGHT(q);
- if (! onig_is_in_code_range(q, code)) goto fail;
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (! onig_is_in_code_range(q, code)) goto fail;
#endif
}
p += tlen;
@@ -1680,17 +1718,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) {
- p += SIZE_BITSET;
- goto cclass_mb;
+ p += SIZE_BITSET;
+ goto cclass_mb;
}
else {
- if (BITSET_AT(((BitSetRef )p), *s) == 0)
- goto fail;
+ if (BITSET_AT(((BitSetRef )p), *s) == 0)
+ goto fail;
- p += SIZE_BITSET;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- s++;
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
}
MOP_OUT;
break;
@@ -1706,36 +1744,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
- s++;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- goto cc_mb_not_success;
+ s++;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ goto cc_mb_not_success;
}
cclass_mb_not:
GET_LENGTH_INC(tlen, p);
{
- OnigCodePoint code;
- UChar *ss;
- int mb_len = enclen(encode, s);
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len = enclen(encode, s);
- if (! DATA_ENSURE_CHECK(mb_len)) {
+ if (! DATA_ENSURE_CHECK(mb_len)) {
DATA_ENSURE(1);
- s = (UChar* )end;
- p += tlen;
- goto cc_mb_not_success;
- }
+ s = (UChar* )end;
+ p += tlen;
+ goto cc_mb_not_success;
+ }
- ss = s;
- s += mb_len;
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- if (onig_is_in_code_range(p, code)) goto fail;
+ if (onig_is_in_code_range(p, code)) goto fail;
#else
- q = p;
- ALIGNMENT_RIGHT(q);
- if (onig_is_in_code_range(q, code)) goto fail;
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (onig_is_in_code_range(q, code)) goto fail;
#endif
}
p += tlen;
@@ -1747,36 +1785,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) {
- p += SIZE_BITSET;
- goto cclass_mb_not;
+ p += SIZE_BITSET;
+ goto cclass_mb_not;
}
else {
- if (BITSET_AT(((BitSetRef )p), *s) != 0)
- goto fail;
+ if (BITSET_AT(((BitSetRef )p), *s) != 0)
+ goto fail;
- p += SIZE_BITSET;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- s++;
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
}
MOP_OUT;
break;
case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
{
- OnigCodePoint code;
+ OnigCodePoint code;
void *node;
int mb_len;
UChar *ss;
DATA_ENSURE(1);
GET_POINTER_INC(node, p);
- mb_len = enclen(encode, s);
- ss = s;
- s += mb_len;
- DATA_ENSURE(0);
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
- if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
+ mb_len = enclen(encode, s);
+ ss = s;
+ s += mb_len;
+ DATA_ENSURE(0);
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
}
MOP_OUT;
break;
@@ -1800,8 +1838,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
- n = enclen(encode, s);
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
@@ -1812,27 +1850,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
- n = enclen(encode, s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enclen(encode, s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
}
MOP_OUT;
break;
case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) {
- if (*p == *s) {
- STACK_PUSH_ALT(p + 1, s, sprev);
- }
- n = enclen(encode, s);
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
@@ -1844,19 +1882,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) {
- if (*p == *s) {
- STACK_PUSH_ALT(p + 1, s, sprev);
- }
- n = enclen(encode, s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enclen(encode, s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
}
p++;
MOP_OUT;
@@ -1866,11 +1904,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
GET_STATE_CHECK_NUM_INC(mem, p);
while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enclen(encode, s);
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
@@ -1884,20 +1922,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_STATE_CHECK_NUM_INC(mem, p);
while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enclen(encode, s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enclen(encode, s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
}
MOP_OUT;
break;
@@ -1906,7 +1944,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_WORD: MOP_IN(OP_WORD);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
+ goto fail;
s += enclen(encode, s);
MOP_OUT;
@@ -1915,7 +1953,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
+ goto fail;
s += enclen(encode, s);
MOP_OUT;
@@ -1923,18 +1961,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
}
else if (ON_STR_END(s)) {
- if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
}
else {
- if (ONIGENC_IS_MBC_WORD(encode, s, end)
- == ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ == ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
}
MOP_OUT;
continue;
@@ -1942,17 +1980,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
}
else if (ON_STR_END(s)) {
- if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
}
else {
- if (ONIGENC_IS_MBC_WORD(encode, s, end)
- != ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ != ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
}
MOP_OUT;
continue;
@@ -1961,20 +1999,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_WORD_BEGIN_END
case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
- if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
- MOP_OUT;
- continue;
- }
+ if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ MOP_OUT;
+ continue;
+ }
}
goto fail;
break;
case OP_WORD_END: MOP_IN(OP_WORD_END);
if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
- if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
- MOP_OUT;
- continue;
- }
+ if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ MOP_OUT;
+ continue;
+ }
}
goto fail;
break;
@@ -1996,13 +2034,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
if (ON_STR_BEGIN(s)) {
- if (IS_NOTBOL(msa->options)) goto fail;
- MOP_OUT;
- continue;
+ if (IS_NOTBOL(msa->options)) goto fail;
+ MOP_OUT;
+ continue;
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
- MOP_OUT;
- continue;
+ MOP_OUT;
+ continue;
}
goto fail;
break;
@@ -2010,23 +2048,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_END_LINE: MOP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
- if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
- continue;
+ if (IS_NOTEOL(msa->options)) goto fail;
+ MOP_OUT;
+ continue;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- }
+ }
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
- MOP_OUT;
- continue;
+ MOP_OUT;
+ continue;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- MOP_OUT;
- continue;
+ MOP_OUT;
+ continue;
}
#endif
goto fail;
@@ -2035,24 +2073,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
- if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
- continue;
+ if (IS_NOTEOL(msa->options)) goto fail;
+ MOP_OUT;
+ continue;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- }
+ }
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
- ON_STR_END(s + enclen(encode, s))) {
- MOP_OUT;
- continue;
+ ON_STR_END(s + enclen(encode, s))) {
+ MOP_OUT;
+ continue;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
UChar* ss = s + enclen(encode, s);
- ss += enclen(encode, ss);
+ ss += enclen(encode, ss);
if (ON_STR_END(ss)) {
MOP_OUT;
continue;
@@ -2064,7 +2102,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
if (s != msa->start)
- goto fail;
+ goto fail;
MOP_OUT;
continue;
@@ -2114,9 +2152,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_MEM_START(mem, stkp);
if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ mem_start_stk[mem] = GET_STACK_INDEX(stkp);
else
- mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
+ mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
STACK_PUSH_MEM_END_MARK(mem);
MOP_OUT;
@@ -2138,171 +2176,170 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_MEMNUM_INC(mem, p);
backref:
{
- int len;
- UChar *pstart, *pend;
-
- /* if you want to remove following line,
- you should check in parse and compile time. */
- if (mem > num_mem) goto fail;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP(pstart, s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(pstart, s, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
- MOP_OUT;
- continue;
+ MOP_OUT;
+ continue;
}
break;
case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
GET_MEMNUM_INC(mem, p);
{
- int len;
- UChar *pstart, *pend;
-
- /* if you want to remove following line,
- you should check in parse and compile time. */
- if (mem > num_mem) goto fail;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
- MOP_OUT;
- continue;
+ MOP_OUT;
+ continue;
}
break;
case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
{
- int len, is_fail;
- UChar *pstart, *pend, *swork;
-
- GET_LENGTH_INC(tlen, p);
- for (i = 0; i < tlen; i++) {
- GET_MEMNUM_INC(mem, p);
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE(pstart, swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
- p += (SIZE_MEMNUM * (tlen - i - 1));
- break; /* success */
- }
- if (i == tlen) goto fail;
- MOP_OUT;
- continue;
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(pstart, swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ MOP_OUT;
+ continue;
}
break;
case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
{
- int len, is_fail;
- UChar *pstart, *pend, *swork;
-
- GET_LENGTH_INC(tlen, p);
- for (i = 0; i < tlen; i++) {
- GET_MEMNUM_INC(mem, p);
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
- p += (SIZE_MEMNUM * (tlen - i - 1));
- break; /* success */
- }
- if (i == tlen) goto fail;
- MOP_OUT;
- continue;
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ MOP_OUT;
+ continue;
}
break;
#ifdef USE_BACKREF_WITH_LEVEL
case OP_BACKREF_WITH_LEVEL:
{
- int len;
- OnigOptionType ic;
- LengthType level;
+ int len;
+ OnigOptionType ic;
+ LengthType level;
- GET_OPTION_INC(ic, p);
- GET_LENGTH_INC(level, p);
- GET_LENGTH_INC(tlen, p);
+ GET_OPTION_INC(ic, p);
+ GET_LENGTH_INC(level, p);
+ GET_LENGTH_INC(tlen, p);
- sprev = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, ic
- , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ sprev = s;
+ if (backref_match_at_nested_level(reg, stk, stk_base, ic
+ , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
- p += (SIZE_MEMNUM * tlen);
- }
- else
- goto fail;
+ p += (SIZE_MEMNUM * tlen);
+ }
+ else
+ goto fail;
- MOP_OUT;
- continue;
+ MOP_OUT;
+ continue;
}
-
break;
#endif
@@ -2331,33 +2368,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
{
- int isnull;
+ int isnull;
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_NULL_CHECK(isnull, mem, s);
- if (isnull) {
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK(isnull, mem, s);
+ if (isnull) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
- (int )mem, (int )s);
-#endif
- null_check_found:
- /* empty loop founded, skip next instruction */
- switch (*p++) {
- case OP_JUMP:
- case OP_PUSH:
- p += SIZE_RELADDR;
- break;
- case OP_REPEAT_INC:
- case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
- p += SIZE_MEMNUM;
- break;
- default:
- goto unexpected_bytecode_error;
- break;
- }
- }
+ fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ null_check_found:
+ /* empty loop founded, skip next instruction */
+ switch (*p++) {
+ case OP_JUMP:
+ case OP_PUSH:
+ p += SIZE_RELADDR;
+ break;
+ case OP_REPEAT_INC:
+ case OP_REPEAT_INC_NG:
+ case OP_REPEAT_INC_SG:
+ case OP_REPEAT_INC_NG_SG:
+ p += SIZE_MEMNUM;
+ break;
+ default:
+ goto unexpected_bytecode_error;
+ break;
+ }
+ }
}
MOP_OUT;
continue;
@@ -2366,18 +2403,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
{
- int isnull;
+ int isnull;
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
- if (isnull) {
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
+ if (isnull) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
- (int )mem, (int )s);
+ fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
#endif
- if (isnull == -1) goto fail;
- goto null_check_found;
- }
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
}
MOP_OUT;
continue;
@@ -2388,25 +2425,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_NULL_CHECK_END_MEMST_PUSH:
MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
{
- int isnull;
+ int isnull;
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
+ STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
#else
- STACK_NULL_CHECK_REC(isnull, mem, s);
+ STACK_NULL_CHECK_REC(isnull, mem, s);
#endif
- if (isnull) {
+ if (isnull) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
- (int )mem, (int )s);
+ fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
#endif
- if (isnull == -1) goto fail;
- goto null_check_found;
- }
- else {
- STACK_PUSH_NULL_CHECK_END(mem);
- }
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ else {
+ STACK_PUSH_NULL_CHECK_END(mem);
+ }
}
MOP_OUT;
continue;
@@ -2445,10 +2482,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_RELADDR_INC(addr, p);
STATE_CHECK_VAL(scv, mem);
if (scv) {
- p += addr;
+ p += addr;
}
else {
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
}
MOP_OUT;
continue;
@@ -2474,10 +2511,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
GET_RELADDR_INC(addr, p);
if (*p == *s && DATA_ENSURE_CHECK1) {
- p++;
- STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
- continue;
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ MOP_OUT;
+ continue;
}
p += (addr + 1);
MOP_OUT;
@@ -2487,10 +2524,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
GET_RELADDR_INC(addr, p);
if (*p == *s) {
- p++;
- STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
- continue;
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ MOP_OUT;
+ continue;
}
p++;
MOP_OUT;
@@ -2499,16 +2536,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_REPEAT: MOP_IN(OP_REPEAT);
{
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- GET_RELADDR_INC(addr, p);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p);
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
- if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + addr, s, sprev);
- }
+ if (reg->repeat_range[mem].lower == 0) {
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ }
}
MOP_OUT;
continue;
@@ -2516,17 +2553,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
{
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- GET_RELADDR_INC(addr, p);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p);
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
- if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p, s, sprev);
- p += addr;
- }
+ if (reg->repeat_range[mem].lower == 0) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p += addr;
+ }
}
MOP_OUT;
continue;
@@ -2604,9 +2641,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_POP_POS: MOP_IN(OP_POP_POS);
{
- STACK_POS_END(stkp);
- s = stkp->u.state.pstr;
- sprev = stkp->u.state.pstr_prev;
+ STACK_POS_END(stkp);
+ s = stkp->u.state.pstr;
+ sprev = stkp->u.state.pstr_prev;
}
MOP_OUT;
continue;
@@ -2650,15 +2687,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(q)) {
- /* too short case -> success. ex. /(?<!XXX)a/.match("a")
- If you want to change to fail, replace following line. */
- p += addr;
- /* goto fail; */
+ /* too short case -> success. ex. /(?<!XXX)a/.match("a")
+ If you want to change to fail, replace following line. */
+ p += addr;
+ /* goto fail; */
}
else {
- STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
- s = q;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
+ s = q;
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
MOP_OUT;
continue;
@@ -2755,12 +2792,12 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
p = s + 1;
t = target + 1;
while (t < target_end) {
- if (*t != *p++)
- break;
- t++;
+ if (*t != *p++)
+ break;
+ t++;
}
if (t == target_end)
- return s;
+ return s;
}
s += enclen(enc, s);
}
@@ -2832,12 +2869,12 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
p = s + 1;
t = target + 1;
while (t < target_end) {
- if (*t != *p++)
- break;
- t++;
+ if (*t != *p++)
+ break;
+ t++;
}
if (t == target_end)
- return s;
+ return s;
}
s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
}
@@ -2898,8 +2935,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
p = se = s + tlen1;
t = tail;
while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
+ if (t == target) return (UChar* )s;
+ p--; t--;
}
skip = reg->map[*se];
t = s;
@@ -2913,8 +2950,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
p = se = s + tlen1;
t = tail;
while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
+ if (t == target) return (UChar* )s;
+ p--; t--;
}
skip = reg->int_map[*se];
t = s;
@@ -2945,8 +2982,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
p = s;
t = tail;
while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
+ if (t == target) return (UChar* )p;
+ p--; t--;
}
s += reg->map[*s];
}
@@ -2956,8 +2993,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
p = s;
t = tail;
while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
+ if (t == target) return (UChar* )p;
+ p--; t--;
}
s += reg->int_map[*s];
}
@@ -2965,6 +3002,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
return (UChar* )NULL;
}
+#ifdef USE_INT_MAP_BACKWARD
static int
set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
int** skip)
@@ -3015,6 +3053,7 @@ bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
return (UChar* )NULL;
}
+#endif
static UChar*
map_search(OnigEncoding enc, UChar map[],
@@ -3053,7 +3092,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
UChar *prev;
OnigMatchArg msa;
- MATCH_ARG_INIT(msa, option, region, at);
+ MATCH_ARG_INIT(msa, reg, option, region, at);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
{
int offset = at - str;
@@ -3142,58 +3181,58 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
switch (reg->sub_anchor) {
case ANCHOR_BEGIN_LINE:
- if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
- goto retry_gate;
- }
- break;
+ if (!ON_STR_BEGIN(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
+ if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
+ goto retry_gate;
+ }
+ break;
case ANCHOR_END_LINE:
- if (ON_STR_END(p)) {
+ if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
- goto retry_gate;
+ prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
+ if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
+ goto retry_gate;
#endif
- }
- else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+ }
+ else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
- && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+ && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
- )
- goto retry_gate;
- break;
+ )
+ goto retry_gate;
+ break;
}
}
if (reg->dmax == 0) {
*low = p;
if (low_prev) {
- if (*low > s)
- *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
- else
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
+ if (*low > s)
+ *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
+ else
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
}
}
else {
if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- *low = p - reg->dmax;
- if (*low > s) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
- *low, (const UChar** )low_prev);
- if (low_prev && IS_NULL(*low_prev))
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : s), *low);
- }
- else {
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), *low);
- }
+ *low = p - reg->dmax;
+ if (*low > s) {
+ *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+ *low, (const UChar** )low_prev);
+ if (low_prev && IS_NULL(*low_prev))
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : s), *low);
+ }
+ else {
+ if (low_prev)
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), *low);
+ }
}
}
/* no needs to adjust *high, *high is used as range check only */
@@ -3210,8 +3249,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
return 0; /* fail */
}
-static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
- int** skip));
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
@@ -3220,7 +3257,6 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
UChar* s, const UChar* range, UChar* adjrange,
UChar** low, UChar** high)
{
- int r;
UChar *p;
range += reg->dmin;
@@ -3242,16 +3278,22 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
case ONIG_OPTIMIZE_EXACT_BM:
case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+#ifdef USE_INT_MAP_BACKWARD
if (IS_NULL(reg->int_map_backward)) {
+ int r;
+
if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
- goto exact_method;
+ goto exact_method;
r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
- &(reg->int_map_backward));
+ &(reg->int_map_backward));
if (r) return r;
}
p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
- end, p);
+ end, p);
+#else
+ goto exact_method;
+#endif
break;
case ONIG_OPTIMIZE_MAP:
@@ -3265,36 +3307,36 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
switch (reg->sub_anchor) {
case ANCHOR_BEGIN_LINE:
- if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc, str, p);
- if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
- p = prev;
- goto retry;
- }
- }
- break;
+ if (!ON_STR_BEGIN(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc, str, p);
+ if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+ p = prev;
+ goto retry;
+ }
+ }
+ break;
case ANCHOR_END_LINE:
- if (ON_STR_END(p)) {
+ if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
- if (IS_NULL(prev)) goto fail;
- if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
- p = prev;
- goto retry;
- }
-#endif
- }
- else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+ prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (IS_NULL(prev)) goto fail;
+ if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+ p = prev;
+ goto retry;
+ }
+#endif
+ }
+ else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
- && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+ && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
- ) {
- p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
- if (IS_NULL(p)) goto fail;
- goto retry;
- }
- break;
+ ) {
+ p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (IS_NULL(p)) goto fail;
+ goto retry;
+ }
+ break;
}
}
@@ -3405,56 +3447,56 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
/* search start-position only */
begin_position:
if (range > start)
- range = start + 1;
+ range = start + 1;
else
- range = start;
+ range = start;
}
else if (reg->anchor & ANCHOR_BEGIN_BUF) {
/* search str-position only */
if (range > start) {
- if (start != str) goto mismatch_no_msa;
- range = str + 1;
+ if (start != str) goto mismatch_no_msa;
+ range = str + 1;
}
else {
- if (range <= str) {
- start = str;
- range = str;
- }
- else
- goto mismatch_no_msa;
+ if (range <= str) {
+ start = str;
+ range = str;
+ }
+ else
+ goto mismatch_no_msa;
}
}
else if (reg->anchor & ANCHOR_END_BUF) {
min_semi_end = max_semi_end = (UChar* )end;
end_buf:
- if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
+ if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
goto mismatch_no_msa;
if (range > start) {
- if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
- start = min_semi_end - reg->anchor_dmax;
- if (start < end)
- start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
- else { /* match with empty at end */
- start = onigenc_get_prev_char_head(reg->enc, str, end);
- }
- }
- if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
- range = max_semi_end - reg->anchor_dmin + 1;
- }
-
- if (start >= range) goto mismatch_no_msa;
+ if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
+ start = min_semi_end - reg->anchor_dmax;
+ if (start < end)
+ start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
+ else { /* match with empty at end */
+ start = onigenc_get_prev_char_head(reg->enc, str, end);
+ }
+ }
+ if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
+ range = max_semi_end - reg->anchor_dmin + 1;
+ }
+
+ if (start >= range) goto mismatch_no_msa;
}
else {
- if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
- range = min_semi_end - reg->anchor_dmax;
- }
- if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
- start = max_semi_end - reg->anchor_dmin;
- start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
- }
- if (range > start) goto mismatch_no_msa;
+ if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
+ range = min_semi_end - reg->anchor_dmax;
+ }
+ if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) {
+ start = max_semi_end - reg->anchor_dmin;
+ start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
+ }
+ if (range > start) goto mismatch_no_msa;
}
}
else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
@@ -3462,22 +3504,22 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
max_semi_end = (UChar* )end;
if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
- min_semi_end = pre_end;
+ min_semi_end = pre_end;
#ifdef USE_CRNL_AS_LINE_TERMINATOR
- pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
- if (IS_NOT_NULL(pre_end) &&
- ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
- min_semi_end = pre_end;
- }
+ pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
+ if (IS_NOT_NULL(pre_end) &&
+ ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
+ min_semi_end = pre_end;
+ }
#endif
- if (min_semi_end > str && start <= min_semi_end) {
- goto end_buf;
- }
+ if (min_semi_end > str && start <= min_semi_end) {
+ goto end_buf;
+ }
}
else {
- min_semi_end = (UChar* )end;
- goto end_buf;
+ min_semi_end = (UChar* )end;
+ goto end_buf;
}
}
else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
@@ -3496,7 +3538,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
s = (UChar* )start;
prev = (UChar* )NULL;
- MATCH_ARG_INIT(msa, option, region, start);
+ MATCH_ARG_INIT(msa, reg, option, region, start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
msa.state_check_buff = (void* )0;
msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
@@ -3512,7 +3554,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(start - str), (int )(range - str));
#endif
- MATCH_ARG_INIT(msa, option, region, orig_start);
+ MATCH_ARG_INIT(msa, reg, option, region, orig_start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
{
int offset = (MIN(start, range) - str);
@@ -3532,36 +3574,36 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
sch_range = (UChar* )range;
if (reg->dmax != 0) {
- if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_range = (UChar* )end;
- else {
- sch_range += reg->dmax;
- if (sch_range > end) sch_range = (UChar* )end;
- }
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_range = (UChar* )end;
+ else {
+ sch_range += reg->dmax;
+ if (sch_range > end) sch_range = (UChar* )end;
+ }
}
if ((end - start) < reg->threshold_len)
goto mismatch;
if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- do {
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, &low_prev)) goto mismatch;
- if (s < low) {
- s = low;
- prev = low_prev;
- }
- while (s <= high) {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s);
- }
- } while (s < range);
- goto mismatch;
+ do {
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, &low_prev)) goto mismatch;
+ if (s < low) {
+ s = low;
+ prev = low_prev;
+ }
+ while (s <= high) {
+ MATCH_AND_RETURN_CHECK(orig_range);
+ prev = s;
+ s += enclen(reg->enc, s);
+ }
+ } while (s < range);
+ goto mismatch;
}
else { /* check only. */
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, (UChar** )NULL)) goto mismatch;
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, (UChar** )NULL)) goto mismatch;
if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
do {
@@ -3599,47 +3641,47 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
UChar *low, *high, *adjrange, *sch_start;
if (range < end)
- adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+ adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
else
- adjrange = (UChar* )end;
+ adjrange = (UChar* )end;
if (reg->dmax != ONIG_INFINITE_DISTANCE &&
- (end - range) >= reg->threshold_len) {
- do {
- sch_start = s + reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0)
- goto mismatch;
-
- if (s > high)
- s = high;
-
- while (s >= low) {
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
- MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
- }
- } while (s >= range);
- goto mismatch;
+ (end - range) >= reg->threshold_len) {
+ do {
+ sch_start = s + reg->dmax;
+ if (sch_start > end) sch_start = (UChar* )end;
+ if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+ &low, &high) <= 0)
+ goto mismatch;
+
+ if (s > high)
+ s = high;
+
+ while (s >= low) {
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ MATCH_AND_RETURN_CHECK(orig_start);
+ s = prev;
+ }
+ } while (s >= range);
+ goto mismatch;
}
else { /* check only. */
- if ((end - range) < reg->threshold_len) goto mismatch;
-
- sch_start = s;
- if (reg->dmax != 0) {
- if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_start = (UChar* )end;
- else {
- sch_start += reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- else
- sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
- start, sch_start);
- }
- }
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0) goto mismatch;
+ if ((end - range) < reg->threshold_len) goto mismatch;
+
+ sch_start = s;
+ if (reg->dmax != 0) {
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_start = (UChar* )end;
+ else {
+ sch_start += reg->dmax;
+ if (sch_start > end) sch_start = (UChar* )end;
+ else
+ sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
+ start, sch_start);
+ }
+ }
+ if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+ &low, &high) <= 0) goto mismatch;
}
}
@@ -3694,6 +3736,46 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
return s - str;
}
+extern int
+onig_scan(regex_t* reg, const UChar* str, const UChar* end,
+ OnigRegion* region, OnigOptionType option,
+ int (*scan_callback)(int, int, OnigRegion*, void*),
+ void* callback_arg)
+{
+ int r;
+ int n;
+ int rs;
+ const UChar* start;
+
+ n = 0;
+ start = str;
+ while (1) {
+ r = onig_search(reg, str, end, start, end, region, option);
+ if (r >= 0) {
+ rs = scan_callback(n, r, region, callback_arg);
+ n++;
+ if (rs != 0)
+ return rs;
+
+ if (region->end[0] == start - str)
+ start++;
+ else
+ start = str + region->end[0];
+
+ if (start > end)
+ break;
+ }
+ else if (r == ONIG_MISMATCH) {
+ break;
+ }
+ else { /* error */
+ return r;
+ }
+ }
+
+ return n;
+}
+
extern OnigEncoding
onig_get_encoding(regex_t* reg)
{