diff options
author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2016-08-31 03:42:07 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2016-08-31 03:42:07 +0200 |
commit | 75ed3a79bf4fdcb71dd709495de544cb6cef17b3 (patch) | |
tree | 67e4b404206c31d3d7d52673c12eb9f756db5c64 /src/regexec.c | |
parent | c752981613de81bfa2723749b79e80bf0008f27c (diff) | |
parent | a76fa337cc657dbe669ffb8dbdac606d4d6616f1 (diff) |
Merge tag 'upstream/6.1.0'
Upstream version 6.1.0
Diffstat (limited to 'src/regexec.c')
-rw-r--r-- | src/regexec.c | 1560 |
1 files changed, 821 insertions, 739 deletions
diff --git a/src/regexec.c b/src/regexec.c index 2c768e1..70ac89e 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -327,19 +327,21 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ (msa).best_len = ONIG_MISMATCH;\ + (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\ } while(0) #else -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ + (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\ } while(0) #endif @@ -369,7 +371,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) (msa).state_check_buff = (void* )0;\ (msa).state_check_buff_size = 0;\ }\ - } while(0) +} while(0) #define MATCH_ARG_FREE(msa) do {\ if ((msa).stack_p) xfree((msa).stack_p);\ @@ -383,32 +385,59 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #endif +#define ALLOCA_PTR_NUM_LIMIT 50 -#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ +#define STACK_INIT(stack_num) do {\ if (msa->stack_p) {\ - alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ - stk_alloc = (OnigStackType* )(msa->stack_p);\ - stk_base = stk_alloc;\ + is_alloca = 0;\ + alloc_base = msa->stack_p;\ + stk_base = (OnigStackType* )(alloc_base\ + + (sizeof(OnigStackIndex) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + msa->stack_n;\ }\ + else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\ + is_alloca = 0;\ + alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\ + + sizeof(OnigStackType) * (stack_num));\ + stk_base = (OnigStackType* )(alloc_base\ + + (sizeof(OnigStackIndex) * msa->ptr_num));\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ else {\ - alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ - + sizeof(OnigStackType) * (stack_num));\ - stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ - stk_base = stk_alloc;\ + is_alloca = 1;\ + alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\ + + sizeof(OnigStackType) * (stack_num));\ + stk_base = (OnigStackType* )(alloc_base\ + + (sizeof(OnigStackIndex) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ }\ -} while(0) +} while(0); + #define STACK_SAVE do{\ - if (stk_base != stk_alloc) {\ - msa->stack_p = stk_base;\ - msa->stack_n = stk_end - stk_base;\ + msa->stack_n = stk_end - stk_base;\ + if (is_alloca != 0) {\ + size_t size = sizeof(OnigStackIndex) * msa->ptr_num \ + + sizeof(OnigStackType) * msa->stack_n;\ + msa->stack_p = xmalloc(size);\ + xmemcpy(msa->stack_p, alloc_base, size);\ + }\ + else {\ + msa->stack_p = alloc_base;\ };\ } while(0) +#define UPDATE_FOR_STACK_REALLOC do{\ + repeat_stk = (OnigStackIndex* )alloc_base;\ + mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\ + mem_end_stk = mem_start_stk + num_mem;\ + mem_start_stk--; /* for index start from 1 */\ + mem_end_stk--; /* for index start from 1 */\ +} while(0) + static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; extern unsigned int @@ -425,50 +454,65 @@ onig_set_match_stack_limit_size(unsigned int size) } static int -stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, - OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) +stack_double(int is_alloca, char** arg_alloc_base, + OnigStackType** arg_stk_base, + OnigStackType** arg_stk_end, OnigStackType** arg_stk, + OnigMatchArg* msa) { unsigned int n; - OnigStackType *x, *stk_base, *stk_end, *stk; + int used; + size_t size; + char* alloc_base; + char* new_alloc_base; + OnigStackType *stk_base, *stk_end, *stk; + alloc_base = *arg_alloc_base; stk_base = *arg_stk_base; stk_end = *arg_stk_end; stk = *arg_stk; n = stk_end - stk_base; - if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { - x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); - if (IS_NULL(x)) { + n *= 2; + size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; + if (is_alloca != 0) { + new_alloc_base = (char* )xmalloc(size); + if (IS_NULL(new_alloc_base)) { STACK_SAVE; return ONIGERR_MEMORY; } - xmemcpy(x, stk_base, n * sizeof(OnigStackType)); - n *= 2; + xmemcpy(new_alloc_base, alloc_base, size); } else { - n *= 2; if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) return ONIGERR_MATCH_STACK_LIMIT_OVER; else n = MatchStackLimitSize; } - x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); - if (IS_NULL(x)) { + new_alloc_base = (char* )xrealloc(alloc_base, size); + if (IS_NULL(new_alloc_base)) { STACK_SAVE; return ONIGERR_MEMORY; } } - *arg_stk = x + (stk - stk_base); - *arg_stk_base = x; - *arg_stk_end = x + n; + + alloc_base = new_alloc_base; + used = stk - stk_base; + *arg_alloc_base = alloc_base; + *arg_stk_base = (OnigStackType* )(alloc_base + + (sizeof(OnigStackIndex) * msa->ptr_num)); + *arg_stk = *arg_stk_base + used; + *arg_stk_end = *arg_stk_base + n; return 0; } #define STACK_ENSURE(n) do {\ if (stk_end - stk < (n)) {\ - int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ + int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk,\ + msa);\ if (r != 0) { STACK_SAVE; return r; } \ + is_alloca = 0;\ + UPDATE_FOR_STACK_REALLOC;\ }\ } while(0) @@ -1108,33 +1152,33 @@ static int backref_match_at_nested_level(regex_t* reg } else if (level == nest) { if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pstart = k->u.mem.pstr; - if (pend != NULL_UCHARP) { - if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ - p = pstart; - ss = *s; - - if (ignore_case != 0) { - if (string_cmp_ic(reg->enc, case_fold_flag, - pstart, &ss, (int )(pend - pstart)) == 0) - return 0; /* or goto next_mem; */ - } - else { - while (p < pend) { - if (*p++ != *ss++) return 0; /* or goto next_mem; */ - } - } - - *s = ss; - return 1; - } - } + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, case_fold_flag, + pstart, &ss, (int )(pend - pstart)) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } } else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pend = k->u.mem.pstr; - } + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } } } k--; @@ -1247,13 +1291,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, LengthType tlen, tlen2; MemNumType mem; RelAddrType addr; - OnigOptionType option = reg->options; - OnigEncoding encode = reg->enc; - OnigCaseFoldType case_fold_flag = reg->case_fold_flag; UChar *s, *q, *sbegin; - UChar *p = reg->p; - char *alloca_base; - OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; + int is_alloca; + char *alloc_base; + OnigStackType *stk_base, *stk, *stk_end; OnigStackType *stkp; /* used as any purpose. */ OnigStackIndex si; OnigStackIndex *repeat_stk; @@ -1263,19 +1304,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, unsigned char* state_check_buff = msa->state_check_buff; int num_comb_exp_check = reg->num_comb_exp_check; #endif - n = reg->num_repeat + reg->num_mem * 2; + UChar *p = reg->p; + OnigOptionType option = reg->options; + OnigEncoding encode = reg->enc; + OnigCaseFoldType case_fold_flag = reg->case_fold_flag; - STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); + //n = reg->num_repeat + reg->num_mem * 2; pop_level = reg->stack_pop_level; num_mem = reg->num_mem; - repeat_stk = (OnigStackIndex* )alloca_base; - - mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); - mem_end_stk = mem_start_stk + num_mem; - mem_start_stk--; /* for index start from 1, - mem_start_stk[1]..mem_start_stk[num_mem] */ - mem_end_stk--; /* for index start from 1, - mem_end_stk[1]..mem_end_stk[num_mem] */ + STACK_INIT(INIT_MATCH_STACK_SIZE); + UPDATE_FOR_STACK_REALLOC; for (i = 1; i <= num_mem; i++) { mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; } @@ -1316,64 +1354,64 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_END: MOP_IN(OP_END); n = s - sstart; if (n > best_len) { - OnigRegion* region; + OnigRegion* region; #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(option)) { - if (n > msa->best_len) { - msa->best_len = n; - msa->best_s = (UChar* )sstart; - } - else - goto end_best_len; + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; } #endif - best_len = n; - region = msa->region; - if (region) { + best_len = n; + region = msa->region; + if (region) { #ifdef USE_POSIX_API_REGION_OPTION - if (IS_POSIX_REGION(msa->options)) { - posix_regmatch_t* rmt = (posix_regmatch_t* )region; - - rmt[0].rm_so = sstart - str; - rmt[0].rm_eo = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; - - rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; - } - } - } - else { + if (IS_POSIX_REGION(msa->options)) { + posix_regmatch_t* rmt = (posix_regmatch_t* )region; + + rmt[0].rm_so = sstart - str; + rmt[0].rm_eo = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; + + rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; + } + } + } + else { #endif /* USE_POSIX_API_REGION_OPTION */ - region->beg[0] = sstart - str; - region->end[0] = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; - - region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } - } + region->beg[0] = sstart - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } #ifdef USE_CAPTURE_HISTORY - if (reg->capture_history != 0) { + if (reg->capture_history != 0) { int r; OnigCaptureTreeNode* node; @@ -1397,12 +1435,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, best_len = r; /* error code */ goto finish; } - } + } #endif /* USE_CAPTURE_HISTORY */ #ifdef USE_POSIX_API_REGION_OPTION - } /* else IS_POSIX_REGION() */ + } /* else IS_POSIX_REGION() */ #endif - } /* if (region) */ + } /* if (region) */ } /* n > best_len */ #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE @@ -1411,13 +1449,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MOP_OUT; if (IS_FIND_CONDITION(option)) { - if (IS_FIND_NOT_EMPTY(option) && s == sstart) { - best_len = ONIG_MISMATCH; - goto fail; /* for retry */ - } - if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { - goto fail; /* for retry */ - } + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { + goto fail; /* for retry */ + } } /* default behavior: return first-matching result. */ @@ -1438,22 +1476,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); { - int len; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) { + int len; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { goto fail; } - p++; q++; - } + p++; q++; + } } MOP_OUT; break; @@ -1518,7 +1556,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { - if (*p++ != *s++) goto fail; + if (*p++ != *s++) goto fail; } sprev = s - 1; MOP_OUT; @@ -1527,26 +1565,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); { - int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + int len; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - GET_LENGTH_INC(tlen, p); - endp = p + tlen; + GET_LENGTH_INC(tlen, p); + endp = p + tlen; - while (p < endp) { - sprev = s; - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) goto fail; - p++; q++; - } - } + while (p < endp) { + sprev = s; + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } } MOP_OUT; @@ -1600,10 +1638,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - 2; MOP_OUT; @@ -1614,12 +1652,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - 3; MOP_OUT; @@ -1632,8 +1670,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, tlen2 *= tlen; DATA_ENSURE(tlen2); while (tlen2-- > 0) { - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - tlen; MOP_OUT; @@ -1654,23 +1692,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, cclass_mb: GET_LENGTH_INC(tlen, p); { - OnigCodePoint code; - UChar *ss; - int mb_len; + OnigCodePoint code; + UChar *ss; + int mb_len; - DATA_ENSURE(1); - mb_len = enclen(encode, s); - DATA_ENSURE(mb_len); - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); + DATA_ENSURE(1); + mb_len = enclen(encode, s); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); #ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (! onig_is_in_code_range(p, code)) goto fail; + if (! onig_is_in_code_range(p, code)) goto fail; #else - q = p; - ALIGNMENT_RIGHT(q); - if (! onig_is_in_code_range(q, code)) goto fail; + q = p; + ALIGNMENT_RIGHT(q); + if (! onig_is_in_code_range(q, code)) goto fail; #endif } p += tlen; @@ -1680,17 +1718,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { - p += SIZE_BITSET; - goto cclass_mb; + p += SIZE_BITSET; + goto cclass_mb; } else { - if (BITSET_AT(((BitSetRef )p), *s) == 0) - goto fail; + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; } MOP_OUT; break; @@ -1706,36 +1744,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { - s++; - GET_LENGTH_INC(tlen, p); - p += tlen; - goto cc_mb_not_success; + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; } cclass_mb_not: GET_LENGTH_INC(tlen, p); { - OnigCodePoint code; - UChar *ss; - int mb_len = enclen(encode, s); + OnigCodePoint code; + UChar *ss; + int mb_len = enclen(encode, s); - if (! DATA_ENSURE_CHECK(mb_len)) { + if (! DATA_ENSURE_CHECK(mb_len)) { DATA_ENSURE(1); - s = (UChar* )end; - p += tlen; - goto cc_mb_not_success; - } + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); #ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (onig_is_in_code_range(p, code)) goto fail; + if (onig_is_in_code_range(p, code)) goto fail; #else - q = p; - ALIGNMENT_RIGHT(q); - if (onig_is_in_code_range(q, code)) goto fail; + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; #endif } p += tlen; @@ -1747,36 +1785,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { - p += SIZE_BITSET; - goto cclass_mb_not; + p += SIZE_BITSET; + goto cclass_mb_not; } else { - if (BITSET_AT(((BitSetRef )p), *s) != 0) - goto fail; + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; } MOP_OUT; break; case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); { - OnigCodePoint code; + OnigCodePoint code; void *node; int mb_len; UChar *ss; DATA_ENSURE(1); GET_POINTER_INC(node, p); - mb_len = enclen(encode, s); - ss = s; - s += mb_len; - DATA_ENSURE(0); - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; + mb_len = enclen(encode, s); + ss = s; + s += mb_len; + DATA_ENSURE(0); + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } MOP_OUT; break; @@ -1800,8 +1838,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s); + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; @@ -1812,27 +1850,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } } MOP_OUT; break; case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s); + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; @@ -1844,19 +1882,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } } p++; MOP_OUT; @@ -1866,11 +1904,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); GET_STATE_CHECK_NUM_INC(mem, p); while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; @@ -1884,20 +1922,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_STATE_CHECK_NUM_INC(mem, p); while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } } MOP_OUT; break; @@ -1906,7 +1944,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_WORD: MOP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + goto fail; s += enclen(encode, s); MOP_OUT; @@ -1915,7 +1953,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + goto fail; s += enclen(encode, s); MOP_OUT; @@ -1923,18 +1961,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; } else if (ON_STR_END(s)) { - if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; } else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - == ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; } MOP_OUT; continue; @@ -1942,17 +1980,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; } else if (ON_STR_END(s)) { - if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; } else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - != ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; } MOP_OUT; continue; @@ -1961,20 +1999,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_WORD_BEGIN_END case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { - if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - MOP_OUT; - continue; - } + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + MOP_OUT; + continue; + } } goto fail; break; case OP_WORD_END: MOP_IN(OP_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - MOP_OUT; - continue; - } + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { + MOP_OUT; + continue; + } } goto fail; break; @@ -1996,13 +2034,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { - if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; - continue; + if (IS_NOTBOL(msa->options)) goto fail; + MOP_OUT; + continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - MOP_OUT; - continue; + MOP_OUT; + continue; } goto fail; break; @@ -2010,23 +2048,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_END_LINE: MOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } + } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - MOP_OUT; - continue; + MOP_OUT; + continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - MOP_OUT; - continue; + MOP_OUT; + continue; } #endif goto fail; @@ -2035,24 +2073,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } + } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enclen(encode, s))) { - MOP_OUT; - continue; + ON_STR_END(s + enclen(encode, s))) { + MOP_OUT; + continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { UChar* ss = s + enclen(encode, s); - ss += enclen(encode, ss); + ss += enclen(encode, ss); if (ON_STR_END(ss)) { MOP_OUT; continue; @@ -2064,7 +2102,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); if (s != msa->start) - goto fail; + goto fail; MOP_OUT; continue; @@ -2114,9 +2152,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_MEM_START(mem, stkp); if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - mem_start_stk[mem] = GET_STACK_INDEX(stkp); + mem_start_stk[mem] = GET_STACK_INDEX(stkp); else - mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); + mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); MOP_OUT; @@ -2138,171 +2176,170 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_MEMNUM_INC(mem, p); backref: { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP(pstart, s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; - MOP_OUT; - continue; + MOP_OUT; + continue; } break; case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); GET_MEMNUM_INC(mem, p); { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(case_fold_flag, pstart, &s, n); + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; - MOP_OUT; - continue; + MOP_OUT; + continue; } break; case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE(pstart, swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; } break; case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; } break; #ifdef USE_BACKREF_WITH_LEVEL case OP_BACKREF_WITH_LEVEL: { - int len; - OnigOptionType ic; - LengthType level; + int len; + OnigOptionType ic; + LengthType level; - GET_OPTION_INC(ic, p); - GET_LENGTH_INC(level, p); - GET_LENGTH_INC(tlen, p); + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); - sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic - , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic + , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; - p += (SIZE_MEMNUM * tlen); - } - else - goto fail; + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; - MOP_OUT; - continue; + MOP_OUT; + continue; } - break; #endif @@ -2331,33 +2368,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); { - int isnull; + int isnull; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK(isnull, mem, s); - if (isnull) { + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK(isnull, mem, s); + if (isnull) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - null_check_found: - /* empty loop founded, skip next instruction */ - switch (*p++) { - case OP_JUMP: - case OP_PUSH: - p += SIZE_RELADDR; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - case OP_REPEAT_INC_SG: - case OP_REPEAT_INC_NG_SG: - p += SIZE_MEMNUM; - break; - default: - goto unexpected_bytecode_error; - break; - } - } + fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + null_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } } MOP_OUT; continue; @@ -2366,18 +2403,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); { - int isnull; + int isnull; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); - if (isnull) { + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", - (int )mem, (int )s); + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", + (int )mem, (int )s); #endif - if (isnull == -1) goto fail; - goto null_check_found; - } + if (isnull == -1) goto fail; + goto null_check_found; + } } MOP_OUT; continue; @@ -2388,25 +2425,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_NULL_CHECK_END_MEMST_PUSH: MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); { - int isnull; + int isnull; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ + GET_MEMNUM_INC(mem, p); /* mem: null check id */ #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); #else - STACK_NULL_CHECK_REC(isnull, mem, s); + STACK_NULL_CHECK_REC(isnull, mem, s); #endif - if (isnull) { + if (isnull) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", - (int )mem, (int )s); + fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", + (int )mem, (int )s); #endif - if (isnull == -1) goto fail; - goto null_check_found; - } - else { - STACK_PUSH_NULL_CHECK_END(mem); - } + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } } MOP_OUT; continue; @@ -2445,10 +2482,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); STATE_CHECK_VAL(scv, mem); if (scv) { - p += addr; + p += addr; } else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); } MOP_OUT; continue; @@ -2474,10 +2511,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; } p += (addr + 1); MOP_OUT; @@ -2487,10 +2524,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); if (*p == *s) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; } p++; MOP_OUT; @@ -2499,16 +2536,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_REPEAT: MOP_IN(OP_REPEAT); { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); - } + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p + addr, s, sprev); + } } MOP_OUT; continue; @@ -2516,17 +2553,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p, s, sprev); - p += addr; - } + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p, s, sprev); + p += addr; + } } MOP_OUT; continue; @@ -2604,9 +2641,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_POP_POS: MOP_IN(OP_POP_POS); { - STACK_POS_END(stkp); - s = stkp->u.state.pstr; - sprev = stkp->u.state.pstr_prev; + STACK_POS_END(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; } MOP_OUT; continue; @@ -2650,15 +2687,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(q)) { - /* too short case -> success. ex. /(?<!XXX)a/.match("a") - If you want to change to fail, replace following line. */ - p += addr; - /* goto fail; */ + /* too short case -> success. ex. /(?<!XXX)a/.match("a") + If you want to change to fail, replace following line. */ + p += addr; + /* goto fail; */ } else { - STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); - s = q; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); + s = q; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } MOP_OUT; continue; @@ -2755,12 +2792,12 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, p = s + 1; t = target + 1; while (t < target_end) { - if (*t != *p++) - break; - t++; + if (*t != *p++) + break; + t++; } if (t == target_end) - return s; + return s; } s += enclen(enc, s); } @@ -2832,12 +2869,12 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, p = s + 1; t = target + 1; while (t < target_end) { - if (*t != *p++) - break; - t++; + if (*t != *p++) + break; + t++; } if (t == target_end) - return s; + return s; } s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); } @@ -2898,8 +2935,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, p = se = s + tlen1; t = tail; while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; + if (t == target) return (UChar* )s; + p--; t--; } skip = reg->map[*se]; t = s; @@ -2913,8 +2950,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, p = se = s + tlen1; t = tail; while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; + if (t == target) return (UChar* )s; + p--; t--; } skip = reg->int_map[*se]; t = s; @@ -2945,8 +2982,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, p = s; t = tail; while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; + if (t == target) return (UChar* )p; + p--; t--; } s += reg->map[*s]; } @@ -2956,8 +2993,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, p = s; t = tail; while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; + if (t == target) return (UChar* )p; + p--; t--; } s += reg->int_map[*s]; } @@ -2965,6 +3002,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, return (UChar* )NULL; } +#ifdef USE_INT_MAP_BACKWARD static int set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip) @@ -3015,6 +3053,7 @@ bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, return (UChar* )NULL; } +#endif static UChar* map_search(OnigEncoding enc, UChar map[], @@ -3053,7 +3092,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On UChar *prev; OnigMatchArg msa; - MATCH_ARG_INIT(msa, option, region, at); + MATCH_ARG_INIT(msa, reg, option, region, at); #ifdef USE_COMBINATION_EXPLOSION_CHECK { int offset = at - str; @@ -3142,58 +3181,58 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; - } - break; + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + break; case ANCHOR_END_LINE: - if (ON_STR_END(p)) { + if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; #endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) #endif - ) - goto retry_gate; - break; + ) + goto retry_gate; + break; } } if (reg->dmax == 0) { *low = p; if (low_prev) { - if (*low > s) - *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); - else - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); } } else { if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); - } - else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low); - } + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), *low); + } } } /* no needs to adjust *high, *high is used as range check only */ @@ -3210,8 +3249,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, return 0; /* fail */ } -static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, - int** skip)); #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 @@ -3220,7 +3257,6 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, const UChar* range, UChar* adjrange, UChar** low, UChar** high) { - int r; UChar *p; range += reg->dmin; @@ -3242,16 +3278,22 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ONIG_OPTIMIZE_EXACT_BM: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: +#ifdef USE_INT_MAP_BACKWARD if (IS_NULL(reg->int_map_backward)) { + int r; + if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; + goto exact_method; r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); + &(reg->int_map_backward)); if (r) return r; } p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); + end, p); +#else + goto exact_method; +#endif break; case ONIG_OPTIMIZE_MAP: @@ -3265,36 +3307,36 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, str, p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } - } - break; + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, str, p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } + } + break; case ANCHOR_END_LINE: - if (ON_STR_END(p)) { + if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(prev)) goto fail; - if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } -#endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(prev)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) #endif - ) { - p = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(p)) goto fail; - goto retry; - } - break; + ) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(p)) goto fail; + goto retry; + } + break; } } @@ -3405,56 +3447,56 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, /* search start-position only */ begin_position: if (range > start) - range = start + 1; + range = start + 1; else - range = start; + range = start; } else if (reg->anchor & ANCHOR_BEGIN_BUF) { /* search str-position only */ if (range > start) { - if (start != str) goto mismatch_no_msa; - range = str + 1; + if (start != str) goto mismatch_no_msa; + range = str + 1; } else { - if (range <= str) { - start = str; - range = str; - } - else - goto mismatch_no_msa; + if (range <= str) { + start = str; + range = str; + } + else + goto mismatch_no_msa; } } else if (reg->anchor & ANCHOR_END_BUF) { min_semi_end = max_semi_end = (UChar* )end; end_buf: - if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) + if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin) goto mismatch_no_msa; if (range > start) { - if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { - start = min_semi_end - reg->anchor_dmax; - if (start < end) - start = onigenc_get_right_adjust_char_head(reg->enc, str, start); - else { /* match with empty at end */ - start = onigenc_get_prev_char_head(reg->enc, str, end); - } - } - if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { - range = max_semi_end - reg->anchor_dmin + 1; - } - - if (start >= range) goto mismatch_no_msa; + if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; + if (start < end) + start = onigenc_get_right_adjust_char_head(reg->enc, str, start); + else { /* match with empty at end */ + start = onigenc_get_prev_char_head(reg->enc, str, end); + } + } + if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) { + range = max_semi_end - reg->anchor_dmin + 1; + } + + if (start >= range) goto mismatch_no_msa; } else { - if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { - range = min_semi_end - reg->anchor_dmax; - } - if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { - start = max_semi_end - reg->anchor_dmin; - start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); - } - if (range > start) goto mismatch_no_msa; + if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; + } + if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) { + start = max_semi_end - reg->anchor_dmin; + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); + } + if (range > start) goto mismatch_no_msa; } } else if (reg->anchor & ANCHOR_SEMI_END_BUF) { @@ -3462,22 +3504,22 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, max_semi_end = (UChar* )end; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { - min_semi_end = pre_end; + min_semi_end = pre_end; #ifdef USE_CRNL_AS_LINE_TERMINATOR - pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); - if (IS_NOT_NULL(pre_end) && - ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - } + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); + if (IS_NOT_NULL(pre_end) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } #endif - if (min_semi_end > str && start <= min_semi_end) { - goto end_buf; - } + if (min_semi_end > str && start <= min_semi_end) { + goto end_buf; + } } else { - min_semi_end = (UChar* )end; - goto end_buf; + min_semi_end = (UChar* )end; + goto end_buf; } } else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { @@ -3496,7 +3538,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, s = (UChar* )start; prev = (UChar* )NULL; - MATCH_ARG_INIT(msa, option, region, start); + MATCH_ARG_INIT(msa, reg, option, region, start); #ifdef USE_COMBINATION_EXPLOSION_CHECK msa.state_check_buff = (void* )0; msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ @@ -3512,7 +3554,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(start - str), (int )(range - str)); #endif - MATCH_ARG_INIT(msa, option, region, orig_start); + MATCH_ARG_INIT(msa, reg, option, region, orig_start); #ifdef USE_COMBINATION_EXPLOSION_CHECK { int offset = (MIN(start, range) - str); @@ -3532,36 +3574,36 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, sch_range = (UChar* )range; if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_range = (UChar* )end; - else { - sch_range += reg->dmax; - if (sch_range > end) sch_range = (UChar* )end; - } + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_range = (UChar* )end; + else { + sch_range += reg->dmax; + if (sch_range > end) sch_range = (UChar* )end; + } } if ((end - start) < reg->threshold_len) goto mismatch; if (reg->dmax != ONIG_INFINITE_DISTANCE) { - do { - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, &low_prev)) goto mismatch; - if (s < low) { - s = low; - prev = low_prev; - } - while (s <= high) { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s); - } - } while (s < range); - goto mismatch; + do { + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, &low_prev)) goto mismatch; + if (s < low) { + s = low; + prev = low_prev; + } + while (s <= high) { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s); + } + } while (s < range); + goto mismatch; } else { /* check only. */ - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) goto mismatch; + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, (UChar** )NULL)) goto mismatch; if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { do { @@ -3599,47 +3641,47 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, UChar *low, *high, *adjrange, *sch_start; if (range < end) - adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); else - adjrange = (UChar* )end; + adjrange = (UChar* )end; if (reg->dmax != ONIG_INFINITE_DISTANCE && - (end - range) >= reg->threshold_len) { - do { - sch_start = s + reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) - goto mismatch; - - if (s > high) - s = high; - - while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } - } while (s >= range); - goto mismatch; + (end - range) >= reg->threshold_len) { + do { + sch_start = s + reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) + goto mismatch; + + if (s > high) + s = high; + + while (s >= low) { + prev = onigenc_get_prev_char_head(reg->enc, str, s); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } + } while (s >= range); + goto mismatch; } else { /* check only. */ - if ((end - range) < reg->threshold_len) goto mismatch; - - sch_start = s; - if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_start = (UChar* )end; - else { - sch_start += reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - else - sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, - start, sch_start); - } - } - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) goto mismatch; + if ((end - range) < reg->threshold_len) goto mismatch; + + sch_start = s; + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_start = (UChar* )end; + else { + sch_start += reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + else + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start); + } + } + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; } } @@ -3694,6 +3736,46 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, return s - str; } +extern int +onig_scan(regex_t* reg, const UChar* str, const UChar* end, + OnigRegion* region, OnigOptionType option, + int (*scan_callback)(int, int, OnigRegion*, void*), + void* callback_arg) +{ + int r; + int n; + int rs; + const UChar* start; + + n = 0; + start = str; + while (1) { + r = onig_search(reg, str, end, start, end, region, option); + if (r >= 0) { + rs = scan_callback(n, r, region, callback_arg); + n++; + if (rs != 0) + return rs; + + if (region->end[0] == start - str) + start++; + else + start = str + region->end[0]; + + if (start > end) + break; + } + else if (r == ONIG_MISMATCH) { + break; + } + else { /* error */ + return r; + } + } + + return n; +} + extern OnigEncoding onig_get_encoding(regex_t* reg) { |