From 81f65b49e828952d496c80a991397fdac96feea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Wed, 9 Nov 2016 22:19:08 +0100 Subject: New upstream version 6.1.2 --- src/big5.c | 23 ++++++++++++++++++++-- src/euc_jp.c | 34 +++++++++++++++++++++++++++++++-- src/euc_kr.c | 22 +++++++++++++++++++-- src/euc_tw.c | 37 ++++++++++++++++++++++++++++++++++-- src/gb18030.c | 38 +++++++++++++++++++++++++++++++++++-- src/onigposix.h | 2 +- src/oniguruma.h | 9 +++++---- src/regcomp.c | 34 ++++++++++++++++++++++++++------- src/regenc.h | 2 +- src/regexec.c | 24 ++++++++++++++++++++++- src/regint.h | 2 +- src/regparse.c | 59 +++++++++++++++++++++++++++++++++++++++++---------------- src/sjis.c | 31 ++++++++++++++++++++++++++++-- 13 files changed, 274 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/big5.c b/src/big5.c index 3d44975..bc713ab 100644 --- a/src/big5.c +++ b/src/big5.c @@ -55,9 +55,28 @@ big5_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_BIG5, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0x40) return FALSE; + if (*p > 0x7e && *p < 0xa1) return FALSE; + if (*p == 0xff) return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_jp.c b/src/euc_jp.c index 19422ce..3b54e95 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -57,9 +57,39 @@ mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_JP, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p > 0xa0) { + if (*p == 0xff) return FALSE; + p++; + 
if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + } + else if (*p == 0x8e) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p > 0xdf) return FALSE; + p++; + } + else if (*p == 0x8f) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_kr.c b/src/euc_kr.c index 12803cd..450caf1 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -55,9 +55,27 @@ euckr_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_KR, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_tw.c b/src/euc_tw.c index 4e07567..b3ee628 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -55,9 +55,42 @@ euctw_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_TW, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + if (*p == 0x8e) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p > 0xb0) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return 
FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/gb18030.c b/src/gb18030.c index 36fc3de..c8b5865 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -76,9 +76,43 @@ gb18030_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_GB18030, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p == 0x80 || *p == 0xff) { + return FALSE; + } + else { + p++; + if (p >= end) return FALSE; + if (*p < 0x40) { + if (*p < 0x30 || *p > 0x39) + return FALSE; + + p++; + if (p >= end) return FALSE; + if (*p < 0x81 || *p == 0xff) return FALSE; + + p++; + if (p >= end) return FALSE; + if (*p < 0x30 || *p > 0x39) + return FALSE; + + p++; + } + else if (*p == 0x7f || *p == 0xff) { + return FALSE; + } + else { + p++; + } + } + } + + return TRUE; } static OnigCodePoint diff --git a/src/onigposix.h b/src/onigposix.h index 6c41537..2af3717 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -39,7 +39,7 @@ extern "C" { #define REG_NEWLINE (1<<1) #define REG_NOTBOL (1<<2) #define REG_NOTEOL (1<<3) -#define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */ +#define REG_EXTENDED (1<<4) /* if not set, Basic Onigular Expression */ #define REG_NOSUB (1<<5) /* POSIX error codes */ diff --git a/src/oniguruma.h b/src/oniguruma.h index 5aa49f6..6090165 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -36,7 +36,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 #define ONIGURUMA_VERSION_MINOR 1 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_TEENY 2 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -364,7 +364,7 @@ int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN -int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const 
UChar* end)); +int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); @@ -398,7 +398,8 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) #define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) #define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) -#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ +#define ONIG_OPTION_CHECK_VALIDITY_OF_STRING (ONIG_OPTION_POSIX_REGION << 1) +#define ONIG_OPTION_MAXBIT ONIG_OPTION_CHECK_VALIDITY_OF_STRING /* limit */ #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) @@ -742,7 +743,7 @@ void onig_free P_((OnigRegex)); ONIG_EXTERN void onig_free_body P_((OnigRegex)); ONIG_EXTERN -int onig_scan(regex_t* reg, const UChar* str, const UChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); +int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); ONIG_EXTERN int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN diff --git a/src/regcomp.c b/src/regcomp.c index 0235a9f..11ba1e7 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -1795,6 +1795,11 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) } break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = noname_disable_map(&(NANCHOR(node)->target), map, counter); + break; + default: break; } @@ -1853,6 +1858,11 @@ renumber_by_map(Node* node, GroupNumRemap* map) r = renumber_node_backref(node, map); break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = renumber_by_map(NANCHOR(node)->target, map); + break; + default: break; } @@ -1884,6 +1894,11 @@ 
numbered_ref_check(Node* node) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = numbered_ref_check(NANCHOR(node)->target); + break; + default: break; } @@ -3875,9 +3890,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION #define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) + #define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) case ANCHOR_LOOK_BEHIND: { @@ -3913,7 +3929,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) return r; } -/* set skip map for Boyer-Moor search */ +/* set skip map for Boyer-Moore search */ static int set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, UChar skip[], int** int_skip) @@ -4641,7 +4657,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) int i, z; CClassNode* cc = NCCLASS(node); - /* no need to check ignore case. (setted in setup_tree()) */ + /* no need to check ignore case. 
(set in setup_tree()) */ if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { OnigLen min = ONIGENC_MBC_MINLEN(env->enc); @@ -4712,6 +4728,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_END_BUF: case ANCHOR_SEMI_END_BUF: case ANCHOR_END_LINE: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: add_opt_anc_info(&opt->anc, NANCHOR(node)->type); break; @@ -4734,8 +4752,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */ case ANCHOR_LOOK_BEHIND_NOT: break; } @@ -4989,6 +5005,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) + reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { @@ -5133,7 +5152,7 @@ print_anchor(FILE* f, int anchor) } if (anchor & ANCHOR_ANYCHAR_STAR_ML) { if (q) fprintf(f, ", "); - fprintf(f, "anychar-star-pl"); + fprintf(f, "anychar-star-ml"); } fprintf(f, "]"); @@ -5252,6 +5271,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, UnsetAddrList uslist; #endif + root = 0; if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; #ifdef ONIG_DEBUG diff --git a/src/regenc.h b/src/regenc.h index 49227fa..e119dab 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -110,7 +110,7 @@ struct PropertyNameCtype { /* #define USE_CRNL_AS_LINE_TERMINATOR */ #define USE_UNICODE_PROPERTIES /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ -/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII 
diff --git a/src/regexec.c b/src/regexec.c index 70ac89e..7e8d3d1 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -3111,6 +3111,13 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On r = 0; if (r == 0) { + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { + r = ONIGERR_INVALID_WIDE_CHAR_VALUE; + goto end; + } + } + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); r = match_at(reg, str, end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE @@ -3119,6 +3126,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On at, prev, &msa); } + end: MATCH_ARG_FREE(msa); return r; } @@ -3391,6 +3399,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { + r = ONIGERR_INVALID_WIDE_CHAR_VALUE; + goto finish_no_msa; + } + } + #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE @@ -3707,7 +3722,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, MATCH_ARG_FREE(msa); /* If result is mismatch and no FIND_NOT_EMPTY option, - then the region is not setted in match_at(). */ + then the region is not set in match_at(). */ if (IS_FIND_NOT_EMPTY(reg->options) && region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3747,6 +3762,13 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, int rs; const UChar* start; + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! 
ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + + ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING); + } + n = 0; start = str; while (1) { diff --git a/src/regint.h b/src/regint.h index d320e26..7a3283d 100644 --- a/src/regint.h +++ b/src/regint.h @@ -685,7 +685,7 @@ typedef struct _OnigStackType { struct { int num; /* memory num */ UChar *pstr; /* start/end position */ - /* Following information is setted, if this stack type is MEM-START */ + /* Following information is set, if this stack type is MEM-START */ OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */ } mem; diff --git a/src/regparse.c b/src/regparse.c index e8a6e20..8f1d1cb 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -29,6 +29,10 @@ #include "regparse.h" #include "st.h" +#ifdef DEBUG_NODE_FREE +#include <stdio.h> +#endif + #define WARN_BUFSIZE 256 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS @@ -1003,13 +1007,16 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node) return 0; } - extern void onig_node_free(Node* node) { start: if (IS_NULL(node)) return ; +#ifdef DEBUG_NODE_FREE + fprintf(stderr, "onig_node_free: %p\n", node); +#endif + switch (NTYPE(node)) { case NT_STR: if (NSTR(node)->capa != 0 && @@ -1071,6 +1078,9 @@ node_new(void) node = (Node* )xmalloc(sizeof(Node)); /* xmemset(node, 0, sizeof(Node)); */ +#ifdef DEBUG_NODE_FREE + fprintf(stderr, "node_new: %p\n", node); +#endif return node; } @@ -1449,7 +1459,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc) if (sn->end > sn->s) { p = onigenc_get_prev_char_head(enc, sn->s, sn->end); - if (p && p > sn->s) { /* can be splitted. */ + if (p && p > sn->s) { /* can be split. 
*/ n = node_new_str(p, sn->end); if ((sn->flag & NSTR_RAW) != 0) NSTRING_SET_RAW(n); @@ -2520,7 +2530,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, #endif /* USE_BACKREF_WITH_LEVEL */ /* - def: 0 -> define name (don't allow number name) + ref: 0 -> define name (don't allow number name) 1 -> reference name (allow number name) */ static int @@ -3000,7 +3010,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) else if (c == '[') { if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; - tok->backp = p; /* point at '[' is readed */ + tok->backp = p; /* point at '[' is read */ PINC; if (str_exist_check_with_esc(send, 2, p, end, (OnigCodePoint )']', enc, syn)) { @@ -4318,7 +4328,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, CClassNode* acc; r = parse_char_class(&anode, tok, &p, end, env); - if (r != 0) goto cc_open_err; + if (r != 0) { + onig_node_free(anode); + goto cc_open_err; + } acc = NCCLASS(anode); r = or_cclass(cc, acc, env->enc); @@ -4412,7 +4425,6 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, err: if (cc != NCCLASS(*np)) bbuf_free(cc->mbuf); - onig_node_free(*np); return r; } @@ -4542,11 +4554,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) { - onig_node_free(*np); return num; } else if (num >= (int )BIT_STATUS_BITS_NUM) { - onig_node_free(*np); return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } NENCLOSE(*np)->regnum = num; @@ -4614,7 +4624,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); env->option = prev; - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } *np = node_new_option(option); CHECK_NULL_RETURN_MEMERR(*np); 
NENCLOSE(*np)->target = target; @@ -4647,7 +4660,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = fetch_token(tok, &p, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } if (NTYPE(*np) == NT_ANCHOR) NANCHOR(*np)->target = target; @@ -4908,7 +4924,10 @@ parse_exp(Node** np, OnigToken* tok, int term, if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); env->option = prev; - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } NENCLOSE(*np)->target = target; return tok->type; } @@ -5220,7 +5239,10 @@ parse_branch(Node** top, OnigToken* tok, int term, *top = NULL; r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(node); + return r; + } if (r == TK_EOT || r == term || r == TK_ALT) { *top = node; @@ -5230,7 +5252,10 @@ parse_branch(Node** top, OnigToken* tok, int term, headp = &(NCDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(node); + return r; + } if (NTYPE(node) == NT_LIST) { *headp = node; @@ -5272,8 +5297,10 @@ parse_subexp(Node** top, OnigToken* tok, int term, r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) return r; - + if (r < 0) { + onig_node_free(node); + return r; + } *headp = onig_node_new_alt(node, NULL); headp = &(NCDR(*headp)); } @@ -5282,8 +5309,8 @@ parse_subexp(Node** top, OnigToken* tok, int term, goto err; } else { - err: onig_node_free(node); + err: if (term == TK_SUBEXP_CLOSE) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; else diff --git a/src/sjis.c b/src/sjis.c index a607b3d..3378474 100644 --- a/src/sjis.c +++ b/src/sjis.c @@ -77,9 +77,36 @@ mbc_enc_len(const UChar* p) } static int 
-is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_SJIS, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + if (*p == 0xa0 || *p == 0x80) + return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0x40 || *p > 0xfc || *p == 0x7f) + return FALSE; + p++; + } + else if (*p < 0xe0) { + p++; + } + else if (*p < 0xfd) { + p++; + if (p >= end) return FALSE; + if (*p < 0x40 || *p > 0xfc || *p == 0x7f) + return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static int -- cgit v1.2.3