summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJörg Frings-Fürst <debian@jff-webhosting.net>2017-04-09 19:36:05 +0200
committerJörg Frings-Fürst <debian@jff-webhosting.net>2017-04-09 19:36:05 +0200
commit55597dfc375615c45377aad508db826caf6b0bdf (patch)
tree0904320d2b2c844a806217a38efe8d2358bcaeee /src
parentdc546147cf8df064e1ea40cc4c339648e7def040 (diff)
parentf42b61800a1a1b360c5ac915ec1caf34bf1fb86b (diff)
Merge tag 'upstream/6.2.0'
Upstream version 6.2.0
Diffstat (limited to 'src')
-rw-r--r--src/oniguruma.h15
-rw-r--r--src/regcomp.c33
-rw-r--r--src/regerror.c24
-rw-r--r--src/regexec.c41
-rw-r--r--src/regint.h5
-rw-r--r--src/regparse.c52
-rw-r--r--src/regparse.h1
7 files changed, 131 insertions, 40 deletions
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 090b809..33e2a0a 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,8 +35,8 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
-#define ONIGURUMA_VERSION_MINOR 1
-#define ONIGURUMA_VERSION_TEENY 3
+#define ONIGURUMA_VERSION_MINOR 2
+#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
@@ -372,7 +372,7 @@ int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const
/* config parameters */
#define ONIG_NREGION 10
-#define ONIG_MAX_CAPTURE_NUM 32767
+#define ONIG_MAX_CAPTURE_NUM 2147483647 /* 2**31 - 1 */
#define ONIG_MAX_BACKREF_NUM 1000
#define ONIG_MAX_REPEAT_NUM 100000
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
@@ -543,6 +543,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_UNDEFINED_BYTECODE -13
#define ONIGERR_UNEXPECTED_BYTECODE -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
+#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
#define ONIGERR_FAIL_TO_INITIALIZE -23
@@ -821,6 +822,12 @@ unsigned int onig_get_match_stack_limit_size P_((void));
ONIG_EXTERN
int onig_set_match_stack_limit_size P_((unsigned int size));
ONIG_EXTERN
+unsigned int onig_get_parse_depth_limit P_((void));
+ONIG_EXTERN
+int onig_set_capture_num_limit P_((int num));
+ONIG_EXTERN
+int onig_set_parse_depth_limit P_((unsigned int depth));
+ONIG_EXTERN
int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges));
ONIG_EXTERN
int onig_end P_((void));
diff --git a/src/regcomp.c b/src/regcomp.c
index 11ba1e7..5c924b5 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -1230,6 +1230,11 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)
len += (IS_ENCLOSE_RECURSION(node)
? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
}
+ else if (IS_ENCLOSE_RECURSION(node)) {
+ len = SIZE_OP_MEMORY_START_PUSH;
+ len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
+ }
else
#endif
{
@@ -1321,6 +1326,14 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
if (r) return r;
r = add_opcode(reg, OP_RETURN);
}
+ else if (IS_ENCLOSE_RECURSION(node)) {
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+ r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
+ else
+ r = add_opcode(reg, OP_MEMORY_END_REC);
+ if (r) return r;
+ r = add_mem_num(reg, node->regnum);
+ }
else
#endif
{
@@ -2231,6 +2244,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
return 0;
}
else {
+ if (IS_NOT_NULL(xc->mbuf)) return 0;
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (! IS_CODE_SB_WORD(reg->enc, i)) {
if (!IS_NCCLASS_NOT(xc)) {
@@ -3673,6 +3687,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
#define IN_NOT (1<<1)
#define IN_REPEAT (1<<2)
#define IN_VAR_REPEAT (1<<3)
+#define IN_CALL (1<<4)
+#define IN_RECCALL (1<<5)
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
@@ -3843,10 +3859,16 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
break;
case ENCLOSE_MEMORY:
- if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
/* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
}
+ if (IS_ENCLOSE_CALLED(en))
+ state |= IN_CALL;
+ if (IS_ENCLOSE_RECURSION(en))
+ state |= IN_RECCALL;
+ else if ((state & IN_RECCALL) != 0)
+ SET_CALL_RECURSION(node);
r = setup_tree(en->target, reg, state, env);
break;
@@ -4160,6 +4182,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
if (right_len == 0) {
to->right_anchor |= left->right_anchor;
}
+ else {
+ to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
+ }
}
static int
@@ -5003,12 +5028,14 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
if (r) return r;
reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
- ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
+ ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
+ ANCHOR_LOOK_BEHIND);
if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
- reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
+ reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
+ ANCHOR_PREC_READ_NOT);
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
reg->anchor_dmin = opt.len.min;
diff --git a/src/regerror.c b/src/regerror.c
index 05fc9d8..ee35b36 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -54,6 +54,8 @@ onig_error_code_to_format(int code)
p = "fail to memory allocation"; break;
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
+ case ONIGERR_PARSE_DEPTH_LIMIT_OVER:
+ p = "parse depth limit over"; break;
case ONIGERR_TYPE_BUG:
p = "undefined type (bug)"; break;
case ONIGERR_PARSER_BUG:
@@ -348,21 +350,12 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
p = pat;
while (p < pat_end) {
- if (*p == '\\') {
- *s++ = *p++;
- len = enclen(enc, p);
- while (len-- > 0) *s++ = *p++;
- }
- else if (*p == '/') {
- *s++ = (unsigned char )'\\';
- *s++ = *p++;
- }
- else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
+ if (ONIGENC_IS_MBC_HEAD(enc, p)) {
len = enclen(enc, p);
if (ONIGENC_MBC_MINLEN(enc) == 1) {
while (len-- > 0) *s++ = *p++;
}
- else { /* for UTF16 */
+ else { /* for UTF16/32 */
int blen;
while (len-- > 0) {
@@ -373,6 +366,15 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
}
}
}
+ else if (*p == '\\') {
+ *s++ = *p++;
+ len = enclen(enc, p);
+ while (len-- > 0) *s++ = *p++;
+ }
+ else if (*p == '/') {
+ *s++ = (unsigned char )'\\';
+ *s++ = *p++;
+ }
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
diff --git a/src/regexec.c b/src/regexec.c
index 7e8d3d1..35fef11 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -462,6 +462,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
unsigned int n;
int used;
size_t size;
+ size_t new_size;
char* alloc_base;
char* new_alloc_base;
OnigStackType *stk_base, *stk_end, *stk;
@@ -472,10 +473,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk = *arg_stk;
n = stk_end - stk_base;
- n *= 2;
size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
+ n *= 2;
+ new_size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
if (is_alloca != 0) {
- new_alloc_base = (char* )xmalloc(size);
+ new_alloc_base = (char* )xmalloc(new_size);
if (IS_NULL(new_alloc_base)) {
STACK_SAVE;
return ONIGERR_MEMORY;
@@ -489,7 +491,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
else
n = MatchStackLimitSize;
}
- new_alloc_base = (char* )xrealloc(alloc_base, size);
+ new_alloc_base = (char* )xrealloc(alloc_base, new_size);
if (IS_NULL(new_alloc_base)) {
STACK_SAVE;
return ONIGERR_MEMORY;
@@ -1242,16 +1244,24 @@ onig_statistics_init(void)
MaxStackDepth = 0;
}
-extern void
+extern int
onig_print_statistics(FILE* f)
{
+ int r;
int i;
- fprintf(f, " count prev time\n");
+
+ r = fprintf(f, " count prev time\n");
+ if (r < 0) return -1;
+
for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
- fprintf(f, "%8d: %8d: %10ld: %s\n",
- OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
+ r = fprintf(f, "%8d: %8d: %10ld: %s\n",
+ OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
+ if (r < 0) return -1;
}
- fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
+ r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
+ if (r < 0) return -1;
+
+ return 0;
}
#define STACK_INC do {\
@@ -3493,15 +3503,14 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
start = min_semi_end - reg->anchor_dmax;
if (start < end)
start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
- else { /* match with empty at end */
- start = onigenc_get_prev_char_head(reg->enc, str, end);
- }
}
if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
range = max_semi_end - reg->anchor_dmin + 1;
}
- if (start >= range) goto mismatch_no_msa;
+ if (start > range) goto mismatch_no_msa;
+ /* If start == range, match with empty at end.
+ Backward search is used. */
}
else {
if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
@@ -3626,9 +3635,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
prev = s;
s += enclen(reg->enc, s);
- while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
- prev = s;
- s += enclen(reg->enc, s);
+ if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
+ while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
+ prev = s;
+ s += enclen(reg->enc, s);
+ }
}
} while (s < range);
goto mismatch;
diff --git a/src/regint.h b/src/regint.h
index 7a3283d..9835143 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -71,6 +71,7 @@
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
+#define DEFAULT_PARSE_DEPTH_LIMIT 4096
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
@@ -522,7 +523,7 @@ typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
-typedef short int MemNumType;
+typedef int MemNumType;
typedef short int StateCheckNumType;
typedef void* PointerType;
@@ -747,7 +748,7 @@ extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp,
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
-extern void onig_print_statistics P_((FILE* f));
+extern int onig_print_statistics P_((FILE* f));
#endif
#endif
diff --git a/src/regparse.c b/src/regparse.c
index 8f1d1cb..11f9e34 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -108,6 +108,38 @@ onig_warning(const char* s)
(*onig_warn)(s);
}
+#define DEFAULT_MAX_CAPTURE_NUM 32767
+
+static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;
+
+extern int
+onig_set_capture_num_limit(int num)
+{
+ if (num < 0) return -1;
+
+ MaxCaptureNum = num;
+ return 0;
+}
+
+static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
+
+extern unsigned int
+onig_get_parse_depth_limit(void)
+{
+ return ParseDepthLimit;
+}
+
+extern int
+onig_set_parse_depth_limit(unsigned int depth)
+{
+ if (depth == 0)
+ ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
+ else
+ ParseDepthLimit = depth;
+ return 0;
+}
+
+
static void
bbuf_free(BBuf* bbuf)
{
@@ -959,6 +991,7 @@ scan_env_clear(ScanEnv* env)
env->curr_max_regnum = 0;
env->has_recursion = 0;
#endif
+ env->parse_depth = 0;
}
static int
@@ -968,7 +1001,7 @@ scan_env_add_mem_entry(ScanEnv* env)
Node** p;
need = env->num_mem + 1;
- if (need > ONIG_MAX_CAPTURE_NUM)
+ if (need > MaxCaptureNum && MaxCaptureNum != 0)
return ONIGERR_TOO_MANY_CAPTURES;
if (need >= SCANENV_MEMNODES_SIZE) {
@@ -1639,9 +1672,10 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
bound = x;
}
- for (high = low, bound = n; high < bound; ) {
+ high = (to == ~((OnigCodePoint )0)) ? n : low;
+ for (bound = n; high < bound; ) {
x = (high + bound) >> 1;
- if (to >= data[x*2] - 1)
+ if (to + 1 >= data[x*2])
high = x + 1;
else
bound = x;
@@ -4113,8 +4147,11 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
enum CCVALTYPE val_type, in_type;
int val_israw, in_israw;
- prev_cc = (CClassNode* )NULL;
*np = NULL_NODE;
+ env->parse_depth++;
+ if (env->parse_depth > ParseDepthLimit)
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
+ prev_cc = (CClassNode* )NULL;
r = fetch_token_in_cc(tok, src, end, env);
if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
neg = 1;
@@ -4315,7 +4352,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
CC_ESC_WARN(env, (UChar* )"-");
- goto any_char_in; /* [0-9-a] is allowed as [0-9\-a] */
+ goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
}
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
goto err;
@@ -4420,6 +4457,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
}
}
*src = p;
+ env->parse_depth--;
return 0;
err:
@@ -5281,6 +5319,9 @@ parse_subexp(Node** top, OnigToken* tok, int term,
Node *node, **headp;
*top = NULL;
+ env->parse_depth++;
+ if (env->parse_depth > ParseDepthLimit)
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
r = parse_branch(&node, tok, term, src, end, env);
if (r < 0) {
onig_node_free(node);
@@ -5317,6 +5358,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
return ONIGERR_PARSER_BUG;
}
+ env->parse_depth--;
return r;
}
diff --git a/src/regparse.h b/src/regparse.h
index 9e366fe..c9d1fe8 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -306,6 +306,7 @@ typedef struct {
int curr_max_regnum;
int has_recursion;
#endif
+ unsigned int parse_depth;
} ScanEnv;