summaryrefslogtreecommitdiff
path: root/src/regparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regparse.c')
-rw-r--r--src/regparse.c128
1 files changed, 105 insertions, 23 deletions
diff --git a/src/regparse.c b/src/regparse.c
index d7d188a..1d00b82 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2023 K.Kosako
+ * Copyright (c) 2002-2024 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -758,10 +758,14 @@ typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
#ifdef ONIG_DEBUG
static int
-i_print_name_entry(UChar* key, NameEntry* e, void* arg)
+i_print_name_entry(st_data_t akey, st_data_t ae, st_data_t arg)
{
int i;
- FILE* fp = (FILE* )arg;
+ FILE* fp;
+ NameEntry* e;
+
+ e = (NameEntry* )ae;
+ fp = (FILE* )arg;
fprintf(fp, "%s: ", e->name);
if (e->back_num == 0)
@@ -793,8 +797,13 @@ onig_print_names(FILE* fp, regex_t* reg)
#endif /* ONIG_DEBUG */
static int
-i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
+i_free_name_entry(st_data_t akey, st_data_t ae, st_data_t arg ARG_UNUSED)
{
+ UChar* key;
+ NameEntry* e;
+
+ key = (UChar* )akey;
+ e = (NameEntry* )ae;
xfree(e->name);
if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
xfree(key);
@@ -850,8 +859,14 @@ typedef struct {
} INamesArg;
static int
-i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
+i_names(st_data_t key ARG_UNUSED, st_data_t ae, st_data_t aarg)
{
+ NameEntry* e;
+ INamesArg* arg;
+
+ e = (NameEntry* )ae;
+ arg = (INamesArg* )aarg;
+
int r = (*(arg->func))(e->name,
e->name + e->name_len,
e->back_num,
@@ -883,9 +898,14 @@ onig_foreach_name(regex_t* reg,
}
static int
-i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
+i_renumber_name(st_data_t key ARG_UNUSED, st_data_t ae, st_data_t amap)
{
int i;
+ NameEntry* e;
+ GroupNumMap* map;
+
+ e = (NameEntry* )ae;
+ map = (GroupNumMap* )amap;
if (e->back_num > 1) {
for (i = 0; i < e->back_num; i++) {
@@ -1374,9 +1394,14 @@ static int CalloutNameIDCounter;
#ifdef USE_ST_LIBRARY
static int
-i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
- void* arg ARG_UNUSED)
+i_free_callout_name_entry(st_data_t akey, st_data_t ae, st_data_t arg ARG_UNUSED)
{
+ st_callout_name_key* key;
+ CalloutNameEntry* e;
+
+ key = (st_callout_name_key* )akey;
+ e = (CalloutNameEntry* )ae;
+
if (IS_NOT_NULL(e)) {
xfree(e->name);
}
@@ -1870,10 +1895,14 @@ typedef intptr_t CalloutTagVal;
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)
static int
-i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
+i_callout_callout_list_set(st_data_t key ARG_UNUSED, st_data_t ae, st_data_t arg)
{
int num;
- RegexExt* ext = (RegexExt* )arg;
+ CalloutTagVal e;
+ RegexExt* ext;
+
+ e = (CalloutTagVal )ae;
+ ext = (RegexExt* )arg;
num = (int )e - 1;
ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
@@ -1926,8 +1955,11 @@ onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
}
static int
-i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
+i_free_callout_tag_entry(st_data_t akey, st_data_t e ARG_UNUSED, st_data_t arg ARG_UNUSED)
{
+ UChar* key;
+
+ key = (UChar* )akey;
xfree(key);
return ST_DELETE;
}
@@ -3389,6 +3421,34 @@ onig_node_str_set(Node* node, const UChar* s, const UChar* end, int need_free)
}
static int
+node_str_remove_char(Node* node, UChar c)
+{
+ UChar* p;
+ int n;
+
+ n = 0;
+ p = STR_(node)->s;
+ while (p < STR_(node)->end) {
+ if (*p == c) {
+ UChar *q, *q1;
+ q = q1 = p;
+ q1++;
+ while (q1 < STR_(node)->end) {
+ *q = *q1;
+ q++; q1++;
+ }
+ n++;
+ STR_(node)->end--;
+ }
+ else {
+ p++;
+ }
+ }
+
+ return n;
+}
+
+static int
node_str_cat_char(Node* node, UChar c)
{
UChar s[1];
@@ -4807,6 +4867,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
end_code = get_name_end_code_point(start_code);
+ *rlevel = 0;
digit_count = 0;
name_end = end;
r = 0;
@@ -5061,7 +5122,7 @@ CC_ESC_WARN(ParseEnv* env, UChar *c)
UChar buf[WARN_BUFSIZE];
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
- (UChar* )"character class has '%s' without escape",
+ "character class has '%s' without escape",
c);
(*onig_warn)((char* )buf);
}
@@ -5076,7 +5137,7 @@ CLOSE_BRACKET_WITHOUT_ESC_WARN(ParseEnv* env, UChar* c)
UChar buf[WARN_BUFSIZE];
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
(env)->pattern, (env)->pattern_end,
- (UChar* )"regular expression has '%s' without escape", c);
+ "regular expression has '%s' without escape", c);
(*onig_warn)((char* )buf);
}
}
@@ -5890,6 +5951,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env)
tok->u.backref.by_name = 0;
#ifdef USE_BACKREF_WITH_LEVEL
tok->u.backref.exist_level = 0;
+ tok->u.backref.level = 0;
#endif
break;
}
@@ -7062,11 +7124,16 @@ prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ParseEnv* env)
goto val_entry;
}
else if (r == TK_CC_AND) {
+ range_end_val_with_warning:
CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
}
if (curr_type == CV_CPROP) {
+ if (IS_SYNTAX_BV(env->syntax,
+ ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC)) {
+ goto range_end_val_with_warning;
+ }
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
goto err;
}
@@ -7097,16 +7164,16 @@ prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ParseEnv* env)
if (r < 0) goto err;
fetched = 1;
- if (r == TK_CC_CLOSE)
+ if (r == TK_CC_CLOSE) {
goto range_end_val; /* allow [a-b-] */
+ }
else if (r == TK_CC_AND) {
- CC_ESC_WARN(env, (UChar* )"-");
- goto range_end_val;
+ goto range_end_val_with_warning;
}
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
- CC_ESC_WARN(env, (UChar* )"-");
- goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
+ /* [0-9-a] is allowed as [0-9\-a] */
+ goto range_end_val_with_warning;
}
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
goto err;
@@ -8518,7 +8585,7 @@ assign_quantifier_body(Node* qnode, Node* target, int group, ParseEnv* env)
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
- (UChar* )"redundant nested repeat operator");
+ "redundant nested repeat operator");
(*onig_verb_warn)((char* )buf);
}
goto warn_exit;
@@ -8528,7 +8595,7 @@ assign_quantifier_body(Node* qnode, Node* target, int group, ParseEnv* env)
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
- (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
+ "nested repeat operator %s and %s was replaced with '%s'",
PopularQStr[targetq_num], PopularQStr[nestq_num],
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
(*onig_verb_warn)((char* )buf);
@@ -8824,6 +8891,7 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
tk_byte:
{
*np = node_new_str_with_options(tok->backp, *src, env->options);
+ tk_byte2:
CHECK_NULL_RETURN_MEMERR(*np);
while (1) {
@@ -9040,7 +9108,15 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
}
else {
- goto tk_byte;
+ if (tok->type == TK_INTERVAL &&
+ IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
+ *np = node_new_str_with_options(tok->backp, *src, env->options);
+ node_str_remove_char(*np, (UChar )'\\');
+ goto tk_byte2;
+ }
+ else {
+ goto tk_byte;
+ }
}
break;
@@ -9085,8 +9161,14 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (r == TK_REPEAT || r == TK_INTERVAL) {
Node* target;
- if (is_invalid_quantifier_target(*tp))
- return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+ if (is_invalid_quantifier_target(*tp)) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
+ return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+ }
+
+ return r;
+ }
INC_PARSE_DEPTH(parse_depth);