summary | refs | log | tree | commit | diff
path: root/src/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regexec.c')
-rw-r--r-- src/regexec.c 134
1 files changed, 98 insertions, 36 deletions
diff --git a/src/regexec.c b/src/regexec.c
index e7dfb96..9dbef70 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -31,6 +31,9 @@
#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) /* word test at s selected by mode: mode == 0 uses ONIGENC_IS_MBC_WORD, any other value uses ONIGENC_IS_MBC_WORD_ASCII */ \
+ ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))
+
#ifdef USE_CRNL_AS_LINE_TERMINATOR
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
(ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
@@ -2002,6 +2005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MOP_OUT;
break;
+#ifdef USE_OP_CCLASS_NODE
case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
{
OnigCodePoint code;
@@ -2020,6 +2024,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
MOP_OUT;
break;
+#endif
case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
@@ -2152,7 +2157,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MOP_OUT;
break;
- case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
+ case OP_WORD_ASCII: MOP_IN(OP_WORD_ASCII);
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
+ goto fail;
+
+ s += enclen(encode, s);
+ MOP_OUT;
+ break;
+
+ case OP_NO_WORD: MOP_IN(OP_NO_WORD);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
@@ -2161,38 +2175,57 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MOP_OUT;
break;
- case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
- if (ON_STR_BEGIN(s)) {
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
- }
- else if (ON_STR_END(s)) {
- if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
- }
- else {
- if (ONIGENC_IS_MBC_WORD(encode, s, end)
- == ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ case OP_NO_WORD_ASCII: MOP_IN(OP_NO_WORD_ASCII);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
+ goto fail;
+
+ s += enclen(encode, s);
+ MOP_OUT;
+ break;
+
+ case OP_WORD_BOUNDARY: MOP_IN(OP_WORD_BOUNDARY);
+ {
+ ModeType mode;
+ GET_MODE_INC(mode, p); // ascii_mode
+
+ if (ON_STR_BEGIN(s)) {
+ DATA_ENSURE(1);
+ if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
+ goto fail;
+ }
+ else if (ON_STR_END(s)) {
+ if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
+ else {
+ if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
+ == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
}
MOP_OUT;
continue;
break;
- case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
- if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
- }
- else if (ON_STR_END(s)) {
- if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
- }
- else {
- if (ONIGENC_IS_MBC_WORD(encode, s, end)
- != ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
+ case OP_NO_WORD_BOUNDARY: MOP_IN(OP_NO_WORD_BOUNDARY);
+ {
+ ModeType mode;
+ GET_MODE_INC(mode, p); // ascii_mode
+
+ if (ON_STR_BEGIN(s)) {
+ if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
+ goto fail;
+ }
+ else if (ON_STR_END(s)) {
+ if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
+ else {
+ if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
+ != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
}
MOP_OUT;
continue;
@@ -2200,26 +2233,55 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_WORD_BEGIN_END
case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
- if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
- if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
- MOP_OUT;
- continue;
+ {
+ ModeType mode;
+ GET_MODE_INC(mode, p); // ascii_mode
+
+ if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
+ if (ON_STR_BEGIN(s) ||
+ ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+ MOP_OUT;
+ continue;
+ }
}
}
goto fail;
break;
case OP_WORD_END: MOP_IN(OP_WORD_END);
- if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
- if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
- MOP_OUT;
- continue;
+ {
+ ModeType mode;
+ GET_MODE_INC(mode, p); // ascii_mode
+
+ if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+ if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
+ MOP_OUT;
+ continue;
+ }
}
}
goto fail;
break;
#endif
+ case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) {
+ MOP_OUT;
+ continue;
+ }
+ goto fail;
+ break;
+
+ case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))
+ goto fail;
+
+ MOP_OUT;
+ continue;
+ break;
+
case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
if (! ON_STR_BEGIN(s)) goto fail;