1 files changed, 98 insertions, 36 deletions
diff --git a/src/regexec.c b/src/regexec.c
index e7dfb96..9dbef70 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -31,6 +31,9 @@
 
 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
 
+#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) /* word-char test at s: mode == 0 -> ONIGENC_IS_MBC_WORD, nonzero -> ONIGENC_IS_MBC_WORD_ASCII */ \
+  ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))
+
 #ifdef USE_CRNL_AS_LINE_TERMINATOR
 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
   (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
@@ -2002,6 +2005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
       MOP_OUT;
       break;
 
+#ifdef USE_OP_CCLASS_NODE
     case OP_CCLASS_NODE:  MOP_IN(OP_CCLASS_NODE);
       {
         OnigCodePoint code;
@@ -2020,6 +2024,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
       }
       MOP_OUT;
       break;
+#endif
 
     case OP_ANYCHAR:  MOP_IN(OP_ANYCHAR);
       DATA_ENSURE(1);
@@ -2152,7 +2157,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
       MOP_OUT;
       break;
 
-    case OP_NOT_WORD:  MOP_IN(OP_NOT_WORD);
+    case OP_WORD_ASCII:  MOP_IN(OP_WORD_ASCII);
+      DATA_ENSURE(1);
+      if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
+        goto fail;
+
+      s += enclen(encode, s);
+      MOP_OUT;
+      break;
+
+    case OP_NO_WORD:  MOP_IN(OP_NO_WORD);
       DATA_ENSURE(1);
       if (ONIGENC_IS_MBC_WORD(encode, s, end))
         goto fail;
@@ -2161,38 +2175,57 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
       MOP_OUT;
       break;
 
-    case OP_WORD_BOUND:  MOP_IN(OP_WORD_BOUND);
-      if (ON_STR_BEGIN(s)) {
-        DATA_ENSURE(1);
-        if (! ONIGENC_IS_MBC_WORD(encode, s, end))
-          goto fail;
-      }
-      else if (ON_STR_END(s)) {
-        if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
-          goto fail;
-      }
-      else {
-        if (ONIGENC_IS_MBC_WORD(encode, s, end)
-            == ONIGENC_IS_MBC_WORD(encode, sprev, end))
-          goto fail;
+    case OP_NO_WORD_ASCII:  MOP_IN(OP_NO_WORD_ASCII);
+      DATA_ENSURE(1);
+      if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
+        goto fail;
+
+      s += enclen(encode, s);
+      MOP_OUT;
+      break;
+
+    case OP_WORD_BOUNDARY:  MOP_IN(OP_WORD_BOUNDARY);
+      {
+        ModeType mode;
+        GET_MODE_INC(mode, p); // ascii_mode
+
+        if (ON_STR_BEGIN(s)) {
+          DATA_ENSURE(1);
+          if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
+            goto fail;
+        }
+        else if (ON_STR_END(s)) {
+          if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+            goto fail;
+        }
+        else {
+          if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
+              == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+            goto fail;
+        }
       }
       MOP_OUT;
       continue;
       break;
 
-    case OP_NOT_WORD_BOUND:  MOP_IN(OP_NOT_WORD_BOUND);
-      if (ON_STR_BEGIN(s)) {
-        if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
-          goto fail;
-      }
-      else if (ON_STR_END(s)) {
-        if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
-          goto fail;
-      }
-      else {
-        if (ONIGENC_IS_MBC_WORD(encode, s, end)
-            != ONIGENC_IS_MBC_WORD(encode, sprev, end))
-          goto fail;
+    case OP_NO_WORD_BOUNDARY:  MOP_IN(OP_NO_WORD_BOUNDARY);
+      {
+        ModeType mode;
+        GET_MODE_INC(mode, p); // ascii_mode
+
+        if (ON_STR_BEGIN(s)) {
+          if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
+            goto fail;
+        }
+        else if (ON_STR_END(s)) {
+          if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+            goto fail;
+        }
+        else {
+          if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
+              != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+            goto fail;
+        }
       }
       MOP_OUT;
       continue;
@@ -2200,26 +2233,55 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
 
 #ifdef USE_WORD_BEGIN_END
     case OP_WORD_BEGIN:  MOP_IN(OP_WORD_BEGIN);
-      if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
-        if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
-          MOP_OUT;
-          continue;
+      {
+        ModeType mode;
+        GET_MODE_INC(mode, p); // ascii_mode
+
+        if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
+          if (ON_STR_BEGIN(s) ||
+              ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+            MOP_OUT;
+            continue;
+          }
         }
       }
       goto fail;
       break;
 
     case OP_WORD_END:  MOP_IN(OP_WORD_END);
-      if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
-        if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
-          MOP_OUT;
-          continue;
+      {
+        ModeType mode;
+        GET_MODE_INC(mode, p); // ascii_mode
+
+        if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+          if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
+            MOP_OUT;
+            continue;
+          }
         }
       }
       goto fail;
       break;
 #endif
 
+    case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+      MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+      if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) {
+        MOP_OUT;
+        continue;
+      }
+      goto fail;
+      break;
+
+    case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+      MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+      if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))
+        goto fail;
+
+      MOP_OUT;
+      continue;
+      break;
+
     case OP_BEGIN_BUF:  MOP_IN(OP_BEGIN_BUF);
       if (! ON_STR_BEGIN(s)) goto fail;