diff options
Diffstat (limited to 'src/regexec.c')
| -rw-r--r-- | src/regexec.c | 134 | 
1 file changed, 98 insertions, 36 deletions
diff --git a/src/regexec.c b/src/regexec.c index e7dfb96..9dbef70 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -31,6 +31,9 @@  #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \ +  ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end)) +  #ifdef USE_CRNL_AS_LINE_TERMINATOR  #define ONIGENC_IS_MBC_CRNL(enc,p,end) \    (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ @@ -2002,6 +2005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        MOP_OUT;        break; +#ifdef USE_OP_CCLASS_NODE      case OP_CCLASS_NODE:  MOP_IN(OP_CCLASS_NODE);        {          OnigCodePoint code; @@ -2020,6 +2024,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        }        MOP_OUT;        break; +#endif      case OP_ANYCHAR:  MOP_IN(OP_ANYCHAR);        DATA_ENSURE(1); @@ -2152,7 +2157,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        MOP_OUT;        break; -    case OP_NOT_WORD:  MOP_IN(OP_NOT_WORD); +    case OP_WORD_ASCII:  MOP_IN(OP_WORD_ASCII); +      DATA_ENSURE(1); +      if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) +        goto fail; + +      s += enclen(encode, s); +      MOP_OUT; +      break; + +    case OP_NO_WORD:  MOP_IN(OP_NO_WORD);        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_WORD(encode, s, end))          goto fail; @@ -2161,38 +2175,57 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        MOP_OUT;        break; -    case OP_WORD_BOUND:  MOP_IN(OP_WORD_BOUND); -      if (ON_STR_BEGIN(s)) { -        DATA_ENSURE(1); -        if (! ONIGENC_IS_MBC_WORD(encode, s, end)) -          goto fail; -      } -      else if (ON_STR_END(s)) { -        if (! 
ONIGENC_IS_MBC_WORD(encode, sprev, end)) -          goto fail; -      } -      else { -        if (ONIGENC_IS_MBC_WORD(encode, s, end) -            == ONIGENC_IS_MBC_WORD(encode, sprev, end)) -          goto fail; +    case OP_NO_WORD_ASCII:  MOP_IN(OP_NO_WORD_ASCII); +      DATA_ENSURE(1); +      if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) +        goto fail; + +      s += enclen(encode, s); +      MOP_OUT; +      break; + +    case OP_WORD_BOUNDARY:  MOP_IN(OP_WORD_BOUNDARY); +      { +        ModeType mode; +        GET_MODE_INC(mode, p); // ascii_mode + +        if (ON_STR_BEGIN(s)) { +          DATA_ENSURE(1); +          if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) +            goto fail; +        } +        else if (ON_STR_END(s)) { +          if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) +            goto fail; +        } +        else { +          if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) +              == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) +            goto fail; +        }        }        MOP_OUT;        continue;        break; -    case OP_NOT_WORD_BOUND:  MOP_IN(OP_NOT_WORD_BOUND); -      if (ON_STR_BEGIN(s)) { -        if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) -          goto fail; -      } -      else if (ON_STR_END(s)) { -        if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) -          goto fail; -      } -      else { -        if (ONIGENC_IS_MBC_WORD(encode, s, end) -            != ONIGENC_IS_MBC_WORD(encode, sprev, end)) -          goto fail; +    case OP_NO_WORD_BOUNDARY:  MOP_IN(OP_NO_WORD_BOUNDARY); +      { +        ModeType mode; +        GET_MODE_INC(mode, p); // ascii_mode + +        if (ON_STR_BEGIN(s)) { +          if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) +            goto fail; +        } +        else if (ON_STR_END(s)) { +          if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) +            goto fail; +        } +        else { +          
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) +              != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) +            goto fail; +        }        }        MOP_OUT;        continue; @@ -2200,26 +2233,55 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,  #ifdef USE_WORD_BEGIN_END      case OP_WORD_BEGIN:  MOP_IN(OP_WORD_BEGIN); -      if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { -        if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { -          MOP_OUT; -          continue; +      { +        ModeType mode; +        GET_MODE_INC(mode, p); // ascii_mode + +        if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { +          if (ON_STR_BEGIN(s) || +              ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { +            MOP_OUT; +            continue; +          }          }        }        goto fail;        break;      case OP_WORD_END:  MOP_IN(OP_WORD_END); -      if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { -        if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { -          MOP_OUT; -          continue; +      { +        ModeType mode; +        GET_MODE_INC(mode, p); // ascii_mode + +        if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { +          if (ON_STR_END(s) || ! 
IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { +            MOP_OUT; +            continue; +          }          }        }        goto fail;        break;  #endif +    case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +      MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); +      if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { +        MOP_OUT; +        continue; +      } +      goto fail; +      break; + +    case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +      MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); +      if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) +        goto fail; + +      MOP_OUT; +      continue; +      break; +      case OP_BEGIN_BUF:  MOP_IN(OP_BEGIN_BUF);        if (! ON_STR_BEGIN(s)) goto fail;  | 
