diff options
Diffstat (limited to 'src/regcomp.c')
| -rw-r--r-- | src/regcomp.c | 6223 | 
1 files changed, 6223 insertions, 0 deletions
| diff --git a/src/regcomp.c b/src/regcomp.c new file mode 100644 index 0000000..8b5b206 --- /dev/null +++ b/src/regcomp.c @@ -0,0 +1,6223 @@ +/********************************************************************** +  regcomp.c -  Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2016  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regparse.h" + +OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; + +extern OnigCaseFoldType +onig_get_default_case_fold_flag(void) +{ +  return OnigDefaultCaseFoldFlag; +} + +extern int +onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) +{ +  OnigDefaultCaseFoldFlag = case_fold_flag; +  return 0; +} + + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif + +static UChar* +str_dup(UChar* s, UChar* end) +{ +  int len = end - s; + +  if (len > 0) { +    UChar* r = (UChar* )xmalloc(len + 1); +    CHECK_NULL_RETURN(r); +    xmemcpy(r, s, len); +    r[len] = (UChar )0; +    return r; +  } +  else return NULL; +} + +static void +swap_node(Node* a, Node* b) +{ +  Node c; +  c = *a; *a = *b; *b = c; + +  if (NTYPE(a) == NT_STR) { +    StrNode* sn = NSTR(a); +    if (sn->capa == 0) { +      int len = sn->end - sn->s; +      sn->s   = sn->buf; +      sn->end = sn->s + len; +    } +  } + +  if (NTYPE(b) == NT_STR) { +    StrNode* sn = NSTR(b); +    if (sn->capa == 0) { +      int len = sn->end - sn->s; +      sn->s   = sn->buf; +      sn->end = sn->s + len; +    } +  } +} + +static OnigDistance +distance_add(OnigDistance d1, OnigDistance d2) +{ +  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) +    return ONIG_INFINITE_DISTANCE; +  else { +    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; +    else return ONIG_INFINITE_DISTANCE; +  } +} + +static OnigDistance +distance_multiply(OnigDistance d, int m) +{ +  if (m == 0) return 0; + +  if (d < ONIG_INFINITE_DISTANCE / m) +    return d * m; +  else +    return ONIG_INFINITE_DISTANCE; +} + +static int +bitset_is_empty(BitSetRef bs) +{ +  int i; +  for (i = 0; i < (int )BITSET_SIZE; i++) { +    if (bs[i] != 0) return 0; +  } +  return 1; +} + +#ifdef ONIG_DEBUG +static int +bitset_on_num(BitSetRef bs) +{ +  int i, n; + +  n = 0; +  for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +    if (BITSET_AT(bs, i)) n++; +  } +  return n; +} +#endif + +extern int +onig_bbuf_init(BBuf* buf, int size) +{ +  if (size <= 0) { +    size   = 0; +    buf->p = NULL; +  } +  else { +    buf->p = (UChar* )xmalloc(size); +    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); +  } + +  buf->alloc = size; +  buf->used  = 0; +  return 0; +} + + +#ifdef USE_SUBEXP_CALL + +static int +unset_addr_list_init(UnsetAddrList* uslist, int size) +{ +  UnsetAddr* p; + +  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); +  CHECK_NULL_RETURN_MEMERR(p); +  uslist->num   = 0; +  uslist->alloc = size; +  uslist->us    = p; +  return 0; +} + +static void +unset_addr_list_end(UnsetAddrList* uslist) +{ +  if (IS_NOT_NULL(uslist->us)) +    xfree(uslist->us); +} + +static int +unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) +{ +  UnsetAddr* p; +  int size; + +  if (uslist->num >= uslist->alloc) { +    size = uslist->alloc * 2; +    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); +    CHECK_NULL_RETURN_MEMERR(p); +    uslist->alloc = size; +    uslist->us    = p; +  } + +  uslist->us[uslist->num].offset = offset; +  uslist->us[uslist->num].target = node; +  uslist->num++; +  return 0; +} +#endif /* USE_SUBEXP_CALL */ + + +static int +add_opcode(regex_t* reg, int opcode) +{ +  BBUF_ADD1(reg, opcode); +  return 0; +} + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +static int +add_state_check_num(regex_t* reg, int num) +{ +  StateCheckNumType n = (StateCheckNumType )num; + +  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); +  return 0; +} +#endif + +static int +add_rel_addr(regex_t* reg, int addr) +{ +  RelAddrType ra = (RelAddrType )addr; + +  BBUF_ADD(reg, &ra, SIZE_RELADDR); +  return 0; +} + +static int +add_abs_addr(regex_t* reg, int addr) +{ +  AbsAddrType ra = (AbsAddrType )addr; + +  BBUF_ADD(reg, &ra, SIZE_ABSADDR); +  return 0; +} + +static int +add_length(regex_t* reg, int len) +{ +  LengthType l = (LengthType )len; + +  BBUF_ADD(reg, &l, SIZE_LENGTH); +  return 0; +} + +static int +add_mem_num(regex_t* reg, int num) +{ +  MemNumType n = (MemNumType )num; + +  BBUF_ADD(reg, &n, SIZE_MEMNUM); +  return 0; +} + +static int +add_pointer(regex_t* reg, void* addr) +{ +  PointerType ptr = (PointerType )addr; + +  BBUF_ADD(reg, &ptr, SIZE_POINTER); +  return 0; +} + +static int +add_option(regex_t* reg, OnigOptionType option) +{ +  BBUF_ADD(reg, &option, SIZE_OPTION); +  return 0; +} + +static int +add_opcode_rel_addr(regex_t* reg, int opcode, int addr) +{ +  int r; + +  r = add_opcode(reg, opcode); +  if (r) return r; +  r = add_rel_addr(reg, addr); +  return r; +} + +static int +add_bytes(regex_t* reg, UChar* bytes, int len) +{ +  BBUF_ADD(reg, bytes, len); +  return 0; +} + +static int +add_bitset(regex_t* reg, BitSetRef bs) +{ +  BBUF_ADD(reg, bs, SIZE_BITSET); +  return 0; +} + +static int +add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) +{ +  int r; + +  r = add_opcode(reg, opcode); +  if (r) return r; +  r = add_option(reg, option); +  return r; +} + +static int compile_length_tree(Node* node, regex_t* reg); +static int compile_tree(Node* node, regex_t* reg); + + +#define IS_NEED_STR_LEN_OP_EXACT(op) \ +   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\ +    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC) + +static int +select_str_opcode(int mb_len, int str_len, int ignore_case) +{ +  int op; + +  if (ignore_case) { +    switch (str_len) { +    case 1:  op = OP_EXACT1_IC; break; +    default: op = OP_EXACTN_IC; break; +    } +  } +  else { +    switch (mb_len) { +    case 1: +      switch (str_len) { +      case 1:  op = OP_EXACT1; break; +      case 2:  op = OP_EXACT2; break; +      case 3:  op = OP_EXACT3; break; +      case 4:  op = OP_EXACT4; break; +      case 5:  op = OP_EXACT5; break; +      default: op = OP_EXACTN; break; +      } +      break; + +    case 2: +      switch (str_len) { +      case 1:  op = OP_EXACTMB2N1; break; +      case 2:  op = OP_EXACTMB2N2; break; +      case 3:  op = OP_EXACTMB2N3; break; +      default: op = OP_EXACTMB2N;  break; +      } +      break; + +    case 3: +      op = OP_EXACTMB3N; +      break; + +    default: +      op = OP_EXACTMBN; +      break; +    } +  } +  return op; +} + +static int +compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) +{ +  int r; +  int saved_num_null_check = reg->num_null_check; + +  if (empty_info != 0) { +    r = add_opcode(reg, OP_NULL_CHECK_START); +    if (r) return r; +    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ +    if (r) return r; +    reg->num_null_check++; +  } + +  r = compile_tree(node, reg); +  if (r) return r; + +  if (empty_info != 0) { +    if (empty_info == NQ_TARGET_IS_EMPTY) +      r = add_opcode(reg, OP_NULL_CHECK_END); +    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) +      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); +    else if (empty_info == NQ_TARGET_IS_EMPTY_REC) +      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); + +    if (r) return r; +    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ +  } +  return r; +} + +#ifdef USE_SUBEXP_CALL +static int +compile_call(CallNode* node, regex_t* reg) +{ +  int r; + +  r = add_opcode(reg, OP_CALL); +  if (r) return r; +  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), +                          node->target); +  if (r) return r; +  r = add_abs_addr(reg, 0 /*dummy addr.*/); +  return r; +} +#endif + +static int +compile_tree_n_times(Node* node, int n, regex_t* reg) +{ +  int i, r; + +  for (i = 0; i < n; i++) { +    r = compile_tree(node, reg); +    if (r) return r; +  } +  return 0; +} + +static int +add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, +                          regex_t* reg ARG_UNUSED, int ignore_case) +{ +  int len; +  int op = select_str_opcode(mb_len, str_len, ignore_case); + +  len = SIZE_OPCODE; + +  if (op == OP_EXACTMBN)  len += SIZE_LENGTH; +  if (IS_NEED_STR_LEN_OP_EXACT(op)) +    len += SIZE_LENGTH; + +  len += mb_len * str_len; +  return len; +} + +static int +add_compile_string(UChar* s, int mb_len, int str_len, +                   regex_t* reg, int ignore_case) +{ +  int op = select_str_opcode(mb_len, str_len, ignore_case); +  add_opcode(reg, op); + +  if (op == OP_EXACTMBN) +    add_length(reg, mb_len); + +  if (IS_NEED_STR_LEN_OP_EXACT(op)) { +    if (op == OP_EXACTN_IC) +      add_length(reg, mb_len * str_len); +    else +      add_length(reg, str_len); +  } + +  add_bytes(reg, s, mb_len * str_len); +  return 0; +} + + +static int +compile_length_string_node(Node* node, regex_t* reg) +{ +  int rlen, r, len, prev_len, slen, ambig; +  OnigEncoding enc = reg->enc; +  UChar *p, *prev; +  StrNode* sn; + +  sn = NSTR(node); +  if (sn->end <= sn->s) +    return 0; + +  ambig = NSTRING_IS_AMBIG(node); + +  p = prev = sn->s; +  prev_len = enclen(enc, p); +  p += prev_len; +  slen = 1; +  rlen = 0; + +  for (; p < sn->end; ) { +    len = enclen(enc, p); +    if (len == prev_len) { +      slen++; +    } +    else { +      r = add_compile_string_length(prev, prev_len, slen, reg, ambig); +      rlen += r; +      prev = p; +      slen = 1; +      prev_len = len; +    } +    p += len; +  } +  r = add_compile_string_length(prev, prev_len, slen, reg, ambig); +  rlen += r; +  return rlen; +} + +static int +compile_length_string_raw_node(StrNode* sn, regex_t* reg) +{ +  if (sn->end <= sn->s) +    return 0; + +  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +compile_string_node(Node* node, regex_t* reg) +{ +  int r, len, prev_len, slen, ambig; +  OnigEncoding enc = reg->enc; +  UChar *p, *prev, *end; +  StrNode* sn; + +  sn = NSTR(node); +  if (sn->end <= sn->s) +    return 0; + +  end = sn->end; +  ambig = NSTRING_IS_AMBIG(node); + +  p = prev = sn->s; +  prev_len = enclen(enc, p); +  p += prev_len; +  slen = 1; + +  for (; p < end; ) { +    len = enclen(enc, p); +    if (len == prev_len) { +      slen++; +    } +    else { +      r = add_compile_string(prev, prev_len, slen, reg, ambig); +      if (r) return r; + +      prev  = p; +      slen  = 1; +      prev_len = len; +    } + +    p += len; +  } +  return add_compile_string(prev, prev_len, slen, reg, ambig); +} + +static int +compile_string_raw_node(StrNode* sn, regex_t* reg) +{ +  if (sn->end <= sn->s) +    return 0; + +  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) +{ +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS +  add_length(reg, mbuf->used); +  return add_bytes(reg, mbuf->p, mbuf->used); +#else +  int r, pad_size; +  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; + +  GET_ALIGNMENT_PAD_SIZE(p, pad_size); +  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); +  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + +  r = add_bytes(reg, mbuf->p, mbuf->used); + +  /* padding for return value from compile_length_cclass_node() to be fix. */ +  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; +  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); +  return r; +#endif +} + +static int +compile_length_cclass_node(CClassNode* cc, regex_t* reg) +{ +  int len; + +  if (IS_NCCLASS_SHARE(cc)) { +    len = SIZE_OPCODE + SIZE_POINTER; +    return len; +  } + +  if (IS_NULL(cc->mbuf)) { +    len = SIZE_OPCODE + SIZE_BITSET; +  } +  else { +    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { +      len = SIZE_OPCODE; +    } +    else { +      len = SIZE_OPCODE + SIZE_BITSET; +    } +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS +    len += SIZE_LENGTH + cc->mbuf->used; +#else +    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); +#endif +  } + +  return len; +} + +static int +compile_cclass_node(CClassNode* cc, regex_t* reg) +{ +  int r; + +  if (IS_NCCLASS_SHARE(cc)) { +    add_opcode(reg, OP_CCLASS_NODE); +    r = add_pointer(reg, cc); +    return r; +  } + +  if (IS_NULL(cc->mbuf)) { +    if (IS_NCCLASS_NOT(cc)) +      add_opcode(reg, OP_CCLASS_NOT); +    else +      add_opcode(reg, OP_CCLASS); + +    r = add_bitset(reg, cc->bs); +  } +  else { +    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { +      if (IS_NCCLASS_NOT(cc)) +        add_opcode(reg, OP_CCLASS_MB_NOT); +      else +        add_opcode(reg, OP_CCLASS_MB); + +      r = add_multi_byte_cclass(cc->mbuf, reg); +    } +    else { +      if (IS_NCCLASS_NOT(cc)) +        add_opcode(reg, OP_CCLASS_MIX_NOT); +      else +        add_opcode(reg, OP_CCLASS_MIX); + +      r = add_bitset(reg, cc->bs); +      if (r) return r; +      r = add_multi_byte_cclass(cc->mbuf, reg); +    } +  } + +  return r; +} + +static int +entry_repeat_range(regex_t* reg, int id, int lower, int upper) +{ +#define REPEAT_RANGE_ALLOC  4 + +  OnigRepeatRange* p; + +  if (reg->repeat_range_alloc == 0) { +    p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); +    CHECK_NULL_RETURN_MEMERR(p); +    reg->repeat_range = p; +    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; +  } +  else if (reg->repeat_range_alloc <= id) { +    int n; +    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; +    p = (OnigRepeatRange* )xrealloc(reg->repeat_range, +                                    sizeof(OnigRepeatRange) * n); +    CHECK_NULL_RETURN_MEMERR(p); +    reg->repeat_range = p; +    reg->repeat_range_alloc = n; +  } +  else { +    p = reg->repeat_range; +  } + +  p[id].lower = lower; +  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); +  return 0; +} + +static int +compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, +                          regex_t* reg) +{ +  int r; +  int num_repeat = reg->num_repeat; + +  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); +  if (r) return r; +  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ +  reg->num_repeat++; +  if (r) return r; +  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); +  if (r) return r; + +  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); +  if (r) return r; + +  r = compile_tree_empty_check(qn->target, reg, empty_info); +  if (r) return r; + +  if ( +#ifdef USE_SUBEXP_CALL +      reg->num_call > 0 || +#endif +      IS_QUANTIFIER_IN_REPEAT(qn)) { +    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); +  } +  else { +    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); +  } +  if (r) return r; +  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ +  return r; +} + +static int +is_anychar_star_quantifier(QtfrNode* qn) +{ +  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && +      NTYPE(qn->target) == NT_CANY) +    return 1; +  else +    return 0; +} + +#define QUANTIFIER_EXPAND_LIMIT_SIZE   50 +#define CKN_ON   (ckn > 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +static int +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +{ +  int len, mod_tlen, cklen; +  int ckn; +  int infinite = IS_REPEAT_INFINITE(qn->upper); +  int empty_info = qn->target_empty_info; +  int tlen = compile_length_tree(qn->target, reg); + +  if (tlen < 0) return tlen; + +  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + +  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); + +  /* anychar repeat */ +  if (NTYPE(qn->target) == NT_CANY) { +    if (qn->greedy && infinite) { +      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) +        return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; +      else +        return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; +    } +  } + +  if (empty_info != 0) +    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); +  else +    mod_tlen = tlen; + +  if (infinite && qn->lower <= 1) { +    if (qn->greedy) { +      if (qn->lower == 1) +	len = SIZE_OP_JUMP; +      else +	len = 0; + +      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; +    } +    else { +      if (qn->lower == 0) +	len = SIZE_OP_JUMP; +      else +	len = 0; + +      len += mod_tlen + SIZE_OP_PUSH + cklen; +    } +  } +  else if (qn->upper == 0) { +    if (qn->is_refered != 0) /* /(?<n>..){0}/ */ +      len = SIZE_OP_JUMP + tlen; +    else +      len = 0; +  } +  else if (qn->upper == 1 && qn->greedy) { +    if (qn->lower == 0) { +      if (CKN_ON) { +	len = SIZE_OP_STATE_CHECK_PUSH + tlen; +      } +      else { +	len = SIZE_OP_PUSH + tlen; +      } +    } +    else { +      len = tlen; +    } +  } +  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ +    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; +  } +  else { +    len = SIZE_OP_REPEAT_INC +        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; +    if (CKN_ON) +      len += SIZE_OP_STATE_CHECK; +  } + +  return len; +} + +static int +compile_quantifier_node(QtfrNode* qn, regex_t* reg) +{ +  int r, mod_tlen; +  int ckn; +  int infinite = IS_REPEAT_INFINITE(qn->upper); +  int empty_info = qn->target_empty_info; +  int tlen = compile_length_tree(qn->target, reg); + +  if (tlen < 0) return tlen; + +  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + +  if (is_anychar_star_quantifier(qn)) { +    r = compile_tree_n_times(qn->target, qn->lower, reg); +    if (r) return r; +    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { +      if (IS_MULTILINE(reg->options)) +        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); +      else +        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); +      if (r) return r; +      if (CKN_ON) { +        r = add_state_check_num(reg, ckn); +        if (r) return r; +      } + +      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); +    } +    else { +      if (IS_MULTILINE(reg->options)) { +        r = add_opcode(reg, (CKN_ON ? +                             OP_STATE_CHECK_ANYCHAR_ML_STAR +                             : OP_ANYCHAR_ML_STAR)); +      } +      else { +        r = add_opcode(reg, (CKN_ON ? +                             OP_STATE_CHECK_ANYCHAR_STAR +                             : OP_ANYCHAR_STAR)); +      } +      if (r) return r; +      if (CKN_ON) +        r = add_state_check_num(reg, ckn); + +      return r; +    } +  } + +  if (empty_info != 0) +    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); +  else +    mod_tlen = tlen; + +  if (infinite && qn->lower <= 1) { +    if (qn->greedy) { +      if (qn->lower == 1) { +        r = add_opcode_rel_addr(reg, OP_JUMP, +                       (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); +        if (r) return r; +      } + +      if (CKN_ON) { +        r = add_opcode(reg, OP_STATE_CHECK_PUSH); +        if (r) return r; +        r = add_state_check_num(reg, ckn); +        if (r) return r; +        r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); +      } +      else { +        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); +      } +      if (r) return r; +      r = compile_tree_empty_check(qn->target, reg, empty_info); +      if (r) return r; +      r = add_opcode_rel_addr(reg, OP_JUMP, +                -(mod_tlen + (int )SIZE_OP_JUMP +                + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); +    } +    else { +      if (qn->lower == 0) { +        r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); +        if (r) return r; +      } +      r = compile_tree_empty_check(qn->target, reg, empty_info); +      if (r) return r; +      if (CKN_ON) { +        r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); +        if (r) return r; +        r = add_state_check_num(reg, ckn); +        if (r) return r; +        r = add_rel_addr(reg, +                         -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); +      } +      else +        r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); +    } +  } +  else if (qn->upper == 0) { +    if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ +      r = add_opcode_rel_addr(reg, OP_JUMP, tlen); +      if (r) return r; +      r = compile_tree(qn->target, reg); +    } +    else +      r = 0; +  } +  else if (qn->upper == 1 && qn->greedy) { +    if (qn->lower == 0) { +      if (CKN_ON) { +        r = add_opcode(reg, OP_STATE_CHECK_PUSH); +        if (r) return r; +        r = add_state_check_num(reg, ckn); +        if (r) return r; +        r = add_rel_addr(reg, tlen); +      } +      else { +        r = add_opcode_rel_addr(reg, OP_PUSH, tlen); +      } +      if (r) return r; +    } + +    r = compile_tree(qn->target, reg); +  } +  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ +    if (CKN_ON) { +      r = add_opcode(reg, OP_STATE_CHECK_PUSH); +      if (r) return r; +      r = add_state_check_num(reg, ckn); +      if (r) return r; +      r = add_rel_addr(reg, SIZE_OP_JUMP); +    } +    else { +      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); +    } + +    if (r) return r; +    r = add_opcode_rel_addr(reg, OP_JUMP, tlen); +    if (r) return r; +    r = compile_tree(qn->target, reg); +  } +  else { +    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); +    if (CKN_ON) { +      if (r) return r; +      r = add_opcode(reg, OP_STATE_CHECK); +      if (r) return r; +      r = add_state_check_num(reg, ckn); +    } +  } +  return r; +} + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +{ +  int len, mod_tlen; +  int infinite = IS_REPEAT_INFINITE(qn->upper); +  int empty_info = qn->target_empty_info; +  int tlen = compile_length_tree(qn->target, reg); + +  if (tlen < 0) return tlen; + +  /* anychar repeat */ +  if (NTYPE(qn->target) == NT_CANY) { +    if (qn->greedy && infinite) { +      if (IS_NOT_NULL(qn->next_head_exact)) +        return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; +      else +        return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; +    } +  } + +  if (empty_info != 0) +    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); +  else +    mod_tlen = tlen; + +  if (infinite && +      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { +      len = SIZE_OP_JUMP; +    } +    else { +      len = tlen * qn->lower; +    } + +    if (qn->greedy) { +      if (IS_NOT_NULL(qn->head_exact)) +        len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; +      else if (IS_NOT_NULL(qn->next_head_exact)) +        len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; +      else +        len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; +    } +    else +      len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; +  } +  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ +    len = SIZE_OP_JUMP + tlen; +  } +  else if (!infinite && qn->greedy && +           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper +                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +    len = tlen * qn->lower; +    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); +  } +  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ +    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; +  } +  else { +    len = SIZE_OP_REPEAT_INC +        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; +  } + +  return len; +} + +static int +compile_quantifier_node(QtfrNode* qn, regex_t* reg) +{ +  int i, r, mod_tlen; +  int infinite = IS_REPEAT_INFINITE(qn->upper); +  int empty_info = qn->target_empty_info; +  int tlen = compile_length_tree(qn->target, reg); + +  if (tlen < 0) return tlen; + +  if (is_anychar_star_quantifier(qn)) { +    r = compile_tree_n_times(qn->target, qn->lower, reg); +    if (r) return r; +    if (IS_NOT_NULL(qn->next_head_exact)) { +      if (IS_MULTILINE(reg->options)) +        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); +      else +        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); +      if (r) return r; +      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); +    } +    else { +      if (IS_MULTILINE(reg->options)) +        return add_opcode(reg, OP_ANYCHAR_ML_STAR); +      else +        return add_opcode(reg, OP_ANYCHAR_STAR); +    } +  } + +  if (empty_info != 0) +    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); +  else +    mod_tlen = tlen; + +  if (infinite && +      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { +      if (qn->greedy) { +        if (IS_NOT_NULL(qn->head_exact)) +          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); +        else if (IS_NOT_NULL(qn->next_head_exact)) +          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); +        else +          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); +      } +      else { +        r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); +      } +      if (r) return r; +    } +    else { +      r = compile_tree_n_times(qn->target, qn->lower, reg); +      if (r) return r; +    } + +    if (qn->greedy) { +      if (IS_NOT_NULL(qn->head_exact)) { +        r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, +                                mod_tlen + SIZE_OP_JUMP); +        if (r) return r; +        add_bytes(reg, NSTR(qn->head_exact)->s, 1); +        r = compile_tree_empty_check(qn->target, reg, empty_info); +        if (r) return r; +        r = add_opcode_rel_addr(reg, OP_JUMP, +         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); +      } +      else if (IS_NOT_NULL(qn->next_head_exact)) { +        r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, +                                mod_tlen + SIZE_OP_JUMP); +        if (r) return r; +        add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); +        r = compile_tree_empty_check(qn->target, reg, empty_info); +        if (r) return r; +        r = add_opcode_rel_addr(reg, OP_JUMP, +          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); +      } +      else { +        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); +        if (r) return r; +        r = compile_tree_empty_check(qn->target, reg, empty_info); +        if (r) return r; +        r = add_opcode_rel_addr(reg, OP_JUMP, +                   -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); +      } +    } +    else { +      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); +      if (r) return r; +      r = compile_tree_empty_check(qn->target, reg, empty_info); +      if (r) return r; +      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); +    } +  } +  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ +    r = add_opcode_rel_addr(reg, OP_JUMP, tlen); +    if (r) return r; +    r = compile_tree(qn->target, reg); +  } +  else if (!infinite && qn->greedy && +           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper +                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { +    int n = qn->upper - qn->lower; + +    r = compile_tree_n_times(qn->target, qn->lower, reg); +    if (r) return r; + +    for (i = 0; i < n; i++) { +      r = add_opcode_rel_addr(reg, OP_PUSH, +			   (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); +      if (r) return r; +      r = compile_tree(qn->target, reg); +      if (r) return r; +    } +  } +  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ +    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); +    if (r) return r; +    r = add_opcode_rel_addr(reg, OP_JUMP, tlen); +    if (r) return r; +    r = compile_tree(qn->target, reg); +  } +  else { +    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); +  } +  return r; +} +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_option_node(EncloseNode* node, regex_t* reg) +{ +  int tlen; +  OnigOptionType prev = reg->options; + +  reg->options = node->option; +  tlen = compile_length_tree(node->target, reg); +  reg->options = prev; + +  if (tlen < 0) return tlen; + +  if (IS_DYNAMIC_OPTION(prev ^ node->option)) { +    return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL +           + tlen + SIZE_OP_SET_OPTION; +  } +  else +    return tlen; +} + +static int +compile_option_node(EncloseNode* node, regex_t* reg) +{ +  int r; +  OnigOptionType prev = reg->options; + +  if (IS_DYNAMIC_OPTION(prev ^ node->option)) { +    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); +    if (r) return r; +    r = add_opcode_option(reg, OP_SET_OPTION, prev); +    if (r) return r; +    r = add_opcode(reg, OP_FAIL); +    if (r) return r; +  } + +  reg->options = node->option; +  r = compile_tree(node->target, reg); +  reg->options = prev; + +  if (IS_DYNAMIC_OPTION(prev ^ node->option)) { +    if (r) return r; +    r = add_opcode_option(reg, OP_SET_OPTION, prev); +  } +  return r; +} + +static int +compile_length_enclose_node(EncloseNode* node, regex_t* reg) +{ +  int len; +  int tlen; + +  if (node->type == ENCLOSE_OPTION) +    return compile_length_option_node(node, reg); + +  if (node->target) { +    tlen = compile_length_tree(node->target, reg); +    if (tlen < 0) return tlen; +  } +  else +    tlen = 0; + +  switch (node->type) { +  case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL +    if (IS_ENCLOSE_CALLED(node)) { +      len = SIZE_OP_MEMORY_START_PUSH + tlen +        + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; +      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) +        len += (IS_ENCLOSE_RECURSION(node) +                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); +      else +        len += (IS_ENCLOSE_RECURSION(node) +                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); +    } +    else +#endif +    { +      if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) +        len = SIZE_OP_MEMORY_START_PUSH; +      else +        len = SIZE_OP_MEMORY_START; + +      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) +		     ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); +    } +    break; + +  case ENCLOSE_STOP_BACKTRACK: +    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { +      QtfrNode* qn = NQTFR(node->target); +      tlen = compile_length_tree(qn->target, reg); +      if (tlen < 0) return tlen; + +      len = tlen * qn->lower +	  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; +    } +    else { +      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; +    } +    break; + +  default: +    return ONIGERR_TYPE_BUG; +    break; +  } + +  return len; +} + +static int get_char_length_tree(Node* node, regex_t* reg, int* len); + +static int +compile_enclose_node(EncloseNode* node, regex_t* reg) +{ +  int r, len; + +  if (node->type == ENCLOSE_OPTION) +    return compile_option_node(node, reg); + +  switch (node->type) { +  case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL +    if (IS_ENCLOSE_CALLED(node)) { +      r = add_opcode(reg, OP_CALL); +      if (r) return r; +      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; +      node->state |= NST_ADDR_FIXED; +      r = add_abs_addr(reg, (int )node->call_addr); +      if (r) return r; +      len = compile_length_tree(node->target, reg); +      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); +      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) +        len += (IS_ENCLOSE_RECURSION(node) +                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); +      else +        len += (IS_ENCLOSE_RECURSION(node) +                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + +      r = add_opcode_rel_addr(reg, OP_JUMP, len); +      if (r) return r; +    } +#endif +    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) +      r = add_opcode(reg, OP_MEMORY_START_PUSH); +    else +      r = add_opcode(reg, OP_MEMORY_START); +    if (r) return r; +    r = add_mem_num(reg, node->regnum); +    if (r) return r; +    r = compile_tree(node->target, reg); +    if (r) return r; +#ifdef USE_SUBEXP_CALL +    if (IS_ENCLOSE_CALLED(node)) { +      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) +        r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) +                             ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); +      else +        r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) +                             ? OP_MEMORY_END_REC : OP_MEMORY_END)); + +      if (r) return r; +      r = add_mem_num(reg, node->regnum); +      if (r) return r; +      r = add_opcode(reg, OP_RETURN); +    } +    else +#endif +    { +      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) +        r = add_opcode(reg, OP_MEMORY_END_PUSH); +      else +        r = add_opcode(reg, OP_MEMORY_END); +      if (r) return r; +      r = add_mem_num(reg, node->regnum); +    } +    break; + +  case ENCLOSE_STOP_BACKTRACK: +    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { +      QtfrNode* qn = NQTFR(node->target); +      r = compile_tree_n_times(qn->target, qn->lower, reg); +      if (r) return r; + +      len = compile_length_tree(qn->target, reg); +      if (len < 0) return len; + +      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); +      if (r) return r; +      r = compile_tree(qn->target, reg); +      if (r) return r; +      r = add_opcode(reg, OP_POP); +      if (r) return r; +      r = add_opcode_rel_addr(reg, OP_JUMP, +	 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); +    } +    else { +      r = add_opcode(reg, OP_PUSH_STOP_BT); +      if (r) return r; +      r = compile_tree(node->target, reg); +      if (r) return r; +      r = add_opcode(reg, OP_POP_STOP_BT); +    } +    break; + +  default: +    return ONIGERR_TYPE_BUG; +    break; +  } + +  return r; +} + +static int +compile_length_anchor_node(AnchorNode* node, regex_t* reg) +{ +  int len; +  int tlen = 0; + +  if (node->target) { +    tlen = compile_length_tree(node->target, reg); +    if (tlen < 0) return tlen; +  } + +  switch (node->type) { +  case ANCHOR_PREC_READ: +    len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; +    break; +  case ANCHOR_PREC_READ_NOT: +    len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; +    break; +  case ANCHOR_LOOK_BEHIND: +    len = SIZE_OP_LOOK_BEHIND + tlen; +    break; +  case ANCHOR_LOOK_BEHIND_NOT: +    len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; +    break; + +  default: +    len = SIZE_OPCODE; +    break; +  } + +  return len; +} + +static int +compile_anchor_node(AnchorNode* node, regex_t* reg) +{ +  int r, len; + +  switch (node->type) { +  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break; +  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break; +  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break; +  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break; +  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break; +  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; + +  case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break; +  case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; +#ifdef USE_WORD_BEGIN_END +  case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break; +  case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break; +#endif + +  case ANCHOR_PREC_READ: +    r = add_opcode(reg, OP_PUSH_POS); +    if (r) return r; +    r = compile_tree(node->target, reg); +    if (r) return r; +    r = add_opcode(reg, OP_POP_POS); +    break; + +  case ANCHOR_PREC_READ_NOT: +    len = compile_length_tree(node->target, reg); +    if (len < 0) return len; +    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); +    if (r) return r; +    r = compile_tree(node->target, reg); +    if (r) return r; +    r = add_opcode(reg, OP_FAIL_POS); +    break; + +  case ANCHOR_LOOK_BEHIND: +    { +      int n; +      r = add_opcode(reg, OP_LOOK_BEHIND); +      if (r) return r; +      if (node->char_len < 0) { +        r = get_char_length_tree(node->target, reg, &n); +        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; +      } +      else +        n = node->char_len; + +      r = add_length(reg, n); +      if (r) return r; +      r = compile_tree(node->target, reg); +    } +    break; + +  case ANCHOR_LOOK_BEHIND_NOT: +    { +      int n; +      len = compile_length_tree(node->target, reg); +      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, +			   len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); +      if (r) return r; +      if (node->char_len < 0) { +        r = get_char_length_tree(node->target, reg, &n); +        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; +      } +      else +        n = node->char_len; +      r = add_length(reg, n); +      if (r) return r; +      r = compile_tree(node->target, reg); +      if (r) return r; +      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); +    } +    break; + +  default: +    return ONIGERR_TYPE_BUG; +    break; +  } + +  return r; +} + +static int +compile_length_tree(Node* node, regex_t* reg) +{ +  int len, type, r; + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +    len = 0; +    do { +      r = compile_length_tree(NCAR(node), reg); +      if (r < 0) return r; +      len += r; +    } while (IS_NOT_NULL(node = NCDR(node))); +    r = len; +    break; + +  case NT_ALT: +    { +      int n; + +      n = r = 0; +      do { +        r += compile_length_tree(NCAR(node), reg); +        n++; +      } while (IS_NOT_NULL(node = NCDR(node))); +      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); +    } +    break; + +  case NT_STR: +    if (NSTRING_IS_RAW(node)) +      r = compile_length_string_raw_node(NSTR(node), reg); +    else +      r = compile_length_string_node(node, reg); +    break; + +  case NT_CCLASS: +    r = compile_length_cclass_node(NCCLASS(node), reg); +    break; + +  case NT_CTYPE: +  case NT_CANY: +    r = SIZE_OPCODE; +    break; + +  case NT_BREF: +    { +      BRefNode* br = NBREF(node); + +#ifdef USE_BACKREF_WITH_LEVEL +      if (IS_BACKREF_NEST_LEVEL(br)) { +        r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + +            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); +      } +      else +#endif +      if (br->back_num == 1) { +        r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) +             ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); +      } +      else { +        r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); +      } +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    r = SIZE_OP_CALL; +    break; +#endif + +  case NT_QTFR: +    r = compile_length_quantifier_node(NQTFR(node), reg); +    break; + +  case NT_ENCLOSE: +    r = compile_length_enclose_node(NENCLOSE(node), reg); +    break; + +  case NT_ANCHOR: +    r = compile_length_anchor_node(NANCHOR(node), reg); +    break; + +  default: +    return ONIGERR_TYPE_BUG; +    break; +  } + +  return r; +} + +static int +compile_tree(Node* node, regex_t* reg) +{ +  int n, type, len, pos, r = 0; + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +    do { +      r = compile_tree(NCAR(node), reg); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_ALT: +    { +      Node* x = node; +      len = 0; +      do { +        len += compile_length_tree(NCAR(x), reg); +        if (NCDR(x) != NULL) { +          len += SIZE_OP_PUSH + SIZE_OP_JUMP; +        } +      } while (IS_NOT_NULL(x = NCDR(x))); +      pos = reg->used + len;  /* goal position */ + +      do { +        len = compile_length_tree(NCAR(node), reg); +        if (IS_NOT_NULL(NCDR(node))) { +          r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); +          if (r) break; +        } +        r = compile_tree(NCAR(node), reg); +        if (r) break; +        if (IS_NOT_NULL(NCDR(node))) { +          len = pos - (reg->used + SIZE_OP_JUMP); +          r = add_opcode_rel_addr(reg, OP_JUMP, len); +          if (r) break; +        } +      } while (IS_NOT_NULL(node = NCDR(node))); +    } +    break; + +  case NT_STR: +    if (NSTRING_IS_RAW(node)) +      r = compile_string_raw_node(NSTR(node), reg); +    else +      r = compile_string_node(node, reg); +    break; + +  case NT_CCLASS: +    r = compile_cclass_node(NCCLASS(node), reg); +    break; + +  case NT_CTYPE: +    { +      int op; + +      switch (NCTYPE(node)->ctype) { +      case ONIGENC_CTYPE_WORD: +        if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD; +        else                         op = OP_WORD; +        break; +      default: +        return ONIGERR_TYPE_BUG; +        break; +      } +      r = add_opcode(reg, op); +    } +    break; + +  case NT_CANY: +    if (IS_MULTILINE(reg->options)) +      r = add_opcode(reg, OP_ANYCHAR_ML); +    else +      r = add_opcode(reg, OP_ANYCHAR); +    break; + +  case NT_BREF: +    { +      BRefNode* br = NBREF(node); + +#ifdef USE_BACKREF_WITH_LEVEL +      if (IS_BACKREF_NEST_LEVEL(br)) { +        r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); +        if (r) return r; +        r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); +        if (r) return r; +        r = add_length(reg, br->nest_level); +        if (r) return r; + +        goto add_bacref_mems; +      } +      else +#endif +      if (br->back_num == 1) { +        n = br->back_static[0]; +        if (IS_IGNORECASE(reg->options)) { +          r = add_opcode(reg, OP_BACKREFN_IC); +          if (r) return r; +          r = add_mem_num(reg, n); +        } +        else { +          switch (n) { +          case 1:  r = add_opcode(reg, OP_BACKREF1); break; +          case 2:  r = add_opcode(reg, OP_BACKREF2); break; +          default: +            r = add_opcode(reg, OP_BACKREFN); +            if (r) return r; +            r = add_mem_num(reg, n); +            break; +          } +        } +      } +      else { +        int i; +        int* p; + +        if (IS_IGNORECASE(reg->options)) { +          r = add_opcode(reg, OP_BACKREF_MULTI_IC); +        } +        else { +          r = add_opcode(reg, OP_BACKREF_MULTI); +        } +        if (r) return r; + +#ifdef USE_BACKREF_WITH_LEVEL +      add_bacref_mems: +#endif +        r = add_length(reg, br->back_num); +        if (r) return r; +        p = BACKREFS_P(br); +        for (i = br->back_num - 1; i >= 0; i--) { +          r = add_mem_num(reg, p[i]); +          if (r) return r; +        } +      } +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    r = compile_call(NCALL(node), reg); +    break; +#endif + +  case NT_QTFR: +    r = compile_quantifier_node(NQTFR(node), reg); +    break; + +  case NT_ENCLOSE: +    r = compile_enclose_node(NENCLOSE(node), reg); +    break; + +  case NT_ANCHOR: +    r = compile_anchor_node(NANCHOR(node), reg); +    break; + +  default: +#ifdef ONIG_DEBUG +    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); +#endif +    break; +  } + +  return r; +} + +#ifdef USE_NAMED_GROUP + +static int +noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) +{ +  int r = 0; +  Node* node = *plink; + +  switch (NTYPE(node)) { +  case NT_LIST: +  case NT_ALT: +    do { +      r = noname_disable_map(&(NCAR(node)), map, counter); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_QTFR: +    { +      Node** ptarget = &(NQTFR(node)->target); +      Node*  old = *ptarget; +      r = noname_disable_map(ptarget, map, counter); +      if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { +        onig_reduce_nested_quantifier(node, *ptarget); +      } +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); +      if (en->type == ENCLOSE_MEMORY) { +        if (IS_ENCLOSE_NAMED_GROUP(en)) { +          (*counter)++; +          map[en->regnum].new_val = *counter; +          en->regnum = *counter; +          r = noname_disable_map(&(en->target), map, counter); +        } +        else { +          *plink = en->target; +          en->target = NULL_NODE; +          onig_node_free(node); +          r = noname_disable_map(plink, map, counter); +        } +      } +      else +        r = noname_disable_map(&(en->target), map, counter); +    } +    break; + +  default: +    break; +  } + +  return r; +} + +static int +renumber_node_backref(Node* node, GroupNumRemap* map) +{ +  int i, pos, n, old_num; +  int *backs; +  BRefNode* bn = NBREF(node); + +  if (! IS_BACKREF_NAME_REF(bn)) +    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + +  old_num = bn->back_num; +  if (IS_NULL(bn->back_dynamic)) +    backs = bn->back_static; +  else +    backs = bn->back_dynamic; + +  for (i = 0, pos = 0; i < old_num; i++) { +    n = map[backs[i]].new_val; +    if (n > 0) { +      backs[pos] = n; +      pos++; +    } +  } + +  bn->back_num = pos; +  return 0; +} + +static int +renumber_by_map(Node* node, GroupNumRemap* map) +{ +  int r = 0; + +  switch (NTYPE(node)) { +  case NT_LIST: +  case NT_ALT: +    do { +      r = renumber_by_map(NCAR(node), map); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; +  case NT_QTFR: +    r = renumber_by_map(NQTFR(node)->target, map); +    break; +  case NT_ENCLOSE: +    r = renumber_by_map(NENCLOSE(node)->target, map); +    break; + +  case NT_BREF: +    r = renumber_node_backref(node, map); +    break; + +  default: +    break; +  } + +  return r; +} + +static int +numbered_ref_check(Node* node) +{ +  int r = 0; + +  switch (NTYPE(node)) { +  case NT_LIST: +  case NT_ALT: +    do { +      r = numbered_ref_check(NCAR(node)); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; +  case NT_QTFR: +    r = numbered_ref_check(NQTFR(node)->target); +    break; +  case NT_ENCLOSE: +    r = numbered_ref_check(NENCLOSE(node)->target); +    break; + +  case NT_BREF: +    if (! IS_BACKREF_NAME_REF(NBREF(node))) +      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; +    break; + +  default: +    break; +  } + +  return r; +} + +static int +disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) +{ +  int r, i, pos, counter; +  BitStatusType loc; +  GroupNumRemap* map; + +  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); +  CHECK_NULL_RETURN_MEMERR(map); +  for (i = 1; i <= env->num_mem; i++) { +    map[i].new_val = 0; +  } +  counter = 0; +  r = noname_disable_map(root, map, &counter); +  if (r != 0) return r; + +  r = renumber_by_map(*root, map); +  if (r != 0) return r; + +  for (i = 1, pos = 1; i <= env->num_mem; i++) { +    if (map[i].new_val > 0) { +      SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; +      pos++; +    } +  } + +  loc = env->capture_history; +  BIT_STATUS_CLEAR(env->capture_history); +  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { +    if (BIT_STATUS_AT(loc, i)) { +      BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); +    } +  } + +  env->num_mem = env->num_named; +  reg->num_mem = env->num_named; + +  return onig_renumber_name_table(reg, map); +} +#endif /* USE_NAMED_GROUP */ + +#ifdef USE_SUBEXP_CALL +static int +unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +{ +  int i, offset; +  EncloseNode* en; +  AbsAddrType addr; + +  for (i = 0; i < uslist->num; i++) { +    en = NENCLOSE(uslist->us[i].target); +    if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; +    addr = en->call_addr; +    offset = uslist->us[i].offset; + +    BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); +  } +  return 0; +} +#endif + +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT +static int +quantifiers_memory_node_info(Node* node) +{ +  int r = 0; + +  switch (NTYPE(node)) { +  case NT_LIST: +  case NT_ALT: +    { +      int v; +      do { +        v = quantifiers_memory_node_info(NCAR(node)); +        if (v > r) r = v; +      } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    if (IS_CALL_RECURSION(NCALL(node))) { +      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ +    } +    else +      r = quantifiers_memory_node_info(NCALL(node)->target); +    break; +#endif + +  case NT_QTFR: +    { +      QtfrNode* qn = NQTFR(node); +      if (qn->upper != 0) { +        r = quantifiers_memory_node_info(qn->target); +      } +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); +      switch (en->type) { +      case ENCLOSE_MEMORY: +        return NQ_TARGET_IS_EMPTY_MEM; +        break; + +      case ENCLOSE_OPTION: +      case ENCLOSE_STOP_BACKTRACK: +        r = quantifiers_memory_node_info(en->target); +        break; +      default: +        break; +      } +    } +    break; + +  case NT_BREF: +  case NT_STR: +  case NT_CTYPE: +  case NT_CCLASS: +  case NT_CANY: +  case NT_ANCHOR: +  default: +    break; +  } + +  return r; +} +#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ + +static int +get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) +{ +  OnigDistance tmin; +  int r = 0; + +  *min = 0; +  switch (NTYPE(node)) { +  case NT_BREF: +    { +      int i; +      int* backs; +      Node** nodes = SCANENV_MEM_NODES(env); +      BRefNode* br = NBREF(node); +      if (br->state & NST_RECURSION) break; + +      backs = BACKREFS_P(br); +      if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF; +      r = get_min_match_length(nodes[backs[0]], min, env); +      if (r != 0) break; +      for (i = 1; i < br->back_num; i++) { +        if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF; +        r = get_min_match_length(nodes[backs[i]], &tmin, env); +        if (r != 0) break; +        if (*min > tmin) *min = tmin; +      } +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    if (IS_CALL_RECURSION(NCALL(node))) { +      EncloseNode* en = NENCLOSE(NCALL(node)->target); +      if (IS_ENCLOSE_MIN_FIXED(en)) +        *min = en->min_len; +    } +    else +      r = get_min_match_length(NCALL(node)->target, min, env); +    break; +#endif + +  case NT_LIST: +    do { +      r = get_min_match_length(NCAR(node), &tmin, env); +      if (r == 0) *min += tmin; +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_ALT: +    { +      Node *x, *y; +      y = node; +      do { +        x = NCAR(y); +        r = get_min_match_length(x, &tmin, env); +        if (r != 0) break; +        if (y == node) *min = tmin; +        else if (*min > tmin) *min = tmin; +      } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); +    } +    break; + +  case NT_STR: +    { +      StrNode* sn = NSTR(node); +      *min = sn->end - sn->s; +    } +    break; + +  case NT_CTYPE: +    *min = 1; +    break; + +  case NT_CCLASS: +  case NT_CANY: +    *min = 1; +    break; + +  case NT_QTFR: +    { +      QtfrNode* qn = NQTFR(node); + +      if (qn->lower > 0) { +        r = get_min_match_length(qn->target, min, env); +        if (r == 0) +          *min = distance_multiply(*min, qn->lower); +      } +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); +      switch (en->type) { +      case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL +        if (IS_ENCLOSE_MIN_FIXED(en)) +          *min = en->min_len; +        else { +          r = get_min_match_length(en->target, min, env); +          if (r == 0) { +            en->min_len = *min; +            SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); +          } +        } +        break; +#endif +      case ENCLOSE_OPTION: +      case ENCLOSE_STOP_BACKTRACK: +        r = get_min_match_length(en->target, min, env); +        break; +      } +    } +    break; + +  case NT_ANCHOR: +  default: +    break; +  } + +  return r; +} + +static int +get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) +{ +  OnigDistance tmax; +  int r = 0; + +  *max = 0; +  switch (NTYPE(node)) { +  case NT_LIST: +    do { +      r = get_max_match_length(NCAR(node), &tmax, env); +      if (r == 0) +        *max = distance_add(*max, tmax); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_ALT: +    do { +      r = get_max_match_length(NCAR(node), &tmax, env); +      if (r == 0 && *max < tmax) *max = tmax; +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_STR: +    { +      StrNode* sn = NSTR(node); +      *max = sn->end - sn->s; +    } +    break; + +  case NT_CTYPE: +    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); +    break; + +  case NT_CCLASS: +  case NT_CANY: +    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); +    break; + +  case NT_BREF: +    { +      int i; +      int* backs; +      Node** nodes = SCANENV_MEM_NODES(env); +      BRefNode* br = NBREF(node); +      if (br->state & NST_RECURSION) { +        *max = ONIG_INFINITE_DISTANCE; +        break; +      } +      backs = BACKREFS_P(br); +      for (i = 0; i < br->back_num; i++) { +        if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF; +        r = get_max_match_length(nodes[backs[i]], &tmax, env); +        if (r != 0) break; +        if (*max < tmax) *max = tmax; +      } +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    if (! IS_CALL_RECURSION(NCALL(node))) +      r = get_max_match_length(NCALL(node)->target, max, env); +    else +      *max = ONIG_INFINITE_DISTANCE; +    break; +#endif + +  case NT_QTFR: +    { +      QtfrNode* qn = NQTFR(node); + +      if (qn->upper != 0) { +        r = get_max_match_length(qn->target, max, env); +        if (r == 0 && *max != 0) { +          if (! IS_REPEAT_INFINITE(qn->upper)) +            *max = distance_multiply(*max, qn->upper); +          else +            *max = ONIG_INFINITE_DISTANCE; +        } +      } +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); +      switch (en->type) { +      case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL +	if (IS_ENCLOSE_MAX_FIXED(en)) +	  *max = en->max_len; +	else { +	  r = get_max_match_length(en->target, max, env); +	  if (r == 0) { +	    en->max_len = *max; +	    SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); +	  } +	} +	break; +#endif +      case ENCLOSE_OPTION: +      case ENCLOSE_STOP_BACKTRACK: +        r = get_max_match_length(en->target, max, env); +        break; +      } +    } +    break; + +  case NT_ANCHOR: +  default: +    break; +  } + +  return r; +} + +#define GET_CHAR_LEN_VARLEN           -1 +#define GET_CHAR_LEN_TOP_ALT_VARLEN   -2 + +/* fixed size pattern node only */ +static int +get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) +{ +  int tlen; +  int r = 0; + +  level++; +  *len = 0; +  switch (NTYPE(node)) { +  case NT_LIST: +    do { +      r = get_char_length_tree1(NCAR(node), reg, &tlen, level); +      if (r == 0) +        *len = distance_add(*len, tlen); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_ALT: +    { +      int tlen2; +      int varlen = 0; + +      r = get_char_length_tree1(NCAR(node), reg, &tlen, level); +      while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { +        r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); +        if (r == 0) { +          if (tlen != tlen2) +            varlen = 1; +        } +      } +      if (r == 0) { +        if (varlen != 0) { +          if (level == 1) +            r = GET_CHAR_LEN_TOP_ALT_VARLEN; +          else +            r = GET_CHAR_LEN_VARLEN; +        } +        else +          *len = tlen; +      } +    } +    break; + +  case NT_STR: +    { +      StrNode* sn = NSTR(node); +      UChar *s = sn->s; +      while (s < sn->end) { +        s += enclen(reg->enc, s); +        (*len)++; +      } +    } +    break; + +  case NT_QTFR: +    { +      QtfrNode* qn = NQTFR(node); +      if (qn->lower == qn->upper) { +        r = get_char_length_tree1(qn->target, reg, &tlen, level); +        if (r == 0) +          *len = distance_multiply(tlen, qn->lower); +      } +      else +        r = GET_CHAR_LEN_VARLEN; +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    if (! IS_CALL_RECURSION(NCALL(node))) +      r = get_char_length_tree1(NCALL(node)->target, reg, len, level); +    else +      r = GET_CHAR_LEN_VARLEN; +    break; +#endif + +  case NT_CTYPE: +    *len = 1; +    break; + +  case NT_CCLASS: +  case NT_CANY: +    *len = 1; +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); +      switch (en->type) { +      case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL +	if (IS_ENCLOSE_CLEN_FIXED(en)) +	  *len = en->char_len; +	else { +	  r = get_char_length_tree1(en->target, reg, len, level); +	  if (r == 0) { +	    en->char_len = *len; +	    SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); +	  } +	} +	break; +#endif +      case ENCLOSE_OPTION: +      case ENCLOSE_STOP_BACKTRACK: +        r = get_char_length_tree1(en->target, reg, len, level); +        break; +      default: +        break; +      } +    } +    break; + +  case NT_ANCHOR: +    break; + +  default: +    r = GET_CHAR_LEN_VARLEN; +    break; +  } + +  return r; +} + +static int +get_char_length_tree(Node* node, regex_t* reg, int* len) +{ +  return get_char_length_tree1(node, reg, len, 0); +} + +/* x is not included y ==>  1 : 0 */ +static int +is_not_included(Node* x, Node* y, regex_t* reg) +{ +  int i, len; +  OnigCodePoint code; +  UChar *p; +  int ytype; + + retry: +  ytype = NTYPE(y); +  switch (NTYPE(x)) { +  case NT_CTYPE: +    { +      switch (ytype) { +      case NT_CTYPE: +        if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && +            NCTYPE(y)->not   != NCTYPE(x)->not) +          return 1; +        else +          return 0; +        break; + +      case NT_CCLASS: +      swap: +        { +          Node* tmp; +          tmp = x; x = y; y = tmp; +          goto retry; +        } +        break; + +      case NT_STR: +        goto swap; +        break; + +      default: +        break; +      } +    } +    break; + +  case NT_CCLASS: +    { +      CClassNode* xc = NCCLASS(x); +      switch (ytype) { +      case NT_CTYPE: +        switch (NCTYPE(y)->ctype) { +        case ONIGENC_CTYPE_WORD: +          if (NCTYPE(y)->not == 0) { +            if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { +              for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +                if (BITSET_AT(xc->bs, i)) { +                  if (IS_CODE_SB_WORD(reg->enc, i)) return 0; +                } +              } +              return 1; +            } +            return 0; +          } +          else { +            for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +              if (! IS_CODE_SB_WORD(reg->enc, i)) { +                if (!IS_NCCLASS_NOT(xc)) { +                  if (BITSET_AT(xc->bs, i)) +                    return 0; +                } +                else { +                  if (! BITSET_AT(xc->bs, i)) +                    return 0; +                } +              } +            } +            return 1; +          } +          break; + +        default: +          break; +        } +        break; + +      case NT_CCLASS: +        { +          int v; +          CClassNode* yc = NCCLASS(y); + +          for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +            v = BITSET_AT(xc->bs, i); +            if ((v != 0 && !IS_NCCLASS_NOT(xc)) || +                (v == 0 && IS_NCCLASS_NOT(xc))) { +              v = BITSET_AT(yc->bs, i); +              if ((v != 0 && !IS_NCCLASS_NOT(yc)) || +                  (v == 0 && IS_NCCLASS_NOT(yc))) +                return 0; +            } +          } +          if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || +              (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) +            return 1; +          return 0; +        } +        break; + +      case NT_STR: +        goto swap; +        break; + +      default: +        break; +      } +    } +    break; + +  case NT_STR: +    { +      StrNode* xs = NSTR(x); +      if (NSTRING_LEN(x) == 0) +        break; + +      //c = *(xs->s); +      switch (ytype) { +      case NT_CTYPE: +        switch (NCTYPE(y)->ctype) { +        case ONIGENC_CTYPE_WORD: +          if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) +            return NCTYPE(y)->not; +          else +            return !(NCTYPE(y)->not); +          break; +        default: +          break; +        } +        break; + +      case NT_CCLASS: +        { +          CClassNode* cc = NCCLASS(y); + +          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, +                                     xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); +          return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); +        } +        break; + +      case NT_STR: +        { +          UChar *q; +          StrNode* ys = NSTR(y); +          len = NSTRING_LEN(x); +          if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); +          if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { +            /* tiny version */ +            return 0; +          } +          else { +            for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { +              if (*p != *q) return 1; +            } +          } +        } +        break; +	 +      default: +        break; +      } +    } +    break; + +  default: +    break; +  } + +  return 0; +} + +static Node* +get_head_value_node(Node* node, int exact, regex_t* reg) +{ +  Node* n = NULL_NODE; + +  switch (NTYPE(node)) { +  case NT_BREF: +  case NT_ALT: +  case NT_CANY: +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +#endif +    break; + +  case NT_CTYPE: +  case NT_CCLASS: +    if (exact == 0) { +      n = node; +    } +    break; + +  case NT_LIST: +    n = get_head_value_node(NCAR(node), exact, reg); +    break; + +  case NT_STR: +    { +      StrNode* sn = NSTR(node); + +      if (sn->end <= sn->s) +        break; + +      if (exact != 0 && +          !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { +      } +      else { +        n = node; +      } +    } +    break; + +  case NT_QTFR: +    { +      QtfrNode* qn = NQTFR(node); +      if (qn->lower > 0) { +        if (IS_NOT_NULL(qn->head_exact)) +          n = qn->head_exact; +        else +          n = get_head_value_node(qn->target, exact, reg); +      } +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); +      switch (en->type) { +      case ENCLOSE_OPTION: +        { +          OnigOptionType options = reg->options; + +          reg->options = NENCLOSE(node)->option; +          n = get_head_value_node(NENCLOSE(node)->target, exact, reg); +          reg->options = options; +        } +        break; + +      case ENCLOSE_MEMORY: +      case ENCLOSE_STOP_BACKTRACK: +        n = get_head_value_node(en->target, exact, reg); +        break; +      } +    } +    break; + +  case NT_ANCHOR: +    if (NANCHOR(node)->type == ANCHOR_PREC_READ) +      n = get_head_value_node(NANCHOR(node)->target, exact, reg); +    break; + +  default: +    break; +  } + +  return n; +} + +static int +check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) +{ +  int type, r = 0; + +  type = NTYPE(node); +  if ((NTYPE2BIT(type) & type_mask) == 0) +    return 1; + +  switch (type) { +  case NT_LIST: +  case NT_ALT: +    do { +      r = check_type_tree(NCAR(node), type_mask, enclose_mask, +                          anchor_mask); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_QTFR: +    r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, +                        anchor_mask); +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); +      if ((en->type & enclose_mask) == 0) +        return 1; + +      r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); +    } +    break; + +  case NT_ANCHOR: +    type = NANCHOR(node)->type; +    if ((type & anchor_mask) == 0) +      return 1; + +    if (NANCHOR(node)->target) +      r = check_type_tree(NANCHOR(node)->target, +			  type_mask, enclose_mask, anchor_mask); +    break; + +  default: +    break; +  } +  return r; +} + +#ifdef USE_SUBEXP_CALL + +#define RECURSION_EXIST       1 +#define RECURSION_INFINITE    2 + +static int +subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) +{ +  int type; +  int r = 0; + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +    { +      Node *x; +      OnigDistance min; +      int ret; + +      x = node; +      do { +        ret = subexp_inf_recursive_check(NCAR(x), env, head); +        if (ret < 0 || ret == RECURSION_INFINITE) return ret; +        r |= ret; +        if (head) { +          ret = get_min_match_length(NCAR(x), &min, env); +          if (ret != 0) return ret; +          if (min != 0) head = 0; +        } +      } while (IS_NOT_NULL(x = NCDR(x))); +    } +    break; + +  case NT_ALT: +    { +      int ret; +      r = RECURSION_EXIST; +      do { +        ret = subexp_inf_recursive_check(NCAR(node), env, head); +        if (ret < 0 || ret == RECURSION_INFINITE) return ret; +        r &= ret; +      } while (IS_NOT_NULL(node = NCDR(node))); +    } +    break; + +  case NT_QTFR: +    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); +    if (r == RECURSION_EXIST) { +      if (NQTFR(node)->lower == 0) r = 0; +    } +    break; + +  case NT_ANCHOR: +    { +      AnchorNode* an = NANCHOR(node); +      switch (an->type) { +      case ANCHOR_PREC_READ: +      case ANCHOR_PREC_READ_NOT: +      case ANCHOR_LOOK_BEHIND: +      case ANCHOR_LOOK_BEHIND_NOT: +        r = subexp_inf_recursive_check(an->target, env, head); +        break; +      } +    } +    break; + +  case NT_CALL: +    r = subexp_inf_recursive_check(NCALL(node)->target, env, head); +    break; + +  case NT_ENCLOSE: +    if (IS_ENCLOSE_MARK2(NENCLOSE(node))) +      return 0; +    else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) +      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); +    else { +      SET_ENCLOSE_STATUS(node, NST_MARK2); +      r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); +      CLEAR_ENCLOSE_STATUS(node, NST_MARK2); +    } +    break; + +  default: +    break; +  } + +  return r; +} + +static int +subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) +{ +  int type; +  int r = 0; + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +  case NT_ALT: +    do { +      r = subexp_inf_recursive_check_trav(NCAR(node), env); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_QTFR: +    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); +    break; + +  case NT_ANCHOR: +    { +      AnchorNode* an = NANCHOR(node); +      switch (an->type) { +      case ANCHOR_PREC_READ: +      case ANCHOR_PREC_READ_NOT: +      case ANCHOR_LOOK_BEHIND: +      case ANCHOR_LOOK_BEHIND_NOT: +        r = subexp_inf_recursive_check_trav(an->target, env); +        break; +      } +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); + +      if (IS_ENCLOSE_RECURSION(en)) { +        SET_ENCLOSE_STATUS(node, NST_MARK1); +        r = subexp_inf_recursive_check(en->target, env, 1); +        if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; +        CLEAR_ENCLOSE_STATUS(node, NST_MARK1); +      } +      r = subexp_inf_recursive_check_trav(en->target, env); +    } +    break; + +  default: +    break; +  } + +  return r; +} + +static int +subexp_recursive_check(Node* node) +{ +  int r = 0; + +  switch (NTYPE(node)) { +  case NT_LIST: +  case NT_ALT: +    do { +      r |= subexp_recursive_check(NCAR(node)); +    } while (IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_QTFR: +    r = subexp_recursive_check(NQTFR(node)->target); +    break; + +  case NT_ANCHOR: +    { +      AnchorNode* an = NANCHOR(node); +      switch (an->type) { +      case ANCHOR_PREC_READ: +      case ANCHOR_PREC_READ_NOT: +      case ANCHOR_LOOK_BEHIND: +      case ANCHOR_LOOK_BEHIND_NOT: +        r = subexp_recursive_check(an->target); +        break; +      } +    } +    break; + +  case NT_CALL: +    r = subexp_recursive_check(NCALL(node)->target); +    if (r != 0) SET_CALL_RECURSION(node); +    break; + +  case NT_ENCLOSE: +    if (IS_ENCLOSE_MARK2(NENCLOSE(node))) +      return 0; +    else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) +      return 1; /* recursion */ +    else { +      SET_ENCLOSE_STATUS(node, NST_MARK2); +      r = subexp_recursive_check(NENCLOSE(node)->target); +      CLEAR_ENCLOSE_STATUS(node, NST_MARK2); +    } +    break; + +  default: +    break; +  } + +  return r; +} + + +static int +subexp_recursive_check_trav(Node* node, ScanEnv* env) +{ +#define FOUND_CALLED_NODE    1 + +  int type; +  int r = 0; + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +  case NT_ALT: +    { +      int ret; +      do { +        ret = subexp_recursive_check_trav(NCAR(node), env); +        if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; +        else if (ret < 0) return ret; +      } while (IS_NOT_NULL(node = NCDR(node))); +    } +    break; + +  case NT_QTFR: +    r = subexp_recursive_check_trav(NQTFR(node)->target, env); +    if (NQTFR(node)->upper == 0) { +      if (r == FOUND_CALLED_NODE) +        NQTFR(node)->is_refered = 1; +    } +    break; + +  case NT_ANCHOR: +    { +      AnchorNode* an = NANCHOR(node); +      switch (an->type) { +      case ANCHOR_PREC_READ: +      case ANCHOR_PREC_READ_NOT: +      case ANCHOR_LOOK_BEHIND: +      case ANCHOR_LOOK_BEHIND_NOT: +        r = subexp_recursive_check_trav(an->target, env); +        break; +      } +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); + +      if (! IS_ENCLOSE_RECURSION(en)) { +        if (IS_ENCLOSE_CALLED(en)) { +          SET_ENCLOSE_STATUS(node, NST_MARK1); +          r = subexp_recursive_check(en->target); +          if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); +          CLEAR_ENCLOSE_STATUS(node, NST_MARK1); +        } +      } +      r = subexp_recursive_check_trav(en->target, env); +      if (IS_ENCLOSE_CALLED(en)) +        r |= FOUND_CALLED_NODE; +    } +    break; + +  default: +    break; +  } + +  return r; +} + +static int +setup_subexp_call(Node* node, ScanEnv* env) +{ +  int type; +  int r = 0; + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +    do { +      r = setup_subexp_call(NCAR(node), env); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_ALT: +    do { +      r = setup_subexp_call(NCAR(node), env); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_QTFR: +    r = setup_subexp_call(NQTFR(node)->target, env); +    break; +  case NT_ENCLOSE: +    r = setup_subexp_call(NENCLOSE(node)->target, env); +    break; + +  case NT_CALL: +    { +      CallNode* cn = NCALL(node); +      Node** nodes = SCANENV_MEM_NODES(env); + +      if (cn->group_num != 0) { +        int gnum = cn->group_num; + +#ifdef USE_NAMED_GROUP +        if (env->num_named > 0 && +            IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && +            !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { +          return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; +        } +#endif +        if (gnum > env->num_mem) { +          onig_scan_env_set_error_string(env, +                   ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); +          return ONIGERR_UNDEFINED_GROUP_REFERENCE; +        } + +#ifdef USE_NAMED_GROUP +      set_call_attr: +#endif +        cn->target = nodes[cn->group_num]; +        if (IS_NULL(cn->target)) { +          onig_scan_env_set_error_string(env, +                   ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); +          return ONIGERR_UNDEFINED_NAME_REFERENCE; +        } +        SET_ENCLOSE_STATUS(cn->target, NST_CALLED); +        BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); +        cn->unset_addr_list = env->unset_addr_list; +      } +#ifdef USE_NAMED_GROUP +      else { +        int *refs; + +        int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, +                                           &refs); +        if (n <= 0) { +          onig_scan_env_set_error_string(env, +                   ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); +          return ONIGERR_UNDEFINED_NAME_REFERENCE; +        } +        else if (n > 1) { +          onig_scan_env_set_error_string(env, +               ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); +          return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; +        } +        else { +          cn->group_num = refs[0]; +          goto set_call_attr; +        } +      } +#endif +    } +    break; + +  case NT_ANCHOR: +    { +      AnchorNode* an = NANCHOR(node); + +      switch (an->type) { +      case ANCHOR_PREC_READ: +      case ANCHOR_PREC_READ_NOT: +      case ANCHOR_LOOK_BEHIND: +      case ANCHOR_LOOK_BEHIND_NOT: +        r = setup_subexp_call(an->target, env); +        break; +      } +    } +    break; + +  default: +    break; +  } + +  return r; +} +#endif + +/* divide different length alternatives in look-behind. +  (?<=A|B) ==> (?<=A)|(?<=B) +  (?<!A|B) ==> (?<!A)(?<!B) +*/ +static int +divide_look_behind_alternatives(Node* node) +{ +  Node *head, *np, *insert_node; +  AnchorNode* an = NANCHOR(node); +  int anc_type = an->type; + +  head = an->target; +  np = NCAR(head); +  swap_node(node, head); +  NCAR(node) = head; +  NANCHOR(head)->target = np; + +  np = node; +  while ((np = NCDR(np)) != NULL_NODE) { +    insert_node = onig_node_new_anchor(anc_type); +    CHECK_NULL_RETURN_MEMERR(insert_node); +    NANCHOR(insert_node)->target = NCAR(np); +    NCAR(np) = insert_node; +  } + +  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { +    np = node; +    do { +      SET_NTYPE(np, NT_LIST);  /* alt -> list */ +    } while ((np = NCDR(np)) != NULL_NODE); +  } +  return 0; +} + +static int +setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) +{ +  int r, len; +  AnchorNode* an = NANCHOR(node); + +  r = get_char_length_tree(an->target, reg, &len); +  if (r == 0) +    an->char_len = len; +  else if (r == GET_CHAR_LEN_VARLEN) +    r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; +  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { +    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) +      r = divide_look_behind_alternatives(node); +    else +      r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; +  } + +  return r; +} + +static int +next_setup(Node* node, Node* next_node, regex_t* reg) +{ +  int type; + + retry: +  type = NTYPE(node); +  if (type == NT_QTFR) { +    QtfrNode* qn = NQTFR(node); +    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { +#ifdef USE_QTFR_PEEK_NEXT +      Node* n = get_head_value_node(next_node, 1, reg); +      /* '\0': for UTF-16BE etc... */ +      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { +        qn->next_head_exact = n; +      } +#endif +      /* automatic posseivation a*b ==> (?>a*)b */ +      if (qn->lower <= 1) { +        int ttype = NTYPE(qn->target); +        if (IS_NODE_TYPE_SIMPLE(ttype)) { +          Node *x, *y; +          x = get_head_value_node(qn->target, 0, reg); +          if (IS_NOT_NULL(x)) { +            y = get_head_value_node(next_node,  0, reg); +            if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { +              Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); +              CHECK_NULL_RETURN_MEMERR(en); +              SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); +              swap_node(node, en); +              NENCLOSE(node)->target = en; +            } +          } +        } +      } +    } +  } +  else if (type == NT_ENCLOSE) { +    EncloseNode* en = NENCLOSE(node); +    if (en->type == ENCLOSE_MEMORY) { +      node = en->target; +      goto retry; +    } +  } +  return 0; +} + + +static int +update_string_node_case_fold(regex_t* reg, Node *node) +{ +  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; +  UChar *sbuf, *ebuf, *sp; +  int r, i, len, sbuf_size; +  StrNode* sn = NSTR(node); + +  end = sn->end; +  sbuf_size = (end - sn->s) * 2; +  sbuf = (UChar* )xmalloc(sbuf_size); +  CHECK_NULL_RETURN_MEMERR(sbuf); +  ebuf = sbuf + sbuf_size; + +  sp = sbuf; +  p = sn->s; +  while (p < end) { +    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); +    for (i = 0; i < len; i++) { +      if (sp >= ebuf) { +        sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); +        CHECK_NULL_RETURN_MEMERR(sbuf); +        sp = sbuf + sbuf_size; +        sbuf_size *= 2; +        ebuf = sbuf + sbuf_size; +      } + +      *sp++ = buf[i]; +    } +  } + +  r = onig_node_str_set(node, sbuf, sp); +  if (r != 0) { +    xfree(sbuf); +    return r; +  } + +  xfree(sbuf); +  return 0; +} + +static int +expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, +				 regex_t* reg) +{ +  int r; +  Node *node; + +  node = onig_node_new_str(s, end); +  if (IS_NULL(node)) return ONIGERR_MEMORY; + +  r = update_string_node_case_fold(reg, node); +  if (r != 0) { +    onig_node_free(node); +    return r; +  } + +  NSTRING_SET_AMBIG(node); +  NSTRING_SET_DONT_GET_OPT_INFO(node); +  *rnode = node; +  return 0; +} + +static int +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], +			    UChar *p, int slen, UChar *end, +			    regex_t* reg, Node **rnode) +{ +  int r, i, j, len, varlen; +  Node *anode, *var_anode, *snode, *xnode, *an; +  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + +  *rnode = var_anode = NULL_NODE; + +  varlen = 0; +  for (i = 0; i < item_num; i++) { +    if (items[i].byte_len != slen) { +      varlen = 1; +      break; +    } +  } + +  if (varlen != 0) { +    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); +    if (IS_NULL(var_anode)) return ONIGERR_MEMORY; + +    xnode = onig_node_new_list(NULL, NULL); +    if (IS_NULL(xnode)) goto mem_err; +    NCAR(var_anode) = xnode; + +    anode = onig_node_new_alt(NULL_NODE, NULL_NODE); +    if (IS_NULL(anode)) goto mem_err; +    NCAR(xnode) = anode; +  } +  else { +    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); +    if (IS_NULL(anode)) return ONIGERR_MEMORY; +  } + +  snode = onig_node_new_str(p, p + slen); +  if (IS_NULL(snode)) goto mem_err; + +  NCAR(anode) = snode; + +  for (i = 0; i < item_num; i++) { +    snode = onig_node_new_str(NULL, NULL); +    if (IS_NULL(snode)) goto mem_err; +     +    for (j = 0; j < items[i].code_len; j++) { +      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); +      if (len < 0) { +        r = len; +        goto mem_err2; +      } + +      r = onig_node_str_cat(snode, buf, buf + len); +      if (r != 0) goto mem_err2; +    } + +    an = onig_node_new_alt(NULL_NODE, NULL_NODE); +    if (IS_NULL(an)) { +      goto mem_err2; +    } + +    if (items[i].byte_len != slen) { +      Node *rem; +      UChar *q = p + items[i].byte_len; + +      if (q < end) { +        r = expand_case_fold_make_rem_string(&rem, q, end, reg); +        if (r != 0) { +          onig_node_free(an); +          goto mem_err2; +        } + +        xnode = onig_node_list_add(NULL_NODE, snode); +        if (IS_NULL(xnode)) { +          onig_node_free(an); +          onig_node_free(rem); +          goto mem_err2; +        } +        if (IS_NULL(onig_node_list_add(xnode, rem))) { +          onig_node_free(an); +          onig_node_free(xnode); +          onig_node_free(rem); +          goto mem_err; +        } + +        NCAR(an) = xnode; +      } +      else { +        NCAR(an) = snode; +      } + +      NCDR(var_anode) = an; +      var_anode = an; +    } +    else { +      NCAR(an)     = snode; +      NCDR(anode) = an; +      anode = an; +    } +  } + +  return varlen; + + mem_err2: +  onig_node_free(snode); + + mem_err: +  onig_node_free(*rnode); + +  return ONIGERR_MEMORY; +} + +static int +expand_case_fold_string(Node* node, regex_t* reg) +{ +#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8 + +  int r, n, len, alt_num; +  UChar *start, *end, *p; +  Node *top_root, *root, *snode, *prev_node; +  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; +  StrNode* sn = NSTR(node); + +  if (NSTRING_IS_AMBIG(node)) return 0; + +  start = sn->s; +  end   = sn->end; +  if (start >= end) return 0; + +  r = 0; +  top_root = root = prev_node = snode = NULL_NODE; +  alt_num = 1; +  p = start; +  while (p < end) { +    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, +					   p, end, items); +    if (n < 0) { +      r = n; +      goto err; +    } + +    len = enclen(reg->enc, p); + +    if (n == 0) { +      if (IS_NULL(snode)) { +        if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { +          top_root = root = onig_node_list_add(NULL_NODE, prev_node); +          if (IS_NULL(root)) { +            onig_node_free(prev_node); +            goto mem_err; +          } +        } + +        prev_node = snode = onig_node_new_str(NULL, NULL); +        if (IS_NULL(snode)) goto mem_err; +        if (IS_NOT_NULL(root)) { +          if (IS_NULL(onig_node_list_add(root, snode))) { +            onig_node_free(snode); +            goto mem_err; +          } +        } +      } + +      r = onig_node_str_cat(snode, p, p + len); +      if (r != 0) goto err; +    } +    else { +      alt_num *= (n + 1); +      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; + +      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { +        top_root = root = onig_node_list_add(NULL_NODE, prev_node); +        if (IS_NULL(root)) { +          onig_node_free(prev_node); +          goto mem_err; +        } +      } + +      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); +      if (r < 0) goto mem_err; +      if (r == 1) { +        if (IS_NULL(root)) { +          top_root = prev_node; +        } +        else { +          if (IS_NULL(onig_node_list_add(root, prev_node))) { +            onig_node_free(prev_node); +            goto mem_err; +          } +        } + +        root = NCAR(prev_node); +      } +      else { /* r == 0 */ +        if (IS_NOT_NULL(root)) { +          if (IS_NULL(onig_node_list_add(root, prev_node))) { +            onig_node_free(prev_node); +            goto mem_err; +          } +        } +      } + +      snode = NULL_NODE; +    } + +    p += len; +  } + +  if (p < end) { +    Node *srem; + +    r = expand_case_fold_make_rem_string(&srem, p, end, reg); +    if (r != 0) goto mem_err; + +    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { +      top_root = root = onig_node_list_add(NULL_NODE, prev_node); +      if (IS_NULL(root)) { +        onig_node_free(srem); +        onig_node_free(prev_node); +        goto mem_err; +      } +    } + +    if (IS_NULL(root)) { +      prev_node = srem; +    } +    else { +      if (IS_NULL(onig_node_list_add(root, srem))) { +        onig_node_free(srem); +        goto mem_err; +      } +    } +  } + +  /* ending */ +  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); +  swap_node(node, top_root); +  onig_node_free(top_root); +  return 0; + + mem_err: +  r = ONIGERR_MEMORY; + + err: +  onig_node_free(top_root); +  return r; +} + + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define CEC_THRES_NUM_BIG_REPEAT         512 +#define CEC_INFINITE_NUM          0x7fffffff + +#define CEC_IN_INFINITE_REPEAT    (1<<0) +#define CEC_IN_FINITE_REPEAT      (1<<1) +#define CEC_CONT_BIG_REPEAT       (1<<2) + +static int +setup_comb_exp_check(Node* node, int state, ScanEnv* env) +{ +  int type; +  int r = state; + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +    { +      Node* prev = NULL_NODE; +      do { +        r = setup_comb_exp_check(NCAR(node), r, env); +        prev = NCAR(node); +      } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); +    } +    break; + +  case NT_ALT: +    { +      int ret; +      do { +        ret = setup_comb_exp_check(NCAR(node), state, env); +        r |= ret; +      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); +    } +    break; + +  case NT_QTFR: +    { +      int child_state = state; +      int add_state = 0; +      QtfrNode* qn = NQTFR(node); +      Node* target = qn->target; +      int var_num; + +      if (! IS_REPEAT_INFINITE(qn->upper)) { +        if (qn->upper > 1) { +          /* {0,1}, {1,1} are allowed */ +          child_state |= CEC_IN_FINITE_REPEAT; + +          /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ +          if (env->backrefed_mem == 0) { +            if (NTYPE(qn->target) == NT_ENCLOSE) { +              EncloseNode* en = NENCLOSE(qn->target); +              if (en->type == ENCLOSE_MEMORY) { +                if (NTYPE(en->target) == NT_QTFR) { +                  QtfrNode* q = NQTFR(en->target); +                  if (IS_REPEAT_INFINITE(q->upper) +                      && q->greedy == qn->greedy) { +                    qn->upper = (qn->lower == 0 ? 1 : qn->lower); +                    if (qn->upper == 1) +                      child_state = state; +                  } +                } +              } +            } +          } +        } +      } + +      if (state & CEC_IN_FINITE_REPEAT) { +        qn->comb_exp_check_num = -1; +      } +      else { +        if (IS_REPEAT_INFINITE(qn->upper)) { +          var_num = CEC_INFINITE_NUM; +          child_state |= CEC_IN_INFINITE_REPEAT; +        } +        else { +          var_num = qn->upper - qn->lower; +        } + +        if (var_num >= CEC_THRES_NUM_BIG_REPEAT) +          add_state |= CEC_CONT_BIG_REPEAT; + +        if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || +            ((state & CEC_CONT_BIG_REPEAT) != 0 && +             var_num >= CEC_THRES_NUM_BIG_REPEAT)) { +          if (qn->comb_exp_check_num == 0) { +            env->num_comb_exp_check++; +            qn->comb_exp_check_num = env->num_comb_exp_check; +            if (env->curr_max_regnum > env->comb_exp_max_regnum) +              env->comb_exp_max_regnum = env->curr_max_regnum; +          } +        } +      } + +      r = setup_comb_exp_check(target, child_state, env); +      r |= add_state; +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); + +      switch (en->type) { +      case ENCLOSE_MEMORY: +        { +          if (env->curr_max_regnum < en->regnum) +            env->curr_max_regnum = en->regnum; + +          r = setup_comb_exp_check(en->target, state, env); +        } +        break; + +      default: +        r = setup_comb_exp_check(en->target, state, env); +        break; +      } +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    if (IS_CALL_RECURSION(NCALL(node))) +      env->has_recursion = 1; +    else +      r = setup_comb_exp_check(NCALL(node)->target, state, env); +    break; +#endif + +  default: +    break; +  } + +  return r; +} +#endif + +#define IN_ALT        (1<<0) +#define IN_NOT        (1<<1) +#define IN_REPEAT     (1<<2) +#define IN_VAR_REPEAT (1<<3) + +/* setup_tree does the following work. + 1. check empty loop. (set qn->target_empty_info) + 2. expand ignore-case in char class. + 3. set memory status bit flags. (reg->mem_stats) + 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. + 5. find invalid patterns in look-behind. + 6. expand repeated string. + */ +static int +setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +{ +  int type; +  int r = 0; + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +    { +      Node* prev = NULL_NODE; +      do { +        r = setup_tree(NCAR(node), reg, state, env); +        if (IS_NOT_NULL(prev) && r == 0) { +          r = next_setup(prev, NCAR(node), reg); +        } +        prev = NCAR(node); +      } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    } +    break; + +  case NT_ALT: +    do { +      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); +    } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); +    break; + +  case NT_CCLASS: +    break; + +  case NT_STR: +    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { +      r = expand_case_fold_string(node, reg); +    } +    break; + +  case NT_CTYPE: +  case NT_CANY: +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    break; +#endif + +  case NT_BREF: +    { +      int i; +      int* p; +      Node** nodes = SCANENV_MEM_NODES(env); +      BRefNode* br = NBREF(node); +      p = BACKREFS_P(br); +      for (i = 0; i < br->back_num; i++) { +        if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF; +        BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); +        BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_WITH_LEVEL +        if (IS_BACKREF_NEST_LEVEL(br)) { +          BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); +        } +#endif +        SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); +      } +    } +    break; + +  case NT_QTFR: +    { +      OnigDistance d; +      QtfrNode* qn = NQTFR(node); +      Node* target = qn->target; + +      if ((state & IN_REPEAT) != 0) { +        qn->state |= NST_IN_REPEAT; +      } + +      if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { +        r = get_min_match_length(target, &d, env); +        if (r) break; +        if (d == 0) { +          qn->target_empty_info = NQ_TARGET_IS_EMPTY; +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT +          r = quantifiers_memory_node_info(target); +          if (r < 0) break; +          if (r > 0) { +            qn->target_empty_info = r; +          } +#endif +#if 0 +          r = get_max_match_length(target, &d, env); +          if (r == 0 && d == 0) { +            /*  ()* ==> ()?, ()+ ==> ()  */ +            qn->upper = 1; +            if (qn->lower > 1) qn->lower = 1; +            if (NTYPE(target) == NT_STR) { +              qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */ +            } +          } +#endif +        } +      } + +      state |= IN_REPEAT; +      if (qn->lower != qn->upper) +        state |= IN_VAR_REPEAT; +      r = setup_tree(target, reg, state, env); +      if (r) break; + +      /* expand string */ +#define EXPAND_STRING_MAX_LENGTH  100 +      if (NTYPE(target) == NT_STR) { +        if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && +            qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { +          int len = NSTRING_LEN(target); +          StrNode* sn = NSTR(target); + +          if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { +            int i, n = qn->lower; +            onig_node_conv_to_str_node(node, NSTR(target)->flag); +            for (i = 0; i < n; i++) { +              r = onig_node_str_cat(node, sn->s, sn->end); +              if (r) break; +            } +            onig_node_free(target); +            break; /* break case NT_QTFR: */ +          } +        } +      } + +#ifdef USE_OP_PUSH_OR_JUMP_EXACT +      if (qn->greedy && (qn->target_empty_info != 0)) { +        if (NTYPE(target) == NT_QTFR) { +          QtfrNode* tqn = NQTFR(target); +          if (IS_NOT_NULL(tqn->head_exact)) { +            qn->head_exact  = tqn->head_exact; +            tqn->head_exact = NULL; +          } +        } +        else { +          qn->head_exact = get_head_value_node(qn->target, 1, reg); +        } +      } +#endif +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); + +      switch (en->type) { +      case ENCLOSE_OPTION: +        { +          OnigOptionType options = reg->options; +          reg->options = NENCLOSE(node)->option; +          r = setup_tree(NENCLOSE(node)->target, reg, state, env); +          reg->options = options; +        } +        break; + +      case ENCLOSE_MEMORY: +        if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { +          BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); +          /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ +        } +        r = setup_tree(en->target, reg, state, env); +        break; + +      case ENCLOSE_STOP_BACKTRACK: +        { +          Node* target = en->target; +          r = setup_tree(target, reg, state, env); +          if (NTYPE(target) == NT_QTFR) { +            QtfrNode* tqn = NQTFR(target); +            if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && +                tqn->greedy != 0) {  /* (?>a*), a*+ etc... */ +              int qtype = NTYPE(tqn->target); +              if (IS_NODE_TYPE_SIMPLE(qtype)) +                SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); +            } +          } +        } +        break; +      } +    } +    break; + +  case NT_ANCHOR: +    { +      AnchorNode* an = NANCHOR(node); + +      switch (an->type) { +      case ANCHOR_PREC_READ: +        r = setup_tree(an->target, reg, state, env); +        break; +      case ANCHOR_PREC_READ_NOT: +        r = setup_tree(an->target, reg, (state | IN_NOT), env); +        break; + +/* allowed node types in look-behind */ +#define ALLOWED_TYPE_IN_LB  \ +  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ +    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) + +#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY ) +#define ALLOWED_ENCLOSE_IN_LB_NOT   0 + +#define ALLOWED_ANCHOR_IN_LB \ +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +#define ALLOWED_ANCHOR_IN_LB_NOT \ +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) + +      case ANCHOR_LOOK_BEHIND: +        { +          r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, +                              ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); +          if (r < 0) return r; +          if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; +          r = setup_look_behind(node, reg, env); +          if (r != 0) return r; +          r = setup_tree(an->target, reg, state, env); +        } +        break; + +      case ANCHOR_LOOK_BEHIND_NOT: +        { +          r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, +                   ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); +          if (r < 0) return r; +          if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; +          r = setup_look_behind(node, reg, env); +          if (r != 0) return r; +          r = setup_tree(an->target, reg, (state | IN_NOT), env); +        } +        break; +      } +    } +    break; + +  default: +    break; +  } + +  return r; +} + +/* set skip map for Boyer-Moor search */ +static int +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, +	    UChar skip[], int** int_skip) +{ +  int i, len; + +  len = end - s; +  if (len < ONIG_CHAR_TABLE_SIZE) { +    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; + +    for (i = 0; i < len - 1; i++) +      skip[s[i]] = len - 1 - i; +  } +  else { +    if (IS_NULL(*int_skip)) { +      *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); +      if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; +    } +    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; + +    for (i = 0; i < len - 1; i++) +      (*int_skip)[s[i]] = len - 1 - i; +  } +  return 0; +} + +#define OPT_EXACT_MAXLEN   24 + +typedef struct { +  OnigDistance min;  /* min byte length */ +  OnigDistance max;  /* max byte length */ +} MinMaxLen; + +typedef struct { +  MinMaxLen        mmd; +  OnigEncoding     enc; +  OnigOptionType   options; +  OnigCaseFoldType case_fold_flag; +  ScanEnv*         scan_env; +} OptEnv; + +typedef struct { +  int left_anchor; +  int right_anchor; +} OptAncInfo; + +typedef struct { +  MinMaxLen  mmd; /* info position */ +  OptAncInfo anc; + +  int   reach_end; +  int   ignore_case; +  int   len; +  UChar s[OPT_EXACT_MAXLEN]; +} OptExactInfo; + +typedef struct { +  MinMaxLen mmd; /* info position */ +  OptAncInfo anc; + +  int   value;      /* weighted value */ +  UChar map[ONIG_CHAR_TABLE_SIZE]; +} OptMapInfo; + +typedef struct { +  MinMaxLen    len; + +  OptAncInfo   anc; +  OptExactInfo exb;    /* boundary */ +  OptExactInfo exm;    /* middle */ +  OptExactInfo expr;   /* prec read (?=...) */ + +  OptMapInfo   map;   /* boundary */ +} NodeOptInfo; + + +static int +map_position_value(OnigEncoding enc, int i) +{ +  static const short int ByteValTable[] = { +     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1, +     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, +    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5, +     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5, +     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6, +     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5, +     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6, +     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1 +  }; + +  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { +    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) +      return 20; +    else +      return (int )ByteValTable[i]; +  } +  else +    return 4;   /* Take it easy. */ +} + +static int +distance_value(MinMaxLen* mm) +{ +  /* 1000 / (min-max-dist + 1) */ +  static const short int dist_vals[] = { +    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,  +      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,  +      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,  +      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,  +      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,  +      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,  +      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,  +      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,  +      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,  +      11,   11,   11,   11,   11,   10,   10,   10,   10,   10 +  }; + +  int d; + +  if (mm->max == ONIG_INFINITE_DISTANCE) return 0; + +  d = mm->max - mm->min; +  if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) +    /* return dist_vals[d] * 16 / (mm->min + 12); */ +    return (int )dist_vals[d]; +  else +    return 1; +} + +static int +comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +{ +  if (v2 <= 0) return -1; +  if (v1 <= 0) return  1; + +  v1 *= distance_value(d1); +  v2 *= distance_value(d2); + +  if (v2 > v1) return  1; +  if (v2 < v1) return -1; + +  if (d2->min < d1->min) return  1; +  if (d2->min > d1->min) return -1; +  return 0; +} + +static int +is_equal_mml(MinMaxLen* a, MinMaxLen* b) +{ +  return (a->min == b->min && a->max == b->max) ? 1 : 0; +} + + +static void +set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) +{ +  mml->min = min; +  mml->max = max; +} + +static void +clear_mml(MinMaxLen* mml) +{ +  mml->min = mml->max = 0; +} + +static void +copy_mml(MinMaxLen* to, MinMaxLen* from) +{ +  to->min = from->min; +  to->max = from->max; +} + +static void +add_mml(MinMaxLen* to, MinMaxLen* from) +{ +  to->min = distance_add(to->min, from->min); +  to->max = distance_add(to->max, from->max); +} + +#if 0 +static void +add_len_mml(MinMaxLen* to, OnigDistance len) +{ +  to->min = distance_add(to->min, len); +  to->max = distance_add(to->max, len); +} +#endif + +static void +alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +{ +  if (to->min > from->min) to->min = from->min; +  if (to->max < from->max) to->max = from->max; +} + +static void +copy_opt_env(OptEnv* to, OptEnv* from) +{ +  *to = *from; +} + +static void +clear_opt_anc_info(OptAncInfo* anc) +{ +  anc->left_anchor  = 0; +  anc->right_anchor = 0; +} + +static void +copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) +{ +  *to = *from; +} + +static void +concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, +		    OnigDistance left_len, OnigDistance right_len) +{ +  clear_opt_anc_info(to); + +  to->left_anchor = left->left_anchor; +  if (left_len == 0) { +    to->left_anchor |= right->left_anchor; +  } + +  to->right_anchor = right->right_anchor; +  if (right_len == 0) { +    to->right_anchor |= left->right_anchor; +  } +} + +static int +is_left_anchor(int anc) +{ +  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || +      anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || +      anc == ANCHOR_PREC_READ_NOT) +    return 0; + +  return 1; +} + +static int +is_set_opt_anc_info(OptAncInfo* to, int anc) +{ +  if ((to->left_anchor & anc) != 0) return 1; + +  return ((to->right_anchor & anc) != 0 ? 1 : 0); +} + +static void +add_opt_anc_info(OptAncInfo* to, int anc) +{ +  if (is_left_anchor(anc)) +    to->left_anchor |= anc; +  else +    to->right_anchor |= anc; +} + +static void +remove_opt_anc_info(OptAncInfo* to, int anc) +{ +  if (is_left_anchor(anc)) +    to->left_anchor &= ~anc; +  else +    to->right_anchor &= ~anc; +} + +static void +alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) +{ +  to->left_anchor  &= add->left_anchor; +  to->right_anchor &= add->right_anchor; +} + +static int +is_full_opt_exact_info(OptExactInfo* ex) +{ +  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); +} + +static void +clear_opt_exact_info(OptExactInfo* ex) +{ +  clear_mml(&ex->mmd); +  clear_opt_anc_info(&ex->anc); +  ex->reach_end   = 0; +  ex->ignore_case = 0; +  ex->len         = 0; +  ex->s[0]        = '\0'; +} + +static void +copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) +{ +  *to = *from; +} + +static void +concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) +{ +  int i, j, len; +  UChar *p, *end; +  OptAncInfo tanc; + +  if (! to->ignore_case && add->ignore_case) { +    if (to->len >= add->len) return ;  /* avoid */ + +    to->ignore_case = 1; +  } + +  p = add->s; +  end = p + add->len; +  for (i = to->len; p < end; ) { +    len = enclen(enc, p); +    if (i + len > OPT_EXACT_MAXLEN) break; +    for (j = 0; j < len && p < end; j++) +      to->s[i++] = *p++; +  } + +  to->len = i; +  to->reach_end = (p == end ? add->reach_end : 0); + +  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); +  if (! to->reach_end) tanc.right_anchor = 0; +  copy_opt_anc_info(&to->anc, &tanc); +} + +static void +concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, +			  int raw ARG_UNUSED, OnigEncoding enc) +{ +  int i, j, len; +  UChar *p; + +  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { +    len = enclen(enc, p); +    if (i + len > OPT_EXACT_MAXLEN) break; +    for (j = 0; j < len && p < end; j++) +      to->s[i++] = *p++; +  } + +  to->len = i; +} + +static void +alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) +{ +  int i, j, len; + +  if (add->len == 0 || to->len == 0) { +    clear_opt_exact_info(to); +    return ; +  } + +  if (! is_equal_mml(&to->mmd, &add->mmd)) { +    clear_opt_exact_info(to); +    return ; +  } + +  for (i = 0; i < to->len && i < add->len; ) { +    if (to->s[i] != add->s[i]) break; +    len = enclen(env->enc, to->s + i); + +    for (j = 1; j < len; j++) { +      if (to->s[i+j] != add->s[i+j]) break; +    } +    if (j < len) break; +    i += len; +  } + +  if (! add->reach_end || i < add->len || i < to->len) { +    to->reach_end = 0; +  } +  to->len = i; +  to->ignore_case |= add->ignore_case; + +  alt_merge_opt_anc_info(&to->anc, &add->anc); +  if (! to->reach_end) to->anc.right_anchor = 0; +} + +static void +select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) +{ +  int v1, v2; + +  v1 = now->len; +  v2 = alt->len; + +  if (v2 == 0) { +    return ; +  } +  else if (v1 == 0) { +    copy_opt_exact_info(now, alt); +    return ; +  } +  else if (v1 <= 2 && v2 <= 2) { +    /* ByteValTable[x] is big value --> low price */ +    v2 = map_position_value(enc, now->s[0]); +    v1 = map_position_value(enc, alt->s[0]); + +    if (now->len > 1) v1 += 5; +    if (alt->len > 1) v2 += 5; +  } + +  if (now->ignore_case == 0) v1 *= 2; +  if (alt->ignore_case == 0) v2 *= 2; + +  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) +    copy_opt_exact_info(now, alt); +} + +static void +clear_opt_map_info(OptMapInfo* map) +{ +  static const OptMapInfo clean_info = { +    {0, 0}, {0, 0}, 0, +    { +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +    } +  }; + +  xmemcpy(map, &clean_info, sizeof(OptMapInfo)); +} + +static void +copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) +{ +  *to = *from; +} + +static void +add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) +{ +  if (map->map[c] == 0) { +    map->map[c] = 1; +    map->value += map_position_value(enc, c); +  } +} + +static int +add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, +                          OnigEncoding enc, OnigCaseFoldType case_fold_flag) +{ +  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; +  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; +  int i, n; + +  add_char_opt_map_info(map, p[0], enc); + +  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); +  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); +  if (n < 0) return n; + +  for (i = 0; i < n; i++) { +    ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); +    add_char_opt_map_info(map, buf[0], enc); +  } + +  return 0; +} + +static void +select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) +{ +  static int z = 1<<15; /* 32768: something big value */ + +  int v1, v2; + +  if (alt->value == 0) return ; +  if (now->value == 0) { +    copy_opt_map_info(now, alt); +    return ; +  } + +  v1 = z / now->value; +  v2 = z / alt->value; +  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) +    copy_opt_map_info(now, alt); +} + +static int +comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) +{ +#define COMP_EM_BASE  20 +  int ve, vm; + +  if (m->value <= 0) return -1; + +  ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); +  vm = COMP_EM_BASE * 5 * 2 / m->value; +  return comp_distance_value(&e->mmd, &m->mmd, ve, vm); +} + +static void +alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) +{ +  int i, val; + +  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ +  if (to->value == 0) return ; +  if (add->value == 0 || to->mmd.max < add->mmd.min) { +    clear_opt_map_info(to); +    return ; +  } + +  alt_merge_mml(&to->mmd, &add->mmd); + +  val = 0; +  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { +    if (add->map[i]) +      to->map[i] = 1; + +    if (to->map[i]) +      val += map_position_value(enc, i); +  } +  to->value = val; + +  alt_merge_opt_anc_info(&to->anc, &add->anc); +} + +static void +set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) +{ +  copy_mml(&(opt->exb.mmd),  mmd); +  copy_mml(&(opt->expr.mmd), mmd); +  copy_mml(&(opt->map.mmd),  mmd); +} + +static void +clear_node_opt_info(NodeOptInfo* opt) +{ +  clear_mml(&opt->len); +  clear_opt_anc_info(&opt->anc); +  clear_opt_exact_info(&opt->exb); +  clear_opt_exact_info(&opt->exm); +  clear_opt_exact_info(&opt->expr); +  clear_opt_map_info(&opt->map); +} + +static void +copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) +{ +  *to = *from; +} + +static void +concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) +{ +  int exb_reach, exm_reach; +  OptAncInfo tanc; + +  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); +  copy_opt_anc_info(&to->anc, &tanc); + +  if (add->exb.len > 0 && to->len.max == 0) { +    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, +			to->len.max, add->len.max); +    copy_opt_anc_info(&add->exb.anc, &tanc); +  } + +  if (add->map.value > 0 && to->len.max == 0) { +    if (add->map.mmd.max == 0) +      add->map.anc.left_anchor |= to->anc.left_anchor; +  } + +  exb_reach = to->exb.reach_end; +  exm_reach = to->exm.reach_end; + +  if (add->len.max != 0) +    to->exb.reach_end = to->exm.reach_end = 0; + +  if (add->exb.len > 0) { +    if (exb_reach) { +      concat_opt_exact_info(&to->exb, &add->exb, enc); +      clear_opt_exact_info(&add->exb); +    } +    else if (exm_reach) { +      concat_opt_exact_info(&to->exm, &add->exb, enc); +      clear_opt_exact_info(&add->exb); +    } +  } +  select_opt_exact_info(enc, &to->exm, &add->exb); +  select_opt_exact_info(enc, &to->exm, &add->exm); + +  if (to->expr.len > 0) { +    if (add->len.max > 0) { +      if (to->expr.len > (int )add->len.max) +	to->expr.len = add->len.max; + +      if (to->expr.mmd.max == 0) +        select_opt_exact_info(enc, &to->exb, &to->expr); +      else +        select_opt_exact_info(enc, &to->exm, &to->expr); +    } +  } +  else if (add->expr.len > 0) { +    copy_opt_exact_info(&to->expr, &add->expr); +  } + +  select_opt_map_info(&to->map, &add->map); + +  add_mml(&to->len, &add->len); +} + +static void +alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) +{ +  alt_merge_opt_anc_info  (&to->anc,  &add->anc); +  alt_merge_opt_exact_info(&to->exb,  &add->exb, env); +  alt_merge_opt_exact_info(&to->exm,  &add->exm, env); +  alt_merge_opt_exact_info(&to->expr, &add->expr, env); +  alt_merge_opt_map_info(env->enc, &to->map,  &add->map); + +  alt_merge_mml(&to->len, &add->len); +} + + +#define MAX_NODE_OPT_INFO_REF_COUNT    5 + +static int +optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) +{ +  int type; +  int r = 0; + +  clear_node_opt_info(opt); +  set_bound_node_opt_info(opt, &env->mmd); + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +    { +      OptEnv nenv; +      NodeOptInfo nopt; +      Node* nd = node; + +      copy_opt_env(&nenv, env); +      do { +        r = optimize_node_left(NCAR(nd), &nopt, &nenv); +        if (r == 0) { +          add_mml(&nenv.mmd, &nopt.len); +          concat_left_node_opt_info(env->enc, opt, &nopt); +        } +      } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); +    } +    break; + +  case NT_ALT: +    { +      NodeOptInfo nopt; +      Node* nd = node; + +      do { +        r = optimize_node_left(NCAR(nd), &nopt, env); +        if (r == 0) { +          if (nd == node) copy_node_opt_info(opt, &nopt); +          else            alt_merge_node_opt_info(opt, &nopt, env); +        } +      } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); +    } +    break; + +  case NT_STR: +    { +      StrNode* sn = NSTR(node); +      int slen = sn->end - sn->s; +      int is_raw = NSTRING_IS_RAW(node); + +      if (! NSTRING_IS_AMBIG(node)) { +        concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, +                                  NSTRING_IS_RAW(node), env->enc); +        if (slen > 0) { +          add_char_opt_map_info(&opt->map, *(sn->s), env->enc); +        } +        set_mml(&opt->len, slen, slen); +      } +      else { +        int max; + +        if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { +          int n = onigenc_strlen(env->enc, sn->s, sn->end); +          max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; +        } +        else { +          concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, +                                    is_raw, env->enc); +          opt->exb.ignore_case = 1; + +          if (slen > 0) { +            r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, +                                          env->enc, env->case_fold_flag); +            if (r != 0) break; +          } + +          max = slen; +        } + +        set_mml(&opt->len, slen, max); +      } + +      if (opt->exb.len == slen) +        opt->exb.reach_end = 1; +    } +    break; + +  case NT_CCLASS: +    { +      int i, z; +      CClassNode* cc = NCCLASS(node); + +      /* no need to check ignore case. (setted in setup_tree()) */ + +      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { +        OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); +        OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + +        set_mml(&opt->len, min, max); +      } +      else { +        for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +          z = BITSET_AT(cc->bs, i); +          if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { +            add_char_opt_map_info(&opt->map, (UChar )i, env->enc); +          } +        } +        set_mml(&opt->len, 1, 1); +      } +    } +    break; + +  case NT_CTYPE: +    { +      int i, min, max; + +      max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + +      if (max == 1) { +        min = 1; + +        switch (NCTYPE(node)->ctype) { +        case ONIGENC_CTYPE_WORD: +          if (NCTYPE(node)->not != 0) { +            for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +              if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { +                add_char_opt_map_info(&opt->map, (UChar )i, env->enc); +              } +            } +          } +          else { +            for (i = 0; i < SINGLE_BYTE_SIZE; i++) { +              if (ONIGENC_IS_CODE_WORD(env->enc, i)) { +                add_char_opt_map_info(&opt->map, (UChar )i, env->enc); +              } +            } +          } +          break; +        } +      } +      else { +        min = ONIGENC_MBC_MINLEN(env->enc); +      } +      set_mml(&opt->len, min, max); +    } +    break; + +  case NT_CANY: +    { +      OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); +      OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); +      set_mml(&opt->len, min, max); +    } +    break; + +  case NT_ANCHOR: +    switch (NANCHOR(node)->type) { +    case ANCHOR_BEGIN_BUF: +    case ANCHOR_BEGIN_POSITION: +    case ANCHOR_BEGIN_LINE: +    case ANCHOR_END_BUF: +    case ANCHOR_SEMI_END_BUF: +    case ANCHOR_END_LINE: +      add_opt_anc_info(&opt->anc, NANCHOR(node)->type); +      break; + +    case ANCHOR_PREC_READ: +      { +        NodeOptInfo nopt; + +        r = optimize_node_left(NANCHOR(node)->target, &nopt, env); +        if (r == 0) { +          if (nopt.exb.len > 0) +            copy_opt_exact_info(&opt->expr, &nopt.exb); +          else if (nopt.exm.len > 0) +            copy_opt_exact_info(&opt->expr, &nopt.exm); + +          opt->expr.reach_end = 0; + +          if (nopt.map.value > 0) +            copy_opt_map_info(&opt->map, &nopt.map); +        } +      } +      break; + +    case ANCHOR_PREC_READ_NOT: +    case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */ +    case ANCHOR_LOOK_BEHIND_NOT: +      break; +    } +    break; + +  case NT_BREF: +    { +      int i; +      int* backs; +      OnigDistance min, max, tmin, tmax; +      Node** nodes = SCANENV_MEM_NODES(env->scan_env); +      BRefNode* br = NBREF(node); + +      if (br->state & NST_RECURSION) { +        set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); +        break; +      } +      backs = BACKREFS_P(br); +      r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); +      if (r != 0) break; +      r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); +      if (r != 0) break; +      for (i = 1; i < br->back_num; i++) { +        r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); +        if (r != 0) break; +        r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); +        if (r != 0) break; +        if (min > tmin) min = tmin; +        if (max < tmax) max = tmax; +      } +      if (r == 0) set_mml(&opt->len, min, max); +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    if (IS_CALL_RECURSION(NCALL(node))) +      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); +    else { +      OnigOptionType save = env->options; +      env->options = NENCLOSE(NCALL(node)->target)->option; +      r = optimize_node_left(NCALL(node)->target, opt, env); +      env->options = save; +    } +    break; +#endif + +  case NT_QTFR: +    { +      int i; +      OnigDistance min, max; +      NodeOptInfo nopt; +      QtfrNode* qn = NQTFR(node); + +      r = optimize_node_left(qn->target, &nopt, env); +      if (r) break; + +      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { +        if (env->mmd.max == 0 && +            NTYPE(qn->target) == NT_CANY && qn->greedy) { +          if (IS_MULTILINE(env->options)) +            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); +          else +            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); +        } +      } +      else { +        if (qn->lower > 0) { +          copy_node_opt_info(opt, &nopt); +          if (nopt.exb.len > 0) { +            if (nopt.exb.reach_end) { +              for (i = 2; i <= qn->lower && +                     ! is_full_opt_exact_info(&opt->exb); i++) { +                concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); +              } +              if (i < qn->lower) { +                opt->exb.reach_end = 0; +              } +            } +          } + +          if (qn->lower != qn->upper) { +            opt->exb.reach_end = 0; +            opt->exm.reach_end = 0; +          } +          if (qn->lower > 1) +            opt->exm.reach_end = 0; +        } +      } + +      min = distance_multiply(nopt.len.min, qn->lower); +      if (IS_REPEAT_INFINITE(qn->upper)) +        max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); +      else +        max = distance_multiply(nopt.len.max, qn->upper); + +      set_mml(&opt->len, min, max); +    } +    break; + +  case NT_ENCLOSE: +    { +      EncloseNode* en = NENCLOSE(node); + +      switch (en->type) { +      case ENCLOSE_OPTION: +        { +          OnigOptionType save = env->options; + +          env->options = en->option; +          r = optimize_node_left(en->target, opt, env); +          env->options = save; +        } +        break; + +      case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL +        en->opt_count++; +        if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { +          OnigDistance min, max; + +          min = 0; +          max = ONIG_INFINITE_DISTANCE; +          if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; +          if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; +          set_mml(&opt->len, min, max); +        } +        else +#endif +          { +            r = optimize_node_left(en->target, opt, env); + +            if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { +              if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) +                remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); +            } +          } +        break; + +      case ENCLOSE_STOP_BACKTRACK: +        r = optimize_node_left(en->target, opt, env); +        break; +      } +    } +    break; + +  default: +#ifdef ONIG_DEBUG +    fprintf(stderr, "optimize_node_left: undefined node type %d\n", +	    NTYPE(node)); +#endif +    r = ONIGERR_TYPE_BUG; +    break; +  } + +  return r; +} + +static int +set_optimize_exact_info(regex_t* reg, OptExactInfo* e) +{ +  int r; + +  if (e->len == 0) return 0; + +  if (e->ignore_case) { +    reg->exact = (UChar* )xmalloc(e->len); +    CHECK_NULL_RETURN_MEMERR(reg->exact); +    xmemcpy(reg->exact, e->s, e->len); +    reg->exact_end = reg->exact + e->len; +    reg->optimize = ONIG_OPTIMIZE_EXACT_IC; +  } +  else { +    int allow_reverse; + +    reg->exact = str_dup(e->s, e->s + e->len); +    CHECK_NULL_RETURN_MEMERR(reg->exact); +    reg->exact_end = reg->exact + e->len; +  +    allow_reverse = +	ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + +    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { +      r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, +	              reg->map, &(reg->int_map)); +      if (r) return r; + +      reg->optimize = (allow_reverse != 0 +		       ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); +    } +    else { +      reg->optimize = ONIG_OPTIMIZE_EXACT; +    } +  } + +  reg->dmin = e->mmd.min; +  reg->dmax = e->mmd.max; + +  if (reg->dmin != ONIG_INFINITE_DISTANCE) { +    reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); +  } + +  return 0; +} + +static void +set_optimize_map_info(regex_t* reg, OptMapInfo* m) +{ +  int i; + +  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) +    reg->map[i] = m->map[i]; + +  reg->optimize   = ONIG_OPTIMIZE_MAP; +  reg->dmin       = m->mmd.min; +  reg->dmax       = m->mmd.max; + +  if (reg->dmin != ONIG_INFINITE_DISTANCE) { +    reg->threshold_len = reg->dmin + 1; +  } +} + +static void +set_sub_anchor(regex_t* reg, OptAncInfo* anc) +{ +  reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE; +  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; +} + +#ifdef ONIG_DEBUG +static void print_optimize_info(FILE* f, regex_t* reg); +#endif + +static int +set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) +{ + +  int r; +  NodeOptInfo opt; +  OptEnv env; + +  env.enc            = reg->enc; +  env.options        = reg->options; +  env.case_fold_flag = reg->case_fold_flag; +  env.scan_env   = scan_env; +  clear_mml(&env.mmd); + +  r = optimize_node_left(node, &opt, &env); +  if (r) return r; + +  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | +        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + +  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); + +  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { +    reg->anchor_dmin = opt.len.min; +    reg->anchor_dmax = opt.len.max; +  } + +  if (opt.exb.len > 0 || opt.exm.len > 0) { +    select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); +    if (opt.map.value > 0 && +	comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { +      goto set_map; +    } +    else { +      r = set_optimize_exact_info(reg, &opt.exb); +      set_sub_anchor(reg, &opt.exb.anc); +    } +  } +  else if (opt.map.value > 0) { +  set_map: +    set_optimize_map_info(reg, &opt.map); +    set_sub_anchor(reg, &opt.map.anc); +  } +  else { +    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; +    if (opt.len.max == 0) +      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; +  } + +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) +  print_optimize_info(stderr, reg); +#endif +  return r; +} + +static void +clear_optimize_info(regex_t* reg) +{ +  reg->optimize      = ONIG_OPTIMIZE_NONE; +  reg->anchor        = 0; +  reg->anchor_dmin   = 0; +  reg->anchor_dmax   = 0; +  reg->sub_anchor    = 0; +  reg->exact_end     = (UChar* )NULL; +  reg->threshold_len = 0; +  if (IS_NOT_NULL(reg->exact)) { +    xfree(reg->exact); +    reg->exact = (UChar* )NULL; +  } +} + +#ifdef ONIG_DEBUG + +static void print_enc_string(FILE* fp, OnigEncoding enc, +			     const UChar *s, const UChar *end) +{ +  fprintf(fp, "\nPATTERN: /"); + +  if (ONIGENC_MBC_MINLEN(enc) > 1) { +    const UChar *p; +    OnigCodePoint code; + +    p = s; +    while (p < end) { +      code = ONIGENC_MBC_TO_CODE(enc, p, end); +      if (code >= 0x80) { +        fprintf(fp, " 0x%04x ", (int )code); +      } +      else { +        fputc((int )code, fp); +      } + +      p += enclen(enc, p); +    } +  } +  else { +    while (s < end) { +      fputc((int )*s, fp); +      s++; +    } +  } + +  fprintf(fp, "/\n"); +} + +static void +print_distance_range(FILE* f, OnigDistance a, OnigDistance b) +{ +  if (a == ONIG_INFINITE_DISTANCE) +    fputs("inf", f); +  else +    fprintf(f, "(%u)", a); + +  fputs("-", f); + +  if (b == ONIG_INFINITE_DISTANCE) +    fputs("inf", f); +  else +    fprintf(f, "(%u)", b); +} + +static void +print_anchor(FILE* f, int anchor) +{ +  int q = 0; + +  fprintf(f, "["); + +  if (anchor & ANCHOR_BEGIN_BUF) { +    fprintf(f, "begin-buf"); +    q = 1; +  } +  if (anchor & ANCHOR_BEGIN_LINE) { +    if (q) fprintf(f, ", "); +    q = 1; +    fprintf(f, "begin-line"); +  } +  if (anchor & ANCHOR_BEGIN_POSITION) { +    if (q) fprintf(f, ", "); +    q = 1; +    fprintf(f, "begin-pos"); +  } +  if (anchor & ANCHOR_END_BUF) { +    if (q) fprintf(f, ", "); +    q = 1; +    fprintf(f, "end-buf"); +  } +  if (anchor & ANCHOR_SEMI_END_BUF) { +    if (q) fprintf(f, ", "); +    q = 1; +    fprintf(f, "semi-end-buf"); +  } +  if (anchor & ANCHOR_END_LINE) { +    if (q) fprintf(f, ", "); +    q = 1; +    fprintf(f, "end-line"); +  } +  if (anchor & ANCHOR_ANYCHAR_STAR) { +    if (q) fprintf(f, ", "); +    q = 1; +    fprintf(f, "anychar-star"); +  } +  if (anchor & ANCHOR_ANYCHAR_STAR_ML) { +    if (q) fprintf(f, ", "); +    fprintf(f, "anychar-star-pl"); +  } + +  fprintf(f, "]"); +} + +static void +print_optimize_info(FILE* f, regex_t* reg) +{ +  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", +                              "EXACT_IC", "MAP" }; + +  fprintf(f, "optimize: %s\n", on[reg->optimize]); +  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor); +  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) +    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); +  fprintf(f, "\n"); + +  if (reg->optimize) { +    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor); +    fprintf(f, "\n"); +  } +  fprintf(f, "\n"); + +  if (reg->exact) { +    UChar *p; +    fprintf(f, "exact: ["); +    for (p = reg->exact; p < reg->exact_end; p++) { +      fputc(*p, f); +    } +    fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); +  } +  else if (reg->optimize & ONIG_OPTIMIZE_MAP) { +    int c, i, n = 0; + +    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) +      if (reg->map[i]) n++; + +    fprintf(f, "map: n=%d\n", n); +    if (n > 0) { +      c = 0; +      fputc('[', f); +      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { +        if (reg->map[i] != 0) { +          if (c > 0)  fputs(", ", f); +          c++; +          if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && +              ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) +            fputc(i, f); +          else +            fprintf(f, "%d", i); +        } +      } +      fprintf(f, "]\n"); +    } +  } +} +#endif /* ONIG_DEBUG */ + + +extern void +onig_free_body(regex_t* reg) +{ +  if (IS_NOT_NULL(reg)) { +    if (IS_NOT_NULL(reg->p))                xfree(reg->p); +    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact); +    if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map); +    if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); +    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range); +    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain); + +#ifdef USE_NAMED_GROUP +    onig_names_free(reg); +#endif +  } +} + +extern void +onig_free(regex_t* reg) +{ +  if (IS_NOT_NULL(reg)) { +    onig_free_body(reg); +    xfree(reg); +  } +} + +#define REGEX_TRANSFER(to,from) do {\ +  onig_free_body(to);\ +  xmemcpy(to, from, sizeof(regex_t));\ +  xfree(from);\ +} while (0) + +extern void +onig_transfer(regex_t* to, regex_t* from) +{ +  REGEX_TRANSFER(to, from); +} + + +#ifdef ONIG_DEBUG +static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); +#endif +#ifdef ONIG_DEBUG_PARSE_TREE +static void print_tree P_((FILE* f, Node* node)); +#endif + +extern int +onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, +	      OnigErrorInfo* einfo) +{ +#define COMPILE_INIT_SIZE  20 + +  int r, init_size; +  Node*  root; +  ScanEnv  scan_env; +#ifdef USE_SUBEXP_CALL +  UnsetAddrList  uslist; +#endif + +  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + +#ifdef ONIG_DEBUG +  print_enc_string(stderr, reg->enc, pattern, pattern_end); +#endif + +  if (reg->alloc == 0) { +    init_size = (pattern_end - pattern) * 2; +    if (init_size <= 0) init_size = COMPILE_INIT_SIZE; +    r = BBUF_INIT(reg, init_size); +    if (r != 0) goto end; +  } +  else +    reg->used = 0; + +  reg->num_mem            = 0; +  reg->num_repeat         = 0; +  reg->num_null_check     = 0; +  reg->repeat_range_alloc = 0; +  reg->repeat_range       = (OnigRepeatRange* )NULL; +#ifdef USE_COMBINATION_EXPLOSION_CHECK +  reg->num_comb_exp_check = 0; +#endif + +  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); +  if (r != 0) goto err; + +#ifdef USE_NAMED_GROUP +  /* mixed use named group and no-named group */ +  if (scan_env.num_named > 0 && +      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && +      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { +    if (scan_env.num_named != scan_env.num_mem) +      r = disable_noname_group_capture(&root, reg, &scan_env); +    else +      r = numbered_ref_check(root); + +    if (r != 0) goto err; +  } +#endif + +#ifdef USE_SUBEXP_CALL +  if (scan_env.num_call > 0) { +    r = unset_addr_list_init(&uslist, scan_env.num_call); +    if (r != 0) goto err; +    scan_env.unset_addr_list = &uslist; +    r = setup_subexp_call(root, &scan_env); +    if (r != 0) goto err_unset; +    r = subexp_recursive_check_trav(root, &scan_env); +    if (r  < 0) goto err_unset; +    r = subexp_inf_recursive_check_trav(root, &scan_env); +    if (r != 0) goto err_unset; + +    reg->num_call = scan_env.num_call; +  } +  else +    reg->num_call = 0; +#endif + +  r = setup_tree(root, reg, 0, &scan_env); +  if (r != 0) goto err_unset; + +#ifdef ONIG_DEBUG_PARSE_TREE +  print_tree(stderr, root); +#endif + +  reg->capture_history  = scan_env.capture_history; +  reg->bt_mem_start     = scan_env.bt_mem_start; +  reg->bt_mem_start    |= reg->capture_history; +  if (IS_FIND_CONDITION(reg->options)) +    BIT_STATUS_ON_ALL(reg->bt_mem_end); +  else { +    reg->bt_mem_end  = scan_env.bt_mem_end; +    reg->bt_mem_end |= reg->capture_history; +  } + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +  if (scan_env.backrefed_mem == 0 +#ifdef USE_SUBEXP_CALL +      || scan_env.num_call == 0 +#endif +      ) { +    setup_comb_exp_check(root, 0, &scan_env); +#ifdef USE_SUBEXP_CALL +    if (scan_env.has_recursion != 0) { +      scan_env.num_comb_exp_check = 0; +    } +    else +#endif +    if (scan_env.comb_exp_max_regnum > 0) { +      int i; +      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { +        if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { +          scan_env.num_comb_exp_check = 0; +          break; +        } +      } +    } +  } + +  reg->num_comb_exp_check = scan_env.num_comb_exp_check; +#endif + +  clear_optimize_info(reg); +#ifndef ONIG_DONT_OPTIMIZE +  r = set_optimize_info_from_tree(root, reg, &scan_env); +  if (r != 0) goto err_unset; +#endif + +  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { +    xfree(scan_env.mem_nodes_dynamic); +    scan_env.mem_nodes_dynamic = (Node** )NULL; +  } + +  r = compile_tree(root, reg); +  if (r == 0) { +    r = add_opcode(reg, OP_END); +#ifdef USE_SUBEXP_CALL +    if (scan_env.num_call > 0) { +      r = unset_addr_list_fix(&uslist, reg); +      unset_addr_list_end(&uslist); +      if (r) goto err; +    } +#endif + +    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) +      reg->stack_pop_level = STACK_POP_LEVEL_ALL; +    else { +      if (reg->bt_mem_start != 0) +        reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; +      else +        reg->stack_pop_level = STACK_POP_LEVEL_FREE; +    } +  } +#ifdef USE_SUBEXP_CALL +  else if (scan_env.num_call > 0) { +    unset_addr_list_end(&uslist); +  } +#endif +  onig_node_free(root); + +#ifdef ONIG_DEBUG_COMPILE +#ifdef USE_NAMED_GROUP +  onig_print_names(stderr, reg); +#endif +  print_compiled_byte_code_list(stderr, reg); +#endif + + end: +  return r; + + err_unset: +#ifdef USE_SUBEXP_CALL +  if (scan_env.num_call > 0) { +    unset_addr_list_end(&uslist); +  } +#endif + err: +  if (IS_NOT_NULL(scan_env.error)) { +    if (IS_NOT_NULL(einfo)) { +      einfo->enc     = scan_env.enc; +      einfo->par     = scan_env.error; +      einfo->par_end = scan_env.error_end; +    } +  } + +  onig_node_free(root); +  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) +      xfree(scan_env.mem_nodes_dynamic); +  return r; +} + + +static int onig_inited = 0; + +extern int +onig_reg_init(regex_t* reg, OnigOptionType option, +	      OnigCaseFoldType case_fold_flag, +	      OnigEncoding enc, OnigSyntaxType* syntax) +{ +  int r; + +  xmemset(reg, 0, sizeof(*reg)); + +  if (onig_inited == 0) { +#if 0 +    return ONIGERR_LIBRARY_IS_NOT_INITIALIZED; +#else +    r = onig_initialize(NULL, 0); +    if (r != 0) +      return ONIGERR_FAIL_TO_INITIALIZE; + +    r = onig_initialize_encoding(enc); +    if (r != 0) +      return ONIGERR_FAIL_TO_INITIALIZE; +#endif +  } + +  if (IS_NULL(reg)) +    return ONIGERR_INVALID_ARGUMENT; + +  if (ONIGENC_IS_UNDEF(enc)) +    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; + +  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) +      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { +    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; +  } + +  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { +    option |= syntax->options; +    option &= ~ONIG_OPTION_SINGLELINE; +  } +  else +    option |= syntax->options; + +  (reg)->enc              = enc; +  (reg)->options          = option; +  (reg)->syntax           = syntax; +  (reg)->optimize         = 0; +  (reg)->exact            = (UChar* )NULL; +  (reg)->int_map          = (int* )NULL; +  (reg)->int_map_backward = (int* )NULL; +  (reg)->chain            = (regex_t* )NULL; + +  (reg)->p                = (UChar* )NULL; +  (reg)->alloc            = 0; +  (reg)->used             = 0; +  (reg)->name_table       = (void* )NULL; + +  (reg)->case_fold_flag   = case_fold_flag; +  return 0; +} + +extern int +onig_new_without_alloc(regex_t* reg, const UChar* pattern, +          const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, +          OnigSyntaxType* syntax, OnigErrorInfo* einfo) +{ +  int r; + +  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); +  if (r) return r; + +  r = onig_compile(reg, pattern, pattern_end, einfo); +  return r; +} + +extern int +onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, +	  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, +	  OnigErrorInfo* einfo) +{ +  int r; + +  *reg = (regex_t* )xmalloc(sizeof(regex_t)); +  if (IS_NULL(*reg)) return ONIGERR_MEMORY; + +  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); +  if (r) goto err; + +  r = onig_compile(*reg, pattern, pattern_end, einfo); +  if (r) { +  err: +    onig_free(*reg); +    *reg = NULL; +  } +  return r; +} + +extern int +onig_initialize(OnigEncoding encodings[], int n) +{ +  int i; +  int r; + +  if (onig_inited != 0) +    return 0; + +  onigenc_init(); + +  onig_inited = 1; + +  for (i = 0; i < n; i++) { +    OnigEncoding enc = encodings[i]; +    r = onig_initialize_encoding(enc); +    if (r != 0) +      return r; +  } + +  return 0; +} + +static OnigEndCallListItemType* EndCallTop; + +extern void onig_add_end_call(void (*func)(void)) +{ +  OnigEndCallListItemType* item; + +  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item)); +  if (item == 0) return ; + +  item->next = EndCallTop; +  item->func = func; + +  EndCallTop = item; +} + +static void +exec_end_call_list(void) +{ +  OnigEndCallListItemType* prev; +  void (*func)(void); + +  while (EndCallTop != 0) { +    func = EndCallTop->func; +    (*func)(); + +    prev = EndCallTop; +    EndCallTop = EndCallTop->next; +    xfree(prev); +  } +} + +extern int +onig_end(void) +{ +  exec_end_call_list(); + +  onig_inited = 0; + +  return 0; +} + +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ +  OnigCodePoint n, *data; +  OnigCodePoint low, high, x; + +  GET_CODE_POINT(n, p); +  data = (OnigCodePoint* )p; +  data++; + +  for (low = 0, high = n; low < high; ) { +    x = (low + high) >> 1; +    if (code > data[x * 2 + 1]) +      low = x + 1; +    else +      high = x; +  } + +  return ((low < n && code >= data[low * 2]) ? 1 : 0); +} + +extern int +onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) +{ +  int found; + +  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { +    if (IS_NULL(cc->mbuf)) { +      found = 0; +    } +    else { +      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); +    } +  } +  else { +    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); +  } + +  if (IS_NCCLASS_NOT(cc)) +    return !found; +  else +    return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ +  int len; + +  if (ONIGENC_MBC_MINLEN(enc) > 1) { +    len = 2; +  } +  else { +    len = ONIGENC_CODE_TO_MBCLEN(enc, code); +  } +  return onig_is_code_in_cc_len(len, code, cc); +} + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL     -1 +#define ARG_NON          0 +#define ARG_RELADDR      1 +#define ARG_ABSADDR      2 +#define ARG_LENGTH       3 +#define ARG_MEMNUM       4 +#define ARG_OPTION       5 +#define ARG_STATE_CHECK  6 + +OnigOpInfoType OnigOpInfo[] = { +  { OP_FINISH,            "finish",          ARG_NON }, +  { OP_END,               "end",             ARG_NON }, +  { OP_EXACT1,            "exact1",          ARG_SPECIAL }, +  { OP_EXACT2,            "exact2",          ARG_SPECIAL }, +  { OP_EXACT3,            "exact3",          ARG_SPECIAL }, +  { OP_EXACT4,            "exact4",          ARG_SPECIAL }, +  { OP_EXACT5,            "exact5",          ARG_SPECIAL }, +  { OP_EXACTN,            "exactn",          ARG_SPECIAL }, +  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL }, +  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL }, +  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL }, +  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL }, +  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL }, +  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL }, +  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL }, +  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL }, +  { OP_CCLASS,            "cclass",          ARG_SPECIAL }, +  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL }, +  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL }, +  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL }, +  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL }, +  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL }, +  { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL }, +  { OP_ANYCHAR,           "anychar",         ARG_NON }, +  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON }, +  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON }, +  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON }, +  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, +  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, +  { OP_WORD,                "word",            ARG_NON }, +  { OP_NOT_WORD,            "not-word",        ARG_NON }, +  { OP_WORD_BOUND,          "word-bound",      ARG_NON }, +  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON }, +  { OP_WORD_BEGIN,          "word-begin",      ARG_NON }, +  { OP_WORD_END,            "word-end",        ARG_NON }, +  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON }, +  { OP_END_BUF,             "end-buf",         ARG_NON }, +  { OP_BEGIN_LINE,          "begin-line",      ARG_NON }, +  { OP_END_LINE,            "end-line",        ARG_NON }, +  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON }, +  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON }, +  { OP_BACKREF1,            "backref1",             ARG_NON }, +  { OP_BACKREF2,            "backref2",             ARG_NON }, +  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  }, +  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL }, +  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL }, +  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL }, +  { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL }, +  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  }, +  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  }, +  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  }, +  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  }, +  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  }, +  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  }, +  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  }, +  { OP_SET_OPTION,          "set-option",           ARG_OPTION  }, +  { OP_FAIL,                "fail",                 ARG_NON }, +  { OP_JUMP,                "jump",                 ARG_RELADDR }, +  { OP_PUSH,                "push",                 ARG_RELADDR }, +  { OP_POP,                 "pop",                  ARG_NON }, +  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL }, +  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL }, +  { OP_REPEAT,              "repeat",               ARG_SPECIAL }, +  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL }, +  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  }, +  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  }, +  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  }, +  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  }, +  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  }, +  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  }, +  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  }, +  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  }, +  { OP_PUSH_POS,             "push-pos",             ARG_NON }, +  { OP_POP_POS,              "pop-pos",              ARG_NON }, +  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR }, +  { OP_FAIL_POS,             "fail-pos",             ARG_NON }, +  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON }, +  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON }, +  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL }, +  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, +  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, +  { OP_CALL,                 "call",                 ARG_ABSADDR }, +  { OP_RETURN,               "return",               ARG_NON }, +  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL }, +  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, +  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK }, +  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK }, +  { OP_STATE_CHECK_ANYCHAR_ML_STAR, +    "state-check-anychar-ml*", ARG_STATE_CHECK }, +  { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ +  int i; + +  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { +    if (opcode == OnigOpInfo[i].opcode) +      return OnigOpInfo[i].name; +  } +  return ""; +} + +static int +op2arg_type(int opcode) +{ +  int i; + +  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { +    if (opcode == OnigOpInfo[i].opcode) +      return OnigOpInfo[i].arg_type; +  } +  return ARG_SPECIAL; +} + +static void +Indent(FILE* f, int indent) +{ +  int i; +  for (i = 0; i < indent; i++) putc(' ', f); +} + +static void +p_string(FILE* f, int len, UChar* s) +{ +  fputs(":", f); +  while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ +  int x = len * mb_len; + +  fprintf(f, ":%d:", len); +  while (x-- > 0) { fputc(*s++, f); } +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, +                              OnigEncoding enc) +{ +  int i, n, arg_type; +  RelAddrType addr; +  LengthType len; +  MemNumType mem; +  StateCheckNumType scn; +  OnigCodePoint code; +  UChar *q; + +  fprintf(f, "[%s", op2name(*bp)); +  arg_type = op2arg_type(*bp); +  if (arg_type != ARG_SPECIAL) { +    bp++; +    switch (arg_type) { +    case ARG_NON: +      break; +    case ARG_RELADDR: +      GET_RELADDR_INC(addr, bp); +      fprintf(f, ":(%d)", addr); +      break; +    case ARG_ABSADDR: +      GET_ABSADDR_INC(addr, bp); +      fprintf(f, ":(%d)", addr); +      break; +    case ARG_LENGTH: +      GET_LENGTH_INC(len, bp); +      fprintf(f, ":%d", len); +      break; +    case ARG_MEMNUM: +      mem = *((MemNumType* )bp); +      bp += SIZE_MEMNUM; +      fprintf(f, ":%d", mem); +      break; +    case ARG_OPTION: +      { +        OnigOptionType option = *((OnigOptionType* )bp); +        bp += SIZE_OPTION; +        fprintf(f, ":%d", option); +      } +      break; + +    case ARG_STATE_CHECK: +      scn = *((StateCheckNumType* )bp); +      bp += SIZE_STATE_CHECK_NUM; +      fprintf(f, ":%d", scn); +      break; +    } +  } +  else { +    switch (*bp++) { +    case OP_EXACT1: +    case OP_ANYCHAR_STAR_PEEK_NEXT: +    case OP_ANYCHAR_ML_STAR_PEEK_NEXT: +      p_string(f, 1, bp++); break; +    case OP_EXACT2: +      p_string(f, 2, bp); bp += 2; break; +    case OP_EXACT3: +      p_string(f, 3, bp); bp += 3; break; +    case OP_EXACT4: +      p_string(f, 4, bp); bp += 4; break; +    case OP_EXACT5: +      p_string(f, 5, bp); bp += 5; break; +    case OP_EXACTN: +      GET_LENGTH_INC(len, bp); +      p_len_string(f, len, 1, bp); +      bp += len; +      break; +     +    case OP_EXACTMB2N1: +      p_string(f, 2, bp); bp += 2; break; +    case OP_EXACTMB2N2: +      p_string(f, 4, bp); bp += 4; break; +    case OP_EXACTMB2N3: +      p_string(f, 6, bp); bp += 6; break; +    case OP_EXACTMB2N: +      GET_LENGTH_INC(len, bp); +      p_len_string(f, len, 2, bp); +      bp += len * 2; +      break; +    case OP_EXACTMB3N: +      GET_LENGTH_INC(len, bp); +      p_len_string(f, len, 3, bp); +      bp += len * 3; +      break; +    case OP_EXACTMBN: +      { +        int mb_len; +       +        GET_LENGTH_INC(mb_len, bp); +        GET_LENGTH_INC(len, bp); +        fprintf(f, ":%d:%d:", mb_len, len); +        n = len * mb_len; +        while (n-- > 0) { fputc(*bp++, f); } +      } +      break; + +    case OP_EXACT1_IC: +      len = enclen(enc, bp); +      p_string(f, len, bp); +      bp += len; +      break; +    case OP_EXACTN_IC: +      GET_LENGTH_INC(len, bp); +      p_len_string(f, len, 1, bp); +      bp += len; +      break; + +    case OP_CCLASS: +      n = bitset_on_num((BitSetRef )bp); +      bp += SIZE_BITSET; +      fprintf(f, ":%d", n); +      break; + +    case OP_CCLASS_NOT: +      n = bitset_on_num((BitSetRef )bp); +      bp += SIZE_BITSET; +      fprintf(f, ":%d", n); +      break; + +    case OP_CCLASS_MB: +    case OP_CCLASS_MB_NOT: +      GET_LENGTH_INC(len, bp); +      q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +      ALIGNMENT_RIGHT(q); +#endif +      GET_CODE_POINT(code, q); +      bp += len; +      fprintf(f, ":%d:%d", (int )code, len); +      break; + +    case OP_CCLASS_MIX: +    case OP_CCLASS_MIX_NOT: +      n = bitset_on_num((BitSetRef )bp); +      bp += SIZE_BITSET; +      GET_LENGTH_INC(len, bp); +      q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +      ALIGNMENT_RIGHT(q); +#endif +      GET_CODE_POINT(code, q); +      bp += len; +      fprintf(f, ":%d:%d:%d", n, (int )code, len); +      break; + +    case OP_CCLASS_NODE: +      { +        CClassNode *cc; + +        GET_POINTER_INC(cc, bp); +        n = bitset_on_num(cc->bs); +        fprintf(f, ":%u:%d", (unsigned int )cc, n); +      } +      break; + +    case OP_BACKREFN_IC: +      mem = *((MemNumType* )bp); +      bp += SIZE_MEMNUM; +      fprintf(f, ":%d", mem); +      break; + +    case OP_BACKREF_MULTI_IC: +    case OP_BACKREF_MULTI: +      fputs(" ", f); +      GET_LENGTH_INC(len, bp); +      for (i = 0; i < len; i++) { +        GET_MEMNUM_INC(mem, bp); +        if (i > 0) fputs(", ", f); +        fprintf(f, "%d", mem); +      } +      break; + +    case OP_BACKREF_WITH_LEVEL: +      { +        OnigOptionType option; +        LengthType level; + +        GET_OPTION_INC(option, bp); +        fprintf(f, ":%d", option); +        GET_LENGTH_INC(level, bp); +        fprintf(f, ":%d", level); + +        fputs(" ", f); +        GET_LENGTH_INC(len, bp); +        for (i = 0; i < len; i++) { +          GET_MEMNUM_INC(mem, bp); +          if (i > 0) fputs(", ", f); +          fprintf(f, "%d", mem); +        } +      } +      break; + +    case OP_REPEAT: +    case OP_REPEAT_NG: +      { +        mem = *((MemNumType* )bp); +        bp += SIZE_MEMNUM; +        addr = *((RelAddrType* )bp); +        bp += SIZE_RELADDR; +        fprintf(f, ":%d:%d", mem, addr); +      } +      break; + +    case OP_PUSH_OR_JUMP_EXACT1: +    case OP_PUSH_IF_PEEK_NEXT: +      addr = *((RelAddrType* )bp); +      bp += SIZE_RELADDR; +      fprintf(f, ":(%d)", addr); +      p_string(f, 1, bp); +      bp += 1; +      break; + +    case OP_LOOK_BEHIND: +      GET_LENGTH_INC(len, bp); +      fprintf(f, ":%d", len); +      break; + +    case OP_PUSH_LOOK_BEHIND_NOT: +      GET_RELADDR_INC(addr, bp); +      GET_LENGTH_INC(len, bp); +      fprintf(f, ":%d:(%d)", len, addr); +      break; + +    case OP_STATE_CHECK_PUSH: +    case OP_STATE_CHECK_PUSH_OR_JUMP: +      scn = *((StateCheckNumType* )bp); +      bp += SIZE_STATE_CHECK_NUM; +      addr = *((RelAddrType* )bp); +      bp += SIZE_RELADDR; +      fprintf(f, ":%d:(%d)", scn, addr); +      break; + +    default: +      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", +	      *--bp); +    } +  } +  fputs("]", f); +  if (nextp) *nextp = bp; +} + +static void +print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ +  int ncode; +  UChar* bp = reg->p; +  UChar* end = reg->p + reg->used; + +  fprintf(f, "code length: %d\n", reg->used); + +  ncode = 0; +  while (bp < end) { +    ncode++; +    if (bp > reg->p) { +      if (ncode % 5 == 0) +        fprintf(f, "\n"); +      else +        fputs(" ", f); +    } +    onig_print_compiled_byte_code(f, bp, &bp, reg->enc); +  } + +  fprintf(f, "\n"); +} + +static void +print_indent_tree(FILE* f, Node* node, int indent) +{ +  int i, type; +  int add = 3; +  UChar* p; + +  Indent(f, indent); +  if (IS_NULL(node)) { +    fprintf(f, "ERROR: null node!!!\n"); +    exit (0); +  } + +  type = NTYPE(node); +  switch (type) { +  case NT_LIST: +  case NT_ALT: +    if (NTYPE(node) == NT_LIST) +      fprintf(f, "<list:%x>\n", (int )node); +    else +      fprintf(f, "<alt:%x>\n", (int )node); + +    print_indent_tree(f, NCAR(node), indent + add); +    while (IS_NOT_NULL(node = NCDR(node))) { +      if (NTYPE(node) != type) { +        fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); +        exit(0); +      } +      print_indent_tree(f, NCAR(node), indent + add); +    } +    break; + +  case NT_STR: +    fprintf(f, "<string%s:%x>", +	    (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); +    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { +      if (*p >= 0x20 && *p < 0x7f) +        fputc(*p, f); +      else { +        fprintf(f, " 0x%02x", *p); +      } +    } +    break; + +  case NT_CCLASS: +    fprintf(f, "<cclass:%x>", (int )node); +    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); +    if (NCCLASS(node)->mbuf) { +      BBuf* bbuf = NCCLASS(node)->mbuf; +      for (i = 0; i < bbuf->used; i++) { +        if (i > 0) fprintf(f, ","); +        fprintf(f, "%0x", bbuf->p[i]); +      } +    } +    break; + +  case NT_CTYPE: +    fprintf(f, "<ctype:%x> ", (int )node); +    switch (NCTYPE(node)->ctype) { +    case ONIGENC_CTYPE_WORD: +      if (NCTYPE(node)->not != 0) +        fputs("not word",       f); +      else +        fputs("word",           f); +      break; + +    default: +      fprintf(f, "ERROR: undefined ctype.\n"); +      exit(0); +    } +    break; + +  case NT_CANY: +    fprintf(f, "<anychar:%x>", (int )node); +    break; + +  case NT_ANCHOR: +    fprintf(f, "<anchor:%x> ", (int )node); +    switch (NANCHOR(node)->type) { +    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break; +    case ANCHOR_END_BUF:        fputs("end buf",        f); break; +    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break; +    case ANCHOR_END_LINE:       fputs("end line",       f); break; +    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break; +    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; + +    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break; +    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break; +#ifdef USE_WORD_BEGIN_END +    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break; +    case ANCHOR_WORD_END:        fputs("word end", f);       break; +#endif +    case ANCHOR_PREC_READ:       fputs("prec read",      f); break; +    case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); break; +    case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); break; +    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; + +    default: +      fprintf(f, "ERROR: undefined anchor type.\n"); +      break; +    } +    break; + +  case NT_BREF: +    { +      int* p; +      BRefNode* br = NBREF(node); +      p = BACKREFS_P(br); +      fprintf(f, "<backref:%x>", (int )node); +      for (i = 0; i < br->back_num; i++) { +        if (i > 0) fputs(", ", f); +        fprintf(f, "%d", p[i]); +      } +    } +    break; + +#ifdef USE_SUBEXP_CALL +  case NT_CALL: +    { +      CallNode* cn = NCALL(node); +      fprintf(f, "<call:%x>", (int )node); +      p_string(f, cn->name_end - cn->name, cn->name); +    } +    break; +#endif + +  case NT_QTFR: +    fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node, +	    NQTFR(node)->lower, NQTFR(node)->upper, +	    (NQTFR(node)->greedy ? "" : "?")); +    print_indent_tree(f, NQTFR(node)->target, indent + add); +    break; + +  case NT_ENCLOSE: +    fprintf(f, "<enclose:%x> ", (int )node); +    switch (NENCLOSE(node)->type) { +    case ENCLOSE_OPTION: +      fprintf(f, "option:%d", NENCLOSE(node)->option); +      break; +    case ENCLOSE_MEMORY: +      fprintf(f, "memory:%d", NENCLOSE(node)->regnum); +      break; +    case ENCLOSE_STOP_BACKTRACK: +      fprintf(f, "stop-bt"); +      break; + +    default: +      break; +    } +    fprintf(f, "\n"); +    print_indent_tree(f, NENCLOSE(node)->target, indent + add); +    break; + +  default: +    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); +    break; +  } + +  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && +      type != NT_ENCLOSE) +    fprintf(f, "\n"); +  fflush(f); +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_PARSE_TREE +static void +print_tree(FILE* f, Node* node) +{ +  print_indent_tree(f, node, 0); +} +#endif | 
