diff options
| author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-07-23 10:18:42 +0200 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-07-23 10:18:42 +0200 | 
| commit | 3583c157ab4535e5ccee87e787be33447213461f (patch) | |
| tree | 88b4ca33e0b887948715dcba6556c35daaec9cdd /src/regparse.h | |
| parent | 7aac082e4e72a80c965825bbc5e8176bc7667e5a (diff) | |
| parent | 995dfd20e78ad16cec678df25422ce032650e3aa (diff) | |
Updated version 6.4.0 from 'upstream/6.4.0'
with Debian dir c94f3039d51e97ec5152e0857aee3095c04a8323
Diffstat (limited to 'src/regparse.h')
| -rw-r--r-- | src/regparse.h | 334 | 
1 files changed, 196 insertions, 138 deletions
diff --git a/src/regparse.h b/src/regparse.h index c9d1fe8..884f4d5 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -4,7 +4,7 @@    regparse.h -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -32,88 +32,91 @@  #include "regint.h"  /* node type */ -#define NT_STR         0 -#define NT_CCLASS      1 -#define NT_CTYPE       2 -#define NT_CANY        3 -#define NT_BREF        4 -#define NT_QTFR        5 -#define NT_ENCLOSE     6 -#define NT_ANCHOR      7 -#define NT_LIST        8 -#define NT_ALT         9 -#define NT_CALL       10 +typedef enum { +  NODE_STR       = 0, +  NODE_CCLASS    = 1, +  NODE_CTYPE     = 2, +  NODE_BREF      = 3, +  NODE_QUANT      = 4, +  NODE_ENCLOSURE = 5, +  NODE_ANCHOR    = 6, +  NODE_LIST      = 7, +  NODE_ALT       = 8, +  NODE_CALL      = 9 +} NodeType;  /* node type bit */ -#define NTYPE2BIT(type)      (1<<(type)) - -#define BIT_NT_STR        NTYPE2BIT(NT_STR) -#define BIT_NT_CCLASS     NTYPE2BIT(NT_CCLASS) -#define BIT_NT_CTYPE      NTYPE2BIT(NT_CTYPE) -#define BIT_NT_CANY       NTYPE2BIT(NT_CANY) -#define BIT_NT_BREF       NTYPE2BIT(NT_BREF) -#define BIT_NT_QTFR       NTYPE2BIT(NT_QTFR) -#define BIT_NT_ENCLOSE    NTYPE2BIT(NT_ENCLOSE) -#define BIT_NT_ANCHOR     NTYPE2BIT(NT_ANCHOR) -#define BIT_NT_LIST       NTYPE2BIT(NT_LIST) -#define BIT_NT_ALT        NTYPE2BIT(NT_ALT) -#define BIT_NT_CALL       NTYPE2BIT(NT_CALL) - -#define IS_NODE_TYPE_SIMPLE(type) \ -  ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ -                       BIT_NT_CANY | BIT_NT_BREF)) != 0) - -#define NTYPE(node)             ((node)->u.base.type) -#define SET_NTYPE(node, ntype)   (node)->u.base.type = (ntype) - -#define NSTR(node)         (&((node)->u.str)) -#define NCCLASS(node)      (&((node)->u.cclass)) -#define NCTYPE(node)       (&((node)->u.ctype)) -#define NBREF(node)        (&((node)->u.bref)) -#define NQTFR(node)        (&((node)->u.qtfr)) -#define NENCLOSE(node)     (&((node)->u.enclose)) -#define NANCHOR(node)      (&((node)->u.anchor)) -#define NCONS(node)        (&((node)->u.cons)) -#define NCALL(node)        (&((node)->u.call)) - -#define NCAR(node)         (NCONS(node)->car) -#define NCDR(node)         (NCONS(node)->cdr) - +#define NODE_TYPE2BIT(type)      (1<<(type)) + +#define BIT_NODE_STR        NODE_TYPE2BIT(NODE_STR) +#define BIT_NODE_CCLASS     NODE_TYPE2BIT(NODE_CCLASS) +#define BIT_NODE_CTYPE      NODE_TYPE2BIT(NODE_CTYPE) +#define BIT_NODE_BREF       NODE_TYPE2BIT(NODE_BREF) +#define BIT_NODE_QUANT       NODE_TYPE2BIT(NODE_QUANT) +#define BIT_NODE_ENCLOSURE  NODE_TYPE2BIT(NODE_ENCLOSURE) +#define BIT_NODE_ANCHOR     NODE_TYPE2BIT(NODE_ANCHOR) +#define BIT_NODE_LIST       NODE_TYPE2BIT(NODE_LIST) +#define BIT_NODE_ALT        NODE_TYPE2BIT(NODE_ALT) +#define BIT_NODE_CALL       NODE_TYPE2BIT(NODE_CALL) + +#define NODE_IS_SIMPLE_TYPE(node) \ +  ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ +    (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0) + +#define NODE_TYPE(node)             ((node)->u.base.node_type) +#define SET_NODE_TYPE(node, ntype)   (node)->u.base.node_type = (ntype) + +#define STR_(node)         (&((node)->u.str)) +#define CCLASS_(node)      (&((node)->u.cclass)) +#define CTYPE_(node)       (&((node)->u.ctype)) +#define BREF_(node)        (&((node)->u.bref)) +#define QUANT_(node)        (&((node)->u.quant)) +#define ENCLOSURE_(node)     (&((node)->u.enclosure)) +#define ANCHOR_(node)      (&((node)->u.anchor)) +#define CONS_(node)        (&((node)->u.cons)) +#define CALL_(node)        (&((node)->u.call)) + +#define NODE_CAR(node)         (CONS_(node)->car) +#define NODE_CDR(node)         (CONS_(node)->cdr) + +#define CTYPE_ANYCHAR      -1 +#define NODE_IS_ANYCHAR(node) \ +  (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)  #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)  #define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) -#define ENCLOSE_MEMORY           (1<<0) -#define ENCLOSE_OPTION           (1<<1) -#define ENCLOSE_STOP_BACKTRACK   (1<<2) +#define ENCLOSURE_MEMORY           (1<<0) +#define ENCLOSURE_OPTION           (1<<1) +#define ENCLOSURE_STOP_BACKTRACK   (1<<2)  #define NODE_STR_MARGIN         16  #define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */  #define NODE_BACKREFS_SIZE       6 -#define NSTR_RAW                (1<<0) /* by backslashed number */ -#define NSTR_AMBIG              (1<<1) -#define NSTR_DONT_GET_OPT_INFO  (1<<2) +#define STRING_RAW                (1<<0) /* by backslashed number */ +#define STRING_AMBIG              (1<<1) +#define STRING_DONT_GET_OPT_INFO  (1<<2)  #define NSTRING_LEN(node)             ((node)->u.str.end - (node)->u.str.s) -#define NSTRING_SET_RAW(node)          (node)->u.str.flag |= NSTR_RAW -#define NSTRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~NSTR_RAW -#define NSTRING_SET_AMBIG(node)        (node)->u.str.flag |= NSTR_AMBIG +#define NSTRING_SET_RAW(node)          (node)->u.str.flag |= STRING_RAW +#define NSTRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~STRING_RAW +#define NSTRING_SET_AMBIG(node)        (node)->u.str.flag |= STRING_AMBIG  #define NSTRING_SET_DONT_GET_OPT_INFO(node) \ -  (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO -#define NSTRING_IS_RAW(node)          (((node)->u.str.flag & NSTR_RAW)   != 0) -#define NSTRING_IS_AMBIG(node)        (((node)->u.str.flag & NSTR_AMBIG) != 0) +  (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO +#define NSTRING_IS_RAW(node)          (((node)->u.str.flag & STRING_RAW)   != 0) +#define NSTRING_IS_AMBIG(node)        (((node)->u.str.flag & STRING_AMBIG) != 0)  #define NSTRING_IS_DONT_GET_OPT_INFO(node) \ -  (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0) +  (((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0)  #define BACKREFS_P(br) \    (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); -#define NQ_TARGET_ISNOT_EMPTY     0 -#define NQ_TARGET_IS_EMPTY        1 -#define NQ_TARGET_IS_EMPTY_MEM    2 -#define NQ_TARGET_IS_EMPTY_REC    3 +#define QUANT_BODY_IS_NOT_EMPTY    0 +#define QUANT_BODY_IS_EMPTY        1 +#define QUANT_BODY_IS_EMPTY_MEM    2 +#define QUANT_BODY_IS_EMPTY_REC    3  /* status bits */  #define NST_MIN_FIXED             (1<<0) @@ -121,44 +124,56 @@  #define NST_CLEN_FIXED            (1<<2)  #define NST_MARK1                 (1<<3)  #define NST_MARK2                 (1<<4) -#define NST_MEM_BACKREFED         (1<<5) -#define NST_STOP_BT_SIMPLE_REPEAT (1<<6) -#define NST_RECURSION             (1<<7) -#define NST_CALLED                (1<<8) -#define NST_ADDR_FIXED            (1<<9) -#define NST_NAMED_GROUP           (1<<10) -#define NST_NAME_REF              (1<<11) -#define NST_IN_REPEAT             (1<<12) /* STK_REPEAT is nested in stack. */ +#define NST_STOP_BT_SIMPLE_REPEAT (1<<5) +#define NST_RECURSION             (1<<6) +#define NST_CALLED                (1<<7) +#define NST_ADDR_FIXED            (1<<8) +#define NST_NAMED_GROUP           (1<<9) +#define NST_IN_REAL_REPEAT        (1<<10) /* STK_REPEAT is nested in stack. */ +#define NST_IN_ZERO_REPEAT        (1<<11) /* (....){0} */ +#define NST_IN_MULTI_ENTRY        (1<<12)  #define NST_NEST_LEVEL            (1<<13)  #define NST_BY_NUMBER             (1<<14) /* {n,m} */ +#define NST_BY_NAME               (1<<15) /* backref by name */ +#define NST_BACKREF               (1<<16) + + +#define NODE_STATUS(node)           (((Node* )node)->u.base.status) +#define NODE_STATUS_ADD(node,f)     (NODE_STATUS(node) |= (f)) +#define NODE_STATUS_REMOVE(node,f)  (NODE_STATUS(node) &= ~(f)) + +#define NODE_IS_BY_NUMBER(node)       ((NODE_STATUS(node) & NST_BY_NUMBER) != 0) +#define NODE_IS_IN_REAL_REPEAT(node)  ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0) +#define NODE_IS_CALLED(node)          ((NODE_STATUS(node) & NST_CALLED)    != 0) +#define NODE_IS_IN_MULTI_ENTRY(node)  ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0) +#define NODE_IS_RECURSION(node)       ((NODE_STATUS(node) & NST_RECURSION) != 0) +#define NODE_IS_IN_ZERO_REPEAT(node)  ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0) +#define NODE_IS_NAMED_GROUP(node)     ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0) +#define NODE_IS_ADDR_FIXED(node)      ((NODE_STATUS(node) & NST_ADDR_FIXED)  != 0) +#define NODE_IS_CLEN_FIXED(node)      ((NODE_STATUS(node) & NST_CLEN_FIXED)  != 0) +#define NODE_IS_MIN_FIXED(node)       ((NODE_STATUS(node) & NST_MIN_FIXED)   != 0) +#define NODE_IS_MAX_FIXED(node)       ((NODE_STATUS(node) & NST_MAX_FIXED)   != 0) +#define NODE_IS_MARK1(node)           ((NODE_STATUS(node) & NST_MARK1)       != 0) +#define NODE_IS_MARK2(node)           ((NODE_STATUS(node) & NST_MARK2)       != 0) +#define NODE_IS_NEST_LEVEL(node)      ((NODE_STATUS(node) & NST_NEST_LEVEL)  != 0) +#define NODE_IS_BY_NAME(node)         ((NODE_STATUS(node) & NST_BY_NAME)     != 0) +#define NODE_IS_BACKREF(node)         ((NODE_STATUS(node) & NST_BACKREF)     != 0) +#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ +    ((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0) + +#define NODE_BODY(node)           ((node)->u.base.body) +#define NODE_QUANT_BODY(node)      ((node)->body) +#define NODE_ENCLOSURE_BODY(node)   ((node)->body) +#define NODE_CALL_BODY(node)      ((node)->body) +#define NODE_ANCHOR_BODY(node)    ((node)->body) -#define SET_ENCLOSE_STATUS(node,f)      (node)->u.enclose.state |=  (f) -#define CLEAR_ENCLOSE_STATUS(node,f)    (node)->u.enclose.state &= ~(f) - -#define IS_ENCLOSE_CALLED(en)          (((en)->state & NST_CALLED)        != 0) -#define IS_ENCLOSE_ADDR_FIXED(en)      (((en)->state & NST_ADDR_FIXED)    != 0) -#define IS_ENCLOSE_RECURSION(en)       (((en)->state & NST_RECURSION)     != 0) -#define IS_ENCLOSE_MARK1(en)           (((en)->state & NST_MARK1)         != 0) -#define IS_ENCLOSE_MARK2(en)           (((en)->state & NST_MARK2)         != 0) -#define IS_ENCLOSE_MIN_FIXED(en)       (((en)->state & NST_MIN_FIXED)     != 0) -#define IS_ENCLOSE_MAX_FIXED(en)       (((en)->state & NST_MAX_FIXED)     != 0) -#define IS_ENCLOSE_CLEN_FIXED(en)      (((en)->state & NST_CLEN_FIXED)    != 0) -#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ -    (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) -#define IS_ENCLOSE_NAMED_GROUP(en)     (((en)->state & NST_NAMED_GROUP)   != 0) - -#define SET_CALL_RECURSION(node)       (node)->u.call.state |= NST_RECURSION -#define IS_CALL_RECURSION(cn)          (((cn)->state & NST_RECURSION)  != 0) -#define IS_CALL_NAME_REF(cn)           (((cn)->state & NST_NAME_REF)   != 0) -#define IS_BACKREF_NAME_REF(bn)        (((bn)->state & NST_NAME_REF)   != 0) -#define IS_BACKREF_NEST_LEVEL(bn)      (((bn)->state & NST_NEST_LEVEL) != 0) -#define IS_QUANTIFIER_IN_REPEAT(qn)    (((qn)->state & NST_IN_REPEAT)  != 0) -#define IS_QUANTIFIER_BY_NUMBER(qn)    (((qn)->state & NST_BY_NUMBER)  != 0)  #define CALLNODE_REFNUM_UNDEF  -1  typedef struct { -  NodeBase base; +  NodeType node_type; +  int status; +    UChar* s;    UChar* end;    unsigned int flag; @@ -167,35 +182,54 @@ typedef struct {  } StrNode;  typedef struct { -  NodeBase base; -  int state; -  struct _Node* target; +  NodeType node_type; +  int status; + +  unsigned int flags; +  BitSet bs; +  BBuf*  mbuf;   /* multi-byte info or NULL */ +} CClassNode; + +typedef struct { +  NodeType node_type; +  int status; +  struct _Node* body; +    int lower;    int upper;    int greedy; -  int target_empty_info; +  int body_empty_info;    struct _Node* head_exact;    struct _Node* next_head_exact;    int is_refered;     /* include called node. don't eliminate even if {0} */  #ifdef USE_COMBINATION_EXPLOSION_CHECK    int comb_exp_check_num;  /* 1,2,3...: check,  0: no check  */  #endif -} QtfrNode; +} QuantNode;  typedef struct { -  NodeBase base; -  int state; +  NodeType node_type; +  int status; +  struct _Node* body; +    int type; -  int regnum; -  OnigOptionType option; -  struct _Node*  target; -  AbsAddrType    call_addr; +  union { +    struct { +      int regnum; +      AbsAddrType called_addr; +      int entry_count; +      int called_state; +    } m; +    struct { +      OnigOptionType option; +    } o; +  };    /* for multiple call reference */    OnigLen min_len; /* min length (byte) */    OnigLen max_len; /* max length (byte) */    int char_len;         /* character length  */    int opt_count;        /* referenced count in optimize_node_left() */ -} EncloseNode; +} EnclosureNode;  #ifdef USE_SUBEXP_CALL @@ -211,20 +245,23 @@ typedef struct {  } UnsetAddrList;  typedef struct { -  NodeBase base; -  int     state; +  NodeType node_type; +  int status; +  struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */ + +  int     by_number;    int     group_num;    UChar*  name;    UChar*  name_end; -  struct _Node*  target;  /* EncloseNode : ENCLOSE_MEMORY */ -  UnsetAddrList* unset_addr_list; +  int     entry_count;  } CallNode;  #endif  typedef struct { -  NodeBase base; -  int  state; +  NodeType node_type; +  int status; +    int  back_num;    int  back_static[NODE_BACKREFS_SIZE];    int* back_dynamic; @@ -232,37 +269,48 @@ typedef struct {  } BRefNode;  typedef struct { -  NodeBase base; +  NodeType node_type; +  int status; +  struct _Node* body; +    int type; -  struct _Node* target;    int char_len;  } AnchorNode;  typedef struct { -  NodeBase base; +  NodeType node_type; +  int status; +    struct _Node* car;    struct _Node* cdr;  } ConsAltNode;  typedef struct { -  NodeBase base; +  NodeType node_type; +  int status; +    int ctype;    int not;  } CtypeNode;  typedef struct _Node {    union { -    NodeBase     base; -    StrNode      str; -    CClassNode   cclass; -    QtfrNode     qtfr; -    EncloseNode  enclose; -    BRefNode     bref; -    AnchorNode   anchor; -    ConsAltNode  cons; -    CtypeNode    ctype; +    struct { +      NodeType node_type; +      int status; +      struct _Node* body; +    } base; + +    StrNode       str; +    CClassNode    cclass; +    QuantNode     quant; +    EnclosureNode enclosure; +    BRefNode      bref; +    AnchorNode    anchor; +    ConsAltNode   cons; +    CtypeNode     ctype;  #ifdef USE_SUBEXP_CALL -    CallNode     call; +    CallNode      call;  #endif    } u;  } Node; @@ -270,20 +318,28 @@ typedef struct _Node {  #define NULL_NODE  ((Node* )0) -#define SCANENV_MEMNODES_SIZE               8 -#define SCANENV_MEM_NODES(senv)   \ - (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \ -    (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) +#define SCANENV_MEMENV_SIZE               8 +#define SCANENV_MEMENV(senv) \ + (IS_NOT_NULL((senv)->mem_env_dynamic) ? \ +    (senv)->mem_env_dynamic : (senv)->mem_env_static) + +typedef struct { +  Node* node; +#if 0 +  int in; +  int recursion; +#endif +} MemEnv;  typedef struct {    OnigOptionType   option;    OnigCaseFoldType case_fold_flag;    OnigEncoding     enc;    OnigSyntaxType*  syntax; -  BitStatusType    capture_history; -  BitStatusType    bt_mem_start; -  BitStatusType    bt_mem_end; -  BitStatusType    backrefed_mem; +  MemStatusType    capture_history; +  MemStatusType    bt_mem_start; +  MemStatusType    bt_mem_end; +  MemStatusType    backrefed_mem;    UChar*           pattern;    UChar*           pattern_end;    UChar*           error; @@ -292,14 +348,15 @@ typedef struct {    int              num_call;  #ifdef USE_SUBEXP_CALL    UnsetAddrList*   unset_addr_list; +  int              has_call_zero;  #endif    int              num_mem;  #ifdef USE_NAMED_GROUP    int              num_named;  #endif    int              mem_alloc; -  Node*            mem_nodes_static[SCANENV_MEMNODES_SIZE]; -  Node**           mem_nodes_dynamic; +  MemEnv            mem_env_static[SCANENV_MEMENV_SIZE]; +  MemEnv*           mem_env_dynamic;  #ifdef USE_COMBINATION_EXPLOSION_CHECK    int num_comb_exp_check;    int comb_exp_max_regnum; @@ -331,7 +388,7 @@ extern void   onig_node_conv_to_str_node P_((Node* node, int raw));  extern int    onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));  extern int    onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));  extern void   onig_node_free P_((Node* node)); -extern Node*  onig_node_new_enclose P_((int type)); +extern Node*  onig_node_new_enclosure P_((int type));  extern Node*  onig_node_new_anchor P_((int type));  extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));  extern Node*  onig_node_new_list P_((Node* left, Node* right)); @@ -339,8 +396,9 @@ extern Node*  onig_node_list_add P_((Node* list, Node* x));  extern Node*  onig_node_new_alt P_((Node* left, Node* right));  extern void   onig_node_str_clear P_((Node* node));  extern int    onig_names_free P_((regex_t* reg)); -extern int    onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); +extern int    onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));  extern int    onig_free_shared_cclass_table P_((void)); +extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));  #ifdef ONIG_DEBUG  #ifdef USE_NAMED_GROUP  | 
