diff options
Diffstat (limited to 'src/regparse.h')
| -rw-r--r-- | src/regparse.h | 159 | 
1 files changed, 103 insertions, 56 deletions
| diff --git a/src/regparse.h b/src/regparse.h index 884f4d5..b7260ea 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -33,48 +33,58 @@  /* node type */  typedef enum { -  NODE_STR       = 0, -  NODE_CCLASS    = 1, -  NODE_CTYPE     = 2, -  NODE_BREF      = 3, -  NODE_QUANT      = 4, -  NODE_ENCLOSURE = 5, -  NODE_ANCHOR    = 6, -  NODE_LIST      = 7, -  NODE_ALT       = 8, -  NODE_CALL      = 9 +  NODE_STRING    =  0, +  NODE_CCLASS    =  1, +  NODE_CTYPE     =  2, +  NODE_BACKREF   =  3, +  NODE_QUANT     =  4, +  NODE_ENCLOSURE =  5, +  NODE_ANCHOR    =  6, +  NODE_LIST      =  7, +  NODE_ALT       =  8, +  NODE_CALL      =  9, +  NODE_GIMMICK   = 10  } NodeType; +enum GimmickType { +  GIMMICK_FAIL = 0, +  GIMMICK_KEEP = 1, +  GIMMICK_SAVE = 2, +  GIMMICK_UPDATE_VAR = 3, +}; +  /* node type bit */  #define NODE_TYPE2BIT(type)      (1<<(type)) -#define BIT_NODE_STR        NODE_TYPE2BIT(NODE_STR) +#define BIT_NODE_STRING     NODE_TYPE2BIT(NODE_STRING)  #define BIT_NODE_CCLASS     NODE_TYPE2BIT(NODE_CCLASS)  #define BIT_NODE_CTYPE      NODE_TYPE2BIT(NODE_CTYPE) -#define BIT_NODE_BREF       NODE_TYPE2BIT(NODE_BREF) -#define BIT_NODE_QUANT       NODE_TYPE2BIT(NODE_QUANT) +#define BIT_NODE_BACKREF    NODE_TYPE2BIT(NODE_BACKREF) +#define BIT_NODE_QUANT      NODE_TYPE2BIT(NODE_QUANT)  #define BIT_NODE_ENCLOSURE  NODE_TYPE2BIT(NODE_ENCLOSURE)  #define BIT_NODE_ANCHOR     NODE_TYPE2BIT(NODE_ANCHOR)  #define BIT_NODE_LIST       NODE_TYPE2BIT(NODE_LIST)  #define BIT_NODE_ALT        NODE_TYPE2BIT(NODE_ALT)  #define BIT_NODE_CALL       NODE_TYPE2BIT(NODE_CALL) +#define BIT_NODE_GIMMICK    NODE_TYPE2BIT(NODE_GIMMICK)  #define NODE_IS_SIMPLE_TYPE(node) \    ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ -    (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0) +    (BIT_NODE_STRING | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BACKREF)) != 0)  #define NODE_TYPE(node)             ((node)->u.base.node_type) -#define SET_NODE_TYPE(node, ntype)   (node)->u.base.node_type = (ntype) +#define NODE_SET_TYPE(node, ntype)   (node)->u.base.node_type = (ntype)  #define STR_(node)         (&((node)->u.str))  #define CCLASS_(node)      (&((node)->u.cclass))  #define CTYPE_(node)       (&((node)->u.ctype)) -#define BREF_(node)        (&((node)->u.bref)) -#define QUANT_(node)        (&((node)->u.quant)) -#define ENCLOSURE_(node)     (&((node)->u.enclosure)) +#define BACKREF_(node)     (&((node)->u.backref)) +#define QUANT_(node)       (&((node)->u.quant)) +#define ENCLOSURE_(node)   (&((node)->u.enclosure))  #define ANCHOR_(node)      (&((node)->u.anchor))  #define CONS_(node)        (&((node)->u.cons))  #define CALL_(node)        (&((node)->u.call)) +#define GIMMICK_(node)     (&((node)->u.gimmick))  #define NODE_CAR(node)         (CONS_(node)->car)  #define NODE_CDR(node)         (CONS_(node)->cdr) @@ -83,6 +93,9 @@ typedef enum {  #define NODE_IS_ANYCHAR(node) \    (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) +#define CTYPE_OPTION(node, reg) \ +  (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) +  #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)  #define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) @@ -90,24 +103,25 @@ typedef enum {  #define ENCLOSURE_MEMORY           (1<<0)  #define ENCLOSURE_OPTION           (1<<1)  #define ENCLOSURE_STOP_BACKTRACK   (1<<2) +#define ENCLOSURE_IF_ELSE          (1<<3) -#define NODE_STR_MARGIN         16 -#define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_STRING_MARGIN         16 +#define NODE_STRING_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */  #define NODE_BACKREFS_SIZE       6  #define STRING_RAW                (1<<0) /* by backslashed number */  #define STRING_AMBIG              (1<<1)  #define STRING_DONT_GET_OPT_INFO  (1<<2) -#define NSTRING_LEN(node)             ((node)->u.str.end - (node)->u.str.s) -#define NSTRING_SET_RAW(node)          (node)->u.str.flag |= STRING_RAW -#define NSTRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~STRING_RAW -#define NSTRING_SET_AMBIG(node)        (node)->u.str.flag |= STRING_AMBIG -#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ +#define NODE_STRING_LEN(node)             ((node)->u.str.end - (node)->u.str.s) +#define NODE_STRING_SET_RAW(node)          (node)->u.str.flag |= STRING_RAW +#define NODE_STRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~STRING_RAW +#define NODE_STRING_SET_AMBIG(node)        (node)->u.str.flag |= STRING_AMBIG +#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \    (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO -#define NSTRING_IS_RAW(node)          (((node)->u.str.flag & STRING_RAW)   != 0) -#define NSTRING_IS_AMBIG(node)        (((node)->u.str.flag & STRING_AMBIG) != 0) -#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ +#define NODE_STRING_IS_RAW(node)          (((node)->u.str.flag & STRING_RAW)   != 0) +#define NODE_STRING_IS_AMBIG(node)        (((node)->u.str.flag & STRING_AMBIG) != 0) +#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \    (((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0)  #define BACKREFS_P(br) \ @@ -118,7 +132,7 @@ typedef enum {  #define QUANT_BODY_IS_EMPTY_MEM    2  #define QUANT_BODY_IS_EMPTY_REC    3 -/* status bits */ +/* node status bits */  #define NST_MIN_FIXED             (1<<0)  #define NST_MAX_FIXED             (1<<1)  #define NST_CLEN_FIXED            (1<<2) @@ -136,28 +150,37 @@ typedef enum {  #define NST_BY_NUMBER             (1<<14) /* {n,m} */  #define NST_BY_NAME               (1<<15) /* backref by name */  #define NST_BACKREF               (1<<16) +#define NST_CHECKER               (1<<17) +#define NST_FIXED_OPTION          (1<<18) +#define NST_PROHIBIT_RECURSION    (1<<19) +#define NST_SUPER                 (1<<20)  #define NODE_STATUS(node)           (((Node* )node)->u.base.status)  #define NODE_STATUS_ADD(node,f)     (NODE_STATUS(node) |= (f))  #define NODE_STATUS_REMOVE(node,f)  (NODE_STATUS(node) &= ~(f)) -#define NODE_IS_BY_NUMBER(node)       ((NODE_STATUS(node) & NST_BY_NUMBER) != 0) +#define NODE_IS_BY_NUMBER(node)       ((NODE_STATUS(node) & NST_BY_NUMBER)      != 0)  #define NODE_IS_IN_REAL_REPEAT(node)  ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0) -#define NODE_IS_CALLED(node)          ((NODE_STATUS(node) & NST_CALLED)    != 0) +#define NODE_IS_CALLED(node)          ((NODE_STATUS(node) & NST_CALLED)         != 0)  #define NODE_IS_IN_MULTI_ENTRY(node)  ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0) -#define NODE_IS_RECURSION(node)       ((NODE_STATUS(node) & NST_RECURSION) != 0) +#define NODE_IS_RECURSION(node)       ((NODE_STATUS(node) & NST_RECURSION)      != 0)  #define NODE_IS_IN_ZERO_REPEAT(node)  ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0) -#define NODE_IS_NAMED_GROUP(node)     ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0) -#define NODE_IS_ADDR_FIXED(node)      ((NODE_STATUS(node) & NST_ADDR_FIXED)  != 0) -#define NODE_IS_CLEN_FIXED(node)      ((NODE_STATUS(node) & NST_CLEN_FIXED)  != 0) -#define NODE_IS_MIN_FIXED(node)       ((NODE_STATUS(node) & NST_MIN_FIXED)   != 0) -#define NODE_IS_MAX_FIXED(node)       ((NODE_STATUS(node) & NST_MAX_FIXED)   != 0) -#define NODE_IS_MARK1(node)           ((NODE_STATUS(node) & NST_MARK1)       != 0) -#define NODE_IS_MARK2(node)           ((NODE_STATUS(node) & NST_MARK2)       != 0) -#define NODE_IS_NEST_LEVEL(node)      ((NODE_STATUS(node) & NST_NEST_LEVEL)  != 0) -#define NODE_IS_BY_NAME(node)         ((NODE_STATUS(node) & NST_BY_NAME)     != 0) -#define NODE_IS_BACKREF(node)         ((NODE_STATUS(node) & NST_BACKREF)     != 0) +#define NODE_IS_NAMED_GROUP(node)     ((NODE_STATUS(node) & NST_NAMED_GROUP)  != 0) +#define NODE_IS_ADDR_FIXED(node)      ((NODE_STATUS(node) & NST_ADDR_FIXED)   != 0) +#define NODE_IS_CLEN_FIXED(node)      ((NODE_STATUS(node) & NST_CLEN_FIXED)   != 0) +#define NODE_IS_MIN_FIXED(node)       ((NODE_STATUS(node) & NST_MIN_FIXED)    != 0) +#define NODE_IS_MAX_FIXED(node)       ((NODE_STATUS(node) & NST_MAX_FIXED)    != 0) +#define NODE_IS_MARK1(node)           ((NODE_STATUS(node) & NST_MARK1)        != 0) +#define NODE_IS_MARK2(node)           ((NODE_STATUS(node) & NST_MARK2)        != 0) +#define NODE_IS_NEST_LEVEL(node)      ((NODE_STATUS(node) & NST_NEST_LEVEL)   != 0) +#define NODE_IS_BY_NAME(node)         ((NODE_STATUS(node) & NST_BY_NAME)      != 0) +#define NODE_IS_BACKREF(node)         ((NODE_STATUS(node) & NST_BACKREF)      != 0) +#define NODE_IS_CHECKER(node)         ((NODE_STATUS(node) & NST_CHECKER)      != 0) +#define NODE_IS_FIXED_OPTION(node)    ((NODE_STATUS(node) & NST_FIXED_OPTION) != 0) +#define NODE_IS_SUPER(node)           ((NODE_STATUS(node) & NST_SUPER)        != 0) +#define NODE_IS_PROHIBIT_RECURSION(node) \ +    ((NODE_STATUS(node) & NST_PROHIBIT_RECURSION) != 0)  #define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \      ((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0) @@ -168,8 +191,6 @@ typedef enum {  #define NODE_ANCHOR_BODY(node)    ((node)->body) -#define CALLNODE_REFNUM_UNDEF  -1 -  typedef struct {    NodeType node_type;    int status; @@ -178,7 +199,7 @@ typedef struct {    UChar* end;    unsigned int flag;    int    capa;    /* (allocated size - 1) or 0: use buf[] */ -  UChar  buf[NODE_STR_BUF_SIZE]; +  UChar  buf[NODE_STRING_BUF_SIZE];  } StrNode;  typedef struct { @@ -221,17 +242,22 @@ typedef struct {        int called_state;      } m;      struct { -      OnigOptionType option; +      OnigOptionType options;      } o; +    struct { +      /* body is condition */ +      struct _Node* Then; +      struct _Node* Else; +    } te;    };    /* for multiple call reference */ -  OnigLen min_len; /* min length (byte) */ -  OnigLen max_len; /* max length (byte) */ -  int char_len;         /* character length  */ -  int opt_count;        /* referenced count in optimize_node_left() */ +  OnigLen min_len;   /* min length (byte) */ +  OnigLen max_len;   /* max length (byte) */ +  int char_len;      /* character length  */ +  int opt_count;     /* referenced count in optimize_node_left() */  } EnclosureNode; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL  typedef struct {    int           offset; @@ -266,7 +292,7 @@ typedef struct {    int  back_static[NODE_BACKREFS_SIZE];    int* back_dynamic;    int  nest_level; -} BRefNode; +} BackRefNode;  typedef struct {    NodeType node_type; @@ -291,8 +317,18 @@ typedef struct {    int ctype;    int not; +  OnigOptionType options;  } CtypeNode; +typedef struct { +  NodeType node_type; +  int status; + +  enum GimmickType type; +  int  detail_type; +  int  id; +} GimmickNode; +  typedef struct _Node {    union {      struct { @@ -305,13 +341,14 @@ typedef struct _Node {      CClassNode    cclass;      QuantNode     quant;      EnclosureNode enclosure; -    BRefNode      bref; +    BackRefNode   backref;      AnchorNode    anchor;      ConsAltNode   cons;      CtypeNode     ctype; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      CallNode      call;  #endif +    GimmickNode   gimmick;    } u;  } Node; @@ -332,7 +369,11 @@ typedef struct {  } MemEnv;  typedef struct { -  OnigOptionType   option; +  enum SaveType type; +} SaveItem; + +typedef struct { +  OnigOptionType   options;    OnigCaseFoldType case_fold_flag;    OnigEncoding     enc;    OnigSyntaxType*  syntax; @@ -346,7 +387,7 @@ typedef struct {    UChar*           error_end;    regex_t*         reg;       /* for reg->names only */    int              num_call; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    UnsetAddrList*   unset_addr_list;    int              has_call_zero;  #endif @@ -364,6 +405,11 @@ typedef struct {    int has_recursion;  #endif    unsigned int parse_depth; + +  int keep_num; +  int save_num; +  int save_alloc_num; +  SaveItem* saves;  } ScanEnv; @@ -399,6 +445,7 @@ extern int    onig_names_free P_((regex_t* reg));  extern int    onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));  extern int    onig_free_shared_cclass_table P_((void));  extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);  #ifdef ONIG_DEBUG  #ifdef USE_NAMED_GROUP | 
