summary refs log tree commit diff
path: root/src/regparse.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/regparse.h')
-rw-r--r-- src/regparse.h 159
1 files changed, 103 insertions, 56 deletions
diff --git a/src/regparse.h b/src/regparse.h
index 884f4d5..b7260ea 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -33,48 +33,58 @@
/* node type */
typedef enum {
- NODE_STR = 0,
- NODE_CCLASS = 1,
- NODE_CTYPE = 2,
- NODE_BREF = 3,
- NODE_QUANT = 4,
- NODE_ENCLOSURE = 5,
- NODE_ANCHOR = 6,
- NODE_LIST = 7,
- NODE_ALT = 8,
- NODE_CALL = 9
+ NODE_STRING = 0,
+ NODE_CCLASS = 1,
+ NODE_CTYPE = 2,
+ NODE_BACKREF = 3,
+ NODE_QUANT = 4,
+ NODE_ENCLOSURE = 5,
+ NODE_ANCHOR = 6,
+ NODE_LIST = 7,
+ NODE_ALT = 8,
+ NODE_CALL = 9,
+ NODE_GIMMICK = 10 /* new in this change; presumably paired with GimmickNode (u.gimmick, GIMMICK_()) - confirm */
} NodeType;
+/* Kind selector stored in GimmickNode.type.  No comma after the last
+ * enumerator: a trailing comma is rejected by strict C90 compilers and
+ * NodeType above is written without one. */
+enum GimmickType {
+ GIMMICK_FAIL = 0,
+ GIMMICK_KEEP = 1,
+ GIMMICK_SAVE = 2,
+ GIMMICK_UPDATE_VAR = 3
+};
+
/* node type bit */
#define NODE_TYPE2BIT(type) (1<<(type))
-#define BIT_NODE_STR NODE_TYPE2BIT(NODE_STR)
+#define BIT_NODE_STRING NODE_TYPE2BIT(NODE_STRING)
#define BIT_NODE_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
#define BIT_NODE_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
-#define BIT_NODE_BREF NODE_TYPE2BIT(NODE_BREF)
-#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT)
+#define BIT_NODE_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
+#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT)
#define BIT_NODE_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE)
#define BIT_NODE_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
#define BIT_NODE_LIST NODE_TYPE2BIT(NODE_LIST)
#define BIT_NODE_ALT NODE_TYPE2BIT(NODE_ALT)
#define BIT_NODE_CALL NODE_TYPE2BIT(NODE_CALL)
+#define BIT_NODE_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
#define NODE_IS_SIMPLE_TYPE(node) \
((NODE_TYPE2BIT(NODE_TYPE(node)) & \
- (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0)
+ (BIT_NODE_STRING | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BACKREF)) != 0)
#define NODE_TYPE(node) ((node)->u.base.node_type)
-#define SET_NODE_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
+/* Store ntype in the node's type tag.  The whole expansion is parenthesized
+ * so the macro stays a single expression inside ?:, comma lists, etc. */
+#define NODE_SET_TYPE(node, ntype) ((node)->u.base.node_type = (ntype))
#define STR_(node) (&((node)->u.str))
#define CCLASS_(node) (&((node)->u.cclass))
#define CTYPE_(node) (&((node)->u.ctype))
-#define BREF_(node) (&((node)->u.bref))
-#define QUANT_(node) (&((node)->u.quant))
-#define ENCLOSURE_(node) (&((node)->u.enclosure))
+#define BACKREF_(node) (&((node)->u.backref))
+#define QUANT_(node) (&((node)->u.quant))
+#define ENCLOSURE_(node) (&((node)->u.enclosure))
#define ANCHOR_(node) (&((node)->u.anchor))
#define CONS_(node) (&((node)->u.cons))
#define CALL_(node) (&((node)->u.call))
+#define GIMMICK_(node) (&((node)->u.gimmick))
#define NODE_CAR(node) (CONS_(node)->car)
#define NODE_CDR(node) (CONS_(node)->cdr)
@@ -83,6 +93,9 @@ typedef enum {
#define NODE_IS_ANYCHAR(node) \
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
+/* Option flags to use for a ctype node: the node's own options when
+ * NST_FIXED_OPTION is set on it, otherwise the options of reg.
+ * reg is parenthesized so any pointer expression may be passed. */
+#define CTYPE_OPTION(node, reg) \
+ (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : (reg)->options)
+
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
@@ -90,24 +103,25 @@ typedef enum {
#define ENCLOSURE_MEMORY (1<<0)
#define ENCLOSURE_OPTION (1<<1)
#define ENCLOSURE_STOP_BACKTRACK (1<<2)
+#define ENCLOSURE_IF_ELSE (1<<3)
-#define NODE_STR_MARGIN 16
-#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_STRING_MARGIN 16
+#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
#define STRING_RAW (1<<0) /* by backslashed number */
#define STRING_AMBIG (1<<1)
#define STRING_DONT_GET_OPT_INFO (1<<2)
-#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
-#define NSTRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW
-#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW
-#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG
-#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
- (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO
+/* String-node helpers.  The flag setters/clearers are fully parenthesized,
+ * like NODE_STATUS_ADD/NODE_STATUS_REMOVE, so each is a single expression. */
+#define NODE_STRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
+#define NODE_STRING_SET_RAW(node) ((node)->u.str.flag |= STRING_RAW)
+#define NODE_STRING_CLEAR_RAW(node) ((node)->u.str.flag &= ~STRING_RAW)
+#define NODE_STRING_SET_AMBIG(node) ((node)->u.str.flag |= STRING_AMBIG)
+#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
+ ((node)->u.str.flag |= STRING_DONT_GET_OPT_INFO)
-#define NSTRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0)
-#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0)
-#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
+#define NODE_STRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0)
+#define NODE_STRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0)
+#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
(((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) \
@@ -118,7 +132,7 @@ typedef enum {
#define QUANT_BODY_IS_EMPTY_MEM 2
#define QUANT_BODY_IS_EMPTY_REC 3
-/* status bits */
+/* node status bits */
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
#define NST_CLEN_FIXED (1<<2)
@@ -136,28 +150,37 @@ typedef enum {
#define NST_BY_NUMBER (1<<14) /* {n,m} */
#define NST_BY_NAME (1<<15) /* backref by name */
#define NST_BACKREF (1<<16)
+#define NST_CHECKER (1<<17)
+#define NST_FIXED_OPTION (1<<18) /* when set, CTYPE_OPTION() uses the node's own options instead of reg->options */
+#define NST_PROHIBIT_RECURSION (1<<19)
+#define NST_SUPER (1<<20)
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (f))
#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(f))
-#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0)
+#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0)
#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0)
-#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0)
+#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0)
#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0)
-#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0)
+#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0)
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0)
-#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0)
-#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0)
-#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0)
-#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0)
-#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0)
-#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0)
-#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0)
-#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0)
-#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0)
-#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0)
+#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0)
+#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0)
+#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0)
+#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0)
+#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0)
+#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0)
+#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0)
+#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0)
+#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0)
+#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0)
+#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NST_CHECKER) != 0)
+#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NST_FIXED_OPTION) != 0)
+#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NST_SUPER) != 0)
+#define NODE_IS_PROHIBIT_RECURSION(node) \
+ ((NODE_STATUS(node) & NST_PROHIBIT_RECURSION) != 0)
#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0)
@@ -168,8 +191,6 @@ typedef enum {
#define NODE_ANCHOR_BODY(node) ((node)->body)
-#define CALLNODE_REFNUM_UNDEF -1
-
typedef struct {
NodeType node_type;
int status;
@@ -178,7 +199,7 @@ typedef struct {
UChar* end;
unsigned int flag;
int capa; /* (allocated size - 1) or 0: use buf[] */
- UChar buf[NODE_STR_BUF_SIZE];
+ UChar buf[NODE_STRING_BUF_SIZE];
} StrNode;
typedef struct {
@@ -221,17 +242,22 @@ typedef struct {
int called_state;
} m;
struct {
- OnigOptionType option;
+ OnigOptionType options;
} o;
+ struct {
+ /* body is condition */
+ struct _Node* Then;
+ struct _Node* Else;
+ } te;
};
/* for multiple call reference */
- OnigLen min_len; /* min length (byte) */
- OnigLen max_len; /* max length (byte) */
- int char_len; /* character length */
- int opt_count; /* referenced count in optimize_node_left() */
+ OnigLen min_len; /* min length (byte) */
+ OnigLen max_len; /* max length (byte) */
+ int char_len; /* character length */
+ int opt_count; /* referenced count in optimize_node_left() */
} EnclosureNode;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
typedef struct {
int offset;
@@ -266,7 +292,7 @@ typedef struct {
int back_static[NODE_BACKREFS_SIZE];
int* back_dynamic;
int nest_level;
-} BRefNode;
+} BackRefNode;
typedef struct {
NodeType node_type;
@@ -291,8 +317,18 @@ typedef struct {
int ctype;
int not;
+ OnigOptionType options;
} CtypeNode;
+typedef struct {
+ NodeType node_type; /* same leading node_type/status pair as the other node structs shown in this header */
+ int status;
+
+ enum GimmickType type; /* one of the GIMMICK_* values */
+ int detail_type; /* NOTE(review): meaning not visible in this header - confirm against the users of GimmickNode */
+ int id; /* NOTE(review): meaning not visible in this header - confirm against the users of GimmickNode */
+} GimmickNode;
+
typedef struct _Node {
union {
struct {
@@ -305,13 +341,14 @@ typedef struct _Node {
CClassNode cclass;
QuantNode quant;
EnclosureNode enclosure;
- BRefNode bref;
+ BackRefNode backref;
AnchorNode anchor;
ConsAltNode cons;
CtypeNode ctype;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
CallNode call;
#endif
+ GimmickNode gimmick;
} u;
} Node;
@@ -332,7 +369,11 @@ typedef struct {
} MemEnv;
typedef struct {
- OnigOptionType option;
+ enum SaveType type; /* NOTE(review): enum SaveType is not declared anywhere in this diff - confirm it is defined in a header included before this point */
+} SaveItem;
+
+typedef struct {
+ OnigOptionType options;
OnigCaseFoldType case_fold_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
@@ -346,7 +387,7 @@ typedef struct {
UChar* error_end;
regex_t* reg; /* for reg->names only */
int num_call;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
UnsetAddrList* unset_addr_list;
int has_call_zero;
#endif
@@ -364,6 +405,11 @@ typedef struct {
int has_recursion;
#endif
unsigned int parse_depth;
+
+ int keep_num;
+ int save_num;
+ int save_alloc_num;
+ SaveItem* saves;
} ScanEnv;
@@ -399,6 +445,7 @@ extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+/* Declared through P_(()) to match the neighbouring prototypes in this header. */
+extern OnigLen onig_get_tiny_min_len P_((Node* node, unsigned int inhibit_node_types, int* invalid_node));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP