summary | refs | log | tree | commit | diff
path: root/harnesses
diff options
context:
space:
mode:
Diffstat (limited to 'harnesses')
-rw-r--r-- harnesses/base.c 166
-rw-r--r-- harnesses/makefile 9
2 files changed, 117 insertions, 58 deletions
diff --git a/harnesses/base.c b/harnesses/base.c
index a88e6f2..1206217 100644
--- a/harnesses/base.c
+++ b/harnesses/base.c
@@ -10,16 +10,32 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
-
#include "oniguruma.h"
#define PARSE_DEPTH_LIMIT 8
-#define RETRY_LIMIT 5000
#define CALL_MAX_NEST_LEVEL 8
+#define SUBEXP_CALL_LIMIT 500
+#define BASE_RETRY_LIMIT 20000
+#define BASE_LENGTH 2048
+#define MATCH_STACK_LIMIT 10000000
+#define MAX_REM_SIZE 1048576
+#define MAX_SLOW_REM_SIZE 1024
+#define SLOW_RETRY_LIMIT 2000
+
//#define EXEC_PRINT_INTERVAL 500000
//#define DUMP_DATA_INTERVAL 100000
//#define STAT_PATH "fuzzer.stat_log"
+#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD )
+
+#define OPTIONS_AT_RUNTIME (ONIG_OPTION_NOTBOL | ONIG_OPTION_NOTEOL | ONIG_OPTION_CHECK_VALIDITY_OF_STRING | ONIG_OPTION_NOT_BEGIN_STRING | ONIG_OPTION_NOT_END_STRING | ONIG_OPTION_NOT_BEGIN_POSITION)
+
+
+#define ADJUST_LEN(enc, len) do {\
+ int adjust_len_unit_ = ONIGENC_MBC_MINLEN(enc); /* unit size reported for enc; name chosen to avoid capturing caller locals */\
+ if (adjust_len_unit_ != 1) { (len) -= (len) % adjust_len_unit_; } /* round len down to a multiple of the unit; len is evaluated twice */\
+} while (0)
+
typedef unsigned char uint8_t;
#ifdef DUMP_INPUT
@@ -103,14 +119,34 @@ output_current_time(FILE* fp)
#endif
static int
-search(regex_t* reg, unsigned char* str, unsigned char* end, int backward)
+search(regex_t* reg, unsigned char* str, unsigned char* end, OnigOptionType options, int backward, int sl)
{
int r;
unsigned char *start, *range;
OnigRegion *region;
+ unsigned int retry_limit;
+ size_t len;
region = onig_region_new();
+ len = (size_t )(end - str);
+ if (len < BASE_LENGTH) {
+ if (sl >= 2)
+ retry_limit = (unsigned int )SLOW_RETRY_LIMIT;
+ else
+ retry_limit = (unsigned int )BASE_RETRY_LIMIT;
+ }
+ else
+ retry_limit = (unsigned int )(BASE_RETRY_LIMIT * BASE_LENGTH / len);
+
+#ifdef STANDALONE
+ fprintf(stdout, "retry limit: %u\n", retry_limit);
+#endif
+
+ onig_set_retry_limit_in_search(retry_limit);
+ onig_set_match_stack_limit_size(MATCH_STACK_LIMIT);
+ onig_set_subexp_call_limit_in_search(SUBEXP_CALL_LIMIT);
+
if (backward != 0) {
start = end;
range = str;
@@ -120,7 +156,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end, int backward)
range = end;
}
- r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
+ r = onig_search(reg, str, end, start, range, region, (options & OPTIONS_AT_RUNTIME));
if (r >= 0) {
#ifdef STANDALONE
int i;
@@ -168,7 +204,8 @@ static long VALID_STRING_COUNT;
static int
exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
- char* apattern, char* apattern_end, char* astr, UChar* end, int backward)
+ char* apattern, char* apattern_end, char* astr, UChar* end, int backward,
+ int sl)
{
int r;
regex_t* reg;
@@ -181,14 +218,13 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
EXEC_COUNT_INTERVAL++;
onig_initialize(&enc, 1);
- onig_set_retry_limit_in_search(RETRY_LIMIT);
#ifdef PARSE_DEPTH_LIMIT
onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT);
#endif
onig_set_subexp_call_max_nest_level(CALL_MAX_NEST_LEVEL);
r = onig_new(&reg, pattern, pattern_end,
- options, enc, syntax, &einfo);
+ (options & OPTIONS_AT_COMPILE), enc, syntax, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r, &einfo);
@@ -208,12 +244,12 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
}
REGEX_SUCCESS_COUNT++;
- r = search(reg, pattern, pattern_end, backward);
+ r = search(reg, pattern, pattern_end, options, backward, sl);
if (r == -2) return -2;
if (onigenc_is_valid_mbc_string(enc, str, end) != 0) {
VALID_STRING_COUNT++;
- r = search(reg, str, end, backward);
+ r = search(reg, str, end, options, backward, sl);
if (r == -2) return -2;
}
@@ -224,43 +260,52 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
static int
alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
- int backward, int pattern_size, size_t remaining_size, unsigned char *data)
+ int backward, int pattern_size, size_t rem_size, unsigned char *data)
{
+ extern int onig_detect_can_be_slow_pattern(const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax);
+ /* Copies the first pattern_size bytes of data as the pattern and the (capped) bytes that follow as the subject string, runs exec() on the copies, frees them and returns exec()'s result. */
int r;
+ int sl;
+ unsigned char *pattern;
unsigned char *pattern_end;
unsigned char *str_null_end;
- // copy first PATTERN_SIZE bytes off to be the pattern
- unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1);
+ pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1);
memcpy(pattern, data, pattern_size);
pattern_end = pattern + pattern_size;
data += pattern_size;
- remaining_size -= pattern_size;
+ rem_size -= pattern_size;
+ /* rem_size now counts only the bytes after the pattern; bound the subject length */
+ if (rem_size > MAX_REM_SIZE) rem_size = MAX_REM_SIZE;
+ /* a positive detector result applies the smaller MAX_SLOW_REM_SIZE cap; sl is also forwarded to exec() */
+ sl = onig_detect_can_be_slow_pattern(pattern, pattern_end, options, enc, syntax);
+ if (sl > 0) {
+ if (rem_size > MAX_SLOW_REM_SIZE)
+ rem_size = MAX_SLOW_REM_SIZE;
+ }
-#if defined(UTF16_BE) || defined(UTF16_LE)
- if (remaining_size % 2 == 1) remaining_size--;
+ ADJUST_LEN(enc, rem_size);
+#ifdef STANDALONE
+ fprintf(stdout, "rem_size: %zu\n", rem_size);
#endif
- unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1);
- memcpy(str, data, remaining_size);
- str_null_end = str + remaining_size;
+ unsigned char *str = (unsigned char*)malloc(rem_size != 0 ? rem_size : 1);
+ memcpy(str, data, rem_size);
+ str_null_end = str + rem_size;
r = exec(enc, options, syntax,
(char *)pattern, (char *)pattern_end,
- (char *)str, str_null_end, backward);
+ (char *)str, str_null_end, backward, sl);
free(pattern);
free(str);
return r;
}
-#define OPTIONS_MASK (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP)
-
-
#ifdef SYNTAX_TEST
-#define NUM_CONTROL_BYTES 6
+#define NUM_CONTROL_BYTES 7
#else
-#define NUM_CONTROL_BYTES 5
+#define NUM_CONTROL_BYTES 6
#endif
int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
@@ -285,14 +330,14 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
ONIG_ENCODING_CP1251,
ONIG_ENCODING_BIG5,
ONIG_ENCODING_GB18030,
- ONIG_ENCODING_UTF8,
- ONIG_ENCODING_UTF8,
- ONIG_ENCODING_UTF8,
- ONIG_ENCODING_UTF8,
- ONIG_ENCODING_UTF8,
- ONIG_ENCODING_UTF8,
- ONIG_ENCODING_UTF8,
- ONIG_ENCODING_UTF8,
+ ONIG_ENCODING_UTF16_BE,
+ ONIG_ENCODING_UTF16_LE,
+ ONIG_ENCODING_UTF16_BE,
+ ONIG_ENCODING_UTF16_LE,
+ ONIG_ENCODING_UTF32_BE,
+ ONIG_ENCODING_UTF32_LE,
+ ONIG_ENCODING_UTF32_BE,
+ ONIG_ENCODING_UTF32_LE,
ONIG_ENCODING_ISO_8859_1,
ONIG_ENCODING_ISO_8859_2,
ONIG_ENCODING_ISO_8859_3,
@@ -341,7 +386,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
int r;
int backward;
int pattern_size;
- size_t remaining_size;
+ size_t rem_size;
unsigned char *data;
unsigned char pattern_size_choice;
OnigOptionType options;
@@ -364,7 +409,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
if (Size < NUM_CONTROL_BYTES) return 0;
- remaining_size = Size;
+ rem_size = Size;
data = (unsigned char* )(Data);
#ifdef UTF16_BE
@@ -375,7 +420,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
#else
encoding_choice = data[0];
data++;
- remaining_size--;
+ rem_size--;
int num_encodings = sizeof(encodings)/sizeof(encodings[0]);
enc = encodings[encoding_choice % num_encodings];
@@ -385,7 +430,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
#ifdef SYNTAX_TEST
syntax_choice = data[0];
data++;
- remaining_size--;
+ rem_size--;
int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]);
syntax = syntaxes[syntax_choice % num_syntaxes];
@@ -393,31 +438,30 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
syntax = ONIG_SYNTAX_DEFAULT;
#endif
- if ((data[1] & 0xc0) == 0)
- options = (data[0] | (data[1] << 8)) & OPTIONS_MASK;
+ if ((data[2] & 0xc0) == 0)
+ options = data[0] | (data[1] << 8) | (data[2] << 16);
else
options = data[0] & ONIG_OPTION_IGNORECASE;
- data++;
- remaining_size--;
- data++;
- remaining_size--;
+ data++; rem_size--;
+ data++; rem_size--;
+ data++; rem_size--;
pattern_size_choice = data[0];
- data++;
- remaining_size--;
+ data++; rem_size--;
backward = (data[0] == 0xbb);
- data++;
- remaining_size--;
+ data++; rem_size--;
- if (remaining_size == 0)
+ if (backward != 0) {
+ options = options & ~ONIG_OPTION_FIND_LONGEST;
+ }
+
+ if (rem_size == 0)
pattern_size = 0;
else {
- pattern_size = (int )pattern_size_choice % remaining_size;
-#if defined(UTF16_BE) || defined(UTF16_LE)
- if (pattern_size % 2 == 1) pattern_size--;
-#endif
+ pattern_size = (int )pattern_size_choice % rem_size;
+ ADJUST_LEN(enc, pattern_size);
}
#ifdef STANDALONE
@@ -440,7 +484,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
#endif
r = alloc_exec(enc, options, syntax, backward, pattern_size,
- remaining_size, data);
+ rem_size, data);
if (r == -2) exit(-2);
#ifndef STANDALONE
@@ -485,15 +529,25 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
#ifdef STANDALONE
+#define MAX_INPUT_DATA_SIZE 4194304
+/* Standalone driver: reads at most max_size bytes from stdin (argv[1] if given, never more than the buffer holds) and passes them to LLVMFuzzerTestOneInput() once. */
extern int main(int argc, char* argv[])
{
+ size_t max_size;
size_t n;
- uint8_t Data[10000];
+ static uint8_t Data[MAX_INPUT_DATA_SIZE]; /* static: keeps the 4 MiB buffer off the stack */
- n = read(0, Data, sizeof(Data));
- fprintf(stdout, "n: %ld\n", n);
- LLVMFuzzerTestOneInput(Data, n);
+ max_size = sizeof(Data);
+ if (argc > 1) {
+ long req = strtol(argv[1], NULL, 10);
+ /* ignore negative or oversized requests so read() cannot overrun Data */
+ if (req >= 0 && (unsigned long )req < sizeof(Data)) max_size = (size_t )req;
+ }
+ n = read(0, Data, max_size);
+ if (n == (size_t )-1) { perror("read"); return 1; } /* read() failed: do not pass a bogus size on */
+ fprintf(stdout, "read size: %zu, max_size: %zu\n", n, max_size);
+ LLVMFuzzerTestOneInput(Data, n);
return 0;
}
#endif /* STANDALONE */
diff --git a/harnesses/makefile b/harnesses/makefile
index b324295..d4fcfb6 100644
--- a/harnesses/makefile
+++ b/harnesses/makefile
@@ -1,4 +1,8 @@
# makefile for harness
+DEBUG_OUT =
+#DEBUG_OUT = -DONIG_DEBUG_PARSE -DONIG_DEBUG_COMPILE
+#DEBUG_OUT = -DONIG_DEBUG_PARSE -DONIG_DEBUG_COMPILE -DONIG_DEBUG_MATCH_COUNTER
+
SRC = ../src
CFLAGS = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer
CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DSTANDALONE
@@ -12,7 +16,8 @@ TARGETS = fuzzer-encode fuzzer-syntax fuzzer-utf16-be fuzzer-utf16-le \
OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full fuzzer-deluxe read-deluxe
-default: $(TARGETS)
+#default: $(TARGETS)
+default: read-syntax
fuzzer-encode: base.c $(ONIG_LIB)
clang $(CFLAGS) $< $(LIBS) -o $@
@@ -60,7 +65,7 @@ libfuzzer-onig-full: libfuzzer-onig.cpp $(ONIG_LIB)
$(ONIG_LIB):
cd ..; make clean
#cd ..; autoreconf -vfi
- cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer"
+ cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer $(DEBUG_OUT)" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer"
cd ..; make -j4