From 98f7065a3f7b386564840bb5b24b94f9335b2e97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Mon, 26 Apr 2021 17:40:17 +0200 Subject: New upstream version 6.9.7.1 --- harnesses/base.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 163 insertions(+), 30 deletions(-) (limited to 'harnesses/base.c') diff --git a/harnesses/base.c b/harnesses/base.c index 1206217..70f98f7 100644 --- a/harnesses/base.c +++ b/harnesses/base.c @@ -1,6 +1,6 @@ /* * base.c contributed by Mark Griffin - * Copyright (c) 2019-2020 K.Kosako + * Copyright (c) 2019-2021 K.Kosako */ #include #include @@ -12,23 +12,31 @@ #include #include "oniguruma.h" -#define PARSE_DEPTH_LIMIT 8 -#define CALL_MAX_NEST_LEVEL 8 -#define SUBEXP_CALL_LIMIT 500 -#define BASE_RETRY_LIMIT 20000 -#define BASE_LENGTH 2048 -#define MATCH_STACK_LIMIT 10000000 -#define MAX_REM_SIZE 1048576 -#define MAX_SLOW_REM_SIZE 1024 -#define SLOW_RETRY_LIMIT 2000 - -//#define EXEC_PRINT_INTERVAL 500000 -//#define DUMP_DATA_INTERVAL 100000 -//#define STAT_PATH "fuzzer.stat_log" - -#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD ) +#define PARSE_DEPTH_LIMIT 8 +#define MAX_SUBEXP_CALL_NEST_LEVEL 8 +#define SUBEXP_CALL_LIMIT 1000 +#define BASE_RETRY_LIMIT 20000 +#define BASE_LENGTH 2048 +#define MATCH_STACK_LIMIT 10000000 +#define MAX_REM_SIZE 1048576 +#define MAX_SLOW_REM_SIZE 1024 +#define MAX_SLOW_REM_SIZE2 100 +#define SLOW_RETRY_LIMIT 2000 +#define SLOW_SUBEXP_CALL_LIMIT 100 +#define MAX_SLOW_BACKWARD_REM_SIZE 200 + +//#define EXEC_PRINT_INTERVAL 500000 
+//#define DUMP_DATA_INTERVAL 100000 +//#define STAT_PATH "fuzzer.stat_log" +//#define PREV_CONTROL + +#ifdef PREV_CONTROL +#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD) +#else +#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD | ONIG_OPTION_IGNORECASE_IS_ASCII) +#endif -#define OPTIONS_AT_RUNTIME (ONIG_OPTION_NOTBOL | ONIG_OPTION_NOTEOL | ONIG_OPTION_CHECK_VALIDITY_OF_STRING | ONIG_OPTION_NOT_BEGIN_STRING | ONIG_OPTION_NOT_END_STRING | ONIG_OPTION_NOT_BEGIN_POSITION) +#define OPTIONS_AT_RUNTIME (ONIG_OPTION_NOTBOL | ONIG_OPTION_NOTEOL | ONIG_OPTION_CHECK_VALIDITY_OF_STRING | ONIG_OPTION_NOT_BEGIN_STRING | ONIG_OPTION_NOT_END_STRING | ONIG_OPTION_NOT_BEGIN_POSITION | ONIG_OPTION_CALLBACK_EACH_MATCH) #define ADJUST_LEN(enc, len) do {\ @@ -38,6 +46,64 @@ typedef unsigned char uint8_t; + +//#define TEST_PATTERN + +#ifdef TEST_PATTERN + +#if 1 +unsigned char TestPattern[] = { +}; +#endif + +#endif /* TEST_PATTERN */ + +#ifdef STANDALONE + +static void +print_options(FILE* fp, OnigOptionType o) +{ + if ((o & ONIG_OPTION_IGNORECASE) != 0) fprintf(fp, " IGNORECASE"); + if ((o & ONIG_OPTION_EXTEND) != 0) fprintf(fp, " EXTEND"); + if ((o & 
ONIG_OPTION_MULTILINE) != 0) fprintf(fp, " MULTILINE"); + if ((o & ONIG_OPTION_SINGLELINE) != 0) fprintf(fp, " SINGLELINE"); + if ((o & ONIG_OPTION_FIND_LONGEST) != 0) fprintf(fp, " FIND_LONGEST"); + if ((o & ONIG_OPTION_FIND_NOT_EMPTY) != 0) fprintf(fp, " FIND_NOT_EMPTY"); + if ((o & ONIG_OPTION_NEGATE_SINGLELINE) != 0) fprintf(fp, " NEGATE_SINGLELINE"); + if ((o & ONIG_OPTION_DONT_CAPTURE_GROUP) != 0) fprintf(fp, " DONT_CAPTURE_GROUP"); + if ((o & ONIG_OPTION_CAPTURE_GROUP) != 0) fprintf(fp, " CAPTURE_GROUP"); + if ((o & ONIG_OPTION_NOTBOL) != 0) fprintf(fp, " NOTBOL"); + if ((o & ONIG_OPTION_NOTEOL) != 0) fprintf(fp, " NOTEOL"); + if ((o & ONIG_OPTION_POSIX_REGION) != 0) fprintf(fp, " POSIX_REGION"); + if ((o & ONIG_OPTION_CHECK_VALIDITY_OF_STRING) != 0) fprintf(fp, " CHECK_VALIDITY_OF_STRING"); + if ((o & ONIG_OPTION_IGNORECASE_IS_ASCII) != 0) fprintf(fp, " IGNORECASE_IS_ASCII"); + if ((o & ONIG_OPTION_WORD_IS_ASCII) != 0) fprintf(fp, " WORD_IS_ASCII"); + if ((o & ONIG_OPTION_DIGIT_IS_ASCII) != 0) fprintf(fp, " DIGIT_IS_ASCII"); + if ((o & ONIG_OPTION_SPACE_IS_ASCII) != 0) fprintf(fp, " SPACE_IS_ASCII"); + if ((o & ONIG_OPTION_POSIX_IS_ASCII) != 0) fprintf(fp, " POSIX_IS_ASCII"); + if ((o & ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER) != 0) fprintf(fp, " TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER"); + if ((o & ONIG_OPTION_TEXT_SEGMENT_WORD) != 0) fprintf(fp, " TEXT_SEGMENT_WORD"); + if ((o & ONIG_OPTION_NOT_BEGIN_STRING) != 0) fprintf(fp, " NOT_BIGIN_STRING"); + if ((o & ONIG_OPTION_NOT_END_STRING) != 0) fprintf(fp, " NOT_END_STRING"); + if ((o & ONIG_OPTION_NOT_BEGIN_POSITION) != 0) fprintf(fp, " NOT_BEGIN_POSITION"); + if ((o & ONIG_OPTION_CALLBACK_EACH_MATCH) != 0) fprintf(fp, " CALLBACK_EACH_MATCH"); +} + +static void +to_binary(unsigned int v, char s[/* 33 */]) +{ + unsigned int mask; + int i; + + mask = 1 << (sizeof(v) * 8 - 1); + i = 0; + do { + s[i++] = (mask & v ? 
'1' : '0'); + } while (mask >>= 1); + s[i] = 0; +} +#endif + #ifdef DUMP_INPUT static void dump_input(unsigned char* data, size_t len) @@ -104,6 +170,7 @@ dump_data(FILE* fp, unsigned char* data, int len) #else +#ifdef EXEC_PRINT_INTERVAL static void output_current_time(FILE* fp) { @@ -115,9 +182,23 @@ output_current_time(FILE* fp) fprintf(fp, "%s", d); } +#endif #endif +static int +progress_callout_func(OnigCalloutArgs* args, void* user_data) +{ + return ONIG_CALLOUT_SUCCESS; +} + +static int +each_match_callback_func(const UChar* str, const UChar* end, + const UChar* match_start, OnigRegion* region, void* user_data) +{ + return ONIG_NORMAL; +} + static int search(regex_t* reg, unsigned char* str, unsigned char* end, OnigOptionType options, int backward, int sl) { @@ -145,7 +226,10 @@ search(regex_t* reg, unsigned char* str, unsigned char* end, OnigOptionType opti onig_set_retry_limit_in_search(retry_limit); onig_set_match_stack_limit_size(MATCH_STACK_LIMIT); - onig_set_subexp_call_limit_in_search(SUBEXP_CALL_LIMIT); + if (sl >= 2) + onig_set_subexp_call_limit_in_search(SLOW_SUBEXP_CALL_LIMIT); + else + onig_set_subexp_call_limit_in_search(SUBEXP_CALL_LIMIT); if (backward != 0) { start = end; @@ -218,10 +302,12 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, EXEC_COUNT_INTERVAL++; onig_initialize(&enc, 1); + (void)onig_set_progress_callout(progress_callout_func); #ifdef PARSE_DEPTH_LIMIT onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); #endif - onig_set_subexp_call_max_nest_level(CALL_MAX_NEST_LEVEL); + onig_set_subexp_call_max_nest_level(MAX_SUBEXP_CALL_NEST_LEVEL); + onig_set_callback_each_match(each_match_callback_func); r = onig_new(&reg, pattern, pattern_end, (options & OPTIONS_AT_COMPILE), enc, syntax, &einfo); @@ -270,18 +356,38 @@ alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, unsigned char *pattern_end; unsigned char *str_null_end; +#ifdef TEST_PATTERN + pattern = (unsigned char 
*)malloc(sizeof(TestPattern)); + memcpy(pattern, TestPattern, sizeof(TestPattern)); + pattern_end = pattern + sizeof(TestPattern); +#else pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); memcpy(pattern, data, pattern_size); pattern_end = pattern + pattern_size; +#endif + data += pattern_size; rem_size -= pattern_size; if (rem_size > MAX_REM_SIZE) rem_size = MAX_REM_SIZE; sl = onig_detect_can_be_slow_pattern(pattern, pattern_end, options, enc, syntax); +#ifdef STANDALONE + fprintf(stdout, "sl: %d\n", sl); +#endif if (sl > 0) { - if (rem_size > MAX_SLOW_REM_SIZE) - rem_size = MAX_SLOW_REM_SIZE; + if (sl >= 100) { + if (rem_size > MAX_SLOW_REM_SIZE2) + rem_size = MAX_SLOW_REM_SIZE2; + } + else { + if (rem_size > MAX_SLOW_REM_SIZE) + rem_size = MAX_SLOW_REM_SIZE; + } + } + if (backward != 0 && enc == ONIG_ENCODING_GB18030) { + if (rem_size > MAX_SLOW_BACKWARD_REM_SIZE) + rem_size = MAX_SLOW_BACKWARD_REM_SIZE; } ADJUST_LEN(enc, rem_size); @@ -302,11 +408,19 @@ alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, return r; } +#ifdef PREV_CONTROL #ifdef SYNTAX_TEST #define NUM_CONTROL_BYTES 7 #else #define NUM_CONTROL_BYTES 6 #endif +#else +#ifdef SYNTAX_TEST +#define NUM_CONTROL_BYTES 8 +#else +#define NUM_CONTROL_BYTES 7 +#endif +#endif int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) { @@ -365,6 +479,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) ONIG_SYNTAX_GNU_REGEX, ONIG_SYNTAX_JAVA, ONIG_SYNTAX_PERL_NG, + ONIG_SYNTAX_PYTHON, ONIG_SYNTAX_ONIGURUMA }; @@ -376,6 +491,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) "GNU Regex", "Java", "Perl+NG", + "Python", "Oniguruma" }; #endif @@ -394,7 +510,9 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) OnigSyntaxType* syntax; #ifndef STANDALONE +#ifdef EXEC_PRINT_INTERVAL static FILE* STAT_FP; +#endif #endif INPUT_COUNT++; @@ -438,14 +556,22 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) syntax 
= ONIG_SYNTAX_DEFAULT; #endif +#ifdef PREV_CONTROL if ((data[2] & 0xc0) == 0) options = data[0] | (data[1] << 8) | (data[2] << 16); +#else + if ((data[3] & 0xc0) == 0) + options = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); +#endif else options = data[0] & ONIG_OPTION_IGNORECASE; data++; rem_size--; data++; rem_size--; data++; rem_size--; +#ifndef PREV_CONTROL + data++; rem_size--; +#endif pattern_size_choice = data[0]; data++; rem_size--; @@ -465,18 +591,25 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) } #ifdef STANDALONE - dump_data(stdout, data, pattern_size); + { + char soptions[33]; + + dump_data(stdout, data, pattern_size); + to_binary(options, soptions); #ifdef SYNTAX_TEST - fprintf(stdout, - "enc: %s, syntax: %s, options: %u, pattern_size: %d, back:%d\n", - ONIGENC_NAME(enc), - syntax_names[syntax_choice % num_syntaxes], - options, - pattern_size, backward); + fprintf(stdout, + "enc: %s, syntax: %s, pattern_size: %d, back:%d\noptions: %s\n", + ONIGENC_NAME(enc), + syntax_names[syntax_choice % num_syntaxes], + pattern_size, backward, soptions); #else - fprintf(stdout, "enc: %s, options: %u, pattern_size: %d, back:%d\n", - ONIGENC_NAME(enc), options, pattern_size, backward); + fprintf(stdout, "enc: %s, pattern_size: %d, back:%d\noptions: %s\n", + ONIGENC_NAME(enc), pattern_size, backward, soptions); #endif + + print_options(stdout, options); + fprintf(stdout, "\n"); + } #endif #ifdef DUMP_INPUT -- cgit v1.2.3