/* * deluxe.c * contributed by Mark Griffin */ #include #include "oniguruma.h" #include #include #define RETRY_LIMIT 10000 #define DEPTH_LIMIT 10 typedef unsigned char uint8_t; static int search(regex_t* reg, unsigned char* str, unsigned char* end) { int r; unsigned char *start, *range; OnigRegion *region; region = onig_region_new(); start = str; range = end; r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); if (r >= 0) { int i; fprintf(stdout, "match at %d (%s)\n", r, ONIGENC_NAME(onig_get_encoding(reg))); for (i = 0; i < region->num_regs; i++) { fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); } } else if (r == ONIG_MISMATCH) { fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r); fprintf(stdout, "ERROR: %s\n", s); fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); onig_region_free(region, 1 /* 1:free self, 0:free contents only */); return -1; } onig_region_free(region, 1 /* 1:free self, 0:free contents only */); return 0; } static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN; static int exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, OnigOptionType options, char* apattern, char* apattern_end, char* astr, char* astr_end) { int r; regex_t* reg; OnigCompileInfo ci; OnigErrorInfo einfo; UChar* pattern = (UChar* )apattern; UChar* str = (UChar* )astr; UChar* pattern_end = (UChar* )apattern_end; unsigned char* end = (unsigned char* )astr_end; onig_initialize(&str_enc, 1); onig_set_retry_limit_in_search(RETRY_LIMIT); onig_set_parse_depth_limit(DEPTH_LIMIT); ci.num_of_elements = 5; ci.pattern_enc = pattern_enc; ci.target_enc = str_enc; ci.syntax = ONIG_SYNTAX_DEFAULT; ci.option = options; ci.case_fold_flag = CF; r = onig_new_deluxe(®, pattern, pattern_end, &ci, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stdout, "ERROR: %s\n", s); onig_end(); return -1; } if (onigenc_is_valid_mbc_string(str_enc, str, end) != 0) { r = search(reg, str, end); } onig_free(reg); onig_end(); return 0; } #define PATTERN_SIZE 48 #define NUM_CONTROL_BYTES 1 #define MIN_STR_SIZE 2 int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) { int r; size_t remaining_size; unsigned char *data; unsigned char pat_encoding_choice; unsigned char str_encoding_choice; unsigned char *pattern; unsigned char *str; unsigned char *pattern_end; unsigned char *str_end; unsigned int num_encodings; OnigEncodingType *pattern_enc; OnigEncodingType *str_enc; OnigEncodingType *encodings[] = { ONIG_ENCODING_ASCII, ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_ISO_8859_2, ONIG_ENCODING_ISO_8859_3, ONIG_ENCODING_ISO_8859_4, ONIG_ENCODING_ISO_8859_5, ONIG_ENCODING_ISO_8859_6, ONIG_ENCODING_ISO_8859_7, ONIG_ENCODING_ISO_8859_8, ONIG_ENCODING_ISO_8859_9, ONIG_ENCODING_ISO_8859_10, ONIG_ENCODING_ISO_8859_11, ONIG_ENCODING_ISO_8859_13, ONIG_ENCODING_ISO_8859_14, ONIG_ENCODING_ISO_8859_15, ONIG_ENCODING_ISO_8859_16, ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_LE, ONIG_ENCODING_UTF32_BE, ONIG_ENCODING_UTF32_LE, ONIG_ENCODING_EUC_JP, ONIG_ENCODING_EUC_TW, ONIG_ENCODING_EUC_KR, ONIG_ENCODING_EUC_CN, ONIG_ENCODING_SJIS, //ONIG_ENCODING_KOI8, ONIG_ENCODING_KOI8_R, ONIG_ENCODING_CP1251, ONIG_ENCODING_BIG5, ONIG_ENCODING_GB18030, }; if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) return 0; if (Size > 0x1000) return 0; remaining_size = Size; data = (unsigned char *)(Data); // pull off bytes to switch off pat_encoding_choice = data[0]; data++; remaining_size--; str_encoding_choice = data[0]; data++; remaining_size--; // copy first PATTERN_SIZE bytes off to be the pattern pattern = (unsigned char *)malloc(PATTERN_SIZE); memcpy(pattern, data, PATTERN_SIZE); pattern_end = pattern + PATTERN_SIZE; data += PATTERN_SIZE; remaining_size -= PATTERN_SIZE; str = (unsigned char*)malloc(remaining_size); memcpy(str, data, remaining_size); str_end = str + remaining_size; num_encodings = sizeof(encodings) / sizeof(encodings[0]); pattern_enc = encodings[pat_encoding_choice % num_encodings]; str_enc = encodings[str_encoding_choice % num_encodings]; r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, (char *)str, (char *)str_end); free(pattern); free(str); return r; } #ifdef STANDALONE #include extern int main(int argc, char* argv[]) { size_t n; uint8_t Data[10000]; n = read(0, Data, sizeof(Data)); fprintf(stdout, "n: %ld\n", n); LLVMFuzzerTestOneInput(Data, n); return 0; } #endif /* STANDALONE */