diff options
Diffstat (limited to 'harnesses')
| -rw-r--r-- | harnesses/ascii_compatible.dict | 20 | ||||
| -rw-r--r-- | harnesses/deluxe-encode-harness.c | 39 | ||||
| -rw-r--r-- | harnesses/encode-harness.c | 291 | ||||
| -rw-r--r-- | harnesses/libfuzzer-onig.cpp | 45 | ||||
| -rw-r--r-- | harnesses/makefile | 69 | ||||
| -rw-r--r-- | harnesses/regset-harness.c | 379 | ||||
| -rw-r--r-- | harnesses/syntax-harness.c | 120 | 
7 files changed, 748 insertions, 215 deletions
| diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict index 820bf47..e6e00db 100644 --- a/harnesses/ascii_compatible.dict +++ b/harnesses/ascii_compatible.dict @@ -1,10 +1,7 @@  # First-pass fuzzing dictionary for Oniguruma by Mark Griffin -"\\o{17777777777}" -"\\777" -"\\u" -"\\uFFFF" -"\\xFF" -"\\x{70000000}" +"\\o{34}" +"\\123" +"\\x{40}"  "\\C-"  "\\M-\\C-"  "\\X" @@ -12,6 +9,8 @@  "\\p{^"  "}"  "]" +"]" +")"  ")"  "\\n"  "\\r" @@ -47,10 +46,13 @@  "\\B"  "(?y{"  "[abcd1-9]" +"[\\w]" +"[\\W]" +"[\\s]" +"[\\S]"  "[\\w\\d"  "[\\p{Alphabetic}" -"[\\P{Arabic}" -"[\\x{ffff}" +"[\\x{03}"  "[a-w&&"  "[^"  "[:graph:]" @@ -88,7 +90,6 @@  "(?(<name+0>))"  "(?(<name+1>))"  "(?(<name-1>))" -"(*ERROR{-2000})"  "(*COUNT[tag]{X})"  "\\1"  "\\2" @@ -106,6 +107,5 @@  "(?<name>a|b\\g<name>c)"  "(?-i:\\g<name>)"  "\\N{name}" -"\\p{Hiragana}"  "\\p{Katakana}"  "\\p{Emoji}" diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe-encode-harness.c index e1f84a5..aabe916 100644 --- a/harnesses/deluxe-encode-harness.c +++ b/harnesses/deluxe-encode-harness.c @@ -49,39 +49,6 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)    return 0;  } -static int -exec(OnigEncoding enc, OnigOptionType options, -     char* apattern, char* apattern_end, char* astr, char* astr_end) -{ -  int r; -  regex_t* reg; -  OnigErrorInfo einfo; -  UChar* pattern = (UChar* )apattern; -  UChar* str     = (UChar* )astr; -  UChar* pattern_end = (UChar* )apattern_end; -  unsigned char *end = (unsigned char* )astr_end; - -  onig_initialize(&enc, 1); -  onig_set_retry_limit_in_match(DEFAULT_LIMIT); -  onig_set_parse_depth_limit(DEFAULT_LIMIT); - -  r = onig_new(&reg, pattern, pattern_end, -               options, enc, ONIG_SYNTAX_DEFAULT, &einfo); -  if (r != ONIG_NORMAL) { -    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; -    onig_error_code_to_str((UChar* )s, r, &einfo); -    fprintf(stdout, "ERROR: %s\n", s); -    onig_end(); -    return -1; -  } - -  r = search(reg, str, 
end); - -  onig_free(reg); -  onig_end(); -  return 0; -} -  static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;  static int @@ -196,15 +163,13 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)    remaining_size--;    // copy first PATTERN_SIZE bytes off to be the pattern -  pattern = (unsigned char *)malloc(PATTERN_SIZE+4); -  memset(pattern, 0, PATTERN_SIZE+4); +  pattern = (unsigned char *)malloc(PATTERN_SIZE);    memcpy(pattern, data, PATTERN_SIZE);    pattern_end = pattern + PATTERN_SIZE;    data += PATTERN_SIZE;    remaining_size -= PATTERN_SIZE; -  str = (unsigned char*)malloc(remaining_size+4); -  memset(str, 0, remaining_size+4); +  str = (unsigned char*)malloc(remaining_size);    memcpy(str, data, remaining_size);    str_end = str + remaining_size; diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c index e57fd4f..5db0512 100644 --- a/harnesses/encode-harness.c +++ b/harnesses/encode-harness.c @@ -3,13 +3,19 @@   * contributed by Mark Griffin   */  #include <stdio.h> -#include "oniguruma.h" - +#include <unistd.h>  #include <stdlib.h>  #include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> -#define PARSE_DEPTH_LIMIT   120 -#define RETRY_LIMIT        4000 +#include "oniguruma.h" + + +//#define PARSE_DEPTH_LIMIT   120 +#define RETRY_LIMIT        3500  typedef unsigned char uint8_t; @@ -26,6 +32,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)    range = end;    r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);    if (r >= 0) { +#ifdef WITH_READ_MAIN      int i;      fprintf(stdout, "match at %d  (%s)\n", r, @@ -33,17 +40,29 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)      for (i = 0; i < region->num_regs; i++) {        fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);      } +#endif    }    else if (r == ONIG_MISMATCH) { +#ifdef WITH_READ_MAIN      fprintf(stdout, "search fail (%s)\n",              
ONIGENC_NAME(onig_get_encoding(reg))); +#endif    }    else { /* error */ +#ifdef WITH_READ_MAIN      char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +      onig_error_code_to_str((UChar* )s, r);      fprintf(stdout, "ERROR: %s\n", s);      fprintf(stdout, "  (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +#endif      onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + +    if (r == ONIGERR_STACK_BUG || +        r == ONIGERR_UNDEFINED_BYTECODE || +        r == ONIGERR_UNEXPECTED_BYTECODE) +      return -2; +      return -1;    } @@ -51,8 +70,14 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)    return 0;  } +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; +  static int -exec(OnigEncoding enc, OnigOptionType options, +exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,       char* apattern, char* apattern_end, char* astr, UChar* end)  {    int r; @@ -62,22 +87,41 @@ exec(OnigEncoding enc, OnigOptionType options,    UChar* str     = (UChar* )astr;    UChar* pattern_end = (UChar* )apattern_end; +  EXEC_COUNT++; +  EXEC_COUNT_INTERVAL++; +    onig_initialize(&enc, 1);    onig_set_retry_limit_in_match(RETRY_LIMIT); -  onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); +  //onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT);    r = onig_new(&reg, pattern, pattern_end, -               options, enc, ONIG_SYNTAX_DEFAULT, &einfo); +               options, enc, syntax, &einfo);    if (r != ONIG_NORMAL) {      char s[ONIG_MAX_ERROR_MESSAGE_LEN];      onig_error_code_to_str((UChar* )s, r, &einfo); +#ifdef WITH_READ_MAIN      fprintf(stdout, "ERROR: %s\n", s); +#endif      onig_end(); -    return -1; + +    if (r == ONIGERR_PARSER_BUG || +        r == ONIGERR_STACK_BUG  || +        r == ONIGERR_UNDEFINED_BYTECODE || +        r == ONIGERR_UNEXPECTED_BYTECODE) { +      return -2; +    } +    else +      return -1;    } +  REGEX_SUCCESS_COUNT++; + +  
r = search(reg, pattern, pattern_end); +  if (r == -2) return -2;    if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { +    VALID_STRING_COUNT++;      r = search(reg, str, end); +    if (r == -2) return -2;    }    onig_free(reg); @@ -85,52 +129,114 @@ exec(OnigEncoding enc, OnigOptionType options,    return 0;  } -#define PATTERN_SIZE 32 -#define NUM_CONTROL_BYTES 1 -#define MIN_STR_SIZE  1 -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +#if 0 +static void +output_data(char* path, const uint8_t * data, size_t size)  { -  if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) -    return 0; -  if (Size > 0x1000) -    return 0; +  int fd; +  ssize_t n; +  fd = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IRGRP|S_IROTH); +  if (fd == -1) { +    fprintf(stderr, "ERROR: output_data(): can't open(%s)\n", path); +    return ; +  } + +  n = write(fd, (const void* )data, size); +  if (n != size) { +    fprintf(stderr, "ERROR: output_data(): n: %ld, size: %ld\n", n, size); +  } +  close(fd); +} +#endif + + +static int +alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, +           int pattern_size, size_t remaining_size, unsigned char *data) +{ +  int r;    unsigned char *pattern_end;    unsigned char *str_null_end; -  size_t remaining_size = Size; -  unsigned char *data = (unsigned char *)(Data); +  // copy first PATTERN_SIZE bytes off to be the pattern +  unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? 
pattern_size : 1); +  memcpy(pattern, data, pattern_size); +  pattern_end = pattern + pattern_size; +  data += pattern_size; +  remaining_size -= pattern_size; -  // pull off one byte to switch off -  unsigned char encoding_choice = data[0]; -  data++; -  remaining_size--; +#if defined(UTF16_BE) || defined(UTF16_LE) +  if (remaining_size % 2 == 1) remaining_size--; +#endif -  // copy first PATTERN_SIZE bytes off to be the pattern -  unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+4); -  memset(pattern, 0, PATTERN_SIZE+4); -  memcpy(pattern, data, PATTERN_SIZE); -  pattern_end = pattern + PATTERN_SIZE; -  data += PATTERN_SIZE; -  remaining_size -= PATTERN_SIZE; - -  unsigned char *str = (unsigned char*)malloc(remaining_size+4); -  memset(str, 0, remaining_size+4); +  unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1);    memcpy(str, data, remaining_size);    str_null_end = str + remaining_size; -  int r; -  OnigEncodingType *encodings[] = { -	  ONIG_ENCODING_SJIS, -	  ONIG_ENCODING_EUC_JP, -	  ONIG_ENCODING_CP1251, -	  ONIG_ENCODING_ISO_8859_1, -	  ONIG_ENCODING_UTF8, -    ONIG_ENCODING_KOI8_R, -    ONIG_ENCODING_BIG5 +  r = exec(enc, options, syntax, +           (char *)pattern, (char *)pattern_end, +           (char *)str, str_null_end); + +  free(pattern); +  free(str); +  return r; +} + + +#define EXEC_PRINT_INTERVAL  10000000 +#define MAX_PATTERN_SIZE     150 + +#ifdef SYNTAX_TEST +#define NUM_CONTROL_BYTES      3 +#else +#define NUM_CONTROL_BYTES      2 +#endif + +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +#if !defined(UTF16_BE) && !defined(UTF16_LE) +  static OnigEncoding encodings[] = { +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_SJIS, +    //ONIG_ENCODING_EUC_JP, +    ONIG_ENCODING_ISO_8859_1, +    ONIG_ENCODING_BIG5, +    ONIG_ENCODING_GB18030, +    ONIG_ENCODING_EUC_TW +  }; +  unsigned char encoding_choice; +#endif + +#ifdef 
SYNTAX_TEST +  static OnigSyntaxType* syntaxes[] = { +    ONIG_SYNTAX_POSIX_EXTENDED, +    ONIG_SYNTAX_EMACS, +    ONIG_SYNTAX_GREP, +    ONIG_SYNTAX_GNU_REGEX, +    ONIG_SYNTAX_JAVA, +    ONIG_SYNTAX_PERL_NG, +    ONIG_SYNTAX_ONIGURUMA    }; +  unsigned char syntax_choice; +#endif + +  int r; +  int pattern_size; +  size_t remaining_size; +  unsigned char *data; +  unsigned char options_choice; +  OnigOptionType  options; +  OnigEncoding    enc; +  OnigSyntaxType* syntax; -  OnigEncodingType *enc; +  INPUT_COUNT++; +  if (Size < NUM_CONTROL_BYTES) return 0; + +  remaining_size = Size; +  data = (unsigned char* )(Data);  #ifdef UTF16_BE    enc = ONIG_ENCODING_UTF16_BE; @@ -138,24 +244,113 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)  #ifdef UTF16_LE    enc = ONIG_ENCODING_UTF16_LE;  #else +  encoding_choice = data[0]; +  data++; +  remaining_size--; +    int num_encodings = sizeof(encodings)/sizeof(encodings[0]);    enc = encodings[encoding_choice % num_encodings];  #endif  #endif -  r = exec(enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, -           (char *)str, str_null_end); +#ifdef SYNTAX_TEST +  syntax_choice = data[0]; +  data++; +  remaining_size--; -  free(pattern); -  free(str); +  int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); +  syntax = syntaxes[syntax_choice % num_syntaxes]; +#else +  syntax = ONIG_SYNTAX_DEFAULT; +#endif + +  options_choice = data[0]; +  options = (options_choice % 2 == 0) ? 
ONIG_OPTION_NONE : ONIG_OPTION_IGNORECASE; +  data++; +  remaining_size--; + +#ifdef WITH_READ_MAIN +#ifdef SYNTAX_TEST +  fprintf(stdout, "enc: %s, syntax: %d, options: %u\n", +          ONIGENC_NAME(enc), (int )(syntax_choice % num_syntaxes), options); +#else +  fprintf(stdout, "enc: %s, options: %u\n", ONIGENC_NAME(enc), options); +#endif +#endif +#ifdef WITH_READ_MAIN +  int max_pattern_size; + +  if (remaining_size == 0) +    max_pattern_size = 0; +  else { +    max_pattern_size = remaining_size - 1; +    if (max_pattern_size > MAX_PATTERN_SIZE) +      max_pattern_size = MAX_PATTERN_SIZE; + +#if defined(UTF16_BE) || defined(UTF16_LE) +    if (max_pattern_size % 2 == 1) max_pattern_size--; +#endif +  } + +  for (pattern_size = 0; pattern_size <= max_pattern_size; ) { +    fprintf(stdout, "pattern_size: %d\n", pattern_size); +    r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); +    if (r == -2) { +      //output_data("parser-bug", Data, Size); +      exit(-2); +    } + +#if defined(UTF16_BE) || defined(UTF16_LE) +    pattern_size += 2; +#else +    pattern_size++; +#endif +  } + +#else /* WITH_READ_MAIN */ + +  if (remaining_size == 0) +    pattern_size = 0; +  else { +    pattern_size = INPUT_COUNT % remaining_size; +    if (pattern_size > MAX_PATTERN_SIZE) +      pattern_size = MAX_PATTERN_SIZE; + +#if defined(UTF16_BE) || defined(UTF16_LE) +    if (pattern_size % 2 == 1) pattern_size--; +#endif +  } + +  r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); +  if (r == -2) { +    //output_data("parser-bug", Data, Size); +    exit(-2); +  } +#endif /* else WITH_READ_MAIN */ + +  if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { +    char d[64]; +    time_t t; +    float fexec, freg, fvalid; + +    t = time(NULL); +    strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + +    fexec  = (float )EXEC_COUNT / INPUT_COUNT; +    freg   = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; +    fvalid = (float 
)VALID_STRING_COUNT / INPUT_COUNT; + +    fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", +            d, EXEC_COUNT, fexec, freg, fvalid); + +    EXEC_COUNT_INTERVAL = 0; +  }    return r;  }  #ifdef WITH_READ_MAIN -#include <unistd.h> -  extern int main(int argc, char* argv[])  {    size_t n; diff --git a/harnesses/libfuzzer-onig.cpp b/harnesses/libfuzzer-onig.cpp new file mode 100644 index 0000000..526c826 --- /dev/null +++ b/harnesses/libfuzzer-onig.cpp @@ -0,0 +1,45 @@ +/* libfuzzer test code for oniguruma + * author: Hanno Böck, license: CC0/public domain + +Usage: +* compile oniguruma with something like +    ./configure CC=clang LD=clang CFLAGS="-fsanitize-coverage=edge -fsanitize=address" \ +    LDFLAGS="-fsanitize-coverage=edge -fsanitize=address" +* Compile libfuzzer stub and link against static libonig.a and libFuzzer.a: +    clang++ libfuzzer-onig.cpp src/.libs/libonig.a libFuzzer.a -o libfuzzer-onig \ +    -fsanitize-coverage=edge -fsanitize=address +* Put sample patterns in directory "in/" +* Run +    ./libfuzzer-onig in + +Consult libfuzzer docs for further details and how to create libFuzzer.a: +http://llvm.org/docs/LibFuzzer.html + + */ +#include <stdint.h> +#include <string.h> +#include <oniguruma.h> + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +  regex_t *reg; +  OnigEncoding enc; + +  enc = ONIG_ENCODING_UTF8; + +#ifdef FULL_TEST +  onig_initialize(&enc, 1); +  onig_set_retry_limit_in_match(120); +  onig_set_parse_depth_limit(120); +#endif + +  if (onig_new(&reg, Data, Data + Size, ONIG_OPTION_DEFAULT, enc, +               ONIG_SYNTAX_DEFAULT, 0) == 0) +    onig_free(reg); + +#ifdef FULL_TEST +  onig_end(); +#endif + +  return 0; +} diff --git a/harnesses/makefile b/harnesses/makefile new file mode 100644 index 0000000..dfd84de --- /dev/null +++ b/harnesses/makefile @@ -0,0 +1,69 @@ +# makefile for harness +SRC = ../src +CFLAGS   = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer 
+CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DWITH_READ_MAIN +ONIG_LIB = $(SRC)/.libs/libonig.a +LIBS     = $(ONIG_LIB) + +TARGETS = encode-libfuzzer syntax-libfuzzer \ +          utf16-be-libfuzzer utf16-le-libfuzzer main-encode main-syntax \ +          main-utf16-be main-utf16-le main-regset regset-libfuzzer + +OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full \ +          deluxe-encode-libfuzzer main-deluxe-encode + + +default: $(TARGETS) + +encode-libfuzzer: encode-harness.c $(ONIG_LIB) +	clang $(CFLAGS) $< $(LIBS) -o $@ + +syntax-libfuzzer: encode-harness.c $(ONIG_LIB) +	clang -DSYNTAX_TEST $(CFLAGS) $< $(LIBS) -o $@ + +deluxe-encode-libfuzzer: deluxe-encode-harness.c $(ONIG_LIB) +	clang $(CFLAGS) $< $(LIBS) -o $@ + +utf16-be-libfuzzer: encode-harness.c $(ONIG_LIB) +	clang -DUTF16_BE $(CFLAGS) $< $(LIBS) -o $@ + +utf16-le-libfuzzer: encode-harness.c $(ONIG_LIB) +	clang -DUTF16_LE $(CFLAGS) $< $(LIBS) -o $@ + +regset-libfuzzer: regset-harness.c $(ONIG_LIB) +	clang $(CFLAGS) $< $(LIBS) -o $@ + +main-encode: encode-harness.c $(ONIG_LIB) +	clang $(CFLAGS_M) $< $(LIBS) -o $@ + +main-syntax: encode-harness.c $(ONIG_LIB) +	clang -DSYNTAX_TEST $(CFLAGS_M) $< $(LIBS) -o $@ + +main-deluxe-encode: deluxe-encode-harness.c $(ONIG_LIB) +	clang $(CFLAGS_M) $< $(LIBS) -o $@ + +main-utf16-be: encode-harness.c $(ONIG_LIB) +	clang -DUTF16_BE $(CFLAGS_M) $< $(LIBS) -o $@ + +main-utf16-le: encode-harness.c $(ONIG_LIB) +	clang -DUTF16_LE $(CFLAGS_M) $< $(LIBS) -o $@ + +main-regset: regset-harness.c $(ONIG_LIB) +	clang $(CFLAGS_M) $< $(LIBS) -o $@ + +libfuzzer-onig: libfuzzer-onig.cpp $(ONIG_LIB) +	clang++ $(CFLAGS) $< $(LIBS) -o $@ + +libfuzzer-onig-full: libfuzzer-onig.cpp $(ONIG_LIB) +	clang++ -DFULL_TEST $(CFLAGS) $< $(LIBS) -o $@ + + +$(ONIG_LIB): +	cd ..; make clean +	#cd ..; autoreconf -vfi +	cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" LDFLAGS="-g -fsanitize=address 
-fno-omit-frame-pointer" +	cd ..; make -j4 + + +clean: +	rm -f $(TARGETS) $(OTHER_TARGETS) diff --git a/harnesses/regset-harness.c b/harnesses/regset-harness.c new file mode 100644 index 0000000..b4b7e20 --- /dev/null +++ b/harnesses/regset-harness.c @@ -0,0 +1,379 @@ +/* + * regset-harness.c + * Copyright (c) 2019  K.Kosako + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +#include "oniguruma.h" + + +#define RETRY_LIMIT    500 + +#ifdef WITH_READ_MAIN +//#define CHECK_EACH_REGEX_SEARCH_TIME +#endif + +#define MAX_REG_NUM   256 + +typedef unsigned char uint8_t; +static OnigEncoding ENC; + +#ifdef CHECK_EACH_REGEX_SEARCH_TIME +static double +get_sec(struct timespec* ts, struct timespec* te) +{ +  double t; + +  t = (te->tv_sec - ts->tv_sec) + +      (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0; +  return t; +} + +static int +check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end) +{ +  int n; +  int i; +  int r; +  OnigRegion* region; + +  n = onig_regset_number_of_regex(set); +  region = onig_region_new(); + +  for (i = 0; i < n; i++) { +    regex_t* reg; +    unsigned char* start; +    unsigned char* range; +    struct timespec ts1, ts2; +    double t; + +    reg = onig_regset_get_regex(set, i); +    start = str; +    range = end; + +    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); + +    r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + +    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); +    t = get_sec(&ts1, &ts2); + +    fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", i, t * 1000.0); +  } + +  onig_region_free(region, 1); +  return 0; +} +#endif + +static int +search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end) +{ +  int r; +  int match_pos; +  unsigned char *start, *range; + +  start = str; +  range = end; +  r = 
onig_regset_search(set, str, end, start, range, lead, +                         ONIG_OPTION_NONE, &match_pos); +  if (r >= 0) { +#ifdef WITH_READ_MAIN +    int i; +    int match_index; +    OnigRegion* region; + +    match_index = r; +    fprintf(stdout, "match reg index: %d, pos: %d  (%s)\n", +            match_index, match_pos, ONIGENC_NAME(ENC)); +    region = onig_regset_get_region(set, match_index); +    if (region == 0) { +      fprintf(stdout, "ERROR: can't get region.\n"); +      return -1; +    } + +    for (i = 0; i < region->num_regs; i++) { +      fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); +    } +#endif +  } +  else if (r == ONIG_MISMATCH) { +#ifdef WITH_READ_MAIN +    fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC)); +#endif +  } +  else { /* error */ +#ifdef WITH_READ_MAIN +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + +    onig_error_code_to_str((UChar* )s, r); +    fprintf(stdout, "ERROR: %s\n", s); +    fprintf(stdout, "  (%s)\n", ONIGENC_NAME(ENC)); +#endif +    return -1; +  } + +  return 0; +} + +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + +static int +exec(OnigEncoding enc, int reg_num, int init_reg_num, +     UChar* pat[], UChar* pat_end[], +     OnigRegSetLead lead, UChar* str, UChar* end) +{ +  int r; +  int i, j; +  OnigRegSet* set; +  regex_t* reg; +  OnigOptionType options; +  OnigErrorInfo einfo; +  regex_t* regs[MAX_REG_NUM]; + +  EXEC_COUNT++; +  EXEC_COUNT_INTERVAL++; + +  options = (EXEC_COUNT % 4 == 0) ? 
ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE; + +  onig_initialize(&enc, 1); +  onig_set_retry_limit_in_match(RETRY_LIMIT); + +  for (i = 0; i < init_reg_num; i++) { +    r = onig_new(&regs[i], pat[i], pat_end[i], options, ENC, +                 ONIG_SYNTAX_DEFAULT, &einfo); +    if (r != 0) { +#ifdef WITH_READ_MAIN +      char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + +      onig_error_code_to_str((UChar* )s, r, &einfo); +      fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif + +      for (j = 0; j < i; j++) onig_free(regs[j]); + +      onig_end(); + +      if (r == ONIGERR_PARSER_BUG || +          r == ONIGERR_STACK_BUG  || +          r == ONIGERR_UNDEFINED_BYTECODE || +          r == ONIGERR_UNEXPECTED_BYTECODE) { +        return -2; +      } +      else +        return -1; +    } +  } + +  r = onig_regset_new(&set, init_reg_num, regs); +  if (r != 0) { +    for (i = 0; i < init_reg_num; i++) { +      onig_free(regs[i]); +    } +    onig_end(); +    return -1; +  } + +  for (i = init_reg_num; i < reg_num; i++) { +    r = onig_new(&reg, pat[i], pat_end[i], options, ENC, +                 ONIG_SYNTAX_DEFAULT, &einfo); +    if (r != 0) { +#ifdef WITH_READ_MAIN +      char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + +      onig_error_code_to_str((UChar* )s, r, &einfo); +      fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif +      onig_regset_free(set); +      onig_end(); + +      if (r == ONIGERR_PARSER_BUG || +          r == ONIGERR_STACK_BUG  || +          r == ONIGERR_UNDEFINED_BYTECODE || +          r == ONIGERR_UNEXPECTED_BYTECODE) { +        return -2; +      } +      else +        return -1; +    } + +    r = onig_regset_add(set, reg); +    if (r != 0) { +      onig_regset_free(set); +      onig_end(); +      fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i); +      return r; +    } +  } + +  REGEX_SUCCESS_COUNT++; + +  if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { +    VALID_STRING_COUNT++; +    r = search(set, lead, str, end); +#ifdef 
CHECK_EACH_REGEX_SEARCH_TIME +    r = check_each_regex_search_time(set, str, end); +#endif +  } + +  onig_regset_free(set); +  onig_end(); +  return 0; +} + +#define MAX_PATTERN_SIZE      30 +#define NUM_CONTROL_BYTES      3 + +#define EXEC_PRINT_INTERVAL  2000000 + +static int MaxRegNum; +static int MaxInitRegNum; + +extern int +LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +  int r, i; +  int pattern_size; +  unsigned char *str_null_end; +  size_t remaining_size; +  unsigned char *data; +  unsigned int reg_num; +  unsigned int init_reg_num; +  unsigned char* pat[256]; +  unsigned char* pat_end[256]; +  int len; +  unsigned int lead_num; +  OnigRegSetLead lead; + +  INPUT_COUNT++; + +  if (Size < NUM_CONTROL_BYTES) return 0; + +  remaining_size = Size; +  data = (unsigned char* )(Data); + +  reg_num = data[0]; +  data++; +  remaining_size--; + +  init_reg_num = data[0]; +  data++; +  remaining_size--; + +  lead_num = data[0]; +  data++; +  remaining_size--; +  lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD); + +  if (remaining_size < reg_num * 2) { +    reg_num = reg_num % 15;  // zero is OK. +  } + +  init_reg_num %= (reg_num + 1); + +  if (MaxRegNum < reg_num) +    MaxRegNum = reg_num; + +  if (MaxInitRegNum < init_reg_num) +    MaxInitRegNum = init_reg_num; + +  if (reg_num == 0) +    pattern_size = 1; +  else +    pattern_size = remaining_size / (reg_num * 2); +     +  if (pattern_size > MAX_PATTERN_SIZE) +    pattern_size = MAX_PATTERN_SIZE; + +  len = pattern_size * reg_num; +  if (len == 0) len = 1; + +  for (i = 0; i < reg_num; i++) { +    pat[i] = (unsigned char* )malloc(pattern_size); +    memcpy(pat[i], data, pattern_size); +    pat_end[i] = pat[i] + pattern_size; +    data += pattern_size; +    remaining_size -= pattern_size; +  } + +  unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? 
remaining_size : 1); +  memcpy(str, data, remaining_size); +  str_null_end = str + remaining_size; + +#ifdef WITH_READ_MAIN +  fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n", +          reg_num, pattern_size, +          lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex"); + +  if (reg_num != 0) { +    unsigned char* p; +    i = 0; +    p = pat[0]; +    while (p < pat_end[0]) { +      fprintf(stdout, " 0x%02x", (int )*p++); +      i++; +      if (i % 8 == 0) fprintf(stdout, "\n"); +    } +    fprintf(stdout, "\n"); +  } +#endif + +  ENC = ONIG_ENCODING_UTF8; + +  r = exec(ENC, reg_num, init_reg_num, pat, pat_end, lead, str, str_null_end); + +  for (i = 0; i < reg_num; i++) { +    free(pat[i]); +  } +  free(str); + +  if (r == -2) { +    //output_data("parser-bug", Data, Size); +    exit(-2); +  } + +  if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { +    char d[64]; +    time_t t; +    float fexec, freg, fvalid; + +    t = time(NULL); +    strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + +    fexec  = (float )EXEC_COUNT / INPUT_COUNT; +    freg   = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; +    fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + +    fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", +            d, EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); + +    EXEC_COUNT_INTERVAL = 0; +  } +  return r; +} + +#ifdef WITH_READ_MAIN + +extern int main(int argc, char* argv[]) +{ +  size_t n; +  uint8_t Data[10000]; + +  n = read(0, Data, sizeof(Data)); +  fprintf(stdout, "n: %ld\n", n); +  LLVMFuzzerTestOneInput(Data, n); + +  return 0; +} +#endif /* WITH_READ_MAIN */ diff --git a/harnesses/syntax-harness.c b/harnesses/syntax-harness.c deleted file mode 100644 index 0fb3587..0000000 --- a/harnesses/syntax-harness.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * syntax-harness.c - * contributed by Mark Griffin - */ -#include <stdio.h> -#include <string.h> -#include "oniguruma.h" - -#include 
<stdlib.h> - -#define DEFAULT_LIMIT 120 -typedef unsigned char uint8_t; - -extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr) -{ -  int r; -  unsigned char *start, *range, *end; -  regex_t* reg; -  OnigErrorInfo einfo; -  OnigRegion *region; -  UChar* pattern = (UChar* )apattern; -  UChar* str     = (UChar* )astr; - -  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern), -               ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); -  if (r != ONIG_NORMAL) { -    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; -    onig_error_code_to_str((UChar* )s, r, &einfo); -    fprintf(stdout, "ERROR: %s\n", s); -    return -1; -  } - -  region = onig_region_new(); - -  end   = str + strlen((char* )str); -  start = str; -  range = end; -  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); -  if (r >= 0) { -    int i; - -    fprintf(stdout, "match at %d\n", r); -    for (i = 0; i < region->num_regs; i++) { -      fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); -    } -  } -  else if (r == ONIG_MISMATCH) { -    fprintf(stdout, "search fail\n"); -  } -  else { /* error */ -    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; -    onig_error_code_to_str((UChar* )s, r); -    fprintf(stdout, "ERROR: %s\n", s); -    onig_region_free(region, 1 /* 1:free self, 0:free contents only */); -    onig_free(reg); -    return -1; -  } - -  onig_region_free(region, 1 /* 1:free self, 0:free contents only */); -  onig_free(reg); -  return 0; -} - -#define PATTERN_SIZE 64 -#define NUM_CONTROL_BYTES 1 -#define MIN_STR_SIZE  1 -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) -{ -  if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) -    return 0; -  if (Size > 0x1000) -    return 0; -  size_t remaining_size = Size; -  unsigned char *data = (unsigned char *)(Data); - -  // pull off one byte to switch syntax choice -  unsigned char syntax_choice = data[0]; -  data++; -  remaining_size--; - -  // copy first PATTERN_SIZE 
bytes off to be the pattern -  unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+1); -  memset(pattern, 0, PATTERN_SIZE+1); -  memcpy(pattern, data, PATTERN_SIZE); -  data += PATTERN_SIZE; -  remaining_size -= PATTERN_SIZE; - -  unsigned char *str = (unsigned char*)malloc(remaining_size+1); -  memset(str, 0, remaining_size+1); -  memcpy(str, data, remaining_size); -   -  OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; -  onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); - -  onig_set_retry_limit_in_match(DEFAULT_LIMIT); -  onig_set_parse_depth_limit(DEFAULT_LIMIT); - -  OnigSyntaxType *syntaxes[] = { -    ONIG_SYNTAX_POSIX_EXTENDED, -    ONIG_SYNTAX_EMACS, -    ONIG_SYNTAX_GREP, -    ONIG_SYNTAX_GNU_REGEX, -    ONIG_SYNTAX_JAVA, -    ONIG_SYNTAX_PERL_NG, -    ONIG_SYNTAX_RUBY, -    ONIG_SYNTAX_ONIGURUMA, -  };  -  OnigSyntaxType *syntax = syntaxes[syntax_choice % 8]; -   -  int r; -  r = exec(syntax, (char *)pattern, (char *)str); -  // r = exec(ONIG_SYNTAX_JAVA, "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc"); - -  onig_end(); - -  free(pattern); -  free(str); - -  return 0; -} | 
