diff options
Diffstat (limited to 'harnesses')
| -rw-r--r-- | harnesses/ascii_compatible.dict | 2 | ||||
| -rw-r--r-- | harnesses/base.c | 499 | ||||
| -rw-r--r-- | harnesses/deluxe.c (renamed from harnesses/deluxe-encode-harness.c) | 14 | ||||
| -rw-r--r-- | harnesses/encode-harness.c | 365 | ||||
| -rw-r--r-- | harnesses/fuzzer.options | 2 | ||||
| -rw-r--r-- | harnesses/makefile | 35 | ||||
| -rw-r--r-- | harnesses/regset.c (renamed from harnesses/regset-harness.c) | 51 | 
7 files changed, 560 insertions, 408 deletions
| diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict index e6e00db..a3e978b 100644 --- a/harnesses/ascii_compatible.dict +++ b/harnesses/ascii_compatible.dict @@ -109,3 +109,5 @@  "\\N{name}"  "\\p{Katakana}"  "\\p{Emoji}" +"ss" +"SS" diff --git a/harnesses/base.c b/harnesses/base.c new file mode 100644 index 0000000..a88e6f2 --- /dev/null +++ b/harnesses/base.c @@ -0,0 +1,499 @@ +/* + * base.c  contributed by Mark Griffin + * Copyright (c) 2019-2020  K.Kosako + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +#include "oniguruma.h" + +#define PARSE_DEPTH_LIMIT           8 +#define RETRY_LIMIT              5000 +#define CALL_MAX_NEST_LEVEL         8 +//#define EXEC_PRINT_INTERVAL    500000 +//#define DUMP_DATA_INTERVAL     100000 +//#define STAT_PATH              "fuzzer.stat_log" + +typedef unsigned char uint8_t; + +#ifdef DUMP_INPUT +static void +dump_input(unsigned char* data, size_t len) +{ +  static FILE* DumpFp; +  static char end[] = { 'E', 'N', 'D' }; + +  if (DumpFp == 0) +    DumpFp = fopen("dump-input", "w"); + +  fseek(DumpFp, 0, SEEK_SET); +  fwrite(data, sizeof(unsigned char), len, DumpFp); +  fwrite(end,  sizeof(char), sizeof(end), DumpFp); +  fflush(DumpFp); +} +#endif + +#ifdef DUMP_DATA_INTERVAL +static void +dump_file(char* path, unsigned char* data, size_t len) +{ +  FILE* fp; + +  fp = fopen(path, "w"); +  fwrite(data, sizeof(unsigned char), len, fp); +  fclose(fp); +} +#endif + +#ifdef STANDALONE +#include <ctype.h> + +static void +dump_data(FILE* fp, unsigned char* data, int len) +{ +  int i; + +  fprintf(fp, "{\n"); +  for (i = 0; i < len; i++) { +    unsigned char c = data[i]; + +    if (isprint((int )c)) { +      if (c == '\\') +        fprintf(fp, " '\\\\'"); +      else +        fprintf(fp, " '%c'", c); +    } +    else { +      fprintf(fp, "0x%02x", (int )c); +    } + +    if (i == len - 1) { +      fprintf(fp, "\n"); +    } +    else { +      if (i % 8 == 7) +        fprintf(fp, ",\n"); +      else +        fprintf(fp, ", "); +    } +  } +  fprintf(fp, "};\n"); +} + +#else + +static void +output_current_time(FILE* fp) +{ +  char d[64]; +  time_t t; + +  t = time(NULL); +  strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + +  fprintf(fp, "%s", d); +} + +#endif + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end, int backward) +{ +  int r; +  unsigned char *start, *range; +  OnigRegion *region; + +  region = onig_region_new(); + +  if (backward != 0) { +    start = end; +    range = str; +  } +  else { +    start = str; +    range = end; +  } + +  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); +  if (r >= 0) { +#ifdef STANDALONE +    int i; + +    fprintf(stdout, "match at %d  (%s)\n", r, +            ONIGENC_NAME(onig_get_encoding(reg))); +    for (i = 0; i < region->num_regs; i++) { +      fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); +    } +#endif +  } +  else if (r == ONIG_MISMATCH) { +#ifdef STANDALONE +    fprintf(stdout, "search fail (%s)\n", +            ONIGENC_NAME(onig_get_encoding(reg))); +#endif +  } +  else { /* error */ +#ifdef STANDALONE +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + +    onig_error_code_to_str((UChar* )s, r); +    fprintf(stdout, "ERROR: %s\n", s); +    fprintf(stdout, "  (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +#endif +    onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + +    if (r == ONIGERR_STACK_BUG || +        r == ONIGERR_UNDEFINED_BYTECODE || +        r == ONIGERR_UNEXPECTED_BYTECODE) +      return -2; + +    return -1; +  } + +  onig_region_free(region, 1 /* 1:free self, 0:free contents only */); +  return 0; +} + +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + +static int +exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, +     char* apattern, char* apattern_end, char* astr, UChar* end, int backward) +{ +  int r; +  regex_t* reg; +  OnigErrorInfo einfo; +  UChar* pattern = (UChar* )apattern; +  UChar* str     = (UChar* )astr; +  UChar* pattern_end = (UChar* )apattern_end; + +  EXEC_COUNT++; +  EXEC_COUNT_INTERVAL++; + +  onig_initialize(&enc, 1); +  onig_set_retry_limit_in_search(RETRY_LIMIT); +#ifdef PARSE_DEPTH_LIMIT +  onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); +#endif +  onig_set_subexp_call_max_nest_level(CALL_MAX_NEST_LEVEL); + +  r = onig_new(®, pattern, pattern_end, +               options, enc, syntax, &einfo); +  if (r != ONIG_NORMAL) { +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +    onig_error_code_to_str((UChar* )s, r, &einfo); +#ifdef STANDALONE +    fprintf(stdout, "ERROR: %s\n", s); +#endif +    onig_end(); + +    if (r == ONIGERR_PARSER_BUG || +        r == ONIGERR_STACK_BUG  || +        r == ONIGERR_UNDEFINED_BYTECODE || +        r == ONIGERR_UNEXPECTED_BYTECODE) { +      return -2; +    } +    else +      return -1; +  } +  REGEX_SUCCESS_COUNT++; + +  r = search(reg, pattern, pattern_end, backward); +  if (r == -2) return -2; + +  if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { +    VALID_STRING_COUNT++; +    r = search(reg, str, end, backward); +    if (r == -2) return -2; +  } + +  onig_free(reg); +  onig_end(); +  return 0; +} + +static int +alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, +           int backward, int pattern_size, size_t remaining_size, unsigned char *data) +{ +  int r; +  unsigned char *pattern_end; +  unsigned char *str_null_end; + +  // copy first PATTERN_SIZE bytes off to be the pattern +  unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); +  memcpy(pattern, data, pattern_size); +  pattern_end = pattern + pattern_size; +  data += pattern_size; +  remaining_size -= pattern_size; + +#if defined(UTF16_BE) || defined(UTF16_LE) +  if (remaining_size % 2 == 1) remaining_size--; +#endif + +  unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); +  memcpy(str, data, remaining_size); +  str_null_end = str + remaining_size; + +  r = exec(enc, options, syntax, +           (char *)pattern, (char *)pattern_end, +           (char *)str, str_null_end, backward); + +  free(pattern); +  free(str); +  return r; +} + +#define OPTIONS_MASK  (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP) + + +#ifdef SYNTAX_TEST +#define NUM_CONTROL_BYTES      6 +#else +#define NUM_CONTROL_BYTES      5 +#endif + +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +#if !defined(UTF16_BE) && !defined(UTF16_LE) +  static OnigEncoding encodings[] = { +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_ASCII, +    ONIG_ENCODING_EUC_JP, +    ONIG_ENCODING_EUC_TW, +    ONIG_ENCODING_EUC_KR, +    ONIG_ENCODING_EUC_CN, +    ONIG_ENCODING_SJIS, +    ONIG_ENCODING_KOI8_R, +    ONIG_ENCODING_CP1251, +    ONIG_ENCODING_BIG5, +    ONIG_ENCODING_GB18030, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_ISO_8859_1, +    ONIG_ENCODING_ISO_8859_2, +    ONIG_ENCODING_ISO_8859_3, +    ONIG_ENCODING_ISO_8859_4, +    ONIG_ENCODING_ISO_8859_5, +    ONIG_ENCODING_ISO_8859_6, +    ONIG_ENCODING_ISO_8859_7, +    ONIG_ENCODING_ISO_8859_8, +    ONIG_ENCODING_ISO_8859_9, +    ONIG_ENCODING_ISO_8859_10, +    ONIG_ENCODING_ISO_8859_11, +    ONIG_ENCODING_ISO_8859_13, +    ONIG_ENCODING_ISO_8859_14, +    ONIG_ENCODING_ISO_8859_15, +    ONIG_ENCODING_ISO_8859_16 +  }; +  unsigned char encoding_choice; +#endif + +#ifdef SYNTAX_TEST +  static OnigSyntaxType* syntaxes[] = { +    ONIG_SYNTAX_POSIX_EXTENDED, +    ONIG_SYNTAX_EMACS, +    ONIG_SYNTAX_GREP, +    ONIG_SYNTAX_GNU_REGEX, +    ONIG_SYNTAX_JAVA, +    ONIG_SYNTAX_PERL_NG, +    ONIG_SYNTAX_ONIGURUMA +  }; + +#ifdef STANDALONE +  static char* syntax_names[] = { +    "Posix Extended", +    "Emacs", +    "Grep", +    "GNU Regex", +    "Java", +    "Perl+NG", +    "Oniguruma" +  }; +#endif + +  unsigned char syntax_choice; +#endif + +  int r; +  int backward; +  int pattern_size; +  size_t remaining_size; +  unsigned char *data; +  unsigned char pattern_size_choice; +  OnigOptionType  options; +  OnigEncoding    enc; +  OnigSyntaxType* syntax; + +#ifndef STANDALONE +  static FILE* STAT_FP; +#endif + +  INPUT_COUNT++; + +#ifdef DUMP_DATA_INTERVAL +  if (INPUT_COUNT % DUMP_DATA_INTERVAL == 0) { +    char path[20]; +    sprintf(path, "dump-%ld", INPUT_COUNT); +    dump_file(path, (unsigned char* )Data, Size); +  } +#endif + +  if (Size < NUM_CONTROL_BYTES) return 0; + +  remaining_size = Size; +  data = (unsigned char* )(Data); + +#ifdef UTF16_BE +  enc = ONIG_ENCODING_UTF16_BE; +#else +#ifdef UTF16_LE +  enc = ONIG_ENCODING_UTF16_LE; +#else +  encoding_choice = data[0]; +  data++; +  remaining_size--; + +  int num_encodings = sizeof(encodings)/sizeof(encodings[0]); +  enc = encodings[encoding_choice % num_encodings]; +#endif +#endif + +#ifdef SYNTAX_TEST +  syntax_choice = data[0]; +  data++; +  remaining_size--; + +  int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); +  syntax = syntaxes[syntax_choice % num_syntaxes]; +#else +  syntax = ONIG_SYNTAX_DEFAULT; +#endif + +  if ((data[1] & 0xc0) == 0) +    options = (data[0] | (data[1] << 8)) & OPTIONS_MASK; +  else +    options = data[0] & ONIG_OPTION_IGNORECASE; + +  data++; +  remaining_size--; +  data++; +  remaining_size--; + +  pattern_size_choice = data[0]; +  data++; +  remaining_size--; + +  backward = (data[0] == 0xbb); +  data++; +  remaining_size--; + +  if (remaining_size == 0) +    pattern_size = 0; +  else { +    pattern_size = (int )pattern_size_choice % remaining_size; +#if defined(UTF16_BE) || defined(UTF16_LE) +    if (pattern_size % 2 == 1) pattern_size--; +#endif +  } + +#ifdef STANDALONE +  dump_data(stdout, data, pattern_size); +#ifdef SYNTAX_TEST +  fprintf(stdout, +          "enc: %s, syntax: %s, options: %u, pattern_size: %d, back:%d\n", +          ONIGENC_NAME(enc), +          syntax_names[syntax_choice % num_syntaxes], +          options, +          pattern_size, backward); +#else +  fprintf(stdout, "enc: %s, options: %u, pattern_size: %d, back:%d\n", +          ONIGENC_NAME(enc), options, pattern_size, backward); +#endif +#endif + +#ifdef DUMP_INPUT +  dump_input((unsigned char* )Data, Size); +#endif + +  r = alloc_exec(enc, options, syntax, backward, pattern_size, +                 remaining_size, data); +  if (r == -2) exit(-2); + +#ifndef STANDALONE +#ifdef EXEC_PRINT_INTERVAL +  if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { +    float fexec, freg, fvalid; + +    if (STAT_FP == 0) { +#ifdef STAT_PATH +      STAT_FP = fopen(STAT_PATH, "a"); +#else +      STAT_FP = stdout; +#endif +    } + +    output_current_time(STAT_FP); + +    if (INPUT_COUNT != 0) { // overflow check +      fexec  = (float )EXEC_COUNT / INPUT_COUNT; +      freg   = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; +      fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + +      fprintf(STAT_FP, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", +              EXEC_COUNT, fexec, freg, fvalid); +      fflush(STAT_FP); +    } +    else { +      fprintf(STAT_FP, ": ignore (input count overflow)\n"); +    } + +    EXEC_COUNT_INTERVAL = 0; +  } +  else if (EXEC_COUNT == 1) { +    output_current_time(stdout); +    fprintf(stdout, ": ------------ START ------------\n"); +  } +#endif +#endif + +  return r; +} + +#ifdef STANDALONE + +extern int main(int argc, char* argv[]) +{ +  size_t n; +  uint8_t Data[10000]; + +  n = read(0, Data, sizeof(Data)); +  fprintf(stdout, "n: %ld\n", n); +  LLVMFuzzerTestOneInput(Data, n); + +  return 0; +} +#endif /* STANDALONE */ diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe.c index aabe916..5441de9 100644 --- a/harnesses/deluxe-encode-harness.c +++ b/harnesses/deluxe.c @@ -1,5 +1,5 @@  /* - * deluxe-encode-harness.c + * deluxe.c   * contributed by Mark Griffin   */  #include <stdio.h> @@ -8,7 +8,9 @@  #include <stdlib.h>  #include <string.h> -#define DEFAULT_LIMIT 120 +#define RETRY_LIMIT   10000 +#define DEPTH_LIMIT      10 +  typedef unsigned char uint8_t;  static int @@ -66,8 +68,8 @@ exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,    unsigned char* end = (unsigned char* )astr_end;    onig_initialize(&str_enc, 1); -  onig_set_retry_limit_in_match(DEFAULT_LIMIT); -  onig_set_parse_depth_limit(DEFAULT_LIMIT); +  onig_set_retry_limit_in_search(RETRY_LIMIT); +  onig_set_parse_depth_limit(DEPTH_LIMIT);    ci.num_of_elements = 5;    ci.pattern_enc = pattern_enc; @@ -186,7 +188,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)  } -#ifdef WITH_READ_MAIN +#ifdef STANDALONE  #include <unistd.h> @@ -201,4 +203,4 @@ extern int main(int argc, char* argv[])    return 0;  } -#endif /* WITH_READ_MAIN */ +#endif /* STANDALONE */ diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c deleted file mode 100644 index 5db0512..0000000 --- a/harnesses/encode-harness.c +++ /dev/null @@ -1,365 +0,0 @@ -/* - * encode-harness.c - * contributed by Mark Griffin - */ -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <time.h> - -#include "oniguruma.h" - - -//#define PARSE_DEPTH_LIMIT   120 -#define RETRY_LIMIT        3500 - -typedef unsigned char uint8_t; - -static int -search(regex_t* reg, unsigned char* str, unsigned char* end) -{ -  int r; -  unsigned char *start, *range; -  OnigRegion *region; - -  region = onig_region_new(); - -  start = str; -  range = end; -  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); -  if (r >= 0) { -#ifdef WITH_READ_MAIN -    int i; - -    fprintf(stdout, "match at %d  (%s)\n", r, -            ONIGENC_NAME(onig_get_encoding(reg))); -    for (i = 0; i < region->num_regs; i++) { -      fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); -    } -#endif -  } -  else if (r == ONIG_MISMATCH) { -#ifdef WITH_READ_MAIN -    fprintf(stdout, "search fail (%s)\n", -            ONIGENC_NAME(onig_get_encoding(reg))); -#endif -  } -  else { /* error */ -#ifdef WITH_READ_MAIN -    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - -    onig_error_code_to_str((UChar* )s, r); -    fprintf(stdout, "ERROR: %s\n", s); -    fprintf(stdout, "  (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); -#endif -    onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - -    if (r == ONIGERR_STACK_BUG || -        r == ONIGERR_UNDEFINED_BYTECODE || -        r == ONIGERR_UNEXPECTED_BYTECODE) -      return -2; - -    return -1; -  } - -  onig_region_free(region, 1 /* 1:free self, 0:free contents only */); -  return 0; -} - -static long INPUT_COUNT; -static long EXEC_COUNT; -static long EXEC_COUNT_INTERVAL; -static long REGEX_SUCCESS_COUNT; -static long VALID_STRING_COUNT; - -static int -exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, -     char* apattern, char* apattern_end, char* astr, UChar* end) -{ -  int r; -  regex_t* reg; -  OnigErrorInfo einfo; -  UChar* pattern = (UChar* )apattern; -  UChar* str     = (UChar* )astr; -  UChar* pattern_end = (UChar* )apattern_end; - -  EXEC_COUNT++; -  EXEC_COUNT_INTERVAL++; - -  onig_initialize(&enc, 1); -  onig_set_retry_limit_in_match(RETRY_LIMIT); -  //onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); - -  r = onig_new(®, pattern, pattern_end, -               options, enc, syntax, &einfo); -  if (r != ONIG_NORMAL) { -    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; -    onig_error_code_to_str((UChar* )s, r, &einfo); -#ifdef WITH_READ_MAIN -    fprintf(stdout, "ERROR: %s\n", s); -#endif -    onig_end(); - -    if (r == ONIGERR_PARSER_BUG || -        r == ONIGERR_STACK_BUG  || -        r == ONIGERR_UNDEFINED_BYTECODE || -        r == ONIGERR_UNEXPECTED_BYTECODE) { -      return -2; -    } -    else -      return -1; -  } -  REGEX_SUCCESS_COUNT++; - -  r = search(reg, pattern, pattern_end); -  if (r == -2) return -2; - -  if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { -    VALID_STRING_COUNT++; -    r = search(reg, str, end); -    if (r == -2) return -2; -  } - -  onig_free(reg); -  onig_end(); -  return 0; -} - -#if 0 -static void -output_data(char* path, const uint8_t * data, size_t size) -{ -  int fd; -  ssize_t n; - -  fd = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IRGRP|S_IROTH); -  if (fd == -1) { -    fprintf(stderr, "ERROR: output_data(): can't open(%s)\n", path); -    return ; -  } - -  n = write(fd, (const void* )data, size); -  if (n != size) { -    fprintf(stderr, "ERROR: output_data(): n: %ld, size: %ld\n", n, size); -  } -  close(fd); -} -#endif - - -static int -alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, -           int pattern_size, size_t remaining_size, unsigned char *data) -{ -  int r; -  unsigned char *pattern_end; -  unsigned char *str_null_end; - -  // copy first PATTERN_SIZE bytes off to be the pattern -  unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); -  memcpy(pattern, data, pattern_size); -  pattern_end = pattern + pattern_size; -  data += pattern_size; -  remaining_size -= pattern_size; - -#if defined(UTF16_BE) || defined(UTF16_LE) -  if (remaining_size % 2 == 1) remaining_size--; -#endif - -  unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); -  memcpy(str, data, remaining_size); -  str_null_end = str + remaining_size; - -  r = exec(enc, options, syntax, -           (char *)pattern, (char *)pattern_end, -           (char *)str, str_null_end); - -  free(pattern); -  free(str); -  return r; -} - - -#define EXEC_PRINT_INTERVAL  10000000 -#define MAX_PATTERN_SIZE     150 - -#ifdef SYNTAX_TEST -#define NUM_CONTROL_BYTES      3 -#else -#define NUM_CONTROL_BYTES      2 -#endif - -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) -{ -#if !defined(UTF16_BE) && !defined(UTF16_LE) -  static OnigEncoding encodings[] = { -    ONIG_ENCODING_UTF8, -    ONIG_ENCODING_UTF8, -    ONIG_ENCODING_UTF8, -    ONIG_ENCODING_SJIS, -    //ONIG_ENCODING_EUC_JP, -    ONIG_ENCODING_ISO_8859_1, -    ONIG_ENCODING_BIG5, -    ONIG_ENCODING_GB18030, -    ONIG_ENCODING_EUC_TW -  }; -  unsigned char encoding_choice; -#endif - -#ifdef SYNTAX_TEST -  static OnigSyntaxType* syntaxes[] = { -    ONIG_SYNTAX_POSIX_EXTENDED, -    ONIG_SYNTAX_EMACS, -    ONIG_SYNTAX_GREP, -    ONIG_SYNTAX_GNU_REGEX, -    ONIG_SYNTAX_JAVA, -    ONIG_SYNTAX_PERL_NG, -    ONIG_SYNTAX_ONIGURUMA -  }; -  unsigned char syntax_choice; -#endif - -  int r; -  int pattern_size; -  size_t remaining_size; -  unsigned char *data; -  unsigned char options_choice; -  OnigOptionType  options; -  OnigEncoding    enc; -  OnigSyntaxType* syntax; - -  INPUT_COUNT++; -  if (Size < NUM_CONTROL_BYTES) return 0; - -  remaining_size = Size; -  data = (unsigned char* )(Data); - -#ifdef UTF16_BE -  enc = ONIG_ENCODING_UTF16_BE; -#else -#ifdef UTF16_LE -  enc = ONIG_ENCODING_UTF16_LE; -#else -  encoding_choice = data[0]; -  data++; -  remaining_size--; - -  int num_encodings = sizeof(encodings)/sizeof(encodings[0]); -  enc = encodings[encoding_choice % num_encodings]; -#endif -#endif - -#ifdef SYNTAX_TEST -  syntax_choice = data[0]; -  data++; -  remaining_size--; - -  int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); -  syntax = syntaxes[syntax_choice % num_syntaxes]; -#else -  syntax = ONIG_SYNTAX_DEFAULT; -#endif - -  options_choice = data[0]; -  options = (options_choice % 2 == 0) ? ONIG_OPTION_NONE : ONIG_OPTION_IGNORECASE; -  data++; -  remaining_size--; - -#ifdef WITH_READ_MAIN -#ifdef SYNTAX_TEST -  fprintf(stdout, "enc: %s, syntax: %d, options: %u\n", -          ONIGENC_NAME(enc), (int )(syntax_choice % num_syntaxes), options); -#else -  fprintf(stdout, "enc: %s, options: %u\n", ONIGENC_NAME(enc), options); -#endif -#endif - -#ifdef WITH_READ_MAIN -  int max_pattern_size; - -  if (remaining_size == 0) -    max_pattern_size = 0; -  else { -    max_pattern_size = remaining_size - 1; -    if (max_pattern_size > MAX_PATTERN_SIZE) -      max_pattern_size = MAX_PATTERN_SIZE; - -#if defined(UTF16_BE) || defined(UTF16_LE) -    if (max_pattern_size % 2 == 1) max_pattern_size--; -#endif -  } - -  for (pattern_size = 0; pattern_size <= max_pattern_size; ) { -    fprintf(stdout, "pattern_size: %d\n", pattern_size); -    r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); -    if (r == -2) { -      //output_data("parser-bug", Data, Size); -      exit(-2); -    } - -#if defined(UTF16_BE) || defined(UTF16_LE) -    pattern_size += 2; -#else -    pattern_size++; -#endif -  } - -#else /* WITH_READ_MAIN */ - -  if (remaining_size == 0) -    pattern_size = 0; -  else { -    pattern_size = INPUT_COUNT % remaining_size; -    if (pattern_size > MAX_PATTERN_SIZE) -      pattern_size = MAX_PATTERN_SIZE; - -#if defined(UTF16_BE) || defined(UTF16_LE) -    if (pattern_size % 2 == 1) pattern_size--; -#endif -  } - -  r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); -  if (r == -2) { -    //output_data("parser-bug", Data, Size); -    exit(-2); -  } -#endif /* else WITH_READ_MAIN */ - -  if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { -    char d[64]; -    time_t t; -    float fexec, freg, fvalid; - -    t = time(NULL); -    strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); - -    fexec  = (float )EXEC_COUNT / INPUT_COUNT; -    freg   = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; -    fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; - -    fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", -            d, EXEC_COUNT, fexec, freg, fvalid); - -    EXEC_COUNT_INTERVAL = 0; -  } -  return r; -} - -#ifdef WITH_READ_MAIN - -extern int main(int argc, char* argv[]) -{ -  size_t n; -  uint8_t Data[10000]; - -  n = read(0, Data, sizeof(Data)); -  fprintf(stdout, "n: %ld\n", n); -  LLVMFuzzerTestOneInput(Data, n); - -  return 0; -} -#endif /* WITH_READ_MAIN */ diff --git a/harnesses/fuzzer.options b/harnesses/fuzzer.options new file mode 100644 index 0000000..ab44744 --- /dev/null +++ b/harnesses/fuzzer.options @@ -0,0 +1,2 @@ +[libfuzzer] +dict = ascii_compatible.dict diff --git a/harnesses/makefile b/harnesses/makefile index dfd84de..b324295 100644 --- a/harnesses/makefile +++ b/harnesses/makefile @@ -1,54 +1,53 @@  # makefile for harness  SRC = ../src  CFLAGS   = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer -CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DWITH_READ_MAIN +CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DSTANDALONE  ONIG_LIB = $(SRC)/.libs/libonig.a  LIBS     = $(ONIG_LIB) -TARGETS = encode-libfuzzer syntax-libfuzzer \ -          utf16-be-libfuzzer utf16-le-libfuzzer main-encode main-syntax \ -          main-utf16-be main-utf16-le main-regset regset-libfuzzer +TARGETS = fuzzer-encode fuzzer-syntax fuzzer-utf16-be fuzzer-utf16-le \ +          fuzzer-regset \ +          read-encode read-syntax read-utf16-be read-utf16-le read-regset -OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full \ -          deluxe-encode-libfuzzer main-deluxe-encode +OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full fuzzer-deluxe read-deluxe  default: $(TARGETS) -encode-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-encode: base.c $(ONIG_LIB)  	clang $(CFLAGS) $< $(LIBS) -o $@ -syntax-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-syntax: base.c $(ONIG_LIB)  	clang -DSYNTAX_TEST $(CFLAGS) $< $(LIBS) -o $@ -deluxe-encode-libfuzzer: deluxe-encode-harness.c $(ONIG_LIB) +fuzzer-deluxe: deluxe.c $(ONIG_LIB)  	clang $(CFLAGS) $< $(LIBS) -o $@ -utf16-be-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-utf16-be: base.c $(ONIG_LIB)  	clang -DUTF16_BE $(CFLAGS) $< $(LIBS) -o $@ -utf16-le-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-utf16-le: base.c $(ONIG_LIB)  	clang -DUTF16_LE $(CFLAGS) $< $(LIBS) -o $@ -regset-libfuzzer: regset-harness.c $(ONIG_LIB) +fuzzer-regset: regset.c $(ONIG_LIB)  	clang $(CFLAGS) $< $(LIBS) -o $@ -main-encode: encode-harness.c $(ONIG_LIB) +read-encode: base.c $(ONIG_LIB)  	clang $(CFLAGS_M) $< $(LIBS) -o $@ -main-syntax: encode-harness.c $(ONIG_LIB) +read-syntax: base.c $(ONIG_LIB)  	clang -DSYNTAX_TEST $(CFLAGS_M) $< $(LIBS) -o $@ -main-deluxe-encode: deluxe-encode-harness.c $(ONIG_LIB) +read-deluxe: deluxe.c $(ONIG_LIB)  	clang $(CFLAGS_M) $< $(LIBS) -o $@ -main-utf16-be: encode-harness.c $(ONIG_LIB) +read-utf16-be: base.c $(ONIG_LIB)  	clang -DUTF16_BE $(CFLAGS_M) $< $(LIBS) -o $@ -main-utf16-le: encode-harness.c $(ONIG_LIB) +read-utf16-le: base.c $(ONIG_LIB)  	clang -DUTF16_LE $(CFLAGS_M) $< $(LIBS) -o $@ -main-regset: regset-harness.c $(ONIG_LIB) +read-regset: regset.c $(ONIG_LIB)  	clang $(CFLAGS_M) $< $(LIBS) -o $@  libfuzzer-onig: libfuzzer-onig.cpp $(ONIG_LIB) diff --git a/harnesses/regset-harness.c b/harnesses/regset.c index b4b7e20..a8dd181 100644 --- a/harnesses/regset-harness.c +++ b/harnesses/regset.c @@ -1,5 +1,5 @@  /* - * regset-harness.c + * regset.c   * Copyright (c) 2019  K.Kosako   */  #include <stdio.h> @@ -14,9 +14,9 @@  #include "oniguruma.h" -#define RETRY_LIMIT    500 +#define RETRY_LIMIT   5000 -#ifdef WITH_READ_MAIN +#ifdef STANDALONE  //#define CHECK_EACH_REGEX_SEARCH_TIME  #endif @@ -25,6 +25,18 @@  typedef unsigned char uint8_t;  static OnigEncoding ENC; +static void +output_current_time(FILE* fp) +{ +  char d[64]; +  time_t t; + +  t = time(NULL); +  strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + +  fprintf(fp, "%s", d); +} +  #ifdef CHECK_EACH_REGEX_SEARCH_TIME  static double  get_sec(struct timespec* ts, struct timespec* te) @@ -85,7 +97,7 @@ search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char*    r = onig_regset_search(set, str, end, start, range, lead,                           ONIG_OPTION_NONE, &match_pos);    if (r >= 0) { -#ifdef WITH_READ_MAIN +#ifdef STANDALONE      int i;      int match_index;      OnigRegion* region; @@ -105,12 +117,12 @@ search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char*  #endif    }    else if (r == ONIG_MISMATCH) { -#ifdef WITH_READ_MAIN +#ifdef STANDALONE      fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC));  #endif    }    else { /* error */ -#ifdef WITH_READ_MAIN +#ifdef STANDALONE      char s[ONIG_MAX_ERROR_MESSAGE_LEN];      onig_error_code_to_str((UChar* )s, r); @@ -148,13 +160,13 @@ exec(OnigEncoding enc, int reg_num, int init_reg_num,    options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE;    onig_initialize(&enc, 1); -  onig_set_retry_limit_in_match(RETRY_LIMIT); +  onig_set_retry_limit_in_search(RETRY_LIMIT);    for (i = 0; i < init_reg_num; i++) {      r = onig_new(®s[i], pat[i], pat_end[i], options, ENC,                   ONIG_SYNTAX_DEFAULT, &einfo);      if (r != 0) { -#ifdef WITH_READ_MAIN +#ifdef STANDALONE        char s[ONIG_MAX_ERROR_MESSAGE_LEN];        onig_error_code_to_str((UChar* )s, r, &einfo); @@ -189,7 +201,7 @@ exec(OnigEncoding enc, int reg_num, int init_reg_num,      r = onig_new(®, pat[i], pat_end[i], options, ENC,                   ONIG_SYNTAX_DEFAULT, &einfo);      if (r != 0) { -#ifdef WITH_READ_MAIN +#ifdef STANDALONE        char s[ONIG_MAX_ERROR_MESSAGE_LEN];        onig_error_code_to_str((UChar* )s, r, &einfo); @@ -311,7 +323,7 @@ LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)    memcpy(str, data, remaining_size);    str_null_end = str + remaining_size; -#ifdef WITH_READ_MAIN +#ifdef STANDALONE    fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n",            reg_num, pattern_size,            lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex"); @@ -344,26 +356,27 @@ LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)    }    if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { -    char d[64]; -    time_t t;      float fexec, freg, fvalid; -    t = time(NULL); -    strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); -      fexec  = (float )EXEC_COUNT / INPUT_COUNT;      freg   = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT;      fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; -    fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", -            d, EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); +    output_current_time(stdout); +    fprintf(stdout, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", +            EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum);      EXEC_COUNT_INTERVAL = 0;    } +  else if (EXEC_COUNT == 1) { +    output_current_time(stdout); +    fprintf(stdout, ": ------------ START ------------\n"); +  } +    return r;  } -#ifdef WITH_READ_MAIN +#ifdef STANDALONE  extern int main(int argc, char* argv[])  { @@ -376,4 +389,4 @@ extern int main(int argc, char* argv[])    return 0;  } -#endif /* WITH_READ_MAIN */ +#endif /* STANDALONE */ | 
