diff options
Diffstat (limited to 'harnesses/encode-harness.c')
| -rw-r--r-- | harnesses/encode-harness.c | 291 | 
1 files changed, 243 insertions, 48 deletions
| diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c index e57fd4f..5db0512 100644 --- a/harnesses/encode-harness.c +++ b/harnesses/encode-harness.c @@ -3,13 +3,19 @@   * contributed by Mark Griffin   */  #include <stdio.h> -#include "oniguruma.h" - +#include <unistd.h>  #include <stdlib.h>  #include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> -#define PARSE_DEPTH_LIMIT   120 -#define RETRY_LIMIT        4000 +#include "oniguruma.h" + + +//#define PARSE_DEPTH_LIMIT   120 +#define RETRY_LIMIT        3500  typedef unsigned char uint8_t; @@ -26,6 +32,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)    range = end;    r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);    if (r >= 0) { +#ifdef WITH_READ_MAIN      int i;      fprintf(stdout, "match at %d  (%s)\n", r, @@ -33,17 +40,29 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)      for (i = 0; i < region->num_regs; i++) {        fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);      } +#endif    }    else if (r == ONIG_MISMATCH) { +#ifdef WITH_READ_MAIN      fprintf(stdout, "search fail (%s)\n",              ONIGENC_NAME(onig_get_encoding(reg))); +#endif    }    else { /* error */ +#ifdef WITH_READ_MAIN      char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +      onig_error_code_to_str((UChar* )s, r);      fprintf(stdout, "ERROR: %s\n", s);      fprintf(stdout, "  (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +#endif      onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + +    if (r == ONIGERR_STACK_BUG || +        r == ONIGERR_UNDEFINED_BYTECODE || +        r == ONIGERR_UNEXPECTED_BYTECODE) +      return -2; +      return -1;    } @@ -51,8 +70,14 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)    return 0;  } +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long 
VALID_STRING_COUNT; +  static int -exec(OnigEncoding enc, OnigOptionType options, +exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,       char* apattern, char* apattern_end, char* astr, UChar* end)  {    int r; @@ -62,22 +87,41 @@ exec(OnigEncoding enc, OnigOptionType options,    UChar* str     = (UChar* )astr;    UChar* pattern_end = (UChar* )apattern_end; +  EXEC_COUNT++; +  EXEC_COUNT_INTERVAL++; +    onig_initialize(&enc, 1);    onig_set_retry_limit_in_match(RETRY_LIMIT); -  onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); +  //onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT);    r = onig_new(&reg, pattern, pattern_end, -               options, enc, ONIG_SYNTAX_DEFAULT, &einfo); +               options, enc, syntax, &einfo);    if (r != ONIG_NORMAL) {      char s[ONIG_MAX_ERROR_MESSAGE_LEN];      onig_error_code_to_str((UChar* )s, r, &einfo); +#ifdef WITH_READ_MAIN      fprintf(stdout, "ERROR: %s\n", s); +#endif      onig_end(); -    return -1; + +    if (r == ONIGERR_PARSER_BUG || +        r == ONIGERR_STACK_BUG  || +        r == ONIGERR_UNDEFINED_BYTECODE || +        r == ONIGERR_UNEXPECTED_BYTECODE) { +      return -2; +    } +    else +      return -1;    } +  REGEX_SUCCESS_COUNT++; + +  r = search(reg, pattern, pattern_end); +  if (r == -2) return -2;    if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { +    VALID_STRING_COUNT++;      r = search(reg, str, end); +    if (r == -2) return -2;    }    onig_free(reg); @@ -85,52 +129,114 @@ exec(OnigEncoding enc, OnigOptionType options,    return 0;  } -#define PATTERN_SIZE 32 -#define NUM_CONTROL_BYTES 1 -#define MIN_STR_SIZE  1 -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +#if 0 +static void +output_data(char* path, const uint8_t * data, size_t size)  { -  if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) -    return 0; -  if (Size > 0x1000) -    return 0; +  int fd; +  ssize_t n; +  fd = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IRGRP|S_IROTH); +  if (fd == -1) { + 
   fprintf(stderr, "ERROR: output_data(): can't open(%s)\n", path); +    return ; +  } + +  n = write(fd, (const void* )data, size); +  if (n != size) { +    fprintf(stderr, "ERROR: output_data(): n: %ld, size: %ld\n", n, size); +  } +  close(fd); +} +#endif + + +static int +alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, +           int pattern_size, size_t remaining_size, unsigned char *data) +{ +  int r;    unsigned char *pattern_end;    unsigned char *str_null_end; -  size_t remaining_size = Size; -  unsigned char *data = (unsigned char *)(Data); +  // copy first PATTERN_SIZE bytes off to be the pattern +  unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); +  memcpy(pattern, data, pattern_size); +  pattern_end = pattern + pattern_size; +  data += pattern_size; +  remaining_size -= pattern_size; -  // pull off one byte to switch off -  unsigned char encoding_choice = data[0]; -  data++; -  remaining_size--; +#if defined(UTF16_BE) || defined(UTF16_LE) +  if (remaining_size % 2 == 1) remaining_size--; +#endif -  // copy first PATTERN_SIZE bytes off to be the pattern -  unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+4); -  memset(pattern, 0, PATTERN_SIZE+4); -  memcpy(pattern, data, PATTERN_SIZE); -  pattern_end = pattern + PATTERN_SIZE; -  data += PATTERN_SIZE; -  remaining_size -= PATTERN_SIZE; - -  unsigned char *str = (unsigned char*)malloc(remaining_size+4); -  memset(str, 0, remaining_size+4); +  unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? 
remaining_size : 1);    memcpy(str, data, remaining_size);    str_null_end = str + remaining_size; -  int r; -  OnigEncodingType *encodings[] = { -	  ONIG_ENCODING_SJIS, -	  ONIG_ENCODING_EUC_JP, -	  ONIG_ENCODING_CP1251, -	  ONIG_ENCODING_ISO_8859_1, -	  ONIG_ENCODING_UTF8, -    ONIG_ENCODING_KOI8_R, -    ONIG_ENCODING_BIG5 +  r = exec(enc, options, syntax, +           (char *)pattern, (char *)pattern_end, +           (char *)str, str_null_end); + +  free(pattern); +  free(str); +  return r; +} + + +#define EXEC_PRINT_INTERVAL  10000000 +#define MAX_PATTERN_SIZE     150 + +#ifdef SYNTAX_TEST +#define NUM_CONTROL_BYTES      3 +#else +#define NUM_CONTROL_BYTES      2 +#endif + +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +#if !defined(UTF16_BE) && !defined(UTF16_LE) +  static OnigEncoding encodings[] = { +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_SJIS, +    //ONIG_ENCODING_EUC_JP, +    ONIG_ENCODING_ISO_8859_1, +    ONIG_ENCODING_BIG5, +    ONIG_ENCODING_GB18030, +    ONIG_ENCODING_EUC_TW +  }; +  unsigned char encoding_choice; +#endif + +#ifdef SYNTAX_TEST +  static OnigSyntaxType* syntaxes[] = { +    ONIG_SYNTAX_POSIX_EXTENDED, +    ONIG_SYNTAX_EMACS, +    ONIG_SYNTAX_GREP, +    ONIG_SYNTAX_GNU_REGEX, +    ONIG_SYNTAX_JAVA, +    ONIG_SYNTAX_PERL_NG, +    ONIG_SYNTAX_ONIGURUMA    }; +  unsigned char syntax_choice; +#endif + +  int r; +  int pattern_size; +  size_t remaining_size; +  unsigned char *data; +  unsigned char options_choice; +  OnigOptionType  options; +  OnigEncoding    enc; +  OnigSyntaxType* syntax; -  OnigEncodingType *enc; +  INPUT_COUNT++; +  if (Size < NUM_CONTROL_BYTES) return 0; + +  remaining_size = Size; +  data = (unsigned char* )(Data);  #ifdef UTF16_BE    enc = ONIG_ENCODING_UTF16_BE; @@ -138,24 +244,113 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)  #ifdef UTF16_LE    enc = ONIG_ENCODING_UTF16_LE;  #else +  encoding_choice = data[0]; +  data++; +  
remaining_size--; +    int num_encodings = sizeof(encodings)/sizeof(encodings[0]);    enc = encodings[encoding_choice % num_encodings];  #endif  #endif -  r = exec(enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, -           (char *)str, str_null_end); +#ifdef SYNTAX_TEST +  syntax_choice = data[0]; +  data++; +  remaining_size--; -  free(pattern); -  free(str); +  int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); +  syntax = syntaxes[syntax_choice % num_syntaxes]; +#else +  syntax = ONIG_SYNTAX_DEFAULT; +#endif + +  options_choice = data[0]; +  options = (options_choice % 2 == 0) ? ONIG_OPTION_NONE : ONIG_OPTION_IGNORECASE; +  data++; +  remaining_size--; + +#ifdef WITH_READ_MAIN +#ifdef SYNTAX_TEST +  fprintf(stdout, "enc: %s, syntax: %d, options: %u\n", +          ONIGENC_NAME(enc), (int )(syntax_choice % num_syntaxes), options); +#else +  fprintf(stdout, "enc: %s, options: %u\n", ONIGENC_NAME(enc), options); +#endif +#endif +#ifdef WITH_READ_MAIN +  int max_pattern_size; + +  if (remaining_size == 0) +    max_pattern_size = 0; +  else { +    max_pattern_size = remaining_size - 1; +    if (max_pattern_size > MAX_PATTERN_SIZE) +      max_pattern_size = MAX_PATTERN_SIZE; + +#if defined(UTF16_BE) || defined(UTF16_LE) +    if (max_pattern_size % 2 == 1) max_pattern_size--; +#endif +  } + +  for (pattern_size = 0; pattern_size <= max_pattern_size; ) { +    fprintf(stdout, "pattern_size: %d\n", pattern_size); +    r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); +    if (r == -2) { +      //output_data("parser-bug", Data, Size); +      exit(-2); +    } + +#if defined(UTF16_BE) || defined(UTF16_LE) +    pattern_size += 2; +#else +    pattern_size++; +#endif +  } + +#else /* WITH_READ_MAIN */ + +  if (remaining_size == 0) +    pattern_size = 0; +  else { +    pattern_size = INPUT_COUNT % remaining_size; +    if (pattern_size > MAX_PATTERN_SIZE) +      pattern_size = MAX_PATTERN_SIZE; + +#if defined(UTF16_BE) || 
defined(UTF16_LE) +    if (pattern_size % 2 == 1) pattern_size--; +#endif +  } + +  r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); +  if (r == -2) { +    //output_data("parser-bug", Data, Size); +    exit(-2); +  } +#endif /* else WITH_READ_MAIN */ + +  if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { +    char d[64]; +    time_t t; +    float fexec, freg, fvalid; + +    t = time(NULL); +    strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + +    fexec  = (float )EXEC_COUNT / INPUT_COUNT; +    freg   = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; +    fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + +    fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", +            d, EXEC_COUNT, fexec, freg, fvalid); + +    EXEC_COUNT_INTERVAL = 0; +  }    return r;  }  #ifdef WITH_READ_MAIN -#include <unistd.h> -  extern int main(int argc, char* argv[])  {    size_t n; | 
