diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2019-08-07 09:32:48 +0200 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2019-08-07 09:32:48 +0200 | 
| commit | 40f3d0030e6e98bcb02d6523e5ee48497dec49a6 (patch) | |
| tree | a992f9a6acd3edc2c7bb6b1aba2e52084918c9ab /harnesses | |
| parent | e25c754918ae26e8b9e68a47bc1af36248e91800 (diff) | |
New upstream version 6.9.3 upstream/6.9.3
Diffstat (limited to 'harnesses')
| -rw-r--r-- | harnesses/ascii_compatible.dict | 111 | ||||
| -rw-r--r-- | harnesses/deluxe-encode-harness.c | 239 | ||||
| -rw-r--r-- | harnesses/dict_conv.py | 72 | ||||
| -rw-r--r-- | harnesses/encode-harness.c | 170 | ||||
| -rw-r--r-- | harnesses/syntax-harness.c | 120 | 
5 files changed, 712 insertions, 0 deletions
| diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict new file mode 100644 index 0000000..820bf47 --- /dev/null +++ b/harnesses/ascii_compatible.dict @@ -0,0 +1,111 @@ +# First-pass fuzzing dictionary for Oniguruma by Mark Griffin +"\\o{17777777777}" +"\\777" +"\\u" +"\\uFFFF" +"\\xFF" +"\\x{70000000}" +"\\C-" +"\\M-\\C-" +"\\X" +"\\p{" +"\\p{^" +"}" +"]" +")" +"\\n" +"\\r" +"\\R" +"\\W" +"\\w" +"\\s" +"\\S" +"\\d" +"\\O" +"\\X" +"\\b" +"\\y" +"\\Y" +"\\A" +"\\z" +"\\K" +"\\G" +"\\p{Print}" +"\\p{ASCII}" +"\\p{Alnum}" +"{0,2}" +"{3,}" +"{,3}" +"{5}" +"{4,2}" +"??" +"*?" +"+?" +"*+" +"{1,3}+" +"(?>" +"\\B" +"(?y{" +"[abcd1-9]" +"[\\w\\d" +"[\\p{Alphabetic}" +"[\\P{Arabic}" +"[\\x{ffff}" +"[a-w&&" +"[^" +"[:graph:]" +"[^:cntrl:]" +"(?i:" +"(?i)" +"(?m:" +"(?x:" +"(?W:" +"(?y-:" +"(?y{w}:" +"(?P:" +"(?#" +"(?:" +"(?=" +"(?!" +"(?<=" +"(?<!" +"(?>" +"(?<name>" +"(?{" +"(?{....}[x])" +"(?{.}[x]>)" +"(?{{{.}}})" +"(?~" +"(?~a)" +"(?~|a|.*)" +"(?~|(?:a|b))" +"(?~|)" +"(?(.) 
|.)" +"(?('-n'))" +"(?(n+0))" +"(?(n+1))" +"(?(n-1))" +"(?(<name+0>))" +"(?(<name+1>))" +"(?(<name-1>))" +"(*ERROR{-2000})" +"(*COUNT[tag]{X})" +"\\1" +"\\2" +"\\k<name>" +"\\k<1>" +"\\k<2>" +"\\k<-1>" +"\\k<-2>" +"\\k<name+0>" +"\\k<name+1>" +"\\k<name-1>" +"\\g<-1>" +"\\g<name>" +"name" +"(?<name>a|b\\g<name>c)" +"(?-i:\\g<name>)" +"\\N{name}" +"\\p{Hiragana}" +"\\p{Katakana}" +"\\p{Emoji}" diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe-encode-harness.c new file mode 100644 index 0000000..e1f84a5 --- /dev/null +++ b/harnesses/deluxe-encode-harness.c @@ -0,0 +1,239 @@ +/* + * deluxe-encode-harness.c + * contributed by Mark Griffin + */ +#include <stdio.h> +#include "oniguruma.h" + +#include <stdlib.h> +#include <string.h> + +#define DEFAULT_LIMIT 120 +typedef unsigned char uint8_t; + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end) +{ +  int r; +  unsigned char *start, *range; +  OnigRegion *region; + +  region = onig_region_new(); + +  start = str; +  range = end; +  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); +  if (r >= 0) { +    int i; + +    fprintf(stdout, "match at %d  (%s)\n", r, +            ONIGENC_NAME(onig_get_encoding(reg))); +    for (i = 0; i < region->num_regs; i++) { +      fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); +    } +  } +  else if (r == ONIG_MISMATCH) { +    fprintf(stdout, "search fail (%s)\n", +            ONIGENC_NAME(onig_get_encoding(reg))); +  } +  else { /* error */ +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +    onig_error_code_to_str((UChar* )s, r); +    fprintf(stdout, "ERROR: %s\n", s); +    fprintf(stdout, "  (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +    onig_region_free(region, 1 /* 1:free self, 0:free contents only */); +    return -1; +  } + +  onig_region_free(region, 1 /* 1:free self, 0:free contents only */); +  return 0; +} + +static int +exec(OnigEncoding enc, OnigOptionType options, +     char* apattern, char* 
apattern_end, char* astr, char* astr_end) +{ +  int r; +  regex_t* reg; +  OnigErrorInfo einfo; +  UChar* pattern = (UChar* )apattern; +  UChar* str     = (UChar* )astr; +  UChar* pattern_end = (UChar* )apattern_end; +  unsigned char *end = (unsigned char* )astr_end; + +  onig_initialize(&enc, 1); +  onig_set_retry_limit_in_match(DEFAULT_LIMIT); +  onig_set_parse_depth_limit(DEFAULT_LIMIT); + +  r = onig_new(&reg, pattern, pattern_end, +               options, enc, ONIG_SYNTAX_DEFAULT, &einfo); +  if (r != ONIG_NORMAL) { +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +    onig_error_code_to_str((UChar* )s, r, &einfo); +    fprintf(stdout, "ERROR: %s\n", s); +    onig_end(); +    return -1; +  } + +  r = search(reg, str, end); + +  onig_free(reg); +  onig_end(); +  return 0; +} + +static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN; + +static int +exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, +            OnigOptionType options, char* apattern, char* apattern_end, +            char* astr, char* astr_end) +{ +  int r; +  regex_t* reg; +  OnigCompileInfo ci; +  OnigErrorInfo einfo; +  UChar* pattern = (UChar* )apattern; +  UChar* str     = (UChar* )astr; +  UChar* pattern_end = (UChar* )apattern_end; +  unsigned char* end = (unsigned char* )astr_end; + +  onig_initialize(&str_enc, 1); +  onig_set_retry_limit_in_match(DEFAULT_LIMIT); +  onig_set_parse_depth_limit(DEFAULT_LIMIT); + +  ci.num_of_elements = 5; +  ci.pattern_enc = pattern_enc; +  ci.target_enc  = str_enc; +  ci.syntax      = ONIG_SYNTAX_DEFAULT; +  ci.option      = options; +  ci.case_fold_flag  = CF; + +  r = onig_new_deluxe(&reg, pattern, pattern_end, &ci, &einfo); +  if (r != ONIG_NORMAL) { +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +    onig_error_code_to_str((UChar* )s, r, &einfo); +    fprintf(stdout, "ERROR: %s\n", s); +    onig_end(); +    return -1; +  } + +  if (onigenc_is_valid_mbc_string(str_enc, str, end) != 0) { +    r = search(reg, str, end); +  } + +  onig_free(reg); +  onig_end(); +  return 
0; +} + +#define PATTERN_SIZE 48  +#define NUM_CONTROL_BYTES 1 +#define MIN_STR_SIZE  2 +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +  int r; +  size_t remaining_size; +  unsigned char *data; +  unsigned char pat_encoding_choice; +  unsigned char str_encoding_choice; +  unsigned char *pattern; +  unsigned char *str; +  unsigned char *pattern_end; +  unsigned char *str_end; +  unsigned int num_encodings; +  OnigEncodingType *pattern_enc; +  OnigEncodingType *str_enc; + +  OnigEncodingType *encodings[] = { +    ONIG_ENCODING_ASCII, +    ONIG_ENCODING_ISO_8859_1, +    ONIG_ENCODING_ISO_8859_2, +    ONIG_ENCODING_ISO_8859_3, +    ONIG_ENCODING_ISO_8859_4, +    ONIG_ENCODING_ISO_8859_5, +    ONIG_ENCODING_ISO_8859_6, +    ONIG_ENCODING_ISO_8859_7, +    ONIG_ENCODING_ISO_8859_8, +    ONIG_ENCODING_ISO_8859_9, +    ONIG_ENCODING_ISO_8859_10, +    ONIG_ENCODING_ISO_8859_11, +    ONIG_ENCODING_ISO_8859_13, +    ONIG_ENCODING_ISO_8859_14, +    ONIG_ENCODING_ISO_8859_15, +    ONIG_ENCODING_ISO_8859_16, +    ONIG_ENCODING_UTF8, +    ONIG_ENCODING_UTF16_BE, +    ONIG_ENCODING_UTF16_LE, +    ONIG_ENCODING_UTF32_BE, +    ONIG_ENCODING_UTF32_LE, +    ONIG_ENCODING_EUC_JP, +    ONIG_ENCODING_EUC_TW, +    ONIG_ENCODING_EUC_KR, +    ONIG_ENCODING_EUC_CN, +    ONIG_ENCODING_SJIS, +    //ONIG_ENCODING_KOI8, +    ONIG_ENCODING_KOI8_R, +    ONIG_ENCODING_CP1251, +    ONIG_ENCODING_BIG5, +    ONIG_ENCODING_GB18030, +  }; + +  if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) +    return 0; +  if (Size > 0x1000) +    return 0; + +  remaining_size = Size; +  data = (unsigned char *)(Data); + +  // pull off bytes to switch off +  pat_encoding_choice = data[0]; +  data++; +  remaining_size--; +  str_encoding_choice = data[0]; +  data++; +  remaining_size--; + +  // copy first PATTERN_SIZE bytes off to be the pattern +  pattern = (unsigned char *)malloc(PATTERN_SIZE+4); +  memset(pattern, 0, PATTERN_SIZE+4); +  memcpy(pattern, data, PATTERN_SIZE); +  
pattern_end = pattern + PATTERN_SIZE; +  data += PATTERN_SIZE; +  remaining_size -= PATTERN_SIZE; + +  str = (unsigned char*)malloc(remaining_size+4); +  memset(str, 0, remaining_size+4); +  memcpy(str, data, remaining_size); +  str_end = str + remaining_size; + +  num_encodings = sizeof(encodings) / sizeof(encodings[0]); +  pattern_enc = encodings[pat_encoding_choice % num_encodings]; +  str_enc = encodings[str_encoding_choice % num_encodings]; + +  r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, (char *)str, (char *)str_end); + +  free(pattern); +  free(str); + +  return r; +} + + +#ifdef WITH_READ_MAIN + +#include <unistd.h> + +extern int main(int argc, char* argv[]) +{ +  size_t n; +  uint8_t Data[10000]; + +  n = read(0, Data, sizeof(Data)); +  fprintf(stdout, "n: %ld\n", n); +  LLVMFuzzerTestOneInput(Data, n); + +  return 0; +} +#endif /* WITH_READ_MAIN */ diff --git a/harnesses/dict_conv.py b/harnesses/dict_conv.py new file mode 100644 index 0000000..f721293 --- /dev/null +++ b/harnesses/dict_conv.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +# dict_conv.py  (Python3 script) + +import sys + +ENC_UTF16_BE = 1 +ENC_UTF16_LE = 2 + +def add_char(enc, s, c): +  if enc == ENC_UTF16_BE: +    s += "\\x00" + +  s += c +  if enc == ENC_UTF16_LE: +    s += "\\x00" + +  return s + +def conv(enc, s): +  n = len(s) +  r = "" +  i = 0 +  while i < n: +    c = s[i] +    if c == '\\': +      c = s[i+1] +      if c == '\\' or c == '"': +        r = add_char(enc, r, "\\" + c) +        i += 2 +        continue +      else: +        raise("Unknown escape {0}".format(s)) + +    r = add_char(enc, r, c) +    i += 1 + +  return r + +def main(enc): +  print("# This file was generated by dict_conv.py.") +  for line in sys.stdin: +    s = line.strip() +    if s[0] == '#': +      print(s) +      continue + +    if s[0] == '"' and s[-1] == '"': +      s = conv(enc, s[1:-1]) +      print("\"{0}\"".format(s)) +    else: +      raise("Invalid 
format {0}".format(s)) + +def usage(argv): +  raise RuntimeError("Usage: python {0} utf16_be/utf16_le".format(argv[0])) + + +if __name__ == "__main__": +  argv = sys.argv +  argc = len(argv) + +  if argc >= 2: +    s = argv[1] +    if s == 'utf16_be': +      enc = ENC_UTF16_BE +    elif s == 'utf16_le': +      enc = ENC_UTF16_LE +    else: +      usage(argv) +  else: +    usage(argv) + +  main(enc) diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c new file mode 100644 index 0000000..e57fd4f --- /dev/null +++ b/harnesses/encode-harness.c @@ -0,0 +1,170 @@ +/* + * encode-harness.c + * contributed by Mark Griffin + */ +#include <stdio.h> +#include "oniguruma.h" + +#include <stdlib.h> +#include <string.h> + +#define PARSE_DEPTH_LIMIT   120 +#define RETRY_LIMIT        4000 + +typedef unsigned char uint8_t; + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end) +{ +  int r; +  unsigned char *start, *range; +  OnigRegion *region; + +  region = onig_region_new(); + +  start = str; +  range = end; +  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); +  if (r >= 0) { +    int i; + +    fprintf(stdout, "match at %d  (%s)\n", r, +            ONIGENC_NAME(onig_get_encoding(reg))); +    for (i = 0; i < region->num_regs; i++) { +      fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); +    } +  } +  else if (r == ONIG_MISMATCH) { +    fprintf(stdout, "search fail (%s)\n", +            ONIGENC_NAME(onig_get_encoding(reg))); +  } +  else { /* error */ +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +    onig_error_code_to_str((UChar* )s, r); +    fprintf(stdout, "ERROR: %s\n", s); +    fprintf(stdout, "  (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +    onig_region_free(region, 1 /* 1:free self, 0:free contents only */); +    return -1; +  } + +  onig_region_free(region, 1 /* 1:free self, 0:free contents only */); +  return 0; +} + +static int +exec(OnigEncoding enc, OnigOptionType options, +     char* 
apattern, char* apattern_end, char* astr, UChar* end) +{ +  int r; +  regex_t* reg; +  OnigErrorInfo einfo; +  UChar* pattern = (UChar* )apattern; +  UChar* str     = (UChar* )astr; +  UChar* pattern_end = (UChar* )apattern_end; + +  onig_initialize(&enc, 1); +  onig_set_retry_limit_in_match(RETRY_LIMIT); +  onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); + +  r = onig_new(&reg, pattern, pattern_end, +               options, enc, ONIG_SYNTAX_DEFAULT, &einfo); +  if (r != ONIG_NORMAL) { +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +    onig_error_code_to_str((UChar* )s, r, &einfo); +    fprintf(stdout, "ERROR: %s\n", s); +    onig_end(); +    return -1; +  } + +  if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { +    r = search(reg, str, end); +  } + +  onig_free(reg); +  onig_end(); +  return 0; +} + +#define PATTERN_SIZE 32 +#define NUM_CONTROL_BYTES 1 +#define MIN_STR_SIZE  1 +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +  if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) +    return 0; +  if (Size > 0x1000) +    return 0; + +  unsigned char *pattern_end; +  unsigned char *str_null_end; + +  size_t remaining_size = Size; +  unsigned char *data = (unsigned char *)(Data); + +  // pull off one byte to switch off +  unsigned char encoding_choice = data[0]; +  data++; +  remaining_size--; + +  // copy first PATTERN_SIZE bytes off to be the pattern +  unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+4); +  memset(pattern, 0, PATTERN_SIZE+4); +  memcpy(pattern, data, PATTERN_SIZE); +  pattern_end = pattern + PATTERN_SIZE; +  data += PATTERN_SIZE; +  remaining_size -= PATTERN_SIZE; + +  unsigned char *str = (unsigned char*)malloc(remaining_size+4); +  memset(str, 0, remaining_size+4); +  memcpy(str, data, remaining_size); +  str_null_end = str + remaining_size; + +  int r; +  OnigEncodingType *encodings[] = { +	  ONIG_ENCODING_SJIS, +	  ONIG_ENCODING_EUC_JP, +	  ONIG_ENCODING_CP1251, +	  ONIG_ENCODING_ISO_8859_1, +	  
ONIG_ENCODING_UTF8, +    ONIG_ENCODING_KOI8_R, +    ONIG_ENCODING_BIG5 +  }; + +  OnigEncodingType *enc; + +#ifdef UTF16_BE +  enc = ONIG_ENCODING_UTF16_BE; +#else +#ifdef UTF16_LE +  enc = ONIG_ENCODING_UTF16_LE; +#else +  int num_encodings = sizeof(encodings)/sizeof(encodings[0]); +  enc = encodings[encoding_choice % num_encodings]; +#endif +#endif + +  r = exec(enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, +           (char *)str, str_null_end); + +  free(pattern); +  free(str); + +  return r; +} + +#ifdef WITH_READ_MAIN + +#include <unistd.h> + +extern int main(int argc, char* argv[]) +{ +  size_t n; +  uint8_t Data[10000]; + +  n = read(0, Data, sizeof(Data)); +  fprintf(stdout, "n: %ld\n", n); +  LLVMFuzzerTestOneInput(Data, n); + +  return 0; +} +#endif /* WITH_READ_MAIN */ diff --git a/harnesses/syntax-harness.c b/harnesses/syntax-harness.c new file mode 100644 index 0000000..0fb3587 --- /dev/null +++ b/harnesses/syntax-harness.c @@ -0,0 +1,120 @@ +/* + * syntax-harness.c + * contributed by Mark Griffin + */ +#include <stdio.h> +#include <string.h> +#include "oniguruma.h" + +#include <stdlib.h> + +#define DEFAULT_LIMIT 120 +typedef unsigned char uint8_t; + +extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr) +{ +  int r; +  unsigned char *start, *range, *end; +  regex_t* reg; +  OnigErrorInfo einfo; +  OnigRegion *region; +  UChar* pattern = (UChar* )apattern; +  UChar* str     = (UChar* )astr; + +  r = onig_new(&reg, pattern, pattern + strlen((char* )pattern), +               ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); +  if (r != ONIG_NORMAL) { +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +    onig_error_code_to_str((UChar* )s, r, &einfo); +    fprintf(stdout, "ERROR: %s\n", s); +    return -1; +  } + +  region = onig_region_new(); + +  end   = str + strlen((char* )str); +  start = str; +  range = end; +  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); +  if (r >= 0) { +    int i; + +    
fprintf(stdout, "match at %d\n", r); +    for (i = 0; i < region->num_regs; i++) { +      fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); +    } +  } +  else if (r == ONIG_MISMATCH) { +    fprintf(stdout, "search fail\n"); +  } +  else { /* error */ +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; +    onig_error_code_to_str((UChar* )s, r); +    fprintf(stdout, "ERROR: %s\n", s); +    onig_region_free(region, 1 /* 1:free self, 0:free contents only */); +    onig_free(reg); +    return -1; +  } + +  onig_region_free(region, 1 /* 1:free self, 0:free contents only */); +  onig_free(reg); +  return 0; +} + +#define PATTERN_SIZE 64 +#define NUM_CONTROL_BYTES 1 +#define MIN_STR_SIZE  1 +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +  if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) +    return 0; +  if (Size > 0x1000) +    return 0; +  size_t remaining_size = Size; +  unsigned char *data = (unsigned char *)(Data); + +  // pull off one byte to switch syntax choice +  unsigned char syntax_choice = data[0]; +  data++; +  remaining_size--; + +  // copy first PATTERN_SIZE bytes off to be the pattern +  unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+1); +  memset(pattern, 0, PATTERN_SIZE+1); +  memcpy(pattern, data, PATTERN_SIZE); +  data += PATTERN_SIZE; +  remaining_size -= PATTERN_SIZE; + +  unsigned char *str = (unsigned char*)malloc(remaining_size+1); +  memset(str, 0, remaining_size+1); +  memcpy(str, data, remaining_size); +   +  OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; +  onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); + +  onig_set_retry_limit_in_match(DEFAULT_LIMIT); +  onig_set_parse_depth_limit(DEFAULT_LIMIT); + +  OnigSyntaxType *syntaxes[] = { +    ONIG_SYNTAX_POSIX_EXTENDED, +    ONIG_SYNTAX_EMACS, +    ONIG_SYNTAX_GREP, +    ONIG_SYNTAX_GNU_REGEX, +    ONIG_SYNTAX_JAVA, +    ONIG_SYNTAX_PERL_NG, +    ONIG_SYNTAX_RUBY, +    ONIG_SYNTAX_ONIGURUMA, +  };  +  OnigSyntaxType 
*syntax = syntaxes[syntax_choice % 8]; +   +  int r; +  r = exec(syntax, (char *)pattern, (char *)str); +  // r = exec(ONIG_SYNTAX_JAVA, "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc"); + +  onig_end(); + +  free(pattern); +  free(str); + +  return 0; +} | 
