diff options
Diffstat (limited to 'harnesses/regset-harness.c')
| -rw-r--r-- | harnesses/regset-harness.c | 379 | 
1 files changed, 379 insertions, 0 deletions
| diff --git a/harnesses/regset-harness.c b/harnesses/regset-harness.c new file mode 100644 index 0000000..b4b7e20 --- /dev/null +++ b/harnesses/regset-harness.c @@ -0,0 +1,379 @@ +/* + * regset-harness.c + * Copyright (c) 2019  K.Kosako + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +#include "oniguruma.h" + + +#define RETRY_LIMIT    500 + +#ifdef WITH_READ_MAIN +//#define CHECK_EACH_REGEX_SEARCH_TIME +#endif + +#define MAX_REG_NUM   256 + +typedef unsigned char uint8_t; +static OnigEncoding ENC; + +#ifdef CHECK_EACH_REGEX_SEARCH_TIME +static double +get_sec(struct timespec* ts, struct timespec* te) +{ +  double t; + +  t = (te->tv_sec - ts->tv_sec) + +      (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0; +  return t; +} + +static int +check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end) +{ +  int n; +  int i; +  int r; +  OnigRegion* region; + +  n = onig_regset_number_of_regex(set); +  region = onig_region_new(); + +  for (i = 0; i < n; i++) { +    regex_t* reg; +    unsigned char* start; +    unsigned char* range; +    struct timespec ts1, ts2; +    double t; + +    reg = onig_regset_get_regex(set, i); +    start = str; +    range = end; + +    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); + +    r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + +    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); +    t = get_sec(&ts1, &ts2); + +    fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", i, t * 1000.0); +  } + +  onig_region_free(region, 1); +  return 0; +} +#endif + +static int +search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end) +{ +  int r; +  int match_pos; +  unsigned char *start, *range; + +  start = str; +  range = end; +  r = onig_regset_search(set, str, end, start, range, lead, +                         ONIG_OPTION_NONE, &match_pos); +  if (r >= 0) { +#ifdef WITH_READ_MAIN +    int i; +    int match_index; +    OnigRegion* region; + +    match_index = r; +    fprintf(stdout, "match reg index: %d, pos: %d  (%s)\n", +            match_index, match_pos, ONIGENC_NAME(ENC)); +    region = onig_regset_get_region(set, match_index); +    if (region == 0) { +      fprintf(stdout, "ERROR: can't get region.\n"); +      return -1; +    } + +    for (i = 0; i < region->num_regs; i++) { +      fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); +    } +#endif +  } +  else if (r == ONIG_MISMATCH) { +#ifdef WITH_READ_MAIN +    fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC)); +#endif +  } +  else { /* error */ +#ifdef WITH_READ_MAIN +    char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + +    onig_error_code_to_str((UChar* )s, r); +    fprintf(stdout, "ERROR: %s\n", s); +    fprintf(stdout, "  (%s)\n", ONIGENC_NAME(ENC)); +#endif +    return -1; +  } + +  return 0; +} + +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + +static int +exec(OnigEncoding enc, int reg_num, int init_reg_num, +     UChar* pat[], UChar* pat_end[], +     OnigRegSetLead lead, UChar* str, UChar* end) +{ +  int r; +  int i, j; +  OnigRegSet* set; +  regex_t* reg; +  OnigOptionType options; +  OnigErrorInfo einfo; +  regex_t* regs[MAX_REG_NUM]; + +  EXEC_COUNT++; +  EXEC_COUNT_INTERVAL++; + +  options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE; + +  onig_initialize(&enc, 1); +  onig_set_retry_limit_in_match(RETRY_LIMIT); + +  for (i = 0; i < init_reg_num; i++) { +    r = onig_new(®s[i], pat[i], pat_end[i], options, ENC, +                 ONIG_SYNTAX_DEFAULT, &einfo); +    if (r != 0) { +#ifdef WITH_READ_MAIN +      char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + +      onig_error_code_to_str((UChar* )s, r, &einfo); +      fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif + +      for (j = 0; j < i; j++) onig_free(regs[j]); + +      onig_end(); + +      if (r == ONIGERR_PARSER_BUG || +          r == ONIGERR_STACK_BUG  || +          r == ONIGERR_UNDEFINED_BYTECODE || +          r == ONIGERR_UNEXPECTED_BYTECODE) { +        return -2; +      } +      else +        return -1; +    } +  } + +  r = onig_regset_new(&set, init_reg_num, regs); +  if (r != 0) { +    for (i = 0; i < init_reg_num; i++) { +      onig_free(regs[i]); +    } +    onig_end(); +    return -1; +  } + +  for (i = init_reg_num; i < reg_num; i++) { +    r = onig_new(®, pat[i], pat_end[i], options, ENC, +                 ONIG_SYNTAX_DEFAULT, &einfo); +    if (r != 0) { +#ifdef WITH_READ_MAIN +      char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + +      onig_error_code_to_str((UChar* )s, r, &einfo); +      fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif +      onig_regset_free(set); +      onig_end(); + +      if (r == ONIGERR_PARSER_BUG || +          r == ONIGERR_STACK_BUG  || +          r == ONIGERR_UNDEFINED_BYTECODE || +          r == ONIGERR_UNEXPECTED_BYTECODE) { +        return -2; +      } +      else +        return -1; +    } + +    r = onig_regset_add(set, reg); +    if (r != 0) { +      onig_regset_free(set); +      onig_end(); +      fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i); +      return r; +    } +  } + +  REGEX_SUCCESS_COUNT++; + +  if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { +    VALID_STRING_COUNT++; +    r = search(set, lead, str, end); +#ifdef CHECK_EACH_REGEX_SEARCH_TIME +    r = check_each_regex_search_time(set, str, end); +#endif +  } + +  onig_regset_free(set); +  onig_end(); +  return 0; +} + +#define MAX_PATTERN_SIZE      30 +#define NUM_CONTROL_BYTES      3 + +#define EXEC_PRINT_INTERVAL  2000000 + +static int MaxRegNum; +static int MaxInitRegNum; + +extern int +LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +  int r, i; +  int pattern_size; +  unsigned char *str_null_end; +  size_t remaining_size; +  unsigned char *data; +  unsigned int reg_num; +  unsigned int init_reg_num; +  unsigned char* pat[256]; +  unsigned char* pat_end[256]; +  int len; +  unsigned int lead_num; +  OnigRegSetLead lead; + +  INPUT_COUNT++; + +  if (Size < NUM_CONTROL_BYTES) return 0; + +  remaining_size = Size; +  data = (unsigned char* )(Data); + +  reg_num = data[0]; +  data++; +  remaining_size--; + +  init_reg_num = data[0]; +  data++; +  remaining_size--; + +  lead_num = data[0]; +  data++; +  remaining_size--; +  lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD); + +  if (remaining_size < reg_num * 2) { +    reg_num = reg_num % 15;  // zero is OK. +  } + +  init_reg_num %= (reg_num + 1); + +  if (MaxRegNum < reg_num) +    MaxRegNum = reg_num; + +  if (MaxInitRegNum < init_reg_num) +    MaxInitRegNum = init_reg_num; + +  if (reg_num == 0) +    pattern_size = 1; +  else +    pattern_size = remaining_size / (reg_num * 2); +     +  if (pattern_size > MAX_PATTERN_SIZE) +    pattern_size = MAX_PATTERN_SIZE; + +  len = pattern_size * reg_num; +  if (len == 0) len = 1; + +  for (i = 0; i < reg_num; i++) { +    pat[i] = (unsigned char* )malloc(pattern_size); +    memcpy(pat[i], data, pattern_size); +    pat_end[i] = pat[i] + pattern_size; +    data += pattern_size; +    remaining_size -= pattern_size; +  } + +  unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); +  memcpy(str, data, remaining_size); +  str_null_end = str + remaining_size; + +#ifdef WITH_READ_MAIN +  fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n", +          reg_num, pattern_size, +          lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex"); + +  if (reg_num != 0) { +    unsigned char* p; +    i = 0; +    p = pat[0]; +    while (p < pat_end[0]) { +      fprintf(stdout, " 0x%02x", (int )*p++); +      i++; +      if (i % 8 == 0) fprintf(stdout, "\n"); +    } +    fprintf(stdout, "\n"); +  } +#endif + +  ENC = ONIG_ENCODING_UTF8; + +  r = exec(ENC, reg_num, init_reg_num, pat, pat_end, lead, str, str_null_end); + +  for (i = 0; i < reg_num; i++) { +    free(pat[i]); +  } +  free(str); + +  if (r == -2) { +    //output_data("parser-bug", Data, Size); +    exit(-2); +  } + +  if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { +    char d[64]; +    time_t t; +    float fexec, freg, fvalid; + +    t = time(NULL); +    strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + +    fexec  = (float )EXEC_COUNT / INPUT_COUNT; +    freg   = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; +    fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + +    fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", +            d, EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); + +    EXEC_COUNT_INTERVAL = 0; +  } +  return r; +} + +#ifdef WITH_READ_MAIN + +extern int main(int argc, char* argv[]) +{ +  size_t n; +  uint8_t Data[10000]; + +  n = read(0, Data, sizeof(Data)); +  fprintf(stdout, "n: %ld\n", n); +  LLVMFuzzerTestOneInput(Data, n); + +  return 0; +} +#endif /* WITH_READ_MAIN */ | 
