diff options
author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-06-02 19:35:35 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-06-02 19:35:35 +0200 |
commit | 7b8e42400dc780b86479b4f0c713db5d9aa6623b (patch) | |
tree | ebe0dd5a09b8bf368b5695b13eb28fe07371afc5 | |
parent | 0e4c1c1dd3dd4271e54a7305699c303b629fa915 (diff) | |
parent | 9c10d3a428f2e33d01e1ea66932d272ac731787f (diff) |
Merge tag 'upstream/6.3.0' into feature/upstream
Upstream version 6.3.0
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | HISTORY | 11 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | dist.info | 2 | ||||
-rw-r--r-- | doc/RE | 3 | ||||
-rw-r--r-- | doc/RE.ja | 33 | ||||
-rw-r--r-- | index.html | 7 | ||||
-rw-r--r-- | index_ja.html | 7 | ||||
-rwxr-xr-x | src/gperf_unfold_key_conv.py | 2 | ||||
-rw-r--r-- | src/oniguruma.h | 3 | ||||
-rw-r--r-- | src/regcomp.c | 98 | ||||
-rw-r--r-- | src/regexec.c | 46 | ||||
-rw-r--r-- | src/regparse.c | 179 | ||||
-rw-r--r-- | src/regsyntax.c | 6 | ||||
-rw-r--r-- | src/unicode_fold1_key.c | 12 | ||||
-rw-r--r-- | src/unicode_fold2_key.c | 12 | ||||
-rw-r--r-- | src/unicode_fold3_key.c | 12 | ||||
-rw-r--r-- | src/unicode_unfold_key.c | 14 | ||||
-rw-r--r-- | test/testc.c | 1 | ||||
-rw-r--r-- | test/testu.c | 3 |
21 files changed, 280 insertions, 181 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a2e4da..3b262f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 2.8) project(oniguruma C) set(PACKAGE onig) -set(PACKAGE_VERSION "6.2.0") +set(PACKAGE_VERSION "6.3.0") set(USE_COMBINATION_EXPLOSION_CHECK 0) set(USE_CRNL_AS_LINE_TERMINATOR 0) @@ -1,5 +1,16 @@ History +2017/05/29: Version 6.3.0 + +2017/05/24: fix #60 : invalid state(CCS_VALUE) in parse_char_class() +2017/05/24: fix #59 : access to invalid address by reg->dmax value +2017/05/23: fix invalid increment of start position in onig_scan() +2017/05/23: fix #58 : access to invalid address by reg->dmin value +2017/05/23: fix #57 : DATA_ENSURE() check must be before data access +2017/05/22: fix #56 : return invalid result for codepoint 0xFFFFFFFF +2017/05/19: [new] add \o{17777777777} syntax. +2017/05/19: fix #55 : Byte value expressed in octal must be smaller than 256 + 2017/04/08: Version 6.2.0 2017/03/15: fix: size in xmemcpy in stack_double (PR #51) @@ -20,6 +20,12 @@ Supported character encodings: * CP1251: contributed by Byte +New feature of version 6.3.0 +-------------------------- + +* NEW SYNTAX: escape-o-brace for octal codepoint. + + New feature of version 6.1.2 -------------------------- diff --git a/configure.ac b/configure.ac index 28ebdbf..1a7ca9b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT(onig, 6.2.0) +AC_INIT(onig, 6.3.0) AC_CONFIG_MACRO_DIR([m4]) @@ -1,7 +1,7 @@ --- This file is part of LuaDist project name = "onig" -version = "6.2.0" +version = "6.3.0" desc = "Oniguruma is a regular expressions library." 
author = "K.Kosako" @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.0.0 2016/08/18 +Oniguruma Regular Expressions Version 6.3.0 2017/05/19 syntax: ONIG_SYNTAX_RUBY (default) @@ -22,6 +22,7 @@ syntax: ONIG_SYNTAX_RUBY (default) \a bell (0x07) \e escape (0x1B) \nnn octal char (encoded byte value) + \o{17777777777} wide octal char (character code point value) \xHH hexadecimal char (encoded byte value) \x{7HHHHHHH} wide hexadecimal char (character code point value) \cx control char (character code point value) @@ -1,4 +1,4 @@ -µ´¼Ö Àµµ¬É½¸½ Version 6.0.0 2016/05/02 +µ´¼Ö Àµµ¬É½¸½ Version 6.3.0 2017/05/19 »ÈÍÑʸˡ: ONIG_SYNTAX_RUBY (´ûÄêÃÍ) @@ -13,21 +13,22 @@ 2. ʸ»ú - \t ¿åÊ¿¥¿¥Ö (0x09) - \v ¿âľ¥¿¥Ö (0x0B) - \n ²þ¹Ô (0x0A) - \r Éüµ¢ (0x0D) - \b ¸åÂà¶õÇò (0x08) - \f ²þÊÇ (0x0C) - \a ¾â (0x07) - \e ÂàÈò½¤¾þ (0x1B) - \nnn Ȭ¿Ê¿ôɽ¸½ Éä¹æ²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô) - \xHH ½½Ï»¿Ê¿ôɽ¸½ Éä¹æ²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô) - \x{7HHHHHHH} ³ÈÄ¥½½Ï»¿Ê¿ôɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ - \cx À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ - \C-x À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ - \M-x Ķ (x|0x80) ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ - \M-\C-x Ķ + À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \t ¿åÊ¿¥¿¥Ö (0x09) + \v ¿âľ¥¿¥Ö (0x0B) + \n ²þ¹Ô (0x0A) + \r Éüµ¢ (0x0D) + \b ¸åÂà¶õÇò (0x08) + \f ²þÊÇ (0x0C) + \a ¾â (0x07) + \e ÂàÈò½¤¾þ (0x1B) + \nnn Ȭ¿Ê¿ôɽ¸½ Éä¹æ²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô) + \o{17777777777} ³ÈĥȬ¿Ê¿ôɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \xHH ½½Ï»¿Ê¿ôɽ¸½ Éä¹æ²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô) + \x{7HHHHHHH} ³ÈÄ¥½½Ï»¿Ê¿ôɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \cx À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \C-x À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \M-x Ķ (x|0x80) ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \M-\C-x Ķ + À©¸æʸ»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ ¢¨ \b¤Ï¡¢Ê¸»ú½¸¹çÆâ¤Ç¤Î¤ß͸ú @@ -8,7 +8,7 @@ <h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>) <p> -(c) K.Kosako, updated at: 2017/04/06 +(c) K.Kosako, updated at: 2017/05/26 </p> <dl> @@ -16,12 +16,9 @@ <dt><b>What's new</b> </font> <ul> +<li>2017/05/29: Version 6.3.0 released.</li> <li>2017/04/08: Version 6.2.0 released.</li> <li>2016/12/11: Version 6.1.3 released.</li> -<li>2016/11/07: Version 6.1.2 
released.</li> -<li>2016/09/02: Version 6.1.1 released.</li> -<li>2016/08/29: Version 6.1.0 released.</li> -<li>2014/12/12: Version 5.9.6 released.</li> </ul> </dl> <hr> diff --git a/index_ja.html b/index_ja.html index 7bae60e..7070dfe 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@ <h1>鬼車</h1> <p> -(c) K.Kosako, 最終更新: 2017/04/06 +(c) K.Kosako, 最終更新: 2017/05/26 </p> <dl> @@ -16,12 +16,9 @@ <dt><b>更新情報</b> </font> <ul> +<li>2017/05/29: Version 6.3.0 リリース</li> <li>2017/04/08: Version 6.2.0 リリース</li> <li>2016/12/11: Version 6.1.3 リリース</li> -<li>2016/11/07: Version 6.1.2 リリース</li> -<li>2016/09/02: Version 6.1.1 リリース</li> -<li>2016/08/29: Version 6.1.0 リリース</li> -<li>2014/12/12: Version 5.9.6 リリース</li> </ul> </dl> <hr> diff --git a/src/gperf_unfold_key_conv.py b/src/gperf_unfold_key_conv.py index dcd8587..34f9c2f 100755 --- a/src/gperf_unfold_key_conv.py +++ b/src/gperf_unfold_key_conv.py @@ -36,7 +36,7 @@ def parse_line(s): if r != s: return r r = re.sub(REG_GET_CODE, 'OnigCodePoint gcode = wordlist[key].code;', s) if r != s: return r - r = re.sub(REG_CODE_CHECK, 'if (code == gcode)', s) + r = re.sub(REG_CODE_CHECK, 'if (code == gcode && wordlist[key].index >= 0)', s) if r != s: return r return s diff --git a/src/oniguruma.h b/src/oniguruma.h index 33e2a0a..02d4254 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -35,7 +35,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 -#define ONIGURUMA_VERSION_MINOR 2 +#define ONIGURUMA_VERSION_MINOR 3 #define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus @@ -473,6 +473,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ #define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ #define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ +#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{1OOOOOOOOOO} */ #define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ #define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) 
*/ diff --git a/src/regcomp.c b/src/regcomp.c index 5c924b5..0e9a9ab 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -761,17 +761,17 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) if (infinite && qn->lower <= 1) { if (qn->greedy) { if (qn->lower == 1) - len = SIZE_OP_JUMP; + len = SIZE_OP_JUMP; else - len = 0; + len = 0; len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; } else { if (qn->lower == 0) - len = SIZE_OP_JUMP; + len = SIZE_OP_JUMP; else - len = 0; + len = 0; len += mod_tlen + SIZE_OP_PUSH + cklen; } @@ -785,10 +785,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) else if (qn->upper == 1 && qn->greedy) { if (qn->lower == 0) { if (CKN_ON) { - len = SIZE_OP_STATE_CHECK_PUSH + tlen; + len = SIZE_OP_STATE_CHECK_PUSH + tlen; } else { - len = SIZE_OP_PUSH + tlen; + len = SIZE_OP_PUSH + tlen; } } else { @@ -1255,7 +1255,7 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg) if (tlen < 0) return tlen; len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; } else { len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; @@ -1362,7 +1362,7 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) r = add_opcode(reg, OP_POP); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); } else { r = add_opcode(reg, OP_PUSH_STOP_BT); @@ -2145,16 +2145,16 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) switch 
(en->type) { case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CLEN_FIXED(en)) - *len = en->char_len; - else { - r = get_char_length_tree1(en->target, reg, len, level); - if (r == 0) { - en->char_len = *len; - SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); - } - } - break; + if (IS_ENCLOSE_CLEN_FIXED(en)) + *len = en->char_len; + else { + r = get_char_length_tree1(en->target, reg, len, level); + if (r == 0) { + en->char_len = *len; + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); + } + } + break; #endif case ENCLOSE_OPTION: case ENCLOSE_STOP_BACKTRACK: @@ -2594,17 +2594,17 @@ get_min_len(Node* node, OnigLen *min, ScanEnv* env) if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; else { - if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - *min = 0; // recursive - else { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = get_min_len(en->target, min, env); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - if (r == 0) { - en->min_len = *min; - SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); - } - } + if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + *min = 0; // recursive + else { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = get_min_len(en->target, min, env); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + if (r == 0) { + en->min_len = *min; + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); + } + } } break; @@ -2713,22 +2713,22 @@ get_max_len(Node* node, OnigLen *max, ScanEnv* env) EncloseNode* en = NENCLOSE(node); switch (en->type) { case ENCLOSE_MEMORY: - if (IS_ENCLOSE_MAX_FIXED(en)) - *max = en->max_len; - else { - if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - *max = ONIG_INFINITE_DISTANCE; - else { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = get_max_len(en->target, max, env); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - if (r == 0) { - en->max_len = *max; - SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); - } - } - } - break; + if (IS_ENCLOSE_MAX_FIXED(en)) + *max = en->max_len; + else { + if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + *max = ONIG_INFINITE_DISTANCE; + else { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = 
get_max_len(en->target, max, env); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + if (r == 0) { + en->max_len = *max; + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); + } + } + } + break; case ENCLOSE_OPTION: case ENCLOSE_STOP_BACKTRACK: @@ -4559,7 +4559,7 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) if (to->expr.len > 0) { if (add->len.max > 0) { if (to->expr.len > (int )add->len.max) - to->expr.len = add->len.max; + to->expr.len = add->len.max; if (to->expr.mmd.max == 0) select_opt_exact_info(enc, &to->exb, &to->expr); @@ -4957,7 +4957,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) reg->exact_end = reg->exact + e->len; allow_reverse = - ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, @@ -5045,7 +5045,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (opt.exb.len > 0 || opt.exm.len > 0) { select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); if (opt.map.value > 0 && - comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { goto set_map; } else { diff --git a/src/regexec.c b/src/regexec.c index 35fef11..c0626ef 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -1346,8 +1346,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fprintf(stderr, "%4d> \"", (int )(s - str)); bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { - len = enclen(encode, q); - while (len-- > 0) *bp++ = *q++; + len = enclen(encode, q); + while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } else { xmemcpy(bp, "\"", 1); bp += 1; } @@ -1473,14 +1473,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; case OP_EXACT1: MOP_IN(OP_EXACT1); -#if 0 DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++; -#endif - if (*p != *s++) goto fail; - DATA_ENSURE(0); - p++; MOP_OUT; break; @@ -3159,6 +3154,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { UChar *q = p + reg->dmin; + + if (q >= end) return 0; /* fail */ while (p < q) p += enclen(reg->enc, p); } } @@ -3238,18 +3235,25 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); + if (p - str < reg->dmax) { + *low = (UChar* )str; + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low); } else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low); + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), *low); + } } } } @@ -3790,8 +3794,10 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, if (rs != 0) return rs; - if (region->end[0] == start - str) - start++; + if (region->end[0] == start - str) { + if (start >= end) break; + start += enclen(reg->enc, start); + } else start = str + region->end[0]; diff --git a/src/regparse.c b/src/regparse.c index 11f9e34..8153513 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,7 +41,8 @@ OnigSyntaxType OnigSyntaxRuby = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL ) & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | @@ -553,8 +554,8 @@ i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) int r = (*(arg->func))(e->name, e->name + e->name_len, e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), - arg->reg, arg->arg); + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); if (r != 0) { arg->ret = r; return ST_STOP; @@ -1053,7 +1054,7 @@ onig_node_free(Node* node) switch (NTYPE(node)) { case NT_STR: if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { xfree(NSTR(node)->s); } break; @@ -2519,8 +2520,8 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, int flag = (c == '-' ? 
-1 : 1); if (PEND) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - goto end; + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + goto end; } PFETCH(c); if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; @@ -2531,9 +2532,9 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, exist_level = 1; if (!PEND) { - PFETCH(c); - if (c == end_code) - goto end; + PFETCH(c); + if (c == end_code) + goto end; } } @@ -2945,19 +2946,46 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) c2 = PPEEK; if (c2 == '{' && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); - - if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c2); - if (c2 == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); - } - else - PUNFETCH; - } + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); + } + else + PUNFETCH; + } + } + break; + + case 'o': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { + PINC; + num = scan_unsigned_octal_number(&p, end, 11, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_DIGIT(enc, c2)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 8; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } } break; @@ -3020,7 +3048,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) PUNFETCH; prev = p; num = scan_unsigned_octal_number(&p, end, 3, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -3132,7 +3160,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.repeat.upper = 1; greedy_check: if (!PEND && PPEEK_IS('?') && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { PFETCH(c); tok->u.repeat.greedy = 0; tok->u.repeat.possessive = 0; @@ -3302,6 +3330,31 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) goto end_buf; break; + case 'o': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { + PINC; + num = scan_unsigned_octal_number(&p, end, 11, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + if (ONIGENC_IS_CODE_DIGIT(enc, PPEEK)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + 
break; + case 'x': if (PEND) break; @@ -3392,7 +3445,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { prev = p; num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -3541,7 +3594,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } else { /* string */ p = tok->backp + enclen(enc, tok->backp); - } + } } break; } @@ -3753,8 +3806,7 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, OnigCodePoint prev = 0; for (i = 0; i < n; i++) { - for (j = prev; - j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { + for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { if (j >= sb_out) { goto sb_end2; } @@ -4028,14 +4080,16 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, } } - *state = CCS_VALUE; + if (*state != CCS_START) + *state = CCS_VALUE; + *type = CCV_CLASS; return 0; } static int -next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, - int* vs_israw, int v_israw, +next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to, + int* from_israw, int to_israw, enum CCVALTYPE intype, enum CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env) { @@ -4044,10 +4098,13 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, switch (*state) { case CCS_VALUE: if (*type == CCV_SB) { - BITSET_SET_BIT(cc->bs, (int )(*vs)); + if (*from > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + BITSET_SET_BIT(cc->bs, (int )(*from)); } else if (*type == CCV_CODE_POINT) { - r = add_code_range(&(cc->mbuf), env, *vs, *vs); + r = add_code_range(&(cc->mbuf), env, *from, *from); if (r < 0) return r; } break; @@ -4055,40 +4112,32 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, case CCS_RANGE: if (intype == 
*type) { if (intype == CCV_SB) { - if (*vs > 0xff || v > 0xff) + if (*from > 0xff || to > 0xff) return ONIGERR_INVALID_CODE_POINT_VALUE; - if (*vs > v) { + if (*from > to) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) goto ccs_range_end; else return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } - bitset_set_range(cc->bs, (int )*vs, (int )v); + bitset_set_range(cc->bs, (int )*from, (int )to); } else { - r = add_code_range(&(cc->mbuf), env, *vs, v); + r = add_code_range(&(cc->mbuf), env, *from, to); if (r < 0) return r; } } else { -#if 0 - if (intype == CCV_CODE_POINT && *type == CCV_SB) { -#endif - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); - r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); - if (r < 0) return r; -#if 0 + if (*from > to) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } - else - return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; -#endif + bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? 
to : 0xff)); + r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to); + if (r < 0) return r; } ccs_range_end: *state = CCS_COMPLETE; @@ -4103,9 +4152,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, break; } - *vs_israw = v_israw; - *vs = v; - *type = intype; + *from_israw = to_israw; + *from = to; + *type = intype; return 0; } @@ -4366,9 +4415,9 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = parse_char_class(&anode, tok, &p, end, env); if (r != 0) { - onig_node_free(anode); - goto cc_open_err; - } + onig_node_free(anode); + goto cc_open_err; + } acc = NCCLASS(anode); r = or_cclass(cc, acc, env->enc); @@ -4663,9 +4712,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = parse_subexp(&target, tok, term, &p, end, env); env->option = prev; if (r < 0) { - onig_node_free(target); - return r; - } + onig_node_free(target); + return r; + } *np = node_new_option(option); CHECK_NULL_RETURN_MEMERR(*np); NENCLOSE(*np)->target = target; @@ -5291,8 +5340,8 @@ parse_branch(Node** top, OnigToken* tok, int term, while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); if (r < 0) { - onig_node_free(node); - return r; + onig_node_free(node); + return r; } if (NTYPE(node) == NT_LIST) { diff --git a/src/regsyntax.c b/src/regsyntax.c index ade5b55..e751e24 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -168,7 +168,8 @@ OnigSyntaxType OnigSyntaxJava = { OnigSyntaxType OnigSyntaxPerl = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL ) & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | @@ -192,7 +193,8 @@ OnigSyntaxType OnigSyntaxPerl = { OnigSyntaxType OnigSyntaxPerl_NG = { (( 
SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL ) & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c index 6b390fc..2151211 100644 --- a/src/unicode_fold1_key.c +++ b/src/unicode_fold1_key.c @@ -1,7 +1,7 @@ /* This file was converted by gperf_fold_key_conv.py from gperf output file. */ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold1_key unicode_fold1_key.gperf */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold1_key unicode_fold1_key.gperf */ /* Computed positions: -k'1-3' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[]) return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+3] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)]; } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif int unicode_fold1_key(OnigCodePoint codes[]) { @@ -2534,7 +2540,7 @@ unicode_fold1_key(OnigCodePoint codes[]) { int key = hash(codes); - if (key <= MAX_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { int index = wordlist[key]; diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c index 74e9876..07cfa4e 100644 --- a/src/unicode_fold2_key.c +++ b/src/unicode_fold2_key.c @@ -1,7 +1,7 @@ /* This file was converted by gperf_fold_key_conv.py from gperf output file. 
*/ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold2_key unicode_fold2_key.gperf */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold2_key unicode_fold2_key.gperf */ /* Computed positions: -k'3,6' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[]) return asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)]; } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif int unicode_fold2_key(OnigCodePoint codes[]) { @@ -189,7 +195,7 @@ unicode_fold2_key(OnigCodePoint codes[]) { int key = hash(codes); - if (key <= MAX_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { int index = wordlist[key]; diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c index 8095b1c..1b4d9d4 100644 --- a/src/unicode_fold3_key.c +++ b/src/unicode_fold3_key.c @@ -1,7 +1,7 @@ /* This file was converted by gperf_fold_key_conv.py from gperf output file. 
*/ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold3_key unicode_fold3_key.gperf */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold3_key unicode_fold3_key.gperf */ /* Computed positions: -k'3,6,9' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[]) return asso_values[(unsigned char)onig_codes_byte_at(codes, 8)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)]; } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif int unicode_fold3_key(OnigCodePoint codes[]) { @@ -99,7 +105,7 @@ unicode_fold3_key(OnigCodePoint codes[]) { int key = hash(codes); - if (key <= MAX_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { int index = wordlist[key]; diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c index c6261d2..15302ca 100644 --- a/src/unicode_unfold_key.c +++ b/src/unicode_unfold_key.c @@ -1,7 +1,7 @@ /* This file was converted by gperf_unfold_key_conv.py from gperf output file. 
*/ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N unicode_unfold_key unicode_unfold_key.gperf */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N unicode_unfold_key unicode_unfold_key.gperf */ /* Computed positions: -k'1-3' */ @@ -64,6 +64,12 @@ hash(OnigCodePoint codes[]) return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+35] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)+1] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)]; } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif const struct ByUnfoldKey * unicode_unfold_key(OnigCodePoint code) { @@ -2840,11 +2846,11 @@ unicode_unfold_key(OnigCodePoint code) { int key = hash(&code); - if (key <= MAX_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { OnigCodePoint gcode = wordlist[key].code; - if (code == gcode) + if (code == gcode && wordlist[key].index >= 0) return &wordlist[key]; } } diff --git a/test/testc.c b/test/testc.c index fbad340..ddf9fd5 100644 --- a/test/testc.c +++ b/test/testc.c @@ -585,6 +585,7 @@ extern int main(int argc, char* argv[]) x2("[0-9-a]", "-", 0, 1); // PR#44 n("[0-9-a]", ":"); // PR#44 x3("(\\(((?:[^(]|\\g<1>)*)\\))", "(abc)(abc)", 1, 4, 2); // PR#43 + x2("\\o{101}", "A", 0, 1); x2("", "¤¢", 0, 0); x2("¤¢", "¤¢", 0, 2); diff --git a/test/testu.c b/test/testu.c index 70284fb..017ebef 100644 --- a/test/testu.c +++ b/test/testu.c @@ -903,6 +903,9 @@ extern int main(int argc, char* argv[]) x2("\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); 
x2("\000.\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); x2("\000^\000\\\000p\000{\000K\000a\000t\000a\000k\000a\000n\000a\000}\000$\000\000", "\060\277\000\000", 0, 2); + x2("\000\\\000o\000{\0001\0000\0001\000}\000\000", "\000A\000\000", 0, 2); + x2("\000\\\000o\000{\0001\0001\0000\0007\0002\0001\000}\000\000", "\221\321\000\000", 0, 2); + fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", nsucc, nfail, nerror, onig_version()); |