From 81f65b49e828952d496c80a991397fdac96feea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Wed, 9 Nov 2016 22:19:08 +0100 Subject: New upstream version 6.1.2 --- CMakeLists.txt | 2 +- HISTORY | 20 +++- README.md | 6 + config.guess | 353 ++++++++++++++++++++------------------------------------ config.sub | 86 +++++++++----- configure.ac | 2 +- dist.info | 2 +- doc/API | 8 +- doc/API.ja | 2 +- doc/RE.ja | 2 +- index.html | 4 +- index_ja.html | 4 +- src/big5.c | 23 +++- src/euc_jp.c | 34 +++++- src/euc_kr.c | 22 +++- src/euc_tw.c | 37 +++++- src/gb18030.c | 38 +++++- src/onigposix.h | 2 +- src/oniguruma.h | 9 +- src/regcomp.c | 34 ++++-- src/regenc.h | 2 +- src/regexec.c | 24 +++- src/regint.h | 2 +- src/regparse.c | 59 +++++++--- src/sjis.c | 31 ++++- 25 files changed, 493 insertions(+), 315 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5523a5f..ac8ba7f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 2.8) project(oniguruma C) set(PACKAGE onig) -set(PACKAGE_VERSION "6.1.1") +set(PACKAGE_VERSION "6.1.2") set(USE_COMBINATION_EXPLOSION_CHECK 0) set(USE_CRNL_AS_LINE_TERMINATOR 0) diff --git a/HISTORY b/HISTORY index 9894c7d..c59fe4b 100644 --- a/HISTORY +++ b/HISTORY @@ -1,5 +1,13 @@ History +2016/11/07: Version 6.1.2 + +2016/10/25: allow word bound, word begin and word end in look-behind. +2016/10/19: add ONIG_OPTION_CHECK_VALIDITY_OF_STRING option. +2016/10/16: fix use after free node. +2016/10/10: fix memory leaks after parsing regexp error. +2016/09/22: implement many of is_valid_mbc_string(). + 2016/09/02: Version 6.1.1 2016/08/31: fix segfault /W.?{888}{888}{888}\x00/ (found by libfuzzer) @@ -338,7 +346,7 @@ History 2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB. 2006/10/10: [impl] remove OP_EXACT1_IC and OP_EXACTN_IC from match_at(). 2006/10/10: [impl] should free new_str in expand_case_fold_string(). -2006/10/06: [dist] add test entrys to sample/encode.c. 
+2006/10/06: [dist] add test entries to sample/encode.c. 2006/10/06: [impl] re-implement caseless match (case-fold). 2006/10/06: [impl] expand string node by case fold variations. add expand_case_fold_string(). @@ -907,7 +915,7 @@ History 2004/11/16: [impl] add get_type_cc_node method to OnigEncodingType. 2004/11/15: [impl] add st.h and st.c from Ruby 1.9. use st-hash always. -2004/11/12: [impl] change menber 'not' of CClassNode to 'flags'. +2004/11/12: [impl] change member 'not' of CClassNode to 'flags'. add flags FLAG_CCLASS_NOT and FLAG_CCLASS_SHARE. 2004/11/12: [impl] add onig_is_in_code_range_array() to enc/unicode.c. 2004/11/12: [impl] fix CRWord in enc/unicode.c and MBWord in enc/utf8.c. @@ -952,7 +960,7 @@ History 2004/10/18: [impl] (thanks Imai Yasumasa) enclose #include <malloc.h> by #ifndef __BORLANDC__. 2004/10/18: [bug] (thanks Imai Yasumasa) - memory acess violation in select_opt_exact_info(). + memory access violation in select_opt_exact_info(). 2004/09/25: [dist] fix doc/API and doc/API.ja. 2004/09/25: [bug] fix OP_SEMI_END_BUF process in match_at() for the case USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE @@ -1163,7 +1171,7 @@ History 2004/04/15: [new] add onig_get_ambig_flag(). 2004/04/14: [bug] (thanks Isao Sonobe) undefined bytecode error happens if ONIG_OPTION_FIND_LONGEST - is setted. + is set. should finish matching process if find-condition is fail at OP_END in match_at(). 2004/04/12: [impl] add ambig_flag to regex_t. @@ -1721,7 +1729,7 @@ History 2003/03/12: [spec] change named backref and subexp call format. backref: \k<name>, call: \g<name> (thanks akr) 2003/03/11: [inst] add regparse.[ch] in win32/Makefile. -2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't setted +2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't set, then compile error in unset_addr_list_fix(). (thanks knu) 2003/03/10: [impl] divide regcomp.c to regcomp.c, regparse.c and regparse.h. 2003/03/10: [bug] should handle multi-byte code name in fetch_name(). 
@@ -1788,7 +1796,7 @@ History 2003/02/28: [impl] add opcode OP_CALL, OP_RETURN. add stack type STK_CALL_FRAME, STK_RETURN, STK_MEM_END. 2003/02/26: [spec] add new syntax behavior REG_SYN_STRICT_CHECK_BACKREF_NUMBER. - if it is setted, then error /(\1)/, /\1(..)/ etc... + if it is set, then error /(\1)/, /\1(..)/ etc... 2003/02/26: [spec] if backref number is greater than max group number, then return compile error. (REGERR_INVALID_BACKREF_NUMBER) 2003/02/26: [tune] bad implemented N_ALT case in get_min_match_length(). diff --git a/README.md b/README.md index a2c49cd..bfb41c7 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,12 @@ Supported character encodings: * CP1251: contributed by Byte +New feature of version 6.1.2 +-------------------------- + +* allow word bound, word begin and word end in look-behind. +* NEW option: ONIG_OPTION_CHECK_VALIDITY_OF_STRING + New feature of version 6.1 -------------------------- diff --git a/config.guess b/config.guess index b79252d..bf5ad89 100755 --- a/config.guess +++ b/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2013 Free Software Foundation, Inc. +# Copyright 1992-2016 Free Software Foundation, Inc. -timestamp='2013-06-10' +timestamp='2016-09-11' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -24,12 +24,12 @@ timestamp='2013-06-10' # program. This Exception is an additional permission under section 7 # of the GNU General Public License, version 3 ("GPLv3"). # -# Originally written by Per Bothner. +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess # -# Please send patches with a ChangeLog entry to config-patches@gnu.org. 
+# Please send patches to <config-patches@gnu.org>. me=`echo "$0" | sed -e 's,.*/,,'` @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2013 Free Software Foundation, Inc. +Copyright 1992-2016 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -149,7 +149,7 @@ Linux|GNU|GNU/*) LIBC=gnu #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` ;; esac @@ -168,19 +168,29 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || \ + echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown + ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. + # to ELF recently (or will in the future) and ABI. case "${UNAME_MACHINE_ARCH}" in + earm*) + os=netbsdelf + ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ @@ -197,6 +207,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in os=netbsd ;; esac + # Determine ABI tags. 
+ case "${UNAME_MACHINE_ARCH}" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` + ;; + esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need @@ -207,13 +224,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}" + echo "${machine}-${os}${release}${abi}" exit ;; *:Bitrig:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` @@ -223,6 +240,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; @@ -235,6 +256,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; + *:Sortix:*:*) + echo ${UNAME_MACHINE}-unknown-sortix + exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) @@ -251,42 +275,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV5 
(21164)") - UNAME_MACHINE="alphaev5" ;; + UNAME_MACHINE=alphaev5 ;; "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; + UNAME_MACHINE=alphaev56 ;; "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; + UNAME_MACHINE=alphapca56 ;; "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; + UNAME_MACHINE=alphapca57 ;; "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; + UNAME_MACHINE=alphaev6 ;; "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; + UNAME_MACHINE=alphaev67 ;; "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; + UNAME_MACHINE=alphaev69 ;; "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; + UNAME_MACHINE=alphaev7 ;; "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; + UNAME_MACHINE=alphaev79 ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 @@ -359,16 +383,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build - SUN_ARCH="i386" + SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH="x86_64" + SUN_ARCH=x86_64 fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -393,7 +417,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} @@ -579,8 +603,9 @@ EOF else IBM_ARCH=powerpc fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi @@ -617,13 +642,13 @@ EOF sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi @@ -662,11 +687,11 @@ EOF exit (0); } EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = "hppa2.0w" ] + if [ ${HP_ARCH} = hppa2.0w ] then eval 
$set_cc_for_build @@ -679,12 +704,12 @@ EOF # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH="hppa2.0w" + HP_ARCH=hppa2.0w else - HP_ARCH="hppa64" + HP_ARCH=hppa64 fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -789,14 +814,14 @@ EOF echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -826,7 +851,7 @@ EOF *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; - i*:MSYS*:*) + *:MSYS*:*) echo ${UNAME_MACHINE}-pc-msys exit ;; i*:windows32*:*) @@ -878,7 +903,7 @@ EOF exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo 
${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix @@ -901,7 +926,7 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC="gnulibc1" ; fi + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; arc:Linux:*:* | arceb:Linux:*:*) @@ -932,6 +957,9 @@ EOF crisv32:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-${LIBC} exit ;; + e2k:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; frv:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -944,6 +972,9 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; + k1om:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -969,10 +1000,10 @@ EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } ;; - or1k:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + openrisc*:Linux:*:*) + echo or1k-unknown-linux-${LIBC} exit ;; - or32:Linux:*:*) + or32:Linux:*:* | or1k*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; padre:Linux:*:*) @@ -1001,6 +1032,9 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-${LIBC} exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux-${LIBC} exit ;; @@ -1020,7 +1054,7 @@ EOF echo ${UNAME_MACHINE}-dec-linux-${LIBC} exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-pc-linux-${LIBC} exit ;; xtensa*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} @@ -1099,7 +1133,7 @@ EOF # uname -m prints for DJGPP 
always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that + # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; @@ -1248,6 +1282,9 @@ EOF SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux${UNAME_RELEASE} + exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; @@ -1260,22 +1297,32 @@ EOF if test "$UNAME_PROCESSOR" = unknown ; then UNAME_PROCESSOR=powerpc fi - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - case $UNAME_PROCESSOR in - i386) UNAME_PROCESSOR=x86_64 ;; - powerpc) UNAME_PROCESSOR=powerpc64 ;; - esac + if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # Avoid executing cc on OS X 10.9, as it ships with a stub + # that puts up a graphical alert prompting to install + # developer tools. Any system running Mac OS X 10.7 or + # later (Darwin 11 and later) is required to have a 64-bit + # processor. This is not true of the ARM version of Darwin + # that Apple uses in portable devices. 
+ UNAME_PROCESSOR=x86_64 fi echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then + if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi @@ -1306,7 +1353,7 @@ EOF # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = "386"; then + if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" @@ -1348,7 +1395,7 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos @@ -1359,171 +1406,25 @@ EOF x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; -esac - -eval $set_cc_for_build -cat >$dummy.c < -# include -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... 
*/ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix\n"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -# if !defined (ultrix) -# include -# if defined (BSD) -# if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -# else -# if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# endif -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# else - printf ("vax-dec-ultrix\n"); exit (0); -# endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - c34*) - echo c34-convex-bsd - exit ;; - c38*) - echo c38-convex-bsd - exit ;; - c4*) - echo c4-convex-bsd + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs exit ;; - esac -fi +esac cat >&2 < in order to provide the needed -information to handle your system. +If $0 has already been updated, send the following data and any +information you think might be pertinent to config-patches@gnu.org to +provide the necessary information to handle your system. config.guess timestamp = $timestamp diff --git a/config.sub b/config.sub index 9633db7..cc69b06 100755 --- a/config.sub +++ b/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2013 Free Software Foundation, Inc. +# Copyright 1992-2016 Free Software Foundation, Inc. 
-timestamp='2013-08-10' +timestamp='2016-09-05' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ timestamp='2013-08-10' # of the GNU General Public License, version 3 ("GPLv3"). -# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# Please send patches to <config-patches@gnu.org>. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. @@ -33,7 +33,7 @@ timestamp='2013-08-10' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -53,8 +53,7 @@ timestamp='2013-08-10' me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. @@ -68,7 +67,7 @@ Report bugs and patches to <config-patches@gnu.org>." version="\ GNU config.sub ($timestamp) -Copyright 1992-2013 Free Software Foundation, Inc. +Copyright 1992-2016 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -117,8 +116,8 @@ maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | \ - kopensolaris*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -255,16 +254,18 @@ case $basic_machine in | arc | arceb \ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ | avr | avr32 \ + | ba \ | be32 | be64 \ | bfin \ | c4x | c8051 | clipper \ | d10v | d30v | dlx | dsp16xx \ - | epiphany \ - | fido | fr30 | frv \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ + | k1om \ | le32 | le64 \ | lm32 \ | m32c | m32r | m32rle | m68000 | m68k | m88k \ @@ -282,8 +283,10 @@ case $basic_machine in | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ | mipsisa64 | mipsisa64el \ | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ | mipsisa64sb1 | mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ | mipsr5900 | mipsr5900el \ @@ -295,14 +298,14 @@ case $basic_machine in | nds32 | nds32le | nds32be \ | nios | nios2 | nios2eb | nios2el \ | ns16k | ns32k \ - | open8 \ - | or1k | or32 \ + | open8 | or1k | or1knd | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ | pyramid \ + | riscv32 | riscv64 \ | rl78 | rx \ | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ 
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ @@ -310,6 +313,7 @@ case $basic_machine in | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | visium \ | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) @@ -324,7 +328,10 @@ case $basic_machine in c6x) basic_machine=tic6x-unknown ;; - m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) + leon|leon[3-9]) + basic_machine=sparc-$basic_machine + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) basic_machine=$basic_machine-unknown os=-none ;; @@ -369,18 +376,20 @@ case $basic_machine in | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ + | ba-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ | c8051-* | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ + | e2k-* | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | hexagon-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ + | k1om-* \ | le32-* | le64-* \ | lm32-* \ | m32c-* | m32r-* | m32rle-* \ @@ -400,8 +409,10 @@ case $basic_machine in | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa32r6-* | mipsisa32r6el-* \ | mipsisa64-* | mipsisa64el-* \ | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64r6-* | mipsisa64r6el-* \ | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipsr5900-* | mipsr5900el-* \ @@ -413,16 +424,18 @@ case $basic_machine in | nios-* | nios2-* | nios2eb-* | nios2el-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | open8-* \ + | or1k*-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | 
powerpc64-* | powerpc64le-* | powerpcle-* \ | pyramid-* \ + | riscv32-* | riscv64-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ @@ -430,6 +443,7 @@ case $basic_machine in | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ + | visium-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ @@ -506,6 +520,9 @@ case $basic_machine in basic_machine=i386-pc os=-aros ;; + asmjs) + basic_machine=asmjs-unknown + ;; aux) basic_machine=m68k-apple os=-aux @@ -626,6 +643,14 @@ case $basic_machine in basic_machine=m68k-bull os=-sysv3 ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + os=$os"spe" + ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ -767,6 +792,9 @@ case $basic_machine in basic_machine=m68k-isi os=-sysv ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` + ;; m68knommu) basic_machine=m68k-unknown os=-linux @@ -822,6 +850,10 @@ case $basic_machine in basic_machine=powerpc-unknown os=-morphos ;; + moxiebox) + basic_machine=moxie-unknown + os=-moxiebox + ;; msdos) basic_machine=i386-pc os=-msdos @@ -998,7 +1030,7 @@ case $basic_machine in ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle | ppc-le | powerpc-little) + ppcle | powerpclittle) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) @@ -1008,7 +1040,7 @@ case $basic_machine in ;; ppc64-*) 
basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) + ppc64le | powerpc64little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) @@ -1354,27 +1386,28 @@ case $os in | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ | -sym* | -kopensolaris* | -plan9* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix*) # Remember, each alternative MUST END IN *, to match a version number. 
;; -qnx*) @@ -1506,6 +1539,8 @@ case $os in ;; -nacl*) ;; + -ios) + ;; -none) ;; *) @@ -1592,9 +1627,6 @@ case $basic_machine in mips*-*) os=-elf ;; - or1k-*) - os=-elf - ;; or32-*) os=-coff ;; diff --git a/configure.ac b/configure.ac index e7d8459..beeaf5a 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT(onig, 6.1.1) +AC_INIT(onig, 6.1.2) AC_CONFIG_MACRO_DIR([m4]) diff --git a/dist.info b/dist.info index 6ecfe65..8e8d1aa 100644 --- a/dist.info +++ b/dist.info @@ -1,7 +1,7 @@ --- This file is part of LuaDist project name = "onig" -version = "6.1.1" +version = "6.1.2" desc = "Oniguruma is a regular expressions library." author = "K.Kosako" diff --git a/doc/API b/doc/API index 8e824f5..c639432 100644 --- a/doc/API +++ b/doc/API @@ -432,9 +432,9 @@ Oniguruma API Version 6.1.0 2016/08/22 1 region: match region data. 2 at: callback position. - ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse childs. - ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse childs first, then callback. - ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse childs, + ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children. + ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback. + ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children, and at last callback again. 3 func: callback function. @@ -553,7 +553,7 @@ Oniguruma API Version 6.1.0 2016/08/22 2 op, op2, behavior, options: value of element. -# void onig_copy_encoding(OnigEncoding to, OnigOnigEncoding from) +# void onig_copy_encoding(OnigEncoding to, OnigEncoding from) Copy encoding. 
diff --git a/doc/API.ja b/doc/API.ja index f617a1c..2597510 100644 --- a/doc/API.ja +++ b/doc/API.ja @@ -559,7 +559,7 @@ 2 op, op2, behavior, options: Í×ÁǤÎÃÍ -# void onig_copy_encoding(OnigEncoding to, OnigOnigEncoding from) +# void onig_copy_encoding(OnigEncoding to, OnigEncoding from) ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ò¥³¥Ô¡¼¤¹¤ë¡£ diff --git a/doc/RE.ja b/doc/RE.ja index bc877f2..cf89c8f 100644 --- a/doc/RE.ja +++ b/doc/RE.ja @@ -383,7 +383,7 @@ ÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤) + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤È¡¢Éôʬ¼°¸Æ½Ð¤·µ¡Ç½Äɲà + ¿¥Ð¥¤¥Èʸ»ú¥³¡¼¥É¤¬»ØÄꤵ¤ì¤Æ¤¤¤ë¤È¤­¡¢ - ʸ»ú½¸¹ç¤ÎÃæ¤ÇȬ¿Ê¿ô¤Þ¤¿¤Ï½½Ï»¿Ê¿ôɽ¸½¤ÎϢ³¤Ï¡¢Â¿¥Ð¥¤¥ÈÉä¹ç¤Çɽ¸½¤µ¤ì¤¿ + ʸ»ú½¸¹ç¤ÎÃæ¤ÇȬ¿Ê¿ô¤Þ¤¿¤Ï½½Ï»¿Ê¿ôɽ¸½¤ÎϢ³¤Ï¡¢Â¿¥Ð¥¤¥ÈÉä¹æ¤Çɽ¸½¤µ¤ì¤¿ °ì¸Ä¤Îʸ»ú¤È²ò¼á¤µ¤ì¤ë (Îã. [\xa1\xa2], [\xa1\xa7-\xa4\xa1]) + ʸ»ú½¸¹ç¤ÎÃæ¤Ç¡¢°ì¥Ð¥¤¥Èʸ»ú¤È¿¥Ð¥¤¥Èʸ»ú¤ÎÈÏ°Ï»ØÄê¤Ïµö¤µ¤ì¤ë¡£ diff --git a/index.html b/index.html index 159d687..cf9177c 100644 --- a/index.html +++ b/index.html @@ -8,7 +8,7 @@

Oniguruma

(Japanese)

-(c) K.Kosako, updated at: 2016/08/31 +(c) K.Kosako, updated at: 2016/11/07

@@ -16,9 +16,9 @@
What's new
    +
  • 2016/11/07: Version 6.1.2 released.
  • 2016/09/02: Version 6.1.1 released.
  • 2016/08/29: Version 6.1.0 released.
  • -
  • 2016/05/09: Version 6.0.0 released.
  • 2014/12/12: Version 5.9.6 released.
diff --git a/index_ja.html b/index_ja.html index 0918897..e11e0f5 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@

鬼車

-(c) K.Kosako, 最終更新: 2016/08/31 +(c) K.Kosako, 最終更新: 2016/11/07

@@ -16,9 +16,9 @@
更新情報
    +
  • 2016/11/07: Version 6.1.2 リリース
  • 2016/09/02: Version 6.1.1 リリース
  • 2016/08/29: Version 6.1.0 リリース
  • -
  • 2016/05/09: Version 6.0.0 リリース
  • 2014/12/12: Version 5.9.6 リリース
diff --git a/src/big5.c b/src/big5.c index 3d44975..bc713ab 100644 --- a/src/big5.c +++ b/src/big5.c @@ -55,9 +55,28 @@ big5_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_BIG5, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0x40) return FALSE; + if (*p > 0x7e && *p < 0xa1) return FALSE; + if (*p == 0xff) return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_jp.c b/src/euc_jp.c index 19422ce..3b54e95 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -57,9 +57,39 @@ mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_JP, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p > 0xa0) { + if (*p == 0xff) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + } + else if (*p == 0x8e) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p > 0xdf) return FALSE; + p++; + } + else if (*p == 0x8f) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_kr.c b/src/euc_kr.c index 12803cd..450caf1 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -55,9 +55,27 @@ euckr_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return 
onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_KR, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_tw.c b/src/euc_tw.c index 4e07567..b3ee628 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -55,9 +55,42 @@ euctw_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_TW, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + if (*p == 0x8e) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p > 0xb0) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/gb18030.c b/src/gb18030.c index 36fc3de..c8b5865 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -76,9 +76,43 @@ gb18030_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_GB18030, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p == 0x80 || *p == 0xff) { + return FALSE; + } + else { + p++; + if (p >= end) return FALSE; + if (*p < 0x40) { + if (*p < 0x30 || *p > 0x39) + return FALSE; + + p++; + if (p >= end) return FALSE; + if (*p < 0x81 || *p == 0xff) return FALSE; + + p++; + if (p >= end) return 
FALSE; + if (*p < 0x30 || *p > 0x39) + return FALSE; + + p++; + } + else if (*p == 0x7f || *p == 0xff) { + return FALSE; + } + else { + p++; + } + } + } + + return TRUE; } static OnigCodePoint diff --git a/src/onigposix.h b/src/onigposix.h index 6c41537..2af3717 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -39,7 +39,7 @@ extern "C" { #define REG_NEWLINE (1<<1) #define REG_NOTBOL (1<<2) #define REG_NOTEOL (1<<3) -#define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */ +#define REG_EXTENDED (1<<4) /* if not set, Basic Onigular Expression */ #define REG_NOSUB (1<<5) /* POSIX error codes */ diff --git a/src/oniguruma.h b/src/oniguruma.h index 5aa49f6..6090165 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -36,7 +36,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 #define ONIGURUMA_VERSION_MINOR 1 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_TEENY 2 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -364,7 +364,7 @@ int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN -int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end)); +int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); @@ -398,7 +398,8 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) #define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) #define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) -#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ +#define ONIG_OPTION_CHECK_VALIDITY_OF_STRING (ONIG_OPTION_POSIX_REGION << 1) +#define ONIG_OPTION_MAXBIT ONIG_OPTION_CHECK_VALIDITY_OF_STRING /* limit */ #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) @@ -742,7 +743,7 @@ void onig_free P_((OnigRegex)); ONIG_EXTERN void 
onig_free_body P_((OnigRegex)); ONIG_EXTERN -int onig_scan(regex_t* reg, const UChar* str, const UChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); +int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); ONIG_EXTERN int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN diff --git a/src/regcomp.c b/src/regcomp.c index 0235a9f..11ba1e7 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -1795,6 +1795,11 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) } break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = noname_disable_map(&(NANCHOR(node)->target), map, counter); + break; + default: break; } @@ -1853,6 +1858,11 @@ renumber_by_map(Node* node, GroupNumRemap* map) r = renumber_node_backref(node, map); break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = renumber_by_map(NANCHOR(node)->target, map); + break; + default: break; } @@ -1884,6 +1894,11 @@ numbered_ref_check(Node* node) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = numbered_ref_check(NANCHOR(node)->target); + break; + default: break; } @@ -3875,9 +3890,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION #define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) + #define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | 
ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) case ANCHOR_LOOK_BEHIND: { @@ -3913,7 +3929,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) return r; } -/* set skip map for Boyer-Moor search */ +/* set skip map for Boyer-Moore search */ static int set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, UChar skip[], int** int_skip) @@ -4641,7 +4657,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) int i, z; CClassNode* cc = NCCLASS(node); - /* no need to check ignore case. (setted in setup_tree()) */ + /* no need to check ignore case. (set in setup_tree()) */ if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { OnigLen min = ONIGENC_MBC_MINLEN(env->enc); @@ -4712,6 +4728,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_END_BUF: case ANCHOR_SEMI_END_BUF: case ANCHOR_END_LINE: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: add_opt_anc_info(&opt->anc, NANCHOR(node)->type); break; @@ -4734,8 +4752,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ case ANCHOR_LOOK_BEHIND_NOT: break; } @@ -4989,6 +5005,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) + reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { @@ -5133,7 +5152,7 @@ print_anchor(FILE* f, int anchor) } if (anchor & ANCHOR_ANYCHAR_STAR_ML) { if (q) fprintf(f, ", "); - fprintf(f, "anychar-star-pl"); + fprintf(f, "anychar-star-ml"); } fprintf(f, "]"); @@ -5252,6 +5271,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, UnsetAddrList uslist; #endif + root = 0; if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; #ifdef ONIG_DEBUG diff --git a/src/regenc.h b/src/regenc.h index 49227fa..e119dab 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -110,7 +110,7 @@ struct PropertyNameCtype { /* #define USE_CRNL_AS_LINE_TERMINATOR */ #define USE_UNICODE_PROPERTIES /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ -/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII diff --git a/src/regexec.c b/src/regexec.c index 70ac89e..7e8d3d1 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -3111,6 +3111,13 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On r = 0; if (r == 0) { + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! 
ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { + r = ONIGERR_INVALID_WIDE_CHAR_VALUE; + goto end; + } + } + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); r = match_at(reg, str, end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE @@ -3119,6 +3126,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On at, prev, &msa); } + end: MATCH_ARG_FREE(msa); return r; } @@ -3391,6 +3399,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { + r = ONIGERR_INVALID_WIDE_CHAR_VALUE; + goto finish_no_msa; + } + } + #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE @@ -3707,7 +3722,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, MATCH_ARG_FREE(msa); /* If result is mismatch and no FIND_NOT_EMPTY option, - then the region is not setted in match_at(). */ + then the region is not set in match_at(). */ if (IS_FIND_NOT_EMPTY(reg->options) && region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3747,6 +3762,13 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, int rs; const UChar* start; + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! 
ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + + ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING); + } + n = 0; start = str; while (1) { diff --git a/src/regint.h b/src/regint.h index d320e26..7a3283d 100644 --- a/src/regint.h +++ b/src/regint.h @@ -685,7 +685,7 @@ typedef struct _OnigStackType { struct { int num; /* memory num */ UChar *pstr; /* start/end position */ - /* Following information is setted, if this stack type is MEM-START */ + /* Following information is set, if this stack type is MEM-START */ OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */ } mem; diff --git a/src/regparse.c b/src/regparse.c index e8a6e20..8f1d1cb 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -29,6 +29,10 @@ #include "regparse.h" #include "st.h" +#ifdef DEBUG_NODE_FREE +#include +#endif + #define WARN_BUFSIZE 256 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS @@ -1003,13 +1007,16 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node) return 0; } - extern void onig_node_free(Node* node) { start: if (IS_NULL(node)) return ; +#ifdef DEBUG_NODE_FREE + fprintf(stderr, "onig_node_free: %p\n", node); +#endif + switch (NTYPE(node)) { case NT_STR: if (NSTR(node)->capa != 0 && @@ -1071,6 +1078,9 @@ node_new(void) node = (Node* )xmalloc(sizeof(Node)); /* xmemset(node, 0, sizeof(Node)); */ +#ifdef DEBUG_NODE_FREE + fprintf(stderr, "node_new: %p\n", node); +#endif return node; } @@ -1449,7 +1459,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc) if (sn->end > sn->s) { p = onigenc_get_prev_char_head(enc, sn->s, sn->end); - if (p && p > sn->s) { /* can be splitted. */ + if (p && p > sn->s) { /* can be split. 
*/ n = node_new_str(p, sn->end); if ((sn->flag & NSTR_RAW) != 0) NSTRING_SET_RAW(n); @@ -2520,7 +2530,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, #endif /* USE_BACKREF_WITH_LEVEL */ /* - def: 0 -> define name (don't allow number name) + ref: 0 -> define name (don't allow number name) 1 -> reference name (allow number name) */ static int @@ -3000,7 +3010,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) else if (c == '[') { if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; - tok->backp = p; /* point at '[' is readed */ + tok->backp = p; /* point at '[' is read */ PINC; if (str_exist_check_with_esc(send, 2, p, end, (OnigCodePoint )']', enc, syn)) { @@ -4318,7 +4328,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, CClassNode* acc; r = parse_char_class(&anode, tok, &p, end, env); - if (r != 0) goto cc_open_err; + if (r != 0) { + onig_node_free(anode); + goto cc_open_err; + } acc = NCCLASS(anode); r = or_cclass(cc, acc, env->enc); @@ -4412,7 +4425,6 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, err: if (cc != NCCLASS(*np)) bbuf_free(cc->mbuf); - onig_node_free(*np); return r; } @@ -4542,11 +4554,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) { - onig_node_free(*np); return num; } else if (num >= (int )BIT_STATUS_BITS_NUM) { - onig_node_free(*np); return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } NENCLOSE(*np)->regnum = num; @@ -4614,7 +4624,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); env->option = prev; - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } *np = node_new_option(option); CHECK_NULL_RETURN_MEMERR(*np); 
NENCLOSE(*np)->target = target; @@ -4647,7 +4660,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = fetch_token(tok, &p, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } if (NTYPE(*np) == NT_ANCHOR) NANCHOR(*np)->target = target; @@ -4908,7 +4924,10 @@ parse_exp(Node** np, OnigToken* tok, int term, if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); env->option = prev; - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } NENCLOSE(*np)->target = target; return tok->type; } @@ -5220,7 +5239,10 @@ parse_branch(Node** top, OnigToken* tok, int term, *top = NULL; r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(node); + return r; + } if (r == TK_EOT || r == term || r == TK_ALT) { *top = node; @@ -5230,7 +5252,10 @@ parse_branch(Node** top, OnigToken* tok, int term, headp = &(NCDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(node); + return r; + } if (NTYPE(node) == NT_LIST) { *headp = node; @@ -5272,8 +5297,10 @@ parse_subexp(Node** top, OnigToken* tok, int term, r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) return r; - + if (r < 0) { + onig_node_free(node); + return r; + } *headp = onig_node_new_alt(node, NULL); headp = &(NCDR(*headp)); } @@ -5282,8 +5309,8 @@ parse_subexp(Node** top, OnigToken* tok, int term, goto err; } else { - err: onig_node_free(node); + err: if (term == TK_SUBEXP_CLOSE) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; else diff --git a/src/sjis.c b/src/sjis.c index a607b3d..3378474 100644 --- a/src/sjis.c +++ b/src/sjis.c @@ -77,9 +77,36 @@ mbc_enc_len(const UChar* p) } static int 
-is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_SJIS, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + if (*p == 0xa0 || *p == 0x80) + return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0x40 || *p > 0xfc || *p == 0x7f) + return FALSE; + p++; + } + else if (*p < 0xe0) { + p++; + } + else if (*p < 0xfd) { + p++; + if (p >= end) return FALSE; + if (*p < 0x40 || *p > 0xfc || *p == 0x7f) + return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static int -- cgit v1.2.3