diff options
| author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-08-13 13:55:30 +0200 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-08-13 13:55:30 +0200 | 
| commit | a89a4ac904bc93b1d93b410394fa05c23260351b (patch) | |
| tree | b10c64aff1d79627925154364772774fc82d07ff | |
| parent | b6c6e4122f35fbead1e9661dfb2d852b39faf8ff (diff) | |
| parent | ae063b1e6ea3d97ea4e3404bfd8289895619d04f (diff) | |
Merge branch 'release/6.5.0-1'6.5.0-1
| -rw-r--r-- | .bzrignore | 3 | ||||
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | CMakeLists.txt | 2 | ||||
| -rw-r--r-- | HISTORY | 14 | ||||
| -rw-r--r-- | README | 7 | ||||
| -rw-r--r-- | README.md | 19 | ||||
| -rwxr-xr-x | compile | 2 | ||||
| -rwxr-xr-x | config.guess | 134 | ||||
| -rwxr-xr-x | config.sub | 36 | ||||
| -rw-r--r-- | configure.ac | 4 | ||||
| -rw-r--r-- | debian/changelog | 7 | ||||
| -rw-r--r-- | debian/symbols | 1 | ||||
| -rwxr-xr-x | depcomp | 2 | ||||
| -rw-r--r-- | dist.info | 2 | ||||
| -rw-r--r-- | doc/RE | 69 | ||||
| -rw-r--r-- | doc/RE.ja | 71 | ||||
| -rw-r--r-- | index.html | 3 | ||||
| -rw-r--r-- | index_ja.html | 3 | ||||
| -rwxr-xr-x | install-sh | 373 | ||||
| -rwxr-xr-x | missing | 2 | ||||
| -rw-r--r-- | src/oniguruma.h | 10 | ||||
| -rw-r--r-- | src/regcomp.c | 1102 | ||||
| -rw-r--r-- | src/regenc.h | 2 | ||||
| -rw-r--r-- | src/regerror.c | 6 | ||||
| -rw-r--r-- | src/regexec.c | 487 | ||||
| -rw-r--r-- | src/regint.h | 119 | ||||
| -rw-r--r-- | src/regparse.c | 1075 | ||||
| -rw-r--r-- | src/regparse.h | 159 | ||||
| -rw-r--r-- | src/regposix.c | 5 | ||||
| -rw-r--r-- | src/regsyntax.c | 16 | ||||
| -rw-r--r-- | src/utf8.c | 41 | ||||
| -rwxr-xr-x | test-driver | 15 | ||||
| -rw-r--r-- | test/testc.c | 82 | ||||
| -rw-r--r-- | test/testu.c | 9 | 
34 files changed, 2954 insertions, 929 deletions
| diff --git a/.bzrignore b/.bzrignore deleted file mode 100644 index 2386f62..0000000 --- a/.bzrignore +++ /dev/null @@ -1,3 +0,0 @@ -.git -**/.git -**/.pc @@ -25,4 +25,3 @@ testcu  testp  /build  m4/*.m4 -.bzr diff --git a/CMakeLists.txt b/CMakeLists.txt index 60ce397..b40fb2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 2.8)  project(oniguruma C)  set(PACKAGE onig) -set(PACKAGE_VERSION "6.4.0") +set(PACKAGE_VERSION "6.5.0")  set(USE_COMBINATION_EXPLOSION_CHECK 0)  set(USE_CRNL_AS_LINE_TERMINATOR 0) @@ -1,5 +1,19 @@  History +2017/08/03: Version 6.5.0 + +2017/07/30: [new] support Absent clear (Absent functions) +2017/07/25: abolish configure option: --enable-combination-explosion-check +2017/07/23: [new] support Absent functions (?~...) +2017/07/14: fix #65: SIZEOF_SIZE_T doesn't exist on certain architecutres +2017/07/11: [new] support \O (true anychar) +2017/07/10: [new] support \K (keep) +2017/07/10: add new node type: NODE_GIMMICK +2017/07/07: [new] support \N (no newline) +2017/07/05: [new] support \R (general newline) +2017/07/05: [new] support if-then-else syntax +2017/07/04: [new] support backref validity checker +  2017/07/03: Version 6.4.0  2017/06/30: fix memory leaks @@ -1,9 +1,14 @@ -README  2016/05/06 +README  2017/07/08  Oniguruma  ----   (C) K.Kosako <kkosako0@gmail.com>  https://github.com/kkos/oniguruma +FIXED Security Issues: +  CVE-2017-9224, CVE-2017-9225, CVE-2017-9226 +  CVE-2017-9227, CVE-2017-9228, CVE-2017-9229 + +---  Oniguruma is a regular expressions library.  The characteristics of this library is that different character encoding  for every regular expression object can be specified. @@ -3,6 +3,12 @@ Oniguruma  https://github.com/kkos/oniguruma +FIXED Security Issues: +-------------------------- +  CVE-2017-9224, CVE-2017-9225, CVE-2017-9226 +  CVE-2017-9227, CVE-2017-9228, CVE-2017-9229 + +  Oniguruma is a regular expressions library.  The characteristics of this library is that different character encoding  for every regular expression object can be specified. @@ -20,6 +26,19 @@ Supported character encodings:  * CP1251:  contributed by Byte +New feature of version 6.5.0 +-------------------------- + +* NEW: \K (keep) +* NEW: \R (general newline) \N (no newline) +* NEW: \O (true anychar) +* NEW: if-then-else syntax   (?(...)...\|...) +* NEW: Backreference validity checker  (*original) +* NEW: Absent repeater (?~absent) +* NEW: Absent expression   (?~|absent|expr)  (*original) +* NEW: Absent range cutter (?~|absent)     (*original) + +  New feature of version 6.4.0  -------------------------- @@ -3,7 +3,7 @@  scriptversion=2012-10-14.11; # UTC -# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# Copyright (C) 1999-2014 Free Software Foundation, Inc.  # Written by Tom Tromey <tromey@cygnus.com>.  #  # This program is free software; you can redistribute it and/or modify diff --git a/config.guess b/config.guess index bf5ad89..1659250 100755 --- a/config.guess +++ b/config.guess @@ -1,8 +1,8 @@  #! /bin/sh  # Attempt to guess a canonical system name. -#   Copyright 1992-2016 Free Software Foundation, Inc. +#   Copyright 1992-2015 Free Software Foundation, Inc. -timestamp='2016-09-11' +timestamp='2015-08-20'  # This file is free software; you can redistribute it and/or modify it  # under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ timestamp='2016-09-11'  # Originally written by Per Bothner; maintained since 2000 by Ben Elliston.  #  # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD  #  # Please send patches to <config-patches@gnu.org>. @@ -50,7 +50,7 @@ version="\  GNU config.guess ($timestamp)  Originally written by Per Bothner. -Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2015 Free Software Foundation, Inc.  This is free software; see the source for copying conditions.  There is NO  warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -186,12 +186,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in  	    *) machine=${UNAME_MACHINE_ARCH}-unknown ;;  	esac  	# The Operating System including object format, if it has switched -	# to ELF recently (or will in the future) and ABI. +	# to ELF recently, or will in the future.  	case "${UNAME_MACHINE_ARCH}" in -	    earm*) -		os=netbsdelf -		;; -	    arm*|i386|m68k|ns32k|sh3*|sparc|vax) +	    arm*|earm*|i386|m68k|ns32k|sh3*|sparc|vax)  		eval $set_cc_for_build  		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \  			| grep -q __ELF__ @@ -240,10 +237,6 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in  	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`  	echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}  	exit ;; -    *:LibertyBSD:*:*) -	UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` -	echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} -	exit ;;      *:ekkoBSD:*:*)  	echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}  	exit ;; @@ -275,42 +268,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in  	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`  	case "$ALPHA_CPU_TYPE" in  	    "EV4 (21064)") -		UNAME_MACHINE=alpha ;; +		UNAME_MACHINE="alpha" ;;  	    "EV4.5 (21064)") -		UNAME_MACHINE=alpha ;; +		UNAME_MACHINE="alpha" ;;  	    "LCA4 (21066/21068)") -		UNAME_MACHINE=alpha ;; +		UNAME_MACHINE="alpha" ;;  	    "EV5 (21164)") -		UNAME_MACHINE=alphaev5 ;; +		UNAME_MACHINE="alphaev5" ;;  	    "EV5.6 (21164A)") -		UNAME_MACHINE=alphaev56 ;; +		UNAME_MACHINE="alphaev56" ;;  	    "EV5.6 (21164PC)") -		UNAME_MACHINE=alphapca56 ;; +		UNAME_MACHINE="alphapca56" ;;  	    "EV5.7 (21164PC)") -		UNAME_MACHINE=alphapca57 ;; +		UNAME_MACHINE="alphapca57" ;;  	    "EV6 (21264)") -		UNAME_MACHINE=alphaev6 ;; +		UNAME_MACHINE="alphaev6" ;;  	    "EV6.7 (21264A)") -		UNAME_MACHINE=alphaev67 ;; +		UNAME_MACHINE="alphaev67" ;;  	    "EV6.8CB (21264C)") -		UNAME_MACHINE=alphaev68 ;; +		UNAME_MACHINE="alphaev68" ;;  	    "EV6.8AL (21264B)") -		UNAME_MACHINE=alphaev68 ;; +		UNAME_MACHINE="alphaev68" ;;  	    "EV6.8CX (21264D)") -		UNAME_MACHINE=alphaev68 ;; +		UNAME_MACHINE="alphaev68" ;;  	    "EV6.9A (21264/EV69A)") -		UNAME_MACHINE=alphaev69 ;; +		UNAME_MACHINE="alphaev69" ;;  	    "EV7 (21364)") -		UNAME_MACHINE=alphaev7 ;; +		UNAME_MACHINE="alphaev7" ;;  	    "EV7.9 (21364A)") -		UNAME_MACHINE=alphaev79 ;; +		UNAME_MACHINE="alphaev79" ;;  	esac  	# A Pn.n version is a patched version.  	# A Vn.n version is a released version.  	# A Tn.n version is a released field test version.  	# A Xn.n version is an unreleased experimental baselevel.  	# 1.2 uses "1.2" for uname -r. -	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` +	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`  	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.  	exitcode=$?  	trap '' 0 @@ -383,16 +376,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in  	exit ;;      i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)  	eval $set_cc_for_build -	SUN_ARCH=i386 +	SUN_ARCH="i386"  	# If there is a compiler, see if it is configured for 64-bit objects.  	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.  	# This test works for both compilers. -	if [ "$CC_FOR_BUILD" != no_compiler_found ]; then +	if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then  	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ -		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ +		(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \  		grep IS_64BIT_ARCH >/dev/null  	    then -		SUN_ARCH=x86_64 +		SUN_ARCH="x86_64"  	    fi  	fi  	echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -417,7 +410,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in  	exit ;;      sun*:*:4.2BSD:*)  	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` -	test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 +	test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3  	case "`/bin/arch`" in  	    sun3)  		echo m68k-sun-sunos${UNAME_RELEASE} @@ -642,13 +635,13 @@ EOF  		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`  		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`  		    case "${sc_cpu_version}" in -		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 -		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 +		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 +		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1  		      532)                      # CPU_PA_RISC2_0  			case "${sc_kernel_bits}" in -			  32) HP_ARCH=hppa2.0n ;; -			  64) HP_ARCH=hppa2.0w ;; -			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20 +			  32) HP_ARCH="hppa2.0n" ;; +			  64) HP_ARCH="hppa2.0w" ;; +			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20  			esac ;;  		    esac  		fi @@ -687,11 +680,11 @@ EOF  		    exit (0);  		}  EOF -		    (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` +		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`  		    test -z "$HP_ARCH" && HP_ARCH=hppa  		fi ;;  	esac -	if [ ${HP_ARCH} = hppa2.0w ] +	if [ ${HP_ARCH} = "hppa2.0w" ]  	then  	    eval $set_cc_for_build @@ -704,12 +697,12 @@ EOF  	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess  	    # => hppa64-hp-hpux11.23 -	    if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | +	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |  		grep -q __LP64__  	    then -		HP_ARCH=hppa2.0w +		HP_ARCH="hppa2.0w"  	    else -		HP_ARCH=hppa64 +		HP_ARCH="hppa64"  	    fi  	fi  	echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -814,14 +807,14 @@ EOF  	echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'  	exit ;;      F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) -	FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` -	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` +	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` +	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`  	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`  	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"  	exit ;;      5000:UNIX_System_V:4.*:*) -	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` -	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` +	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` +	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`  	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"  	exit ;;      i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -903,7 +896,7 @@ EOF  	exit ;;      *:GNU/*:*:*)  	# other systems with GNU libc and userland -	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} +	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}  	exit ;;      i*86:Minix:*:*)  	echo ${UNAME_MACHINE}-pc-minix @@ -926,7 +919,7 @@ EOF  	  EV68*) UNAME_MACHINE=alphaev68 ;;  	esac  	objdump --private-headers /bin/sh | grep -q ld.so.1 -	if test "$?" = 0 ; then LIBC=gnulibc1 ; fi +	if test "$?" = 0 ; then LIBC="gnulibc1" ; fi  	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}  	exit ;;      arc:Linux:*:* | arceb:Linux:*:*) @@ -972,9 +965,6 @@ EOF      ia64:Linux:*:*)  	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}  	exit ;; -    k1om:Linux:*:*) -	echo ${UNAME_MACHINE}-unknown-linux-${LIBC} -	exit ;;      m32r*:Linux:*:*)  	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}  	exit ;; @@ -1032,9 +1022,6 @@ EOF      ppcle:Linux:*:*)  	echo powerpcle-unknown-linux-${LIBC}  	exit ;; -    riscv32:Linux:*:* | riscv64:Linux:*:*) -	echo ${UNAME_MACHINE}-unknown-linux-${LIBC} -	exit ;;      s390:Linux:*:* | s390x:Linux:*:*)  	echo ${UNAME_MACHINE}-ibm-linux-${LIBC}  	exit ;; @@ -1133,7 +1120,7 @@ EOF  	# uname -m prints for DJGPP always 'pc', but it prints nothing about  	# the processor, so we play safe by assuming i586.  	# Note: whatever this is, it MUST be the same as what config.sub -	# prints for the "djgpp" host, or else GDB configure will decide that +	# prints for the "djgpp" host, or else GDB configury will decide that  	# this is a cross-build.  	echo i586-pc-msdosdjgpp  	exit ;; @@ -1282,9 +1269,6 @@ EOF      SX-8R:SUPER-UX:*:*)  	echo sx8r-nec-superux${UNAME_RELEASE}  	exit ;; -    SX-ACE:SUPER-UX:*:*) -	echo sxace-nec-superux${UNAME_RELEASE} -	exit ;;      Power*:Rhapsody:*:*)  	echo powerpc-apple-rhapsody${UNAME_RELEASE}  	exit ;; @@ -1298,9 +1282,9 @@ EOF  	    UNAME_PROCESSOR=powerpc  	fi  	if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then -	    if [ "$CC_FOR_BUILD" != no_compiler_found ]; then +	    if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then  		if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ -		    (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ +		    (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \  		    grep IS_64BIT_ARCH >/dev/null  		then  		    case $UNAME_PROCESSOR in @@ -1322,7 +1306,7 @@ EOF  	exit ;;      *:procnto*:*:* | *:QNX:[0123456789]*:*)  	UNAME_PROCESSOR=`uname -p` -	if test "$UNAME_PROCESSOR" = x86; then +	if test "$UNAME_PROCESSOR" = "x86"; then  		UNAME_PROCESSOR=i386  		UNAME_MACHINE=pc  	fi @@ -1353,7 +1337,7 @@ EOF  	# "uname -m" is not consistent, so use $cputype instead. 386  	# is converted to i386 for consistency with other x86  	# operating systems. -	if test "$cputype" = 386; then +	if test "$cputype" = "386"; then  	    UNAME_MACHINE=i386  	else  	    UNAME_MACHINE="$cputype" @@ -1395,7 +1379,7 @@ EOF  	echo i386-pc-xenix  	exit ;;      i*86:skyos:*:*) -	echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` +	echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'  	exit ;;      i*86:rdos:*:*)  	echo ${UNAME_MACHINE}-pc-rdos @@ -1406,25 +1390,23 @@ EOF      x86_64:VMkernel:*:*)  	echo ${UNAME_MACHINE}-unknown-esx  	exit ;; -    amd64:Isilon\ OneFS:*:*) -	echo x86_64-unknown-onefs -	exit ;;  esac  cat >&2 <<EOF  $0: unable to guess system type -This script (version $timestamp), has failed to recognize the -operating system you are using. If your script is old, overwrite -config.guess and config.sub with the latest versions from: +This script, last modified $timestamp, has failed to recognize +the operating system you are using. It is advised that you +download the most up to date version of the config scripts from -  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD  and -  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD -If $0 has already been updated, send the following data and any -information you think might be pertinent to config-patches@gnu.org to -provide the necessary information to handle your system. +If the version you run ($0) is already up to date, please +send the following data and any information you think might be +pertinent to <config-patches@gnu.org> in order to provide the needed +information to handle your system.  config.guess timestamp = $timestamp @@ -1,8 +1,8 @@  #! /bin/sh  # Configuration validation subroutine script. -#   Copyright 1992-2016 Free Software Foundation, Inc. +#   Copyright 1992-2015 Free Software Foundation, Inc. -timestamp='2016-09-05' +timestamp='2015-08-20'  # This file is free software; you can redistribute it and/or modify it  # under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ timestamp='2016-09-05'  # Otherwise, we print the canonical config type on stdout and succeed.  # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD  # This file is supposed to be the same for all GNU packages  # and recognize all the CPU types, system types and aliases @@ -53,7 +53,8 @@ timestamp='2016-09-05'  me=`echo "$0" | sed -e 's,.*/,,'`  usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS +       $0 [OPTION] ALIAS  Canonicalize a configuration name. @@ -67,7 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>."  version="\  GNU config.sub ($timestamp) -Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2015 Free Software Foundation, Inc.  This is free software; see the source for copying conditions.  There is NO  warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -117,7 +118,7 @@ case $maybe_os in    nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \    linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \    knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ -  kopensolaris*-gnu* | cloudabi*-eabi* | \ +  kopensolaris*-gnu* | \    storm-chaos* | os2-emx* | rtmk-nova*)      os=-$maybe_os      basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -520,7 +521,7 @@ case $basic_machine in  		basic_machine=i386-pc  		os=-aros  		;; -	asmjs) +        asmjs)  		basic_machine=asmjs-unknown  		;;  	aux) @@ -643,14 +644,6 @@ case $basic_machine in  		basic_machine=m68k-bull  		os=-sysv3  		;; -	e500v[12]) -		basic_machine=powerpc-unknown -		os=$os"spe" -		;; -	e500v[12]-*) -		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` -		os=$os"spe" -		;;  	ebmon29k)  		basic_machine=a29k-amd  		os=-ebmon @@ -1030,7 +1023,7 @@ case $basic_machine in  	ppc-* | ppcbe-*)  		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`  		;; -	ppcle | powerpclittle) +	ppcle | powerpclittle | ppc-le | powerpc-little)  		basic_machine=powerpcle-unknown  		;;  	ppcle-* | powerpclittle-*) @@ -1040,7 +1033,7 @@ case $basic_machine in  		;;  	ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`  		;; -	ppc64le | powerpc64little) +	ppc64le | powerpc64little | ppc64-le | powerpc64-little)  		basic_machine=powerpc64le-unknown  		;;  	ppc64le-* | powerpc64little-*) @@ -1390,14 +1383,14 @@ case $os in  	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \  	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \  	      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ -	      | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ +	      | -bitrig* | -openbsd* | -solidbsd* \  	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \  	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \  	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \  	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \  	      | -chorusos* | -chorusrdb* | -cegcc* \  	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ -	      | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ +	      | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \  	      | -linux-newlib* | -linux-musl* | -linux-uclibc* \  	      | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \  	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ @@ -1406,8 +1399,7 @@ case $os in  	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \  	      | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \  	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ -	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ -	      | -onefs* | -tirtos* | -phoenix*) +	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*)  	# Remember, each alternative MUST END IN *, to match a version number.  		;;  	-qnx*) @@ -1539,8 +1531,6 @@ case $os in  		;;  	-nacl*)  		;; -	-ios) -		;;  	-none)  		;;  	*) diff --git a/configure.ac b/configure.ac index 688d15b..efaf5e1 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@  dnl Process this file with autoconf to produce a configure script. -AC_INIT(onig, 6.4.0) +AC_INIT(onig, 6.5.0)  AC_CONFIG_MACRO_DIR([m4]) @@ -16,7 +16,7 @@ AC_SUBST(STATISTICS)  dnl check for COMBINATION_EXPLOSION  AC_ARG_ENABLE(combination-explosion-check, -	[  --enable-combination-explosion-check   enable combination explosion check], +	[  --enable-combination-explosion-check   deprecated],  	[comb_expl_check=$enableval])  if test "${comb_expl_check}" = yes; then    AC_DEFINE(USE_COMBINATION_EXPLOSION_CHECK,1,[Define if combination explosion check]) diff --git a/debian/changelog b/debian/changelog index 69a8598..d7330ed 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +libonig (6.5.0-1) unstable; urgency=medium + +  * New upstream release. +    + Refresh symbols file. + + -- Jörg Frings-Fürst <debian@jff-webhosting.net>  Sun, 06 Aug 2017 19:31:50 +0200 +  libonig (6.4.0-1) unstable; urgency=medium    * New upstream release. diff --git a/debian/symbols b/debian/symbols index 23c1b49..d3c085c 100644 --- a/debian/symbols +++ b/debian/symbols @@ -50,6 +50,7 @@ libonig.so.4 libonig4 #MINVER#   OnigUnicodeFolds2@Base 6.0.0   OnigUnicodeFolds3@Base 6.0.0   euc_jp_lookup_property_name@Base 6.0.0 + list_node_free_not_car@Base 6.5.0   onig_add_end_call@Base 5.9.6   onig_bbuf_init@Base 5.9.5   onig_capture_tree_traverse@Base 5.9.5 @@ -3,7 +3,7 @@  scriptversion=2013-05-30.07; # UTC -# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# Copyright (C) 1999-2014 Free Software Foundation, Inc.  # This program is free software; you can redistribute it and/or modify  # it under the terms of the GNU General Public License as published by @@ -1,7 +1,7 @@  --- This file is part of LuaDist project  name = "onig" -version = "6.4.0" +version = "6.5.0"  desc = "Oniguruma is a regular expressions library."  author = "K.Kosako" @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.4.0    2017/06/28 +Oniguruma Regular Expressions Version 6.5.0    2017/07/30  syntax: ONIG_SYNTAX_RUBY (default) @@ -52,8 +52,8 @@ syntax: ONIG_SYNTAX_RUBY (default)             Not Unicode:               \t, \n, \v, \f, \r, \x20 -           Unicode: -             0009, 000A, 000B, 000C, 000D, 0085(NEL), +           Unicode case: +             U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL),               General_Category -- Line_Separator                                -- Paragraph_Separator                                -- Space_Separator @@ -70,6 +70,16 @@ syntax: ONIG_SYNTAX_RUBY (default)    \H       non-hexdigit char +  \R       general newline  (* can't be used in character-class) +           "\r\n" or \n,\v,\f,\r  (* but doesn't backtrack from \r\n to \r) + +           Unicode case: +             "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029 + +  \N       negative newline  (?-m:.) + +  \O       true anychar      (?m:.)    (* original function) +    Character Property @@ -133,6 +143,8 @@ syntax: ONIG_SYNTAX_RUBY (default)    \Z      end of string, or before newline at the end    \z      end of string    \G      where the current search attempt begins +  \K      keep (keep start position of the result string) +  6. Character class @@ -183,9 +195,9 @@ syntax: ONIG_SYNTAX_RUBY (default)                 Final_Punctuation | Initial_Punctuation | Other_Punctuation |                 Open_Punctuation        space    Space_Separator | Line_Separator | Paragraph_Separator | -               0009 | 000A | 000B | 000C | 000D | 0085 +               U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085        upper    Uppercase_Letter -      xdigit   0030 - 0039 | 0041 - 0046 | 0061 - 0066 +      xdigit   U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066                 (0-9, a-f, A-F)        word     Letter | Mark | Decimal_Number | Connector_Punctuation @@ -228,6 +240,50 @@ syntax: ONIG_SYNTAX_RUBY (default)                       Assigning the same name to two or more subexps is allowed. +  <Absent functions> + +  (?~absent)         Absent repeater    (* proposed by Tanaka Akira) +                     This works like .* (more precisely \O*), but it is +                     limited by the range that does not include the string +                     match with absent. +                     This is a written abbreviation of (?~|absent|\O*). +                     \O* is used as a repeater. + +  (?~|absent|exp)    Absent expression  (* original) +                     This works like "exp", but it is limited by the range +                     that does not include the string match with absent. + +                     ex. (?~|345|\d*)  "12345678"  ==> "12", "1", "" + +  (?~|absent)        Absent cutter (* original) +                     After passed this operator, string right range is limited +                     at the point that does not include the string match whth +                     absent. + +  (?~|)              Absent clear +                     Clear the effects caused by Absent cutters. +                     (* This operation is not cancelled by backtrack.) + +     * Nested Absent functions are not supported and the behavior +       is undefined. + + +  (?(condition_exp)then_exp|else_exp)    if-then-else +  (?(condition_exp)then_exp)             if-then + +               condition_exp can be a backreference number/name or a normal +               regular expression. +               When condition_exp is a backreference, both then_exp and +               else_exp can be omitted. +               Then it works as a backreference validity checker. + +  [ backreference validity checker ]   (* original) + +    (?(n)), (?(-n)), (?(+n)), (?(n+level)) ... +    (?(<n>)), (?('-n')), (?(<+n>)) ... +    (?(<name>)), (?('name')), (?(<name+level>)) ... + +  8. Backreferences @@ -282,7 +338,7 @@ syntax: ONIG_SYNTAX_RUBY (default)        p r.match("<foo>f<bar>bbb</bar>f</foo>").captures -9. Subexp calls ("Tanaka Akira special") +9. Subexp calls ("Tanaka Akira special")   (* original function)    When we say "call a group," it actually means, "re-execute the subexp in    that group." @@ -367,7 +423,6 @@ A-3. Missing features compared with perl 5.8.0     + \l,\u,\L,\U, \X, \C     + (?{code})     + (??{code}) -   + (?(condition)yes-pat|no-pat)     * \Q...\E       This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.4.0    2017/06/28 +鬼車 正規表現 Version 6.5.0    2017/07/30  使用文法: ONIG_SYNTAX_RUBY (既定値) @@ -35,7 +35,7 @@  3. 文字種 -  .        任意文字 (改行を除く) +  .        任意文字 (改行を除く: オプションに依存)    \w       単語構成文字 @@ -53,7 +53,7 @@               \t, \n, \v, \f, \r, \x20             Unicodeの場合: -             0009, 000A, 000B, 000C, 000D, 0085(NEL),  +             U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL),                General_Category -- Line_Separator                                -- Paragraph_Separator                                -- Space_Separator @@ -70,6 +70,16 @@    \H       非16進数字 +  \R       汎改行  (* 文字集合の中では使用できない) +           "\r\n" or \n,\v,\f,\r  (* 但し \r\nから\rにはバックトラックしない) + +           Unicodeの場合: +             "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029 + +  \N       非改行文字  (?-m:.) + +  \O       真任意文字  (?m:.)      (* 原作) +    Character Property @@ -133,6 +143,8 @@    \Z      文字列末尾、または文字列末尾の改行の直前    \z      文字列末尾    \G      照合開始位置 +  \K      保持 (結果の開始位置をこの位置に保つ) +  6. 文字集合 @@ -182,9 +194,9 @@                 Final_Punctuation | Initial_Punctuation | Other_Punctuation |                 Open_Punctuation        space    Space_Separator | Line_Separator | Paragraph_Separator | -               0009 | 000A | 000B | 000C | 000D | 0085 +               U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085        upper    Uppercase_Letter -      xdigit   0030 - 0039 | 0041 - 0046 | 0061 - 0066 +      xdigit   U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066                 (0-9, a-f, A-F)        word     Letter | Mark | Decimal_Number | Connector_Punctuation @@ -230,6 +242,52 @@                      この場合には、この名前を使用した後方参照は可能であるが、                      部分式呼出しはできない。 +  <不在機能群> + +  (?~不在式)        不在繰り返し  (*原案 田中哲) +                    これは.*のように(より正確には\O*)動作するが、不在式に +                    適合する文字列を含まない範囲に制限される。 +                    これは(?~|不在式|\O*)の省略表記である。 +                    \O*の部分はマルチラインオプション(?m)の影響を受けない。 + +  (?~|不在式|式)    不在式  (* 原作) +                    これは"式"のように動作するが、不在式に適合する文字列を +                    含まない範囲に制限される。 + +                    例 (?~|345|\d*)  "12345678"  ==> "12", "1", "" + +  (?~|不在式)       不在切断 (* 原作) +                    この演算子を通過した後は、対象文字列の適合範囲の最後が +                    不在式に適合する文字列を含まない範囲に制限される。 + +  (?~|)             不在消去 +                    不在切断の効果を消して、初期状態にする。 +                    (* この演算子の効果は後退再試行で無効化されない) + +     * 不在機能の入れ子はサポートしておらず、挙動は不定とする。 + + +  (?(条件式)成功式|失敗式)    条件式が成功すれば成功式、失敗すれば失敗式を実行する +                             この機能の存在理由は、成功式が失敗しても失敗式には +                             行かないこと。これは他の正規表現で書くことができない。 +                             もうひとつは、条件式が後方参照のとき、後方参照値の有効性 +                             を調べる(文字列とマッチングはしない)意味になる。 + +  (?(条件式)成功式)           条件式が成功すれば成功式を実行する +                             (条件式が通常の式のときには、この構文は不必要だが +                              今のところエラーにはしない。) + + +                    条件式は後方参照または通常の式を使用できる。 +                    条件式が後方参照の場合、成功式と失敗式の両方を省略可能であり、 +                    この場合、後方参照値有効性を調べる(成功/失敗)機能のみになる。 + +  [後方参照値有効性確認器]  (* 原作) +    (?(n)), (?(-n)), (?(+n)), (?(n+level)) ... +    (?(<n>)), (?('-n')), (?(<+n>)) ... +    (?(<name>)), (?('name')), (?(<name+level>)) ... + +  8. 後方参照 @@ -288,7 +346,7 @@ -9. 部分式呼出し ("田中哲スペシャル") +9. 部分式呼出し ("田中哲スペシャル")   (* 原作)    \g<name>    名前指定呼出し    \g'name'    名前指定呼出し @@ -373,7 +431,6 @@     + \l,\u,\L,\U, \X, \C     + (?{code})     + (??{code}) -   + (?(condition)yes-pat|no-pat)     * \Q...\E       但しONIG_SYNTAX_PERLとONIG_SYNTAX_JAVAでは有効 @@ -8,7 +8,7 @@  <h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)  <p> -(c) K.Kosako, updated at: 2017/06/30 +(c) K.Kosako, updated at: 2017/08/03  </p>  <dl> @@ -16,6 +16,7 @@  <dt><b>What's new</b>  </font>  <ul> +<li>2017/08/03: Version 6.5.0 released.</li>  <li>2017/07/03: Version 6.4.0 released.</li>  <li>2017/05/29: Version 6.3.0 released.</li>  <li>2017/04/08: Version 6.2.0 released.</li> diff --git a/index_ja.html b/index_ja.html index 502f460..52f0412 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@  <h1>鬼車</h1>  <p> -(c) K.Kosako, 最終更新: 2017/06/30 +(c) K.Kosako, 最終更新: 2017/08/03  </p>  <dl> @@ -16,6 +16,7 @@  <dt><b>更新情報</b>  </font>  <ul> +<li>2017/08/03: Version 6.5.0 リリース</li>  <li>2017/07/03: Version 6.4.0 リリース</li>  <li>2017/05/29: Version 6.3.0 リリース</li>  <li>2017/04/08: Version 6.2.0 リリース</li> @@ -1,7 +1,7 @@  #!/bin/sh  # install - install a program, script, or datafile -scriptversion=2011-11-20.07; # UTC +scriptversion=2014-09-12.12; # UTC  # This originates from X11R5 (mit/util/scripts/install.sh), which was  # later released in X11R6 (xc/config/util/install.sh) with the @@ -41,19 +41,15 @@ scriptversion=2011-11-20.07; # UTC  # This script is compatible with the BSD install script, but was written  # from scratch. +tab='	'  nl='  ' -IFS=" ""	$nl" +IFS=" $tab$nl" -# set DOITPROG to echo to test this script +# Set DOITPROG to "echo" to test this script. -# Don't use :- since 4.3BSD and earlier shells don't like it.  doit=${DOITPROG-} -if test -z "$doit"; then -  doit_exec=exec -else -  doit_exec=$doit -fi +doit_exec=${doit:-exec}  # Put in absolute file names if you don't have them in your path;  # or use environment vars. @@ -68,17 +64,6 @@ mvprog=${MVPROG-mv}  rmprog=${RMPROG-rm}  stripprog=${STRIPPROG-strip} -posix_glob='?' -initialize_posix_glob=' -  test "$posix_glob" != "?" || { -    if (set -f) 2>/dev/null; then -      posix_glob= -    else -      posix_glob=: -    fi -  } -' -  posix_mkdir=  # Desired mode of installed file. @@ -97,7 +82,7 @@ dir_arg=  dst_arg=  copy_on_change=false -no_target_directory= +is_target_a_directory=possibly  usage="\  Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE @@ -137,46 +122,57 @@ while test $# -ne 0; do      -d) dir_arg=true;;      -g) chgrpcmd="$chgrpprog $2" -	shift;; +        shift;;      --help) echo "$usage"; exit $?;;      -m) mode=$2 -	case $mode in -	  *' '* | *'	'* | *' -'*	  | *'*'* | *'?'* | *'['*) -	    echo "$0: invalid mode: $mode" >&2 -	    exit 1;; -	esac -	shift;; +        case $mode in +          *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) +            echo "$0: invalid mode: $mode" >&2 +            exit 1;; +        esac +        shift;;      -o) chowncmd="$chownprog $2" -	shift;; +        shift;;      -s) stripcmd=$stripprog;; -    -t) dst_arg=$2 -	# Protect names problematic for 'test' and other utilities. -	case $dst_arg in -	  -* | [=\(\)!]) dst_arg=./$dst_arg;; -	esac -	shift;; +    -t) +        is_target_a_directory=always +        dst_arg=$2 +        # Protect names problematic for 'test' and other utilities. +        case $dst_arg in +          -* | [=\(\)!]) dst_arg=./$dst_arg;; +        esac +        shift;; -    -T) no_target_directory=true;; +    -T) is_target_a_directory=never;;      --version) echo "$0 $scriptversion"; exit $?;; -    --)	shift -	break;; +    --) shift +        break;; -    -*)	echo "$0: invalid option: $1" >&2 -	exit 1;; +    -*) echo "$0: invalid option: $1" >&2 +        exit 1;;      *)  break;;    esac    shift  done +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then +  if test -n "$dst_arg"; then +    echo "$0: target directory not allowed when installing a directory." >&2 +    exit 1 +  fi +fi +  if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then    # When -d is used, all remaining arguments are directories to create.    # When -t is used, the destination is already specified. @@ -208,6 +204,15 @@ if test $# -eq 0; then  fi  if test -z "$dir_arg"; then +  if test $# -gt 1 || test "$is_target_a_directory" = always; then +    if test ! -d "$dst_arg"; then +      echo "$0: $dst_arg: Is not a directory." >&2 +      exit 1 +    fi +  fi +fi + +if test -z "$dir_arg"; then    do_exit='(exit $ret); exit $ret'    trap "ret=129; $do_exit" 1    trap "ret=130; $do_exit" 2 @@ -223,16 +228,16 @@ if test -z "$dir_arg"; then      *[0-7])        if test -z "$stripcmd"; then -	u_plus_rw= +        u_plus_rw=        else -	u_plus_rw='% 200' +        u_plus_rw='% 200'        fi        cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;      *)        if test -z "$stripcmd"; then -	u_plus_rw= +        u_plus_rw=        else -	u_plus_rw=,u+rw +        u_plus_rw=,u+rw        fi        cp_umask=$mode$u_plus_rw;;    esac @@ -269,41 +274,15 @@ do      # If destination is a directory, append the input filename; won't work      # if double slashes aren't ignored.      if test -d "$dst"; then -      if test -n "$no_target_directory"; then -	echo "$0: $dst_arg: Is a directory" >&2 -	exit 1 +      if test "$is_target_a_directory" = never; then +        echo "$0: $dst_arg: Is a directory" >&2 +        exit 1        fi        dstdir=$dst        dst=$dstdir/`basename "$src"`        dstdir_status=0      else -      # Prefer dirname, but fall back on a substitute if dirname fails. -      dstdir=` -	(dirname "$dst") 2>/dev/null || -	expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ -	     X"$dst" : 'X\(//\)[^/]' \| \ -	     X"$dst" : 'X\(//\)$' \| \ -	     X"$dst" : 'X\(/\)' \| . 2>/dev/null || -	echo X"$dst" | -	    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ -		   s//\1/ -		   q -		 } -		 /^X\(\/\/\)[^/].*/{ -		   s//\1/ -		   q -		 } -		 /^X\(\/\/\)$/{ -		   s//\1/ -		   q -		 } -		 /^X\(\/\).*/{ -		   s//\1/ -		   q -		 } -		 s/.*/./; q' -      ` - +      dstdir=`dirname "$dst"`        test -d "$dstdir"        dstdir_status=$?      fi @@ -314,74 +293,81 @@ do    if test $dstdir_status != 0; then      case $posix_mkdir in        '') -	# Create intermediate dirs using mode 755 as modified by the umask. -	# This is like FreeBSD 'install' as of 1997-10-28. -	umask=`umask` -	case $stripcmd.$umask in -	  # Optimize common cases. -	  *[2367][2367]) mkdir_umask=$umask;; -	  .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; - -	  *[0-7]) -	    mkdir_umask=`expr $umask + 22 \ -	      - $umask % 100 % 40 + $umask % 20 \ -	      - $umask % 10 % 4 + $umask % 2 -	    `;; -	  *) mkdir_umask=$umask,go-w;; -	esac - -	# With -d, create the new directory with the user-specified mode. -	# Otherwise, rely on $mkdir_umask. -	if test -n "$dir_arg"; then -	  mkdir_mode=-m$mode -	else -	  mkdir_mode= -	fi - -	posix_mkdir=false -	case $umask in -	  *[123567][0-7][0-7]) -	    # POSIX mkdir -p sets u+wx bits regardless of umask, which -	    # is incompatible with FreeBSD 'install' when (umask & 300) != 0. -	    ;; -	  *) -	    tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ -	    trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 - -	    if (umask $mkdir_umask && -		exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 -	    then -	      if test -z "$dir_arg" || { -		   # Check for POSIX incompatibilities with -m. -		   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or -		   # other-writable bit of parent directory when it shouldn't. -		   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. -		   ls_ld_tmpdir=`ls -ld "$tmpdir"` -		   case $ls_ld_tmpdir in -		     d????-?r-*) different_mode=700;; -		     d????-?--*) different_mode=755;; -		     *) false;; -		   esac && -		   $mkdirprog -m$different_mode -p -- "$tmpdir" && { -		     ls_ld_tmpdir_1=`ls -ld "$tmpdir"` -		     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" -		   } -		 } -	      then posix_mkdir=: -	      fi -	      rmdir "$tmpdir/d" "$tmpdir" -	    else -	      # Remove any dirs left behind by ancient mkdir implementations. -	      rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null -	    fi -	    trap '' 0;; -	esac;; +        # Create intermediate dirs using mode 755 as modified by the umask. +        # This is like FreeBSD 'install' as of 1997-10-28. +        umask=`umask` +        case $stripcmd.$umask in +          # Optimize common cases. +          *[2367][2367]) mkdir_umask=$umask;; +          .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + +          *[0-7]) +            mkdir_umask=`expr $umask + 22 \ +              - $umask % 100 % 40 + $umask % 20 \ +              - $umask % 10 % 4 + $umask % 2 +            `;; +          *) mkdir_umask=$umask,go-w;; +        esac + +        # With -d, create the new directory with the user-specified mode. +        # Otherwise, rely on $mkdir_umask. +        if test -n "$dir_arg"; then +          mkdir_mode=-m$mode +        else +          mkdir_mode= +        fi + +        posix_mkdir=false +        case $umask in +          *[123567][0-7][0-7]) +            # POSIX mkdir -p sets u+wx bits regardless of umask, which +            # is incompatible with FreeBSD 'install' when (umask & 300) != 0. +            ;; +          *) +            # $RANDOM is not portable (e.g. dash);  use it when possible to +            # lower collision chance +            tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ +            trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + +            # As "mkdir -p" follows symlinks and we work in /tmp possibly;  so +            # create the $tmpdir first (and fail if unsuccessful) to make sure +            # that nobody tries to guess the $tmpdir name. +            if (umask $mkdir_umask && +                $mkdirprog $mkdir_mode "$tmpdir" && +                exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 +            then +              if test -z "$dir_arg" || { +                   # Check for POSIX incompatibilities with -m. +                   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or +                   # other-writable bit of parent directory when it shouldn't. +                   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. +                   test_tmpdir="$tmpdir/a" +                   ls_ld_tmpdir=`ls -ld "$test_tmpdir"` +                   case $ls_ld_tmpdir in +                     d????-?r-*) different_mode=700;; +                     d????-?--*) different_mode=755;; +                     *) false;; +                   esac && +                   $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { +                     ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` +                     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" +                   } +                 } +              then posix_mkdir=: +              fi +              rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" +            else +              # Remove any dirs left behind by ancient mkdir implementations. +              rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null +            fi +            trap '' 0;; +        esac;;      esac      if        $posix_mkdir && ( -	umask $mkdir_umask && -	$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" +        umask $mkdir_umask && +        $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"        )      then :      else @@ -391,53 +377,51 @@ do        # directory the slow way, step by step, checking for races as we go.        case $dstdir in -	/*) prefix='/';; -	[-=\(\)!]*) prefix='./';; -	*)  prefix='';; +        /*) prefix='/';; +        [-=\(\)!]*) prefix='./';; +        *)  prefix='';;        esac -      eval "$initialize_posix_glob" -        oIFS=$IFS        IFS=/ -      $posix_glob set -f +      set -f        set fnord $dstdir        shift -      $posix_glob set +f +      set +f        IFS=$oIFS        prefixes=        for d        do -	test X"$d" = X && continue - -	prefix=$prefix$d -	if test -d "$prefix"; then -	  prefixes= -	else -	  if $posix_mkdir; then -	    (umask=$mkdir_umask && -	     $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break -	    # Don't fail if two instances are running concurrently. -	    test -d "$prefix" || exit 1 -	  else -	    case $prefix in -	      *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; -	      *) qprefix=$prefix;; -	    esac -	    prefixes="$prefixes '$qprefix'" -	  fi -	fi -	prefix=$prefix/ +        test X"$d" = X && continue + +        prefix=$prefix$d +        if test -d "$prefix"; then +          prefixes= +        else +          if $posix_mkdir; then +            (umask=$mkdir_umask && +             $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break +            # Don't fail if two instances are running concurrently. +            test -d "$prefix" || exit 1 +          else +            case $prefix in +              *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; +              *) qprefix=$prefix;; +            esac +            prefixes="$prefixes '$qprefix'" +          fi +        fi +        prefix=$prefix/        done        if test -n "$prefixes"; then -	# Don't fail if two instances are running concurrently. -	(umask $mkdir_umask && -	 eval "\$doit_exec \$mkdirprog $prefixes") || -	  test -d "$dstdir" || exit 1 -	obsolete_mkdir_used=true +        # Don't fail if two instances are running concurrently. +        (umask $mkdir_umask && +         eval "\$doit_exec \$mkdirprog $prefixes") || +          test -d "$dstdir" || exit 1 +        obsolete_mkdir_used=true        fi      fi    fi @@ -472,15 +456,12 @@ do      # If -C, don't bother to copy if it wouldn't change the file.      if $copy_on_change && -       old=`LC_ALL=C ls -dlL "$dst"	2>/dev/null` && -       new=`LC_ALL=C ls -dlL "$dsttmp"	2>/dev/null` && - -       eval "$initialize_posix_glob" && -       $posix_glob set -f && +       old=`LC_ALL=C ls -dlL "$dst"     2>/dev/null` && +       new=`LC_ALL=C ls -dlL "$dsttmp"  2>/dev/null` && +       set -f &&         set X $old && old=:$2:$4:$5:$6 &&         set X $new && new=:$2:$4:$5:$6 && -       $posix_glob set +f && - +       set +f &&         test "$old" = "$new" &&         $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1      then @@ -493,24 +474,24 @@ do        # to itself, or perhaps because mv is so ancient that it does not        # support -f.        { -	# Now remove or move aside any old file at destination location. -	# We try this two ways since rm can't unlink itself on some -	# systems and the destination file might be busy for other -	# reasons.  In this case, the final cleanup might fail but the new -	# file should still install successfully. -	{ -	  test ! -f "$dst" || -	  $doit $rmcmd -f "$dst" 2>/dev/null || -	  { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && -	    { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } -	  } || -	  { echo "$0: cannot unlink or rename $dst" >&2 -	    (exit 1); exit 1 -	  } -	} && - -	# Now rename the file to the real destination. -	$doit $mvcmd "$dsttmp" "$dst" +        # Now remove or move aside any old file at destination location. +        # We try this two ways since rm can't unlink itself on some +        # systems and the destination file might be busy for other +        # reasons.  In this case, the final cleanup might fail but the new +        # file should still install successfully. +        { +          test ! -f "$dst" || +          $doit $rmcmd -f "$dst" 2>/dev/null || +          { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && +            { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } +          } || +          { echo "$0: cannot unlink or rename $dst" >&2 +            (exit 1); exit 1 +          } +        } && + +        # Now rename the file to the real destination. +        $doit $mvcmd "$dsttmp" "$dst"        }      fi || exit 1 @@ -3,7 +3,7 @@  scriptversion=2013-10-28.13; # UTC -# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# Copyright (C) 1996-2014 Free Software Foundation, Inc.  # Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.  # This program is free software; you can redistribute it and/or modify diff --git a/src/oniguruma.h b/src/oniguruma.h index a8ae09a..bc8983f 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -35,7 +35,7 @@ extern "C" {  #define ONIGURUMA  #define ONIGURUMA_VERSION_MAJOR   6 -#define ONIGURUMA_VERSION_MINOR   4 +#define ONIGURUMA_VERSION_MINOR   5  #define ONIGURUMA_VERSION_TEENY   0  #ifdef __cplusplus @@ -496,6 +496,11 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  /* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */  #define ONIG_SYN_OP2_ESC_H_XDIGIT               (1U<<19) /* \h, \H */  #define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE         (1U<<20) /* \ */ +#define ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE       (1U<<21) /* (?(n)) (?(...)...|...) */ +#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP         (1U<<22) /* \K */ +#define ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (1U<<23) /* \R \r\n else [\x0a-\x0d] */ +#define ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT  (1U<<24) /* \N (?-m:.), \O (?m:.) */ +#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP   (1U<<25) /* (?~...) */  /* syntax (behavior) */  #define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1U<<31) /* not implemented */ @@ -596,6 +601,9 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;  #define ONIGERR_NEVER_ENDING_RECURSION                       -221  #define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY        -222  #define ONIGERR_INVALID_CHAR_PROPERTY_NAME                   -223 +#define ONIGERR_INVALID_IF_ELSE_SYNTAX                       -224 +#define ONIGERR_INVALID_ABSENT_GROUP_PATTERN                 -225 +#define ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN       -226  #define ONIGERR_INVALID_CODE_POINT_VALUE                     -400  #define ONIGERR_INVALID_WIDE_CHAR_VALUE                      -400  #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE                      -401 diff --git a/src/regcomp.c b/src/regcomp.c index db83739..47023cb 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -147,7 +147,7 @@ swap_node(Node* a, Node* b)    Node c;    c = *a; *a = *b; *b = c; -  if (NODE_TYPE(a) == NODE_STR) { +  if (NODE_TYPE(a) == NODE_STRING) {      StrNode* sn = STR_(a);      if (sn->capa == 0) {        int len = sn->end - sn->s; @@ -156,7 +156,7 @@ swap_node(Node* a, Node* b)      }    } -  if (NODE_TYPE(b) == NODE_STR) { +  if (NODE_TYPE(b) == NODE_STRING) {      StrNode* sn = STR_(b);      if (sn->capa == 0) {        int len = sn->end - sn->s; @@ -169,11 +169,11 @@ swap_node(Node* a, Node* b)  static OnigLen  distance_add(OnigLen d1, OnigLen d2)  { -  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) -    return ONIG_INFINITE_DISTANCE; +  if (d1 == INFINITE_LEN || d2 == INFINITE_LEN) +    return INFINITE_LEN;    else { -    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; -    else return ONIG_INFINITE_DISTANCE; +    if (d1 <= INFINITE_LEN - d2) return d1 + d2; +    else return INFINITE_LEN;    }  } @@ -182,10 +182,10 @@ distance_multiply(OnigLen d, int m)  {    if (m == 0) return 0; -  if (d < ONIG_INFINITE_DISTANCE / m) +  if (d < INFINITE_LEN / m)      return d * m;    else -    return ONIG_INFINITE_DISTANCE; +    return INFINITE_LEN;  }  static int @@ -230,7 +230,7 @@ onig_bbuf_init(BBuf* buf, int size)  } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL  static int  unset_addr_list_init(UnsetAddrList* list, int size) @@ -271,7 +271,7 @@ unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)    list->num++;    return 0;  } -#endif /* USE_SUBEXP_CALL */ +#endif /* USE_CALL */  static int @@ -347,6 +347,24 @@ add_option(regex_t* reg, OnigOptionType option)  }  static int +add_save_type(regex_t* reg, enum SaveType type) +{ +  SaveType t = (SaveType )type; + +  BBUF_ADD(reg, &t, SIZE_SAVE_TYPE); +  return 0; +} + +static int +add_update_var_type(regex_t* reg, enum UpdateVarType type) +{ +  UpdateVarType t = (UpdateVarType )type; + +  BBUF_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE); +  return 0; +} + +static int  add_opcode_rel_addr(regex_t* reg, int opcode, int addr)  {    int r; @@ -466,7 +484,7 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)    return r;  } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL  static int  compile_call(CallNode* node, regex_t* reg, ScanEnv* env)  { @@ -545,7 +563,7 @@ compile_length_string_node(Node* node, regex_t* reg)    if (sn->end <= sn->s)      return 0; -  ambig = NSTRING_IS_AMBIG(node); +  ambig = NODE_STRING_IS_AMBIG(node);    p = prev = sn->s;    prev_len = enclen(enc, p); @@ -594,7 +612,7 @@ compile_string_node(Node* node, regex_t* reg)      return 0;    end = sn->end; -  ambig = NSTRING_IS_AMBIG(node); +  ambig = NODE_STRING_IS_AMBIG(node);    p = prev = sn->s;    prev_len = enclen(enc, p); @@ -767,7 +785,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,    if (r != 0) return r;    if ( -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL        NODE_IS_IN_MULTI_ENTRY(qn) ||  #endif        NODE_IS_IN_REAL_REPEAT(qn)) { @@ -893,7 +911,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)      r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);      if (r != 0) return r;      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { -      if (IS_MULTILINE(reg->options)) +      if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))          r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);        else          r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); @@ -906,7 +924,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)        return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);      }      else { -      if (IS_MULTILINE(reg->options)) { +      if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) {          r = add_opcode(reg, (CKN_ON ?                               OP_STATE_CHECK_ANYCHAR_ML_STAR                               : OP_ANYCHAR_ML_STAR)); @@ -1109,7 +1127,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)      r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);      if (r != 0) return r;      if (IS_NOT_NULL(qn->next_head_exact)) { -      if (IS_MULTILINE(reg->options)) +      if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))          r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);        else          r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); @@ -1117,7 +1135,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)        return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);      }      else { -      if (IS_MULTILINE(reg->options)) +      if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))          return add_opcode(reg, OP_ANYCHAR_ML_STAR);        else          return add_opcode(reg, OP_ANYCHAR_STAR); @@ -1229,7 +1247,7 @@ compile_length_option_node(EnclosureNode* node, regex_t* reg)    int tlen;    OnigOptionType prev = reg->options; -  reg->options = node->o.option; +  reg->options = node->o.options;    tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);    reg->options = prev; @@ -1249,8 +1267,8 @@ compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)    int r;    OnigOptionType prev = reg->options; -  if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) { -    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.option); +  if (IS_DYNAMIC_OPTION(prev ^ node->o.options)) { +    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.options);      if (r != 0) return r;      r = add_opcode_option(reg, OP_SET_OPTION, prev);      if (r != 0) return r; @@ -1258,11 +1276,11 @@ compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)      if (r != 0) return r;    } -  reg->options = node->o.option; +  reg->options = node->o.options;    r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);    reg->options = prev; -  if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) { +  if (IS_DYNAMIC_OPTION(prev ^ node->o.options)) {      if (r != 0) return r;      r = add_opcode_option(reg, OP_SET_OPTION, prev);    } @@ -1287,7 +1305,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)    switch (node->type) {    case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {        len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; @@ -1336,6 +1354,32 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)      }      break; +  case ENCLOSURE_IF_ELSE: +    { +      Node* cond = NODE_ENCLOSURE_BODY(node); +      Node* Then = node->te.Then; +      Node* Else = node->te.Else; + +      len = compile_length_tree(cond, reg); +      if (len < 0) return len; +      len += SIZE_OP_PUSH; +      len += SIZE_OP_PUSH_STOP_BT + SIZE_OP_POP_STOP_BT; + +      if (IS_NOT_NULL(Then)) { +        tlen = compile_length_tree(Then, reg); +        if (tlen < 0) return tlen; +        len += tlen; +      } + +      if (IS_NOT_NULL(Else)) { +        len += SIZE_OP_JUMP; +        tlen = compile_length_tree(Else, reg); +        if (tlen < 0) return tlen; +        len += tlen; +      } +    } +    break; +    default:      return ONIGERR_TYPE_BUG;      break; @@ -1352,7 +1396,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)    int r;    int len; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {      r = add_opcode(reg, OP_CALL);      if (r != 0) return r; @@ -1370,9 +1414,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)      r = add_opcode(reg, OP_RETURN);      return r;    } -#endif -#ifdef USE_SUBEXP_CALL    if (NODE_IS_CALLED(node)) {      r = add_opcode(reg, OP_CALL);      if (r != 0) return r; @@ -1404,7 +1446,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)    r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);    if (r != 0) return r; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))      r = add_opcode(reg, (NODE_IS_RECURSION(node)                           ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); @@ -1434,14 +1476,15 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)  {    int r, len; -  if (node->type == ENCLOSURE_OPTION) -    return compile_option_node(node, reg, env); -    switch (node->type) {    case ENCLOSURE_MEMORY:      r = compile_enclosure_memory_node(node, reg, env);      break; +  case ENCLOSURE_OPTION: +    r = compile_option_node(node, reg, env); +    break; +    case ENCLOSURE_STOP_BACKTRACK:      if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {        QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); @@ -1469,6 +1512,49 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)      }      break; +  case ENCLOSURE_IF_ELSE: +    { +      int cond_len, then_len, jump_len; +      Node* cond = NODE_ENCLOSURE_BODY(node); +      Node* Then = node->te.Then; +      Node* Else = node->te.Else; + +      r = add_opcode(reg, OP_PUSH_STOP_BT); +      if (r != 0) return r; + +      cond_len = compile_length_tree(cond, reg); +      if (cond_len < 0) return cond_len; +      if (IS_NOT_NULL(Then)) { +        then_len = compile_length_tree(Then, reg); +        if (then_len < 0) return then_len; +      } +      else +        then_len = 0; + +      jump_len = cond_len + then_len + SIZE_OP_POP_STOP_BT; +      if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP; + +      r = add_opcode_rel_addr(reg, OP_PUSH, jump_len); +      if (r != 0) return r; +      r = compile_tree(cond, reg, env); +      if (r != 0) return r; +      r = add_opcode(reg, OP_POP_STOP_BT); +      if (r != 0) return r; + +      if (IS_NOT_NULL(Then)) { +        r = compile_tree(Then, reg, env); +        if (r != 0) return r; +      } + +      if (IS_NOT_NULL(Else)) { +        int else_len = compile_length_tree(Else, reg); +        r = add_opcode_rel_addr(reg, OP_JUMP, else_len); +        if (r != 0) return r; +        r = compile_tree(Else, reg, env); +      } +    } +    break; +    default:      return ONIGERR_TYPE_BUG;      break; @@ -1490,10 +1576,10 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)    switch (node->type) {    case ANCHOR_PREC_READ: -    len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; +    len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;      break;    case ANCHOR_PREC_READ_NOT: -    len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; +    len = SIZE_OP_PUSH_PREC_READ_NOT + tlen + SIZE_OP_FAIL_PREC_READ_NOT;      break;    case ANCHOR_LOOK_BEHIND:      len = SIZE_OP_LOOK_BEHIND + tlen; @@ -1531,21 +1617,21 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)  #endif    case ANCHOR_PREC_READ: -    r = add_opcode(reg, OP_PUSH_POS); +    r = add_opcode(reg, OP_PREC_READ_START);      if (r != 0) return r;      r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);      if (r != 0) return r; -    r = add_opcode(reg, OP_POP_POS); +    r = add_opcode(reg, OP_PREC_READ_END);      break;    case ANCHOR_PREC_READ_NOT:      len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);      if (len < 0) return len; -    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); +    r = add_opcode_rel_addr(reg, OP_PUSH_PREC_READ_NOT, len + SIZE_OP_FAIL_PREC_READ_NOT);      if (r != 0) return r;      r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);      if (r != 0) return r; -    r = add_opcode(reg, OP_FAIL_POS); +    r = add_opcode(reg, OP_FAIL_PREC_READ_NOT);      break;    case ANCHOR_LOOK_BEHIND: @@ -1596,6 +1682,67 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)  }  static int +compile_gimmick_node(GimmickNode* node, regex_t* reg) +{ +  int r; + +  switch (node->type) { +  case GIMMICK_FAIL: +    r = add_opcode(reg, OP_FAIL); +    break; + +  case GIMMICK_KEEP: +    r = add_opcode(reg, OP_PUSH_SAVE_VAL); +    if (r != 0) return r; +    r = add_save_type(reg, SAVE_KEEP); +    if (r != 0) return r; +    r = add_mem_num(reg, node->id); +    break; + +  case GIMMICK_SAVE: +    r = add_opcode(reg, OP_PUSH_SAVE_VAL); +    if (r != 0) return r; +    r = add_save_type(reg, node->detail_type); +    if (r != 0) return r; +    r = add_mem_num(reg, node->id); +    break; + +  case GIMMICK_UPDATE_VAR: +    r = add_opcode(reg, OP_UPDATE_VAR); +    if (r != 0) return r; +    r = add_update_var_type(reg, node->detail_type); +    if (r != 0) return r; +    r = add_mem_num(reg, node->id); +    break; +  } + +  return r; +} + +static int +compile_length_gimmick_node(GimmickNode* node, regex_t* reg) +{ +  int len; + +  switch (node->type) { +  case GIMMICK_FAIL: +    len = SIZE_OP_FAIL; +    break; + +  case GIMMICK_KEEP: +  case GIMMICK_SAVE: +    len = SIZE_OP_PUSH_SAVE_VAL; +    break; + +  case GIMMICK_UPDATE_VAR: +    len = SIZE_OP_UPDATE_VAR; +    break; +  } + +  return len; +} + +static int  compile_length_tree(Node* node, regex_t* reg)  {    int len, r; @@ -1624,8 +1771,8 @@ compile_length_tree(Node* node, regex_t* reg)      }      break; -  case NODE_STR: -    if (NSTRING_IS_RAW(node)) +  case NODE_STRING: +    if (NODE_STRING_IS_RAW(node))        r = compile_length_string_raw_node(STR_(node), reg);      else        r = compile_length_string_node(node, reg); @@ -1639,28 +1786,39 @@ compile_length_tree(Node* node, regex_t* reg)      r = SIZE_OPCODE;      break; -  case NODE_BREF: +  case NODE_BACKREF:      { -      BRefNode* br = BREF_(node); +      BackRefNode* br = BACKREF_(node); +      if (NODE_IS_CHECKER(node)) {  #ifdef USE_BACKREF_WITH_LEVEL -      if (NODE_IS_NEST_LEVEL(node)) { -        r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + -            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); -      } -      else +        if (NODE_IS_NEST_LEVEL(node)) { +          r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); +        } +        else  #endif -      if (br->back_num == 1) { -        r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) -             ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); +          r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);        }        else { -        r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); +#ifdef USE_BACKREF_WITH_LEVEL +        if (NODE_IS_NEST_LEVEL(node)) { +          r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + +            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); +        } +        else +#endif +        if (br->back_num == 1) { +          r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) +               ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); +        } +        else { +          r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); +        }        }      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      r = SIZE_OP_CALL;      break; @@ -1678,6 +1836,10 @@ compile_length_tree(Node* node, regex_t* reg)      r = compile_length_anchor_node(ANCHOR_(node), reg);      break; +  case NODE_GIMMICK: +    r = compile_length_gimmick_node(GIMMICK_(node), reg); +    break; +    default:      return ONIGERR_TYPE_BUG;      break; @@ -1713,7 +1875,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)        do {          len = compile_length_tree(NODE_CAR(node), reg);          if (IS_NOT_NULL(NODE_CDR(node))) { -          r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); +          enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH; +          r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);            if (r != 0) break;          }          r = compile_tree(NODE_CAR(node), reg, env); @@ -1727,8 +1890,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)      }      break; -  case NODE_STR: -    if (NSTRING_IS_RAW(node)) +  case NODE_STRING: +    if (NODE_STRING_IS_RAW(node))        r = compile_string_raw_node(STR_(node), reg);      else        r = compile_string_node(node, reg); @@ -1744,7 +1907,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)        switch (CTYPE_(node)->ctype) {        case CTYPE_ANYCHAR: -        if (IS_MULTILINE(reg->options)) +        if (IS_MULTILINE(CTYPE_OPTION(node, reg)))            r = add_opcode(reg, OP_ANYCHAR_ML);          else            r = add_opcode(reg, OP_ANYCHAR); @@ -1764,69 +1927,86 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)      }      break; -  case NODE_BREF: +  case NODE_BACKREF:      { -      BRefNode* br = BREF_(node); +      BackRefNode* br = BACKREF_(node); +      if (NODE_IS_CHECKER(node)) {  #ifdef USE_BACKREF_WITH_LEVEL -      if (NODE_IS_NEST_LEVEL(node)) { -        r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); -        if (r != 0) return r; -        r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); -        if (r != 0) return r; -        r = add_length(reg, br->nest_level); -        if (r != 0) return r; +        if (NODE_IS_NEST_LEVEL(node)) { +          r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL); +          if (r != 0) return r; +          r = add_length(reg, br->nest_level); +          if (r != 0) return r; +        } +        else +#endif +          { +            r = add_opcode(reg, OP_BACKREF_CHECK); +            if (r != 0) return r; +          }          goto add_bacref_mems;        } -      else -#endif -      if (br->back_num == 1) { -        n = br->back_static[0]; -        if (IS_IGNORECASE(reg->options)) { -          r = add_opcode(reg, OP_BACKREFN_IC); +      else { +#ifdef USE_BACKREF_WITH_LEVEL +        if (NODE_IS_NEST_LEVEL(node)) { +          r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); +          if (r != 0) return r; +          r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); +          if (r != 0) return r; +          r = add_length(reg, br->nest_level);            if (r != 0) return r; -          r = add_mem_num(reg, n); + +          goto add_bacref_mems;          } -        else { -          switch (n) { -          case 1:  r = add_opcode(reg, OP_BACKREF1); break; -          case 2:  r = add_opcode(reg, OP_BACKREF2); break; -          default: -            r = add_opcode(reg, OP_BACKREFN); +        else +#endif +        if (br->back_num == 1) { +          n = br->back_static[0]; +          if (IS_IGNORECASE(reg->options)) { +            r = add_opcode(reg, OP_BACKREF_N_IC);              if (r != 0) return r;              r = add_mem_num(reg, n); -            break;            } -        } -      } -      else { -        int i; -        int* p; - -        if (IS_IGNORECASE(reg->options)) { -          r = add_opcode(reg, OP_BACKREF_MULTI_IC); +          else { +            switch (n) { +            case 1:  r = add_opcode(reg, OP_BACKREF1); break; +            case 2:  r = add_opcode(reg, OP_BACKREF2); break; +            default: +              r = add_opcode(reg, OP_BACKREF_N); +              if (r != 0) return r; +              r = add_mem_num(reg, n); +              break; +            } +          }          }          else { -          r = add_opcode(reg, OP_BACKREF_MULTI); -        } -        if (r != 0) return r; +          int i; +          int* p; -#ifdef USE_BACKREF_WITH_LEVEL -      add_bacref_mems: -#endif -        r = add_length(reg, br->back_num); -        if (r != 0) return r; -        p = BACKREFS_P(br); -        for (i = br->back_num - 1; i >= 0; i--) { -          r = add_mem_num(reg, p[i]); +          if (IS_IGNORECASE(reg->options)) { +            r = add_opcode(reg, OP_BACKREF_MULTI_IC); +          } +          else { +            r = add_opcode(reg, OP_BACKREF_MULTI); +          }            if (r != 0) return r; + +        add_bacref_mems: +          r = add_length(reg, br->back_num); +          if (r != 0) return r; +          p = BACKREFS_P(br); +          for (i = br->back_num - 1; i >= 0; i--) { +            r = add_mem_num(reg, p[i]); +            if (r != 0) return r; +          }          }        }      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      r = compile_call(CALL_(node), reg, env);      break; @@ -1844,6 +2024,10 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)      r = compile_anchor_node(ANCHOR_(node), reg, env);      break; +  case NODE_GIMMICK: +    r = compile_gimmick_node(GIMMICK_(node), reg); +    break; +    default:  #ifdef ONIG_DEBUG      fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node)); @@ -1898,6 +2082,18 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)            r = noname_disable_map(plink, map, counter);          }        } +      else if (en->type == ENCLOSURE_IF_ELSE) { +        r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter); +        if (r != 0) return r; +        if (IS_NOT_NULL(en->te.Then)) { +          r = noname_disable_map(&(en->te.Then), map, counter); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = noname_disable_map(&(en->te.Else), map, counter); +          if (r != 0) return r; +        } +      }        else          r = noname_disable_map(&(NODE_BODY(node)), map, counter);      } @@ -1920,7 +2116,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map)  {    int i, pos, n, old_num;    int *backs; -  BRefNode* bn = BREF_(node); +  BackRefNode* bn = BACKREF_(node);    if (! NODE_IS_BY_NAME(node))      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; @@ -1957,11 +2153,29 @@ renumber_by_map(Node* node, GroupNumRemap* map)      break;    case NODE_QUANT: -  case NODE_ENCLOSURE:      r = renumber_by_map(NODE_BODY(node), map);      break; -  case NODE_BREF: +  case NODE_ENCLOSURE: +    { +      EnclosureNode* en = ENCLOSURE_(node); +      r = renumber_by_map(NODE_BODY(node), map); +      if (r != 0) return r; + +      if (en->type == ENCLOSURE_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          r = renumber_by_map(en->te.Then, map); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = renumber_by_map(en->te.Else, map); +          if (r != 0) return r; +        } +      } +    } +    break; + +  case NODE_BACKREF:      r = renumber_node_backref(node, map);      break; @@ -1995,11 +2209,30 @@ numbered_ref_check(Node* node)        break;      /* fall */    case NODE_QUANT: -  case NODE_ENCLOSURE:      r = numbered_ref_check(NODE_BODY(node));      break; -  case NODE_BREF: +  case NODE_ENCLOSURE: +    { +      EnclosureNode* en = ENCLOSURE_(node); +      r = numbered_ref_check(NODE_BODY(node)); +      if (r != 0) return r; + +      if (en->type == ENCLOSURE_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          r = numbered_ref_check(en->te.Then); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = numbered_ref_check(en->te.Else); +          if (r != 0) return r; +        } +      } +    } + +    break; + +  case NODE_BACKREF:      if (! NODE_IS_BY_NAME(node))        return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;      break; @@ -2052,7 +2285,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)  }  #endif /* USE_NAMED_GROUP */ -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL  static int  unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)  { @@ -2061,9 +2294,11 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)    AbsAddrType addr;    for (i = 0; i < uslist->num; i++) { +    if (! NODE_IS_ADDR_FIXED(uslist->us[i].target)) +      return ONIGERR_PARSER_BUG; +      en = ENCLOSURE_(uslist->us[i].target); -    if (! NODE_IS_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; -    addr = en->m.called_addr; +    addr   = en->m.called_addr;      offset = uslist->us[i].offset;      BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); @@ -2120,7 +2355,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)      }      break; -  case NODE_STR: +  case NODE_STRING:      {        StrNode* sn = STR_(node);        UChar *s = sn->s; @@ -2135,16 +2370,21 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)      {        QuantNode* qn = QUANT_(node);        if (qn->lower == qn->upper) { -        r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); -        if (r == 0) -          *len = distance_multiply(tlen, qn->lower); +        if (qn->upper == 0) { +          *len = 0; +        } +        else { +          r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); +          if (r == 0) +            *len = distance_multiply(tlen, qn->lower); +        }        }        else          r = GET_CHAR_LEN_VARLEN;      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      if (! NODE_IS_RECURSION(node))        r = get_char_length_tree1(NODE_BODY(node), reg, len, level); @@ -2166,7 +2406,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)        EnclosureNode* en = ENCLOSURE_(node);        switch (en->type) {        case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL          if (NODE_IS_CLEN_FIXED(node))            *len = en->char_len;          else { @@ -2182,6 +2422,31 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)        case ENCLOSURE_STOP_BACKTRACK:          r = get_char_length_tree1(NODE_BODY(node), reg, len, level);          break; +      case ENCLOSURE_IF_ELSE: +        { +          int clen, elen; +          r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level); +          if (r == 0) { +            if (IS_NOT_NULL(en->te.Then)) { +              r = get_char_length_tree1(en->te.Then, reg, &tlen, level); +              if (r != 0) break; +            } +            else tlen = 0; +            if (IS_NOT_NULL(en->te.Else)) { +              r = get_char_length_tree1(en->te.Else, reg, &elen, level); +              if (r != 0) break; +            } +            else elen = 0; + +            if (clen + tlen != elen) { +              r = GET_CHAR_LEN_VARLEN; +            } +            else { +              *len = elen; +            } +          } +        } +        break;        default:          break;        } @@ -2189,8 +2454,13 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)      break;    case NODE_ANCHOR: +  case NODE_GIMMICK:      break; +  case NODE_BACKREF: +    if (NODE_IS_CHECKER(node)) +      break; +    /* fall */    default:      r = GET_CHAR_LEN_VARLEN;      break; @@ -2241,7 +2511,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)          }          break; -      case NODE_STR: +      case NODE_STRING:          goto swap;          break; @@ -2318,7 +2588,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)          }          break; -      case NODE_STR: +      case NODE_STRING:          goto swap;          break; @@ -2328,10 +2598,10 @@ is_exclusive(Node* x, Node* y, regex_t* reg)      }      break; -  case NODE_STR: +  case NODE_STRING:      {        StrNode* xs = STR_(x); -      if (NSTRING_LEN(x) == 0) +      if (NODE_STRING_LEN(x) == 0)          break;        //c = *(xs->s); @@ -2362,13 +2632,13 @@ is_exclusive(Node* x, Node* y, regex_t* reg)          }          break; -      case NODE_STR: +      case NODE_STRING:          {            UChar *q;            StrNode* ys = STR_(y); -          len = NSTRING_LEN(x); -          if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); -          if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { +          len = NODE_STRING_LEN(x); +          if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y); +          if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {              /* tiny version */              return 0;            } @@ -2399,9 +2669,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg)    Node* n = NULL_NODE;    switch (NODE_TYPE(node)) { -  case NODE_BREF: +  case NODE_BACKREF:    case NODE_ALT: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:  #endif      break; @@ -2420,7 +2690,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)      n = get_head_value_node(NODE_CAR(node), exact, reg);      break; -  case NODE_STR: +  case NODE_STRING:      {        StrNode* sn = STR_(node); @@ -2428,7 +2698,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)          break;        if (exact != 0 && -          !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { +          !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {        }        else {          n = node; @@ -2456,7 +2726,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)          {            OnigOptionType options = reg->options; -          reg->options = ENCLOSURE_(node)->o.option; +          reg->options = ENCLOSURE_(node)->o.options;            n = get_head_value_node(NODE_BODY(node), exact, reg);            reg->options = options;          } @@ -2464,6 +2734,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)        case ENCLOSURE_MEMORY:        case ENCLOSURE_STOP_BACKTRACK: +      case ENCLOSURE_IF_ELSE:          n = get_head_value_node(NODE_BODY(node), exact, reg);          break;        } @@ -2475,6 +2746,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)        n = get_head_value_node(NODE_BODY(node), exact, reg);      break; +  case NODE_GIMMICK:    default:      break;    } @@ -2512,6 +2784,15 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)          return 1;        r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); +      if (r == 0 && en->type == ENCLOSURE_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask); +          if (r != 0) break; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask); +        } +      }      }      break; @@ -2524,6 +2805,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)        r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);      break; +  case NODE_GIMMICK:    default:      break;    } @@ -2531,31 +2813,31 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)  }  static OnigLen -get_min_len(Node* node, ScanEnv* env) +tree_min_len(Node* node, ScanEnv* env)  {    OnigLen len;    OnigLen tmin;    len = 0;    switch (NODE_TYPE(node)) { -  case NODE_BREF: -    { +  case NODE_BACKREF: +    if (! NODE_IS_CHECKER(node)) {        int i;        int* backs;        MemEnv* mem_env = SCANENV_MEMENV(env); -      BRefNode* br = BREF_(node); +      BackRefNode* br = BACKREF_(node);        if (NODE_IS_RECURSION(node)) break;        backs = BACKREFS_P(br); -      len = get_min_len(mem_env[backs[0]].node, env); +      len = tree_min_len(mem_env[backs[0]].node, env);        for (i = 1; i < br->back_num; i++) { -        tmin = get_min_len(mem_env[backs[i]].node, env); +        tmin = tree_min_len(mem_env[backs[i]].node, env);          if (len > tmin) len = tmin;        }      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      {        Node* t = NODE_BODY(node); @@ -2564,15 +2846,15 @@ get_min_len(Node* node, ScanEnv* env)            len = ENCLOSURE_(t)->min_len;        }        else -        len = get_min_len(t, env); +        len = tree_min_len(t, env);      }      break;  #endif    case NODE_LIST:      do { -      tmin = get_min_len(NODE_CAR(node), env); -      len += tmin; +      tmin = tree_min_len(NODE_CAR(node), env); +      len = distance_add(len, tmin);      } while (IS_NOT_NULL(node = NODE_CDR(node)));      break; @@ -2582,14 +2864,14 @@ get_min_len(Node* node, ScanEnv* env)        y = node;        do {          x = NODE_CAR(y); -        tmin = get_min_len(x, env); +        tmin = tree_min_len(x, env);          if (y == node) len = tmin;          else if (len > tmin) len = tmin;        } while (IS_NOT_NULL(y = NODE_CDR(y)));      }      break; -  case NODE_STR: +  case NODE_STRING:      {        StrNode* sn = STR_(node);        len = sn->end - sn->s; @@ -2598,7 +2880,7 @@ get_min_len(Node* node, ScanEnv* env)    case NODE_CTYPE:    case NODE_CCLASS: -    len = 1; +    len = ONIGENC_MBC_MINLEN(env->enc);      break;    case NODE_QUANT: @@ -2606,7 +2888,7 @@ get_min_len(Node* node, ScanEnv* env)        QuantNode* qn = QUANT_(node);        if (qn->lower > 0) { -        len = get_min_len(NODE_BODY(node), env); +        len = tree_min_len(NODE_BODY(node), env);          len = distance_multiply(len, qn->lower);        }      } @@ -2624,7 +2906,7 @@ get_min_len(Node* node, ScanEnv* env)              len = 0;  // recursive            else {              NODE_STATUS_ADD(node, NST_MARK1); -            len = get_min_len(NODE_BODY(node), env); +            len = tree_min_len(NODE_BODY(node), env);              NODE_STATUS_REMOVE(node, NST_MARK1);              en->min_len = len; @@ -2635,12 +2917,34 @@ get_min_len(Node* node, ScanEnv* env)        case ENCLOSURE_OPTION:        case ENCLOSURE_STOP_BACKTRACK: -        len = get_min_len(NODE_BODY(node), env); +        len = tree_min_len(NODE_BODY(node), env); +        break; +      case ENCLOSURE_IF_ELSE: +        { +          int elen; +          len = tree_min_len(NODE_BODY(node), env); +          if (IS_NOT_NULL(en->te.Then)) +            len += tree_min_len(en->te.Then, env); +          if (IS_NOT_NULL(en->te.Else)) +            elen = tree_min_len(en->te.Else, env); +          else elen = 0; + +          if (elen < len) len = elen; +        }          break;        }      }      break; +  case NODE_GIMMICK: +    { +      GimmickNode* g = GIMMICK_(node); +      if (g->type == GIMMICK_FAIL) { +        len = INFINITE_LEN; +        break; +      } +    } +    /* fall */    case NODE_ANCHOR:    default:      break; @@ -2650,7 +2954,7 @@ get_min_len(Node* node, ScanEnv* env)  }  static OnigLen -get_max_len(Node* node, ScanEnv* env) +tree_max_len(Node* node, ScanEnv* env)  {    OnigLen len;    OnigLen tmax; @@ -2659,19 +2963,19 @@ get_max_len(Node* node, ScanEnv* env)    switch (NODE_TYPE(node)) {    case NODE_LIST:      do { -      tmax = get_max_len(NODE_CAR(node), env); +      tmax = tree_max_len(NODE_CAR(node), env);        len = distance_add(len, tmax);      } while (IS_NOT_NULL(node = NODE_CDR(node)));      break;    case NODE_ALT:      do { -      tmax = get_max_len(NODE_CAR(node), env); +      tmax = tree_max_len(NODE_CAR(node), env);        if (len < tmax) len = tmax;      } while (IS_NOT_NULL(node = NODE_CDR(node)));      break; -  case NODE_STR: +  case NODE_STRING:      {        StrNode* sn = STR_(node);        len = sn->end - sn->s; @@ -2683,30 +2987,30 @@ get_max_len(Node* node, ScanEnv* env)      len = ONIGENC_MBC_MAXLEN_DIST(env->enc);      break; -  case NODE_BREF: -    { +  case NODE_BACKREF: +    if (! NODE_IS_CHECKER(node)) {        int i;        int* backs;        MemEnv* mem_env = SCANENV_MEMENV(env); -      BRefNode* br = BREF_(node); +      BackRefNode* br = BACKREF_(node);        if (NODE_IS_RECURSION(node)) { -        len = ONIG_INFINITE_DISTANCE; +        len = INFINITE_LEN;          break;        }        backs = BACKREFS_P(br);        for (i = 0; i < br->back_num; i++) { -        tmax = get_max_len(mem_env[backs[i]].node, env); +        tmax = tree_max_len(mem_env[backs[i]].node, env);          if (len < tmax) len = tmax;        }      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      if (! NODE_IS_RECURSION(node)) -      len = get_max_len(NODE_BODY(node), env); +      len = tree_max_len(NODE_BODY(node), env);      else -      len = ONIG_INFINITE_DISTANCE; +      len = INFINITE_LEN;      break;  #endif @@ -2715,12 +3019,12 @@ get_max_len(Node* node, ScanEnv* env)        QuantNode* qn = QUANT_(node);        if (qn->upper != 0) { -        len = get_max_len(NODE_BODY(node), env); +        len = tree_max_len(NODE_BODY(node), env);          if (len != 0) {            if (! IS_REPEAT_INFINITE(qn->upper))              len = distance_multiply(len, qn->upper);            else -            len = ONIG_INFINITE_DISTANCE; +            len = INFINITE_LEN;          }        }      } @@ -2735,10 +3039,10 @@ get_max_len(Node* node, ScanEnv* env)            len = en->max_len;          else {            if (NODE_IS_MARK1(node)) -            len = ONIG_INFINITE_DISTANCE; +            len = INFINITE_LEN;            else {              NODE_STATUS_ADD(node, NST_MARK1); -            len = get_max_len(NODE_BODY(node), env); +            len = tree_max_len(NODE_BODY(node), env);              NODE_STATUS_REMOVE(node, NST_MARK1);              en->max_len = len; @@ -2749,13 +3053,29 @@ get_max_len(Node* node, ScanEnv* env)        case ENCLOSURE_OPTION:        case ENCLOSURE_STOP_BACKTRACK: -        len = get_max_len(NODE_BODY(node), env); +        len = tree_max_len(NODE_BODY(node), env); +        break; +      case ENCLOSURE_IF_ELSE: +        { +          int tlen, elen; +          len = tree_max_len(NODE_BODY(node), env); +          if (IS_NOT_NULL(en->te.Then)) { +            tlen = tree_max_len(en->te.Then, env); +            len = distance_add(len, tlen); +          } +          if (IS_NOT_NULL(en->te.Else)) +            elen = tree_max_len(en->te.Else, env); +          else elen = 0; + +          if (elen > len) len = elen; +        }          break;        }      }      break;    case NODE_ANCHOR: +  case NODE_GIMMICK:    default:      break;    } @@ -2783,14 +3103,31 @@ check_backrefs(Node* node, ScanEnv* env)      }      /* fall */    case NODE_QUANT: +    r = check_backrefs(NODE_BODY(node), env); +    break; +    case NODE_ENCLOSURE:      r = check_backrefs(NODE_BODY(node), env); +    { +      EnclosureNode* en = ENCLOSURE_(node); + +      if (en->type == ENCLOSURE_IF_ELSE) { +        if (r != 0) return r; +        if (IS_NOT_NULL(en->te.Then)) { +          r = check_backrefs(en->te.Then, env); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = check_backrefs(en->te.Else, env); +        } +      } +    }      break; -  case NODE_BREF: +  case NODE_BACKREF:      {        int i; -      BRefNode* br = BREF_(node); +      BackRefNode* br = BACKREF_(node);        int* backs = BACKREFS_P(br);        MemEnv* mem_env = SCANENV_MEMENV(env); @@ -2813,7 +3150,7 @@ check_backrefs(Node* node, ScanEnv* env)  } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL  #define RECURSION_EXIST        (1<<0)  #define RECURSION_MUST         (1<<1) @@ -2822,6 +3159,7 @@ check_backrefs(Node* node, ScanEnv* env)  static int  infinite_recursive_call_check(Node* node, ScanEnv* env, int head)  { +  int ret;    int r = 0;    switch (NODE_TYPE(node)) { @@ -2829,15 +3167,14 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)      {        Node *x;        OnigLen min; -      int ret;        x = node;        do {          ret = infinite_recursive_call_check(NODE_CAR(x), env, head);          if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;          r |= ret; -        if (head) { -          min = get_min_len(NODE_CAR(x), env); +        if (head != 0) { +          min = tree_min_len(NODE_CAR(x), env);            if (min != 0) head = 0;          }        } while (IS_NOT_NULL(x = NODE_CDR(x))); @@ -2846,7 +3183,6 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)    case NODE_ALT:      { -      int ret;        int must;        must = RECURSION_MUST; @@ -2894,6 +3230,31 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)            NODE_STATUS_REMOVE(node, NST_MARK2);          }        } +      else if (en->type == ENCLOSURE_IF_ELSE) { +        int eret; + +        ret = infinite_recursive_call_check(NODE_BODY(node), env, head); +        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; +        r |= ret; +        if (IS_NOT_NULL(en->te.Then)) { +          OnigLen min; +          if (head != 0) { +            min = tree_min_len(NODE_BODY(node), env); +          } +          else min = 0; + +          ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head); +          if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; +          r |= ret; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          eret = infinite_recursive_call_check(en->te.Else, env, head); +          if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret; +          r |= (eret & RECURSION_EXIST); +          if ((eret & RECURSION_MUST) == 0) +            r &= ~RECURSION_MUST; +        } +      }        else {          r = infinite_recursive_call_check(NODE_BODY(node), env, head);        } @@ -2948,6 +3309,16 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env)            NODE_STATUS_REMOVE(node, NST_MARK1);          }        } +      else if (en->type == ENCLOSURE_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          r = infinite_recursive_call_check_trav(en->te.Then, env); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r = infinite_recursive_call_check_trav(en->te.Else, env); +          if (r != 0) return r; +        } +      }      }      r = infinite_recursive_call_check_trav(NODE_BODY(node), env); @@ -2987,7 +3358,10 @@ recursive_call_check(Node* node)    case NODE_CALL:      r = recursive_call_check(NODE_BODY(node)); -    if (r != 0) NODE_STATUS_ADD(node, NST_RECURSION); +    if (r != 0) { +      if (NODE_IS_MARK1(NODE_BODY(node))) +        NODE_STATUS_ADD(node, NST_RECURSION); +    }      break;    case NODE_ENCLOSURE: @@ -3005,6 +3379,16 @@ recursive_call_check(Node* node)            NODE_STATUS_REMOVE(node, NST_MARK2);          }        } +      else if (en->type == ENCLOSURE_IF_ELSE) { +        r = 0; +        if (IS_NOT_NULL(en->te.Then)) { +          r |= recursive_call_check(en->te.Then); +        } +        if (IS_NOT_NULL(en->te.Else)) { +          r |= recursive_call_check(en->te.Else); +        } +        r |= recursive_call_check(NODE_BODY(node)); +      }        else {          r = recursive_call_check(NODE_BODY(node));        } @@ -3058,6 +3442,8 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)    case NODE_ENCLOSURE:      { +      int ret; +      int state1;        EnclosureNode* en = ENCLOSURE_(node);        if (en->type == ENCLOSURE_MEMORY) { @@ -3075,16 +3461,25 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)          }        } -      { -        int ret; -        int state1 = state; +      state1 = state; +      if (NODE_IS_RECURSION(node)) +        state1 |= IN_RECURSION; -        if (NODE_IS_RECURSION(node)) -          state1 |= IN_RECURSION; +      ret = recursive_call_check_trav(NODE_BODY(node), env, state1); +      if (ret == FOUND_CALLED_NODE) +        r = FOUND_CALLED_NODE; -        ret = recursive_call_check_trav(NODE_BODY(node), env, state1); -        if (ret == FOUND_CALLED_NODE) -          r = FOUND_CALLED_NODE; +      if (en->type == ENCLOSURE_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          ret = recursive_call_check_trav(en->te.Then, env, state1); +          if (ret == FOUND_CALLED_NODE) +            r = FOUND_CALLED_NODE; +        } +        if (IS_NOT_NULL(en->te.Else)) { +          ret = recursive_call_check_trav(en->te.Else, env, state1); +          if (ret == FOUND_CALLED_NODE) +            r = FOUND_CALLED_NODE; +        }        }      }      break; @@ -3126,7 +3521,7 @@ divide_look_behind_alternatives(Node* node)    if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {      np = node;      do { -      SET_NODE_TYPE(np, NODE_LIST);  /* alt -> list */ +      NODE_SET_TYPE(np, NODE_LIST);  /* alt -> list */      } while (IS_NOT_NULL(np = NODE_CDR(np)));    }    return 0; @@ -3257,8 +3652,8 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,      return r;    } -  NSTRING_SET_AMBIG(node); -  NSTRING_SET_DONT_GET_OPT_INFO(node); +  NODE_STRING_SET_AMBIG(node); +  NODE_STRING_SET_DONT_GET_OPT_INFO(node);    *rnode = node;    return 0;  } @@ -3386,7 +3781,7 @@ expand_case_fold_string(Node* node, regex_t* reg)    OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];    StrNode* sn = STR_(node); -  if (NSTRING_IS_AMBIG(node)) return 0; +  if (NODE_STRING_IS_AMBIG(node)) return 0;    start = sn->s;    end   = sn->end; @@ -3529,10 +3924,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)    switch (NODE_TYPE(node)) {    case NODE_LIST:      { -      Node* prev = NULL_NODE;        do {          r = setup_comb_exp_check(NODE_CAR(node), r, env); -        prev = NODE_CAR(node);        } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node)));      }      break; @@ -3619,8 +4012,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)        switch (en->type) {        case ENCLOSURE_MEMORY:          { -          if (env->curr_max_regnum < en->regnum) -            env->curr_max_regnum = en->regnum; +          if (env->curr_max_regnum < en->m.regnum) +            env->curr_max_regnum = en->m.regnum;            r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env);          } @@ -3633,7 +4026,7 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      if (NODE_IS_RECURSION(node))        env->has_recursion = 1; @@ -3668,7 +4061,7 @@ quantifiers_memory_node_info(Node* node)      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      if (NODE_IS_RECURSION(node)) {        return QUANT_BODY_IS_EMPTY_REC; /* tiny version */ @@ -3702,17 +4095,32 @@ quantifiers_memory_node_info(Node* node)        case ENCLOSURE_STOP_BACKTRACK:          r = quantifiers_memory_node_info(NODE_BODY(node));          break; +      case ENCLOSURE_IF_ELSE: +        { +          int v; +          r = quantifiers_memory_node_info(NODE_BODY(node)); +          if (IS_NOT_NULL(en->te.Then)) { +            v = quantifiers_memory_node_info(en->te.Then); +            if (v > r) r = v; +          } +          if (IS_NOT_NULL(en->te.Else)) { +            v = quantifiers_memory_node_info(en->te.Else); +            if (v > r) r = v; +          } +        } +        break;        default:          break;        }      }      break; -  case NODE_BREF: -  case NODE_STR: +  case NODE_BACKREF: +  case NODE_STRING:    case NODE_CTYPE:    case NODE_CCLASS:    case NODE_ANCHOR: +  case NODE_GIMMICK:    default:      break;    } @@ -3729,7 +4137,7 @@ quantifiers_memory_node_info(Node* node)  #define IN_ZERO_REPEAT  (1<<4)  #define IN_MULTI_ENTRY  (1<<5) -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL  #ifdef __GNUC__  __inline @@ -3745,7 +4153,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)  #ifdef USE_NAMED_GROUP      if (env->num_named > 0 &&          IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && -        !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { +        !ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {        return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;      }  #endif @@ -3811,10 +4219,26 @@ setup_call2_call(Node* node)      break;    case NODE_ENCLOSURE: -    if (! NODE_IS_MARK1(node)) { -      NODE_STATUS_ADD(node, NST_MARK1); -      setup_call2_call(NODE_BODY(node)); -      NODE_STATUS_REMOVE(node, NST_MARK1); +    { +      EnclosureNode* en = ENCLOSURE_(node); + +      if (en->type == ENCLOSURE_MEMORY) { +        if (! NODE_IS_MARK1(node)) { +          NODE_STATUS_ADD(node, NST_MARK1); +          setup_call2_call(NODE_BODY(node)); +          NODE_STATUS_REMOVE(node, NST_MARK1); +        } +      } +      else if (en->type == ENCLOSURE_IF_ELSE) { +        setup_call2_call(NODE_BODY(node)); +        if (IS_NOT_NULL(en->te.Then)) +          setup_call2_call(en->te.Then); +        if (IS_NOT_NULL(en->te.Else)) +          setup_call2_call(en->te.Else); +      } +      else { +        setup_call2_call(NODE_BODY(node)); +      }      }      break; @@ -3868,11 +4292,29 @@ setup_call(Node* node, ScanEnv* env, int state)      break;    case NODE_ENCLOSURE: -    if ((state & IN_ZERO_REPEAT) != 0) { -      NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT); -      ENCLOSURE_(node)->m.entry_count--; +    { +      EnclosureNode* en = ENCLOSURE_(node); + +      if (en->type == ENCLOSURE_MEMORY) { +        if ((state & IN_ZERO_REPEAT) != 0) { +          NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT); +          ENCLOSURE_(node)->m.entry_count--; +        } +        r = setup_call(NODE_BODY(node), env, state); +      } +      else if (en->type == ENCLOSURE_IF_ELSE) { +        r = setup_call(NODE_BODY(node), env, state); +        if (r != 0) return r; +        if (IS_NOT_NULL(en->te.Then)) { +          r = setup_call(en->te.Then, env, state); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) +          r = setup_call(en->te.Else, env, state); +      } +      else +        r = setup_call(NODE_BODY(node), env, state);      } -    r = setup_call(NODE_BODY(node), env, state);      break;    case NODE_CALL: @@ -3918,6 +4360,20 @@ setup_call2(Node* node)    case NODE_ENCLOSURE:      if (! NODE_IS_IN_ZERO_REPEAT(node))        r = setup_call2(NODE_BODY(node)); + +    { +      EnclosureNode* en = ENCLOSURE_(node); + +      if (r != 0) return r; +      if (en->type == ENCLOSURE_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          r = setup_call2(en->te.Then); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) +          r = setup_call2(en->te.Else); +      } +    }      break;    case NODE_CALL: @@ -3997,6 +4453,13 @@ setup_called_state_call(Node* node, int state)            NODE_STATUS_REMOVE(node, NST_MARK1);          }        } +      else if (en->type == ENCLOSURE_IF_ELSE) { +        if (IS_NOT_NULL(en->te.Then)) { +          setup_called_state_call(en->te.Then, state); +        } +        if (IS_NOT_NULL(en->te.Else)) +          setup_called_state_call(en->te.Else, state); +      }        else {          setup_called_state_call(NODE_BODY(node), state);        } @@ -4025,7 +4488,7 @@ setup_called_state(Node* node, int state)      } while (IS_NOT_NULL(node = NODE_CDR(node)));      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      setup_called_state_call(node, state);      break; @@ -4046,6 +4509,13 @@ setup_called_state(Node* node, int state)        case ENCLOSURE_STOP_BACKTRACK:          setup_called_state(NODE_BODY(node), state);          break; +      case ENCLOSURE_IF_ELSE: +        setup_called_state(NODE_BODY(node), state); +        if (IS_NOT_NULL(en->te.Then)) +          setup_called_state(en->te.Then, state); +        if (IS_NOT_NULL(en->te.Else)) +          setup_called_state(en->te.Else, state); +        break;        }      }      break; @@ -4082,16 +4552,17 @@ setup_called_state(Node* node, int state)      }      break; -  case NODE_BREF: -  case NODE_STR: +  case NODE_BACKREF: +  case NODE_STRING:    case NODE_CTYPE:    case NODE_CCLASS: +  case NODE_GIMMICK:    default:      break;    }  } -#endif  /* USE_SUBEXP_CALL */ +#endif  /* USE_CALL */  static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env); @@ -4104,8 +4575,9 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)  {  /* allowed node types in look-behind */  #define ALLOWED_TYPE_IN_LB \ -  ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE \ -  | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT | BIT_NODE_CALL ) +  ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STRING | BIT_NODE_CCLASS \ +  | BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \ +  | BIT_NODE_CALL )  #define ALLOWED_ENCLOSURE_IN_LB       ( ENCLOSURE_MEMORY | ENCLOSURE_OPTION )  #define ALLOWED_ENCLOSURE_IN_LB_NOT   ENCLOSURE_OPTION @@ -4182,7 +4654,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)    }    if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { -    d = get_min_len(body, env); +    d = tree_min_len(body, env);      if (d == 0) {  #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT        qn->body_empty_info = quantifiers_memory_node_info(body); @@ -4208,10 +4680,10 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)    /* expand string */  #define EXPAND_STRING_MAX_LENGTH  100 -  if (NODE_TYPE(body) == NODE_STR) { +  if (NODE_TYPE(body) == NODE_STRING) {      if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&          qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { -      int len = NSTRING_LEN(body); +      int len = NODE_STRING_LEN(body);        StrNode* sn = STR_(body);        if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { @@ -4278,17 +4750,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)      } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));      break; -  case NODE_STR: -    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { +  case NODE_STRING: +    if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {        r = expand_case_fold_string(node, reg);      }      break; -  case NODE_BREF: +  case NODE_BACKREF:      {        int i;        int* p; -      BRefNode* br = BREF_(node); +      BackRefNode* br = BACKREF_(node);        p = BACKREFS_P(br);        for (i = 0; i < br->back_num; i++) {          if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF; @@ -4311,14 +4783,14 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)        case ENCLOSURE_OPTION:          {            OnigOptionType options = reg->options; -          reg->options = ENCLOSURE_(node)->o.option; +          reg->options = ENCLOSURE_(node)->o.options;            r = setup_tree(NODE_BODY(node), reg, state, env);            reg->options = options;          }          break;        case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL          state |= en->m.called_state;  #endif @@ -4343,6 +4815,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)            }          }          break; + +      case ENCLOSURE_IF_ELSE: +        r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env); +        if (r != 0) return r; +        if (IS_NOT_NULL(en->te.Then)) { +          r = setup_tree(en->te.Then, reg, (state | IN_ALT), env); +          if (r != 0) return r; +        } +        if (IS_NOT_NULL(en->te.Else)) +          r = setup_tree(en->te.Else, reg, (state | IN_ALT), env); +        break;        }      }      break; @@ -4355,11 +4838,12 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)      r = setup_anchor(node, reg, state, env);      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:  #endif    case NODE_CTYPE:    case NODE_CCLASS: +  case NODE_GIMMICK:    default:      break;    } @@ -4487,7 +4971,7 @@ distance_value(MinMaxLen* mm)    OnigLen d; -  if (mm->max == ONIG_INFINITE_DISTANCE) return 0; +  if (mm->max == INFINITE_LEN) return 0;    d = mm->max - mm->min;    if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0]))) @@ -5048,15 +5532,15 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)      }      break; -  case NODE_STR: +  case NODE_STRING:      {        StrNode* sn = STR_(node);        int slen = sn->end - sn->s; -      int is_raw = NSTRING_IS_RAW(node); +      int is_raw = NODE_STRING_IS_RAW(node); -      if (! NSTRING_IS_AMBIG(node)) { +      if (! NODE_STRING_IS_AMBIG(node)) {          concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, -                                  NSTRING_IS_RAW(node), env->enc); +                                  NODE_STRING_IS_RAW(node), env->enc);          if (slen > 0) {            add_char_opt_map_info(&opt->map, *(sn->s), env->enc);          } @@ -5065,7 +5549,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)        else {          int max; -        if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { +        if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {            int n = onigenc_strlen(env->enc, sn->s, sn->end);            max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;          } @@ -5191,24 +5675,24 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)      }      break; -  case NODE_BREF: -    { +  case NODE_BACKREF: +    if (! NODE_IS_CHECKER(node)) {        int i;        int* backs;        OnigLen min, max, tmin, tmax;        MemEnv* mem_env = SCANENV_MEMENV(env->scan_env); -      BRefNode* br = BREF_(node); +      BackRefNode* br = BACKREF_(node);        if (NODE_IS_RECURSION(node)) { -        set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); +        set_mml(&opt->len, 0, INFINITE_LEN);          break;        }        backs = BACKREFS_P(br); -      min = get_min_len(mem_env[backs[0]].node, env->scan_env); -      max = get_max_len(mem_env[backs[0]].node, env->scan_env); +      min = tree_min_len(mem_env[backs[0]].node, env->scan_env); +      max = tree_max_len(mem_env[backs[0]].node, env->scan_env);        for (i = 1; i < br->back_num; i++) { -        tmin = get_min_len(mem_env[backs[i]].node, env->scan_env); -        tmax = get_max_len(mem_env[backs[i]].node, env->scan_env); +        tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env); +        tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);          if (min > tmin) min = tmin;          if (max < tmax) max = tmax;        } @@ -5216,13 +5700,13 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      if (NODE_IS_RECURSION(node)) -      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); +      set_mml(&opt->len, 0, INFINITE_LEN);      else {        OnigOptionType save = env->options; -      env->options = ENCLOSURE_(NODE_BODY(node))->o.option; +      env->options = ENCLOSURE_(NODE_BODY(node))->o.options;        r = optimize_node_left(NODE_BODY(node), opt, env);        env->options = save;      } @@ -5242,7 +5726,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)        if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {          if (env->mmd.max == 0 &&              NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { -          if (IS_MULTILINE(env->options)) +          if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))              add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);            else              add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); @@ -5274,7 +5758,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)        min = distance_multiply(nopt.len.min, qn->lower);        if (IS_REPEAT_INFINITE(qn->upper)) -        max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); +        max = (nopt.len.max > 0 ? INFINITE_LEN : 0);        else          max = distance_multiply(nopt.len.max, qn->upper); @@ -5291,20 +5775,20 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)          {            OnigOptionType save = env->options; -          env->options = en->o.option; +          env->options = en->o.options;            r = optimize_node_left(NODE_BODY(node), opt, env);            env->options = save;          }          break;        case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL          en->opt_count++;          if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {            OnigLen min, max;            min = 0; -          max = ONIG_INFINITE_DISTANCE; +          max = INFINITE_LEN;            if (NODE_IS_MIN_FIXED(node)) min = en->min_len;            if (NODE_IS_MAX_FIXED(node)) max = en->max_len;            set_mml(&opt->len, min, max); @@ -5324,10 +5808,39 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)        case ENCLOSURE_STOP_BACKTRACK:          r = optimize_node_left(NODE_BODY(node), opt, env);          break; + +      case ENCLOSURE_IF_ELSE: +        { +          OptEnv nenv; +          NodeOptInfo nopt; + +          copy_opt_env(&nenv, env); +          r = optimize_node_left(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); +          if (r == 0) { +            add_mml(&nenv.mmd, &nopt.len); +            concat_left_node_opt_info(env->enc, opt, &nopt); +            if (IS_NOT_NULL(en->te.Then)) { +              r = optimize_node_left(en->te.Then, &nopt, &nenv); +              if (r == 0) { +                concat_left_node_opt_info(env->enc, opt, &nopt); +              } +            } + +            if (IS_NOT_NULL(en->te.Else)) { +              r = optimize_node_left(en->te.Else, &nopt, env); +              if (r == 0) +                alt_merge_node_opt_info(opt, &nopt, env); +            } +          } +        } +        break;        }      }      break; +  case NODE_GIMMICK: +    break; +    default:  #ifdef ONIG_DEBUG      fprintf(stderr, "optimize_node_left: undefined node type %d\n", NODE_TYPE(node)); @@ -5379,7 +5892,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)    reg->dmin = e->mmd.min;    reg->dmax = e->mmd.max; -  if (reg->dmin != ONIG_INFINITE_DISTANCE) { +  if (reg->dmin != INFINITE_LEN) {      reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);    } @@ -5398,7 +5911,7 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m)    reg->dmin       = m->mmd.min;    reg->dmax       = m->mmd.max; -  if (reg->dmin != ONIG_INFINITE_DISTANCE) { +  if (reg->dmin != INFINITE_LEN) {      reg->threshold_len = reg->dmin + 1;    }  } @@ -5531,14 +6044,14 @@ static void print_enc_string(FILE* fp, OnigEncoding enc,  static void  print_distance_range(FILE* f, OnigLen a, OnigLen b)  { -  if (a == ONIG_INFINITE_DISTANCE) +  if (a == INFINITE_LEN)      fputs("inf", f);    else      fprintf(f, "(%u)", a);    fputs("-", f); -  if (b == ONIG_INFINITE_DISTANCE) +  if (b == INFINITE_LEN)      fputs("inf", f);    else      fprintf(f, "(%u)", b); @@ -5656,7 +6169,7 @@ onig_free_body(regex_t* reg)      if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);      if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);      if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range); -    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain); +    if (IS_NOT_NULL(REG_EXTP(reg)))         xfree(REG_EXTP(reg));  #ifdef USE_NAMED_GROUP      onig_names_free(reg); @@ -5702,7 +6215,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    int r, init_size;    Node*  root;    ScanEnv  scan_env; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    UnsetAddrList  uslist;  #endif @@ -5751,7 +6264,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    r = check_backrefs(root, &scan_env);    if (r != 0) goto err; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    if (scan_env.num_call > 0) {      r = unset_addr_list_init(&uslist, scan_env.num_call);      if (r != 0) goto err; @@ -5791,12 +6304,12 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,  #ifdef USE_COMBINATION_EXPLOSION_CHECK    if (scan_env.backrefed_mem == 0 -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL        || scan_env.num_call == 0  #endif        ) {      setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      if (scan_env.has_recursion != 0) {        scan_env.num_comb_exp_check = 0;      } @@ -5829,8 +6342,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    r = compile_tree(root, reg, &scan_env);    if (r == 0) { +    if (scan_env.keep_num > 0) { +      r = add_opcode(reg, OP_UPDATE_VAR); +      if (r != 0) goto err; +      r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST); +      if (r != 0) goto err; +      r = add_mem_num(reg, 0 /* not used */); +      if (r != 0) goto err; +    } +      r = add_opcode(reg, OP_END); -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      if (scan_env.num_call > 0) {        r = unset_addr_list_fix(&uslist, reg);        unset_addr_list_end(&uslist); @@ -5847,7 +6369,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,          reg->stack_pop_level = STACK_POP_LEVEL_FREE;      }    } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    else if (scan_env.num_call > 0) {      unset_addr_list_end(&uslist);    } @@ -5865,7 +6387,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,    return r;   err_unset: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    if (scan_env.num_call > 0) {      unset_addr_list_end(&uslist);    } @@ -5890,8 +6412,8 @@ static int onig_inited = 0;  extern int  onig_reg_init(regex_t* reg, OnigOptionType option, -	      OnigCaseFoldType case_fold_flag, -	      OnigEncoding enc, OnigSyntaxType* syntax) +              OnigCaseFoldType case_fold_flag, +              OnigEncoding enc, OnigSyntaxType* syntax)  {    int r; @@ -5938,7 +6460,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option,    (reg)->exact            = (UChar* )NULL;    (reg)->int_map          = (int* )NULL;    (reg)->int_map_backward = (int* )NULL; -  (reg)->chain            = (regex_t* )NULL; +  REG_EXTPL(reg) = NULL;    (reg)->p                = (UChar* )NULL;    (reg)->alloc            = 0; @@ -6165,11 +6687,13 @@ OnigOpInfoType OnigOpInfo[] = {    { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },    { OP_BACKREF1,            "backref1",             ARG_NON },    { OP_BACKREF2,            "backref2",             ARG_NON }, -  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  }, -  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL }, +  { OP_BACKREF_N,            "backref-n",           ARG_MEMNUM  }, +  { OP_BACKREF_N_IC,         "backref-n-ic",        ARG_SPECIAL },    { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },    { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL }, -  { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL }, +  { OP_BACKREF_WITH_LEVEL,  "backref_with_level",   ARG_SPECIAL }, +  { OP_BACKREF_CHECK,       "backref_check",        ARG_SPECIAL }, +  { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL },    { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },    { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },    { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  }, @@ -6181,6 +6705,7 @@ OnigOpInfoType OnigOpInfo[] = {    { OP_FAIL,                "fail",                 ARG_NON },    { OP_JUMP,                "jump",                 ARG_RELADDR },    { OP_PUSH,                "push",                 ARG_RELADDR }, +  { OP_PUSH_SUPER,          "push_SUPER",           ARG_RELADDR },    { OP_POP,                 "pop",                  ARG_NON },    { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },    { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL }, @@ -6194,10 +6719,10 @@ OnigOpInfoType OnigOpInfo[] = {    { OP_EMPTY_CHECK_END,     "empty-check-end",      ARG_MEMNUM  },    { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM  },    { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM  }, -  { OP_PUSH_POS,             "push-pos",             ARG_NON }, -  { OP_POP_POS,              "pop-pos",              ARG_NON }, -  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR }, -  { OP_FAIL_POS,             "fail-pos",             ARG_NON }, +  { OP_PREC_READ_START,      "push-pos",             ARG_NON }, +  { OP_PREC_READ_END,        "pop-pos",              ARG_NON }, +  { OP_PUSH_PREC_READ_NOT,   "push-prec-read-not",   ARG_RELADDR }, +  { OP_FAIL_PREC_READ_NOT,   "fail-prec-read-not",   ARG_NON },    { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },    { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },    { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL }, @@ -6205,6 +6730,8 @@ OnigOpInfoType OnigOpInfo[] = {    { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },    { OP_CALL,                 "call",                 ARG_ABSADDR },    { OP_RETURN,               "return",               ARG_NON }, +  { OP_PUSH_SAVE_VAL,        "push-save-val",        ARG_SPECIAL }, +  { OP_UPDATE_VAR,           "update-var",           ARG_SPECIAL },    { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },    { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },    { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK }, @@ -6272,6 +6799,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,    MemNumType mem;    StateCheckNumType scn;    OnigCodePoint code; +  OnigOptionType option;    UChar *q;    fprintf(f, "%s", op2name(*bp)); @@ -6421,7 +6949,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,        }        break; -    case OP_BACKREFN_IC: +    case OP_BACKREF_N_IC:        mem = *((MemNumType* )bp);        bp += SIZE_MEMNUM;        fprintf(f, ":%d", mem); @@ -6429,6 +6957,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,      case OP_BACKREF_MULTI_IC:      case OP_BACKREF_MULTI: +    case OP_BACKREF_CHECK:        fputs(" ", f);        GET_LENGTH_INC(len, bp);        for (i = 0; i < len; i++) { @@ -6439,12 +6968,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,        break;      case OP_BACKREF_WITH_LEVEL: +      GET_OPTION_INC(option, bp); +      fprintf(f, ":%d", option); +      /* fall */ +    case OP_BACKREF_CHECK_WITH_LEVEL:        { -        OnigOptionType option;          LengthType level; -        GET_OPTION_INC(option, bp); -        fprintf(f, ":%d", option);          GET_LENGTH_INC(level, bp);          fprintf(f, ":%d", level); @@ -6501,6 +7031,24 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,        p_rel_addr(f, addr, bp, start);        break; +    case OP_PUSH_SAVE_VAL: +      { +        SaveType type; +        GET_SAVE_TYPE_INC(type, bp); +        GET_MEMNUM_INC(mem, bp); +        fprintf(f, ":%d:%d", type, mem); +      } +      break; + +    case OP_UPDATE_VAR: +      { +        UpdateVarType type; +        GET_UPDATE_VAR_TYPE_INC(type, bp); +        GET_MEMNUM_INC(mem, bp); +        fprintf(f, ":%d:%d", type, mem); +      } +      break; +      default:        fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",  	      *--bp); @@ -6576,9 +7124,9 @@ print_indent_tree(FILE* f, Node* node, int indent)      }      break; -  case NODE_STR: +  case NODE_STRING:      fprintf(f, "<string%s:%p>", -	    (NSTRING_IS_RAW(node) ? "-raw" : ""), node); +	    (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);      for (p = STR_(node)->s; p < STR_(node)->end; p++) {        if (*p >= 0x20 && *p < 0x7f)          fputc(*p, f); @@ -6659,12 +7207,12 @@ print_indent_tree(FILE* f, Node* node, int indent)      }      break; -  case NODE_BREF: +  case NODE_BACKREF:      {        int* p; -      BRefNode* br = BREF_(node); +      BackRefNode* br = BACKREF_(node);        p = BACKREFS_P(br); -      fprintf(f, "<backref:%p>", node); +      fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);        for (i = 0; i < br->back_num; i++) {          if (i > 0) fputs(", ", f);          fprintf(f, "%d", p[i]); @@ -6672,7 +7220,7 @@ print_indent_tree(FILE* f, Node* node, int indent)      }      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case NODE_CALL:      {        CallNode* cn = CALL_(node); @@ -6693,10 +7241,10 @@ print_indent_tree(FILE* f, Node* node, int indent)      fprintf(f, "<enclosure:%p> ", node);      switch (ENCLOSURE_(node)->type) {      case ENCLOSURE_OPTION: -      fprintf(f, "option:%d", ENCLOSURE_(node)->option); +      fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);        break;      case ENCLOSURE_MEMORY: -      fprintf(f, "memory:%d", ENCLOSURE_(node)->regnum); +      fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);        break;      case ENCLOSURE_STOP_BACKTRACK:        fprintf(f, "stop-bt"); @@ -6709,6 +7257,24 @@ print_indent_tree(FILE* f, Node* node, int indent)      print_indent_tree(f, NODE_BODY(node), indent + add);      break; +  case NODE_GIMMICK: +    fprintf(f, "<gimmick:%p> ", node); +    switch (GIMMICK_(node)->type) { +    case GIMMICK_FAIL: +      fprintf(f, "fail"); +      break; +    case GIMMICK_KEEP: +      fprintf(f, "keep:%d", GIMMICK_(node)->id); +      break; +    case GIMMICK_SAVE: +      fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id); +      break; +    case GIMMICK_UPDATE_VAR: +      fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id); +      break; +    } +    break; +    default:      fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));      break; diff --git a/src/regenc.h b/src/regenc.h index 897c704..abc26be 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -239,5 +239,7 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];   (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\    ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) +#define ONIGENC_IS_UNICODE_ENCODING(enc) \ +  ((enc)->is_code_ctype == onigenc_unicode_is_code_ctype)  #endif /* REGENC_H */ diff --git a/src/regerror.c b/src/regerror.c index 0285272..a430e60 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -174,6 +174,12 @@ onig_error_code_to_format(int code)      p = "group number is too big for capture history"; break;    case ONIGERR_INVALID_CHAR_PROPERTY_NAME:      p = "invalid character property name {%n}"; break; +  case ONIGERR_INVALID_IF_ELSE_SYNTAX: +    p = "invalid if-else syntax"; break; +  case ONIGERR_INVALID_ABSENT_GROUP_PATTERN: +    p = "invalid absent group pattern"; break; +  case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN: +    p = "invalid absent group generator pattern"; break;    case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:      p = "not supported encoding combination"; break;    case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: diff --git a/src/regexec.c b/src/regexec.c index f66da1f..e7dfb96 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -305,32 +305,85 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)  /** stack **/  #define INVALID_STACK_INDEX   -1 +#define STK_ALT_FLAG               0x0001 +  /* stack type */  /* used by normal-POP */ -#define STK_ALT                    0x0001 -#define STK_LOOK_BEHIND_NOT        0x0002 -#define STK_POS_NOT                0x0003 +#define STK_SUPER_ALT             STK_ALT_FLAG +#define STK_ALT                   (0x0002 | STK_ALT_FLAG) +#define STK_ALT_PREC_READ_NOT     (0x0004 | STK_ALT_FLAG) +#define STK_ALT_LOOK_BEHIND_NOT   (0x0006 | STK_ALT_FLAG)  /* handled by normal-POP */  #define STK_MEM_START              0x0100  #define STK_MEM_END                0x8200  #define STK_REPEAT_INC             0x0300  #define STK_STATE_CHECK_MARK       0x1000  /* avoided by normal-POP */ +#define STK_VOID                   0x0000  /* for fill a blank */  #define STK_EMPTY_CHECK_START      0x3000  #define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */  #define STK_MEM_END_MARK           0x8400  #define STK_POS                    0x0500  /* used when POP-POS */ -#define STK_STOP_BT                0x0600  /* mark for "(?>...)" */ +#define STK_STOP_BACKTRACK         0x0600  /* mark for "(?>...)" */  #define STK_REPEAT                 0x0700  #define STK_CALL_FRAME             0x0800  #define STK_RETURN                 0x0900 -#define STK_VOID                   0x0a00  /* for fill a blank */ +#define STK_SAVE_VAL               0x0a00  /* stack type check mask */ -#define STK_MASK_POP_USED          0x00ff -#define STK_MASK_TO_VOID_TARGET    0x10ff +#define STK_MASK_POP_USED          STK_ALT_FLAG +#define STK_MASK_TO_VOID_TARGET    0x10fe  #define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */ +typedef intptr_t StackIndex; + +typedef struct _StackType { +  unsigned int type; +  union { +    struct { +      UChar *pcode;      /* byte code position */ +      UChar *pstr;       /* string position */ +      UChar *pstr_prev;  /* previous char position of pstr */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK +      unsigned int state_check; +#endif +    } state; +    struct { +      int   count;       /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ +      UChar *pcode;      /* byte code position (head of repeated target) */ +      int   num;         /* repeat id */ +    } repeat; +    struct { +      StackIndex si;     /* index of stack */ +    } repeat_inc; +    struct { +      int num;           /* memory num */ +      UChar *pstr;       /* start/end position */ +      /* Following information is set, if this stack type is MEM-START */ +      StackIndex start;  /* prev. info (for backtrack  "(...)*" ) */ +      StackIndex end;    /* prev. info (for backtrack  "(...)*" ) */ +    } mem; +    struct { +      int num;           /* null check id */ +      UChar *pstr;       /* start position */ +    } empty_check; +#ifdef USE_CALL +    struct { +      UChar *ret_addr;   /* byte code position */ +      int    num;        /* null check id */ +      UChar *pstr;       /* string position */ +    } call_frame; +#endif +    struct { +      int id; +      enum SaveType type; +      UChar* v; +      UChar* v2; +    } val; +  } u; +} StackType; + +  #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE  #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\    (msa).stack_p  = (void* )0;\ @@ -396,28 +449,28 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)    if (msa->stack_p) {\      is_alloca  = 0;\      alloc_base = msa->stack_p;\ -    stk_base   = (OnigStackType* )(alloc_base\ -                 + (sizeof(OnigStackIndex) * msa->ptr_num));\ +    stk_base   = (StackType* )(alloc_base\ +                 + (sizeof(StackIndex) * msa->ptr_num));\      stk        = stk_base;\      stk_end    = stk_base + msa->stack_n;\    }\    else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\      is_alloca  = 0;\ -    alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\ -                  + sizeof(OnigStackType) * (stack_num));\ +    alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ +                  + sizeof(StackType) * (stack_num));\      CHECK_NULL_RETURN_MEMERR(alloc_base);\ -    stk_base   = (OnigStackType* )(alloc_base\ -                 + (sizeof(OnigStackIndex) * msa->ptr_num));\ +    stk_base   = (StackType* )(alloc_base\ +                 + (sizeof(StackIndex) * msa->ptr_num));\      stk        = stk_base;\      stk_end    = stk_base + (stack_num);\    }\    else {\      is_alloca  = 1;\ -    alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\ -                 + sizeof(OnigStackType) * (stack_num));\ +    alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\ +                 + sizeof(StackType) * (stack_num));\      CHECK_NULL_RETURN_MEMERR(alloc_base);\ -    stk_base   = (OnigStackType* )(alloc_base\ -                 + (sizeof(OnigStackIndex) * msa->ptr_num));\ +    stk_base   = (StackType* )(alloc_base\ +                 + (sizeof(StackIndex) * msa->ptr_num));\      stk        = stk_base;\      stk_end    = stk_base + (stack_num);\    }\ @@ -427,8 +480,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)  #define STACK_SAVE do{\    msa->stack_n = stk_end - stk_base;\    if (is_alloca != 0) {\ -    size_t size = sizeof(OnigStackIndex) * msa->ptr_num \ -                + sizeof(OnigStackType) * msa->stack_n;\ +    size_t size = sizeof(StackIndex) * msa->ptr_num \ +                + sizeof(StackType) * msa->stack_n;\      msa->stack_p = xmalloc(size);\      CHECK_NULL_RETURN_MEMERR(msa->stack_p);\      xmemcpy(msa->stack_p, alloc_base, size);\ @@ -439,8 +492,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)  } while(0)  #define UPDATE_FOR_STACK_REALLOC do{\ -  repeat_stk    = (OnigStackIndex* )alloc_base;\ -  mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\ +  repeat_stk    = (StackIndex* )alloc_base;\ +  mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\    mem_end_stk   = mem_start_stk + num_mem + 1;\  } while(0) @@ -461,8 +514,8 @@ onig_set_match_stack_limit_size(unsigned int size)  static int  stack_double(int is_alloca, char** arg_alloc_base, -	     OnigStackType** arg_stk_base, -	     OnigStackType** arg_stk_end, OnigStackType** arg_stk, +	     StackType** arg_stk_base, +	     StackType** arg_stk_end, StackType** arg_stk,  	     OnigMatchArg* msa)  {    unsigned int n; @@ -471,7 +524,7 @@ stack_double(int is_alloca, char** arg_alloc_base,    size_t new_size;    char* alloc_base;    char* new_alloc_base; -  OnigStackType *stk_base, *stk_end, *stk; +  StackType *stk_base, *stk_end, *stk;    alloc_base = *arg_alloc_base;    stk_base = *arg_stk_base; @@ -479,9 +532,9 @@ stack_double(int is_alloca, char** arg_alloc_base,    stk      = *arg_stk;    n = stk_end - stk_base; -  size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; +  size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;    n *= 2; -  new_size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; +  new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;    if (is_alloca != 0) {      new_alloc_base = (char* )xmalloc(new_size);      if (IS_NULL(new_alloc_base)) { @@ -507,8 +560,8 @@ stack_double(int is_alloca, char** arg_alloc_base,    alloc_base = new_alloc_base;    used = stk - stk_base;    *arg_alloc_base = alloc_base; -  *arg_stk_base   = (OnigStackType* )(alloc_base -		       + (sizeof(OnigStackIndex) * msa->ptr_num)); +  *arg_stk_base   = (StackType* )(alloc_base +		       + (sizeof(StackIndex) * msa->ptr_num));    *arg_stk      = *arg_stk_base + used;    *arg_stk_end  = *arg_stk_base + n;    return 0; @@ -516,8 +569,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_ENSURE(n)	do {\    if (stk_end - stk < (n)) {\ -    int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk,\ -                         msa);\ +    int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\      if (r != 0) { STACK_SAVE; return r; } \      is_alloca = 0;\      UPDATE_FOR_STACK_REALLOC;\ @@ -610,12 +662,14 @@ stack_double(int is_alloca, char** arg_alloc_base,  } while(0)  #endif /* USE_COMBINATION_EXPLOSION_CHECK */ -#define STACK_PUSH_ALT(pat,s,sprev)     STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_ALT(pat,s,sprev)       STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)  #define STACK_PUSH_POS(s,sprev)         STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) -#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) -#define STACK_PUSH_STOP_BT              STACK_PUSH_TYPE(STK_STOP_BT) -#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ -        STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) +#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \ +  STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev) +#define STACK_PUSH_STOP_BACKTRACK        STACK_PUSH_TYPE(STK_STOP_BACKTRACK) +#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \ +  STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)  #define STACK_PUSH_REPEAT(id, pat) do {\    STACK_ENSURE(1);\ @@ -725,6 +779,97 @@ stack_double(int is_alloca, char** arg_alloc_base,    STACK_INC;\  } while(0) +#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\ +  STACK_ENSURE(1);\ +  stk->type = STK_SAVE_VAL;\ +  stk->u.val.id   = (sid);\ +  stk->u.val.type = (stype);\ +  stk->u.val.v    = (UChar* )(sval);\ +  STACK_INC;\ +} while(0) + +#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\ +  STACK_ENSURE(1);\ +  stk->type = STK_SAVE_VAL;\ +  stk->u.val.id   = (sid);\ +  stk->u.val.type = (stype);\ +  stk->u.val.v    = (UChar* )(sval);\ +  stk->u.val.v2   = sprev;\ +  STACK_INC;\ +} while(0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\ +  StackType *k = stk;\ +  while (k > stk_base) {\ +    k--;\ +    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \ +    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\ +      (sval) = k->u.val.v;\ +      break;\ +    }\ +  }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \ +  int level = 0;\ +  StackType *k = stk;\ +  while (k > stk_base) {\ +    k--;\ +    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ +    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ +        && k->u.val.id == (sid)) {\ +      if (level == 0) {\ +        (sval) = k->u.val.v;\ +        break;\ +      }\ +    }\ +    else if (k->type == STK_CALL_FRAME)\ +      level--;\ +    else if (k->type == STK_RETURN)\ +      level++;\ +  }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \ +  int level = 0;\ +  StackType *k = stk;\ +  while (k > stk_base) {\ +    k--;\ +    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ +    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ +        && k->u.val.id == (sid)) {\ +      if (level == 0) {\ +        (sval) = k->u.val.v;\ +        sprev  = k->u.val.v2;\ +        break;\ +      }\ +    }\ +    else if (k->type == STK_CALL_FRAME)\ +      level--;\ +    else if (k->type == STK_RETURN)\ +      level++;\ +  }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \ +  int level = 0;\ +  StackType *k = (stk_from);\ +  while (k > stk_base) {\ +    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \ +    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ +        && k->u.val.id == (sid)) {\ +      if (level == 0) {\ +        (sval) = k->u.val.v;\ +        break;\ +      }\ +    }\ +    else if (k->type == STK_CALL_FRAME)\ +      level--;\ +    else if (k->type == STK_RETURN)\ +      level++;\ +    k--;\ +  }\ +} while (0)  #ifdef ONIG_DEBUG  #define STACK_BASE_CHECK(p, at) \ @@ -785,11 +930,11 @@ stack_double(int is_alloca, char** arg_alloc_base,    }\  } while(0) -#define STACK_POP_TIL_POS_NOT  do {\ +#define STACK_POP_TIL_ALT_PREC_READ_NOT  do {\    while (1) {\      stk--;\ -    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ -    if (stk->type == STK_POS_NOT) break;\ +    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \ +    if (stk->type == STK_ALT_PREC_READ_NOT) break;\      else if (stk->type == STK_MEM_START) {\        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\ @@ -805,11 +950,11 @@ stack_double(int is_alloca, char** arg_alloc_base,    }\  } while(0) -#define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\ +#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT  do {\    while (1) {\      stk--;\ -    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ -    if (stk->type == STK_LOOK_BEHIND_NOT) break;\ +    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \ +    if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\      else if (stk->type == STK_MEM_START) {\        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\ @@ -840,15 +985,15 @@ stack_double(int is_alloca, char** arg_alloc_base,    }\  } while(0) -#define STACK_STOP_BT_END do {\ -  OnigStackType *k = stk;\ +#define STACK_STOP_BACKTRACK_END do {\ +  StackType *k = stk;\    while (1) {\      k--;\ -    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ +    STACK_BASE_CHECK(k, "STACK_STOP_BACKTRACK_END"); \      if (IS_TO_VOID_TARGET(k)) {\        k->type = STK_VOID;\      }\ -    else if (k->type == STK_STOP_BT) {\ +    else if (k->type == STK_STOP_BACKTRACK) {\        k->type = STK_VOID;\        break;\      }\ @@ -856,7 +1001,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  } while(0)  #define STACK_EMPTY_CHECK(isnull,id,s) do {\ -  OnigStackType* k = stk;\ +  StackType* k = stk;\    while (1) {\      k--;\      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ @@ -871,7 +1016,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT  #define STACK_EMPTY_CHECK_MEMST(isnull,id,s,reg) do {\ -  OnigStackType* k = stk;\ +  StackType* k = stk;\    while (1) {\      k--;\      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \ @@ -912,7 +1057,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_EMPTY_CHECK_MEMST_REC(isnull,id,s,reg) do {\    int level = 0;\ -  OnigStackType* k = stk;\ +  StackType* k = stk;\    while (1) {\      k--;\      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \ @@ -960,7 +1105,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #else  #define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\    int level = 0;\ -  OnigStackType* k = stk;\ +  StackType* k = stk;\    while (1) {\      k--;\      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \ @@ -1000,7 +1145,7 @@ stack_double(int is_alloca, char** arg_alloc_base,  #define STACK_RETURN(addr)  do {\    int level = 0;\ -  OnigStackType* k = stk;\ +  StackType* k = stk;\    while (1) {\      k--;\      STACK_BASE_CHECK(k, "STACK_RETURN"); \ @@ -1074,27 +1219,26 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,  #define IS_EMPTY_STR           (str == end) -#define ON_STR_BEGIN(s)       ((s) == str) -#define ON_STR_END(s)         ((s) == end) -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define ON_STR_BEGIN(s)        ((s) == str) +#define ON_STR_END(s)          ((s) == end)  #define DATA_ENSURE_CHECK1     (s < right_range)  #define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)  #define DATA_ENSURE(n)         if (s + (n) > right_range) goto fail -#else -#define DATA_ENSURE_CHECK1     (s < end) -#define DATA_ENSURE_CHECK(n)   (s + (n) <= end) -#define DATA_ENSURE(n)         if (s + (n) > end) goto fail -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range +#else +#define INIT_RIGHT_RANGE    right_range = (UChar* )end +#endif  #ifdef USE_CAPTURE_HISTORY  static int -make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, -                          OnigStackType* stk_top, UChar* str, regex_t* reg) +make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, +                          StackType* stk_top, UChar* str, regex_t* reg)  {    int n, r;    OnigCaptureTreeNode* child; -  OnigStackType* k = *kp; +  StackType* k = *kp;    while (k < stk_top) {      if (k->type == STK_MEM_START) { @@ -1143,13 +1287,13 @@ static int mem_is_in_memp(int mem, int num, UChar* memp)  }  static int backref_match_at_nested_level(regex_t* reg -	 , OnigStackType* top, OnigStackType* stk_base +	 , StackType* top, StackType* stk_base  	 , int ignore_case, int case_fold_flag  	 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)  {    UChar *ss, *p, *pstart, *pend = NULL_UCHARP;    int level; -  OnigStackType* k; +  StackType* k;    level = 0;    k = top; @@ -1197,6 +1341,37 @@ static int backref_match_at_nested_level(regex_t* reg    return 0;  } + +static int +backref_check_at_nested_level(regex_t* reg, +                              StackType* top, StackType* stk_base, +                              int nest, int mem_num, UChar* memp) +{ +  int level; +  StackType* k; + +  level = 0; +  k = top; +  k--; +  while (k >= stk_base) { +    if (k->type == STK_CALL_FRAME) { +      level--; +    } +    else if (k->type == STK_RETURN) { +      level++; +    } +    else if (level == nest) { +      if (k->type == STK_MEM_END) { +        if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { +          return 1; +        } +      } +    } +    k--; +  } + +  return 0; +}  #endif /* USE_BACKREF_WITH_LEVEL */ @@ -1300,7 +1475,7 @@ typedef struct {  static int  match_at(regex_t* reg, const UChar* str, const UChar* end,  #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -	 const UChar* right_range, +	 const UChar* in_right_range,  #endif  	 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)  { @@ -1311,13 +1486,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    MemNumType mem;    RelAddrType addr;    UChar *s, *q, *sbegin; +  UChar *right_range;    int is_alloca;    char *alloc_base; -  OnigStackType *stk_base, *stk, *stk_end; -  OnigStackType *stkp; /* used as any purpose. */ -  OnigStackIndex si; -  OnigStackIndex *repeat_stk; -  OnigStackIndex *mem_start_stk, *mem_end_stk; +  StackType *stk_base, *stk, *stk_end; +  StackType *stkp; /* used as any purpose. */ +  StackIndex si; +  StackIndex *repeat_stk; +  StackIndex *mem_start_stk, *mem_end_stk; +  UChar* keep;  #ifdef USE_COMBINATION_EXPLOSION_CHECK    int scv;    unsigned char* state_check_buff = msa->state_check_buff; @@ -1346,7 +1523,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    STACK_PUSH_ENSURED(STK_ALT, FinishCode);  /* bottom stack */    best_len = ONIG_MISMATCH; -  s = (UChar* )sstart; +  keep = s = (UChar* )sstart; +  INIT_RIGHT_RANGE; +    while (1) {  #ifdef ONIG_DEBUG_MATCH      { @@ -1394,12 +1573,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          best_len = n;          region = msa->region;          if (region) { +          if (keep > s) keep = s; +  #ifdef USE_POSIX_API_REGION_OPTION            if (IS_POSIX_REGION(msa->options)) {              posix_regmatch_t* rmt = (posix_regmatch_t* )region; -            rmt[0].rm_so = sstart - str; -            rmt[0].rm_eo = s      - str; +            rmt[0].rm_so = keep - str; +            rmt[0].rm_eo = s    - str;              for (i = 1; i <= num_mem; i++) {                if (mem_end_stk[i] != INVALID_STACK_INDEX) {                  if (MEM_STATUS_AT(reg->bt_mem_start, i)) @@ -1418,8 +1599,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            }            else {  #endif /* USE_POSIX_API_REGION_OPTION */ -            region->beg[0] = sstart - str; -            region->end[0] = s      - str; +            region->beg[0] = keep - str; +            region->end[0] = s    - str;              for (i = 1; i <= num_mem; i++) {                if (mem_end_stk[i] != INVALID_STACK_INDEX) {                  if (MEM_STATUS_AT(reg->bt_mem_start, i)) @@ -1451,8 +1632,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,                }                node->group = 0; -              node->beg   = sstart - str; -              node->end   = s      - str; +              node->beg   = keep - str; +              node->end   = s    - str;                stkp = stk_base;                r = make_capture_history_tree(region->history_root, &stkp, @@ -2138,7 +2319,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      case OP_MEMORY_START:  MOP_IN(OP_MEMORY_START);        GET_MEMNUM_INC(mem, p); -      mem_start_stk[mem] = (OnigStackIndex )((void* )s); +      mem_start_stk[mem] = (StackIndex )((void* )s);        MOP_OUT;        continue;        break; @@ -2152,12 +2333,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      case OP_MEMORY_END:  MOP_IN(OP_MEMORY_END);        GET_MEMNUM_INC(mem, p); -      mem_end_stk[mem] = (OnigStackIndex )((void* )s); +      mem_end_stk[mem] = (StackIndex )((void* )s);        MOP_OUT;        continue;        break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      case OP_MEMORY_END_PUSH_REC:  MOP_IN(OP_MEMORY_END_PUSH_REC);        GET_MEMNUM_INC(mem, p);        STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ @@ -2169,13 +2350,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,      case OP_MEMORY_END_REC:  MOP_IN(OP_MEMORY_END_REC);        GET_MEMNUM_INC(mem, p); -      mem_end_stk[mem] = (OnigStackIndex )((void* )s); +      mem_end_stk[mem] = (StackIndex )((void* )s);        STACK_GET_MEM_START(mem, stkp);        if (MEM_STATUS_AT(reg->bt_mem_start, mem))          mem_start_stk[mem] = GET_STACK_INDEX(stkp);        else -        mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); +        mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);        STACK_PUSH_MEM_END_MARK(mem);        MOP_OUT; @@ -2193,16 +2374,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        goto backref;        break; -    case OP_BACKREFN:  MOP_IN(OP_BACKREFN); +    case OP_BACKREF_N:  MOP_IN(OP_BACKREF_N);        GET_MEMNUM_INC(mem, p);      backref:        {          int len;          UChar *pstart, *pend; -        /* if you want to remove following line,  -           you should check in parse and compile time. */ -        if (mem > num_mem) goto fail;          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;          if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; @@ -2226,15 +2404,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        }        break; -    case OP_BACKREFN_IC:  MOP_IN(OP_BACKREFN_IC); +    case OP_BACKREF_N_IC:  MOP_IN(OP_BACKREF_N_IC);        GET_MEMNUM_INC(mem, p);        {          int len;          UChar *pstart, *pend; -        /* if you want to remove following line,  -           you should check in parse and compile time. */ -        if (mem > num_mem) goto fail;          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;          if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; @@ -2364,6 +2539,45 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        break;  #endif +    case OP_BACKREF_CHECK:  MOP_IN(OP_BACKREF_CHECK); +      { +        GET_LENGTH_INC(tlen, p); +        for (i = 0; i < tlen; i++) { +          GET_MEMNUM_INC(mem, p); + +          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue; +          if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + +          p += (SIZE_MEMNUM * (tlen - i - 1)); +          break; /* success */ +        } +        if (i == tlen) goto fail; +        MOP_OUT; +        continue; +      } +      break; + +#ifdef USE_BACKREF_WITH_LEVEL +    case OP_BACKREF_CHECK_WITH_LEVEL: +      { +        LengthType level; + +        GET_LENGTH_INC(level, p); +        GET_LENGTH_INC(tlen,  p); + +        if (backref_check_at_nested_level(reg, stk, stk_base, +                                          (int )level, (int )tlen, p) != 0) { +          p += (SIZE_MEMNUM * tlen); +        } +        else +          goto fail; + +        MOP_OUT; +        continue; +      } +      break; +#endif +  #if 0   /* no need: IS_DYNAMIC_OPTION() == 0 */      case OP_SET_OPTION_PUSH:  MOP_IN(OP_SET_OPTION_PUSH);        GET_OPTION_INC(option, p); @@ -2440,7 +2654,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        break;  #endif -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      case OP_EMPTY_CHECK_END_MEMST_PUSH:        MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);        { @@ -2484,6 +2698,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        continue;        break; +    case OP_PUSH_SUPER:  MOP_IN(OP_PUSH_SUPER); +      GET_RELADDR_INC(addr, p); +      STACK_PUSH_SUPER_ALT(p + addr, s, sprev); +      MOP_OUT; +      continue; +      break; +  #ifdef USE_COMBINATION_EXPLOSION_CHECK      case OP_STATE_CHECK_PUSH:  MOP_IN(OP_STATE_CHECK_PUSH);        GET_STATE_CHECK_NUM_INC(mem, p); @@ -2652,13 +2873,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        goto repeat_inc_ng;        break; -    case OP_PUSH_POS:  MOP_IN(OP_PUSH_POS); +    case OP_PREC_READ_START:  MOP_IN(OP_PREC_READ_START);        STACK_PUSH_POS(s, sprev);        MOP_OUT;        continue;        break; -    case OP_POP_POS:  MOP_IN(OP_POP_POS); +    case OP_PREC_READ_END:  MOP_IN(OP_PREC_READ_END);        {          STACK_POS_END(stkp);          s     = stkp->u.state.pstr; @@ -2668,26 +2889,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        continue;        break; -    case OP_PUSH_POS_NOT:  MOP_IN(OP_PUSH_POS_NOT); +    case OP_PUSH_PREC_READ_NOT:  MOP_IN(OP_PUSH_PREC_READ_NOT);        GET_RELADDR_INC(addr, p); -      STACK_PUSH_POS_NOT(p + addr, s, sprev); +      STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);        MOP_OUT;        continue;        break; -    case OP_FAIL_POS:  MOP_IN(OP_FAIL_POS); -      STACK_POP_TIL_POS_NOT; +    case OP_FAIL_PREC_READ_NOT:  MOP_IN(OP_FAIL_PREC_READ_NOT); +      STACK_POP_TIL_ALT_PREC_READ_NOT;        goto fail;        break;      case OP_PUSH_STOP_BT:  MOP_IN(OP_PUSH_STOP_BT); -      STACK_PUSH_STOP_BT; +      STACK_PUSH_STOP_BACKTRACK;        MOP_OUT;        continue;        break;      case OP_POP_STOP_BT:  MOP_IN(OP_POP_STOP_BT); -      STACK_STOP_BT_END; +      STACK_STOP_BACKTRACK_END;        MOP_OUT;        continue;        break; @@ -2712,7 +2933,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          /* goto fail; */        }        else { -        STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); +        STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev);          s = q;          sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);        } @@ -2721,11 +2942,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        break;      case OP_FAIL_LOOK_BEHIND_NOT:  MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); -      STACK_POP_TIL_LOOK_BEHIND_NOT; +      STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;        goto fail;        break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      case OP_CALL:  MOP_IN(OP_CALL);        GET_ABSADDR_INC(addr, p);        STACK_PUSH_CALL_FRAME(p); @@ -2742,6 +2963,56 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        break;  #endif +    case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL); +      { +        SaveType type; +        GET_SAVE_TYPE_INC(type, p); +        GET_MEMNUM_INC(mem, p); /* mem: save id */ +        switch ((enum SaveType )type) { +        case SAVE_KEEP: +          STACK_PUSH_SAVE_VAL(mem, type, s); +          break; + +        case SAVE_S: +          STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s); +          break; + +        case SAVE_RIGHT_RANGE: +          STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range); +          break; +        } +      } +      MOP_OUT; +      continue; +      break; + +    case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR); +      { +        UpdateVarType type; +        GET_UPDATE_VAR_TYPE_INC(type, p); +        GET_MEMNUM_INC(mem, p); /* mem: save id */ +        switch ((enum UpdateVarType )type) { +        case UPDATE_VAR_KEEP_FROM_STACK_LAST: +          STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep); +          break; +        case UPDATE_VAR_S_FROM_STACK: +          STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s); +          break; +        case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK: +          STACK_GET_SAVE_VAL_TYPE_LAST_ID(SAVE_S, mem, right_range); +          break; +        case UPDATE_VAR_RIGHT_RANGE_FROM_STACK: +          STACK_GET_SAVE_VAL_TYPE_LAST_ID(SAVE_RIGHT_RANGE, mem, right_range); +          break; +        case UPDATE_VAR_RIGHT_RANGE_INIT: +          INIT_RIGHT_RANGE; +          break; +        } +      } +      MOP_OUT; +      continue; +      break; +      case OP_FINISH:        goto finish;        break; @@ -3248,7 +3519,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,        }      }      else { -      if (reg->dmax != ONIG_INFINITE_DISTANCE) { +      if (reg->dmax != INFINITE_LEN) {          if (p - str < reg->dmax) {            *low = (UChar* )str;            if (low_prev) @@ -3377,7 +3648,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,      }      /* no needs to adjust *high, *high is used as range check only */ -    if (reg->dmax != ONIG_INFINITE_DISTANCE) { +    if (reg->dmax != INFINITE_LEN) {        *low  = p - reg->dmax;        *high = p - reg->dmin;        *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); @@ -3514,7 +3785,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,      end_buf:        if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin) -	goto mismatch_no_msa; +        goto mismatch_no_msa;        if (range > start) {          if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) { @@ -3616,7 +3887,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,        sch_range = (UChar* )range;        if (reg->dmax != 0) { -        if (reg->dmax == ONIG_INFINITE_DISTANCE) +        if (reg->dmax == INFINITE_LEN)            sch_range = (UChar* )end;          else {            sch_range += reg->dmax; @@ -3627,7 +3898,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,        if ((end - start) < reg->threshold_len)          goto mismatch; -      if (reg->dmax != ONIG_INFINITE_DISTANCE) { +      if (reg->dmax != INFINITE_LEN) {          do {            if (! forward_search_range(reg, str, end, s, sch_range,                                       &low, &high, &low_prev)) goto mismatch; @@ -3689,7 +3960,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,        else          adjrange = (UChar* )end; -      if (reg->dmax != ONIG_INFINITE_DISTANCE && +      if (reg->dmax != INFINITE_LEN &&            (end - range) >= reg->threshold_len) {          do {            sch_start = s + reg->dmax; @@ -3714,7 +3985,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,          sch_start = s;          if (reg->dmax != 0) { -          if (reg->dmax == ONIG_INFINITE_DISTANCE) +          if (reg->dmax == INFINITE_LEN)              sch_start = (UChar* )end;            else {              sch_start += reg->dmax; diff --git a/src/regint.h b/src/regint.h index 8da27d2..185f4b6 100644 --- a/src/regint.h +++ b/src/regint.h @@ -57,7 +57,7 @@  /* config */  /* spec. config */  #define USE_NAMED_GROUP -#define USE_SUBEXP_CALL +#define USE_CALL  #define USE_BACKREF_WITH_LEVEL        /* \k<name+n>, \k<name-n> */  #define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT  /* /(?:()|())*\2/ */  #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE     /* /\n$/ =~ "\n" */ @@ -196,6 +196,8 @@ typedef int intptr_t;  #define CHECK_NULL_RETURN_MEMERR(p)   if (IS_NULL(p)) return ONIGERR_MEMORY  #define NULL_UCHARP                   ((UChar* )0) +#define INFINITE_LEN        ONIG_INFINITE_DISTANCE +  #ifdef PLATFORM_UNALIGNED_WORD_ACCESS  #define PLATFORM_GET_INC(val,p,type) do{\ @@ -211,7 +213,11 @@ typedef int intptr_t;  } while(0)  /* sizeof(OnigCodePoint) */ -#define WORD_ALIGNMENT_SIZE     SIZEOF_LONG +#ifdef SIZEOF_SIZE_T +# define WORD_ALIGNMENT_SIZE     SIZEOF_SIZE_T +#else +# define WORD_ALIGNMENT_SIZE     SIZEOF_LONG +#endif  #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\    (pad_size) = WORD_ALIGNMENT_SIZE \ @@ -226,10 +232,20 @@ typedef int intptr_t;  #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ +typedef struct { +  int num_keeper; +  int* keepers; +} RegExt; + +#define REG_EXTP(reg)      (RegExt* )((reg)->chain) +#define REG_EXTPL(reg)     ((reg)->chain) +  /* stack pop level */ -#define STACK_POP_LEVEL_FREE        0 -#define STACK_POP_LEVEL_MEM_START   1 -#define STACK_POP_LEVEL_ALL         2 +enum StackPopLevel { +  STACK_POP_LEVEL_FREE = 0, +  STACK_POP_LEVEL_MEM_START = 1, +  STACK_POP_LEVEL_ALL =2 +};  /* optimize flags */  #define ONIG_OPTIMIZE_NONE              0 @@ -482,11 +498,13 @@ enum OpCode {    OP_BACKREF1,    OP_BACKREF2, -  OP_BACKREFN, -  OP_BACKREFN_IC, +  OP_BACKREF_N, +  OP_BACKREF_N_IC,    OP_BACKREF_MULTI,    OP_BACKREF_MULTI_IC, -  OP_BACKREF_WITH_LEVEL,    /* \k<xxx+n>, \k<xxx-n> */ +  OP_BACKREF_WITH_LEVEL,        /* \k<xxx+n>, \k<xxx-n> */ +  OP_BACKREF_CHECK,             /* (?(n)), (?('name')) */ +  OP_BACKREF_CHECK_WITH_LEVEL,  /* (?(n)), (?('name')) */    OP_MEMORY_START,    OP_MEMORY_START_PUSH,   /* push back-tracker to stack */ @@ -498,6 +516,7 @@ enum OpCode {    OP_FAIL,               /* pop stack and move */    OP_JUMP,    OP_PUSH, +  OP_PUSH_SUPER,    OP_POP,    OP_PUSH_OR_JUMP_EXACT1,  /* if match exact then push, else jump. */    OP_PUSH_IF_PEEK_NEXT,    /* if match exact then push, else none. */ @@ -512,10 +531,10 @@ enum OpCode {    OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */    OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ -  OP_PUSH_POS,             /* (?=...)  start */ -  OP_POP_POS,              /* (?=...)  end   */ -  OP_PUSH_POS_NOT,         /* (?!...)  start */ -  OP_FAIL_POS,             /* (?!...)  end   */ +  OP_PREC_READ_START,             /* (?=...)  start */ +  OP_PREC_READ_END,              /* (?=...)  end   */ +  OP_PUSH_PREC_READ_NOT,   /* (?!...)  start */ +  OP_FAIL_PREC_READ_NOT,   /* (?!...)  end   */    OP_PUSH_STOP_BT,         /* (?>...)  start */    OP_POP_STOP_BT,          /* (?>...)  end   */    OP_LOOK_BEHIND,          /* (?<=...) start (no needs end opcode) */ @@ -524,6 +543,8 @@ enum OpCode {    OP_CALL,                 /* \g<name> */    OP_RETURN, +  OP_PUSH_SAVE_VAL, +  OP_UPDATE_VAR,    OP_STATE_CHECK_PUSH,         /* combination explosion check and push */    OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump  */ @@ -536,6 +557,20 @@ enum OpCode {    OP_SET_OPTION          /* set option */  }; +enum SaveType { +  SAVE_KEEP = 0, /* SAVE S */ +  SAVE_S = 1, +  SAVE_RIGHT_RANGE = 2, +}; + +enum UpdateVarType { +  UPDATE_VAR_KEEP_FROM_STACK_LAST     = 0, +  UPDATE_VAR_S_FROM_STACK             = 1, +  UPDATE_VAR_RIGHT_RANGE_FROM_STACK   = 2, +  UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3, +  UPDATE_VAR_RIGHT_RANGE_INIT         = 4, +}; +  typedef int RelAddrType;  typedef int AbsAddrType;  typedef int LengthType; @@ -543,6 +578,8 @@ typedef int RepeatNumType;  typedef int MemNumType;  typedef short int StateCheckNumType;  typedef void* PointerType; +typedef int SaveType; +typedef int UpdateVarType;  #define SIZE_OPCODE           1  #define SIZE_RELADDR          sizeof(RelAddrType) @@ -554,7 +591,8 @@ typedef void* PointerType;  #define SIZE_OPTION           sizeof(OnigOptionType)  #define SIZE_CODE_POINT       sizeof(OnigCodePoint)  #define SIZE_POINTER          sizeof(PointerType) - +#define SIZE_SAVE_TYPE        sizeof(SaveType) +#define SIZE_UPDATE_VAR_TYPE  sizeof(UpdateVarType)  #define GET_RELADDR_INC(addr,p)    PLATFORM_GET_INC(addr,   p, RelAddrType)  #define GET_ABSADDR_INC(addr,p)    PLATFORM_GET_INC(addr,   p, AbsAddrType) @@ -564,6 +602,8 @@ typedef void* PointerType;  #define GET_OPTION_INC(option,p)   PLATFORM_GET_INC(option, p, OnigOptionType)  #define GET_POINTER_INC(ptr,p)     PLATFORM_GET_INC(ptr,    p, PointerType)  #define GET_STATE_CHECK_NUM_INC(num,p)  PLATFORM_GET_INC(num, p, StateCheckNumType) +#define GET_SAVE_TYPE_INC(type,p)       PLATFORM_GET_INC(type, p, SaveType) +#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)  /* code point's address must be aligned address. */  #define GET_CODE_POINT(code,p)   code = *((OnigCodePoint* )(p)) @@ -578,15 +618,16 @@ typedef void* PointerType;  #define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)  #define SIZE_OP_JUMP                   (SIZE_OPCODE + SIZE_RELADDR)  #define SIZE_OP_PUSH                   (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PUSH_SUPER             (SIZE_OPCODE + SIZE_RELADDR)  #define SIZE_OP_POP                     SIZE_OPCODE  #define SIZE_OP_PUSH_OR_JUMP_EXACT1    (SIZE_OPCODE + SIZE_RELADDR + 1)  #define SIZE_OP_PUSH_IF_PEEK_NEXT      (SIZE_OPCODE + SIZE_RELADDR + 1)  #define SIZE_OP_REPEAT_INC             (SIZE_OPCODE + SIZE_MEMNUM)  #define SIZE_OP_REPEAT_INC_NG          (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_POS                SIZE_OPCODE -#define SIZE_OP_PUSH_POS_NOT           (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP_POS                 SIZE_OPCODE -#define SIZE_OP_FAIL_POS                SIZE_OPCODE +#define SIZE_OP_PREC_READ_START         SIZE_OPCODE +#define SIZE_OP_PUSH_PREC_READ_NOT     (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PREC_READ_END           SIZE_OPCODE +#define SIZE_OP_FAIL_PREC_READ_NOT      SIZE_OPCODE  #define SIZE_OP_SET_OPTION             (SIZE_OPCODE + SIZE_OPTION)  #define SIZE_OP_SET_OPTION_PUSH        (SIZE_OPCODE + SIZE_OPTION)  #define SIZE_OP_FAIL                    SIZE_OPCODE @@ -605,6 +646,8 @@ typedef void* PointerType;  #define SIZE_OP_FAIL_LOOK_BEHIND_NOT    SIZE_OPCODE  #define SIZE_OP_CALL                   (SIZE_OPCODE + SIZE_ABSADDR)  #define SIZE_OP_RETURN                  SIZE_OPCODE +#define SIZE_OP_PUSH_SAVE_VAL          (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM) +#define SIZE_OP_UPDATE_VAR             (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM)  #ifdef USE_COMBINATION_EXPLOSION_CHECK  #define SIZE_OP_STATE_CHECK            (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) @@ -664,48 +707,6 @@ typedef void* PointerType;  #define NCCLASS_CLEAR_NOT(nd)   NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)  #define IS_NCCLASS_NOT(nd)      IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -typedef intptr_t OnigStackIndex; - -typedef struct _OnigStackType { -  unsigned int type; -  union { -    struct { -      UChar *pcode;      /* byte code position */ -      UChar *pstr;       /* string position */ -      UChar *pstr_prev;  /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK -      unsigned int state_check; -#endif -    } state; -    struct { -      int   count;       /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ -      UChar *pcode;      /* byte code position (head of repeated target) */ -      int   num;         /* repeat id */ -    } repeat; -    struct { -      OnigStackIndex si;     /* index of stack */ -    } repeat_inc; -    struct { -      int num;           /* memory num */ -      UChar *pstr;       /* start/end position */ -      /* Following information is set, if this stack type is MEM-START */ -      OnigStackIndex start;  /* prev. info (for backtrack  "(...)*" ) */ -      OnigStackIndex end;    /* prev. info (for backtrack  "(...)*" ) */ -    } mem; -    struct { -      int num;           /* null check id */ -      UChar *pstr;       /* start position */ -    } empty_check; -#ifdef USE_SUBEXP_CALL -    struct { -      UChar *ret_addr;   /* byte code position */ -      int    num;        /* null check id */ -      UChar *pstr;       /* string position */ -    } call_frame; -#endif -  } u; -} OnigStackType; -  typedef struct {    void* stack_p;    int   stack_n; diff --git a/src/regparse.c b/src/regparse.c index a5f8e5b..25291c5 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -48,6 +48,11 @@ OnigSyntaxType OnigSyntaxRuby = {    , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |        ONIG_SYN_OP2_OPTION_RUBY |        ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | +      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | +      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | +      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | +      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT | +      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |        ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |        ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |        ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | @@ -179,7 +184,10 @@ static int backref_rel_to_abs(int rel_no, ScanEnv* env)    }  } -#define ONOFF(v,f,negative)    (negative) ? ((v) &= ~(f)) : ((v) |= (f)) +#define OPTION_ON(v,f)     ((v) |= (f)) +#define OPTION_OFF(v,f)    ((v) &= ~(f)) + +#define OPTION_NEGATE(v,f,negative)    (negative) ? ((v) &= ~(f)) : ((v) |= (f))  #define MBCODE_START_POS(enc) \    (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) @@ -301,6 +309,34 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)  }  #endif +static int +save_entry(ScanEnv* env, enum SaveType type, int* id) +{ +  int nid = env->save_num; + +#if 0 +  if (IS_NULL(env->saves)) { +    int n = 10; +    env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n); +    CHECK_NULL_RETURN_MEMERR(env->saves); +    env->save_alloc_num = n; +  } +  else if (env->save_alloc_num <= nid) { +    int n = env->save_alloc_num * 2; +    SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n); +    CHECK_NULL_RETURN_MEMERR(p); +    env->saves = p; +    env->save_alloc_num = n; +  } + +  env->saves[nid].type = type; +#endif + +  env->save_num++; +  *id = nid; +  return 0; +} +  /* scan pattern methods */  #define PEND_VALUE   0 @@ -990,7 +1026,7 @@ scan_env_clear(ScanEnv* env)    env->error_end  = (UChar* )NULL;    env->num_call   = 0; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    env->unset_addr_list = NULL;    env->has_call_zero   = 0;  #endif @@ -1011,6 +1047,10 @@ scan_env_clear(ScanEnv* env)    env->has_recursion       = 0;  #endif    env->parse_depth         = 0; +  env->keep_num            = 0; +  env->save_num            = 0; +  env->save_alloc_num      = 0; +  env->saves               = 0;  }  static int @@ -1075,7 +1115,7 @@ onig_node_free(Node* node)  #endif    switch (NODE_TYPE(node)) { -  case NODE_STR: +  case NODE_STRING:      if (STR_(node)->capa != 0 &&          IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {        xfree(STR_(node)->s); @@ -1103,13 +1143,25 @@ onig_node_free(Node* node)      }      break; -  case NODE_BREF: -    if (IS_NOT_NULL(BREF_(node)->back_dynamic)) -      xfree(BREF_(node)->back_dynamic); +  case NODE_BACKREF: +    if (IS_NOT_NULL(BACKREF_(node)->back_dynamic)) +      xfree(BACKREF_(node)->back_dynamic);      break; -  case NODE_QUANT:    case NODE_ENCLOSURE: +    if (NODE_BODY(node)) +      onig_node_free(NODE_BODY(node)); + +    { +      EnclosureNode* en = ENCLOSURE_(node); +      if (en->type == ENCLOSURE_IF_ELSE) { +        onig_node_free(en->te.Then); +        onig_node_free(en->te.Else); +      } +    } +    break; + +  case NODE_QUANT:    case NODE_ANCHOR:      if (NODE_BODY(node))        onig_node_free(NODE_BODY(node)); @@ -1117,12 +1169,35 @@ onig_node_free(Node* node)    case NODE_CTYPE:    case NODE_CALL: +  case NODE_GIMMICK:      break;    }    xfree(node);  } +static void +cons_node_free_alone(Node* node) +{ +  NODE_CAR(node) = 0; +  NODE_CDR(node) = 0; +  onig_node_free(node); +} + +extern void +list_node_free_not_car(Node* node) +{ +  Node* next_node; + + start: +  if (IS_NULL(node)) return; + +  next_node = NODE_CDR(node); +  xfree(node); +  node = next_node; +  goto start; +} +  static Node*  node_new(void)  { @@ -1154,7 +1229,7 @@ node_new_cclass(void)    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_CCLASS); +  NODE_SET_TYPE(node, NODE_CCLASS);    initialize_cclass(CCLASS_(node));    return node;  } @@ -1165,19 +1240,61 @@ node_new_ctype(int type, int not)    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_CTYPE); +  NODE_SET_TYPE(node, NODE_CTYPE);    CTYPE_(node)->ctype = type;    CTYPE_(node)->not   = not;    return node;  }  static Node* +node_new_anychar(void) +{ +  Node* node = node_new_ctype(CTYPE_ANYCHAR, 0); +  return node; +} + +static Node* +node_new_anychar_with_fixed_option(OnigOptionType option) +{ +  CtypeNode* ct; +  Node* node; + +  node = node_new_anychar(); +  ct = CTYPE_(node); +  ct->options = option; +  NODE_STATUS_ADD(node, NST_FIXED_OPTION); +  return node; +} + +static int +node_new_no_newline(Node** node, ScanEnv* env) +{ +  Node* n; + +  n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE); +  CHECK_NULL_RETURN_MEMERR(n); +  *node = n; +  return 0; +} + +static int +node_new_true_anychar(Node** node, ScanEnv* env) +{ +  Node* n; + +  n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE); +  CHECK_NULL_RETURN_MEMERR(n); +  *node = n; +  return 0; +} + +static Node*  node_new_list(Node* left, Node* right)  {    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_LIST); +  NODE_SET_TYPE(node, NODE_LIST);    NODE_CAR(node)  = left;    NODE_CDR(node) = right;    return node; @@ -1213,19 +1330,65 @@ onig_node_new_alt(Node* left, Node* right)    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_ALT); +  NODE_SET_TYPE(node, NODE_ALT);    NODE_CAR(node)  = left;    NODE_CDR(node) = right;    return node;  } +static Node* +make_list_or_alt(NodeType type, int n, Node* ns[]) +{ +  Node* r; + +  if (n <= 0) return NULL_NODE; + +  if (n == 1) { +    r = node_new(); +    CHECK_NULL_RETURN(r); +    NODE_SET_TYPE(r, type); +    NODE_CAR(r) = ns[0]; +    NODE_CDR(r) = NULL_NODE; +  } +  else { +    Node* right; + +    r = node_new(); +    CHECK_NULL_RETURN(r); + +    right = make_list_or_alt(type, n - 1, ns + 1); +    if (IS_NULL(right)) { +      onig_node_free(r); +      return NULL_NODE; +    } + +    NODE_SET_TYPE(r, type); +    NODE_CAR(r) = ns[0]; +    NODE_CDR(r) = right; +  } + +  return r; +} + +static Node* +make_list(int n, Node* ns[]) +{ +  return make_list_or_alt(NODE_LIST, n, ns); +} + +static Node* +make_alt(int n, Node* ns[]) +{ +  return make_list_or_alt(NODE_ALT, n, ns); +} +  extern Node*  onig_node_new_anchor(int type)  {    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_ANCHOR); +  NODE_SET_TYPE(node, NODE_ANCHOR);    ANCHOR_(node)->type     = type;    ANCHOR_(node)->char_len = -1;    return node; @@ -1243,16 +1406,16 @@ node_new_backref(int back_num, int* backrefs, int by_name,    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_BREF); -  BREF_(node)->back_num = back_num; -  BREF_(node)->back_dynamic = (int* )NULL; +  NODE_SET_TYPE(node, NODE_BACKREF); +  BACKREF_(node)->back_num = back_num; +  BACKREF_(node)->back_dynamic = (int* )NULL;    if (by_name != 0)      NODE_STATUS_ADD(node, NST_BY_NAME);  #ifdef USE_BACKREF_WITH_LEVEL    if (exist_level != 0) {      NODE_STATUS_ADD(node, NST_NEST_LEVEL); -    BREF_(node)->nest_level  = nest_level; +    BACKREF_(node)->nest_level  = nest_level;    }  #endif @@ -1266,7 +1429,7 @@ node_new_backref(int back_num, int* backrefs, int by_name,    if (back_num <= NODE_BACKREFS_SIZE) {      for (i = 0; i < back_num; i++) -      BREF_(node)->back_static[i] = backrefs[i]; +      BACKREF_(node)->back_static[i] = backrefs[i];    }    else {      int* p = (int* )xmalloc(sizeof(int) * back_num); @@ -1274,21 +1437,41 @@ node_new_backref(int back_num, int* backrefs, int by_name,        onig_node_free(node);        return NULL;      } -    BREF_(node)->back_dynamic = p; +    BACKREF_(node)->back_dynamic = p;      for (i = 0; i < back_num; i++)        p[i] = backrefs[i];    }    return node;  } -#ifdef USE_SUBEXP_CALL +static Node* +node_new_backref_checker(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_WITH_LEVEL +		 int exist_level, int nest_level, +#endif +		 ScanEnv* env) +{ +  Node* node; + +  node = node_new_backref(back_num, backrefs, by_name, +#ifdef USE_BACKREF_WITH_LEVEL +                          exist_level, nest_level, +#endif +                          env); +  CHECK_NULL_RETURN(node); + +  NODE_STATUS_ADD(node, NST_CHECKER); +  return node; +} + +#ifdef USE_CALL  static Node*  node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)  {    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_CALL); +  NODE_SET_TYPE(node, NODE_CALL);    CALL_(node)->by_number   = by_number;    CALL_(node)->name        = name;    CALL_(node)->name_end    = name_end; @@ -1304,7 +1487,7 @@ node_new_quantifier(int lower, int upper, int by_number)    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_QUANT); +  NODE_SET_TYPE(node, NODE_QUANT);    QUANT_(node)->lower  = lower;    QUANT_(node)->upper  = upper;    QUANT_(node)->greedy = 1; @@ -1328,7 +1511,7 @@ node_new_enclosure(int type)    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_ENCLOSURE); +  NODE_SET_TYPE(node, NODE_ENCLOSURE);    ENCLOSURE_(node)->type = type;    switch (type) { @@ -1340,11 +1523,16 @@ node_new_enclosure(int type)      break;    case ENCLOSURE_OPTION: -    ENCLOSURE_(node)->o.option =  0; +    ENCLOSURE_(node)->o.options =  0;      break;    case ENCLOSURE_STOP_BACKTRACK:      break; + +  case ENCLOSURE_IF_ELSE: +    ENCLOSURE_(node)->te.Then = 0; +    ENCLOSURE_(node)->te.Else = 0; +    break;    }    ENCLOSURE_(node)->opt_count = 0; @@ -1358,7 +1546,20 @@ onig_node_new_enclosure(int type)  }  static Node* -node_new_enclosure_memory(int is_named) +node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else) +{ +  Node* n; +  n = node_new_enclosure(ENCLOSURE_IF_ELSE); +  CHECK_NULL_RETURN(n); + +  NODE_BODY(n) = cond; +  ENCLOSURE_(n)->te.Then = Then; +  ENCLOSURE_(n)->te.Else = Else; +  return n; +} + +static Node* +node_new_memory(int is_named)  {    Node* node = node_new_enclosure(ENCLOSURE_MEMORY);    CHECK_NULL_RETURN(node); @@ -1373,10 +1574,395 @@ node_new_option(OnigOptionType option)  {    Node* node = node_new_enclosure(ENCLOSURE_OPTION);    CHECK_NULL_RETURN(node); -  ENCLOSURE_(node)->o.option = option; +  ENCLOSURE_(node)->o.options = option;    return node;  } +static int +node_new_fail(Node** node, ScanEnv* env) +{ +  *node = node_new(); +  CHECK_NULL_RETURN_MEMERR(*node); + +  NODE_SET_TYPE(*node, NODE_GIMMICK); +  GIMMICK_(*node)->type = GIMMICK_FAIL; +  return ONIG_NORMAL; +} + +static int +node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env) +{ +  int id; +  int r; + +  r = save_entry(env, save_type, &id); +  if (r != ONIG_NORMAL) return r; + +  *node = node_new(); +  CHECK_NULL_RETURN_MEMERR(*node); + +  NODE_SET_TYPE(*node, NODE_GIMMICK); +  GIMMICK_(*node)->id   = id; +  GIMMICK_(*node)->type = GIMMICK_SAVE; +  GIMMICK_(*node)->detail_type = (int )save_type; + +  return ONIG_NORMAL; +} + +static int +node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type, +                            int id, ScanEnv* env) +{ +  *node = node_new(); +  CHECK_NULL_RETURN_MEMERR(*node); + +  NODE_SET_TYPE(*node, NODE_GIMMICK); +  GIMMICK_(*node)->id   = id; +  GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR; +  GIMMICK_(*node)->detail_type = (int )update_var_type; + +  return ONIG_NORMAL; +} + +static int +node_new_keep(Node** node, ScanEnv* env) +{ +  int r; + +  r = node_new_save_gimmick(node, SAVE_KEEP, env); +  if (r != 0) return r; + +  env->keep_num++; +  return ONIG_NORMAL; +} + +static int +make_absent_engine(Node** node, int pre_save_right_id, Node* absent, +                   Node* step_one, int lower, int upper, int possessive, +                   int is_range_cutter, ScanEnv* env) +{ +  int r; +  int i; +  int id; +  Node* x; +  Node* ns[4]; + +  for (i = 0; i < 4; i++) ns[i] = NULL_NODE; + +  ns[1] = absent; +  ns[3] = step_one; // for err +  r = node_new_save_gimmick(&ns[0], SAVE_S, env); +  if (r != 0) goto err; + +  id = GIMMICK_(ns[0])->id; +  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK, +                                  id, env); +  if (r != 0) goto err; + +  r = node_new_fail(&ns[3], env); +  if (r != 0) goto err; + +  x = make_list(4, ns); +  if (IS_NULL(x)) goto err; + +  ns[0] = x; +  ns[1] = step_one; +  ns[2] = ns[3] = NULL_NODE; + +  x = make_alt(2, ns); +  if (IS_NULL(x)) goto err; + +  ns[0] = x; + +  x = node_new_quantifier(lower, upper, 0); +  if (IS_NULL(x)) goto err; + +  NODE_BODY(x) = ns[0]; +  ns[0] = x; + +  if (possessive != 0) { +    x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); +    if (IS_NULL(x)) goto err; + +    NODE_BODY(x) = ns[0]; +    ns[0] = x; +  } + +  r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK, +                                  pre_save_right_id, env); +  if (r != 0) goto err; + +  r = node_new_fail(&ns[2], env); +  if (r != 0) goto err; + +  x = make_list(2, ns + 1); +  if (IS_NULL(x)) goto err; + +  ns[1] = x; ns[2] = NULL_NODE; + +  x = make_alt(2, ns); +  if (IS_NULL(x)) goto err; + +  if (is_range_cutter != 0) +    NODE_STATUS_ADD(x, NST_SUPER); + +  *node = x; +  return ONIG_NORMAL; + + err: +  for (i = 0; i < 4; i++) onig_node_free(ns[i]); +  return r; +} + +static int +make_absent_tail(Node** node1, Node** node2, int pre_save_right_id, +                 ScanEnv* env) +{ +  int r; +  int id; +  Node* save; +  Node* x; +  Node* ns[2]; + +  *node1 = *node2 = NULL_NODE; +  save = ns[0] = ns[1] = NULL_NODE; + +  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env); +  if (r != 0) goto err; + +  id = GIMMICK_(save)->id; +  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK, +                                  id, env); +  if (r != 0) goto err; + +  r = node_new_fail(&ns[1], env); +  if (r != 0) goto err; + +  x = make_list(2, ns); +  if (IS_NULL(x)) goto err; + +  ns[0] = NULL_NODE; ns[1] = x; + +  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK, +                                  pre_save_right_id, env); +  if (r != 0) goto err; + +  x = make_alt(2, ns); +  if (IS_NULL(x)) goto err; + +  *node1 = save; +  *node2 = x; +  return ONIG_NORMAL; + + err: +  onig_node_free(save); +  onig_node_free(ns[0]); +  onig_node_free(ns[1]); +  return r; +} + +static int +is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody, +                          int* is_possessive, ScanEnv* env) +{ +  Node* quant; +  Node* body; + +  *rquant = *rbody = 0; +  *is_possessive = 0; + +  if (NODE_TYPE(node) == NODE_QUANT) { +    quant = node; +  } +  else { +    if (NODE_TYPE(node) == NODE_ENCLOSURE) { +      EnclosureNode* en = ENCLOSURE_(node); +      if (en->type == ENCLOSURE_STOP_BACKTRACK) { +        *is_possessive = 1; +        quant = NODE_ENCLOSURE_BODY(en); +        if (NODE_TYPE(quant) != NODE_QUANT) +          return 0; +      } +      else +        return 0; +    } +    else +      return 0; +  } + +  body = NODE_BODY(quant); +  switch (NODE_TYPE(body)) { +  case NODE_STRING: +    { +      int len; +      StrNode* sn = STR_(body); +      UChar *s = sn->s; + +      len = 0; +      while (s < sn->end) { +        s += enclen(env->enc, s); +        len++; +      } +      if (len != 1) +        return 0; +    } + +  case NODE_CCLASS: +    break; + +  default: +    return 0; +    break; +  } + +  if (node != quant) { +    NODE_BODY(node) = 0; +    onig_node_free(node); +  } +  NODE_BODY(quant) = NULL_NODE; +  *rquant = quant; +  *rbody  = body; +  return 1; +} + +static int +make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant, +                                            Node* body, int possessive, ScanEnv* env) +{ +  int r; +  int i; +  int id1; +  int lower, upper; +  Node* x; +  Node* ns[4]; + +  *node = NULL_NODE; +  r = ONIGERR_MEMORY; +  ns[0] = ns[1] = NULL_NODE; +  ns[2] = body, ns[3] = absent; + +  lower = QUANT_(quant)->lower; +  upper = QUANT_(quant)->upper; +  onig_node_free(quant); + +  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env); +  if (r != 0) goto err; + +  id1 = GIMMICK_(ns[0])->id; + +  r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive, +                         0, env); +  if (r != 0) goto err; + +  ns[2] = ns[3] = NULL_NODE; + +  r = make_absent_tail(&ns[2], &ns[3], id1, env); +  if (r != 0) goto err; + +  x = make_list(4, ns); +  if (IS_NULL(x)) goto err; + +  *node = x; +  return ONIG_NORMAL; + + err: +  for (i = 0; i < 4; i++) onig_node_free(ns[i]); +  return r; +} + +static int +make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, +                 ScanEnv* env) +{ +  int r; +  int i; +  int id1, id2; +  int possessive; +  Node* x; +  Node* ns[7]; + +  r = ONIGERR_MEMORY; +  for (i = 0; i < 7; i++) ns[i] = NULL_NODE; +  ns[4] = expr; ns[5] = absent; + +  if (is_range_cutter == 0) { +    Node* quant; +    Node* body; + +    if (expr == NULL_NODE) { +      /* default expr \O* */ +      quant = node_new_quantifier(0, REPEAT_INFINITE, 0); +      if (IS_NULL(quant)) goto err; + +      r = node_new_true_anychar(&body, env); +      if (r != 0) { +        onig_node_free(quant); +        goto err; +      } +      possessive = 0; +      goto simple; +    } +    else { +      if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) { +      simple: +        r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant, +                                                        body, possessive, env); +        if (r != 0) { +          ns[4] = NULL_NODE; +          onig_node_free(quant); +          onig_node_free(body); +          goto err; +        } + +        return ONIG_NORMAL; +      } +    } +  } + +  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env); +  if (r != 0) goto err; + +  id1 = GIMMICK_(ns[0])->id; + +  r = node_new_save_gimmick(&ns[1], SAVE_S, env); +  if (r != 0) goto err; + +  id2 = GIMMICK_(ns[1])->id; + +  r = node_new_true_anychar(&ns[3], env); +  if (r != 0) goto err; + +  possessive = 1; +  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE, +                         possessive, is_range_cutter, env); +  if (r != 0) goto err; + +  ns[3] = NULL_NODE; +  ns[5] = NULL_NODE; + +  r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env); +  if (r != 0) goto err; + +  if (is_range_cutter != 0) { +    x = make_list(4, ns); +    if (IS_NULL(x)) goto err; +  } +  else { +    r = make_absent_tail(&ns[5], &ns[6], id1, env); +    if (r != 0) goto err; +   +    x = make_list(7, ns); +    if (IS_NULL(x)) goto err; +  } + +  *node = x; +  return ONIG_NORMAL; + + err: +  for (i = 0; i < 7; i++) onig_node_free(ns[i]); +  return r;   +} +  extern int  onig_node_str_cat(Node* node, const UChar* s, const UChar* end)  { @@ -1385,9 +1971,9 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)    if (addlen > 0) {      int len  = STR_(node)->end - STR_(node)->s; -    if (STR_(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { +    if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {        UChar* p; -      int capa = len + addlen + NODE_STR_MARGIN; +      int capa = len + addlen + NODE_STRING_MARGIN;        if (capa <= STR_(node)->capa) {          onig_strcpy(STR_(node)->s + len, s, end); @@ -1432,7 +2018,7 @@ node_str_cat_char(Node* node, UChar c)  extern void  onig_node_conv_to_str_node(Node* node, int flag)  { -  SET_NODE_TYPE(node, NODE_STR); +  NODE_SET_TYPE(node, NODE_STRING);    STR_(node)->flag = flag;    STR_(node)->capa = 0;    STR_(node)->s    = STR_(node)->buf; @@ -1459,7 +2045,7 @@ node_new_str(const UChar* s, const UChar* end)    Node* node = node_new();    CHECK_NULL_RETURN(node); -  SET_NODE_TYPE(node, NODE_STR); +  NODE_SET_TYPE(node, NODE_STRING);    STR_(node)->capa = 0;    STR_(node)->flag = 0;    STR_(node)->s    = STR_(node)->buf; @@ -1481,7 +2067,7 @@ static Node*  node_new_str_raw(UChar* s, UChar* end)  {    Node* node = node_new_str(s, end); -  NSTRING_SET_RAW(node); +  NODE_STRING_SET_RAW(node);    return node;  } @@ -1511,7 +2097,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc)      if (p && p > sn->s) { /* can be split. */        n = node_new_str(p, sn->end);        if ((sn->flag & STRING_RAW) != 0) -        NSTRING_SET_RAW(n); +        NODE_STRING_SET_RAW(n);        sn->end = (UChar* )p;      } @@ -1532,7 +2118,7 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc)  static int  node_str_head_pad(StrNode* sn, int num, UChar val)  { -  UChar buf[NODE_STR_BUF_SIZE]; +  UChar buf[NODE_STRING_BUF_SIZE];    int i, len;    len = sn->end - sn->s; @@ -2090,6 +2676,7 @@ is_invalid_quantifier_target(Node* node)  {    switch (NODE_TYPE(node)) {    case NODE_ANCHOR: +  case NODE_GIMMICK:      return 1;      break; @@ -2212,6 +2799,56 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)    onig_node_free(cnode);  } +static int +node_new_general_newline(Node** node, ScanEnv* env) +{ +  int r; +  int dlen, alen; +  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2]; +  Node* crnl; +  Node* ncc; +  Node* x; +  CClassNode* cc; + +  dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf); +  if (dlen < 0) return dlen; +  alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen); +  if (alen < 0) return alen; + +  crnl = node_new_str_raw(buf, buf + dlen + alen); +  CHECK_NULL_RETURN_MEMERR(crnl); + +  ncc = node_new_cclass(); +  if (IS_NULL(ncc)) goto err2; + +  cc = CCLASS_(ncc); +  if (dlen == 1) { +    bitset_set_range(cc->bs, 0x0a, 0x0d); +  } +  else { +    r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d); +    if (r != 0) { +    err1: +      onig_node_free(ncc); +    err2: +      onig_node_free(crnl); +      return ONIGERR_MEMORY; +    } +  } + +  if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) { +    r = add_code_range(&(cc->mbuf), env, 0x85, 0x85); +    if (r != 0) goto err1; +    r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029); +    if (r != 0) goto err1; +  } + +  x = node_new_enclosure_if_else(crnl, 0, ncc); +  if (IS_NULL(x)) goto err1; + +  *node = x; +  return 0; +}  enum TokenSyms {    TK_EOT      = 0,   /* end of token */ @@ -2233,6 +2870,11 @@ enum TokenSyms {    TK_CC_OPEN,    TK_QUOTE_OPEN,    TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */ +  TK_KEEP,             /* \K */ +  TK_GENERAL_NEWLINE,  /* \R */ +  TK_NO_NEWLINE,       /* \N */ +  TK_TRUE_ANYCHAR,     /* \O */ +    /* in cc */    TK_CC_CLOSE,    TK_CC_RANGE, @@ -2452,8 +3094,9 @@ static OnigCodePoint  get_name_end_code_point(OnigCodePoint start)  {    switch (start) { -  case '<':  return (OnigCodePoint )'>'; break; +  case '<':  return (OnigCodePoint )'>';  break;    case '\'': return (OnigCodePoint )'\''; break; +  case '(':  return (OnigCodePoint )')';  break;    default:      break;    } @@ -2706,7 +3349,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,      if (c != end_code) {        r = ONIGERR_INVALID_GROUP_NAME; -      name_end = end; +      goto err;      }      if (*num_type != IS_NOT_NUM) { @@ -3378,6 +4021,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        tok->u.prop.not   = 1;        break; +    case 'K': +      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break; +      tok->type = TK_KEEP; +      break; + +    case 'R': +      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break; +      tok->type = TK_GENERAL_NEWLINE; +      break; + +    case 'N': +      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break; +      tok->type = TK_NO_NEWLINE; +      break; + +    case 'O': +      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break; +      tok->type = TK_TRUE_ANYCHAR; +      break; +      case 'A':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;      begin_buf: @@ -3561,7 +4224,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)            if (r == 1) tok->u.backref.exist_level = 1;            else        tok->u.backref.exist_level = 0;  #else -          r = fetch_name(&p, end, &name_end, env, &back_num, &num_type, 1); +          r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);  #endif            if (r < 0) return r; @@ -3616,7 +4279,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        break;  #endif -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      case 'g':        if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {          PFETCH(c); @@ -3815,14 +4478,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)      case '^':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;        tok->type = TK_ANCHOR; -      tok->u.subtype = (IS_SINGLELINE(env->option) +      tok->u.subtype = (IS_SINGLELINE(env->options)  			? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);        break;      case '$':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;        tok->type = TK_ANCHOR; -      tok->u.subtype = (IS_SINGLELINE(env->option) +      tok->u.subtype = (IS_SINGLELINE(env->options)  			? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);        break; @@ -3837,7 +4500,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        break;      case '#': -      if (IS_EXTEND(env->option)) { +      if (IS_EXTEND(env->options)) {          while (!PEND) {            PFETCH(c);            if (ONIGENC_IS_CODE_NEWLINE(enc, c)) @@ -3849,7 +4512,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        break;      case ' ': case '\t': case '\n': case '\r': case '\f': -      if (IS_EXTEND(env->option)) +      if (IS_EXTEND(env->options))          goto start;        break; @@ -4640,7 +5303,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,    *np = NULL;    if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; -  option = env->option; +  option = env->options;    if (PPEEK_IS('?') &&        IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {      PINC; @@ -4711,7 +5374,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,            r = name_add(env->reg, name, name_end, num, env);            if (r != 0) return r; -          *np = node_new_enclosure_memory(1); +          *np = node_new_memory(1);            CHECK_NULL_RETURN_MEMERR(*np);            ENCLOSURE_(*np)->m.regnum = num;            if (list_capture != 0) @@ -4729,6 +5392,259 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,  #endif        break; +    case '~': +      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) { +        Node* absent; +        Node* expr; +        int head_bar; +        int is_range_cutter; + +        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + +        if (PPEEK_IS('|')) { // (?~|generator|absent) +          PINC; +          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + +          head_bar = 1; +          if (PPEEK_IS(')')) { // (?~|)  : absent clear +            PINC; +            r = node_new_update_var_gimmick(np, UPDATE_VAR_RIGHT_RANGE_INIT, +                                            0, env); +            if (r != 0) return r; +            goto end; +          } +        } +        else +          head_bar = 0; + +        r = fetch_token(tok, &p, end, env); +        if (r < 0) return r; +        r = parse_subexp(&absent, tok, term, &p, end, env); +        if (r < 0) { +          onig_node_free(absent); +          return r; +        } + +        expr = NULL_NODE; +        is_range_cutter = 0; +        if (head_bar != 0) { +          Node* top = absent; +          if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) { +            expr = NULL_NODE; +            is_range_cutter = 1; +            //return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; +          } +          else { +            absent = NODE_CAR(top); +            expr   = NODE_CDR(top); +            NODE_CAR(top) = NULL_NODE; +            NODE_CDR(top) = NULL_NODE; +            onig_node_free(top); +            if (IS_NULL(NODE_CDR(expr))) { +              top = expr; +              expr = NODE_CAR(top); +              NODE_CAR(top) = NULL_NODE; +              onig_node_free(top); +            } +          } +        } + +        r = make_absent_tree(np, absent, expr, is_range_cutter, env); +        if (r != 0) { +          return r; +        } +        goto end; +      } +      else { +        return ONIGERR_UNDEFINED_GROUP_OPTION; +      } +      break; + +    case '(': +      /* (?()...) */ +      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) { +        UChar *prev; +        Node* condition; +        int condition_is_checker; + +        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; +        PFETCH(c); +        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + +        if (ONIGENC_IS_CODE_DIGIT(enc, c) +            || c == '-' || c == '+' || c == '<' || c == '\'') { +          UChar* name_end; +          int back_num; +          int exist_level; +          int level; +          enum REF_NUM num_type; +          int is_enclosed; + +          is_enclosed = (c == '<' || c == '\'') ? 1 : 0; +          if (! is_enclosed) +            PUNFETCH; +          prev = p; +          exist_level = 0; +#ifdef USE_BACKREF_WITH_LEVEL +          name_end = NULL_UCHARP; /* no need. escape gcc warning. */ +          r = fetch_name_with_level( +                    (OnigCodePoint )(is_enclosed != 0 ? c : '('), +                    &p, end, &name_end, +                    env, &back_num, &level, &num_type); +          if (r == 1) exist_level = 1; +#else +          r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('), +                         &p, end, &name_end, env, &back_num, &num_type, 1); +#endif +          if (r < 0) { +            if (is_enclosed == 0) { +              goto any_condition; +            } +            else +              return r; +          } + +          condition_is_checker = 1; +          if (num_type != IS_NOT_NUM) { +            if (num_type == IS_REL_NUM) { +              back_num = backref_rel_to_abs(back_num, env); +            } +            if (back_num <= 0) +              return ONIGERR_INVALID_BACKREF; + +            if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { +              if (back_num > env->num_mem || +                  IS_NULL(SCANENV_MEMENV(env)[back_num].node)) +                return ONIGERR_INVALID_BACKREF; +            } + +            condition = node_new_backref_checker(1, &back_num, 0, +#ifdef USE_BACKREF_WITH_LEVEL +                                                 exist_level, level, +#endif +                                                 env); +          } +          else { +            int num; +            int* backs; + +            num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); +            if (num <= 0) { +              onig_scan_env_set_error_string(env, +                        ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); +              return ONIGERR_UNDEFINED_NAME_REFERENCE; +            } +            if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { +              int i; +              for (i = 0; i < num; i++) { +                if (backs[i] > env->num_mem || +                    IS_NULL(SCANENV_MEMENV(env)[backs[i]].node)) +                  return ONIGERR_INVALID_BACKREF; +              } +            } + +            condition = node_new_backref_checker(num, backs, 1, +#ifdef USE_BACKREF_WITH_LEVEL +                                                 exist_level, level, +#endif +                                                 env); +          } + +          if (is_enclosed != 0) { +            if (PEND) goto err_if_else; +            PFETCH(c); +            if (c != ')') goto err_if_else; +          } +        } +        else { +        any_condition: +          PUNFETCH; +          condition_is_checker = 0; +          r = fetch_token(tok, &p, end, env); +          if (r < 0) return r; +          r = parse_subexp(&condition, tok, term, &p, end, env); +          if (r < 0) { +            onig_node_free(condition); +            return r; +          } +        } + +        CHECK_NULL_RETURN_MEMERR(condition); + +        if (PEND) { +        err_if_else: +          onig_node_free(condition); +          return ONIGERR_END_PATTERN_IN_GROUP; +        } + +        if (PPEEK_IS(')')) { /* case: empty body: make backref checker */ +          if (condition_is_checker == 0) { +            onig_node_free(condition); +            return ONIGERR_INVALID_IF_ELSE_SYNTAX; +          } +          PFETCH(c); +          *np = condition; +        } +        else { /* if-else */ +          int then_is_empty; +          Node *Then, *Else; + +          if (PPEEK_IS('|')) { +            PFETCH(c); +            Then = 0; +            then_is_empty = 1; +          } +          else +            then_is_empty = 0; + +          r = fetch_token(tok, &p, end, env); +          if (r < 0) { +            onig_node_free(condition); +            return r; +          } +          r = parse_subexp(&target, tok, term, &p, end, env); +          if (r < 0) { +            onig_node_free(condition); +            onig_node_free(target); +            return r; +          } + +          if (then_is_empty != 0) { +            Else = target; +          } +          else { +            if (NODE_TYPE(target) == NODE_ALT) { +              Then = NODE_CAR(target); +              if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) { +                Else = NODE_CAR(NODE_CDR(target)); +                cons_node_free_alone(NODE_CDR(target)); +              } +              else { +                Else = NODE_CDR(target); +              } +              cons_node_free_alone(target); +            } +            else { +              Then = target; +              Else = 0; +            } +          } + +          *np = node_new_enclosure_if_else(condition, Then, Else); +          if (IS_NULL(*np)) { +            onig_node_free(condition); +            onig_node_free(Then); +            onig_node_free(Else); +            return ONIGERR_MEMORY; +          } +        } +        goto end; +      } +      else { +        return ONIGERR_UNDEFINED_GROUP_OPTION; +      } +      break; +      case '@':        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {  #ifdef USE_NAMED_GROUP @@ -4741,7 +5657,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,            PUNFETCH;          }  #endif -        *np = node_new_enclosure_memory(0); +        *np = node_new_memory(0);          CHECK_NULL_RETURN_MEMERR(*np);          num = scan_env_add_mem_entry(env);          if (num < 0) { @@ -4772,11 +5688,11 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,              break;            case '-':  neg = 1; break; -          case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break; -          case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; +          case 'x':  OPTION_NEGATE(option, ONIG_OPTION_EXTEND,     neg); break; +          case 'i':  OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;            case 's':              if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { -              ONOFF(option, ONIG_OPTION_MULTILINE,  neg); +              OPTION_NEGATE(option, ONIG_OPTION_MULTILINE,  neg);              }              else                return ONIGERR_UNDEFINED_GROUP_OPTION; @@ -4784,17 +5700,17 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,            case 'm':              if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { -              ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0)); +              OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));              }              else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { -              ONOFF(option, ONIG_OPTION_MULTILINE,  neg); +              OPTION_NEGATE(option, ONIG_OPTION_MULTILINE,  neg);              }              else                return ONIGERR_UNDEFINED_GROUP_OPTION;              break;  #ifdef USE_POSIXLINE_OPTION            case 'p': -            ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); +            OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);              break;  #endif            default: @@ -4808,13 +5724,13 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,              return 2; /* option only */            }            else if (c == ':') { -            OnigOptionType prev = env->option; +            OnigOptionType prev = env->options; -            env->option     = option; +            env->options = option;              r = fetch_token(tok, &p, end, env);              if (r < 0) return r;              r = parse_subexp(&target, tok, term, &p, end, env); -            env->option = prev; +            env->options = prev;              if (r < 0) {                onig_node_free(target);                return r; @@ -4837,10 +5753,10 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,      }    }    else { -    if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) +    if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))        goto group; -    *np = node_new_enclosure_memory(0); +    *np = node_new_memory(0);      CHECK_NULL_RETURN_MEMERR(*np);      num = scan_env_add_mem_entry(env);      if (num < 0) return num; @@ -4866,6 +5782,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,      }    } + end:    *src = p;    return 0;  } @@ -4888,7 +5805,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)      return 1;    switch (NODE_TYPE(target)) { -  case NODE_STR: +  case NODE_STRING:      if (! group) {        StrNode* sn = STR_(target);        if (str_node_can_be_split(sn, env->enc)) { @@ -5060,7 +5977,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],            /* char-class expanded multi-char only               compare with string folded at match time. */ -          NSTRING_SET_AMBIG(snode); +          NODE_STRING_SET_AMBIG(snode);          }          else {            r = onig_node_str_cat(snode, buf, buf + len); @@ -5106,13 +6023,13 @@ parse_exp(Node** np, OnigToken* tok, int term,      if (r == 1) group = 1;      else if (r == 2) { /* option only */        Node* target; -      OnigOptionType prev = env->option; +      OnigOptionType prev = env->options; -      env->option = ENCLOSURE_(*np)->o.option; +      env->options = ENCLOSURE_(*np)->o.options;        r = fetch_token(tok, src, end, env);        if (r < 0) return r;        r = parse_subexp(&target, tok, term, src, end, env); -      env->option = prev; +      env->options = prev;        if (r < 0) {          onig_node_free(target);          return r; @@ -5161,7 +6078,7 @@ parse_exp(Node** np, OnigToken* tok, int term,          if (len >= ONIGENC_MBC_MINLEN(env->enc)) {            if (len == enclen(env->enc, STR_(*np)->s)) {//should not enclen_end()              r = fetch_token(tok, src, end, env); -            NSTRING_CLEAR_RAW(*np); +            NODE_STRING_CLEAR_RAW(*np);              goto string_end;            }          } @@ -5176,7 +6093,7 @@ parse_exp(Node** np, OnigToken* tok, int term,              rem = ONIGENC_MBC_MINLEN(env->enc) - len;              (void )node_str_head_pad(STR_(*np), rem, (UChar )0);              if (len + rem == enclen(env->enc, STR_(*np)->s)) { -              NSTRING_CLEAR_RAW(*np); +              NODE_STRING_CLEAR_RAW(*np);                goto string_end;              }            } @@ -5266,7 +6183,7 @@ parse_exp(Node** np, OnigToken* tok, int term,        if (r != 0) return r;        cc = CCLASS_(*np); -      if (IS_IGNORECASE(env->option)) { +      if (IS_IGNORECASE(env->options)) {          IApplyCaseFoldArg iarg;          iarg.env      = env; @@ -5293,12 +6210,12 @@ parse_exp(Node** np, OnigToken* tok, int term,      break;    case TK_ANYCHAR: -    *np = node_new_ctype(CTYPE_ANYCHAR, 0); +    *np = node_new_anychar();      CHECK_NULL_RETURN_MEMERR(*np);      break;    case TK_ANYCHAR_ANYTIME: -    *np = node_new_ctype(CTYPE_ANYCHAR, 0); +    *np = node_new_anychar();      CHECK_NULL_RETURN_MEMERR(*np);      qn = node_new_quantifier(0, REPEAT_INFINITE, 0);      CHECK_NULL_RETURN_MEMERR(qn); @@ -5319,7 +6236,7 @@ parse_exp(Node** np, OnigToken* tok, int term,      CHECK_NULL_RETURN_MEMERR(*np);      break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    case TK_CALL:      {        int gnum = tok->u.call.gnum; @@ -5352,6 +6269,26 @@ parse_exp(Node** np, OnigToken* tok, int term,      }      break; +  case TK_KEEP: +    r = node_new_keep(np, env); +    if (r < 0) return r; +    break; + +  case TK_GENERAL_NEWLINE: +    r = node_new_general_newline(np, env); +    if (r < 0) return r; +    break; + +  case TK_NO_NEWLINE: +    r = node_new_no_newline(np, env); +    if (r < 0) return r; +    break; + +  case TK_TRUE_ANYCHAR: +    r = node_new_true_anychar(np, env); +    if (r < 0) return r; +    break; +    default:      return ONIGERR_PARSER_BUG;      break; @@ -5526,13 +6463,13 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)    return 0;  } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL  static int  make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)  {    int r; -  Node* x = node_new_enclosure_memory(0 /* 0: is not named */); +  Node* x = node_new_memory(0 /* 0: is not named */);    CHECK_NULL_RETURN_MEMERR(x);    NODE_BODY(x) = node; @@ -5560,7 +6497,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,  #endif    scan_env_clear(env); -  env->option         = reg->options; +  env->options        = reg->options;    env->case_fold_flag = reg->case_fold_flag;    env->enc            = reg->enc;    env->syntax         = reg->syntax; @@ -5576,7 +6513,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,    p = (UChar* )pattern;    r = parse_regexp(root, &p, (UChar* )end, env); -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    if (r != 0) return r;    if (env->has_call_zero != 0) { diff --git a/src/regparse.h b/src/regparse.h index 884f4d5..b7260ea 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -33,48 +33,58 @@  /* node type */  typedef enum { -  NODE_STR       = 0, -  NODE_CCLASS    = 1, -  NODE_CTYPE     = 2, -  NODE_BREF      = 3, -  NODE_QUANT      = 4, -  NODE_ENCLOSURE = 5, -  NODE_ANCHOR    = 6, -  NODE_LIST      = 7, -  NODE_ALT       = 8, -  NODE_CALL      = 9 +  NODE_STRING    =  0, +  NODE_CCLASS    =  1, +  NODE_CTYPE     =  2, +  NODE_BACKREF   =  3, +  NODE_QUANT     =  4, +  NODE_ENCLOSURE =  5, +  NODE_ANCHOR    =  6, +  NODE_LIST      =  7, +  NODE_ALT       =  8, +  NODE_CALL      =  9, +  NODE_GIMMICK   = 10  } NodeType; +enum GimmickType { +  GIMMICK_FAIL = 0, +  GIMMICK_KEEP = 1, +  GIMMICK_SAVE = 2, +  GIMMICK_UPDATE_VAR = 3, +}; +  /* node type bit */  #define NODE_TYPE2BIT(type)      (1<<(type)) -#define BIT_NODE_STR        NODE_TYPE2BIT(NODE_STR) +#define BIT_NODE_STRING     NODE_TYPE2BIT(NODE_STRING)  #define BIT_NODE_CCLASS     NODE_TYPE2BIT(NODE_CCLASS)  #define BIT_NODE_CTYPE      NODE_TYPE2BIT(NODE_CTYPE) -#define BIT_NODE_BREF       NODE_TYPE2BIT(NODE_BREF) -#define BIT_NODE_QUANT       NODE_TYPE2BIT(NODE_QUANT) +#define BIT_NODE_BACKREF    NODE_TYPE2BIT(NODE_BACKREF) +#define BIT_NODE_QUANT      NODE_TYPE2BIT(NODE_QUANT)  #define BIT_NODE_ENCLOSURE  NODE_TYPE2BIT(NODE_ENCLOSURE)  #define BIT_NODE_ANCHOR     NODE_TYPE2BIT(NODE_ANCHOR)  #define BIT_NODE_LIST       NODE_TYPE2BIT(NODE_LIST)  #define BIT_NODE_ALT        NODE_TYPE2BIT(NODE_ALT)  #define BIT_NODE_CALL       NODE_TYPE2BIT(NODE_CALL) +#define BIT_NODE_GIMMICK    NODE_TYPE2BIT(NODE_GIMMICK)  #define NODE_IS_SIMPLE_TYPE(node) \    ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ -    (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0) +    (BIT_NODE_STRING | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BACKREF)) != 0)  #define NODE_TYPE(node)             ((node)->u.base.node_type) -#define SET_NODE_TYPE(node, ntype)   (node)->u.base.node_type = (ntype) +#define NODE_SET_TYPE(node, ntype)   (node)->u.base.node_type = (ntype)  #define STR_(node)         (&((node)->u.str))  #define CCLASS_(node)      (&((node)->u.cclass))  #define CTYPE_(node)       (&((node)->u.ctype)) -#define BREF_(node)        (&((node)->u.bref)) -#define QUANT_(node)        (&((node)->u.quant)) -#define ENCLOSURE_(node)     (&((node)->u.enclosure)) +#define BACKREF_(node)     (&((node)->u.backref)) +#define QUANT_(node)       (&((node)->u.quant)) +#define ENCLOSURE_(node)   (&((node)->u.enclosure))  #define ANCHOR_(node)      (&((node)->u.anchor))  #define CONS_(node)        (&((node)->u.cons))  #define CALL_(node)        (&((node)->u.call)) +#define GIMMICK_(node)     (&((node)->u.gimmick))  #define NODE_CAR(node)         (CONS_(node)->car)  #define NODE_CDR(node)         (CONS_(node)->cdr) @@ -83,6 +93,9 @@ typedef enum {  #define NODE_IS_ANYCHAR(node) \    (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) +#define CTYPE_OPTION(node, reg) \ +  (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) +  #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)  #define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) @@ -90,24 +103,25 @@ typedef enum {  #define ENCLOSURE_MEMORY           (1<<0)  #define ENCLOSURE_OPTION           (1<<1)  #define ENCLOSURE_STOP_BACKTRACK   (1<<2) +#define ENCLOSURE_IF_ELSE          (1<<3) -#define NODE_STR_MARGIN         16 -#define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_STRING_MARGIN         16 +#define NODE_STRING_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */  #define NODE_BACKREFS_SIZE       6  #define STRING_RAW                (1<<0) /* by backslashed number */  #define STRING_AMBIG              (1<<1)  #define STRING_DONT_GET_OPT_INFO  (1<<2) -#define NSTRING_LEN(node)             ((node)->u.str.end - (node)->u.str.s) -#define NSTRING_SET_RAW(node)          (node)->u.str.flag |= STRING_RAW -#define NSTRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~STRING_RAW -#define NSTRING_SET_AMBIG(node)        (node)->u.str.flag |= STRING_AMBIG -#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ +#define NODE_STRING_LEN(node)             ((node)->u.str.end - (node)->u.str.s) +#define NODE_STRING_SET_RAW(node)          (node)->u.str.flag |= STRING_RAW +#define NODE_STRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~STRING_RAW +#define NODE_STRING_SET_AMBIG(node)        (node)->u.str.flag |= STRING_AMBIG +#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \    (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO -#define NSTRING_IS_RAW(node)          (((node)->u.str.flag & STRING_RAW)   != 0) -#define NSTRING_IS_AMBIG(node)        (((node)->u.str.flag & STRING_AMBIG) != 0) -#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ +#define NODE_STRING_IS_RAW(node)          (((node)->u.str.flag & STRING_RAW)   != 0) +#define NODE_STRING_IS_AMBIG(node)        (((node)->u.str.flag & STRING_AMBIG) != 0) +#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \    (((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0)  #define BACKREFS_P(br) \ @@ -118,7 +132,7 @@ typedef enum {  #define QUANT_BODY_IS_EMPTY_MEM    2  #define QUANT_BODY_IS_EMPTY_REC    3 -/* status bits */ +/* node status bits */  #define NST_MIN_FIXED             (1<<0)  #define NST_MAX_FIXED             (1<<1)  #define NST_CLEN_FIXED            (1<<2) @@ -136,28 +150,37 @@ typedef enum {  #define NST_BY_NUMBER             (1<<14) /* {n,m} */  #define NST_BY_NAME               (1<<15) /* backref by name */  #define NST_BACKREF               (1<<16) +#define NST_CHECKER               (1<<17) +#define NST_FIXED_OPTION          (1<<18) +#define NST_PROHIBIT_RECURSION    (1<<19) +#define NST_SUPER                 (1<<20)  #define NODE_STATUS(node)           (((Node* )node)->u.base.status)  #define NODE_STATUS_ADD(node,f)     (NODE_STATUS(node) |= (f))  #define NODE_STATUS_REMOVE(node,f)  (NODE_STATUS(node) &= ~(f)) -#define NODE_IS_BY_NUMBER(node)       ((NODE_STATUS(node) & NST_BY_NUMBER) != 0) +#define NODE_IS_BY_NUMBER(node)       ((NODE_STATUS(node) & NST_BY_NUMBER)      != 0)  #define NODE_IS_IN_REAL_REPEAT(node)  ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0) -#define NODE_IS_CALLED(node)          ((NODE_STATUS(node) & NST_CALLED)    != 0) +#define NODE_IS_CALLED(node)          ((NODE_STATUS(node) & NST_CALLED)         != 0)  #define NODE_IS_IN_MULTI_ENTRY(node)  ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0) -#define NODE_IS_RECURSION(node)       ((NODE_STATUS(node) & NST_RECURSION) != 0) +#define NODE_IS_RECURSION(node)       ((NODE_STATUS(node) & NST_RECURSION)      != 0)  #define NODE_IS_IN_ZERO_REPEAT(node)  ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0) -#define NODE_IS_NAMED_GROUP(node)     ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0) -#define NODE_IS_ADDR_FIXED(node)      ((NODE_STATUS(node) & NST_ADDR_FIXED)  != 0) -#define NODE_IS_CLEN_FIXED(node)      ((NODE_STATUS(node) & NST_CLEN_FIXED)  != 0) -#define NODE_IS_MIN_FIXED(node)       ((NODE_STATUS(node) & NST_MIN_FIXED)   != 0) -#define NODE_IS_MAX_FIXED(node)       ((NODE_STATUS(node) & NST_MAX_FIXED)   != 0) -#define NODE_IS_MARK1(node)           ((NODE_STATUS(node) & NST_MARK1)       != 0) -#define NODE_IS_MARK2(node)           ((NODE_STATUS(node) & NST_MARK2)       != 0) -#define NODE_IS_NEST_LEVEL(node)      ((NODE_STATUS(node) & NST_NEST_LEVEL)  != 0) -#define NODE_IS_BY_NAME(node)         ((NODE_STATUS(node) & NST_BY_NAME)     != 0) -#define NODE_IS_BACKREF(node)         ((NODE_STATUS(node) & NST_BACKREF)     != 0) +#define NODE_IS_NAMED_GROUP(node)     ((NODE_STATUS(node) & NST_NAMED_GROUP)  != 0) +#define NODE_IS_ADDR_FIXED(node)      ((NODE_STATUS(node) & NST_ADDR_FIXED)   != 0) +#define NODE_IS_CLEN_FIXED(node)      ((NODE_STATUS(node) & NST_CLEN_FIXED)   != 0) +#define NODE_IS_MIN_FIXED(node)       ((NODE_STATUS(node) & NST_MIN_FIXED)    != 0) +#define NODE_IS_MAX_FIXED(node)       ((NODE_STATUS(node) & NST_MAX_FIXED)    != 0) +#define NODE_IS_MARK1(node)           ((NODE_STATUS(node) & NST_MARK1)        != 0) +#define NODE_IS_MARK2(node)           ((NODE_STATUS(node) & NST_MARK2)        != 0) +#define NODE_IS_NEST_LEVEL(node)      ((NODE_STATUS(node) & NST_NEST_LEVEL)   != 0) +#define NODE_IS_BY_NAME(node)         ((NODE_STATUS(node) & NST_BY_NAME)      != 0) +#define NODE_IS_BACKREF(node)         ((NODE_STATUS(node) & NST_BACKREF)      != 0) +#define NODE_IS_CHECKER(node)         ((NODE_STATUS(node) & NST_CHECKER)      != 0) +#define NODE_IS_FIXED_OPTION(node)    ((NODE_STATUS(node) & NST_FIXED_OPTION) != 0) +#define NODE_IS_SUPER(node)           ((NODE_STATUS(node) & NST_SUPER)        != 0) +#define NODE_IS_PROHIBIT_RECURSION(node) \ +    ((NODE_STATUS(node) & NST_PROHIBIT_RECURSION) != 0)  #define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \      ((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0) @@ -168,8 +191,6 @@ typedef enum {  #define NODE_ANCHOR_BODY(node)    ((node)->body) -#define CALLNODE_REFNUM_UNDEF  -1 -  typedef struct {    NodeType node_type;    int status; @@ -178,7 +199,7 @@ typedef struct {    UChar* end;    unsigned int flag;    int    capa;    /* (allocated size - 1) or 0: use buf[] */ -  UChar  buf[NODE_STR_BUF_SIZE]; +  UChar  buf[NODE_STRING_BUF_SIZE];  } StrNode;  typedef struct { @@ -221,17 +242,22 @@ typedef struct {        int called_state;      } m;      struct { -      OnigOptionType option; +      OnigOptionType options;      } o; +    struct { +      /* body is condition */ +      struct _Node* Then; +      struct _Node* Else; +    } te;    };    /* for multiple call reference */ -  OnigLen min_len; /* min length (byte) */ -  OnigLen max_len; /* max length (byte) */ -  int char_len;         /* character length  */ -  int opt_count;        /* referenced count in optimize_node_left() */ +  OnigLen min_len;   /* min length (byte) */ +  OnigLen max_len;   /* max length (byte) */ +  int char_len;      /* character length  */ +  int opt_count;     /* referenced count in optimize_node_left() */  } EnclosureNode; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL  typedef struct {    int           offset; @@ -266,7 +292,7 @@ typedef struct {    int  back_static[NODE_BACKREFS_SIZE];    int* back_dynamic;    int  nest_level; -} BRefNode; +} BackRefNode;  typedef struct {    NodeType node_type; @@ -291,8 +317,18 @@ typedef struct {    int ctype;    int not; +  OnigOptionType options;  } CtypeNode; +typedef struct { +  NodeType node_type; +  int status; + +  enum GimmickType type; +  int  detail_type; +  int  id; +} GimmickNode; +  typedef struct _Node {    union {      struct { @@ -305,13 +341,14 @@ typedef struct _Node {      CClassNode    cclass;      QuantNode     quant;      EnclosureNode enclosure; -    BRefNode      bref; +    BackRefNode   backref;      AnchorNode    anchor;      ConsAltNode   cons;      CtypeNode     ctype; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL      CallNode      call;  #endif +    GimmickNode   gimmick;    } u;  } Node; @@ -332,7 +369,11 @@ typedef struct {  } MemEnv;  typedef struct { -  OnigOptionType   option; +  enum SaveType type; +} SaveItem; + +typedef struct { +  OnigOptionType   options;    OnigCaseFoldType case_fold_flag;    OnigEncoding     enc;    OnigSyntaxType*  syntax; @@ -346,7 +387,7 @@ typedef struct {    UChar*           error_end;    regex_t*         reg;       /* for reg->names only */    int              num_call; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL    UnsetAddrList*   unset_addr_list;    int              has_call_zero;  #endif @@ -364,6 +405,11 @@ typedef struct {    int has_recursion;  #endif    unsigned int parse_depth; + +  int keep_num; +  int save_num; +  int save_alloc_num; +  SaveItem* saves;  } ScanEnv; @@ -399,6 +445,7 @@ extern int    onig_names_free P_((regex_t* reg));  extern int    onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));  extern int    onig_free_shared_cclass_table P_((void));  extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);  #ifdef ONIG_DEBUG  #ifdef USE_NAMED_GROUP diff --git a/src/regposix.c b/src/regposix.c index bbe52dc..32b11b5 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -2,7 +2,7 @@    regposix.c - Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -114,6 +114,9 @@ onig2posix_error_code(int code)      { ONIGERR_NEVER_ENDING_RECURSION,                     REG_BADPAT },      { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY,      REG_BADPAT },      { ONIGERR_INVALID_CHAR_PROPERTY_NAME,                 REG_BADPAT }, +    { ONIGERR_INVALID_IF_ELSE_SYNTAX,                     REG_BADPAT }, +    { ONIGERR_INVALID_ABSENT_GROUP_PATTERN,               REG_BADPAT }, +    { ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN,     REG_BADPAT },      { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION,         REG_EONIG_BADARG },      { ONIGERR_LIBRARY_IS_NOT_INITIALIZED,                 REG_EONIG_INTERNAL }    }; diff --git a/src/regsyntax.c b/src/regsyntax.c index e751e24..6833e1d 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -2,7 +2,7 @@    regsyntax.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -174,8 +174,13 @@ OnigSyntaxType OnigSyntaxPerl = {     & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )    , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |        ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | +      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | +      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |        ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | -      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) +      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | +      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | +      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | +      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT )    , SYN_GNU_REGEX_BV    , ONIG_OPTION_SINGLELINE    , @@ -199,11 +204,16 @@ OnigSyntaxType OnigSyntaxPerl_NG = {     & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )    , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |        ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | +      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | +      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |        ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |        ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |        ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       |        ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        | -      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) +      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | +      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | +      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | +      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT )    , ( SYN_GNU_REGEX_BV |        ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |        ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) @@ -90,6 +90,7 @@ is_valid_mbc_string(const UChar* p, const UChar* end)    return TRUE;  } +#if 0  static int  is_mbc_newline(const UChar* p, const UChar* end)  { @@ -114,6 +115,7 @@ is_mbc_newline(const UChar* p, const UChar* end)    return 0;  } +#endif  static OnigCodePoint  mbc_to_code(const UChar* p, const UChar* end) @@ -246,43 +248,6 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,    }  } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ -  const UChar* p = *pp; - -  if (ONIGENC_IS_MBC_ASCII(p)) { -    (*pp)++; -    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); -  } -  else { -    (*pp) += enclen(ONIG_ENCODING_UTF8, p); - -    if (*p == 0xc3) { -      int c = *(p + 1); -      if (c >= 0x80) { -        if (c <= (UChar )0x9e) { /* upper */ -          if (c == (UChar )0x97) return FALSE; -          return TRUE; -        } -        else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */ -          if (c == (UChar )'\267') return FALSE; -          return TRUE; -        } -        else if (c == (UChar )0x9f && -                 (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -          return TRUE; -        } -      } -    } -  } - -  return FALSE; -} -#endif - -  static int  get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,  		     const OnigCodePoint* ranges[]) @@ -317,7 +282,7 @@ OnigEncodingType OnigEncodingUTF8 = {    "UTF-8",     /* name */    6,           /* max byte length */    1,           /* min byte length */ -  is_mbc_newline, +  onigenc_is_mbc_newline_0x0a,    mbc_to_code,    code_to_mbclen,    code_to_mbc, diff --git a/test-driver b/test-driver index d306056..8e575b0 100755 --- a/test-driver +++ b/test-driver @@ -3,7 +3,7 @@  scriptversion=2013-07-13.22; # UTC -# Copyright (C) 2011-2013 Free Software Foundation, Inc. +# Copyright (C) 2011-2014 Free Software Foundation, Inc.  #  # This program is free software; you can redistribute it and/or modify  # it under the terms of the GNU General Public License as published by @@ -106,11 +106,14 @@ trap "st=143; $do_exit" 15  # Test script is run here.  "$@" >$log_file 2>&1  estatus=$? +  if test $enable_hard_errors = no && test $estatus -eq 99; then -  estatus=1 +  tweaked_estatus=1 +else +  tweaked_estatus=$estatus  fi -case $estatus:$expect_failure in +case $tweaked_estatus:$expect_failure in    0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;    0:*)   col=$grn res=PASS  recheck=no  gcopy=no;;    77:*)  col=$blu res=SKIP  recheck=no  gcopy=yes;; @@ -119,6 +122,12 @@ case $estatus:$expect_failure in    *:*)   col=$red res=FAIL  recheck=yes gcopy=yes;;  esac +# Report the test outcome and exit status in the logs, so that one can +# know whether the test passed or failed simply by looking at the '.log' +# file, without the need of also peaking into the corresponding '.trs' +# file (automake bug#11814). +echo "$res $test_name (exit status: $estatus)" >>$log_file +  # Report outcome to console.  echo "${col}${res}${std}: $test_name" diff --git a/test/testc.c b/test/testc.c index 725e375..222c9cd 100644 --- a/test/testc.c +++ b/test/testc.c @@ -590,6 +590,88 @@ extern int main(int argc, char* argv[])    x2("\\g<+2>(abc)(ABC){0}", "ABCabc", 0, 6); // relative call by positive number    x2("A\\g'0'|B()", "AAAAB", 0, 5);    x3("(A\\g'0')|B", "AAAAB", 0, 5, 1); +  x2("(a*)(?(1))aa", "aaaaa", 0, 5); +  x2("(a*)(?(-1))aa", "aaaaa", 0, 5); +  x2("(?<name>aaa)(?('name'))aa", "aaaaa", 0, 5); +  x2("(a)(?(1)aa|bb)a", "aaaaa", 0, 4); +  x2("(?:aa|())(?(<1>)aa|bb)a", "aabba", 0, 5); +  x2("(?:aa|())(?('1')aa|bb|cc)a", "aacca", 0, 5); +  x3("(a*)(?(1)aa|a)b", "aaab", 0, 1, 1); +  n("(a)(?(1)a|b)c", "abc"); +  x2("(a)(?(1)|)c", "ac", 0, 2); +  n("(?()aaa|bbb)", "bbb"); +  x2("(a)(?(1+0)b|c)d", "abd", 0, 3); +  x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "ace", 0, 3); +  x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "bce", 0, 3); +  x2("\\R", "\r\n", 0, 2); +  x2("\\R", "\r", 0, 1); +  x2("\\R", "\n", 0, 1); +  x2("\\R", "\x0b", 0, 1); +  n("\\R\\n", "\r\n"); +  n("\\R", "\xc2\x85"); // because euc-jp is not Unicode +  x2("\\N", "a", 0, 1); +  n("\\N", "\n"); +  n("(?m:\\N)", "\n"); +  n("(?-m:\\N)", "\n"); +  x2("\\O", "a", 0, 1); +  x2("\\O", "\n", 0, 1); +  x2("(?m:\\O)", "\n", 0, 1); +  x2("(?-m:\\O)", "\n", 0, 1); +  x2("\\K", "a", 0, 0); +  x2("a\\K", "a", 1, 1); +  x2("a\\Kb", "ab", 1, 2); +  x2("(a\\Kb|ac\\Kd)", "acd", 2, 3); +  x2("(a\\Kb|\\Kac\\K)*", "acababacab", 9, 10); + +  x2("(?~)", "", 0, 0); +  x2("(?~)", "A", 0, 0); +  x2("aaaaa(?~)", "aaaaaaaaaa", 0, 5); +  x2("(?~(?:|aaa))", "aaa", 0, 0); +  x2("(?~aaa|)", "aaa", 0, 0); +  x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // !!! +  x2("/\\*(?~\\*/)\\*/", "/* */ */", 0, 5); +  x2("(?~\\w+)zzzzz", "zzzzz", 0, 5); +  x2("(?~\\w*)zzzzz", "zzzzz", 0, 5); +  x2("(?~A.C|B)", "ABC", 0, 0); +  x2("(?~XYZ|ABC)a", "ABCa", 1, 4); +  x2("(?~XYZ|ABC)a", "aABCa", 0, 1); +  x2("<[^>]*>(?~[<>])</[^>]*>", "<a>vvv</a>   <b>  </b>", 0, 10); +  x2("(?~ab)", "ccc\ndab", 0, 5); +  x2("(?m:(?~ab))", "ccc\ndab", 0, 5); +  x2("(?-m:(?~ab))", "ccc\ndab", 0, 5); + +  // absent with expr +  x2("(?~|78|\\d*)", "123456789", 0, 6); +  x2("(?~|def|(?:abc|de|f){0,100})", "abcdedeabcfdefabc", 0, 11); +  x2("(?~|ab|.*)", "ccc\nddd", 0, 3); +  x2("(?~|ab|\\O*)", "ccc\ndab", 0, 5); +  x2("(?~|ab|\\O{2,10})", "ccc\ndab", 0, 5); +  x2("(?~|ab|\\O{1,10})", "ab", 1, 2); +  n("(?~|ab|\\O{2,10})", "ab"); +  x2("(?~|abc|\\O{1,10})", "abc", 1, 3); +  x2("(?~|ab|\\O{5,10})|abc", "abc", 0, 3); +  x2("(?~|ab|\\O{1,10})", "cccccccccccab", 0, 10); +  x2("(?~|aaa|)", "aaa", 0, 0); +  x2("(?~||a*)", "aaaaaa", 0, 0); +  x2("(?~||a*?)", "aaaaaa", 0, 0); +  x2("(a)(?~|b|\\1)", "aaaaaa", 0, 2); +  x2("(a)(?~|bb|(?:a\\1)*)", "aaaaaa", 0, 5); +  x2("(b|c)(?~|abac|(?:a\\1)*)", "abababacabab", 1, 4); +  n("(?~|c|a*+)a", "aaaaa"); +  x2("(?~|aaaaa|a*+)", "aaaaa", 0, 0); +  x2("(?~|aaaaaa|a*+)b", "aaaaaab", 1, 7); +  x2("(?~|abcd|(?>))", "zzzabcd", 0, 0); + +  // absent range cutter +  x2("(?~|abc)a*", "aaaaaabc", 0, 5); +  x2("(?~|abc)a*z|aaaaaabc", "aaaaaabc", 0, 8); +  x2("(?~|aaaaaa)a*", "aaaaaa", 0, 0); +  x2("(?~|abc)aaaa|aaaabc", "aaaabc", 0, 6); +  x2("(?>(?~|abc))aaaa|aaaabc", "aaaabc", 0, 6); +  x2("(?~|)a", "a", 0, 1); +  n("(?~|a)a", "a"); +  x2("(?~|a)(?~|)a", "a", 0, 1); +  x2("(?~|a).*(?~|)a", "bbbbbbbbbbbbbbbbbbbba", 0, 21);    /*      < ifndef IGNORE_EUC_JP > diff --git a/test/testu.c b/test/testu.c index 017ebef..6ff3a10 100644 --- a/test/testu.c +++ b/test/testu.c @@ -905,6 +905,15 @@ extern int main(int argc, char* argv[])    x2("\000^\000\\\000p\000{\000K\000a\000t\000a\000k\000a\000n\000a\000}\000$\000\000", "\060\277\000\000", 0, 2);    x2("\000\\\000o\000{\0001\0000\0001\000}\000\000", "\000A\000\000", 0, 2);    x2("\000\\\000o\000{\0001\0001\0000\0007\0002\0001\000}\000\000", "\221\321\000\000", 0, 2); +  x2("\000\\\000R\000\000", "\000\015\000\012\000\000", 0, 4); // \R: general newline +  x2("\000\\\000R\000\000", "\000\012\000\000", 0, 2); +  x2("\000\\\000R\000\000", "\000\015\000\000", 0, 2); +  x2("\000\\\000R\000\000", "\000\013\000\000", 0, 2); +  n("\000\\\000R\000\012\000\000", "\000\015\000\012\000\000"); +  x2("\000\\\000R\000\000", "\x00\x85\000\000", 0, 2); +  x2("\000\\\000R\000\000", "\x20\x28\000\000", 0, 2); +  x2("\000\\\000R\000\000", "\x20\x29\000\000", 0, 2); +  n("\000\\\000R\000\000", "\x20\x2a\000\000");    fprintf(stdout,         "\nRESULT   SUCC: %d,  FAIL: %d,  ERROR: %d      (by Oniguruma %s)\n", | 
