summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJörg Frings-Fürst <debian@jff-webhosting.net>2017-08-06 19:01:49 +0200
committerJörg Frings-Fürst <debian@jff-webhosting.net>2017-08-06 19:01:49 +0200
commitac077032be00edc79afc21983f50bc1cdf9af907 (patch)
tree926fa6e062344a84965a4998994243a2ad5bb866
parent995dfd20e78ad16cec678df25422ce032650e3aa (diff)
New upstream version 6.5.0upstream/6.5.0
-rw-r--r--CMakeLists.txt2
-rw-r--r--HISTORY14
-rw-r--r--README7
-rw-r--r--README.md19
-rwxr-xr-xcompile2
-rwxr-xr-xconfig.guess134
-rwxr-xr-xconfig.sub36
-rw-r--r--configure.ac4
-rwxr-xr-xdepcomp2
-rw-r--r--dist.info2
-rw-r--r--doc/RE69
-rw-r--r--doc/RE.ja71
-rw-r--r--index.html3
-rw-r--r--index_ja.html3
-rwxr-xr-xinstall-sh373
-rwxr-xr-xmissing2
-rw-r--r--src/oniguruma.h10
-rw-r--r--src/regcomp.c1102
-rw-r--r--src/regenc.h2
-rw-r--r--src/regerror.c6
-rw-r--r--src/regexec.c487
-rw-r--r--src/regint.h119
-rw-r--r--src/regparse.c1075
-rw-r--r--src/regparse.h159
-rw-r--r--src/regposix.c5
-rw-r--r--src/regsyntax.c16
-rw-r--r--src/utf8.c41
-rwxr-xr-xtest-driver15
-rw-r--r--test/testc.c82
-rw-r--r--test/testu.c9
30 files changed, 2946 insertions, 925 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60ce397..b40fb2c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 2.8)
project(oniguruma C)
set(PACKAGE onig)
-set(PACKAGE_VERSION "6.4.0")
+set(PACKAGE_VERSION "6.5.0")
set(USE_COMBINATION_EXPLOSION_CHECK 0)
set(USE_CRNL_AS_LINE_TERMINATOR 0)
diff --git a/HISTORY b/HISTORY
index ca56a4d..9db4712 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,5 +1,19 @@
History
+2017/08/03: Version 6.5.0
+
+2017/07/30: [new] support Absent clear (Absent functions)
+2017/07/25: abolish configure option: --enable-combination-explosion-check
+2017/07/23: [new] support Absent functions (?~...)
+2017/07/14: fix #65: SIZEOF_SIZE_T doesn't exist on certain architectures
+2017/07/11: [new] support \O (true anychar)
+2017/07/10: [new] support \K (keep)
+2017/07/10: add new node type: NODE_GIMMICK
+2017/07/07: [new] support \N (no newline)
+2017/07/05: [new] support \R (general newline)
+2017/07/05: [new] support if-then-else syntax
+2017/07/04: [new] support backref validity checker
+
2017/07/03: Version 6.4.0
2017/06/30: fix memory leaks
diff --git a/README b/README
index a3d3ab4..cc2b51f 100644
--- a/README
+++ b/README
@@ -1,9 +1,14 @@
-README 2016/05/06
+README 2017/07/08
Oniguruma ---- (C) K.Kosako <kkosako0@gmail.com>
https://github.com/kkos/oniguruma
+FIXED Security Issues:
+ CVE-2017-9224, CVE-2017-9225, CVE-2017-9226
+ CVE-2017-9227, CVE-2017-9228, CVE-2017-9229
+
+---
Oniguruma is a regular expressions library.
The characteristics of this library is that different character encoding
for every regular expression object can be specified.
diff --git a/README.md b/README.md
index b3aad59..ec84884 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,12 @@ Oniguruma
https://github.com/kkos/oniguruma
+FIXED Security Issues:
+--------------------------
+ CVE-2017-9224, CVE-2017-9225, CVE-2017-9226
+ CVE-2017-9227, CVE-2017-9228, CVE-2017-9229
+
+
Oniguruma is a regular expressions library.
The characteristics of this library is that different character encoding
for every regular expression object can be specified.
@@ -20,6 +26,19 @@ Supported character encodings:
* CP1251: contributed by Byte
+New feature of version 6.5.0
+--------------------------
+
+* NEW: \K (keep)
+* NEW: \R (general newline) \N (no newline)
+* NEW: \O (true anychar)
+* NEW: if-then-else syntax (?(...)...\|...)
+* NEW: Backreference validity checker (*original)
+* NEW: Absent repeater (?~absent)
+* NEW: Absent expression (?~|absent|expr) (*original)
+* NEW: Absent range cutter (?~|absent) (*original)
+
+
New feature of version 6.4.0
--------------------------
diff --git a/compile b/compile
index 531136b..a85b723 100755
--- a/compile
+++ b/compile
@@ -3,7 +3,7 @@
scriptversion=2012-10-14.11; # UTC
-# Copyright (C) 1999-2013 Free Software Foundation, Inc.
+# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
diff --git a/config.guess b/config.guess
index bf5ad89..1659250 100755
--- a/config.guess
+++ b/config.guess
@@ -1,8 +1,8 @@
#! /bin/sh
# Attempt to guess a canonical system name.
-# Copyright 1992-2016 Free Software Foundation, Inc.
+# Copyright 1992-2015 Free Software Foundation, Inc.
-timestamp='2016-09-11'
+timestamp='2015-08-20'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -27,7 +27,7 @@ timestamp='2016-09-11'
# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
#
# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
#
# Please send patches to <config-patches@gnu.org>.
@@ -50,7 +50,7 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright 1992-2016 Free Software Foundation, Inc.
+Copyright 1992-2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -186,12 +186,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*) machine=${UNAME_MACHINE_ARCH}-unknown ;;
esac
# The Operating System including object format, if it has switched
- # to ELF recently (or will in the future) and ABI.
+ # to ELF recently, or will in the future.
case "${UNAME_MACHINE_ARCH}" in
- earm*)
- os=netbsdelf
- ;;
- arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+ arm*|earm*|i386|m68k|ns32k|sh3*|sparc|vax)
eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ELF__
@@ -240,10 +237,6 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
exit ;;
- *:LibertyBSD:*:*)
- UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
- echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE}
- exit ;;
*:ekkoBSD:*:*)
echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
exit ;;
@@ -275,42 +268,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1`
case "$ALPHA_CPU_TYPE" in
"EV4 (21064)")
- UNAME_MACHINE=alpha ;;
+ UNAME_MACHINE="alpha" ;;
"EV4.5 (21064)")
- UNAME_MACHINE=alpha ;;
+ UNAME_MACHINE="alpha" ;;
"LCA4 (21066/21068)")
- UNAME_MACHINE=alpha ;;
+ UNAME_MACHINE="alpha" ;;
"EV5 (21164)")
- UNAME_MACHINE=alphaev5 ;;
+ UNAME_MACHINE="alphaev5" ;;
"EV5.6 (21164A)")
- UNAME_MACHINE=alphaev56 ;;
+ UNAME_MACHINE="alphaev56" ;;
"EV5.6 (21164PC)")
- UNAME_MACHINE=alphapca56 ;;
+ UNAME_MACHINE="alphapca56" ;;
"EV5.7 (21164PC)")
- UNAME_MACHINE=alphapca57 ;;
+ UNAME_MACHINE="alphapca57" ;;
"EV6 (21264)")
- UNAME_MACHINE=alphaev6 ;;
+ UNAME_MACHINE="alphaev6" ;;
"EV6.7 (21264A)")
- UNAME_MACHINE=alphaev67 ;;
+ UNAME_MACHINE="alphaev67" ;;
"EV6.8CB (21264C)")
- UNAME_MACHINE=alphaev68 ;;
+ UNAME_MACHINE="alphaev68" ;;
"EV6.8AL (21264B)")
- UNAME_MACHINE=alphaev68 ;;
+ UNAME_MACHINE="alphaev68" ;;
"EV6.8CX (21264D)")
- UNAME_MACHINE=alphaev68 ;;
+ UNAME_MACHINE="alphaev68" ;;
"EV6.9A (21264/EV69A)")
- UNAME_MACHINE=alphaev69 ;;
+ UNAME_MACHINE="alphaev69" ;;
"EV7 (21364)")
- UNAME_MACHINE=alphaev7 ;;
+ UNAME_MACHINE="alphaev7" ;;
"EV7.9 (21364A)")
- UNAME_MACHINE=alphaev79 ;;
+ UNAME_MACHINE="alphaev79" ;;
esac
# A Pn.n version is a patched version.
# A Vn.n version is a released version.
# A Tn.n version is a released field test version.
# A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r.
- echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
exitcode=$?
trap '' 0
@@ -383,16 +376,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
eval $set_cc_for_build
- SUN_ARCH=i386
+ SUN_ARCH="i386"
# If there is a compiler, see if it is configured for 64-bit objects.
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
# This test works for both compilers.
- if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
- SUN_ARCH=x86_64
+ SUN_ARCH="x86_64"
fi
fi
echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
@@ -417,7 +410,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
exit ;;
sun*:*:4.2BSD:*)
UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
- test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3
+ test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
case "`/bin/arch`" in
sun3)
echo m68k-sun-sunos${UNAME_RELEASE}
@@ -642,13 +635,13 @@ EOF
sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
case "${sc_cpu_version}" in
- 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
- 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
+ 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+ 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
532) # CPU_PA_RISC2_0
case "${sc_kernel_bits}" in
- 32) HP_ARCH=hppa2.0n ;;
- 64) HP_ARCH=hppa2.0w ;;
- '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20
+ 32) HP_ARCH="hppa2.0n" ;;
+ 64) HP_ARCH="hppa2.0w" ;;
+ '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
esac ;;
esac
fi
@@ -687,11 +680,11 @@ EOF
exit (0);
}
EOF
- (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+ (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
test -z "$HP_ARCH" && HP_ARCH=hppa
fi ;;
esac
- if [ ${HP_ARCH} = hppa2.0w ]
+ if [ ${HP_ARCH} = "hppa2.0w" ]
then
eval $set_cc_for_build
@@ -704,12 +697,12 @@ EOF
# $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
# => hppa64-hp-hpux11.23
- if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
+ if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
grep -q __LP64__
then
- HP_ARCH=hppa2.0w
+ HP_ARCH="hppa2.0w"
else
- HP_ARCH=hppa64
+ HP_ARCH="hppa64"
fi
fi
echo ${HP_ARCH}-hp-hpux${HPUX_REV}
@@ -814,14 +807,14 @@ EOF
echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
- FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
- FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+ FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
5000:UNIX_System_V:4.*:*)
- FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
@@ -903,7 +896,7 @@ EOF
exit ;;
*:GNU/*:*:*)
# other systems with GNU libc and userland
- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
+ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
exit ;;
i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix
@@ -926,7 +919,7 @@ EOF
EV68*) UNAME_MACHINE=alphaev68 ;;
esac
objdump --private-headers /bin/sh | grep -q ld.so.1
- if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
+ if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
arc:Linux:*:* | arceb:Linux:*:*)
@@ -972,9 +965,6 @@ EOF
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
- k1om:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
m32r*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
@@ -1032,9 +1022,6 @@ EOF
ppcle:Linux:*:*)
echo powerpcle-unknown-linux-${LIBC}
exit ;;
- riscv32:Linux:*:* | riscv64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
exit ;;
@@ -1133,7 +1120,7 @@ EOF
# uname -m prints for DJGPP always 'pc', but it prints nothing about
# the processor, so we play safe by assuming i586.
# Note: whatever this is, it MUST be the same as what config.sub
- # prints for the "djgpp" host, or else GDB configure will decide that
+ # prints for the "djgpp" host, or else GDB configury will decide that
# this is a cross-build.
echo i586-pc-msdosdjgpp
exit ;;
@@ -1282,9 +1269,6 @@ EOF
SX-8R:SUPER-UX:*:*)
echo sx8r-nec-superux${UNAME_RELEASE}
exit ;;
- SX-ACE:SUPER-UX:*:*)
- echo sxace-nec-superux${UNAME_RELEASE}
- exit ;;
Power*:Rhapsody:*:*)
echo powerpc-apple-rhapsody${UNAME_RELEASE}
exit ;;
@@ -1298,9 +1282,9 @@ EOF
UNAME_PROCESSOR=powerpc
fi
if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
- if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
case $UNAME_PROCESSOR in
@@ -1322,7 +1306,7 @@ EOF
exit ;;
*:procnto*:*:* | *:QNX:[0123456789]*:*)
UNAME_PROCESSOR=`uname -p`
- if test "$UNAME_PROCESSOR" = x86; then
+ if test "$UNAME_PROCESSOR" = "x86"; then
UNAME_PROCESSOR=i386
UNAME_MACHINE=pc
fi
@@ -1353,7 +1337,7 @@ EOF
# "uname -m" is not consistent, so use $cputype instead. 386
# is converted to i386 for consistency with other x86
# operating systems.
- if test "$cputype" = 386; then
+ if test "$cputype" = "386"; then
UNAME_MACHINE=i386
else
UNAME_MACHINE="$cputype"
@@ -1395,7 +1379,7 @@ EOF
echo i386-pc-xenix
exit ;;
i*86:skyos:*:*)
- echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'`
+ echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
exit ;;
i*86:rdos:*:*)
echo ${UNAME_MACHINE}-pc-rdos
@@ -1406,25 +1390,23 @@ EOF
x86_64:VMkernel:*:*)
echo ${UNAME_MACHINE}-unknown-esx
exit ;;
- amd64:Isilon\ OneFS:*:*)
- echo x86_64-unknown-onefs
- exit ;;
esac
cat >&2 <<EOF
$0: unable to guess system type
-This script (version $timestamp), has failed to recognize the
-operating system you are using. If your script is old, overwrite
-config.guess and config.sub with the latest versions from:
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
and
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
-If $0 has already been updated, send the following data and any
-information you think might be pertinent to config-patches@gnu.org to
-provide the necessary information to handle your system.
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
config.guess timestamp = $timestamp
diff --git a/config.sub b/config.sub
index cc69b06..1acc966 100755
--- a/config.sub
+++ b/config.sub
@@ -1,8 +1,8 @@
#! /bin/sh
# Configuration validation subroutine script.
-# Copyright 1992-2016 Free Software Foundation, Inc.
+# Copyright 1992-2015 Free Software Foundation, Inc.
-timestamp='2016-09-05'
+timestamp='2015-08-20'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -33,7 +33,7 @@ timestamp='2016-09-05'
# Otherwise, we print the canonical config type on stdout and succeed.
# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
@@ -53,7 +53,8 @@ timestamp='2016-09-05'
me=`echo "$0" | sed -e 's,.*/,,'`
usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+ $0 [OPTION] ALIAS
Canonicalize a configuration name.
@@ -67,7 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
-Copyright 1992-2016 Free Software Foundation, Inc.
+Copyright 1992-2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -117,7 +118,7 @@ case $maybe_os in
nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
- kopensolaris*-gnu* | cloudabi*-eabi* | \
+ kopensolaris*-gnu* | \
storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
@@ -520,7 +521,7 @@ case $basic_machine in
basic_machine=i386-pc
os=-aros
;;
- asmjs)
+ asmjs)
basic_machine=asmjs-unknown
;;
aux)
@@ -643,14 +644,6 @@ case $basic_machine in
basic_machine=m68k-bull
os=-sysv3
;;
- e500v[12])
- basic_machine=powerpc-unknown
- os=$os"spe"
- ;;
- e500v[12]-*)
- basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=$os"spe"
- ;;
ebmon29k)
basic_machine=a29k-amd
os=-ebmon
@@ -1030,7 +1023,7 @@ case $basic_machine in
ppc-* | ppcbe-*)
basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
- ppcle | powerpclittle)
+ ppcle | powerpclittle | ppc-le | powerpc-little)
basic_machine=powerpcle-unknown
;;
ppcle-* | powerpclittle-*)
@@ -1040,7 +1033,7 @@ case $basic_machine in
;;
ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
- ppc64le | powerpc64little)
+ ppc64le | powerpc64little | ppc64-le | powerpc64-little)
basic_machine=powerpc64le-unknown
;;
ppc64le-* | powerpc64little-*)
@@ -1390,14 +1383,14 @@ case $os in
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
- | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
+ | -bitrig* | -openbsd* | -solidbsd* \
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+ | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-musl* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
@@ -1406,8 +1399,7 @@ case $os in
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
- | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
- | -onefs* | -tirtos* | -phoenix*)
+ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
@@ -1539,8 +1531,6 @@ case $os in
;;
-nacl*)
;;
- -ios)
- ;;
-none)
;;
*)
diff --git a/configure.ac b/configure.ac
index 688d15b..efaf5e1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT(onig, 6.4.0)
+AC_INIT(onig, 6.5.0)
AC_CONFIG_MACRO_DIR([m4])
@@ -16,7 +16,7 @@ AC_SUBST(STATISTICS)
dnl check for COMBINATION_EXPLOSION
AC_ARG_ENABLE(combination-explosion-check,
- [ --enable-combination-explosion-check enable combination explosion check],
+ [ --enable-combination-explosion-check deprecated],
[comb_expl_check=$enableval])
if test "${comb_expl_check}" = yes; then
AC_DEFINE(USE_COMBINATION_EXPLOSION_CHECK,1,[Define if combination explosion check])
diff --git a/depcomp b/depcomp
index 4ebd5b3..fc98710 100755
--- a/depcomp
+++ b/depcomp
@@ -3,7 +3,7 @@
scriptversion=2013-05-30.07; # UTC
-# Copyright (C) 1999-2013 Free Software Foundation, Inc.
+# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
diff --git a/dist.info b/dist.info
index 0e43e6b..76575fe 100644
--- a/dist.info
+++ b/dist.info
@@ -1,7 +1,7 @@
--- This file is part of LuaDist project
name = "onig"
-version = "6.4.0"
+version = "6.5.0"
desc = "Oniguruma is a regular expressions library."
author = "K.Kosako"
diff --git a/doc/RE b/doc/RE
index 729e71c..16cc888 100644
--- a/doc/RE
+++ b/doc/RE
@@ -1,4 +1,4 @@
-Oniguruma Regular Expressions Version 6.4.0 2017/06/28
+Oniguruma Regular Expressions Version 6.5.0 2017/07/30
syntax: ONIG_SYNTAX_RUBY (default)
@@ -52,8 +52,8 @@ syntax: ONIG_SYNTAX_RUBY (default)
Not Unicode:
\t, \n, \v, \f, \r, \x20
- Unicode:
- 0009, 000A, 000B, 000C, 000D, 0085(NEL),
+ Unicode case:
+ U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL),
General_Category -- Line_Separator
-- Paragraph_Separator
-- Space_Separator
@@ -70,6 +70,16 @@ syntax: ONIG_SYNTAX_RUBY (default)
\H non-hexdigit char
+ \R general newline (* can't be used in character-class)
+ "\r\n" or \n,\v,\f,\r (* but doesn't backtrack from \r\n to \r)
+
+ Unicode case:
+ "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029
+
+ \N negative newline (?-m:.)
+
+ \O true anychar (?m:.) (* original function)
+
Character Property
@@ -133,6 +143,8 @@ syntax: ONIG_SYNTAX_RUBY (default)
\Z end of string, or before newline at the end
\z end of string
\G where the current search attempt begins
+ \K keep (keep start position of the result string)
+
6. Character class
@@ -183,9 +195,9 @@ syntax: ONIG_SYNTAX_RUBY (default)
Final_Punctuation | Initial_Punctuation | Other_Punctuation |
Open_Punctuation
space Space_Separator | Line_Separator | Paragraph_Separator |
- 0009 | 000A | 000B | 000C | 000D | 0085
+ U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085
upper Uppercase_Letter
- xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
+ xdigit U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066
(0-9, a-f, A-F)
word Letter | Mark | Decimal_Number | Connector_Punctuation
@@ -228,6 +240,50 @@ syntax: ONIG_SYNTAX_RUBY (default)
Assigning the same name to two or more subexps is allowed.
+ <Absent functions>
+
+ (?~absent) Absent repeater (* proposed by Tanaka Akira)
+ This works like .* (more precisely \O*), but it is
+ limited by the range that does not include the string
+ match with absent.
+ This is a written abbreviation of (?~|absent|\O*).
+ \O* is used as a repeater.
+
+ (?~|absent|exp) Absent expression (* original)
+ This works like "exp", but it is limited by the range
+ that does not include the string match with absent.
+
+ ex. (?~|345|\d*) "12345678" ==> "12", "1", ""
+
+ (?~|absent) Absent cutter (* original)
+ After passing this operator, the right range of the string is limited
+ at the point that does not include the string match with
+ absent.
+
+ (?~|) Absent clear
+ Clear the effects caused by Absent cutters.
+ (* This operation is not cancelled by backtrack.)
+
+ * Nested Absent functions are not supported and the behavior
+ is undefined.
+
+
+ (?(condition_exp)then_exp|else_exp) if-then-else
+ (?(condition_exp)then_exp) if-then
+
+ condition_exp can be a backreference number/name or a normal
+ regular expression.
+ When condition_exp is a backreference, both then_exp and
+ else_exp can be omitted.
+ Then it works as a backreference validity checker.
+
+ [ backreference validity checker ] (* original)
+
+ (?(n)), (?(-n)), (?(+n)), (?(n+level)) ...
+ (?(<n>)), (?('-n')), (?(<+n>)) ...
+ (?(<name>)), (?('name')), (?(<name+level>)) ...
+
+
8. Backreferences
@@ -282,7 +338,7 @@ syntax: ONIG_SYNTAX_RUBY (default)
p r.match("<foo>f<bar>bbb</bar>f</foo>").captures
-9. Subexp calls ("Tanaka Akira special")
+9. Subexp calls ("Tanaka Akira special") (* original function)
When we say "call a group," it actually means, "re-execute the subexp in
that group."
@@ -367,7 +423,6 @@ A-3. Missing features compared with perl 5.8.0
+ \l,\u,\L,\U, \X, \C
+ (?{code})
+ (??{code})
- + (?(condition)yes-pat|no-pat)
* \Q...\E
This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
diff --git a/doc/RE.ja b/doc/RE.ja
index 08cbefc..c05468d 100644
--- a/doc/RE.ja
+++ b/doc/RE.ja
@@ -1,4 +1,4 @@
-鬼車 正規表現 Version 6.4.0 2017/06/28
+鬼車 正規表現 Version 6.5.0 2017/07/30
使用文法: ONIG_SYNTAX_RUBY (既定値)
@@ -35,7 +35,7 @@
3. 文字種
- . 任意文字 (改行を除く)
+ . 任意文字 (改行を除く: オプションに依存)
\w 単語構成文字
@@ -53,7 +53,7 @@
\t, \n, \v, \f, \r, \x20
Unicodeの場合:
- 0009, 000A, 000B, 000C, 000D, 0085(NEL),
+ U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL),
General_Category -- Line_Separator
-- Paragraph_Separator
-- Space_Separator
@@ -70,6 +70,16 @@
\H 非16進数字
+ \R 汎改行 (* 文字集合の中では使用できない)
+ "\r\n" or \n,\v,\f,\r (* 但し \r\nから\rにはバックトラックしない)
+
+ Unicodeの場合:
+ "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029
+
+ \N 非改行文字 (?-m:.)
+
+ \O 真任意文字 (?m:.) (* 原作)
+
Character Property
@@ -133,6 +143,8 @@
\Z 文字列末尾、または文字列末尾の改行の直前
\z 文字列末尾
\G 照合開始位置
+ \K 保持 (結果の開始位置をこの位置に保つ)
+
6. 文字集合
@@ -182,9 +194,9 @@
Final_Punctuation | Initial_Punctuation | Other_Punctuation |
Open_Punctuation
space Space_Separator | Line_Separator | Paragraph_Separator |
- 0009 | 000A | 000B | 000C | 000D | 0085
+ U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085
upper Uppercase_Letter
- xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
+ xdigit U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066
(0-9, a-f, A-F)
word Letter | Mark | Decimal_Number | Connector_Punctuation
@@ -230,6 +242,52 @@
この場合には、この名前を使用した後方参照は可能であるが、
部分式呼出しはできない。
+ <不在機能群>
+
+ (?~不在式) 不在繰り返し (*原案 田中哲)
+ これは.*のように(より正確には\O*)動作するが、不在式に
+ 適合する文字列を含まない範囲に制限される。
+ これは(?~|不在式|\O*)の省略表記である。
+ \O*の部分はマルチラインオプション(?m)の影響を受けない。
+
+ (?~|不在式|式) 不在式 (* 原作)
+ これは"式"のように動作するが、不在式に適合する文字列を
+ 含まない範囲に制限される。
+
+ 例 (?~|345|\d*) "12345678" ==> "12", "1", ""
+
+ (?~|不在式) 不在切断 (* 原作)
+ この演算子を通過した後は、対象文字列の適合範囲の最後が
+ 不在式に適合する文字列を含まない範囲に制限される。
+
+ (?~|) 不在消去
+ 不在切断の効果を消して、初期状態にする。
+ (* この演算子の効果は後退再試行で無効化されない)
+
+ * 不在機能の入れ子はサポートしておらず、挙動は不定とする。
+
+
+ (?(条件式)成功式|失敗式) 条件式が成功すれば成功式、失敗すれば失敗式を実行する
+ この機能の存在理由は、成功式が失敗しても失敗式には
+ 行かないこと。これは他の正規表現で書くことができない。
+ もうひとつは、条件式が後方参照のとき、後方参照値の有効性
+ を調べる(文字列とマッチングはしない)意味になる。
+
+ (?(条件式)成功式) 条件式が成功すれば成功式を実行する
+ (条件式が通常の式のときには、この構文は不必要だが
+ 今のところエラーにはしない。)
+
+
+ 条件式は後方参照または通常の式を使用できる。
+ 条件式が後方参照の場合、成功式と失敗式の両方を省略可能であり、
+ この場合、後方参照値有効性を調べる(成功/失敗)機能のみになる。
+
+ [後方参照値有効性確認器] (* 原作)
+ (?(n)), (?(-n)), (?(+n)), (?(n+level)) ...
+ (?(<n>)), (?('-n')), (?(<+n>)) ...
+ (?(<name>)), (?('name')), (?(<name+level>)) ...
+
+
8. 後方参照
@@ -288,7 +346,7 @@
-9. 部分式呼出し ("田中哲スペシャル")
+9. 部分式呼出し ("田中哲スペシャル") (* 原作)
\g<name> 名前指定呼出し
\g'name' 名前指定呼出し
@@ -373,7 +431,6 @@
+ \l,\u,\L,\U, \X, \C
+ (?{code})
+ (??{code})
- + (?(condition)yes-pat|no-pat)
* \Q...\E
但しONIG_SYNTAX_PERLとONIG_SYNTAX_JAVAでは有効
diff --git a/index.html b/index.html
index 59de4c6..9d54c60 100644
--- a/index.html
+++ b/index.html
@@ -8,7 +8,7 @@
<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
<p>
-(c) K.Kosako, updated at: 2017/06/30
+(c) K.Kosako, updated at: 2017/08/03
</p>
<dl>
@@ -16,6 +16,7 @@
<dt><b>What's new</b>
</font>
<ul>
+<li>2017/08/03: Version 6.5.0 released.</li>
<li>2017/07/03: Version 6.4.0 released.</li>
<li>2017/05/29: Version 6.3.0 released.</li>
<li>2017/04/08: Version 6.2.0 released.</li>
diff --git a/index_ja.html b/index_ja.html
index 502f460..52f0412 100644
--- a/index_ja.html
+++ b/index_ja.html
@@ -8,7 +8,7 @@
<h1>鬼車</h1>
<p>
-(c) K.Kosako, 最終更新: 2017/06/30
+(c) K.Kosako, 最終更新: 2017/08/03
</p>
<dl>
@@ -16,6 +16,7 @@
<dt><b>更新情報</b>
</font>
<ul>
+<li>2017/08/03: Version 6.5.0 リリース</li>
<li>2017/07/03: Version 6.4.0 リリース</li>
<li>2017/05/29: Version 6.3.0 リリース</li>
<li>2017/04/08: Version 6.2.0 リリース</li>
diff --git a/install-sh b/install-sh
index 377bb86..59990a1 100755
--- a/install-sh
+++ b/install-sh
@@ -1,7 +1,7 @@
#!/bin/sh
# install - install a program, script, or datafile
-scriptversion=2011-11-20.07; # UTC
+scriptversion=2014-09-12.12; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
@@ -41,19 +41,15 @@ scriptversion=2011-11-20.07; # UTC
# This script is compatible with the BSD install script, but was written
# from scratch.
+tab=' '
nl='
'
-IFS=" "" $nl"
+IFS=" $tab$nl"
-# set DOITPROG to echo to test this script
+# Set DOITPROG to "echo" to test this script.
-# Don't use :- since 4.3BSD and earlier shells don't like it.
doit=${DOITPROG-}
-if test -z "$doit"; then
- doit_exec=exec
-else
- doit_exec=$doit
-fi
+doit_exec=${doit:-exec}
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
@@ -68,17 +64,6 @@ mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
-posix_glob='?'
-initialize_posix_glob='
- test "$posix_glob" != "?" || {
- if (set -f) 2>/dev/null; then
- posix_glob=
- else
- posix_glob=:
- fi
- }
-'
-
posix_mkdir=
# Desired mode of installed file.
@@ -97,7 +82,7 @@ dir_arg=
dst_arg=
copy_on_change=false
-no_target_directory=
+is_target_a_directory=possibly
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
@@ -137,46 +122,57 @@ while test $# -ne 0; do
-d) dir_arg=true;;
-g) chgrpcmd="$chgrpprog $2"
- shift;;
+ shift;;
--help) echo "$usage"; exit $?;;
-m) mode=$2
- case $mode in
- *' '* | *' '* | *'
-'* | *'*'* | *'?'* | *'['*)
- echo "$0: invalid mode: $mode" >&2
- exit 1;;
- esac
- shift;;
+ case $mode in
+ *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
+ echo "$0: invalid mode: $mode" >&2
+ exit 1;;
+ esac
+ shift;;
-o) chowncmd="$chownprog $2"
- shift;;
+ shift;;
-s) stripcmd=$stripprog;;
- -t) dst_arg=$2
- # Protect names problematic for 'test' and other utilities.
- case $dst_arg in
- -* | [=\(\)!]) dst_arg=./$dst_arg;;
- esac
- shift;;
+ -t)
+ is_target_a_directory=always
+ dst_arg=$2
+ # Protect names problematic for 'test' and other utilities.
+ case $dst_arg in
+ -* | [=\(\)!]) dst_arg=./$dst_arg;;
+ esac
+ shift;;
- -T) no_target_directory=true;;
+ -T) is_target_a_directory=never;;
--version) echo "$0 $scriptversion"; exit $?;;
- --) shift
- break;;
+ --) shift
+ break;;
- -*) echo "$0: invalid option: $1" >&2
- exit 1;;
+ -*) echo "$0: invalid option: $1" >&2
+ exit 1;;
*) break;;
esac
shift
done
+# We allow the use of options -d and -T together, by making -d
+# take the precedence; this is for compatibility with GNU install.
+
+if test -n "$dir_arg"; then
+ if test -n "$dst_arg"; then
+ echo "$0: target directory not allowed when installing a directory." >&2
+ exit 1
+ fi
+fi
+
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
@@ -208,6 +204,15 @@ if test $# -eq 0; then
fi
if test -z "$dir_arg"; then
+ if test $# -gt 1 || test "$is_target_a_directory" = always; then
+ if test ! -d "$dst_arg"; then
+ echo "$0: $dst_arg: Is not a directory." >&2
+ exit 1
+ fi
+ fi
+fi
+
+if test -z "$dir_arg"; then
do_exit='(exit $ret); exit $ret'
trap "ret=129; $do_exit" 1
trap "ret=130; $do_exit" 2
@@ -223,16 +228,16 @@ if test -z "$dir_arg"; then
*[0-7])
if test -z "$stripcmd"; then
- u_plus_rw=
+ u_plus_rw=
else
- u_plus_rw='% 200'
+ u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
*)
if test -z "$stripcmd"; then
- u_plus_rw=
+ u_plus_rw=
else
- u_plus_rw=,u+rw
+ u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
@@ -269,41 +274,15 @@ do
# If destination is a directory, append the input filename; won't work
# if double slashes aren't ignored.
if test -d "$dst"; then
- if test -n "$no_target_directory"; then
- echo "$0: $dst_arg: Is a directory" >&2
- exit 1
+ if test "$is_target_a_directory" = never; then
+ echo "$0: $dst_arg: Is a directory" >&2
+ exit 1
fi
dstdir=$dst
dst=$dstdir/`basename "$src"`
dstdir_status=0
else
- # Prefer dirname, but fall back on a substitute if dirname fails.
- dstdir=`
- (dirname "$dst") 2>/dev/null ||
- expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$dst" : 'X\(//\)[^/]' \| \
- X"$dst" : 'X\(//\)$' \| \
- X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
- echo X"$dst" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'
- `
-
+ dstdir=`dirname "$dst"`
test -d "$dstdir"
dstdir_status=$?
fi
@@ -314,74 +293,81 @@ do
if test $dstdir_status != 0; then
case $posix_mkdir in
'')
- # Create intermediate dirs using mode 755 as modified by the umask.
- # This is like FreeBSD 'install' as of 1997-10-28.
- umask=`umask`
- case $stripcmd.$umask in
- # Optimize common cases.
- *[2367][2367]) mkdir_umask=$umask;;
- .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
-
- *[0-7])
- mkdir_umask=`expr $umask + 22 \
- - $umask % 100 % 40 + $umask % 20 \
- - $umask % 10 % 4 + $umask % 2
- `;;
- *) mkdir_umask=$umask,go-w;;
- esac
-
- # With -d, create the new directory with the user-specified mode.
- # Otherwise, rely on $mkdir_umask.
- if test -n "$dir_arg"; then
- mkdir_mode=-m$mode
- else
- mkdir_mode=
- fi
-
- posix_mkdir=false
- case $umask in
- *[123567][0-7][0-7])
- # POSIX mkdir -p sets u+wx bits regardless of umask, which
- # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
- ;;
- *)
- tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
- trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
-
- if (umask $mkdir_umask &&
- exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
- then
- if test -z "$dir_arg" || {
- # Check for POSIX incompatibilities with -m.
- # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
- # other-writable bit of parent directory when it shouldn't.
- # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
- ls_ld_tmpdir=`ls -ld "$tmpdir"`
- case $ls_ld_tmpdir in
- d????-?r-*) different_mode=700;;
- d????-?--*) different_mode=755;;
- *) false;;
- esac &&
- $mkdirprog -m$different_mode -p -- "$tmpdir" && {
- ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
- test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
- }
- }
- then posix_mkdir=:
- fi
- rmdir "$tmpdir/d" "$tmpdir"
- else
- # Remove any dirs left behind by ancient mkdir implementations.
- rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
- fi
- trap '' 0;;
- esac;;
+ # Create intermediate dirs using mode 755 as modified by the umask.
+ # This is like FreeBSD 'install' as of 1997-10-28.
+ umask=`umask`
+ case $stripcmd.$umask in
+ # Optimize common cases.
+ *[2367][2367]) mkdir_umask=$umask;;
+ .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
+
+ *[0-7])
+ mkdir_umask=`expr $umask + 22 \
+ - $umask % 100 % 40 + $umask % 20 \
+ - $umask % 10 % 4 + $umask % 2
+ `;;
+ *) mkdir_umask=$umask,go-w;;
+ esac
+
+ # With -d, create the new directory with the user-specified mode.
+ # Otherwise, rely on $mkdir_umask.
+ if test -n "$dir_arg"; then
+ mkdir_mode=-m$mode
+ else
+ mkdir_mode=
+ fi
+
+ posix_mkdir=false
+ case $umask in
+ *[123567][0-7][0-7])
+ # POSIX mkdir -p sets u+wx bits regardless of umask, which
+ # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
+ ;;
+ *)
+ # $RANDOM is not portable (e.g. dash); use it when possible to
+ # lower collision chance
+ tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
+ trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0
+
+ # As "mkdir -p" follows symlinks and we work in /tmp possibly; so
+ # create the $tmpdir first (and fail if unsuccessful) to make sure
+ # that nobody tries to guess the $tmpdir name.
+ if (umask $mkdir_umask &&
+ $mkdirprog $mkdir_mode "$tmpdir" &&
+ exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1
+ then
+ if test -z "$dir_arg" || {
+ # Check for POSIX incompatibilities with -m.
+ # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
+ # other-writable bit of parent directory when it shouldn't.
+ # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
+ test_tmpdir="$tmpdir/a"
+ ls_ld_tmpdir=`ls -ld "$test_tmpdir"`
+ case $ls_ld_tmpdir in
+ d????-?r-*) different_mode=700;;
+ d????-?--*) different_mode=755;;
+ *) false;;
+ esac &&
+ $mkdirprog -m$different_mode -p -- "$test_tmpdir" && {
+ ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"`
+ test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
+ }
+ }
+ then posix_mkdir=:
+ fi
+ rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir"
+ else
+ # Remove any dirs left behind by ancient mkdir implementations.
+ rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null
+ fi
+ trap '' 0;;
+ esac;;
esac
if
$posix_mkdir && (
- umask $mkdir_umask &&
- $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
+ umask $mkdir_umask &&
+ $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
)
then :
else
@@ -391,53 +377,51 @@ do
# directory the slow way, step by step, checking for races as we go.
case $dstdir in
- /*) prefix='/';;
- [-=\(\)!]*) prefix='./';;
- *) prefix='';;
+ /*) prefix='/';;
+ [-=\(\)!]*) prefix='./';;
+ *) prefix='';;
esac
- eval "$initialize_posix_glob"
-
oIFS=$IFS
IFS=/
- $posix_glob set -f
+ set -f
set fnord $dstdir
shift
- $posix_glob set +f
+ set +f
IFS=$oIFS
prefixes=
for d
do
- test X"$d" = X && continue
-
- prefix=$prefix$d
- if test -d "$prefix"; then
- prefixes=
- else
- if $posix_mkdir; then
- (umask=$mkdir_umask &&
- $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
- # Don't fail if two instances are running concurrently.
- test -d "$prefix" || exit 1
- else
- case $prefix in
- *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
- *) qprefix=$prefix;;
- esac
- prefixes="$prefixes '$qprefix'"
- fi
- fi
- prefix=$prefix/
+ test X"$d" = X && continue
+
+ prefix=$prefix$d
+ if test -d "$prefix"; then
+ prefixes=
+ else
+ if $posix_mkdir; then
+ (umask=$mkdir_umask &&
+ $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
+ # Don't fail if two instances are running concurrently.
+ test -d "$prefix" || exit 1
+ else
+ case $prefix in
+ *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) qprefix=$prefix;;
+ esac
+ prefixes="$prefixes '$qprefix'"
+ fi
+ fi
+ prefix=$prefix/
done
if test -n "$prefixes"; then
- # Don't fail if two instances are running concurrently.
- (umask $mkdir_umask &&
- eval "\$doit_exec \$mkdirprog $prefixes") ||
- test -d "$dstdir" || exit 1
- obsolete_mkdir_used=true
+ # Don't fail if two instances are running concurrently.
+ (umask $mkdir_umask &&
+ eval "\$doit_exec \$mkdirprog $prefixes") ||
+ test -d "$dstdir" || exit 1
+ obsolete_mkdir_used=true
fi
fi
fi
@@ -472,15 +456,12 @@ do
# If -C, don't bother to copy if it wouldn't change the file.
if $copy_on_change &&
- old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
- new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
-
- eval "$initialize_posix_glob" &&
- $posix_glob set -f &&
+ old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
+ new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
+ set -f &&
set X $old && old=:$2:$4:$5:$6 &&
set X $new && new=:$2:$4:$5:$6 &&
- $posix_glob set +f &&
-
+ set +f &&
test "$old" = "$new" &&
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
then
@@ -493,24 +474,24 @@ do
# to itself, or perhaps because mv is so ancient that it does not
# support -f.
{
- # Now remove or move aside any old file at destination location.
- # We try this two ways since rm can't unlink itself on some
- # systems and the destination file might be busy for other
- # reasons. In this case, the final cleanup might fail but the new
- # file should still install successfully.
- {
- test ! -f "$dst" ||
- $doit $rmcmd -f "$dst" 2>/dev/null ||
- { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
- { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
- } ||
- { echo "$0: cannot unlink or rename $dst" >&2
- (exit 1); exit 1
- }
- } &&
-
- # Now rename the file to the real destination.
- $doit $mvcmd "$dsttmp" "$dst"
+ # Now remove or move aside any old file at destination location.
+ # We try this two ways since rm can't unlink itself on some
+ # systems and the destination file might be busy for other
+ # reasons. In this case, the final cleanup might fail but the new
+ # file should still install successfully.
+ {
+ test ! -f "$dst" ||
+ $doit $rmcmd -f "$dst" 2>/dev/null ||
+ { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
+ { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
+ } ||
+ { echo "$0: cannot unlink or rename $dst" >&2
+ (exit 1); exit 1
+ }
+ } &&
+
+ # Now rename the file to the real destination.
+ $doit $mvcmd "$dsttmp" "$dst"
}
fi || exit 1
diff --git a/missing b/missing
index db98974..f62bbae 100755
--- a/missing
+++ b/missing
@@ -3,7 +3,7 @@
scriptversion=2013-10-28.13; # UTC
-# Copyright (C) 1996-2013 Free Software Foundation, Inc.
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
diff --git a/src/oniguruma.h b/src/oniguruma.h
index a8ae09a..bc8983f 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -35,7 +35,7 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
-#define ONIGURUMA_VERSION_MINOR 4
+#define ONIGURUMA_VERSION_MINOR 5
#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
@@ -496,6 +496,11 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
+#define ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE (1U<<21) /* (?(n)) (?(...)...|...) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<22) /* \K */
+#define ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (1U<<23) /* \R \r\n else [\x0a-\x0d] */
+#define ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (1U<<24) /* \N (?-m:.), \O (?m:.) */
+#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
@@ -596,6 +601,9 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
+#define ONIGERR_INVALID_IF_ELSE_SYNTAX -224
+#define ONIGERR_INVALID_ABSENT_GROUP_PATTERN -225
+#define ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN -226
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
diff --git a/src/regcomp.c b/src/regcomp.c
index db83739..47023cb 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -147,7 +147,7 @@ swap_node(Node* a, Node* b)
Node c;
c = *a; *a = *b; *b = c;
- if (NODE_TYPE(a) == NODE_STR) {
+ if (NODE_TYPE(a) == NODE_STRING) {
StrNode* sn = STR_(a);
if (sn->capa == 0) {
int len = sn->end - sn->s;
@@ -156,7 +156,7 @@ swap_node(Node* a, Node* b)
}
}
- if (NODE_TYPE(b) == NODE_STR) {
+ if (NODE_TYPE(b) == NODE_STRING) {
StrNode* sn = STR_(b);
if (sn->capa == 0) {
int len = sn->end - sn->s;
@@ -169,11 +169,11 @@ swap_node(Node* a, Node* b)
static OnigLen
distance_add(OnigLen d1, OnigLen d2)
{
- if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
- return ONIG_INFINITE_DISTANCE;
+ if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
+ return INFINITE_LEN;
else {
- if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
- else return ONIG_INFINITE_DISTANCE;
+ if (d1 <= INFINITE_LEN - d2) return d1 + d2;
+ else return INFINITE_LEN;
}
}
@@ -182,10 +182,10 @@ distance_multiply(OnigLen d, int m)
{
if (m == 0) return 0;
- if (d < ONIG_INFINITE_DISTANCE / m)
+ if (d < INFINITE_LEN / m)
return d * m;
else
- return ONIG_INFINITE_DISTANCE;
+ return INFINITE_LEN;
}
static int
@@ -230,7 +230,7 @@ onig_bbuf_init(BBuf* buf, int size)
}
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
static int
unset_addr_list_init(UnsetAddrList* list, int size)
@@ -271,7 +271,7 @@ unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
list->num++;
return 0;
}
-#endif /* USE_SUBEXP_CALL */
+#endif /* USE_CALL */
static int
@@ -347,6 +347,24 @@ add_option(regex_t* reg, OnigOptionType option)
}
static int
+add_save_type(regex_t* reg, enum SaveType type)
+{
+ SaveType t = (SaveType )type;
+
+ BBUF_ADD(reg, &t, SIZE_SAVE_TYPE);
+ return 0;
+}
+
+static int
+add_update_var_type(regex_t* reg, enum UpdateVarType type)
+{
+ UpdateVarType t = (UpdateVarType )type;
+
+ BBUF_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);
+ return 0;
+}
+
+static int
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
{
int r;
@@ -466,7 +484,7 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
return r;
}
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
static int
compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
{
@@ -545,7 +563,7 @@ compile_length_string_node(Node* node, regex_t* reg)
if (sn->end <= sn->s)
return 0;
- ambig = NSTRING_IS_AMBIG(node);
+ ambig = NODE_STRING_IS_AMBIG(node);
p = prev = sn->s;
prev_len = enclen(enc, p);
@@ -594,7 +612,7 @@ compile_string_node(Node* node, regex_t* reg)
return 0;
end = sn->end;
- ambig = NSTRING_IS_AMBIG(node);
+ ambig = NODE_STRING_IS_AMBIG(node);
p = prev = sn->s;
prev_len = enclen(enc, p);
@@ -767,7 +785,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
if (r != 0) return r;
if (
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
NODE_IS_IN_MULTI_ENTRY(qn) ||
#endif
NODE_IS_IN_REAL_REPEAT(qn)) {
@@ -893,7 +911,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
- if (IS_MULTILINE(reg->options))
+ if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
else
r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
@@ -906,7 +924,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
}
else {
- if (IS_MULTILINE(reg->options)) {
+ if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) {
r = add_opcode(reg, (CKN_ON ?
OP_STATE_CHECK_ANYCHAR_ML_STAR
: OP_ANYCHAR_ML_STAR));
@@ -1109,7 +1127,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
if (IS_NOT_NULL(qn->next_head_exact)) {
- if (IS_MULTILINE(reg->options))
+ if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
else
r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
@@ -1117,7 +1135,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
}
else {
- if (IS_MULTILINE(reg->options))
+ if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
return add_opcode(reg, OP_ANYCHAR_ML_STAR);
else
return add_opcode(reg, OP_ANYCHAR_STAR);
@@ -1229,7 +1247,7 @@ compile_length_option_node(EnclosureNode* node, regex_t* reg)
int tlen;
OnigOptionType prev = reg->options;
- reg->options = node->o.option;
+ reg->options = node->o.options;
tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
reg->options = prev;
@@ -1249,8 +1267,8 @@ compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
int r;
OnigOptionType prev = reg->options;
- if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) {
- r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.option);
+ if (IS_DYNAMIC_OPTION(prev ^ node->o.options)) {
+ r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.options);
if (r != 0) return r;
r = add_opcode_option(reg, OP_SET_OPTION, prev);
if (r != 0) return r;
@@ -1258,11 +1276,11 @@ compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
}
- reg->options = node->o.option;
+ reg->options = node->o.options;
r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
reg->options = prev;
- if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) {
+ if (IS_DYNAMIC_OPTION(prev ^ node->o.options)) {
if (r != 0) return r;
r = add_opcode_option(reg, OP_SET_OPTION, prev);
}
@@ -1287,7 +1305,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
switch (node->type) {
case ENCLOSURE_MEMORY:
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
@@ -1336,6 +1354,32 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
}
break;
+ case ENCLOSURE_IF_ELSE:
+ {
+ Node* cond = NODE_ENCLOSURE_BODY(node);
+ Node* Then = node->te.Then;
+ Node* Else = node->te.Else;
+
+ len = compile_length_tree(cond, reg);
+ if (len < 0) return len;
+ len += SIZE_OP_PUSH;
+ len += SIZE_OP_PUSH_STOP_BT + SIZE_OP_POP_STOP_BT;
+
+ if (IS_NOT_NULL(Then)) {
+ tlen = compile_length_tree(Then, reg);
+ if (tlen < 0) return tlen;
+ len += tlen;
+ }
+
+ if (IS_NOT_NULL(Else)) {
+ len += SIZE_OP_JUMP;
+ tlen = compile_length_tree(Else, reg);
+ if (tlen < 0) return tlen;
+ len += tlen;
+ }
+ }
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1352,7 +1396,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
int r;
int len;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
r = add_opcode(reg, OP_CALL);
if (r != 0) return r;
@@ -1370,9 +1414,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
r = add_opcode(reg, OP_RETURN);
return r;
}
-#endif
-#ifdef USE_SUBEXP_CALL
if (NODE_IS_CALLED(node)) {
r = add_opcode(reg, OP_CALL);
if (r != 0) return r;
@@ -1404,7 +1446,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
if (r != 0) return r;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
r = add_opcode(reg, (NODE_IS_RECURSION(node)
? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
@@ -1434,14 +1476,15 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
{
int r, len;
- if (node->type == ENCLOSURE_OPTION)
- return compile_option_node(node, reg, env);
-
switch (node->type) {
case ENCLOSURE_MEMORY:
r = compile_enclosure_memory_node(node, reg, env);
break;
+ case ENCLOSURE_OPTION:
+ r = compile_option_node(node, reg, env);
+ break;
+
case ENCLOSURE_STOP_BACKTRACK:
if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
@@ -1469,6 +1512,49 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
}
break;
+ case ENCLOSURE_IF_ELSE:
+ {
+ int cond_len, then_len, jump_len;
+ Node* cond = NODE_ENCLOSURE_BODY(node);
+ Node* Then = node->te.Then;
+ Node* Else = node->te.Else;
+
+ r = add_opcode(reg, OP_PUSH_STOP_BT);
+ if (r != 0) return r;
+
+ cond_len = compile_length_tree(cond, reg);
+ if (cond_len < 0) return cond_len;
+ if (IS_NOT_NULL(Then)) {
+ then_len = compile_length_tree(Then, reg);
+ if (then_len < 0) return then_len;
+ }
+ else
+ then_len = 0;
+
+ jump_len = cond_len + then_len + SIZE_OP_POP_STOP_BT;
+ if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
+
+ r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);
+ if (r != 0) return r;
+ r = compile_tree(cond, reg, env);
+ if (r != 0) return r;
+ r = add_opcode(reg, OP_POP_STOP_BT);
+ if (r != 0) return r;
+
+ if (IS_NOT_NULL(Then)) {
+ r = compile_tree(Then, reg, env);
+ if (r != 0) return r;
+ }
+
+ if (IS_NOT_NULL(Else)) {
+ int else_len = compile_length_tree(Else, reg);
+ r = add_opcode_rel_addr(reg, OP_JUMP, else_len);
+ if (r != 0) return r;
+ r = compile_tree(Else, reg, env);
+ }
+ }
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1490,10 +1576,10 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
switch (node->type) {
case ANCHOR_PREC_READ:
- len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
+ len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
break;
case ANCHOR_PREC_READ_NOT:
- len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
+ len = SIZE_OP_PUSH_PREC_READ_NOT + tlen + SIZE_OP_FAIL_PREC_READ_NOT;
break;
case ANCHOR_LOOK_BEHIND:
len = SIZE_OP_LOOK_BEHIND + tlen;
@@ -1531,21 +1617,21 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
#endif
case ANCHOR_PREC_READ:
- r = add_opcode(reg, OP_PUSH_POS);
+ r = add_opcode(reg, OP_PREC_READ_START);
if (r != 0) return r;
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_POP_POS);
+ r = add_opcode(reg, OP_PREC_READ_END);
break;
case ANCHOR_PREC_READ_NOT:
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
if (len < 0) return len;
- r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
+ r = add_opcode_rel_addr(reg, OP_PUSH_PREC_READ_NOT, len + SIZE_OP_FAIL_PREC_READ_NOT);
if (r != 0) return r;
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_FAIL_POS);
+ r = add_opcode(reg, OP_FAIL_PREC_READ_NOT);
break;
case ANCHOR_LOOK_BEHIND:
@@ -1596,6 +1682,67 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
}
static int
+compile_gimmick_node(GimmickNode* node, regex_t* reg)
+{
+ int r;
+
+ switch (node->type) {
+ case GIMMICK_FAIL:
+ r = add_opcode(reg, OP_FAIL);
+ break;
+
+ case GIMMICK_KEEP:
+ r = add_opcode(reg, OP_PUSH_SAVE_VAL);
+ if (r != 0) return r;
+ r = add_save_type(reg, SAVE_KEEP);
+ if (r != 0) return r;
+ r = add_mem_num(reg, node->id);
+ break;
+
+ case GIMMICK_SAVE:
+ r = add_opcode(reg, OP_PUSH_SAVE_VAL);
+ if (r != 0) return r;
+ r = add_save_type(reg, node->detail_type);
+ if (r != 0) return r;
+ r = add_mem_num(reg, node->id);
+ break;
+
+ case GIMMICK_UPDATE_VAR:
+ r = add_opcode(reg, OP_UPDATE_VAR);
+ if (r != 0) return r;
+ r = add_update_var_type(reg, node->detail_type);
+ if (r != 0) return r;
+ r = add_mem_num(reg, node->id);
+ break;
+ }
+
+ return r;
+}
+
+static int
+compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
+{
+ int len;
+
+ switch (node->type) {
+ case GIMMICK_FAIL:
+ len = SIZE_OP_FAIL;
+ break;
+
+ case GIMMICK_KEEP:
+ case GIMMICK_SAVE:
+ len = SIZE_OP_PUSH_SAVE_VAL;
+ break;
+
+ case GIMMICK_UPDATE_VAR:
+ len = SIZE_OP_UPDATE_VAR;
+ break;
+ }
+
+ return len;
+}
+
+static int
compile_length_tree(Node* node, regex_t* reg)
{
int len, r;
@@ -1624,8 +1771,8 @@ compile_length_tree(Node* node, regex_t* reg)
}
break;
- case NODE_STR:
- if (NSTRING_IS_RAW(node))
+ case NODE_STRING:
+ if (NODE_STRING_IS_RAW(node))
r = compile_length_string_raw_node(STR_(node), reg);
else
r = compile_length_string_node(node, reg);
@@ -1639,28 +1786,39 @@ compile_length_tree(Node* node, regex_t* reg)
r = SIZE_OPCODE;
break;
- case NODE_BREF:
+ case NODE_BACKREF:
{
- BRefNode* br = BREF_(node);
+ BackRefNode* br = BACKREF_(node);
+ if (NODE_IS_CHECKER(node)) {
#ifdef USE_BACKREF_WITH_LEVEL
- if (NODE_IS_NEST_LEVEL(node)) {
- r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
- SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- else
+ if (NODE_IS_NEST_LEVEL(node)) {
+ r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+ }
+ else
#endif
- if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
- ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
+ r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
}
else {
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (NODE_IS_NEST_LEVEL(node)) {
+ r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
+ SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+ }
+ else
+#endif
+ if (br->back_num == 1) {
+ r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
+ ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
+ }
+ else {
+ r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+ }
}
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
r = SIZE_OP_CALL;
break;
@@ -1678,6 +1836,10 @@ compile_length_tree(Node* node, regex_t* reg)
r = compile_length_anchor_node(ANCHOR_(node), reg);
break;
+ case NODE_GIMMICK:
+ r = compile_length_gimmick_node(GIMMICK_(node), reg);
+ break;
+
default:
return ONIGERR_TYPE_BUG;
break;
@@ -1713,7 +1875,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
do {
len = compile_length_tree(NODE_CAR(node), reg);
if (IS_NOT_NULL(NODE_CDR(node))) {
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
+ enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
+ r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);
if (r != 0) break;
}
r = compile_tree(NODE_CAR(node), reg, env);
@@ -1727,8 +1890,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
}
break;
- case NODE_STR:
- if (NSTRING_IS_RAW(node))
+ case NODE_STRING:
+ if (NODE_STRING_IS_RAW(node))
r = compile_string_raw_node(STR_(node), reg);
else
r = compile_string_node(node, reg);
@@ -1744,7 +1907,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
switch (CTYPE_(node)->ctype) {
case CTYPE_ANYCHAR:
- if (IS_MULTILINE(reg->options))
+ if (IS_MULTILINE(CTYPE_OPTION(node, reg)))
r = add_opcode(reg, OP_ANYCHAR_ML);
else
r = add_opcode(reg, OP_ANYCHAR);
@@ -1764,69 +1927,86 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
}
break;
- case NODE_BREF:
+ case NODE_BACKREF:
{
- BRefNode* br = BREF_(node);
+ BackRefNode* br = BACKREF_(node);
+ if (NODE_IS_CHECKER(node)) {
#ifdef USE_BACKREF_WITH_LEVEL
- if (NODE_IS_NEST_LEVEL(node)) {
- r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
- if (r != 0) return r;
- r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
- if (r != 0) return r;
- r = add_length(reg, br->nest_level);
- if (r != 0) return r;
+ if (NODE_IS_NEST_LEVEL(node)) {
+ r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);
+ if (r != 0) return r;
+ r = add_length(reg, br->nest_level);
+ if (r != 0) return r;
+ }
+ else
+#endif
+ {
+ r = add_opcode(reg, OP_BACKREF_CHECK);
+ if (r != 0) return r;
+ }
goto add_bacref_mems;
}
- else
-#endif
- if (br->back_num == 1) {
- n = br->back_static[0];
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREFN_IC);
+ else {
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (NODE_IS_NEST_LEVEL(node)) {
+ r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
+ if (r != 0) return r;
+ r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
+ if (r != 0) return r;
+ r = add_length(reg, br->nest_level);
if (r != 0) return r;
- r = add_mem_num(reg, n);
+
+ goto add_bacref_mems;
}
- else {
- switch (n) {
- case 1: r = add_opcode(reg, OP_BACKREF1); break;
- case 2: r = add_opcode(reg, OP_BACKREF2); break;
- default:
- r = add_opcode(reg, OP_BACKREFN);
+ else
+#endif
+ if (br->back_num == 1) {
+ n = br->back_static[0];
+ if (IS_IGNORECASE(reg->options)) {
+ r = add_opcode(reg, OP_BACKREF_N_IC);
if (r != 0) return r;
r = add_mem_num(reg, n);
- break;
}
- }
- }
- else {
- int i;
- int* p;
-
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREF_MULTI_IC);
+ else {
+ switch (n) {
+ case 1: r = add_opcode(reg, OP_BACKREF1); break;
+ case 2: r = add_opcode(reg, OP_BACKREF2); break;
+ default:
+ r = add_opcode(reg, OP_BACKREF_N);
+ if (r != 0) return r;
+ r = add_mem_num(reg, n);
+ break;
+ }
+ }
}
else {
- r = add_opcode(reg, OP_BACKREF_MULTI);
- }
- if (r != 0) return r;
+ int i;
+ int* p;
-#ifdef USE_BACKREF_WITH_LEVEL
- add_bacref_mems:
-#endif
- r = add_length(reg, br->back_num);
- if (r != 0) return r;
- p = BACKREFS_P(br);
- for (i = br->back_num - 1; i >= 0; i--) {
- r = add_mem_num(reg, p[i]);
+ if (IS_IGNORECASE(reg->options)) {
+ r = add_opcode(reg, OP_BACKREF_MULTI_IC);
+ }
+ else {
+ r = add_opcode(reg, OP_BACKREF_MULTI);
+ }
if (r != 0) return r;
+
+ add_bacref_mems:
+ r = add_length(reg, br->back_num);
+ if (r != 0) return r;
+ p = BACKREFS_P(br);
+ for (i = br->back_num - 1; i >= 0; i--) {
+ r = add_mem_num(reg, p[i]);
+ if (r != 0) return r;
+ }
}
}
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
r = compile_call(CALL_(node), reg, env);
break;
@@ -1844,6 +2024,10 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
r = compile_anchor_node(ANCHOR_(node), reg, env);
break;
+ case NODE_GIMMICK:
+ r = compile_gimmick_node(GIMMICK_(node), reg);
+ break;
+
default:
#ifdef ONIG_DEBUG
fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));
@@ -1898,6 +2082,18 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
r = noname_disable_map(plink, map, counter);
}
}
+ else if (en->type == ENCLOSURE_IF_ELSE) {
+ r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);
+ if (r != 0) return r;
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = noname_disable_map(&(en->te.Then), map, counter);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = noname_disable_map(&(en->te.Else), map, counter);
+ if (r != 0) return r;
+ }
+ }
else
r = noname_disable_map(&(NODE_BODY(node)), map, counter);
}
@@ -1920,7 +2116,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map)
{
int i, pos, n, old_num;
int *backs;
- BRefNode* bn = BREF_(node);
+ BackRefNode* bn = BACKREF_(node);
if (! NODE_IS_BY_NAME(node))
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
@@ -1957,11 +2153,29 @@ renumber_by_map(Node* node, GroupNumRemap* map)
break;
case NODE_QUANT:
- case NODE_ENCLOSURE:
r = renumber_by_map(NODE_BODY(node), map);
break;
- case NODE_BREF:
+ case NODE_ENCLOSURE:
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+ r = renumber_by_map(NODE_BODY(node), map);
+ if (r != 0) return r;
+
+ if (en->type == ENCLOSURE_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = renumber_by_map(en->te.Then, map);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = renumber_by_map(en->te.Else, map);
+ if (r != 0) return r;
+ }
+ }
+ }
+ break;
+
+ case NODE_BACKREF:
r = renumber_node_backref(node, map);
break;
@@ -1995,11 +2209,30 @@ numbered_ref_check(Node* node)
break;
/* fall */
case NODE_QUANT:
- case NODE_ENCLOSURE:
r = numbered_ref_check(NODE_BODY(node));
break;
- case NODE_BREF:
+ case NODE_ENCLOSURE:
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+ r = numbered_ref_check(NODE_BODY(node));
+ if (r != 0) return r;
+
+ if (en->type == ENCLOSURE_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = numbered_ref_check(en->te.Then);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = numbered_ref_check(en->te.Else);
+ if (r != 0) return r;
+ }
+ }
+ }
+
+ break;
+
+ case NODE_BACKREF:
if (! NODE_IS_BY_NAME(node))
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
break;
@@ -2052,7 +2285,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
}
#endif /* USE_NAMED_GROUP */
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
static int
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
{
@@ -2061,9 +2294,11 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
AbsAddrType addr;
for (i = 0; i < uslist->num; i++) {
+ if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))
+ return ONIGERR_PARSER_BUG;
+
en = ENCLOSURE_(uslist->us[i].target);
- if (! NODE_IS_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
- addr = en->m.called_addr;
+ addr = en->m.called_addr;
offset = uslist->us[i].offset;
BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
@@ -2120,7 +2355,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
}
break;
- case NODE_STR:
+ case NODE_STRING:
{
StrNode* sn = STR_(node);
UChar *s = sn->s;
@@ -2135,16 +2370,21 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
{
QuantNode* qn = QUANT_(node);
if (qn->lower == qn->upper) {
- r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);
- if (r == 0)
- *len = distance_multiply(tlen, qn->lower);
+ if (qn->upper == 0) {
+ *len = 0;
+ }
+ else {
+ r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);
+ if (r == 0)
+ *len = distance_multiply(tlen, qn->lower);
+ }
}
else
r = GET_CHAR_LEN_VARLEN;
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
if (! NODE_IS_RECURSION(node))
r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
@@ -2166,7 +2406,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
EnclosureNode* en = ENCLOSURE_(node);
switch (en->type) {
case ENCLOSURE_MEMORY:
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (NODE_IS_CLEN_FIXED(node))
*len = en->char_len;
else {
@@ -2182,6 +2422,31 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
case ENCLOSURE_STOP_BACKTRACK:
r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
break;
+ case ENCLOSURE_IF_ELSE:
+ {
+ int clen, elen;
+ r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);
+ if (r == 0) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = get_char_length_tree1(en->te.Then, reg, &tlen, level);
+ if (r != 0) break;
+ }
+ else tlen = 0;
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = get_char_length_tree1(en->te.Else, reg, &elen, level);
+ if (r != 0) break;
+ }
+ else elen = 0;
+
+ if (clen + tlen != elen) {
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ else {
+ *len = elen;
+ }
+ }
+ }
+ break;
default:
break;
}
@@ -2189,8 +2454,13 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
break;
case NODE_ANCHOR:
+ case NODE_GIMMICK:
break;
+ case NODE_BACKREF:
+ if (NODE_IS_CHECKER(node))
+ break;
+ /* fall */
default:
r = GET_CHAR_LEN_VARLEN;
break;
@@ -2241,7 +2511,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
}
break;
- case NODE_STR:
+ case NODE_STRING:
goto swap;
break;
@@ -2318,7 +2588,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
}
break;
- case NODE_STR:
+ case NODE_STRING:
goto swap;
break;
@@ -2328,10 +2598,10 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
}
break;
- case NODE_STR:
+ case NODE_STRING:
{
StrNode* xs = STR_(x);
- if (NSTRING_LEN(x) == 0)
+ if (NODE_STRING_LEN(x) == 0)
break;
//c = *(xs->s);
@@ -2362,13 +2632,13 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
}
break;
- case NODE_STR:
+ case NODE_STRING:
{
UChar *q;
StrNode* ys = STR_(y);
- len = NSTRING_LEN(x);
- if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
- if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
+ len = NODE_STRING_LEN(x);
+ if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);
+ if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
/* tiny version */
return 0;
}
@@ -2399,9 +2669,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
Node* n = NULL_NODE;
switch (NODE_TYPE(node)) {
- case NODE_BREF:
+ case NODE_BACKREF:
case NODE_ALT:
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
#endif
break;
@@ -2420,7 +2690,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
n = get_head_value_node(NODE_CAR(node), exact, reg);
break;
- case NODE_STR:
+ case NODE_STRING:
{
StrNode* sn = STR_(node);
@@ -2428,7 +2698,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
break;
if (exact != 0 &&
- !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
+ !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
}
else {
n = node;
@@ -2456,7 +2726,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
{
OnigOptionType options = reg->options;
- reg->options = ENCLOSURE_(node)->o.option;
+ reg->options = ENCLOSURE_(node)->o.options;
n = get_head_value_node(NODE_BODY(node), exact, reg);
reg->options = options;
}
@@ -2464,6 +2734,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
case ENCLOSURE_MEMORY:
case ENCLOSURE_STOP_BACKTRACK:
+ case ENCLOSURE_IF_ELSE:
n = get_head_value_node(NODE_BODY(node), exact, reg);
break;
}
@@ -2475,6 +2746,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
n = get_head_value_node(NODE_BODY(node), exact, reg);
break;
+ case NODE_GIMMICK:
default:
break;
}
@@ -2512,6 +2784,15 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
return 1;
r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
+ if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);
+ if (r != 0) break;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);
+ }
+ }
}
break;
@@ -2524,6 +2805,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
break;
+ case NODE_GIMMICK:
default:
break;
}
@@ -2531,31 +2813,31 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
}
static OnigLen
-get_min_len(Node* node, ScanEnv* env)
+tree_min_len(Node* node, ScanEnv* env)
{
OnigLen len;
OnigLen tmin;
len = 0;
switch (NODE_TYPE(node)) {
- case NODE_BREF:
- {
+ case NODE_BACKREF:
+ if (! NODE_IS_CHECKER(node)) {
int i;
int* backs;
MemEnv* mem_env = SCANENV_MEMENV(env);
- BRefNode* br = BREF_(node);
+ BackRefNode* br = BACKREF_(node);
if (NODE_IS_RECURSION(node)) break;
backs = BACKREFS_P(br);
- len = get_min_len(mem_env[backs[0]].node, env);
+ len = tree_min_len(mem_env[backs[0]].node, env);
for (i = 1; i < br->back_num; i++) {
- tmin = get_min_len(mem_env[backs[i]].node, env);
+ tmin = tree_min_len(mem_env[backs[i]].node, env);
if (len > tmin) len = tmin;
}
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
{
Node* t = NODE_BODY(node);
@@ -2564,15 +2846,15 @@ get_min_len(Node* node, ScanEnv* env)
len = ENCLOSURE_(t)->min_len;
}
else
- len = get_min_len(t, env);
+ len = tree_min_len(t, env);
}
break;
#endif
case NODE_LIST:
do {
- tmin = get_min_len(NODE_CAR(node), env);
- len += tmin;
+ tmin = tree_min_len(NODE_CAR(node), env);
+ len = distance_add(len, tmin);
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
@@ -2582,14 +2864,14 @@ get_min_len(Node* node, ScanEnv* env)
y = node;
do {
x = NODE_CAR(y);
- tmin = get_min_len(x, env);
+ tmin = tree_min_len(x, env);
if (y == node) len = tmin;
else if (len > tmin) len = tmin;
} while (IS_NOT_NULL(y = NODE_CDR(y)));
}
break;
- case NODE_STR:
+ case NODE_STRING:
{
StrNode* sn = STR_(node);
len = sn->end - sn->s;
@@ -2598,7 +2880,7 @@ get_min_len(Node* node, ScanEnv* env)
case NODE_CTYPE:
case NODE_CCLASS:
- len = 1;
+ len = ONIGENC_MBC_MINLEN(env->enc);
break;
case NODE_QUANT:
@@ -2606,7 +2888,7 @@ get_min_len(Node* node, ScanEnv* env)
QuantNode* qn = QUANT_(node);
if (qn->lower > 0) {
- len = get_min_len(NODE_BODY(node), env);
+ len = tree_min_len(NODE_BODY(node), env);
len = distance_multiply(len, qn->lower);
}
}
@@ -2624,7 +2906,7 @@ get_min_len(Node* node, ScanEnv* env)
len = 0; // recursive
else {
NODE_STATUS_ADD(node, NST_MARK1);
- len = get_min_len(NODE_BODY(node), env);
+ len = tree_min_len(NODE_BODY(node), env);
NODE_STATUS_REMOVE(node, NST_MARK1);
en->min_len = len;
@@ -2635,12 +2917,34 @@ get_min_len(Node* node, ScanEnv* env)
case ENCLOSURE_OPTION:
case ENCLOSURE_STOP_BACKTRACK:
- len = get_min_len(NODE_BODY(node), env);
+ len = tree_min_len(NODE_BODY(node), env);
+ break;
+ case ENCLOSURE_IF_ELSE:
+ {
+ int elen;
+ len = tree_min_len(NODE_BODY(node), env);
+ if (IS_NOT_NULL(en->te.Then))
+ len += tree_min_len(en->te.Then, env);
+ if (IS_NOT_NULL(en->te.Else))
+ elen = tree_min_len(en->te.Else, env);
+ else elen = 0;
+
+ if (elen < len) len = elen;
+ }
break;
}
}
break;
+ case NODE_GIMMICK:
+ {
+ GimmickNode* g = GIMMICK_(node);
+ if (g->type == GIMMICK_FAIL) {
+ len = INFINITE_LEN;
+ break;
+ }
+ }
+ /* fall */
case NODE_ANCHOR:
default:
break;
@@ -2650,7 +2954,7 @@ get_min_len(Node* node, ScanEnv* env)
}
static OnigLen
-get_max_len(Node* node, ScanEnv* env)
+tree_max_len(Node* node, ScanEnv* env)
{
OnigLen len;
OnigLen tmax;
@@ -2659,19 +2963,19 @@ get_max_len(Node* node, ScanEnv* env)
switch (NODE_TYPE(node)) {
case NODE_LIST:
do {
- tmax = get_max_len(NODE_CAR(node), env);
+ tmax = tree_max_len(NODE_CAR(node), env);
len = distance_add(len, tmax);
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
case NODE_ALT:
do {
- tmax = get_max_len(NODE_CAR(node), env);
+ tmax = tree_max_len(NODE_CAR(node), env);
if (len < tmax) len = tmax;
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NODE_STR:
+ case NODE_STRING:
{
StrNode* sn = STR_(node);
len = sn->end - sn->s;
@@ -2683,30 +2987,30 @@ get_max_len(Node* node, ScanEnv* env)
len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
break;
- case NODE_BREF:
- {
+ case NODE_BACKREF:
+ if (! NODE_IS_CHECKER(node)) {
int i;
int* backs;
MemEnv* mem_env = SCANENV_MEMENV(env);
- BRefNode* br = BREF_(node);
+ BackRefNode* br = BACKREF_(node);
if (NODE_IS_RECURSION(node)) {
- len = ONIG_INFINITE_DISTANCE;
+ len = INFINITE_LEN;
break;
}
backs = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- tmax = get_max_len(mem_env[backs[i]].node, env);
+ tmax = tree_max_len(mem_env[backs[i]].node, env);
if (len < tmax) len = tmax;
}
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
if (! NODE_IS_RECURSION(node))
- len = get_max_len(NODE_BODY(node), env);
+ len = tree_max_len(NODE_BODY(node), env);
else
- len = ONIG_INFINITE_DISTANCE;
+ len = INFINITE_LEN;
break;
#endif
@@ -2715,12 +3019,12 @@ get_max_len(Node* node, ScanEnv* env)
QuantNode* qn = QUANT_(node);
if (qn->upper != 0) {
- len = get_max_len(NODE_BODY(node), env);
+ len = tree_max_len(NODE_BODY(node), env);
if (len != 0) {
if (! IS_REPEAT_INFINITE(qn->upper))
len = distance_multiply(len, qn->upper);
else
- len = ONIG_INFINITE_DISTANCE;
+ len = INFINITE_LEN;
}
}
}
@@ -2735,10 +3039,10 @@ get_max_len(Node* node, ScanEnv* env)
len = en->max_len;
else {
if (NODE_IS_MARK1(node))
- len = ONIG_INFINITE_DISTANCE;
+ len = INFINITE_LEN;
else {
NODE_STATUS_ADD(node, NST_MARK1);
- len = get_max_len(NODE_BODY(node), env);
+ len = tree_max_len(NODE_BODY(node), env);
NODE_STATUS_REMOVE(node, NST_MARK1);
en->max_len = len;
@@ -2749,13 +3053,29 @@ get_max_len(Node* node, ScanEnv* env)
case ENCLOSURE_OPTION:
case ENCLOSURE_STOP_BACKTRACK:
- len = get_max_len(NODE_BODY(node), env);
+ len = tree_max_len(NODE_BODY(node), env);
+ break;
+ case ENCLOSURE_IF_ELSE:
+ {
+ int tlen, elen;
+ len = tree_max_len(NODE_BODY(node), env);
+ if (IS_NOT_NULL(en->te.Then)) {
+ tlen = tree_max_len(en->te.Then, env);
+ len = distance_add(len, tlen);
+ }
+ if (IS_NOT_NULL(en->te.Else))
+ elen = tree_max_len(en->te.Else, env);
+ else elen = 0;
+
+ if (elen > len) len = elen;
+ }
break;
}
}
break;
case NODE_ANCHOR:
+ case NODE_GIMMICK:
default:
break;
}
@@ -2783,14 +3103,31 @@ check_backrefs(Node* node, ScanEnv* env)
}
/* fall */
case NODE_QUANT:
+ r = check_backrefs(NODE_BODY(node), env);
+ break;
+
case NODE_ENCLOSURE:
r = check_backrefs(NODE_BODY(node), env);
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (en->type == ENCLOSURE_IF_ELSE) {
+ if (r != 0) return r;
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = check_backrefs(en->te.Then, env);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = check_backrefs(en->te.Else, env);
+ }
+ }
+ }
break;
- case NODE_BREF:
+ case NODE_BACKREF:
{
int i;
- BRefNode* br = BREF_(node);
+ BackRefNode* br = BACKREF_(node);
int* backs = BACKREFS_P(br);
MemEnv* mem_env = SCANENV_MEMENV(env);
@@ -2813,7 +3150,7 @@ check_backrefs(Node* node, ScanEnv* env)
}
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
#define RECURSION_EXIST (1<<0)
#define RECURSION_MUST (1<<1)
@@ -2822,6 +3159,7 @@ check_backrefs(Node* node, ScanEnv* env)
static int
infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
{
+ int ret;
int r = 0;
switch (NODE_TYPE(node)) {
@@ -2829,15 +3167,14 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
{
Node *x;
OnigLen min;
- int ret;
x = node;
do {
ret = infinite_recursive_call_check(NODE_CAR(x), env, head);
if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
r |= ret;
- if (head) {
- min = get_min_len(NODE_CAR(x), env);
+ if (head != 0) {
+ min = tree_min_len(NODE_CAR(x), env);
if (min != 0) head = 0;
}
} while (IS_NOT_NULL(x = NODE_CDR(x)));
@@ -2846,7 +3183,6 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
case NODE_ALT:
{
- int ret;
int must;
must = RECURSION_MUST;
@@ -2894,6 +3230,31 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
NODE_STATUS_REMOVE(node, NST_MARK2);
}
}
+ else if (en->type == ENCLOSURE_IF_ELSE) {
+ int eret;
+
+ ret = infinite_recursive_call_check(NODE_BODY(node), env, head);
+ if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
+ r |= ret;
+ if (IS_NOT_NULL(en->te.Then)) {
+ OnigLen min;
+ if (head != 0) {
+ min = tree_min_len(NODE_BODY(node), env);
+ }
+ else min = 0;
+
+ ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
+ if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
+ r |= ret;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ eret = infinite_recursive_call_check(en->te.Else, env, head);
+ if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
+ r |= (eret & RECURSION_EXIST);
+ if ((eret & RECURSION_MUST) == 0)
+ r &= ~RECURSION_MUST;
+ }
+ }
else {
r = infinite_recursive_call_check(NODE_BODY(node), env, head);
}
@@ -2948,6 +3309,16 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
NODE_STATUS_REMOVE(node, NST_MARK1);
}
}
+ else if (en->type == ENCLOSURE_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = infinite_recursive_call_check_trav(en->te.Then, env);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = infinite_recursive_call_check_trav(en->te.Else, env);
+ if (r != 0) return r;
+ }
+ }
}
r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
@@ -2987,7 +3358,10 @@ recursive_call_check(Node* node)
case NODE_CALL:
r = recursive_call_check(NODE_BODY(node));
- if (r != 0) NODE_STATUS_ADD(node, NST_RECURSION);
+ if (r != 0) {
+ if (NODE_IS_MARK1(NODE_BODY(node)))
+ NODE_STATUS_ADD(node, NST_RECURSION);
+ }
break;
case NODE_ENCLOSURE:
@@ -3005,6 +3379,16 @@ recursive_call_check(Node* node)
NODE_STATUS_REMOVE(node, NST_MARK2);
}
}
+ else if (en->type == ENCLOSURE_IF_ELSE) {
+ r = 0;
+ if (IS_NOT_NULL(en->te.Then)) {
+ r |= recursive_call_check(en->te.Then);
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r |= recursive_call_check(en->te.Else);
+ }
+ r |= recursive_call_check(NODE_BODY(node));
+ }
else {
r = recursive_call_check(NODE_BODY(node));
}
@@ -3058,6 +3442,8 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
case NODE_ENCLOSURE:
{
+ int ret;
+ int state1;
EnclosureNode* en = ENCLOSURE_(node);
if (en->type == ENCLOSURE_MEMORY) {
@@ -3075,16 +3461,25 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
}
}
- {
- int ret;
- int state1 = state;
+ state1 = state;
+ if (NODE_IS_RECURSION(node))
+ state1 |= IN_RECURSION;
- if (NODE_IS_RECURSION(node))
- state1 |= IN_RECURSION;
+ ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
+ if (ret == FOUND_CALLED_NODE)
+ r = FOUND_CALLED_NODE;
- ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
- if (ret == FOUND_CALLED_NODE)
- r = FOUND_CALLED_NODE;
+ if (en->type == ENCLOSURE_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ ret = recursive_call_check_trav(en->te.Then, env, state1);
+ if (ret == FOUND_CALLED_NODE)
+ r = FOUND_CALLED_NODE;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ ret = recursive_call_check_trav(en->te.Else, env, state1);
+ if (ret == FOUND_CALLED_NODE)
+ r = FOUND_CALLED_NODE;
+ }
}
}
break;
@@ -3126,7 +3521,7 @@ divide_look_behind_alternatives(Node* node)
if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
np = node;
do {
- SET_NODE_TYPE(np, NODE_LIST); /* alt -> list */
+ NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */
} while (IS_NOT_NULL(np = NODE_CDR(np)));
}
return 0;
@@ -3257,8 +3652,8 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
return r;
}
- NSTRING_SET_AMBIG(node);
- NSTRING_SET_DONT_GET_OPT_INFO(node);
+ NODE_STRING_SET_AMBIG(node);
+ NODE_STRING_SET_DONT_GET_OPT_INFO(node);
*rnode = node;
return 0;
}
@@ -3386,7 +3781,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
StrNode* sn = STR_(node);
- if (NSTRING_IS_AMBIG(node)) return 0;
+ if (NODE_STRING_IS_AMBIG(node)) return 0;
start = sn->s;
end = sn->end;
@@ -3529,10 +3924,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
switch (NODE_TYPE(node)) {
case NODE_LIST:
{
- Node* prev = NULL_NODE;
do {
r = setup_comb_exp_check(NODE_CAR(node), r, env);
- prev = NODE_CAR(node);
} while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node)));
}
break;
@@ -3619,8 +4012,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
switch (en->type) {
case ENCLOSURE_MEMORY:
{
- if (env->curr_max_regnum < en->regnum)
- env->curr_max_regnum = en->regnum;
+ if (env->curr_max_regnum < en->m.regnum)
+ env->curr_max_regnum = en->m.regnum;
r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env);
}
@@ -3633,7 +4026,7 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
if (NODE_IS_RECURSION(node))
env->has_recursion = 1;
@@ -3668,7 +4061,7 @@ quantifiers_memory_node_info(Node* node)
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
if (NODE_IS_RECURSION(node)) {
return QUANT_BODY_IS_EMPTY_REC; /* tiny version */
@@ -3702,17 +4095,32 @@ quantifiers_memory_node_info(Node* node)
case ENCLOSURE_STOP_BACKTRACK:
r = quantifiers_memory_node_info(NODE_BODY(node));
break;
+ case ENCLOSURE_IF_ELSE:
+ {
+ int v;
+ r = quantifiers_memory_node_info(NODE_BODY(node));
+ if (IS_NOT_NULL(en->te.Then)) {
+ v = quantifiers_memory_node_info(en->te.Then);
+ if (v > r) r = v;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ v = quantifiers_memory_node_info(en->te.Else);
+ if (v > r) r = v;
+ }
+ }
+ break;
default:
break;
}
}
break;
- case NODE_BREF:
- case NODE_STR:
+ case NODE_BACKREF:
+ case NODE_STRING:
case NODE_CTYPE:
case NODE_CCLASS:
case NODE_ANCHOR:
+ case NODE_GIMMICK:
default:
break;
}
@@ -3729,7 +4137,7 @@ quantifiers_memory_node_info(Node* node)
#define IN_ZERO_REPEAT (1<<4)
#define IN_MULTI_ENTRY (1<<5)
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
#ifdef __GNUC__
__inline
@@ -3745,7 +4153,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
#ifdef USE_NAMED_GROUP
if (env->num_named > 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+ !ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
}
#endif
@@ -3811,10 +4219,26 @@ setup_call2_call(Node* node)
break;
case NODE_ENCLOSURE:
- if (! NODE_IS_MARK1(node)) {
- NODE_STATUS_ADD(node, NST_MARK1);
- setup_call2_call(NODE_BODY(node));
- NODE_STATUS_REMOVE(node, NST_MARK1);
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (en->type == ENCLOSURE_MEMORY) {
+ if (! NODE_IS_MARK1(node)) {
+ NODE_STATUS_ADD(node, NST_MARK1);
+ setup_call2_call(NODE_BODY(node));
+ NODE_STATUS_REMOVE(node, NST_MARK1);
+ }
+ }
+ else if (en->type == ENCLOSURE_IF_ELSE) {
+ setup_call2_call(NODE_BODY(node));
+ if (IS_NOT_NULL(en->te.Then))
+ setup_call2_call(en->te.Then);
+ if (IS_NOT_NULL(en->te.Else))
+ setup_call2_call(en->te.Else);
+ }
+ else {
+ setup_call2_call(NODE_BODY(node));
+ }
}
break;
@@ -3868,11 +4292,29 @@ setup_call(Node* node, ScanEnv* env, int state)
break;
case NODE_ENCLOSURE:
- if ((state & IN_ZERO_REPEAT) != 0) {
- NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT);
- ENCLOSURE_(node)->m.entry_count--;
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (en->type == ENCLOSURE_MEMORY) {
+ if ((state & IN_ZERO_REPEAT) != 0) {
+ NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT);
+ ENCLOSURE_(node)->m.entry_count--;
+ }
+ r = setup_call(NODE_BODY(node), env, state);
+ }
+ else if (en->type == ENCLOSURE_IF_ELSE) {
+ r = setup_call(NODE_BODY(node), env, state);
+ if (r != 0) return r;
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = setup_call(en->te.Then, env, state);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else))
+ r = setup_call(en->te.Else, env, state);
+ }
+ else
+ r = setup_call(NODE_BODY(node), env, state);
}
- r = setup_call(NODE_BODY(node), env, state);
break;
case NODE_CALL:
@@ -3918,6 +4360,20 @@ setup_call2(Node* node)
case NODE_ENCLOSURE:
if (! NODE_IS_IN_ZERO_REPEAT(node))
r = setup_call2(NODE_BODY(node));
+
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (r != 0) return r;
+ if (en->type == ENCLOSURE_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = setup_call2(en->te.Then);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else))
+ r = setup_call2(en->te.Else);
+ }
+ }
break;
case NODE_CALL:
@@ -3997,6 +4453,13 @@ setup_called_state_call(Node* node, int state)
NODE_STATUS_REMOVE(node, NST_MARK1);
}
}
+ else if (en->type == ENCLOSURE_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ setup_called_state_call(en->te.Then, state);
+ }
+ if (IS_NOT_NULL(en->te.Else))
+ setup_called_state_call(en->te.Else, state);
+ }
else {
setup_called_state_call(NODE_BODY(node), state);
}
@@ -4025,7 +4488,7 @@ setup_called_state(Node* node, int state)
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
setup_called_state_call(node, state);
break;
@@ -4046,6 +4509,13 @@ setup_called_state(Node* node, int state)
case ENCLOSURE_STOP_BACKTRACK:
setup_called_state(NODE_BODY(node), state);
break;
+ case ENCLOSURE_IF_ELSE:
+ setup_called_state(NODE_BODY(node), state);
+ if (IS_NOT_NULL(en->te.Then))
+ setup_called_state(en->te.Then, state);
+ if (IS_NOT_NULL(en->te.Else))
+ setup_called_state(en->te.Else, state);
+ break;
}
}
break;
@@ -4082,16 +4552,17 @@ setup_called_state(Node* node, int state)
}
break;
- case NODE_BREF:
- case NODE_STR:
+ case NODE_BACKREF:
+ case NODE_STRING:
case NODE_CTYPE:
case NODE_CCLASS:
+ case NODE_GIMMICK:
default:
break;
}
}
-#endif /* USE_SUBEXP_CALL */
+#endif /* USE_CALL */
static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
@@ -4104,8 +4575,9 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
{
/* allowed node types in look-behind */
#define ALLOWED_TYPE_IN_LB \
- ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE \
- | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT | BIT_NODE_CALL )
+ ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STRING | BIT_NODE_CCLASS \
+ | BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \
+ | BIT_NODE_CALL )
#define ALLOWED_ENCLOSURE_IN_LB ( ENCLOSURE_MEMORY | ENCLOSURE_OPTION )
#define ALLOWED_ENCLOSURE_IN_LB_NOT ENCLOSURE_OPTION
@@ -4182,7 +4654,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
- d = get_min_len(body, env);
+ d = tree_min_len(body, env);
if (d == 0) {
#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
qn->body_empty_info = quantifiers_memory_node_info(body);
@@ -4208,10 +4680,10 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
/* expand string */
#define EXPAND_STRING_MAX_LENGTH 100
- if (NODE_TYPE(body) == NODE_STR) {
+ if (NODE_TYPE(body) == NODE_STRING) {
if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int len = NSTRING_LEN(body);
+ int len = NODE_STRING_LEN(body);
StrNode* sn = STR_(body);
if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
@@ -4278,17 +4750,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NODE_STR:
- if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
+ case NODE_STRING:
+ if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
r = expand_case_fold_string(node, reg);
}
break;
- case NODE_BREF:
+ case NODE_BACKREF:
{
int i;
int* p;
- BRefNode* br = BREF_(node);
+ BackRefNode* br = BACKREF_(node);
p = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
@@ -4311,14 +4783,14 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
case ENCLOSURE_OPTION:
{
OnigOptionType options = reg->options;
- reg->options = ENCLOSURE_(node)->o.option;
+ reg->options = ENCLOSURE_(node)->o.options;
r = setup_tree(NODE_BODY(node), reg, state, env);
reg->options = options;
}
break;
case ENCLOSURE_MEMORY:
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
state |= en->m.called_state;
#endif
@@ -4343,6 +4815,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
}
}
break;
+
+ case ENCLOSURE_IF_ELSE:
+ r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
+ if (r != 0) return r;
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else))
+ r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
+ break;
}
}
break;
@@ -4355,11 +4838,12 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
r = setup_anchor(node, reg, state, env);
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
#endif
case NODE_CTYPE:
case NODE_CCLASS:
+ case NODE_GIMMICK:
default:
break;
}
@@ -4487,7 +4971,7 @@ distance_value(MinMaxLen* mm)
OnigLen d;
- if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
+ if (mm->max == INFINITE_LEN) return 0;
d = mm->max - mm->min;
if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
@@ -5048,15 +5532,15 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case NODE_STR:
+ case NODE_STRING:
{
StrNode* sn = STR_(node);
int slen = sn->end - sn->s;
- int is_raw = NSTRING_IS_RAW(node);
+ int is_raw = NODE_STRING_IS_RAW(node);
- if (! NSTRING_IS_AMBIG(node)) {
+ if (! NODE_STRING_IS_AMBIG(node)) {
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- NSTRING_IS_RAW(node), env->enc);
+ NODE_STRING_IS_RAW(node), env->enc);
if (slen > 0) {
add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
}
@@ -5065,7 +5549,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
else {
int max;
- if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
+ if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
int n = onigenc_strlen(env->enc, sn->s, sn->end);
max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
}
@@ -5191,24 +5675,24 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case NODE_BREF:
- {
+ case NODE_BACKREF:
+ if (! NODE_IS_CHECKER(node)) {
int i;
int* backs;
OnigLen min, max, tmin, tmax;
MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);
- BRefNode* br = BREF_(node);
+ BackRefNode* br = BACKREF_(node);
if (NODE_IS_RECURSION(node)) {
- set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ set_mml(&opt->len, 0, INFINITE_LEN);
break;
}
backs = BACKREFS_P(br);
- min = get_min_len(mem_env[backs[0]].node, env->scan_env);
- max = get_max_len(mem_env[backs[0]].node, env->scan_env);
+ min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
+ max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
for (i = 1; i < br->back_num; i++) {
- tmin = get_min_len(mem_env[backs[i]].node, env->scan_env);
- tmax = get_max_len(mem_env[backs[i]].node, env->scan_env);
+ tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
+ tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
if (min > tmin) min = tmin;
if (max < tmax) max = tmax;
}
@@ -5216,13 +5700,13 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
if (NODE_IS_RECURSION(node))
- set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ set_mml(&opt->len, 0, INFINITE_LEN);
else {
OnigOptionType save = env->options;
- env->options = ENCLOSURE_(NODE_BODY(node))->o.option;
+ env->options = ENCLOSURE_(NODE_BODY(node))->o.options;
r = optimize_node_left(NODE_BODY(node), opt, env);
env->options = save;
}
@@ -5242,7 +5726,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
if (env->mmd.max == 0 &&
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
- if (IS_MULTILINE(env->options))
+ if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
else
add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
@@ -5274,7 +5758,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
min = distance_multiply(nopt.len.min, qn->lower);
if (IS_REPEAT_INFINITE(qn->upper))
- max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
+ max = (nopt.len.max > 0 ? INFINITE_LEN : 0);
else
max = distance_multiply(nopt.len.max, qn->upper);
@@ -5291,20 +5775,20 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
{
OnigOptionType save = env->options;
- env->options = en->o.option;
+ env->options = en->o.options;
r = optimize_node_left(NODE_BODY(node), opt, env);
env->options = save;
}
break;
case ENCLOSURE_MEMORY:
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
en->opt_count++;
if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
OnigLen min, max;
min = 0;
- max = ONIG_INFINITE_DISTANCE;
+ max = INFINITE_LEN;
if (NODE_IS_MIN_FIXED(node)) min = en->min_len;
if (NODE_IS_MAX_FIXED(node)) max = en->max_len;
set_mml(&opt->len, min, max);
@@ -5324,10 +5808,39 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ENCLOSURE_STOP_BACKTRACK:
r = optimize_node_left(NODE_BODY(node), opt, env);
break;
+
+ case ENCLOSURE_IF_ELSE:
+ {
+ OptEnv nenv;
+ NodeOptInfo nopt;
+
+ copy_opt_env(&nenv, env);
+ r = optimize_node_left(NODE_ENCLOSURE_BODY(en), &nopt, &nenv);
+ if (r == 0) {
+ add_mml(&nenv.mmd, &nopt.len);
+ concat_left_node_opt_info(env->enc, opt, &nopt);
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = optimize_node_left(en->te.Then, &nopt, &nenv);
+ if (r == 0) {
+ concat_left_node_opt_info(env->enc, opt, &nopt);
+ }
+ }
+
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = optimize_node_left(en->te.Else, &nopt, env);
+ if (r == 0)
+ alt_merge_node_opt_info(opt, &nopt, env);
+ }
+ }
+ }
+ break;
}
}
break;
+ case NODE_GIMMICK:
+ break;
+
default:
#ifdef ONIG_DEBUG
fprintf(stderr, "optimize_node_left: undefined node type %d\n", NODE_TYPE(node));
@@ -5379,7 +5892,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
reg->dmin = e->mmd.min;
reg->dmax = e->mmd.max;
- if (reg->dmin != ONIG_INFINITE_DISTANCE) {
+ if (reg->dmin != INFINITE_LEN) {
reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
}
@@ -5398,7 +5911,7 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m)
reg->dmin = m->mmd.min;
reg->dmax = m->mmd.max;
- if (reg->dmin != ONIG_INFINITE_DISTANCE) {
+ if (reg->dmin != INFINITE_LEN) {
reg->threshold_len = reg->dmin + 1;
}
}
@@ -5531,14 +6044,14 @@ static void print_enc_string(FILE* fp, OnigEncoding enc,
static void
print_distance_range(FILE* f, OnigLen a, OnigLen b)
{
- if (a == ONIG_INFINITE_DISTANCE)
+ if (a == INFINITE_LEN)
fputs("inf", f);
else
fprintf(f, "(%u)", a);
fputs("-", f);
- if (b == ONIG_INFINITE_DISTANCE)
+ if (b == INFINITE_LEN)
fputs("inf", f);
else
fprintf(f, "(%u)", b);
@@ -5656,7 +6169,7 @@ onig_free_body(regex_t* reg)
if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
+ if (IS_NOT_NULL(REG_EXTP(reg))) xfree(REG_EXTP(reg));
#ifdef USE_NAMED_GROUP
onig_names_free(reg);
@@ -5702,7 +6215,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
int r, init_size;
Node* root;
ScanEnv scan_env;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
UnsetAddrList uslist;
#endif
@@ -5751,7 +6264,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = check_backrefs(root, &scan_env);
if (r != 0) goto err;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (scan_env.num_call > 0) {
r = unset_addr_list_init(&uslist, scan_env.num_call);
if (r != 0) goto err;
@@ -5791,12 +6304,12 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (scan_env.backrefed_mem == 0
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
|| scan_env.num_call == 0
#endif
) {
setup_comb_exp_check(root, 0, &scan_env);
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (scan_env.has_recursion != 0) {
scan_env.num_comb_exp_check = 0;
}
@@ -5829,8 +6342,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = compile_tree(root, reg, &scan_env);
if (r == 0) {
+ if (scan_env.keep_num > 0) {
+ /* Append OP_UPDATE_VAR(UPDATE_VAR_KEEP_FROM_STACK_LAST) just before OP_END. */
+ /* Failures leave through err_unset rather than err: when num_call > 0 the */
+ /* uslist initialised by unset_addr_list_init() has not been released yet, */
+ /* and only the err_unset path calls unset_addr_list_end() on it. */
+ r = add_opcode(reg, OP_UPDATE_VAR);
+ if (r != 0) goto err_unset;
+ r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);
+ if (r != 0) goto err_unset;
+ r = add_mem_num(reg, 0 /* not used */);
+ if (r != 0) goto err_unset;
+ }
+
r = add_opcode(reg, OP_END);
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (scan_env.num_call > 0) {
r = unset_addr_list_fix(&uslist, reg);
unset_addr_list_end(&uslist);
@@ -5847,7 +6369,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->stack_pop_level = STACK_POP_LEVEL_FREE;
}
}
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
else if (scan_env.num_call > 0) {
unset_addr_list_end(&uslist);
}
@@ -5865,7 +6387,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
return r;
err_unset:
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (scan_env.num_call > 0) {
unset_addr_list_end(&uslist);
}
@@ -5890,8 +6412,8 @@ static int onig_inited = 0;
extern int
onig_reg_init(regex_t* reg, OnigOptionType option,
- OnigCaseFoldType case_fold_flag,
- OnigEncoding enc, OnigSyntaxType* syntax)
+ OnigCaseFoldType case_fold_flag,
+ OnigEncoding enc, OnigSyntaxType* syntax)
{
int r;
@@ -5938,7 +6460,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
(reg)->exact = (UChar* )NULL;
(reg)->int_map = (int* )NULL;
(reg)->int_map_backward = (int* )NULL;
- (reg)->chain = (regex_t* )NULL;
+ REG_EXTPL(reg) = NULL;
(reg)->p = (UChar* )NULL;
(reg)->alloc = 0;
@@ -6165,11 +6687,13 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_BEGIN_POSITION, "begin-position", ARG_NON },
{ OP_BACKREF1, "backref1", ARG_NON },
{ OP_BACKREF2, "backref2", ARG_NON },
- { OP_BACKREFN, "backrefn", ARG_MEMNUM },
- { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
+ { OP_BACKREF_N, "backref-n", ARG_MEMNUM },
+ { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL },
{ OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
{ OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
- { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
+ { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL },
+ { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL },
+ { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL },
{ OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
{ OP_MEMORY_START, "mem-start", ARG_MEMNUM },
{ OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
@@ -6181,6 +6705,7 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_FAIL, "fail", ARG_NON },
{ OP_JUMP, "jump", ARG_RELADDR },
{ OP_PUSH, "push", ARG_RELADDR },
+ { OP_PUSH_SUPER, "push-super", ARG_RELADDR }, /* debug name: lower-case and hyphenated like "push" / "push-or-jump-e1" */
{ OP_POP, "pop", ARG_NON },
{ OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
{ OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
@@ -6194,10 +6719,10 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM },
{ OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM },
{ OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM },
- { OP_PUSH_POS, "push-pos", ARG_NON },
- { OP_POP_POS, "pop-pos", ARG_NON },
- { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
- { OP_FAIL_POS, "fail-pos", ARG_NON },
+ { OP_PREC_READ_START, "prec-read-start", ARG_NON }, /* was "push-pos": printed name now follows the renamed opcode */
+ { OP_PREC_READ_END, "prec-read-end", ARG_NON }, /* was "pop-pos" */
+ { OP_PUSH_PREC_READ_NOT, "push-prec-read-not", ARG_RELADDR },
+ { OP_FAIL_PREC_READ_NOT, "fail-prec-read-not", ARG_NON },
{ OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
{ OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
{ OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
@@ -6205,6 +6730,8 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
{ OP_CALL, "call", ARG_ABSADDR },
{ OP_RETURN, "return", ARG_NON },
+ { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL },
+ { OP_UPDATE_VAR, "update-var", ARG_SPECIAL },
{ OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
{ OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
{ OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
@@ -6272,6 +6799,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
MemNumType mem;
StateCheckNumType scn;
OnigCodePoint code;
+ OnigOptionType option;
UChar *q;
fprintf(f, "%s", op2name(*bp));
@@ -6421,7 +6949,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
}
break;
- case OP_BACKREFN_IC:
+ case OP_BACKREF_N_IC:
mem = *((MemNumType* )bp);
bp += SIZE_MEMNUM;
fprintf(f, ":%d", mem);
@@ -6429,6 +6957,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
case OP_BACKREF_MULTI_IC:
case OP_BACKREF_MULTI:
+ case OP_BACKREF_CHECK:
fputs(" ", f);
GET_LENGTH_INC(len, bp);
for (i = 0; i < len; i++) {
@@ -6439,12 +6968,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
break;
case OP_BACKREF_WITH_LEVEL:
+ GET_OPTION_INC(option, bp);
+ fprintf(f, ":%d", option);
+ /* fall */
+ case OP_BACKREF_CHECK_WITH_LEVEL:
{
- OnigOptionType option;
LengthType level;
- GET_OPTION_INC(option, bp);
- fprintf(f, ":%d", option);
GET_LENGTH_INC(level, bp);
fprintf(f, ":%d", level);
@@ -6501,6 +7031,24 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
p_rel_addr(f, addr, bp, start);
break;
+ case OP_PUSH_SAVE_VAL:
+ {
+ SaveType type;
+ GET_SAVE_TYPE_INC(type, bp);
+ GET_MEMNUM_INC(mem, bp);
+ fprintf(f, ":%d:%d", type, mem);
+ }
+ break;
+
+ case OP_UPDATE_VAR:
+ {
+ UpdateVarType type;
+ GET_UPDATE_VAR_TYPE_INC(type, bp);
+ GET_MEMNUM_INC(mem, bp);
+ fprintf(f, ":%d:%d", type, mem);
+ }
+ break;
+
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
*--bp);
@@ -6576,9 +7124,9 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
- case NODE_STR:
+ case NODE_STRING:
fprintf(f, "<string%s:%p>",
- (NSTRING_IS_RAW(node) ? "-raw" : ""), node);
+ (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);
for (p = STR_(node)->s; p < STR_(node)->end; p++) {
if (*p >= 0x20 && *p < 0x7f)
fputc(*p, f);
@@ -6659,12 +7207,12 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
- case NODE_BREF:
+ case NODE_BACKREF:
{
int* p;
- BRefNode* br = BREF_(node);
+ BackRefNode* br = BACKREF_(node);
p = BACKREFS_P(br);
- fprintf(f, "<backref:%p>", node);
+ fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);
for (i = 0; i < br->back_num; i++) {
if (i > 0) fputs(", ", f);
fprintf(f, "%d", p[i]);
@@ -6672,7 +7220,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case NODE_CALL:
{
CallNode* cn = CALL_(node);
@@ -6693,10 +7241,10 @@ print_indent_tree(FILE* f, Node* node, int indent)
fprintf(f, "<enclosure:%p> ", node);
switch (ENCLOSURE_(node)->type) {
case ENCLOSURE_OPTION:
- fprintf(f, "option:%d", ENCLOSURE_(node)->option);
+ fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);
break;
case ENCLOSURE_MEMORY:
- fprintf(f, "memory:%d", ENCLOSURE_(node)->regnum);
+ fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);
break;
case ENCLOSURE_STOP_BACKTRACK:
fprintf(f, "stop-bt");
@@ -6709,6 +7257,24 @@ print_indent_tree(FILE* f, Node* node, int indent)
print_indent_tree(f, NODE_BODY(node), indent + add);
break;
+ case NODE_GIMMICK:
+ fprintf(f, "<gimmick:%p> ", node);
+ switch (GIMMICK_(node)->type) {
+ case GIMMICK_FAIL:
+ fprintf(f, "fail");
+ break;
+ case GIMMICK_KEEP:
+ fprintf(f, "keep:%d", GIMMICK_(node)->id);
+ break;
+ case GIMMICK_SAVE:
+ fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
+ break;
+ case GIMMICK_UPDATE_VAR:
+ fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
+ break;
+ }
+ break;
+
default:
fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));
break;
diff --git a/src/regenc.h b/src/regenc.h
index 897c704..abc26be 100644
--- a/src/regenc.h
+++ b/src/regenc.h
@@ -239,5 +239,7 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
(ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
+#define ONIGENC_IS_UNICODE_ENCODING(enc) \
+ ((enc)->is_code_ctype == onigenc_unicode_is_code_ctype)
#endif /* REGENC_H */
diff --git a/src/regerror.c b/src/regerror.c
index 0285272..a430e60 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -174,6 +174,12 @@ onig_error_code_to_format(int code)
p = "group number is too big for capture history"; break;
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
p = "invalid character property name {%n}"; break;
+ case ONIGERR_INVALID_IF_ELSE_SYNTAX:
+ p = "invalid if-else syntax"; break;
+ case ONIGERR_INVALID_ABSENT_GROUP_PATTERN:
+ p = "invalid absent group pattern"; break;
+ case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN:
+ p = "invalid absent group generator pattern"; break;
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
p = "not supported encoding combination"; break;
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
diff --git a/src/regexec.c b/src/regexec.c
index f66da1f..e7dfb96 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -305,32 +305,85 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/** stack **/
#define INVALID_STACK_INDEX -1
+#define STK_ALT_FLAG 0x0001 /* bit 0: set in STK_SUPER_ALT, STK_ALT, STK_ALT_PREC_READ_NOT and STK_ALT_LOOK_BEHIND_NOT; STK_MASK_POP_USED is defined as this flag */
+
/* stack type */
/* used by normal-POP */
-#define STK_ALT 0x0001
-#define STK_LOOK_BEHIND_NOT 0x0002
-#define STK_POS_NOT 0x0003
+#define STK_SUPER_ALT STK_ALT_FLAG
+#define STK_ALT (0x0002 | STK_ALT_FLAG)
+#define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG)
+#define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG)
/* handled by normal-POP */
#define STK_MEM_START 0x0100
#define STK_MEM_END 0x8200
#define STK_REPEAT_INC 0x0300
#define STK_STATE_CHECK_MARK 0x1000
/* avoided by normal-POP */
+#define STK_VOID 0x0000 /* for fill a blank */
#define STK_EMPTY_CHECK_START 0x3000
#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
#define STK_MEM_END_MARK 0x8400
#define STK_POS 0x0500 /* used when POP-POS */
-#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
+#define STK_STOP_BACKTRACK 0x0600 /* mark for "(?>...)" */
#define STK_REPEAT 0x0700
#define STK_CALL_FRAME 0x0800
#define STK_RETURN 0x0900
-#define STK_VOID 0x0a00 /* for fill a blank */
+#define STK_SAVE_VAL 0x0a00
/* stack type check mask */
-#define STK_MASK_POP_USED 0x00ff
-#define STK_MASK_TO_VOID_TARGET 0x10ff
+#define STK_MASK_POP_USED STK_ALT_FLAG
+#define STK_MASK_TO_VOID_TARGET 0x10fe
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
+typedef intptr_t StackIndex;
+
+typedef struct _StackType { /* one entry of the match stack used by match_at() */
+ unsigned int type; /* STK_* code; determines which member of u is in use */
+ union {
+ struct {
+ UChar *pcode; /* byte code position */
+ UChar *pstr; /* string position */
+ UChar *pstr_prev; /* previous char position of pstr */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ unsigned int state_check;
+#endif
+ } state;
+ struct {
+ int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
+ UChar *pcode; /* byte code position (head of repeated target) */
+ int num; /* repeat id */
+ } repeat;
+ struct {
+ StackIndex si; /* index of stack */
+ } repeat_inc;
+ struct {
+ int num; /* memory num */
+ UChar *pstr; /* start/end position */
+ /* Following information is set, if this stack type is MEM-START */
+ StackIndex start; /* prev. info (for backtrack "(...)*" ) */
+ StackIndex end; /* prev. info (for backtrack "(...)*" ) */
+ } mem;
+ struct {
+ int num; /* null check id */
+ UChar *pstr; /* start position */
+ } empty_check;
+#ifdef USE_CALL
+ struct {
+ UChar *ret_addr; /* byte code position */
+ int num; /* null check id */
+ UChar *pstr; /* string position */
+ } call_frame;
+#endif
+ struct { /* payload of a STK_SAVE_VAL entry */
+ int id; /* save id (the sid argument of STACK_PUSH_SAVE_VAL*) */
+ enum SaveType type; /* kind of saved value (the stype argument) */
+ UChar* v; /* the saved value itself */
+ UChar* v2; /* sprev; written only by STACK_PUSH_SAVE_VAL_WITH_SPREV */
+ } val;
+ } u;
+} StackType;
+
+
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
(msa).stack_p = (void* )0;\
@@ -396,28 +449,28 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
if (msa->stack_p) {\
is_alloca = 0;\
alloc_base = msa->stack_p;\
- stk_base = (OnigStackType* )(alloc_base\
- + (sizeof(OnigStackIndex) * msa->ptr_num));\
+ stk_base = (StackType* )(alloc_base\
+ + (sizeof(StackIndex) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + msa->stack_n;\
}\
else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
is_alloca = 0;\
- alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\
- + sizeof(OnigStackType) * (stack_num));\
+ alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
+ + sizeof(StackType) * (stack_num));\
CHECK_NULL_RETURN_MEMERR(alloc_base);\
- stk_base = (OnigStackType* )(alloc_base\
- + (sizeof(OnigStackIndex) * msa->ptr_num));\
+ stk_base = (StackType* )(alloc_base\
+ + (sizeof(StackIndex) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
else {\
is_alloca = 1;\
- alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\
- + sizeof(OnigStackType) * (stack_num));\
+ alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\
+ + sizeof(StackType) * (stack_num));\
CHECK_NULL_RETURN_MEMERR(alloc_base);\
- stk_base = (OnigStackType* )(alloc_base\
- + (sizeof(OnigStackIndex) * msa->ptr_num));\
+ stk_base = (StackType* )(alloc_base\
+ + (sizeof(StackIndex) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
@@ -427,8 +480,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STACK_SAVE do{\
msa->stack_n = stk_end - stk_base;\
if (is_alloca != 0) {\
- size_t size = sizeof(OnigStackIndex) * msa->ptr_num \
- + sizeof(OnigStackType) * msa->stack_n;\
+ size_t size = sizeof(StackIndex) * msa->ptr_num \
+ + sizeof(StackType) * msa->stack_n;\
msa->stack_p = xmalloc(size);\
CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
xmemcpy(msa->stack_p, alloc_base, size);\
@@ -439,8 +492,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
} while(0)
#define UPDATE_FOR_STACK_REALLOC do{\
- repeat_stk = (OnigStackIndex* )alloc_base;\
- mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\
+ repeat_stk = (StackIndex* )alloc_base;\
+ mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
@@ -461,8 +514,8 @@ onig_set_match_stack_limit_size(unsigned int size)
static int
stack_double(int is_alloca, char** arg_alloc_base,
- OnigStackType** arg_stk_base,
- OnigStackType** arg_stk_end, OnigStackType** arg_stk,
+ StackType** arg_stk_base,
+ StackType** arg_stk_end, StackType** arg_stk,
OnigMatchArg* msa)
{
unsigned int n;
@@ -471,7 +524,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
size_t new_size;
char* alloc_base;
char* new_alloc_base;
- OnigStackType *stk_base, *stk_end, *stk;
+ StackType *stk_base, *stk_end, *stk;
alloc_base = *arg_alloc_base;
stk_base = *arg_stk_base;
@@ -479,9 +532,9 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk = *arg_stk;
n = stk_end - stk_base;
- size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
+ size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
n *= 2;
- new_size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n;
+ new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
if (is_alloca != 0) {
new_alloc_base = (char* )xmalloc(new_size);
if (IS_NULL(new_alloc_base)) {
@@ -507,8 +560,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
alloc_base = new_alloc_base;
used = stk - stk_base;
*arg_alloc_base = alloc_base;
- *arg_stk_base = (OnigStackType* )(alloc_base
- + (sizeof(OnigStackIndex) * msa->ptr_num));
+ *arg_stk_base = (StackType* )(alloc_base
+ + (sizeof(StackIndex) * msa->ptr_num));
*arg_stk = *arg_stk_base + used;
*arg_stk_end = *arg_stk_base + n;
return 0;
@@ -516,8 +569,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_ENSURE(n) do {\
if (stk_end - stk < (n)) {\
- int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk,\
- msa);\
+ int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
if (r != 0) { STACK_SAVE; return r; } \
is_alloca = 0;\
UPDATE_FOR_STACK_REALLOC;\
@@ -610,12 +662,14 @@ stack_double(int is_alloca, char** arg_alloc_base,
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
-#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
+#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
+#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
-#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
-#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
-#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
- STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
+#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
+ STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
+#define STACK_PUSH_STOP_BACKTRACK STACK_PUSH_TYPE(STK_STOP_BACKTRACK)
+#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
+ STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
#define STACK_PUSH_REPEAT(id, pat) do {\
STACK_ENSURE(1);\
@@ -725,6 +779,97 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_INC;\
} while(0)
+#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do { /* push one STK_SAVE_VAL entry holding (sid, stype, sval) */\
+ STACK_ENSURE(1);\
+ stk->type = STK_SAVE_VAL;\
+ stk->u.val.id = (sid);\
+ stk->u.val.type = (stype);\
+ stk->u.val.v = (UChar* )(sval); /* u.val.v2 is not written by this variant */\
+ STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do { /* like STACK_PUSH_SAVE_VAL, but also records sprev */\
+ STACK_ENSURE(1);\
+ stk->type = STK_SAVE_VAL;\
+ stk->u.val.id = (sid);\
+ stk->u.val.type = (stype);\
+ stk->u.val.v = (UChar* )(sval);\
+ stk->u.val.v2 = sprev; /* sprev is taken from the expanding scope, not from an argument */\
+ STACK_INC;\
+} while(0)
+
+#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do { /* fetch the most recently pushed saved value of kind stype */\
+ StackType *k = stk;\
+ while (k > stk_base) { /* walks from stk-1 down to stk_base inclusive */\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
+ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
+ (sval) = k->u.val.v; /* sval is assigned only on a hit; otherwise it keeps its prior value */\
+ break;\
+ }\
+ }\
+} while (0)
+
+#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { /* as _LAST, but also requires val.id == sid and call level 0 */ \
+ int level = 0; /* running balance of STK_RETURN (+1) and STK_CALL_FRAME (-1) entries passed so far */\
+ StackType *k = stk;\
+ while (k > stk_base) { /* walks from stk-1 down to stk_base inclusive */\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
+ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
+ && k->u.val.id == (sid)) {\
+ if (level == 0) { /* a matching entry seen at a non-zero level is skipped and the scan goes on */\
+ (sval) = k->u.val.v; /* sval is assigned only on a hit */\
+ break;\
+ }\
+ }\
+ else if (k->type == STK_CALL_FRAME)\
+ level--;\
+ else if (k->type == STK_RETURN)\
+ level++;\
+ }\
+} while (0)
+
+/* Scan the match stack from stk-1 down to stk_base for the newest STK_SAVE_VAL */
+/* entry whose val.type == stype and val.id == sid, seen at call level 0 */
+/* (STK_RETURN raises the level, STK_CALL_FRAME lowers it while walking back). */
+/* On a hit, sval receives val.v and the enclosing scope's sprev receives val.v2; */
+/* when nothing matches, both are left untouched. */
+#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
+ int level = 0;\
+ StackType *k = stk;\
+ while (k > stk_base) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV"); \
+ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
+ && k->u.val.id == (sid)) {\
+ if (level == 0) {\
+ (sval) = k->u.val.v;\
+ sprev = k->u.val.v2;\
+ break;\
+ }\
+ }\
+ else if (k->type == STK_CALL_FRAME)\
+ level--;\
+ else if (k->type == STK_RETURN)\
+ level++;\
+ }\
+} while (0)
+
+#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { /* as _LAST_ID, but the scan starts at stk_from instead of stk */ \
+ int level = 0; /* running balance of STK_RETURN (+1) and STK_CALL_FRAME (-1) entries passed so far */\
+ StackType *k = (stk_from);\
+ while (k > stk_base) { /* unlike the other getters: stk_from itself is examined, the entry at stk_base is not */\
+ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
+ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
+ && k->u.val.id == (sid)) {\
+ if (level == 0) {\
+ (sval) = k->u.val.v; /* sval is assigned only on a hit */\
+ break;\
+ }\
+ }\
+ else if (k->type == STK_CALL_FRAME)\
+ level--;\
+ else if (k->type == STK_RETURN)\
+ level++;\
+ k--; /* decrement happens after the test, hence the inclusive start */\
+ }\
+} while (0)
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
@@ -785,11 +930,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#define STACK_POP_TIL_POS_NOT do {\
+#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
- if (stk->type == STK_POS_NOT) break;\
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \
+ if (stk->type == STK_ALT_PREC_READ_NOT) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
@@ -805,11 +950,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
+#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
- if (stk->type == STK_LOOK_BEHIND_NOT) break;\
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \
+ if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
@@ -840,15 +985,15 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#define STACK_STOP_BT_END do {\
- OnigStackType *k = stk;\
+#define STACK_STOP_BACKTRACK_END do {\
+ StackType *k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
+ STACK_BASE_CHECK(k, "STACK_STOP_BACKTRACK_END"); \
if (IS_TO_VOID_TARGET(k)) {\
k->type = STK_VOID;\
}\
- else if (k->type == STK_STOP_BT) {\
+ else if (k->type == STK_STOP_BACKTRACK) {\
k->type = STK_VOID;\
break;\
}\
@@ -856,7 +1001,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
} while(0)
#define STACK_EMPTY_CHECK(isnull,id,s) do {\
- OnigStackType* k = stk;\
+ StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
@@ -871,7 +1016,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
#define STACK_EMPTY_CHECK_MEMST(isnull,id,s,reg) do {\
- OnigStackType* k = stk;\
+ StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \
@@ -912,7 +1057,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_EMPTY_CHECK_MEMST_REC(isnull,id,s,reg) do {\
int level = 0;\
- OnigStackType* k = stk;\
+ StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \
@@ -960,7 +1105,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#else
#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
int level = 0;\
- OnigStackType* k = stk;\
+ StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
@@ -1000,7 +1145,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_RETURN(addr) do {\
int level = 0;\
- OnigStackType* k = stk;\
+ StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_RETURN"); \
@@ -1074,27 +1219,26 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
#define IS_EMPTY_STR (str == end)
-#define ON_STR_BEGIN(s) ((s) == str)
-#define ON_STR_END(s) ((s) == end)
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#define ON_STR_BEGIN(s) ((s) == str)
+#define ON_STR_END(s) ((s) == end)
#define DATA_ENSURE_CHECK1 (s < right_range)
#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
-#else
-#define DATA_ENSURE_CHECK1 (s < end)
-#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
-#define DATA_ENSURE(n) if (s + (n) > end) goto fail
-#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
+#else
+#define INIT_RIGHT_RANGE right_range = (UChar* )end
+#endif
#ifdef USE_CAPTURE_HISTORY
static int
-make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
- OnigStackType* stk_top, UChar* str, regex_t* reg)
+make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
+ StackType* stk_top, UChar* str, regex_t* reg)
{
int n, r;
OnigCaptureTreeNode* child;
- OnigStackType* k = *kp;
+ StackType* k = *kp;
while (k < stk_top) {
if (k->type == STK_MEM_START) {
@@ -1143,13 +1287,13 @@ static int mem_is_in_memp(int mem, int num, UChar* memp)
}
static int backref_match_at_nested_level(regex_t* reg
- , OnigStackType* top, OnigStackType* stk_base
+ , StackType* top, StackType* stk_base
, int ignore_case, int case_fold_flag
, int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
{
UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
int level;
- OnigStackType* k;
+ StackType* k;
level = 0;
k = top;
@@ -1197,6 +1341,37 @@ static int backref_match_at_nested_level(regex_t* reg
return 0;
}
+
+/*
+ * Walk the match stack backwards from the entry just below `top` down to
+ * `stk_base`, tracking the call depth (STK_RETURN raises it, STK_CALL_FRAME
+ * lowers it).  Returns 1 as soon as a STK_MEM_END entry is met at depth
+ * `nest` whose group number is accepted by mem_is_in_memp(num, mem_num, memp);
+ * returns 0 when the whole stack has been scanned without such an entry.
+ * `reg` is not used.
+ */
+static int
+backref_check_at_nested_level(regex_t* reg,
+                              StackType* top, StackType* stk_base,
+                              int nest, int mem_num, UChar* memp)
+{
+  int depth = 0;
+  StackType* e;
+
+  for (e = top - 1; e >= stk_base; e--) {
+    switch (e->type) {
+    case STK_CALL_FRAME:
+      depth--;
+      break;
+
+    case STK_RETURN:
+      depth++;
+      break;
+
+    case STK_MEM_END:
+      if (depth == nest && mem_is_in_memp(e->u.mem.num, mem_num, memp))
+        return 1;
+      break;
+
+    default:
+      break;
+    }
+  }
+
+  return 0;
+}
#endif /* USE_BACKREF_WITH_LEVEL */
@@ -1300,7 +1475,7 @@ typedef struct {
static int
match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- const UChar* right_range,
+ const UChar* in_right_range,
#endif
const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
{
@@ -1311,13 +1486,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MemNumType mem;
RelAddrType addr;
UChar *s, *q, *sbegin;
+ UChar *right_range;
int is_alloca;
char *alloc_base;
- OnigStackType *stk_base, *stk, *stk_end;
- OnigStackType *stkp; /* used as any purpose. */
- OnigStackIndex si;
- OnigStackIndex *repeat_stk;
- OnigStackIndex *mem_start_stk, *mem_end_stk;
+ StackType *stk_base, *stk, *stk_end;
+ StackType *stkp; /* used as any purpose. */
+ StackIndex si;
+ StackIndex *repeat_stk;
+ StackIndex *mem_start_stk, *mem_end_stk;
+ UChar* keep;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int scv;
unsigned char* state_check_buff = msa->state_check_buff;
@@ -1346,7 +1523,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
best_len = ONIG_MISMATCH;
- s = (UChar* )sstart;
+ keep = s = (UChar* )sstart;
+ INIT_RIGHT_RANGE;
+
while (1) {
#ifdef ONIG_DEBUG_MATCH
{
@@ -1394,12 +1573,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
best_len = n;
region = msa->region;
if (region) {
+ if (keep > s) keep = s;
+
#ifdef USE_POSIX_API_REGION_OPTION
if (IS_POSIX_REGION(msa->options)) {
posix_regmatch_t* rmt = (posix_regmatch_t* )region;
- rmt[0].rm_so = sstart - str;
- rmt[0].rm_eo = s - str;
+ rmt[0].rm_so = keep - str;
+ rmt[0].rm_eo = s - str;
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
if (MEM_STATUS_AT(reg->bt_mem_start, i))
@@ -1418,8 +1599,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else {
#endif /* USE_POSIX_API_REGION_OPTION */
- region->beg[0] = sstart - str;
- region->end[0] = s - str;
+ region->beg[0] = keep - str;
+ region->end[0] = s - str;
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
if (MEM_STATUS_AT(reg->bt_mem_start, i))
@@ -1451,8 +1632,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
node->group = 0;
- node->beg = sstart - str;
- node->end = s - str;
+ node->beg = keep - str;
+ node->end = s - str;
stkp = stk_base;
r = make_capture_history_tree(region->history_root, &stkp,
@@ -2138,7 +2319,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
GET_MEMNUM_INC(mem, p);
- mem_start_stk[mem] = (OnigStackIndex )((void* )s);
+ mem_start_stk[mem] = (StackIndex )((void* )s);
MOP_OUT;
continue;
break;
@@ -2152,12 +2333,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
GET_MEMNUM_INC(mem, p);
- mem_end_stk[mem] = (OnigStackIndex )((void* )s);
+ mem_end_stk[mem] = (StackIndex )((void* )s);
MOP_OUT;
continue;
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
GET_MEMNUM_INC(mem, p);
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
@@ -2169,13 +2350,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
GET_MEMNUM_INC(mem, p);
- mem_end_stk[mem] = (OnigStackIndex )((void* )s);
+ mem_end_stk[mem] = (StackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
if (MEM_STATUS_AT(reg->bt_mem_start, mem))
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
else
- mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
+ mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
STACK_PUSH_MEM_END_MARK(mem);
MOP_OUT;
@@ -2193,16 +2374,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto backref;
break;
- case OP_BACKREFN: MOP_IN(OP_BACKREFN);
+ case OP_BACKREF_N: MOP_IN(OP_BACKREF_N);
GET_MEMNUM_INC(mem, p);
backref:
{
int len;
UChar *pstart, *pend;
- /* if you want to remove following line,
- you should check in parse and compile time. */
- if (mem > num_mem) goto fail;
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
@@ -2226,15 +2404,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
break;
- case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
+ case OP_BACKREF_N_IC: MOP_IN(OP_BACKREF_N_IC);
GET_MEMNUM_INC(mem, p);
{
int len;
UChar *pstart, *pend;
- /* if you want to remove following line,
- you should check in parse and compile time. */
- if (mem > num_mem) goto fail;
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
@@ -2364,6 +2539,45 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
#endif
+ case OP_BACKREF_CHECK: MOP_IN(OP_BACKREF_CHECK); /* succeeds if any listed group has both start and end recorded; compares no text */
+ {
+ GET_LENGTH_INC(tlen, p); /* tlen: number of group numbers that follow */
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1)); /* step over the group numbers not yet read */
+ break; /* success */
+ }
+ if (i == tlen) goto fail; /* loop ran to completion: none of the groups is set */
+ MOP_OUT;
+ continue;
+ }
+ break;
+
+#ifdef USE_BACKREF_WITH_LEVEL
+ /* Level-aware backref check: operands are <level> <count> <count group numbers>. */
+ /* Succeeds when backref_check_at_nested_level() reports a match, then steps */
+ /* over the group numbers; otherwise backtracks via fail. */
+ /* MOP_IN added so the MOP_OUT below is paired, as in the other opcode arms. */
+ case OP_BACKREF_CHECK_WITH_LEVEL: MOP_IN(OP_BACKREF_CHECK_WITH_LEVEL);
+ {
+ LengthType level;
+
+ GET_LENGTH_INC(level, p);
+ GET_LENGTH_INC(tlen, p);
+
+ if (backref_check_at_nested_level(reg, stk, stk_base,
+ (int )level, (int )tlen, p) != 0) {
+ p += (SIZE_MEMNUM * tlen);
+ }
+ else
+ goto fail;
+
+ MOP_OUT;
+ continue;
+ }
+ break;
+#endif
+
#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
GET_OPTION_INC(option, p);
@@ -2440,7 +2654,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
#endif
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case OP_EMPTY_CHECK_END_MEMST_PUSH:
MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);
{
@@ -2484,6 +2698,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
continue;
break;
+ case OP_PUSH_SUPER: MOP_IN(OP_PUSH_SUPER);
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
+ MOP_OUT;
+ continue;
+ break;
+
#ifdef USE_COMBINATION_EXPLOSION_CHECK
case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
GET_STATE_CHECK_NUM_INC(mem, p);
@@ -2652,13 +2873,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto repeat_inc_ng;
break;
- case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
+ case OP_PREC_READ_START: MOP_IN(OP_PREC_READ_START);
STACK_PUSH_POS(s, sprev);
MOP_OUT;
continue;
break;
- case OP_POP_POS: MOP_IN(OP_POP_POS);
+ case OP_PREC_READ_END: MOP_IN(OP_PREC_READ_END);
{
STACK_POS_END(stkp);
s = stkp->u.state.pstr;
@@ -2668,26 +2889,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
continue;
break;
- case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
+ case OP_PUSH_PREC_READ_NOT: MOP_IN(OP_PUSH_PREC_READ_NOT);
GET_RELADDR_INC(addr, p);
- STACK_PUSH_POS_NOT(p + addr, s, sprev);
+ STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
MOP_OUT;
continue;
break;
- case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
- STACK_POP_TIL_POS_NOT;
+ case OP_FAIL_PREC_READ_NOT: MOP_IN(OP_FAIL_PREC_READ_NOT);
+ STACK_POP_TIL_ALT_PREC_READ_NOT;
goto fail;
break;
case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
- STACK_PUSH_STOP_BT;
+ STACK_PUSH_STOP_BACKTRACK;
MOP_OUT;
continue;
break;
case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
- STACK_STOP_BT_END;
+ STACK_STOP_BACKTRACK_END;
MOP_OUT;
continue;
break;
@@ -2712,7 +2933,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
/* goto fail; */
}
else {
- STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
+ STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev);
s = q;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
@@ -2721,11 +2942,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
- STACK_POP_TIL_LOOK_BEHIND_NOT;
+ STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
goto fail;
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case OP_CALL: MOP_IN(OP_CALL);
GET_ABSADDR_INC(addr, p);
STACK_PUSH_CALL_FRAME(p);
@@ -2742,6 +2963,56 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
#endif
+ case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL);
+ {
+ SaveType type;
+ GET_SAVE_TYPE_INC(type, p);
+ GET_MEMNUM_INC(mem, p); /* mem: save id */
+ switch ((enum SaveType )type) {
+ case SAVE_KEEP:
+ STACK_PUSH_SAVE_VAL(mem, type, s);
+ break;
+
+ case SAVE_S:
+ STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
+ break;
+
+ case SAVE_RIGHT_RANGE:
+ STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
+ break;
+ }
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR);
+ {
+ UpdateVarType type;
+ GET_UPDATE_VAR_TYPE_INC(type, p);
+ GET_MEMNUM_INC(mem, p); /* mem: save id */
+ switch ((enum UpdateVarType )type) {
+ case UPDATE_VAR_KEEP_FROM_STACK_LAST:
+ STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
+ break;
+ case UPDATE_VAR_S_FROM_STACK:
+ STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
+ break;
+ case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
+ STACK_GET_SAVE_VAL_TYPE_LAST_ID(SAVE_S, mem, right_range);
+ break;
+ case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
+ STACK_GET_SAVE_VAL_TYPE_LAST_ID(SAVE_RIGHT_RANGE, mem, right_range);
+ break;
+ case UPDATE_VAR_RIGHT_RANGE_INIT:
+ INIT_RIGHT_RANGE;
+ break;
+ }
+ }
+ MOP_OUT;
+ continue;
+ break;
+
case OP_FINISH:
goto finish;
break;
@@ -3248,7 +3519,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
}
else {
- if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+ if (reg->dmax != INFINITE_LEN) {
if (p - str < reg->dmax) {
*low = (UChar* )str;
if (low_prev)
@@ -3377,7 +3648,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
}
/* no needs to adjust *high, *high is used as range check only */
- if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+ if (reg->dmax != INFINITE_LEN) {
*low = p - reg->dmax;
*high = p - reg->dmin;
*high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
@@ -3514,7 +3785,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
end_buf:
if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
- goto mismatch_no_msa;
+ goto mismatch_no_msa;
if (range > start) {
if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
@@ -3616,7 +3887,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
sch_range = (UChar* )range;
if (reg->dmax != 0) {
- if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ if (reg->dmax == INFINITE_LEN)
sch_range = (UChar* )end;
else {
sch_range += reg->dmax;
@@ -3627,7 +3898,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if ((end - start) < reg->threshold_len)
goto mismatch;
- if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+ if (reg->dmax != INFINITE_LEN) {
do {
if (! forward_search_range(reg, str, end, s, sch_range,
&low, &high, &low_prev)) goto mismatch;
@@ -3689,7 +3960,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
else
adjrange = (UChar* )end;
- if (reg->dmax != ONIG_INFINITE_DISTANCE &&
+ if (reg->dmax != INFINITE_LEN &&
(end - range) >= reg->threshold_len) {
do {
sch_start = s + reg->dmax;
@@ -3714,7 +3985,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
sch_start = s;
if (reg->dmax != 0) {
- if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ if (reg->dmax == INFINITE_LEN)
sch_start = (UChar* )end;
else {
sch_start += reg->dmax;
diff --git a/src/regint.h b/src/regint.h
index 8da27d2..185f4b6 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -57,7 +57,7 @@
/* config */
/* spec. config */
#define USE_NAMED_GROUP
-#define USE_SUBEXP_CALL
+#define USE_CALL
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
@@ -196,6 +196,8 @@ typedef int intptr_t;
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
#define NULL_UCHARP ((UChar* )0)
+#define INFINITE_LEN ONIG_INFINITE_DISTANCE
+
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define PLATFORM_GET_INC(val,p,type) do{\
@@ -211,7 +213,11 @@ typedef int intptr_t;
} while(0)
/* sizeof(OnigCodePoint) */
-#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
+#ifdef SIZEOF_SIZE_T
+# define WORD_ALIGNMENT_SIZE SIZEOF_SIZE_T
+#else
+# define WORD_ALIGNMENT_SIZE SIZEOF_LONG
+#endif
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
(pad_size) = WORD_ALIGNMENT_SIZE \
@@ -226,10 +232,20 @@ typedef int intptr_t;
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+/* Extension record for a regex object.  It is reached through REG_EXTP(reg),
+   which casts (reg)->chain to RegExt*. */
+typedef struct {
+ int num_keeper; /* presumably the number of entries in keepers -- TODO confirm */
+ int* keepers; /* NOTE(review): looks like the ids recorded for keep (\K) nodes -- confirm */
+} RegExt;
+
+#define REG_EXTP(reg) (RegExt* )((reg)->chain)
+#define REG_EXTPL(reg) ((reg)->chain)
+
/* stack pop level */
-#define STACK_POP_LEVEL_FREE 0
-#define STACK_POP_LEVEL_MEM_START 1
-#define STACK_POP_LEVEL_ALL 2
+enum StackPopLevel {
+ STACK_POP_LEVEL_FREE = 0,
+ STACK_POP_LEVEL_MEM_START = 1,
+ STACK_POP_LEVEL_ALL =2
+};
/* optimize flags */
#define ONIG_OPTIMIZE_NONE 0
@@ -482,11 +498,13 @@ enum OpCode {
OP_BACKREF1,
OP_BACKREF2,
- OP_BACKREFN,
- OP_BACKREFN_IC,
+ OP_BACKREF_N,
+ OP_BACKREF_N_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
- OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
+ OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
+ OP_BACKREF_CHECK, /* (?(n)), (?('name')) */
+ OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n)), (?('name')) */
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
@@ -498,6 +516,7 @@ enum OpCode {
OP_FAIL, /* pop stack and move */
OP_JUMP,
OP_PUSH,
+ OP_PUSH_SUPER,
OP_POP,
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
@@ -512,10 +531,10 @@ enum OpCode {
OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */
OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
- OP_PUSH_POS, /* (?=...) start */
- OP_POP_POS, /* (?=...) end */
- OP_PUSH_POS_NOT, /* (?!...) start */
- OP_FAIL_POS, /* (?!...) end */
+ OP_PREC_READ_START, /* (?=...) start */
+ OP_PREC_READ_END, /* (?=...) end */
+ OP_PUSH_PREC_READ_NOT, /* (?!...) start */
+ OP_FAIL_PREC_READ_NOT, /* (?!...) end */
OP_PUSH_STOP_BT, /* (?>...) start */
OP_POP_STOP_BT, /* (?>...) end */
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
@@ -524,6 +543,8 @@ enum OpCode {
OP_CALL, /* \g<name> */
OP_RETURN,
+ OP_PUSH_SAVE_VAL,
+ OP_UPDATE_VAR,
OP_STATE_CHECK_PUSH, /* combination explosion check and push */
OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
@@ -536,6 +557,20 @@ enum OpCode {
OP_SET_OPTION /* set option */
};
+/* Kind of value that OP_PUSH_SAVE_VAL pushes on the match stack.
+   The opcode carries one of these followed by a save id. */
+enum SaveType {
+ SAVE_KEEP = 0, /* pushes s; OP_UPDATE_VAR reads the last one back into keep */
+ SAVE_S = 1, /* pushes s through STACK_PUSH_SAVE_VAL_WITH_SPREV */
+ SAVE_RIGHT_RANGE = 2, /* pushes right_range */
+};
+
+/* Operand of OP_UPDATE_VAR: selects which matcher variable is reloaded and
+   from which saved value.  The opcode also carries a save id. */
+enum UpdateVarType {
+ UPDATE_VAR_KEEP_FROM_STACK_LAST = 0, /* keep <- last SAVE_KEEP value on the stack */
+ UPDATE_VAR_S_FROM_STACK = 1, /* s <- SAVE_S value with the given id (WITH_SPREV variant) */
+ UPDATE_VAR_RIGHT_RANGE_FROM_STACK = 2, /* right_range <- SAVE_RIGHT_RANGE value with the given id */
+ UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3, /* right_range <- SAVE_S value with the given id */
+ UPDATE_VAR_RIGHT_RANGE_INIT = 4, /* runs INIT_RIGHT_RANGE; the id is not used */
+};
+
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
@@ -543,6 +578,8 @@ typedef int RepeatNumType;
typedef int MemNumType;
typedef short int StateCheckNumType;
typedef void* PointerType;
+typedef int SaveType;
+typedef int UpdateVarType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
@@ -554,7 +591,8 @@ typedef void* PointerType;
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
#define SIZE_POINTER sizeof(PointerType)
-
+#define SIZE_SAVE_TYPE sizeof(SaveType)
+#define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType)
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
@@ -564,6 +602,8 @@ typedef void* PointerType;
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
+#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType)
+#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
@@ -578,15 +618,16 @@ typedef void* PointerType;
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP SIZE_OPCODE
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_PUSH_POS SIZE_OPCODE
-#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_POP_POS SIZE_OPCODE
-#define SIZE_OP_FAIL_POS SIZE_OPCODE
+#define SIZE_OP_PREC_READ_START SIZE_OPCODE
+#define SIZE_OP_PUSH_PREC_READ_NOT (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_PREC_READ_END SIZE_OPCODE
+#define SIZE_OP_FAIL_PREC_READ_NOT SIZE_OPCODE
#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_FAIL SIZE_OPCODE
@@ -605,6 +646,8 @@ typedef void* PointerType;
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE
+#define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM)
+#define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
@@ -664,48 +707,6 @@ typedef void* PointerType;
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
-typedef intptr_t OnigStackIndex;
-
-typedef struct _OnigStackType {
- unsigned int type;
- union {
- struct {
- UChar *pcode; /* byte code position */
- UChar *pstr; /* string position */
- UChar *pstr_prev; /* previous char position of pstr */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- unsigned int state_check;
-#endif
- } state;
- struct {
- int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
- UChar *pcode; /* byte code position (head of repeated target) */
- int num; /* repeat id */
- } repeat;
- struct {
- OnigStackIndex si; /* index of stack */
- } repeat_inc;
- struct {
- int num; /* memory num */
- UChar *pstr; /* start/end position */
- /* Following information is set, if this stack type is MEM-START */
- OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
- OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
- } mem;
- struct {
- int num; /* null check id */
- UChar *pstr; /* start position */
- } empty_check;
-#ifdef USE_SUBEXP_CALL
- struct {
- UChar *ret_addr; /* byte code position */
- int num; /* null check id */
- UChar *pstr; /* string position */
- } call_frame;
-#endif
- } u;
-} OnigStackType;
-
typedef struct {
void* stack_p;
int stack_n;
diff --git a/src/regparse.c b/src/regparse.c
index a5f8e5b..25291c5 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -48,6 +48,11 @@ OnigSyntaxType OnigSyntaxRuby = {
, ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
ONIG_SYN_OP2_OPTION_RUBY |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
+ ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
+ ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
+ ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
+ ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
@@ -179,7 +184,10 @@ static int backref_rel_to_abs(int rel_no, ScanEnv* env)
}
}
-#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
+/* Set / clear the option bit(s) f in the lvalue v. */
+#define OPTION_ON(v,f) ((v) |= (f))
+#define OPTION_OFF(v,f) ((v) &= ~(f))
+
+/* Clear f in v when `negative` is non-zero, otherwise set it.
+   The whole conditional is parenthesized so that the macro remains a single
+   operand when it appears inside a larger expression. */
+#define OPTION_NEGATE(v,f,negative) ((negative) ? OPTION_OFF(v,f) : OPTION_ON(v,f))
#define MBCODE_START_POS(enc) \
(OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
@@ -301,6 +309,34 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
}
#endif
+/*
+ * Hand out the next save id of the scan environment.
+ *
+ * *id receives the value env->save_num had on entry, and env->save_num is
+ * then incremented.  `type` is only referenced by the disabled (#if 0)
+ * bookkeeping below.  Always returns 0.
+ */
+static int
+save_entry(ScanEnv* env, enum SaveType type, int* id)
+{
+  int assigned;
+
+  assigned = env->save_num;
+
+#if 0
+  if (IS_NULL(env->saves)) {
+    int n = 10;
+    env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);
+    CHECK_NULL_RETURN_MEMERR(env->saves);
+    env->save_alloc_num = n;
+  }
+  else if (env->save_alloc_num <= assigned) {
+    int n = env->save_alloc_num * 2;
+    SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n);
+    CHECK_NULL_RETURN_MEMERR(p);
+    env->saves = p;
+    env->save_alloc_num = n;
+  }
+
+  env->saves[assigned].type = type;
+#endif
+
+  *id = assigned;
+  env->save_num++;
+  return 0;
+}
+
/* scan pattern methods */
#define PEND_VALUE 0
@@ -990,7 +1026,7 @@ scan_env_clear(ScanEnv* env)
env->error_end = (UChar* )NULL;
env->num_call = 0;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
env->unset_addr_list = NULL;
env->has_call_zero = 0;
#endif
@@ -1011,6 +1047,10 @@ scan_env_clear(ScanEnv* env)
env->has_recursion = 0;
#endif
env->parse_depth = 0;
+ env->keep_num = 0;
+ env->save_num = 0;
+ env->save_alloc_num = 0;
+ env->saves = 0;
}
static int
@@ -1075,7 +1115,7 @@ onig_node_free(Node* node)
#endif
switch (NODE_TYPE(node)) {
- case NODE_STR:
+ case NODE_STRING:
if (STR_(node)->capa != 0 &&
IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
xfree(STR_(node)->s);
@@ -1103,13 +1143,25 @@ onig_node_free(Node* node)
}
break;
- case NODE_BREF:
- if (IS_NOT_NULL(BREF_(node)->back_dynamic))
- xfree(BREF_(node)->back_dynamic);
+ case NODE_BACKREF:
+ if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
+ xfree(BACKREF_(node)->back_dynamic);
break;
- case NODE_QUANT:
case NODE_ENCLOSURE:
+ if (NODE_BODY(node))
+ onig_node_free(NODE_BODY(node));
+
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+ if (en->type == ENCLOSURE_IF_ELSE) {
+ onig_node_free(en->te.Then);
+ onig_node_free(en->te.Else);
+ }
+ }
+ break;
+
+ case NODE_QUANT:
case NODE_ANCHOR:
if (NODE_BODY(node))
onig_node_free(NODE_BODY(node));
@@ -1117,12 +1169,35 @@ onig_node_free(Node* node)
case NODE_CTYPE:
case NODE_CALL:
+ case NODE_GIMMICK:
break;
}
xfree(node);
}
+/* Free one cons cell on its own: both links (car and cdr) are cleared
+   before the cell is passed to onig_node_free(). */
+static void
+cons_node_free_alone(Node* node)
+{
+  NODE_CDR(node) = NULL_NODE;
+  NODE_CAR(node) = NULL_NODE;
+  onig_node_free(node);
+}
+
+/* Release every cell of a cdr-linked chain with xfree(), following NODE_CDR
+   until a null link is reached.  The car of each cell is not freed.
+   A null `node` is accepted and does nothing. */
+extern void
+list_node_free_not_car(Node* node)
+{
+  while (IS_NOT_NULL(node)) {
+    Node* rest = NODE_CDR(node);
+
+    xfree(node);
+    node = rest;
+  }
+}
+
static Node*
node_new(void)
{
@@ -1154,7 +1229,7 @@ node_new_cclass(void)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_CCLASS);
+ NODE_SET_TYPE(node, NODE_CCLASS);
initialize_cclass(CCLASS_(node));
return node;
}
@@ -1165,19 +1240,61 @@ node_new_ctype(int type, int not)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_CTYPE);
+ NODE_SET_TYPE(node, NODE_CTYPE);
CTYPE_(node)->ctype = type;
CTYPE_(node)->not = not;
return node;
}
static Node*
+node_new_anychar(void)
+{
+  /* an any-char node is a ctype node of CTYPE_ANYCHAR with `not` == 0;
+     the result of node_new_ctype() (possibly null) is passed straight on */
+  return node_new_ctype(CTYPE_ANYCHAR, 0);
+}
+
+/*
+ * Create an any-char node whose options are pinned to `option`: the value is
+ * stored in the ctype node and the node is flagged NST_FIXED_OPTION.
+ * Returns the new node, or a null pointer when the node cannot be allocated.
+ */
+static Node*
+node_new_anychar_with_fixed_option(OnigOptionType option)
+{
+  CtypeNode* ct;
+  Node* node;
+
+  node = node_new_anychar();
+  /* allocation can fail; never write the fields through a null pointer */
+  CHECK_NULL_RETURN(node);
+
+  ct = CTYPE_(node);
+  ct->options = option;
+  NODE_STATUS_ADD(node, NST_FIXED_OPTION);
+  return node;
+}
+
+/*
+ * Create an any-char node fixed to ONIG_OPTION_NONE and store it in *node.
+ * Returns 0 on success, or ONIGERR_MEMORY (leaving *node untouched) when the
+ * node cannot be allocated.  `env` is not used.
+ */
+static int
+node_new_no_newline(Node** node, ScanEnv* env)
+{
+  Node* created = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);
+
+  if (IS_NULL(created)) return ONIGERR_MEMORY;
+
+  *node = created;
+  return 0;
+}
+
+/*
+ * Create an any-char node fixed to ONIG_OPTION_MULTILINE and store it in
+ * *node.  Returns 0 on success, or ONIGERR_MEMORY (leaving *node untouched)
+ * when the node cannot be allocated.  `env` is not used.
+ */
+static int
+node_new_true_anychar(Node** node, ScanEnv* env)
+{
+  Node* created = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);
+
+  if (IS_NULL(created)) return ONIGERR_MEMORY;
+
+  *node = created;
+  return 0;
+}
+
+static Node*
node_new_list(Node* left, Node* right)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_LIST);
+ NODE_SET_TYPE(node, NODE_LIST);
NODE_CAR(node) = left;
NODE_CDR(node) = right;
return node;
@@ -1213,19 +1330,65 @@ onig_node_new_alt(Node* left, Node* right)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_ALT);
+ NODE_SET_TYPE(node, NODE_ALT);
NODE_CAR(node) = left;
NODE_CDR(node) = right;
return node;
}
+/*
+ * Chain ns[0..n-1] into a cons list whose cells all have node type `type`:
+ * the car of cell i is ns[i], the cdr is the cell for ns[i+1], and the last
+ * cdr is NULL_NODE.
+ *
+ * Returns the head cell, or NULL_NODE when n <= 0 or a cell cannot be
+ * allocated.  On allocation failure the cell created at this level is freed
+ * before its car is assigned, so the ns[] nodes are not released here.
+ */
+static Node*
+make_list_or_alt(NodeType type, int n, Node* ns[])
+{
+  Node* head;
+  Node* tail;
+
+  if (n <= 0) return NULL_NODE;
+
+  head = node_new();
+  CHECK_NULL_RETURN(head);
+
+  tail = NULL_NODE;
+  if (n > 1) {
+    /* build the remainder first; the head is still blank if this fails */
+    tail = make_list_or_alt(type, n - 1, ns + 1);
+    if (IS_NULL(tail)) {
+      onig_node_free(head);
+      return NULL_NODE;
+    }
+  }
+
+  NODE_SET_TYPE(head, type);
+  NODE_CAR(head) = ns[0];
+  NODE_CDR(head) = tail;
+  return head;
+}
+
+/* Chain ns[0..n-1] into NODE_LIST cells; see make_list_or_alt() for the
+   result and failure conventions. */
+static Node*
+make_list(int n, Node* ns[])
+{
+  Node* list = make_list_or_alt(NODE_LIST, n, ns);
+
+  return list;
+}
+
+/* Chain ns[0..n-1] into NODE_ALT cells; see make_list_or_alt() for the
+   result and failure conventions. */
+static Node*
+make_alt(int n, Node* ns[])
+{
+  Node* alt = make_list_or_alt(NODE_ALT, n, ns);
+
+  return alt;
+}
+
extern Node*
onig_node_new_anchor(int type)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_ANCHOR);
+ NODE_SET_TYPE(node, NODE_ANCHOR);
ANCHOR_(node)->type = type;
ANCHOR_(node)->char_len = -1;
return node;
@@ -1243,16 +1406,16 @@ node_new_backref(int back_num, int* backrefs, int by_name,
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_BREF);
- BREF_(node)->back_num = back_num;
- BREF_(node)->back_dynamic = (int* )NULL;
+ NODE_SET_TYPE(node, NODE_BACKREF);
+ BACKREF_(node)->back_num = back_num;
+ BACKREF_(node)->back_dynamic = (int* )NULL;
if (by_name != 0)
NODE_STATUS_ADD(node, NST_BY_NAME);
#ifdef USE_BACKREF_WITH_LEVEL
if (exist_level != 0) {
NODE_STATUS_ADD(node, NST_NEST_LEVEL);
- BREF_(node)->nest_level = nest_level;
+ BACKREF_(node)->nest_level = nest_level;
}
#endif
@@ -1266,7 +1429,7 @@ node_new_backref(int back_num, int* backrefs, int by_name,
if (back_num <= NODE_BACKREFS_SIZE) {
for (i = 0; i < back_num; i++)
- BREF_(node)->back_static[i] = backrefs[i];
+ BACKREF_(node)->back_static[i] = backrefs[i];
}
else {
int* p = (int* )xmalloc(sizeof(int) * back_num);
@@ -1274,21 +1437,41 @@ node_new_backref(int back_num, int* backrefs, int by_name,
onig_node_free(node);
return NULL;
}
- BREF_(node)->back_dynamic = p;
+ BACKREF_(node)->back_dynamic = p;
for (i = 0; i < back_num; i++)
p[i] = backrefs[i];
}
return node;
}
-#ifdef USE_SUBEXP_CALL
+/*
+ * Create a backref node with node_new_backref() (all arguments are forwarded
+ * unchanged) and add the NST_CHECKER status to it.
+ * Returns the node, or a null pointer when node_new_backref() fails.
+ */
+static Node*
+node_new_backref_checker(int back_num, int* backrefs, int by_name,
+#ifdef USE_BACKREF_WITH_LEVEL
+                         int exist_level, int nest_level,
+#endif
+                         ScanEnv* env)
+{
+  Node* checker = node_new_backref(back_num, backrefs, by_name,
+#ifdef USE_BACKREF_WITH_LEVEL
+                                   exist_level, nest_level,
+#endif
+                                   env);
+
+  CHECK_NULL_RETURN(checker);
+
+  NODE_STATUS_ADD(checker, NST_CHECKER);
+  return checker;
+}
+
+#ifdef USE_CALL
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_CALL);
+ NODE_SET_TYPE(node, NODE_CALL);
CALL_(node)->by_number = by_number;
CALL_(node)->name = name;
CALL_(node)->name_end = name_end;
@@ -1304,7 +1487,7 @@ node_new_quantifier(int lower, int upper, int by_number)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_QUANT);
+ NODE_SET_TYPE(node, NODE_QUANT);
QUANT_(node)->lower = lower;
QUANT_(node)->upper = upper;
QUANT_(node)->greedy = 1;
@@ -1328,7 +1511,7 @@ node_new_enclosure(int type)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_ENCLOSURE);
+ NODE_SET_TYPE(node, NODE_ENCLOSURE);
ENCLOSURE_(node)->type = type;
switch (type) {
@@ -1340,11 +1523,16 @@ node_new_enclosure(int type)
break;
case ENCLOSURE_OPTION:
- ENCLOSURE_(node)->o.option = 0;
+ ENCLOSURE_(node)->o.options = 0;
break;
case ENCLOSURE_STOP_BACKTRACK:
break;
+
+ case ENCLOSURE_IF_ELSE:
+ ENCLOSURE_(node)->te.Then = 0;
+ ENCLOSURE_(node)->te.Else = 0;
+ break;
}
ENCLOSURE_(node)->opt_count = 0;
@@ -1358,7 +1546,20 @@ onig_node_new_enclosure(int type)
}
static Node*
-node_new_enclosure_memory(int is_named)
+node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)
+{
+  /* ENCLOSURE_IF_ELSE node: the condition becomes the node body, and the two
+     branches go to te.Then / te.Else (either may be null).
+     Returns a null pointer when the enclosure cannot be allocated; the
+     arguments are not freed in that case. */
+  Node* node = node_new_enclosure(ENCLOSURE_IF_ELSE);
+
+  CHECK_NULL_RETURN(node);
+
+  ENCLOSURE_(node)->te.Else = Else;
+  ENCLOSURE_(node)->te.Then = Then;
+  NODE_BODY(node) = cond;
+  return node;
+}
+
+static Node*
+node_new_memory(int is_named)
{
Node* node = node_new_enclosure(ENCLOSURE_MEMORY);
CHECK_NULL_RETURN(node);
@@ -1373,10 +1574,395 @@ node_new_option(OnigOptionType option)
{
Node* node = node_new_enclosure(ENCLOSURE_OPTION);
CHECK_NULL_RETURN(node);
- ENCLOSURE_(node)->o.option = option;
+ ENCLOSURE_(node)->o.options = option;
return node;
}
+/*
+ * Create a NODE_GIMMICK node of type GIMMICK_FAIL and store it in *node.
+ * Returns ONIG_NORMAL, or ONIGERR_MEMORY when allocation fails; in the
+ * failure case *node has been set to the null result.  `env` is not used.
+ */
+static int
+node_new_fail(Node** node, ScanEnv* env)
+{
+  Node* g = node_new();
+
+  *node = g; /* written before the check, so callers see null on failure */
+  if (IS_NULL(g)) return ONIGERR_MEMORY;
+
+  NODE_SET_TYPE(g, NODE_GIMMICK);
+  GIMMICK_(g)->type = GIMMICK_FAIL;
+  return ONIG_NORMAL;
+}
+
+/*
+ * Create a GIMMICK_SAVE node for `save_type` and store it in *node.
+ * A fresh save id is obtained from save_entry() and recorded in the node.
+ *
+ * Returns ONIG_NORMAL on success, the save_entry() error (with *node
+ * untouched), or ONIGERR_MEMORY when the node cannot be allocated (with
+ * *node set to the null result).
+ */
+static int
+node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)
+{
+  int r;
+  int id;
+  Node* g;
+
+  r = save_entry(env, save_type, &id);
+  if (r != ONIG_NORMAL) return r;
+
+  g = node_new();
+  *node = g;
+  if (IS_NULL(g)) return ONIGERR_MEMORY;
+
+  NODE_SET_TYPE(g, NODE_GIMMICK);
+  GIMMICK_(g)->type = GIMMICK_SAVE;
+  GIMMICK_(g)->detail_type = (int )save_type;
+  GIMMICK_(g)->id = id;
+
+  return ONIG_NORMAL;
+}
+
+/*
+ * Create a GIMMICK_UPDATE_VAR node that carries `update_var_type` and the
+ * save id `id`, and store it in *node.
+ * Returns ONIG_NORMAL, or ONIGERR_MEMORY when allocation fails (with *node
+ * set to the null result).  `env` is not used.
+ */
+static int
+node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
+                            int id, ScanEnv* env)
+{
+  Node* g = node_new();
+
+  *node = g;
+  if (IS_NULL(g)) return ONIGERR_MEMORY;
+
+  NODE_SET_TYPE(g, NODE_GIMMICK);
+  GIMMICK_(g)->type = GIMMICK_UPDATE_VAR;
+  GIMMICK_(g)->detail_type = (int )update_var_type;
+  GIMMICK_(g)->id = id;
+
+  return ONIG_NORMAL;
+}
+
+/*
+ * Create a SAVE_KEEP save gimmick in *node and count it in env->keep_num.
+ * Returns ONIG_NORMAL, or the error from node_new_save_gimmick() (in which
+ * case env->keep_num is left unchanged).
+ */
+static int
+node_new_keep(Node** node, ScanEnv* env)
+{
+  int status = node_new_save_gimmick(node, SAVE_KEEP, env);
+
+  if (status != 0) return status;
+
+  env->keep_num++;
+  return ONIG_NORMAL;
+}
+
+/*
+ * Build the repeating core of an absent-function tree and store it in *node.
+ *
+ * Shape of the result (built bottom-up):
+ *   step   = alt( list(save S, absent, update right_range from that S, fail),
+ *                 step_one )
+ *   repeat = quantifier(lower, upper) over step, wrapped in an
+ *            ENCLOSURE_STOP_BACKTRACK when `possessive` is non-zero
+ *   result = alt( repeat,
+ *                 list(update right_range from save id pre_save_right_id,
+ *                      fail) )
+ * The result is flagged NST_SUPER when `is_range_cutter` is non-zero.
+ *
+ * Returns ONIG_NORMAL on success.  On failure an error code is returned
+ * (ONIGERR_MEMORY when a node cannot be allocated), *node is not written and
+ * every node currently held in ns[] is freed -- this can include `absent`
+ * and `step_one`.
+ */
+static int
+make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
+                   Node* step_one, int lower, int upper, int possessive,
+                   int is_range_cutter, ScanEnv* env)
+{
+  int r;
+  int i;
+  int id;
+  Node* x;
+  Node* ns[4];
+
+  for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
+
+  ns[1] = absent;
+  ns[3] = step_one; /* for err */
+  r = node_new_save_gimmick(&ns[0], SAVE_S, env);
+  if (r != 0) goto err;
+
+  id = GIMMICK_(ns[0])->id;
+  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
+                                  id, env);
+  if (r != 0) goto err;
+
+  /* from here on ns[3] holds the fail node, not step_one */
+  r = node_new_fail(&ns[3], env);
+  if (r != 0) goto err;
+
+  /* The constructors below signal failure with a null result and leave r
+     at 0, so they must go through err_mem to report an error. */
+  x = make_list(4, ns);
+  if (IS_NULL(x)) goto err_mem;
+
+  ns[0] = x;
+  ns[1] = step_one;
+  ns[2] = ns[3] = NULL_NODE;
+
+  x = make_alt(2, ns);
+  if (IS_NULL(x)) goto err_mem;
+
+  ns[0] = x;
+
+  x = node_new_quantifier(lower, upper, 0);
+  if (IS_NULL(x)) goto err_mem;
+
+  NODE_BODY(x) = ns[0];
+  ns[0] = x;
+
+  if (possessive != 0) {
+    x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
+    if (IS_NULL(x)) goto err_mem;
+
+    NODE_BODY(x) = ns[0];
+    ns[0] = x;
+  }
+
+  r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
+                                  pre_save_right_id, env);
+  if (r != 0) goto err;
+
+  r = node_new_fail(&ns[2], env);
+  if (r != 0) goto err;
+
+  x = make_list(2, ns + 1);
+  if (IS_NULL(x)) goto err_mem;
+
+  ns[1] = x; ns[2] = NULL_NODE;
+
+  x = make_alt(2, ns);
+  if (IS_NULL(x)) goto err_mem;
+
+  if (is_range_cutter != 0)
+    NODE_STATUS_ADD(x, NST_SUPER);
+
+  *node = x;
+  return ONIG_NORMAL;
+
+ err_mem:
+  r = ONIGERR_MEMORY;
+ err:
+  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
+  return r;
+}
+
+/*
+ * Build the two trailing nodes of an absent-function tree.
+ *
+ *   *node1 = save gimmick for SAVE_RIGHT_RANGE (new save id `id`)
+ *   *node2 = alt( update right_range from save id pre_save_right_id,
+ *                 list(update right_range from save id `id`, fail) )
+ *
+ * Returns ONIG_NORMAL on success.  On failure both outputs are left as
+ * NULL_NODE, every node created so far is freed and an error code is
+ * returned (ONIGERR_MEMORY when a node cannot be allocated).
+ */
+static int
+make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
+                 ScanEnv* env)
+{
+  int r;
+  int id;
+  Node* save;
+  Node* x;
+  Node* ns[2];
+
+  *node1 = *node2 = NULL_NODE;
+  save = ns[0] = ns[1] = NULL_NODE;
+
+  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
+  if (r != 0) goto err;
+
+  id = GIMMICK_(save)->id;
+  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
+                                  id, env);
+  if (r != 0) goto err;
+
+  r = node_new_fail(&ns[1], env);
+  if (r != 0) goto err;
+
+  /* make_list()/make_alt() report failure with a null result while r is
+     still 0, so those paths go through err_mem to return an error. */
+  x = make_list(2, ns);
+  if (IS_NULL(x)) goto err_mem;
+
+  ns[0] = NULL_NODE; ns[1] = x;
+
+  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
+                                  pre_save_right_id, env);
+  if (r != 0) goto err;
+
+  x = make_alt(2, ns);
+  if (IS_NULL(x)) goto err_mem;
+
+  *node1 = save;
+  *node2 = x;
+  return ONIG_NORMAL;
+
+ err_mem:
+  r = ONIGERR_MEMORY;
+ err:
+  onig_node_free(save);
+  onig_node_free(ns[0]);
+  onig_node_free(ns[1]);
+  return r;
+}
+
+/*
+ * Test whether `node` is a quantifier -- optionally wrapped in an
+ * ENCLOSURE_STOP_BACKTRACK -- whose body is a one-character string or a
+ * character class.
+ *
+ * Returns 1 on a match and takes the tree apart: the body is detached from
+ * the quantifier, the wrapping enclosure (if any) is freed, and the two
+ * pieces are handed back through *rquant and *rbody.  *is_possessive is 1
+ * when the quantifier was wrapped in a stop-backtrack enclosure.
+ *
+ * Returns 0 otherwise; the tree is left untouched and *rquant / *rbody are
+ * null.  Note that *is_possessive may already have been set to 1 when 0 is
+ * returned for a stop-backtrack enclosure whose body is not a quantifier.
+ */
+static int
+is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
+ int* is_possessive, ScanEnv* env)
+{
+ Node* quant;
+ Node* body;
+
+ *rquant = *rbody = 0;
+ *is_possessive = 0;
+
+ /* locate the quantifier: either node itself or the body of (?>...) */
+ if (NODE_TYPE(node) == NODE_QUANT) {
+ quant = node;
+ }
+ else {
+ if (NODE_TYPE(node) == NODE_ENCLOSURE) {
+ EnclosureNode* en = ENCLOSURE_(node);
+ if (en->type == ENCLOSURE_STOP_BACKTRACK) {
+ *is_possessive = 1;
+ quant = NODE_ENCLOSURE_BODY(en);
+ if (NODE_TYPE(quant) != NODE_QUANT)
+ return 0;
+ }
+ else
+ return 0;
+ }
+ else
+ return 0;
+ }
+
+ body = NODE_BODY(quant);
+ switch (NODE_TYPE(body)) {
+ case NODE_STRING:
+ {
+ int len;
+ StrNode* sn = STR_(body);
+ UChar *s = sn->s;
+
+ /* count characters (not bytes) using the pattern encoding */
+ len = 0;
+ while (s < sn->end) {
+ s += enclen(env->enc, s);
+ len++;
+ }
+ if (len != 1)
+ return 0;
+ }
+ /* fall through: a one-character string is accepted like a char class */
+
+ case NODE_CCLASS:
+ break;
+
+ default:
+ return 0;
+ break;
+ }
+
+ /* accepted: drop the enclosure wrapper without freeing the quantifier */
+ if (node != quant) {
+ NODE_BODY(node) = 0;
+ onig_node_free(node);
+ }
+ /* detach the body so quantifier and body can be freed independently */
+ NODE_BODY(quant) = NULL_NODE;
+ *rquant = quant;
+ *rbody = body;
+ return 1;
+}
+
+/*
+ * Build the absent-function tree for the case where the expression is a
+ * simple repeat of one character / character class (`quant` over `body`,
+ * already detached from each other).
+ *
+ * Result stored in *node:
+ *   list( save right_range [id1],
+ *         make_absent_engine(absent, body, quant->lower, quant->upper,
+ *                            possessive),
+ *         the two nodes produced by make_absent_tail(id1) )
+ *
+ * `quant` only supplies the bounds.  It is released here once the tree has
+ * been built; on failure it is left alone, because the caller
+ * (make_absent_tree) frees it on its own error path -- freeing it here as
+ * well would release it twice.
+ *
+ * Returns ONIG_NORMAL on success.  On failure *node is NULL_NODE, the nodes
+ * held in ns[] are freed and an error code is returned (ONIGERR_MEMORY when
+ * a node cannot be allocated).
+ * NOTE(review): ns[] can still hold `body` / `absent` on the early error
+ * paths, and make_absent_engine() also frees nodes when it fails; confirm
+ * ownership of those two nodes on out-of-memory paths.
+ */
+static int
+make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,
+                                            Node* body, int possessive, ScanEnv* env)
+{
+  int r;
+  int i;
+  int id1;
+  int lower, upper;
+  Node* x;
+  Node* ns[4];
+
+  *node = NULL_NODE;
+  r = ONIGERR_MEMORY;
+  ns[0] = ns[1] = NULL_NODE;
+  ns[2] = body, ns[3] = absent;
+
+  lower = QUANT_(quant)->lower;
+  upper = QUANT_(quant)->upper;
+
+  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
+  if (r != 0) goto err;
+
+  id1 = GIMMICK_(ns[0])->id;
+
+  r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
+                         0, env);
+  if (r != 0) goto err;
+
+  /* body and absent now belong to the engine subtree in ns[1] */
+  ns[2] = ns[3] = NULL_NODE;
+
+  r = make_absent_tail(&ns[2], &ns[3], id1, env);
+  if (r != 0) goto err;
+
+  x = make_list(4, ns);
+  if (IS_NULL(x)) {
+    /* r is still 0 here; report the allocation failure explicitly */
+    r = ONIGERR_MEMORY;
+    goto err;
+  }
+
+  onig_node_free(quant);
+  *node = x;
+  return ONIG_NORMAL;
+
+ err:
+  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
+  return r;
+}
+
+/*
+ * Build the parse tree for an absent function and store it in *node.
+ *
+ * absent          : the pattern that must not appear
+ * expr            : the expression to match, or NULL_NODE for the default
+ *                   (\O*)
+ * is_range_cutter : non-zero selects the range-cutter form
+ *
+ * When is_range_cutter is 0 and the expression is absent or is a simple
+ * one-character repeat, the work is delegated to
+ * make_absent_tree_for_simple_one_char_repeat().  Otherwise the result is
+ *   list( save right_range [id1], save S [id2],
+ *         make_absent_engine(absent, \O, 0..infinite, possessive),
+ *         update S from save id id2
+ *         [, expr, and the two nodes of make_absent_tail(id1)] )
+ * where the bracketed part is only present when is_range_cutter is 0.
+ *
+ * Returns ONIG_NORMAL on success, otherwise an error code (ONIGERR_MEMORY
+ * when a node cannot be allocated) after freeing the nodes held in ns[].
+ * NOTE(review): the helpers called here (make_absent_engine,
+ * make_absent_tree_for_simple_one_char_repeat) release nodes in their own
+ * error paths, and this function frees quant/body/ns[] again when they
+ * fail; confirm node ownership on these out-of-memory paths.
+ */
+static int
+make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
+                 ScanEnv* env)
+{
+  int r;
+  int i;
+  int id1, id2;
+  int possessive;
+  Node* x;
+  Node* ns[7];
+
+  r = ONIGERR_MEMORY;
+  for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
+  ns[4] = expr; ns[5] = absent;
+
+  if (is_range_cutter == 0) {
+    Node* quant;
+    Node* body;
+
+    if (expr == NULL_NODE) {
+      /* default expr \O* */
+      quant = node_new_quantifier(0, REPEAT_INFINITE, 0);
+      if (IS_NULL(quant)) goto err;
+
+      r = node_new_true_anychar(&body, env);
+      if (r != 0) {
+        onig_node_free(quant);
+        goto err;
+      }
+      possessive = 0;
+      goto simple;
+    }
+    else {
+      if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
+      simple:
+        r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
+                                                        body, possessive, env);
+        if (r != 0) {
+          /* expr has been taken apart into quant/body; do not free it again */
+          ns[4] = NULL_NODE;
+          onig_node_free(quant);
+          onig_node_free(body);
+          goto err;
+        }
+
+        return ONIG_NORMAL;
+      }
+    }
+  }
+
+  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
+  if (r != 0) goto err;
+
+  id1 = GIMMICK_(ns[0])->id;
+
+  r = node_new_save_gimmick(&ns[1], SAVE_S, env);
+  if (r != 0) goto err;
+
+  id2 = GIMMICK_(ns[1])->id;
+
+  r = node_new_true_anychar(&ns[3], env);
+  if (r != 0) goto err;
+
+  possessive = 1;
+  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,
+                         possessive, is_range_cutter, env);
+  if (r != 0) goto err;
+
+  /* the any-char node and absent now belong to the engine subtree */
+  ns[3] = NULL_NODE;
+  ns[5] = NULL_NODE;
+
+  r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
+  if (r != 0) goto err;
+
+  /* make_list() reports failure with a null result while r is still 0, so
+     those paths go through err_mem to return an error. */
+  if (is_range_cutter != 0) {
+    x = make_list(4, ns);
+    if (IS_NULL(x)) goto err_mem;
+  }
+  else {
+    r = make_absent_tail(&ns[5], &ns[6], id1, env);
+    if (r != 0) goto err;
+
+    x = make_list(7, ns);
+    if (IS_NULL(x)) goto err_mem;
+  }
+
+  *node = x;
+  return ONIG_NORMAL;
+
+ err_mem:
+  r = ONIGERR_MEMORY;
+ err:
+  for (i = 0; i < 7; i++) onig_node_free(ns[i]);
+  return r;
+}
+
extern int
onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
{
@@ -1385,9 +1971,9 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
if (addlen > 0) {
int len = STR_(node)->end - STR_(node)->s;
- if (STR_(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
+ if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
UChar* p;
- int capa = len + addlen + NODE_STR_MARGIN;
+ int capa = len + addlen + NODE_STRING_MARGIN;
if (capa <= STR_(node)->capa) {
onig_strcpy(STR_(node)->s + len, s, end);
@@ -1432,7 +2018,7 @@ node_str_cat_char(Node* node, UChar c)
extern void
onig_node_conv_to_str_node(Node* node, int flag)
{
- SET_NODE_TYPE(node, NODE_STR);
+ NODE_SET_TYPE(node, NODE_STRING);
STR_(node)->flag = flag;
STR_(node)->capa = 0;
STR_(node)->s = STR_(node)->buf;
@@ -1459,7 +2045,7 @@ node_new_str(const UChar* s, const UChar* end)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NODE_TYPE(node, NODE_STR);
+ NODE_SET_TYPE(node, NODE_STRING);
STR_(node)->capa = 0;
STR_(node)->flag = 0;
STR_(node)->s = STR_(node)->buf;
@@ -1481,7 +2067,7 @@ static Node*
node_new_str_raw(UChar* s, UChar* end)
{
Node* node = node_new_str(s, end);
- NSTRING_SET_RAW(node);
+ NODE_STRING_SET_RAW(node);
return node;
}
@@ -1511,7 +2097,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc)
if (p && p > sn->s) { /* can be split. */
n = node_new_str(p, sn->end);
if ((sn->flag & STRING_RAW) != 0)
- NSTRING_SET_RAW(n);
+ NODE_STRING_SET_RAW(n);
sn->end = (UChar* )p;
}
@@ -1532,7 +2118,7 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc)
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
- UChar buf[NODE_STR_BUF_SIZE];
+ UChar buf[NODE_STRING_BUF_SIZE];
int i, len;
len = sn->end - sn->s;
@@ -2090,6 +2676,7 @@ is_invalid_quantifier_target(Node* node)
{
switch (NODE_TYPE(node)) {
case NODE_ANCHOR:
+ case NODE_GIMMICK:
return 1;
break;
@@ -2212,6 +2799,56 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
onig_node_free(cnode);
}
+/*
+ * Build the parse tree for the general-newline token (TK_GENERAL_NEWLINE).
+ *
+ * The result is an if-else enclosure whose condition is the raw string
+ * 0x0d 0x0a, whose Then branch is empty, and whose Else branch is a
+ * character class covering 0x0a-0x0d (plus 0x85 and 0x2028-0x2029 when
+ * the encoding is a Unicode encoding).
+ *
+ * On success stores the new node in *node and returns 0.  Returns the
+ * negative value reported by ONIGENC_CODE_TO_MBC if encoding a code point
+ * fails, a memory error if the string node cannot be created, and
+ * ONIGERR_MEMORY if the class node, a range addition or the enclosure
+ * node fails.
+ */
+static int
+node_new_general_newline(Node** node, ScanEnv* env)
+{
+ int r;
+ int dlen, alen;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
+ Node* crnl;
+ Node* ncc;
+ Node* x;
+ CClassNode* cc;
+
+ /* encode 0x0d followed by 0x0a back to back in buf */
+ dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
+ if (dlen < 0) return dlen;
+ alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
+ if (alen < 0) return alen;
+
+ /* raw string node holding the two encoded code points */
+ crnl = node_new_str_raw(buf, buf + dlen + alen);
+ CHECK_NULL_RETURN_MEMERR(crnl);
+
+ ncc = node_new_cclass();
+ if (IS_NULL(ncc)) goto err2;
+
+ cc = CCLASS_(ncc);
+ /* when 0x0d encodes to one byte the range goes into the class bitset,
+ otherwise it is added to the class's code-range buffer */
+ if (dlen == 1) {
+ bitset_set_range(cc->bs, 0x0a, 0x0d);
+ }
+ else {
+ r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);
+ if (r != 0) {
+ /* shared cleanup: later failures jump to these labels; the
+ result is always ONIGERR_MEMORY regardless of r */
+ err1:
+ onig_node_free(ncc);
+ err2:
+ onig_node_free(crnl);
+ return ONIGERR_MEMORY;
+ }
+ }
+
+ if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
+ r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
+ if (r != 0) goto err1;
+ r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
+ if (r != 0) goto err1;
+ }
+
+ /* condition = crnl, Then = none, Else = ncc */
+ x = node_new_enclosure_if_else(crnl, 0, ncc);
+ if (IS_NULL(x)) goto err1;
+
+ *node = x;
+ return 0;
+}
enum TokenSyms {
TK_EOT = 0, /* end of token */
@@ -2233,6 +2870,11 @@ enum TokenSyms {
TK_CC_OPEN,
TK_QUOTE_OPEN,
TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
+ TK_KEEP, /* \K */
+ TK_GENERAL_NEWLINE, /* \R */
+ TK_NO_NEWLINE, /* \N */
+ TK_TRUE_ANYCHAR, /* \O */
+
/* in cc */
TK_CC_CLOSE,
TK_CC_RANGE,
@@ -2452,8 +3094,9 @@ static OnigCodePoint
get_name_end_code_point(OnigCodePoint start)
{
switch (start) {
- case '<': return (OnigCodePoint )'>'; break;
+ case '<': return (OnigCodePoint )'>'; break;
case '\'': return (OnigCodePoint )'\''; break;
+ case '(': return (OnigCodePoint )')'; break;
default:
break;
}
@@ -2706,7 +3349,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
if (c != end_code) {
r = ONIGERR_INVALID_GROUP_NAME;
- name_end = end;
+ goto err;
}
if (*num_type != IS_NOT_NUM) {
@@ -3378,6 +4021,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.prop.not = 1;
break;
+ case 'K':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
+ tok->type = TK_KEEP;
+ break;
+
+ case 'R':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
+ tok->type = TK_GENERAL_NEWLINE;
+ break;
+
+ case 'N':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
+ tok->type = TK_NO_NEWLINE;
+ break;
+
+ case 'O':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
+ tok->type = TK_TRUE_ANYCHAR;
+ break;
+
case 'A':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
begin_buf:
@@ -3561,7 +4224,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (r == 1) tok->u.backref.exist_level = 1;
else tok->u.backref.exist_level = 0;
#else
- r = fetch_name(&p, end, &name_end, env, &back_num, &num_type, 1);
+ r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);
#endif
if (r < 0) return r;
@@ -3616,7 +4279,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
#endif
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case 'g':
if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
PFETCH(c);
@@ -3815,14 +4478,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '^':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
tok->type = TK_ANCHOR;
- tok->u.subtype = (IS_SINGLELINE(env->option)
+ tok->u.subtype = (IS_SINGLELINE(env->options)
? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
break;
case '$':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
tok->type = TK_ANCHOR;
- tok->u.subtype = (IS_SINGLELINE(env->option)
+ tok->u.subtype = (IS_SINGLELINE(env->options)
? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
break;
@@ -3837,7 +4500,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '#':
- if (IS_EXTEND(env->option)) {
+ if (IS_EXTEND(env->options)) {
while (!PEND) {
PFETCH(c);
if (ONIGENC_IS_CODE_NEWLINE(enc, c))
@@ -3849,7 +4512,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case ' ': case '\t': case '\n': case '\r': case '\f':
- if (IS_EXTEND(env->option))
+ if (IS_EXTEND(env->options))
goto start;
break;
@@ -4640,7 +5303,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
*np = NULL;
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
- option = env->option;
+ option = env->options;
if (PPEEK_IS('?') &&
IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
PINC;
@@ -4711,7 +5374,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
r = name_add(env->reg, name, name_end, num, env);
if (r != 0) return r;
- *np = node_new_enclosure_memory(1);
+ *np = node_new_memory(1);
CHECK_NULL_RETURN_MEMERR(*np);
ENCLOSURE_(*np)->m.regnum = num;
if (list_capture != 0)
@@ -4729,6 +5392,259 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
#endif
break;
+ case '~':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {
+ Node* absent;
+ Node* expr;
+ int head_bar;
+ int is_range_cutter;
+
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+ if (PPEEK_IS('|')) { // (?~|generator|absent)
+ PINC;
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+ head_bar = 1;
+ if (PPEEK_IS(')')) { // (?~|) : absent clear
+ PINC;
+ r = node_new_update_var_gimmick(np, UPDATE_VAR_RIGHT_RANGE_INIT,
+ 0, env);
+ if (r != 0) return r;
+ goto end;
+ }
+ }
+ else
+ head_bar = 0;
+
+ r = fetch_token(tok, &p, end, env);
+ if (r < 0) return r;
+ r = parse_subexp(&absent, tok, term, &p, end, env);
+ if (r < 0) {
+ onig_node_free(absent);
+ return r;
+ }
+
+ expr = NULL_NODE;
+ is_range_cutter = 0;
+ if (head_bar != 0) {
+ Node* top = absent;
+ if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {
+ expr = NULL_NODE;
+ is_range_cutter = 1;
+ //return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN;
+ }
+ else {
+ absent = NODE_CAR(top);
+ expr = NODE_CDR(top);
+ NODE_CAR(top) = NULL_NODE;
+ NODE_CDR(top) = NULL_NODE;
+ onig_node_free(top);
+ if (IS_NULL(NODE_CDR(expr))) {
+ top = expr;
+ expr = NODE_CAR(top);
+ NODE_CAR(top) = NULL_NODE;
+ onig_node_free(top);
+ }
+ }
+ }
+
+ r = make_absent_tree(np, absent, expr, is_range_cutter, env);
+ if (r != 0) {
+ return r;
+ }
+ goto end;
+ }
+ else {
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
+ break;
+
+ case '(':
+ /* (?()...) */
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
+ UChar *prev;
+ Node* condition;
+ int condition_is_checker;
+
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)
+ || c == '-' || c == '+' || c == '<' || c == '\'') {
+ UChar* name_end;
+ int back_num;
+ int exist_level;
+ int level;
+ enum REF_NUM num_type;
+ int is_enclosed;
+
+ is_enclosed = (c == '<' || c == '\'') ? 1 : 0;
+ if (! is_enclosed)
+ PUNFETCH;
+ prev = p;
+ exist_level = 0;
+#ifdef USE_BACKREF_WITH_LEVEL
+ name_end = NULL_UCHARP; /* no need. escape gcc warning. */
+ r = fetch_name_with_level(
+ (OnigCodePoint )(is_enclosed != 0 ? c : '('),
+ &p, end, &name_end,
+ env, &back_num, &level, &num_type);
+ if (r == 1) exist_level = 1;
+#else
+ r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
+ &p, end, &name_end, env, &back_num, &num_type, 1);
+#endif
+ if (r < 0) {
+ if (is_enclosed == 0) {
+ goto any_condition;
+ }
+ else
+ return r;
+ }
+
+ condition_is_checker = 1;
+ if (num_type != IS_NOT_NUM) {
+ if (num_type == IS_REL_NUM) {
+ back_num = backref_rel_to_abs(back_num, env);
+ }
+ if (back_num <= 0)
+ return ONIGERR_INVALID_BACKREF;
+
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ if (back_num > env->num_mem ||
+ IS_NULL(SCANENV_MEMENV(env)[back_num].node))
+ return ONIGERR_INVALID_BACKREF;
+ }
+
+ condition = node_new_backref_checker(1, &back_num, 0,
+#ifdef USE_BACKREF_WITH_LEVEL
+ exist_level, level,
+#endif
+ env);
+ }
+ else {
+ int num;
+ int* backs;
+
+ num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ if (num <= 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ int i;
+ for (i = 0; i < num; i++) {
+ if (backs[i] > env->num_mem ||
+ IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
+ return ONIGERR_INVALID_BACKREF;
+ }
+ }
+
+ condition = node_new_backref_checker(num, backs, 1,
+#ifdef USE_BACKREF_WITH_LEVEL
+ exist_level, level,
+#endif
+ env);
+ }
+
+ if (is_enclosed != 0) {
+ if (PEND) goto err_if_else;
+ PFETCH(c);
+ if (c != ')') goto err_if_else;
+ }
+ }
+ else {
+ any_condition:
+ PUNFETCH;
+ condition_is_checker = 0;
+ r = fetch_token(tok, &p, end, env);
+ if (r < 0) return r;
+ r = parse_subexp(&condition, tok, term, &p, end, env);
+ if (r < 0) {
+ onig_node_free(condition);
+ return r;
+ }
+ }
+
+ CHECK_NULL_RETURN_MEMERR(condition);
+
+ if (PEND) {
+ err_if_else:
+ onig_node_free(condition);
+ return ONIGERR_END_PATTERN_IN_GROUP;
+ }
+
+ if (PPEEK_IS(')')) { /* case: empty body: make backref checker */
+ if (condition_is_checker == 0) {
+ onig_node_free(condition);
+ return ONIGERR_INVALID_IF_ELSE_SYNTAX;
+ }
+ PFETCH(c);
+ *np = condition;
+ }
+ else { /* if-else */
+ int then_is_empty;
+ Node *Then, *Else;
+
+ if (PPEEK_IS('|')) {
+ PFETCH(c);
+ Then = 0;
+ then_is_empty = 1;
+ }
+ else
+ then_is_empty = 0;
+
+ r = fetch_token(tok, &p, end, env);
+ if (r < 0) {
+ onig_node_free(condition);
+ return r;
+ }
+ r = parse_subexp(&target, tok, term, &p, end, env);
+ if (r < 0) {
+ onig_node_free(condition);
+ onig_node_free(target);
+ return r;
+ }
+
+ if (then_is_empty != 0) {
+ Else = target;
+ }
+ else {
+ if (NODE_TYPE(target) == NODE_ALT) {
+ Then = NODE_CAR(target);
+ if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {
+ Else = NODE_CAR(NODE_CDR(target));
+ cons_node_free_alone(NODE_CDR(target));
+ }
+ else {
+ Else = NODE_CDR(target);
+ }
+ cons_node_free_alone(target);
+ }
+ else {
+ Then = target;
+ Else = 0;
+ }
+ }
+
+ *np = node_new_enclosure_if_else(condition, Then, Else);
+ if (IS_NULL(*np)) {
+ onig_node_free(condition);
+ onig_node_free(Then);
+ onig_node_free(Else);
+ return ONIGERR_MEMORY;
+ }
+ }
+ goto end;
+ }
+ else {
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
+ break;
+
case '@':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
#ifdef USE_NAMED_GROUP
@@ -4741,7 +5657,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
PUNFETCH;
}
#endif
- *np = node_new_enclosure_memory(0);
+ *np = node_new_memory(0);
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
if (num < 0) {
@@ -4772,11 +5688,11 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
break;
case '-': neg = 1; break;
- case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
- case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
+ case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;
+ case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;
case 's':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
- ONOFF(option, ONIG_OPTION_MULTILINE, neg);
+ OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
}
else
return ONIGERR_UNDEFINED_GROUP_OPTION;
@@ -4784,17 +5700,17 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case 'm':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
- ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
+ OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
}
else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
- ONOFF(option, ONIG_OPTION_MULTILINE, neg);
+ OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
}
else
return ONIGERR_UNDEFINED_GROUP_OPTION;
break;
#ifdef USE_POSIXLINE_OPTION
case 'p':
- ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
+ OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
break;
#endif
default:
@@ -4808,13 +5724,13 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
return 2; /* option only */
}
else if (c == ':') {
- OnigOptionType prev = env->option;
+ OnigOptionType prev = env->options;
- env->option = option;
+ env->options = option;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_subexp(&target, tok, term, &p, end, env);
- env->option = prev;
+ env->options = prev;
if (r < 0) {
onig_node_free(target);
return r;
@@ -4837,10 +5753,10 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
}
else {
- if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
+ if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
goto group;
- *np = node_new_enclosure_memory(0);
+ *np = node_new_memory(0);
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
if (num < 0) return num;
@@ -4866,6 +5782,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
}
+ end:
*src = p;
return 0;
}
@@ -4888,7 +5805,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
return 1;
switch (NODE_TYPE(target)) {
- case NODE_STR:
+ case NODE_STRING:
if (! group) {
StrNode* sn = STR_(target);
if (str_node_can_be_split(sn, env->enc)) {
@@ -5060,7 +5977,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
/* char-class expanded multi-char only
compare with string folded at match time. */
- NSTRING_SET_AMBIG(snode);
+ NODE_STRING_SET_AMBIG(snode);
}
else {
r = onig_node_str_cat(snode, buf, buf + len);
@@ -5106,13 +6023,13 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (r == 1) group = 1;
else if (r == 2) { /* option only */
Node* target;
- OnigOptionType prev = env->option;
+ OnigOptionType prev = env->options;
- env->option = ENCLOSURE_(*np)->o.option;
+ env->options = ENCLOSURE_(*np)->o.options;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_subexp(&target, tok, term, src, end, env);
- env->option = prev;
+ env->options = prev;
if (r < 0) {
onig_node_free(target);
return r;
@@ -5161,7 +6078,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
if (len == enclen(env->enc, STR_(*np)->s)) {//should not enclen_end()
r = fetch_token(tok, src, end, env);
- NSTRING_CLEAR_RAW(*np);
+ NODE_STRING_CLEAR_RAW(*np);
goto string_end;
}
}
@@ -5176,7 +6093,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
rem = ONIGENC_MBC_MINLEN(env->enc) - len;
(void )node_str_head_pad(STR_(*np), rem, (UChar )0);
if (len + rem == enclen(env->enc, STR_(*np)->s)) {
- NSTRING_CLEAR_RAW(*np);
+ NODE_STRING_CLEAR_RAW(*np);
goto string_end;
}
}
@@ -5266,7 +6183,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (r != 0) return r;
cc = CCLASS_(*np);
- if (IS_IGNORECASE(env->option)) {
+ if (IS_IGNORECASE(env->options)) {
IApplyCaseFoldArg iarg;
iarg.env = env;
@@ -5293,12 +6210,12 @@ parse_exp(Node** np, OnigToken* tok, int term,
break;
case TK_ANYCHAR:
- *np = node_new_ctype(CTYPE_ANYCHAR, 0);
+ *np = node_new_anychar();
CHECK_NULL_RETURN_MEMERR(*np);
break;
case TK_ANYCHAR_ANYTIME:
- *np = node_new_ctype(CTYPE_ANYCHAR, 0);
+ *np = node_new_anychar();
CHECK_NULL_RETURN_MEMERR(*np);
qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
CHECK_NULL_RETURN_MEMERR(qn);
@@ -5319,7 +6236,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
CHECK_NULL_RETURN_MEMERR(*np);
break;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
case TK_CALL:
{
int gnum = tok->u.call.gnum;
@@ -5352,6 +6269,26 @@ parse_exp(Node** np, OnigToken* tok, int term,
}
break;
+ case TK_KEEP:
+ r = node_new_keep(np, env);
+ if (r < 0) return r;
+ break;
+
+ case TK_GENERAL_NEWLINE:
+ r = node_new_general_newline(np, env);
+ if (r < 0) return r;
+ break;
+
+ case TK_NO_NEWLINE:
+ r = node_new_no_newline(np, env);
+ if (r < 0) return r;
+ break;
+
+ case TK_TRUE_ANYCHAR:
+ r = node_new_true_anychar(np, env);
+ if (r < 0) return r;
+ break;
+
default:
return ONIGERR_PARSER_BUG;
break;
@@ -5526,13 +6463,13 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
return 0;
}
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
static int
make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
{
int r;
- Node* x = node_new_enclosure_memory(0 /* 0: is not named */);
+ Node* x = node_new_memory(0 /* 0: is not named */);
CHECK_NULL_RETURN_MEMERR(x);
NODE_BODY(x) = node;
@@ -5560,7 +6497,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
#endif
scan_env_clear(env);
- env->option = reg->options;
+ env->options = reg->options;
env->case_fold_flag = reg->case_fold_flag;
env->enc = reg->enc;
env->syntax = reg->syntax;
@@ -5576,7 +6513,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
p = (UChar* )pattern;
r = parse_regexp(root, &p, (UChar* )end, env);
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
if (r != 0) return r;
if (env->has_call_zero != 0) {
diff --git a/src/regparse.h b/src/regparse.h
index 884f4d5..b7260ea 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -33,48 +33,58 @@
/* node type */
typedef enum {
- NODE_STR = 0,
- NODE_CCLASS = 1,
- NODE_CTYPE = 2,
- NODE_BREF = 3,
- NODE_QUANT = 4,
- NODE_ENCLOSURE = 5,
- NODE_ANCHOR = 6,
- NODE_LIST = 7,
- NODE_ALT = 8,
- NODE_CALL = 9
+ NODE_STRING = 0,
+ NODE_CCLASS = 1,
+ NODE_CTYPE = 2,
+ NODE_BACKREF = 3,
+ NODE_QUANT = 4,
+ NODE_ENCLOSURE = 5,
+ NODE_ANCHOR = 6,
+ NODE_LIST = 7,
+ NODE_ALT = 8,
+ NODE_CALL = 9,
+ NODE_GIMMICK = 10
} NodeType;
+/* kind of a NODE_GIMMICK node; stored in GimmickNode.type */
+enum GimmickType {
+ GIMMICK_FAIL = 0,
+ GIMMICK_KEEP = 1,
+ GIMMICK_SAVE = 2,
+ GIMMICK_UPDATE_VAR = 3,
+};
+
/* node type bit */
#define NODE_TYPE2BIT(type) (1<<(type))
-#define BIT_NODE_STR NODE_TYPE2BIT(NODE_STR)
+#define BIT_NODE_STRING NODE_TYPE2BIT(NODE_STRING)
#define BIT_NODE_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
#define BIT_NODE_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
-#define BIT_NODE_BREF NODE_TYPE2BIT(NODE_BREF)
-#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT)
+#define BIT_NODE_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
+#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT)
#define BIT_NODE_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE)
#define BIT_NODE_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
#define BIT_NODE_LIST NODE_TYPE2BIT(NODE_LIST)
#define BIT_NODE_ALT NODE_TYPE2BIT(NODE_ALT)
#define BIT_NODE_CALL NODE_TYPE2BIT(NODE_CALL)
+#define BIT_NODE_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
#define NODE_IS_SIMPLE_TYPE(node) \
((NODE_TYPE2BIT(NODE_TYPE(node)) & \
- (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0)
+ (BIT_NODE_STRING | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BACKREF)) != 0)
#define NODE_TYPE(node) ((node)->u.base.node_type)
-#define SET_NODE_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
+#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
#define STR_(node) (&((node)->u.str))
#define CCLASS_(node) (&((node)->u.cclass))
#define CTYPE_(node) (&((node)->u.ctype))
-#define BREF_(node) (&((node)->u.bref))
-#define QUANT_(node) (&((node)->u.quant))
-#define ENCLOSURE_(node) (&((node)->u.enclosure))
+#define BACKREF_(node) (&((node)->u.backref))
+#define QUANT_(node) (&((node)->u.quant))
+#define ENCLOSURE_(node) (&((node)->u.enclosure))
#define ANCHOR_(node) (&((node)->u.anchor))
#define CONS_(node) (&((node)->u.cons))
#define CALL_(node) (&((node)->u.call))
+#define GIMMICK_(node) (&((node)->u.gimmick))
#define NODE_CAR(node) (CONS_(node)->car)
#define NODE_CDR(node) (CONS_(node)->cdr)
@@ -83,6 +93,9 @@ typedef enum {
#define NODE_IS_ANYCHAR(node) \
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
+#define CTYPE_OPTION(node, reg) \
+ (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
+
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
@@ -90,24 +103,25 @@ typedef enum {
#define ENCLOSURE_MEMORY (1<<0)
#define ENCLOSURE_OPTION (1<<1)
#define ENCLOSURE_STOP_BACKTRACK (1<<2)
+#define ENCLOSURE_IF_ELSE (1<<3)
-#define NODE_STR_MARGIN 16
-#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_STRING_MARGIN 16
+#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
#define STRING_RAW (1<<0) /* by backslashed number */
#define STRING_AMBIG (1<<1)
#define STRING_DONT_GET_OPT_INFO (1<<2)
-#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
-#define NSTRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW
-#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW
-#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG
-#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
+#define NODE_STRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
+#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW
+#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW
+#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG
+#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
(node)->u.str.flag |= STRING_DONT_GET_OPT_INFO
-#define NSTRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0)
-#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0)
-#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
+#define NODE_STRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0)
+#define NODE_STRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0)
+#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
(((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) \
@@ -118,7 +132,7 @@ typedef enum {
#define QUANT_BODY_IS_EMPTY_MEM 2
#define QUANT_BODY_IS_EMPTY_REC 3
-/* status bits */
+/* node status bits */
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
#define NST_CLEN_FIXED (1<<2)
@@ -136,28 +150,37 @@ typedef enum {
#define NST_BY_NUMBER (1<<14) /* {n,m} */
#define NST_BY_NAME (1<<15) /* backref by name */
#define NST_BACKREF (1<<16)
+#define NST_CHECKER (1<<17)
+#define NST_FIXED_OPTION (1<<18)
+#define NST_PROHIBIT_RECURSION (1<<19)
+#define NST_SUPER (1<<20)
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (f))
#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(f))
-#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0)
+#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0)
#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0)
-#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0)
+#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0)
#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0)
-#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0)
+#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0)
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0)
-#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0)
-#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0)
-#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0)
-#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0)
-#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0)
-#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0)
-#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0)
-#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0)
-#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0)
-#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0)
+#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0)
+#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0)
+#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0)
+#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0)
+#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0)
+#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0)
+#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0)
+#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0)
+#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0)
+#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0)
+#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NST_CHECKER) != 0)
+#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NST_FIXED_OPTION) != 0)
+#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NST_SUPER) != 0)
+#define NODE_IS_PROHIBIT_RECURSION(node) \
+ ((NODE_STATUS(node) & NST_PROHIBIT_RECURSION) != 0)
#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0)
@@ -168,8 +191,6 @@ typedef enum {
#define NODE_ANCHOR_BODY(node) ((node)->body)
-#define CALLNODE_REFNUM_UNDEF -1
-
typedef struct {
NodeType node_type;
int status;
@@ -178,7 +199,7 @@ typedef struct {
UChar* end;
unsigned int flag;
int capa; /* (allocated size - 1) or 0: use buf[] */
- UChar buf[NODE_STR_BUF_SIZE];
+ UChar buf[NODE_STRING_BUF_SIZE];
} StrNode;
typedef struct {
@@ -221,17 +242,22 @@ typedef struct {
int called_state;
} m;
struct {
- OnigOptionType option;
+ OnigOptionType options;
} o;
+ struct {
+ /* body is condition */
+ struct _Node* Then;
+ struct _Node* Else;
+ } te;
};
/* for multiple call reference */
- OnigLen min_len; /* min length (byte) */
- OnigLen max_len; /* max length (byte) */
- int char_len; /* character length */
- int opt_count; /* referenced count in optimize_node_left() */
+ OnigLen min_len; /* min length (byte) */
+ OnigLen max_len; /* max length (byte) */
+ int char_len; /* character length */
+ int opt_count; /* referenced count in optimize_node_left() */
} EnclosureNode;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
typedef struct {
int offset;
@@ -266,7 +292,7 @@ typedef struct {
int back_static[NODE_BACKREFS_SIZE];
int* back_dynamic;
int nest_level;
-} BRefNode;
+} BackRefNode;
typedef struct {
NodeType node_type;
@@ -291,8 +317,18 @@ typedef struct {
int ctype;
int not;
+ OnigOptionType options;
} CtypeNode;
+/* Node payload for gimmick nodes: the `gimmick` member of the Node union,
+ reached through GIMMICK_(node). */
+typedef struct {
+ NodeType node_type;
+ int status;
+
+ enum GimmickType type; /* which gimmick this node is */
+ int detail_type; /* NOTE(review): presumably a sub-kind of `type` -- confirm */
+ int id;
+} GimmickNode;
+
typedef struct _Node {
union {
struct {
@@ -305,13 +341,14 @@ typedef struct _Node {
CClassNode cclass;
QuantNode quant;
EnclosureNode enclosure;
- BRefNode bref;
+ BackRefNode backref;
AnchorNode anchor;
ConsAltNode cons;
CtypeNode ctype;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
CallNode call;
#endif
+ GimmickNode gimmick;
} u;
} Node;
@@ -332,7 +369,11 @@ typedef struct {
} MemEnv;
typedef struct {
- OnigOptionType option;
+ enum SaveType type;
+} SaveItem;
+
+typedef struct {
+ OnigOptionType options;
OnigCaseFoldType case_fold_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
@@ -346,7 +387,7 @@ typedef struct {
UChar* error_end;
regex_t* reg; /* for reg->names only */
int num_call;
-#ifdef USE_SUBEXP_CALL
+#ifdef USE_CALL
UnsetAddrList* unset_addr_list;
int has_call_zero;
#endif
@@ -364,6 +405,11 @@ typedef struct {
int has_recursion;
#endif
unsigned int parse_depth;
+
+ int keep_num;
+ int save_num;
+ int save_alloc_num;
+ SaveItem* saves;
} ScanEnv;
@@ -399,6 +445,7 @@ extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
diff --git a/src/regposix.c b/src/regposix.c
index bbe52dc..32b11b5 100644
--- a/src/regposix.c
+++ b/src/regposix.c
@@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -114,6 +114,9 @@ onig2posix_error_code(int code)
{ ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
{ ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
{ ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
+ { ONIGERR_INVALID_IF_ELSE_SYNTAX, REG_BADPAT },
+ { ONIGERR_INVALID_ABSENT_GROUP_PATTERN, REG_BADPAT },
+ { ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN, REG_BADPAT },
{ ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
{ ONIGERR_LIBRARY_IS_NOT_INITIALIZED, REG_EONIG_INTERNAL }
};
diff --git a/src/regsyntax.c b/src/regsyntax.c
index e751e24..6833e1d 100644
--- a/src/regsyntax.c
+++ b/src/regsyntax.c
@@ -2,7 +2,7 @@
regsyntax.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -174,8 +174,13 @@ OnigSyntaxType OnigSyntaxPerl = {
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
+ ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
- ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT )
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
+ ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
+ ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT )
, SYN_GNU_REGEX_BV
, ONIG_OPTION_SINGLELINE
,
@@ -199,11 +204,16 @@ OnigSyntaxType OnigSyntaxPerl_NG = {
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
+ ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
- ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+ ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
+ ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
+ ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
diff --git a/src/utf8.c b/src/utf8.c
index 219b7ea..1ad9653 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -90,6 +90,7 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
return TRUE;
}
+#if 0
static int
is_mbc_newline(const UChar* p, const UChar* end)
{
@@ -114,6 +115,7 @@ is_mbc_newline(const UChar* p, const UChar* end)
return 0;
}
+#endif
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
@@ -246,43 +248,6 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
}
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- const UChar* p = *pp;
-
- if (ONIGENC_IS_MBC_ASCII(p)) {
- (*pp)++;
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
- }
- else {
- (*pp) += enclen(ONIG_ENCODING_UTF8, p);
-
- if (*p == 0xc3) {
- int c = *(p + 1);
- if (c >= 0x80) {
- if (c <= (UChar )0x9e) { /* upper */
- if (c == (UChar )0x97) return FALSE;
- return TRUE;
- }
- else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */
- if (c == (UChar )'\267') return FALSE;
- return TRUE;
- }
- else if (c == (UChar )0x9f &&
- (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- return TRUE;
- }
- }
- }
- }
-
- return FALSE;
-}
-#endif
-
-
static int
get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
const OnigCodePoint* ranges[])
@@ -317,7 +282,7 @@ OnigEncodingType OnigEncodingUTF8 = {
"UTF-8", /* name */
6, /* max byte length */
1, /* min byte length */
- is_mbc_newline,
+ onigenc_is_mbc_newline_0x0a,
mbc_to_code,
code_to_mbclen,
code_to_mbc,
diff --git a/test-driver b/test-driver
index d306056..8e575b0 100755
--- a/test-driver
+++ b/test-driver
@@ -3,7 +3,7 @@
scriptversion=2013-07-13.22; # UTC
-# Copyright (C) 2011-2013 Free Software Foundation, Inc.
+# Copyright (C) 2011-2014 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -106,11 +106,14 @@ trap "st=143; $do_exit" 15
# Test script is run here.
"$@" >$log_file 2>&1
estatus=$?
+
if test $enable_hard_errors = no && test $estatus -eq 99; then
- estatus=1
+ tweaked_estatus=1
+else
+ tweaked_estatus=$estatus
fi
-case $estatus:$expect_failure in
+case $tweaked_estatus:$expect_failure in
0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
0:*) col=$grn res=PASS recheck=no gcopy=no;;
77:*) col=$blu res=SKIP recheck=no gcopy=yes;;
@@ -119,6 +122,12 @@ case $estatus:$expect_failure in
*:*) col=$red res=FAIL recheck=yes gcopy=yes;;
esac
+# Report the test outcome and exit status in the logs, so that one can
+# know whether the test passed or failed simply by looking at the '.log'
+# file, without the need of also peeking into the corresponding '.trs'
+# file (automake bug#11814).
+echo "$res $test_name (exit status: $estatus)" >>$log_file
+
# Report outcome to console.
echo "${col}${res}${std}: $test_name"
diff --git a/test/testc.c b/test/testc.c
index 725e375..222c9cd 100644
--- a/test/testc.c
+++ b/test/testc.c
@@ -590,6 +590,88 @@ extern int main(int argc, char* argv[])
x2("\\g<+2>(abc)(ABC){0}", "ABCabc", 0, 6); // relative call by positive number
x2("A\\g'0'|B()", "AAAAB", 0, 5);
x3("(A\\g'0')|B", "AAAAB", 0, 5, 1);
+ x2("(a*)(?(1))aa", "aaaaa", 0, 5);
+ x2("(a*)(?(-1))aa", "aaaaa", 0, 5);
+ x2("(?<name>aaa)(?('name'))aa", "aaaaa", 0, 5);
+ x2("(a)(?(1)aa|bb)a", "aaaaa", 0, 4);
+ x2("(?:aa|())(?(<1>)aa|bb)a", "aabba", 0, 5);
+ x2("(?:aa|())(?('1')aa|bb|cc)a", "aacca", 0, 5);
+ x3("(a*)(?(1)aa|a)b", "aaab", 0, 1, 1);
+ n("(a)(?(1)a|b)c", "abc");
+ x2("(a)(?(1)|)c", "ac", 0, 2);
+ n("(?()aaa|bbb)", "bbb");
+ x2("(a)(?(1+0)b|c)d", "abd", 0, 3);
+ x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "ace", 0, 3);
+ x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "bce", 0, 3);
+ x2("\\R", "\r\n", 0, 2);
+ x2("\\R", "\r", 0, 1);
+ x2("\\R", "\n", 0, 1);
+ x2("\\R", "\x0b", 0, 1);
+ n("\\R\\n", "\r\n");
+ n("\\R", "\xc2\x85"); // because euc-jp is not Unicode
+ x2("\\N", "a", 0, 1);
+ n("\\N", "\n");
+ n("(?m:\\N)", "\n");
+ n("(?-m:\\N)", "\n");
+ x2("\\O", "a", 0, 1);
+ x2("\\O", "\n", 0, 1);
+ x2("(?m:\\O)", "\n", 0, 1);
+ x2("(?-m:\\O)", "\n", 0, 1);
+ x2("\\K", "a", 0, 0);
+ x2("a\\K", "a", 1, 1);
+ x2("a\\Kb", "ab", 1, 2);
+ x2("(a\\Kb|ac\\Kd)", "acd", 2, 3);
+ x2("(a\\Kb|\\Kac\\K)*", "acababacab", 9, 10);
+
+ x2("(?~)", "", 0, 0);
+ x2("(?~)", "A", 0, 0);
+ x2("aaaaa(?~)", "aaaaaaaaaa", 0, 5);
+ x2("(?~(?:|aaa))", "aaa", 0, 0);
+ x2("(?~aaa|)", "aaa", 0, 0);
+ x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // !!!
+ x2("/\\*(?~\\*/)\\*/", "/* */ */", 0, 5);
+ x2("(?~\\w+)zzzzz", "zzzzz", 0, 5);
+ x2("(?~\\w*)zzzzz", "zzzzz", 0, 5);
+ x2("(?~A.C|B)", "ABC", 0, 0);
+ x2("(?~XYZ|ABC)a", "ABCa", 1, 4);
+ x2("(?~XYZ|ABC)a", "aABCa", 0, 1);
+ x2("<[^>]*>(?~[<>])</[^>]*>", "<a>vvv</a> <b> </b>", 0, 10);
+ x2("(?~ab)", "ccc\ndab", 0, 5);
+ x2("(?m:(?~ab))", "ccc\ndab", 0, 5);
+ x2("(?-m:(?~ab))", "ccc\ndab", 0, 5);
+
+ // absent with expr
+ x2("(?~|78|\\d*)", "123456789", 0, 6);
+ x2("(?~|def|(?:abc|de|f){0,100})", "abcdedeabcfdefabc", 0, 11);
+ x2("(?~|ab|.*)", "ccc\nddd", 0, 3);
+ x2("(?~|ab|\\O*)", "ccc\ndab", 0, 5);
+ x2("(?~|ab|\\O{2,10})", "ccc\ndab", 0, 5);
+ x2("(?~|ab|\\O{1,10})", "ab", 1, 2);
+ n("(?~|ab|\\O{2,10})", "ab");
+ x2("(?~|abc|\\O{1,10})", "abc", 1, 3);
+ x2("(?~|ab|\\O{5,10})|abc", "abc", 0, 3);
+ x2("(?~|ab|\\O{1,10})", "cccccccccccab", 0, 10);
+ x2("(?~|aaa|)", "aaa", 0, 0);
+ x2("(?~||a*)", "aaaaaa", 0, 0);
+ x2("(?~||a*?)", "aaaaaa", 0, 0);
+ x2("(a)(?~|b|\\1)", "aaaaaa", 0, 2);
+ x2("(a)(?~|bb|(?:a\\1)*)", "aaaaaa", 0, 5);
+ x2("(b|c)(?~|abac|(?:a\\1)*)", "abababacabab", 1, 4);
+ n("(?~|c|a*+)a", "aaaaa");
+ x2("(?~|aaaaa|a*+)", "aaaaa", 0, 0);
+ x2("(?~|aaaaaa|a*+)b", "aaaaaab", 1, 7);
+ x2("(?~|abcd|(?>))", "zzzabcd", 0, 0);
+
+ // absent range cutter
+ x2("(?~|abc)a*", "aaaaaabc", 0, 5);
+ x2("(?~|abc)a*z|aaaaaabc", "aaaaaabc", 0, 8);
+ x2("(?~|aaaaaa)a*", "aaaaaa", 0, 0);
+ x2("(?~|abc)aaaa|aaaabc", "aaaabc", 0, 6);
+ x2("(?>(?~|abc))aaaa|aaaabc", "aaaabc", 0, 6);
+ x2("(?~|)a", "a", 0, 1);
+ n("(?~|a)a", "a");
+ x2("(?~|a)(?~|)a", "a", 0, 1);
+ x2("(?~|a).*(?~|)a", "bbbbbbbbbbbbbbbbbbbba", 0, 21);
/*
< ifndef IGNORE_EUC_JP >
diff --git a/test/testu.c b/test/testu.c
index 017ebef..6ff3a10 100644
--- a/test/testu.c
+++ b/test/testu.c
@@ -905,6 +905,15 @@ extern int main(int argc, char* argv[])
x2("\000^\000\\\000p\000{\000K\000a\000t\000a\000k\000a\000n\000a\000}\000$\000\000", "\060\277\000\000", 0, 2);
x2("\000\\\000o\000{\0001\0000\0001\000}\000\000", "\000A\000\000", 0, 2);
x2("\000\\\000o\000{\0001\0001\0000\0007\0002\0001\000}\000\000", "\221\321\000\000", 0, 2);
+ x2("\000\\\000R\000\000", "\000\015\000\012\000\000", 0, 4); // \R: general newline
+ x2("\000\\\000R\000\000", "\000\012\000\000", 0, 2);
+ x2("\000\\\000R\000\000", "\000\015\000\000", 0, 2);
+ x2("\000\\\000R\000\000", "\000\013\000\000", 0, 2);
+ n("\000\\\000R\000\012\000\000", "\000\015\000\012\000\000");
+ x2("\000\\\000R\000\000", "\x00\x85\000\000", 0, 2);
+ x2("\000\\\000R\000\000", "\x20\x28\000\000", 0, 2);
+ x2("\000\\\000R\000\000", "\x20\x29\000\000", 0, 2);
+ n("\000\\\000R\000\000", "\x20\x2a\000\000");
fprintf(stdout,
"\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",