diff options
author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2018-03-20 06:14:49 +0100 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2018-03-20 06:14:49 +0100 |
commit | 10abcf77cc24dfae451d96310b4391dad35906ed (patch) | |
tree | b9812ad2b1b038fd121f1031e9ff87978af0f5ff | |
parent | 98ab313fe496ae7c792db29c80bf6b23347484ff (diff) |
New upstream version 6.8.1 (tag: upstream/6.8.1)
99 files changed, 6481 insertions, 2523 deletions
@@ -25,4 +25,5 @@ Makefile.in .libs/ .deps/ /build +/onig-*.tar.gz m4/*.m4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e1efe8..1d4d3a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,9 +4,8 @@ cmake_minimum_required(VERSION 2.8) project(oniguruma C) set(PACKAGE onig) -set(PACKAGE_VERSION "6.6.0") +set(PACKAGE_VERSION "6.8.1") -set(USE_COMBINATION_EXPLOSION_CHECK 0) set(USE_CRNL_AS_LINE_TERMINATOR 0) set(VERSION ${PACKAGE_VERSION}) @@ -40,6 +39,7 @@ check_include_files(stdint.h HAVE_STDINT_H) check_include_files(stdlib.h HAVE_STDLIB_H) check_include_files(strings.h HAVE_STRINGS_H) check_include_files(string.h HAVE_STRING_H) +check_include_files(limits.h HAVE_LIMITS_H) check_include_files(sys/times.h HAVE_SYS_TIMES_H) check_include_files(sys/time.h HAVE_SYS_TIME_H) check_include_files(sys/types.h HAVE_SYS_TYPES_H) @@ -80,7 +80,7 @@ install_library(onig) install_header(src/oniguruma.h src/onigposix.h src/oniggnu.h) -install_doc(doc/API doc/API.ja doc/RE doc/RE.ja doc/UNICODE_PROPERTIES) +install_doc(doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja doc/CALLOUTS.BUILTIN doc/CALLOUTS.BUILTIN.ja doc/UNICODE_PROPERTIES) install_data(AUTHORS COPYING HISTORY README.md) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/oniguruma.pc @@ -1,7 +1,40 @@ History +2018/03/19: Version 6.8.1 + +2018/03/19: update LTVERSION from 4:0:0 to 5:0:0 +2018/03/19: add flag, sb_range etc.. into OnigEncodingType +2018/03/19: move regex structure from oniguruma.h to regint.h +2018/03/19: ONIGENC_CTYPE_XXX to be enum (Issue #33) + +2018/03/16: Version 6.8.0 + +2018/03/12: add doc/CALLOUTS.BUILTIN for builtin callouts +2018/03/08: allow abbreviated notation for callouts (?(*name)..|..) (?(?{...})..|..) 
+2018/03/02: NEW API: move onigenc_strdup() from regenc.h to oniguruma.h +2018/02/21: remove all USE_COMBINATION_EXPLOSION_CHECK +2018/02/15: fix #78: bad definition of PV_() +2018/02/14: add configure option --enable-posix-api (for #77) +2018/02/08: implement callouts of name +2018/02/01: implement callouts of contents +2018/01/30: define ONIGURUMA_VERSION_INT +2018/01/29: enable USE_TRY_IN_MATCH_LIMIT by default +2018/01/29: NEW API: onig_search_with_param() onig_match_with_param() +2018/01/26: remove include windows.h from oniguruma.h + +2018/01/26: Version 6.7.1 + +2018/01/25: disable USE_TRY_IN_MATCH_LIMIT by default +2018/01/24: implement mechanism of try-in-match-limit +2018/01/24: #76: rename EXPORT to ONIGURUMA_EXPORT +2018/01/15: #73: update for automake 1.15.1 +2018/01/14: #74: update description of README +2018/01/10: #72: Correct spelling and grammar in FAQ (English) +2017/12/25: remove USE_COMBINATION_EXPLOSION_CHECK codes + 2017/12/11: Version 6.7.0 +2017/12/08: Disable \N and \O on ONIG_SYNTAX_RUBY 2017/12/08: add ONIG_SYNTAX_ONIGURUMA (default syntax) 2017/12/05: restructure StackType 2017/11/13: implement subexp calls (?R), (?&name), (?-n), (?+n) for Perl syntax @@ -1,8 +1,8 @@ Installation Instructions ************************* -Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation, -Inc. + Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software +Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright @@ -12,97 +12,96 @@ without warranty of any kind. Basic Installation ================== - Briefly, the shell command `./configure && make && make install' + Briefly, the shell command './configure && make && make install' should configure, build, and install this package. 
The following -more-detailed instructions are generic; see the `README' file for +more-detailed instructions are generic; see the 'README' file for instructions specific to this package. Some packages provide this -`INSTALL' file but do not implement all of the features documented +'INSTALL' file but do not implement all of the features documented below. The lack of an optional feature in a given package is not necessarily a bug. More recommendations for GNU packages can be found in *note Makefile Conventions: (standards)Makefile Conventions. - The `configure' shell script attempts to guess correct values for + The 'configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that +those values to create a 'Makefile' in each directory of the package. +It may also create one or more '.h' files containing system-dependent +definitions. Finally, it creates a shell script 'config.status' that you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). +file 'config.log' containing compiler output (useful mainly for +debugging 'configure'). - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. Caching is -disabled by default to prevent problems with accidental use of stale -cache files. + It can also use an optional file (typically called 'config.cache' and +enabled with '--cache-file=config.cache' or simply '-C') that saves the +results of its tests to speed up reconfiguring. 
Caching is disabled by +default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can +to figure out how 'configure' could check whether to do them, and mail +diffs or instructions to the address given in the 'README' so they can be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you +some point 'config.cache' contains results you don't want to keep, you may remove or edit it. - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You need `configure.ac' if -you want to change it or regenerate `configure' using a newer version -of `autoconf'. + The file 'configure.ac' (or 'configure.in') is used to create +'configure' by a program called 'autoconf'. You need 'configure.ac' if +you want to change it or regenerate 'configure' using a newer version of +'autoconf'. The simplest way to compile this package is: - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. + 1. 'cd' to the directory containing the package's source code and type + './configure' to configure the package for your system. - Running `configure' might take a while. While running, it prints + Running 'configure' might take a while. While running, it prints some messages telling which features it is checking for. - 2. Type `make' to compile the package. + 2. Type 'make' to compile the package. - 3. Optionally, type `make check' to run any self-tests that come with + 3. Optionally, type 'make check' to run any self-tests that come with the package, generally using the just-built uninstalled binaries. - 4. 
Type `make install' to install the programs and any data files and + 4. Type 'make install' to install the programs and any data files and documentation. When installing into a prefix owned by root, it is recommended that the package be configured and built as a regular - user, and only the `make install' phase executed with root + user, and only the 'make install' phase executed with root privileges. - 5. Optionally, type `make installcheck' to repeat any self-tests, but + 5. Optionally, type 'make installcheck' to repeat any self-tests, but this time using the binaries in their final installed location. This target does not install anything. Running this target as a - regular user, particularly if the prior `make install' required + regular user, particularly if the prior 'make install' required root privileges, verifies that the installation completed correctly. 6. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly + source code directory by typing 'make clean'. To also remove the + files that 'configure' created (so you can compile the package for + a different kind of computer), type 'make distclean'. There is + also a 'make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. - 7. Often, you can also type `make uninstall' to remove the installed + 7. Often, you can also type 'make uninstall' to remove the installed files again. In practice, not all packages have tested that uninstallation works correctly, even though it is required by the GNU Coding Standards. - 8. 
Some packages, particularly those that use Automake, provide `make + 8. Some packages, particularly those that use Automake, provide 'make distcheck', which can by used by developers to test that all other - targets like `make install' and `make uninstall' work correctly. + targets like 'make install' and 'make uninstall' work correctly. This target is generally not run by end users. Compilers and Options ===================== Some systems require unusual options for compilation or linking that -the `configure' script does not know about. Run `./configure --help' +the 'configure' script does not know about. Run './configure --help' for details on some of the pertinent environment variables. - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: + You can give 'configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here is +an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix @@ -113,21 +112,21 @@ Compiling For Multiple Architectures You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their -own directory. To do this, you can use GNU `make'. `cd' to the +own directory. To do this, you can use GNU 'make'. 'cd' to the directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. This -is known as a "VPATH" build. +the 'configure' script. 'configure' automatically checks for the source +code in the directory that 'configure' is in and in '..'. This is known +as a "VPATH" build. - With a non-GNU `make', it is safer to compile the package for one + With a non-GNU 'make', it is safer to compile the package for one architecture at a time in the source code directory. 
After you have -installed the package for one architecture, use `make distclean' before +installed the package for one architecture, use 'make distclean' before reconfiguring for another architecture. On MacOS X 10.5 and later systems, you can create libraries and executables that work on multiple system types--known as "fat" or -"universal" binaries--by specifying multiple `-arch' options to the -compiler but only a single `-arch' option to the preprocessor. Like +"universal" binaries--by specifying multiple '-arch' options to the +compiler but only a single '-arch' option to the preprocessor. Like this: ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ @@ -136,105 +135,104 @@ this: This is not guaranteed to produce working output in all cases, you may have to build one architecture at a time and combine the results -using the `lipo' tool if you have problems. +using the 'lipo' tool if you have problems. Installation Names ================== - By default, `make install' installs the package's commands under -`/usr/local/bin', include files under `/usr/local/include', etc. You -can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX', where PREFIX must be an + By default, 'make install' installs the package's commands under +'/usr/local/bin', include files under '/usr/local/include', etc. You +can specify an installation prefix other than '/usr/local' by giving +'configure' the option '--prefix=PREFIX', where PREFIX must be an absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you -pass the option `--exec-prefix=PREFIX' to `configure', the package uses +pass the option '--exec-prefix=PREFIX' to 'configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. 
In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. In general, the -default for these options is expressed in terms of `${prefix}', so that -specifying just `--prefix' will affect all of the other directory +options like '--bindir=DIR' to specify different values for particular +kinds of files. Run 'configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the default +for these options is expressed in terms of '${prefix}', so that +specifying just '--prefix' will affect all of the other directory specifications that were not explicitly provided. The most portable way to affect installation locations is to pass the -correct locations to `configure'; however, many packages provide one or +correct locations to 'configure'; however, many packages provide one or both of the following shortcuts of passing variable assignments to the -`make install' command line to change installation locations without +'make install' command line to change installation locations without having to reconfigure or recompile. The first method involves providing an override variable for each -affected directory. For example, `make install +affected directory. For example, 'make install prefix=/alternate/directory' will choose an alternate location for all directory configuration variables that were expressed in terms of -`${prefix}'. Any directories that were specified during `configure', -but not in terms of `${prefix}', must each be overridden at install -time for the entire installation to be relocated. The approach of -makefile variable overrides for each directory variable is required by -the GNU Coding Standards, and ideally causes no recompilation. 
-However, some platforms have known limitations with the semantics of -shared libraries that end up requiring recompilation when using this -method, particularly noticeable in packages that use GNU Libtool. - - The second method involves providing the `DESTDIR' variable. For -example, `make install DESTDIR=/alternate/directory' will prepend -`/alternate/directory' before all installation names. The approach of -`DESTDIR' overrides is not required by the GNU Coding Standards, and +'${prefix}'. Any directories that were specified during 'configure', +but not in terms of '${prefix}', must each be overridden at install time +for the entire installation to be relocated. The approach of makefile +variable overrides for each directory variable is required by the GNU +Coding Standards, and ideally causes no recompilation. However, some +platforms have known limitations with the semantics of shared libraries +that end up requiring recompilation when using this method, particularly +noticeable in packages that use GNU Libtool. + + The second method involves providing the 'DESTDIR' variable. For +example, 'make install DESTDIR=/alternate/directory' will prepend +'/alternate/directory' before all installation names. The approach of +'DESTDIR' overrides is not required by the GNU Coding Standards, and does not work on platforms that have drive letters. On the other hand, it does better at avoiding recompilation issues, and works well even -when some directory options were not specified in terms of `${prefix}' -at `configure' time. +when some directory options were not specified in terms of '${prefix}' +at 'configure' time. Optional Features ================= If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 
- - Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the +with an extra prefix or suffix on their names by giving 'configure' the +option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'. + + Some packages pay attention to '--enable-FEATURE' options to +'configure', where FEATURE indicates an optional part of the package. +They may also pay attention to '--with-PACKAGE' options, where PACKAGE +is something like 'gnu-as' or 'x' (for the X Window System). The +'README' should mention any '--enable-' and '--with-' options that the package recognizes. - For packages that use the X Window System, `configure' can usually + For packages that use the X Window System, 'configure' can usually find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. +you can use the 'configure' options '--x-includes=DIR' and +'--x-libraries=DIR' to specify their locations. Some packages offer the ability to configure how verbose the -execution of `make' will be. For these packages, running `./configure +execution of 'make' will be. For these packages, running './configure --enable-silent-rules' sets the default to minimal output, which can be -overridden with `make V=1'; while running `./configure +overridden with 'make V=1'; while running './configure --disable-silent-rules' sets the default to verbose, which can be -overridden with `make V=0'. +overridden with 'make V=0'. Particular systems ================== - On HP-UX, the default C compiler is not ANSI C compatible. 
If GNU -CC is not installed, it is recommended to use the following options in + On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC +is not installed, it is recommended to use the following options in order to use an ANSI C compiler: ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" and if that doesn't work, install pre-built binaries of GCC for HP-UX. - HP-UX `make' updates targets which have the same time stamps as -their prerequisites, which makes it generally unusable when shipped -generated files such as `configure' are involved. Use GNU `make' -instead. + HP-UX 'make' updates targets which have the same time stamps as their +prerequisites, which makes it generally unusable when shipped generated +files such as 'configure' are involved. Use GNU 'make' instead. On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot -parse its `<wchar.h>' header file. The option `-nodtk' can be used as -a workaround. If GNU CC is not installed, it is therefore recommended -to try +parse its '<wchar.h>' header file. The option '-nodtk' can be used as a +workaround. If GNU CC is not installed, it is therefore recommended to +try ./configure CC="cc" @@ -242,26 +240,26 @@ and if that doesn't work, try ./configure CC="cc -nodtk" - On Solaris, don't put `/usr/ucb' early in your `PATH'. This + On Solaris, don't put '/usr/ucb' early in your 'PATH'. This directory contains several dysfunctional programs; working variants of -these programs are available in `/usr/bin'. So, if you need `/usr/ucb' -in your `PATH', put it _after_ `/usr/bin'. +these programs are available in '/usr/bin'. So, if you need '/usr/ucb' +in your 'PATH', put it _after_ '/usr/bin'. - On Haiku, software installed for all users goes in `/boot/common', -not `/usr/local'. It is recommended to use the following options: + On Haiku, software installed for all users goes in '/boot/common', +not '/usr/local'. 
It is recommended to use the following options: ./configure --prefix=/boot/common Specifying the System Type ========================== - There may be some features `configure' cannot figure out + There may be some features 'configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the -_same_ architectures, `configure' can figure that out, but if it prints +_same_ architectures, 'configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the -`--build=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: +'--build=TYPE' option. TYPE can either be a short name for the system +type, such as 'sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM @@ -270,101 +268,101 @@ where SYSTEM can have one of these forms: OS KERNEL-OS - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't + See the file 'config.sub' for the possible values of each field. If +'config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should -use the option `--target=TYPE' to select the type of system they will +use the option '--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. +eventually be run) with '--host=TYPE'. 
Sharing Defaults ================ - If you want to set default values for `configure' scripts to share, -you can create a site shell script called `config.site' that gives -default values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. + If you want to set default values for 'configure' scripts to share, +you can create a site shell script called 'config.site' that gives +default values for variables like 'CC', 'cache_file', and 'prefix'. +'configure' looks for 'PREFIX/share/config.site' if it exists, then +'PREFIX/etc/config.site' if it exists. Or, you can set the +'CONFIG_SITE' environment variable to the location of the site script. +A warning: not all 'configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run +environment passed to 'configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: +them in the 'configure' command line, using 'VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc -causes the specified `gcc' to be used as the C compiler (unless it is +causes the specified 'gcc' to be used as the C compiler (unless it is overridden in the site shell script). -Unfortunately, this technique does not work for `CONFIG_SHELL' due to -an Autoconf limitation. Until the limitation is lifted, you can use -this workaround: +Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an +Autoconf limitation. 
Until the limitation is lifted, you can use this +workaround: CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash -`configure' Invocation +'configure' Invocation ====================== - `configure' recognizes the following options to control how it + 'configure' recognizes the following options to control how it operates. -`--help' -`-h' - Print a summary of all of the options to `configure', and exit. +'--help' +'-h' + Print a summary of all of the options to 'configure', and exit. -`--help=short' -`--help=recursive' +'--help=short' +'--help=recursive' Print a summary of the options unique to this package's - `configure', and exit. The `short' variant lists options used - only in the top level, while the `recursive' variant lists options - also present in any nested packages. + 'configure', and exit. The 'short' variant lists options used only + in the top level, while the 'recursive' variant lists options also + present in any nested packages. -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' +'--version' +'-V' + Print the version of Autoconf used to generate the 'configure' script, and exit. -`--cache-file=FILE' +'--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to + traditionally 'config.cache'. FILE defaults to '/dev/null' to disable caching. -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. +'--config-cache' +'-C' + Alias for '--cache-file=config.cache'. -`--quiet' -`--silent' -`-q' +'--quiet' +'--silent' +'-q' Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to `/dev/null' (any error + suppress all normal output, redirect it to '/dev/null' (any error messages will still be shown). -`--srcdir=DIR' +'--srcdir=DIR' Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. 
+ 'configure' can determine that directory automatically. -`--prefix=DIR' - Use DIR as the installation prefix. *note Installation Names:: - for more details, including other options available for fine-tuning - the installation locations. +'--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: for + more details, including other options available for fine-tuning the + installation locations. -`--no-create' -`-n' +'--no-create' +'-n' Run the configure checks, but stop before creating any output files. -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. +'configure' also accepts some other, not widely useful, options. Run +'configure --help' for more details. diff --git a/Makefile.am b/Makefile.am index 4703747..1f4b38f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -9,9 +9,10 @@ EXTRA_DIST = oniguruma.pc.in HISTORY README_japanese README.md \ CMakeLists.txt dist.info oniguruma.pc.cmake.in cmake/dist.cmake \ src/config.h.cmake.in \ doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja \ + doc/CALLOUTS.BUILTIN doc/CALLOUTS.BUILTIN.ja \ doc/UNICODE_PROPERTIES \ src/Makefile.windows src/config.h.win32 src/config.h.win64 \ - windows/testc.c contributed/libfuzzer-onig.cpp + windows/testc.c contributed/libfuzzer-onig.cpp contributed/makefile bin_SCRIPTS = onig-config @@ -1,4 +1,4 @@ -README 2017/08/25 +README 2018/01/14 Oniguruma ---- (C) K.Kosako @@ -9,9 +9,22 @@ FIXED Security Issues: CVE-2017-9227, CVE-2017-9228, CVE-2017-9229 --- -Oniguruma is a regular expressions library. -The characteristics of this library is that different character encoding -for every regular expression object can be specified. +Oniguruma is a modern and flexible regular expressions library. It +encompasses features from different regular expression implementations +that traditionally exist in different languages. 
It comes close to +being a complete superset of all regular expression features found +in other regular expression implementations. + +Its features include: +* Character encoding can be specified per regular expression object. +* Several regular expression types are supported: + * POSIX + * Grep + * GNU Regex + * Perl + * Java + * Ruby + * Emacs Supported character encodings: @@ -8,10 +8,23 @@ FIXED Security Issues: **CVE-2017-9224, CVE-2017-9225, CVE-2017-9226** **CVE-2017-9227, CVE-2017-9228, CVE-2017-9229** - -Oniguruma is a regular expressions library. -The characteristics of this library is that different character encoding -for every regular expression object can be specified. +Oniguruma is a modern and flexible regular expressions library. It +encompasses features from different regular expression implementations +that traditionally exist in different languages. It comes close to +being a complete superset of all regular expression features found +in other regular expression implementations. + +Its features include: +* Character encoding can be specified per regular expression object. 
+* Several regular expression types are supported: + * Oniguruma (native) + * POSIX + * Grep + * GNU Regex + * Perl + * Java + * Ruby + * Emacs Supported character encodings: @@ -26,19 +39,38 @@ Supported character encodings: * CP1251: contributed by Byte -New feature of version 6.7.0 +New feature of version 6.8.1 -------------------------- -* NEW: hexadecimal codepoint \uHHHH -* NEW: add ONIG_SYNTAX_ONIGURUMA (== ONIG_SYNTAX_DEFAULT) -* Disabled \N and \O on ONIG_SYNTAX_RUBY -* Reduced object size +* Update shared library version to 5.0.0 for API incompatible changes from 6.7.1 + + +New feature of version 6.8.0 +-------------------------- + +* Retry-limit-in-match function enabled by default +* NEW: configure option --enable-posix-api=no (* enabled by default) +* NEW API: onig_search_with_param(), onig_match_with_param() +* NEW: Callouts of contents (?{...contents...}) (?{...}\[X<>]) (?{{....}}) +* NEW: Callouts of name (*name) (*name\[tag]{args...}) +* NEW: Builtin callouts (*FAIL) (*MISMATCH) (*ERROR{n}) (*COUNT) (*MAX{n}) etc.. +(* Callout function API is experimental level and isn't fixed definitely yet. Undocumented now) -New feature of version 6.6.1 + +New feature of version 6.7.1 -------------------------- -* Fix definition of \X +* NEW: Mechanism of retry-limit-in-match (* disabled by default) + + +New feature of version 6.7.0 +-------------------------- + +* NEW: hexadecimal codepoint \uHHHH +* NEW: add ONIG_SYNTAX_ONIGURUMA (== ONIG_SYNTAX_DEFAULT) +* Disabled \N and \O on ONIG_SYNTAX_RUBY +* Reduced size of object file New feature of version 6.6.0 @@ -47,7 +79,7 @@ New feature of version 6.6.0 * NEW: ASCII only mode options for character type/property (?WDSP) * NEW: Extended Grapheme Cluster boundary \y, \Y (*original) * NEW: Extended Grapheme Cluster \X -* Range-clear (Absent-clear) operator restores previous range in backtrack. +* Range-clear (Absent-clear) operator restores previous range in retractions. 
New feature of version 6.5.0 @@ -56,9 +88,9 @@ New feature of version 6.5.0 * NEW: \K (keep) * NEW: \R (general newline) \N (no newline) * NEW: \O (true anychar) -* NEW: if-then-else syntax (?(...)...\|...) +* NEW: if-then-else (?(...)...\|...) * NEW: Backreference validity checker (?(xxx)) (*original) -* NEW: Absent repeater (?~absent) +* NEW: Absent repeater (?~absent) \[is equal to (?\~\|absent|\O*)] * NEW: Absent expression (?~|absent|expr) (*original) * NEW: Absent stopper (?~|absent) (*original) @@ -186,6 +218,7 @@ Sample Programs |sample/scan.c |example of using onig_scan(). | |sample/sql.c |example of the variable meta characters. | |sample/user_property.c|example of user defined Unicode property. | +|sample/callout.c |example of callouts. | Test Programs diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..006fd36 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,9 @@ +#!/bin/sh +# autogen.sh for Oniguruma + +echo "Generating autotools files." +#autoreconf --install --force --symlink || exit 1 +autoreconf --install --force || exit 1 + +echo "" +echo "Run ./configure, make, and make install." @@ -1,9 +1,9 @@ #! /bin/sh # Wrapper for compilers which do not understand '-c -o'. -scriptversion=2012-10-14.11; # UTC +scriptversion=2016-01-11.22; # UTC -# Copyright (C) 1999-2014 Free Software Foundation, Inc. +# Copyright (C) 1999-2017 Free Software Foundation, Inc. # Written by Tom Tromey <tromey@cygnus.com>. # # This program is free software; you can redistribute it and/or modify @@ -255,7 +255,8 @@ EOF echo "compile $scriptversion" exit $? ;; - cl | *[/\\]cl | cl.exe | *[/\\]cl.exe ) + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ + icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) func_cl_wrapper "$@" # Doesn't return... 
;; esac @@ -342,6 +343,6 @@ exit $ret # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-time-zone: "UTC" +# time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: diff --git a/config.guess b/config.guess index 1659250..2193702 100755 --- a/config.guess +++ b/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2015 Free Software Foundation, Inc. +# Copyright 1992-2017 Free Software Foundation, Inc. -timestamp='2015-08-20' +timestamp='2017-05-27' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ timestamp='2015-08-20' # Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess # # Please send patches to <config-patches@gnu.org>. @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2015 Free Software Foundation, Inc. +Copyright 1992-2017 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -186,9 +186,12 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. + # to ELF recently (or will in the future) and ABI. 
case "${UNAME_MACHINE_ARCH}" in - arm*|earm*|i386|m68k|ns32k|sh3*|sparc|vax) + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ @@ -237,6 +240,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; @@ -268,42 +275,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; + UNAME_MACHINE=alphaev5 ;; "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; + UNAME_MACHINE=alphaev56 ;; "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; + UNAME_MACHINE=alphapca56 ;; "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; + UNAME_MACHINE=alphapca57 ;; "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; + UNAME_MACHINE=alphaev6 ;; "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; + UNAME_MACHINE=alphaev67 ;; "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; + UNAME_MACHINE=alphaev69 ;; "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; + UNAME_MACHINE=alphaev7 ;; "EV7.9 (21364A)") - 
UNAME_MACHINE="alphaev79" ;; + UNAME_MACHINE=alphaev79 ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 @@ -376,16 +383,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build - SUN_ARCH="i386" + SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH="x86_64" + SUN_ARCH=x86_64 fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -410,7 +417,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} @@ -635,13 +642,13 @@ EOF sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi @@ -680,11 +687,11 @@ EOF exit (0); } EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = "hppa2.0w" ] + if [ ${HP_ARCH} = hppa2.0w ] then eval $set_cc_for_build @@ -697,12 +704,12 @@ EOF # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH="hppa2.0w" + 
HP_ARCH=hppa2.0w else - HP_ARCH="hppa64" + HP_ARCH=hppa64 fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -807,14 +814,14 @@ EOF echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -830,10 +837,11 @@ EOF UNAME_PROCESSOR=`/usr/bin/uname -p` case ${UNAME_PROCESSOR} in amd64) - echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - *) - echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; esac + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit ;; i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin @@ -896,7 +904,7 @@ EOF exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' 
'[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix @@ -919,7 +927,7 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC="gnulibc1" ; fi + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; arc:Linux:*:* | arceb:Linux:*:*) @@ -965,6 +973,9 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; + k1om:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -990,6 +1001,9 @@ EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } ;; + mips64el:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; openrisc*:Linux:*:*) echo or1k-unknown-linux-${LIBC} exit ;; @@ -1022,6 +1036,9 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-${LIBC} exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux-${LIBC} exit ;; @@ -1120,7 +1137,7 @@ EOF # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that + # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. 
echo i586-pc-msdosdjgpp exit ;; @@ -1269,6 +1286,9 @@ EOF SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux${UNAME_RELEASE} + exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; @@ -1282,16 +1302,23 @@ EOF UNAME_PROCESSOR=powerpc fi if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null then case $UNAME_PROCESSOR in i386) UNAME_PROCESSOR=x86_64 ;; powerpc) UNAME_PROCESSOR=powerpc64 ;; esac fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi fi elif test "$UNAME_PROCESSOR" = i386 ; then # Avoid executing cc on OS X 10.9, as it ships with a stub @@ -1306,7 +1333,7 @@ EOF exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then + if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi @@ -1315,15 +1342,18 @@ EOF *:QNX:*:4*) echo i386-pc-qnx exit ;; - NEO-?:NONSTOP_KERNEL:*:*) + NEO-*:NONSTOP_KERNEL:*:*) echo neo-tandem-nsk${UNAME_RELEASE} exit ;; NSE-*:NONSTOP_KERNEL:*:*) echo nse-tandem-nsk${UNAME_RELEASE} exit ;; - NSR-?:NONSTOP_KERNEL:*:*) + NSR-*:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk${UNAME_RELEASE} + exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux exit ;; @@ -1337,7 +1367,7 @@ EOF # "uname -m" is not consistent, so use $cputype instead. 
386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = "386"; then + if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" @@ -1379,7 +1409,7 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos @@ -1390,23 +1420,25 @@ EOF x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; esac cat >&2 <<EOF $0: unable to guess system type -This script, last modified $timestamp, has failed to recognize -the operating system you are using. It is advised that you -download the most up to date version of the config scripts from +This script (version $timestamp), has failed to recognize the +operating system you are using. If your script is old, overwrite +config.guess and config.sub with the latest versions from: - http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess and - http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub -If the version you run ($0) is already up to date, please -send the following data and any information you think might be -pertinent to <config-patches@gnu.org> in order to provide the needed -information to handle your system. +If $0 has already been updated, send the following data and any +information you think might be pertinent to config-patches@gnu.org to +provide the necessary information to handle your system. config.guess timestamp = $timestamp @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2015 Free Software Foundation, Inc. 
+# Copyright 1992-2017 Free Software Foundation, Inc. -timestamp='2015-08-20' +timestamp='2017-04-02' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ timestamp='2015-08-20' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -53,8 +53,7 @@ timestamp='2015-08-20' me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. @@ -68,7 +67,7 @@ Report bugs and patches to <config-patches@gnu.org>." version="\ GNU config.sub ($timestamp) -Copyright 1992-2015 Free Software Foundation, Inc. +Copyright 1992-2017 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -118,7 +117,7 @@ case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ - kopensolaris*-gnu* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -264,7 +263,7 @@ case $basic_machine in | fido | fr30 | frv | ft32 \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ - | i370 | i860 | i960 | ia64 \ + | i370 | i860 | i960 | ia16 | ia64 \ | ip2k | iq2000 \ | k1om \ | le32 | le64 \ @@ -302,6 +301,7 @@ case $basic_machine in | open8 | or1k | or1knd | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ | pyramid \ | riscv32 | riscv64 \ | rl78 | rx \ @@ -315,6 +315,7 @@ case $basic_machine in | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ | visium \ + | wasm32 \ | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) @@ -388,7 +389,7 @@ case $basic_machine in | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | hexagon-* \ - | i*86-* | i860-* | i960-* | ia64-* \ + | i*86-* | i860-* | i960-* | ia16-* | ia64-* \ | ip2k-* | iq2000-* \ | k1om-* \ | le32-* | le64-* \ @@ -429,6 +430,7 @@ case $basic_machine in | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ | pyramid-* \ | riscv32-* | riscv64-* \ | rl78-* | romp-* | rs6000-* | rx-* \ @@ -445,6 +447,7 @@ case $basic_machine in | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ | visium-* \ + | wasm32-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ @@ -521,7 +524,7 @@ case $basic_machine in basic_machine=i386-pc os=-aros ;; - asmjs) + asmjs) 
basic_machine=asmjs-unknown ;; aux) @@ -644,6 +647,14 @@ case $basic_machine in basic_machine=m68k-bull os=-sysv3 ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + os=$os"spe" + ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ -939,6 +950,9 @@ case $basic_machine in nsr-tandem) basic_machine=nsr-tandem ;; + nsx-tandem) + basic_machine=nsx-tandem + ;; op50n-* | op60c-*) basic_machine=hppa1.1-oki os=-proelf @@ -1023,7 +1037,7 @@ case $basic_machine in ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle | ppc-le | powerpc-little) + ppcle | powerpclittle) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) @@ -1033,7 +1047,7 @@ case $basic_machine in ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) + ppc64le | powerpc64little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) @@ -1234,6 +1248,9 @@ case $basic_machine in basic_machine=a29k-wrs os=-vxworks ;; + wasm32) + basic_machine=wasm32-unknown + ;; w65*) basic_machine=w65-wdc os=-none @@ -1383,14 +1400,14 @@ case $os in | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* | -cegcc* \ + | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | 
-midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ @@ -1399,7 +1416,8 @@ case $os in | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1531,6 +1549,8 @@ case $os in ;; -nacl*) ;; + -ios) + ;; -none) ;; *) @@ -1626,6 +1646,9 @@ case $basic_machine in sparc-* | *-sun) os=-sunos4.1.1 ;; + pru-*) + os=-elf + ;; *-be) os=-beos ;; diff --git a/configure.ac b/configure.ac index ebb0a66..b146e08 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. 
-AC_INIT(onig, 6.7.0) +AC_INIT(onig, 6.8.1) AC_CONFIG_MACRO_DIR([m4]) @@ -14,13 +14,19 @@ AC_ARG_WITH(statistics, [ STATISTICS=-DONIG_DEBUG_STATISTICS ]) AC_SUBST(STATISTICS) -dnl check for COMBINATION_EXPLOSION -AC_ARG_ENABLE(combination-explosion-check, - [ --enable-combination-explosion-check deprecated], - [comb_expl_check=$enableval]) -if test "${comb_expl_check}" = yes; then - AC_DEFINE(USE_COMBINATION_EXPLOSION_CHECK,1,[Define if combination explosion check]) -fi + +dnl check for POSIX API +AC_ARG_ENABLE(posix-api, +[ --enable-posix-api turn on to include POSIX API [[default=yes]]], +[\ +case "${enableval}" in + yes) enable_posix_api=yes ;; + no) enable_posix_api=no ;; + *) AC_MSG_ERROR(bad value for --enable-posix-api) ;; +esac], +enable_posix_api=yes) +AM_CONDITIONAL(ENABLE_POSIX_API, test x"${enable_posix_api}" = xyes) + dnl check for CRNL_AS_LINE_TERMINATOR AC_ARG_ENABLE(crnl-as-line-terminator, @@ -34,7 +40,7 @@ fi dnl Checks for programs. AC_PROG_CC AM_PROG_LIBTOOL -LTVERSION="4:0:0" +LTVERSION="5:0:0" AC_SUBST(LTVERSION) AC_PROG_INSTALL @@ -44,7 +50,7 @@ dnl Checks for libraries. dnl Checks for header files. AC_HEADER_STDC -AC_CHECK_HEADERS(stdlib.h string.h strings.h sys/time.h unistd.h sys/times.h) +AC_CHECK_HEADERS(stdlib.h string.h strings.h limits.h sys/time.h unistd.h sys/times.h) dnl Checks for typedefs, structures, and compiler characteristics. 
AC_CHECK_SIZEOF(int, 4) diff --git a/contributed/libfuzzer-onig.cpp b/contributed/libfuzzer-onig.cpp index 984110d..dcd7c63 100644 --- a/contributed/libfuzzer-onig.cpp +++ b/contributed/libfuzzer-onig.cpp @@ -23,9 +23,21 @@ http://llvm.org/docs/LibFuzzer.html extern "C" int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) { regex_t *reg; - if (onig_new - (&reg, Data, Data + Size, ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, - ONIG_SYNTAX_DEFAULT, 0) == 0) + OnigEncoding enc; + + enc = ONIG_ENCODING_UTF8; + +#ifdef FULL_TEST + onig_initialize(&enc, 1); +#endif + + if (onig_new(&reg, Data, Data + Size, ONIG_OPTION_DEFAULT, enc, + ONIG_SYNTAX_DEFAULT, 0) == 0) onig_free(reg); + +#ifdef FULL_TEST + onig_end(); +#endif + return 0; } diff --git a/contributed/makefile b/contributed/makefile new file mode 100644 index 0000000..c50ab36 --- /dev/null +++ b/contributed/makefile @@ -0,0 +1,22 @@ + +ONIG_LIB=../src/.libs/libonig.a +LIBS=$(ONIG_LIB) /usr/local/lib/libLLVMFuzzerMain.a + +TARGETS=libfuzzer-onig libfuzzer-onig-full + +default: $(TARGETS) + +libfuzzer-onig: libfuzzer-onig.cpp $(ONIG_LIB) + clang++ $< $(LIBS) -o $@ -fsanitize-coverage=trace-pc-guard -fsanitize=fuzzer,address + +libfuzzer-onig-full: libfuzzer-onig.cpp $(ONIG_LIB) + clang++ -DFULL_TEST $< $(LIBS) -o $@ -fsanitize-coverage=trace-pc-guard -fsanitize=fuzzer,address + + +$(ONIG_LIB): + cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=fuzzer,address" LDFLAGS="-fsanitize-coverage=trace-pc-guard -fsanitize=fuzzer,address"; make + + + +clean: + rm -f $(TARGETS) @@ -1,9 +1,9 @@ #! /bin/sh # depcomp - compile a program generating dependencies as side-effects -scriptversion=2013-05-30.07; # UTC +scriptversion=2016-01-11.22; # UTC -# Copyright (C) 1999-2014 Free Software Foundation, Inc. +# Copyright (C) 1999-2017 Free Software Foundation, Inc. 
# This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -786,6 +786,6 @@ exit 0 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-time-zone: "UTC" +# time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: @@ -1,7 +1,7 @@ --- This file is part of LuaDist project name = "onig" -version = "6.7.0" +version = "6.8.1" desc = "Oniguruma is a regular expressions library." author = "K.Kosako" @@ -1,4 +1,4 @@ -Oniguruma API Version 6.7.0 2017/12/08 +Oniguruma API Version 6.8.0 2018/03/13 #include <oniguruma.h> @@ -222,6 +222,77 @@ Oniguruma API Version 6.7.0 2017/12/08 1 reg: regex object. +# OnigMatchParam* onig_new_match_param() + + Allocate a OnigMatchParam object and initialize the contents by + onig_initialize_match_param(). + + +# void onig_free_match_param(OnigMatchParam* mp) + + Free memory used by a OnigMatchParam object. + + arguments + 1 mp: OnigMatchParam object + + +# void onig_initialize_match_param(OnigMatchParam* mp) + + Set match-param fields to default values. + Match-param is used in onig_match_with_param() and onig_search_with_param(). + + arguments + 1 mp: match-param pointer + + +# int onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* mp, unsigned int limit) + + Set a maximum number of match-stack depth. + 0 means unlimited. + + arguments + 1 mp: match-param pointer + 2 limit: number of limit + + normal return: ONIG_NORMAL + + +# int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit) + + Set a retry limit count of a match process. + + arguments + 1 mp: match-param pointer + 2 limit: number of limit + + normal return: ONIG_NORMAL + + +# int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + Set a function for callouts of contents in progress. 
+ If 0 (NULL) is set, never called in progress. + + arguments + 1 mp: match-param pointer + 2 f: function + + normal return: ONIG_NORMAL + + +# int onig_set_retraction_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + Set a function for callouts of contents in retraction (backtrack). + If 0 (NULL) is set, never called in retraction. + + arguments + 1 mp: match-param pointer + 2 f: function + + normal return: ONIG_NORMAL + + + # int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) @@ -246,8 +317,17 @@ Oniguruma API Version 6.7.0 2017/12/08 ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API. +# int onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) + + arguments + 1-7: same as onig_search() + 8 mp: match parameter values (match_stack_limit, retry_limit_in_match) + + # int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, - OnigRegion* region, OnigOptionType option) + OnigRegion* region, OnigOptionType option) Match string and return result and matching region. @@ -267,6 +347,15 @@ Oniguruma API Version 6.7.0 2017/12/08 ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API. 
+# int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) + + arguments + 1-6: same as onig_match() + 7 mp: match parameter values (match_stack_limit, retry_limit_in_match) + + # int onig_scan(regex_t* reg, const UChar* str, const UChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), @@ -623,6 +712,47 @@ Oniguruma API Version 6.7.0 2017/12/08 normal return: ONIG_NORMAL +# unsigned long onig_get_retry_limit_in_match(void) + + Return the limit of retry counts in matching process. + (default: 10000000) + + normal return: limit value + + +# int onig_set_retry_limit_in_match(unsigned long n) + + Set the limit of retry counts in matching process. + + normal return: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_progress_callout(void) + + Get a function for callouts of contents in progress. + + +# int onig_set_progress_callout(OnigCalloutFunc f) + + Set a function for callouts of contents in progress. + If 0 (NULL) is set, never called in progress. + + normal return: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_retraction_callout(void) + + Get a function for callouts of contents in retraction (backtrack). + + +# int onig_set_retraction_callout(OnigCalloutFunc f) + + Set a function for callouts of contents in retraction (backtrack). + If 0 (NULL) is set, never called in retraction. + + normal return: ONIG_NORMAL + + # int onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)) Define new Unicode property. 
@@ -1,4 +1,4 @@ -鬼車インターフェース Version 6.7.0 2017/12/08 +鬼車インターフェース Version 6.8.0 2018/03/13 #include <oniguruma.h> @@ -220,6 +220,77 @@ 1 reg: 正規表現オブジェクト +# OnigMatchParam* onig_new_match_param() + + OnigMatchParamオブジェクトを生成し、onig_initialize_match_param()を使用して + 中身を初期化する。 + + +# void onig_free_match_param(OnigMatchParam* mp) + + OnigMatchParamオブジェクトで使用しているメモリを開放する。 + + 引数 + 1 mp: OnigMatchParamオブジェクト + + +# void onig_initialize_match_param(OnigMatchParam* mp) + + マッチパラメタ構造体にデフォルト値をセットする。 + マッチパラメタは、onig_match_with_param(), onig_search_with_param()で + 使用される。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + + +# int onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* mp, unsigned int limit) + + マッチスタックの最大深さをセットする。 + 0は、無制限を表す。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 limit: 制限数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit) + + 一回のマッチでのリトライ数の最大値をセットする。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 limit: 制限回数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + 前進時の内容の呼び出し(callouts)で呼び出される関数をセットする。 + もし0(NULL)がセットされると、前進時に呼び出しは起こらない。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 f: 呼び出される関数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retraction_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + 後退時の内容の呼び出し(callouts)で呼び出される関数をセットする。 + もし0(NULL)がセットされると、後退時に呼び出しは起こらない。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 f: 呼び出される関数 + + 正常終了戻り値: ONIG_NORMAL + + # int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) @@ -245,6 +316,15 @@ ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする +# int onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) + + 引数 + 1-7: onig_search()と同じ + 8 mp: マッチパラメタ値 (match_stack_limit, 
retry_limit_in_match) + + # int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option) @@ -266,6 +346,15 @@ ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする +# int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) + + 引数 + 1-6: onig_match()と同じ + 7 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match) + + # int onig_scan(regex_t* reg, const UChar* str, const UChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), @@ -629,6 +718,47 @@ 正常終了戻り値: ONIG_NORMAL +# unsigned long onig_get_retry_limit_in_match(void) + + マッチング関数内でのリトライ数の制限値を返す。 + (デフォルト: 10000000) + + 正常終了戻り値: 制限値 + + +# int onig_set_retry_limit_in_match(unsigned long n) + + マッチング関数内でのリトライ数の制限値を指定する。 + + 正常終了戻り値: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_progress_callout(void) + + 前進時の内容の呼び出しで呼び出される関数を返す。 + + +# int onig_set_progress_callout(OnigCalloutFunc f) + + 前進時の内容の呼び出しで呼び出される関数を指定する。 + もし0(NULL)を指定すると、前進時の内容の呼び出しで呼び出しは起こらない。 + + 正常終了戻り値: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_retraction_callout(void) + + 後退時の内容の呼び出しで呼び出される関数を返す。 + + +# int onig_set_retraction_callout(OnigCalloutFunc f) + + 後退時の内容の呼び出しで呼び出される関数を指定する。 + もし0(NULL)を指定すると、後退時の内容の呼び出しで呼び出しは起こらない。 + + 正常終了戻り値: ONIG_NORMAL + + # int onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)) 新しいUnicodeプロパティを定義する。 diff --git a/doc/CALLOUTS.BUILTIN b/doc/CALLOUTS.BUILTIN new file mode 100644 index 0000000..dcf87f8 --- /dev/null +++ b/doc/CALLOUTS.BUILTIN @@ -0,0 +1,88 @@ +CALLOUTS.BUILTIN 2018/03/19 + + +* FAIL (progress) + + (*FAIL) + + Always fail. + + +* MISMATCH (progress) + + (*MISMATCH) + + Terminate Match process. + Continue Search process. + + +* ERROR (progress) + + (*ERROR{n::LONG}) + + Terminate Search/Match process. + + Return value is the argument 'n'. 
(The value must be less than -1) + 'n' is an optional argument. (default value is ONIG_ABORT) + + +* MAX (progress/retraction) + + (*MAX{n::LONG}) + + Restrict the maximum count of success. + + [callout data] + slot 0: current success count. + + +* COUNT (progress/retraction) + + (*COUNT{c::CHAR}) + + Counter. + Depends on 'c' argument, the slot 0 value changes. + 'c' is an optional argument, default value is '>'. + + [callout data] + slot 0: '>': progress count, '<': retraction count, 'X': success count + slot 1: progress count + slot 2: retraction count + + (* success count = progress count - retraction count) + + ** If option ONIG_OPTION_FIND_LONGEST or ONIG_OPTION_FIND_NOT_EMPTY is used, + counts are not accurate. + + +* TOTAL_COUNT (progress/retraction) + + (*TOTAL_COUNT{c::CHAR}) + + It's almost the same as COUNT. + But the counts are integrated in a search process. + 'c' is an optional argument, default value is '>'. + + [callout data] + slot 0: '>': progress count, '<': retraction count, 'X': success count + slot 1: progress count + slot 2: retraction count + + ** If option ONIG_OPTION_FIND_LONGEST or ONIG_OPTION_FIND_NOT_EMPTY is used, + counts are not accurate. + + +* CMP (progress) + + (*CMP{x::TAG/LONG, op::STRING, y::TAG/LONG}) + + Compare x value and y value with op operator. + + op: '==', '!=', '>', '<', '>=', '<=' + + ex. 
"(?:(*MAX[TA]{7})a|(*MAX[TB]{5})b)*(*CMP{TA,>=,4})" + + [callout data] + slot 0: op value (enum OP_CMP in src/regexec.c) + +//END diff --git a/doc/CALLOUTS.BUILTIN.ja b/doc/CALLOUTS.BUILTIN.ja new file mode 100644 index 0000000..e1a5b7a --- /dev/null +++ b/doc/CALLOUTS.BUILTIN.ja @@ -0,0 +1,88 @@ +CALLOUTS.BUILTIN.ja 2018/03/19 + + +* FAIL (前進) + + (*FAIL) + + 常に失敗する + + +* MISMATCH (前進) + + (*MISMATCH) + + 照合を中止する + 検索は継続する + + +* ERROR (前進) + + (*ERROR{n::LONG}) + + 検索/照合を中止する + 戻り値は引数'n'の値。(-1より小さい負の値でなければならない) + 'n'はオプション引数で、デフォルト値はONIG_ABORT + + +* MAX (前進/後退) + + (*MAX{n::LONG}) + + 成功回数を制限する + + [callout data] + slot 0: 現在の成功回数 + + +* COUNT (前進/後退) + + (*COUNT{c::CHAR}) + + カウンタ + 'c'引数の値によって、slot 0の値が変化する + 'c'はオプション引数で、デフォルト値は'>' + + [callout data] + slot 0: '>': 前進回数, '<': 後退回数, 'X': 成功回数 + slot 1: 前進回数 + slot 2: 後退回数 + + (* 成功回数 = 前進回数 - 後退回数) + + ** ONIG_OPTION_FIND_LONGEST または ONIG_OPTION_FIND_NOT_EMPTY が使用されると + 正確な動作ができなくなる + + +* TOTAL_COUNT (前進/後退) + + (*TOTAL_COUNT{c::CHAR}) + + これはCOUNTとほとんど同じ + しかし、カウントが検索過程で積算される + 'c'はオプション引数で、デフォルト値は'>' + + [callout data] + slot 0: '>': 前進回数, '<': 後退回数, 'X': 成功回数 + slot 1: 前進回数 + slot 2: 後退回数 + + ** ONIG_OPTION_FIND_LONGEST または ONIG_OPTION_FIND_NOT_EMPTY が使用されると + 正確な動作ができなくなる + + +* CMP (前進) + + (*CMP{x::TAG/LONG, op::STRING, y::TAG/LONG}) + + xの値とyの値をop演算子で比較する + x, yがTAGのときにはそのcalloutのslot 0の値が参照される + + op: '==', '!=', '>', '<', '>=', '<=' + + 例: "(?:(*MAX[TA]{7})a|(*MAX[TB]{5})b)*(*CMP{TA,>=,4})" + + [callout data] + slot 0: op値 (src/regexec.c の中の enum OP_CMP) + +//END @@ -1,12 +1,12 @@ FAQ 2006/11/14 -1. Lognest match +1. Longest match - You can execute longest match by using ONIG_OPTION_FIND_LONGEST option + You can execute the longest match by using ONIG_OPTION_FIND_LONGEST option in onig_new(). 2. Mailing list - There is no mailing list about Oniguruma. + There is no mailing list for Oniguruma. 
// END @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.7.0 2017/12/08 +Oniguruma Regular Expressions Version 6.8.0 2018/03/08 syntax: ONIG_SYNTAX_ONIGURUMA (default) @@ -262,6 +262,24 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) Assigning the same name to two or more subexps is allowed. + + <Callouts> + + * Callouts of contents + (?{...contents...}) callouts in progress + (?{...contents...}D) D is a direction flag char. ('X' or '<' or '>') + D = 'X': progress and retraction, '<': retraction only + '>': progress only (default) + (?{...contents...}[tag]) tag assigned + (?{...contents...}[tag]D) + + * Callouts of name + (*name) + (*name{args...}) with args + (*name[tag]) tag assigned + (*name[tag]{args...}) + + <Absent functions> (?~absent) Absent repeater (* proposed by Tanaka Akira) @@ -289,6 +307,8 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) is undefined. + <if-then-else> + (?(condition_exp)then_exp|else_exp) if-then-else (?(condition_exp)then_exp) if-then @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.7.0 2017/12/08 +鬼車 正規表現 Version 6.8.0 2018/03/08 使用文法: ONIG_SYNTAX_ONIGURUMA (既定値) @@ -264,10 +264,28 @@ この場合には、この名前を使用した後方参照は可能であるが、 部分式呼出しはできない。 + + <呼び出し> + + * 内容の呼び出し + (?{...contents...}) 前進中のみの呼び出し + (?{...contents...}D) Dは方向指定文字 ('X' or '<' or '>') + D = 'X': 前進および後退, '<' 後退のみ, '>': 前進のみ + (?{...contents...}[tag]) 名札付き + (?{...contents...}[tag]D) + + * 名前の呼び出し + (*name) + (*name{args...}) 引数付き + (*name[tag]) 名札付き + (*name[tag]{args...}) + + + <不在機能群> (?~不在式) 不在繰り返し (*原案 田中哲) - これは.*(より正確には\O*)のように動作するが、<不在式>に + これは .*(より正確には\O*)のように動作するが、<不在式>に 適合する文字列を含まない範囲に制限される。 これは(?~|不在式|\O*)の省略表記である。 @@ -282,10 +300,13 @@ <不在式>に適合する文字列を含まない範囲に制限される。 (?~|) 範囲消去 - 不在停止の効果を消して、初期状態にする。 + 不在停止の効果を消して、それ以前の状態にする。 + + * 不在機能の入れ子には対応しておらず、その場合の挙動は不定とする。 + - * 不在機能の入れ子には対応しておらず、挙動は不定とする。 + <条件文> (?(条件式)成功式|失敗式) 条件式が成功すれば成功式、失敗すれば失敗式を実行する この機能の存在理由は、成功式が失敗しても失敗式には @@ -8,7 +8,7 @@ <h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>) <p> -(c) K.Kosako, updated at: 2017/12/08 +(c) 
K.Kosako, updated at: 2018/03/19 </p> <dl> @@ -16,6 +16,9 @@ <dt><b>What's new</b> </font> <ul> +<li>2018/03/19: Version 6.8.1 released.</li> +<li>2018/03/16: Version 6.8.0 released.</li> +<li>2018/01/26: Version 6.7.1 released.</li> <li>2017/12/11: Version 6.7.0 released.</li> <li>2017/08/30: Version 6.6.1 released.</li> <li>2017/08/28: Version 6.6.0 released.</li> diff --git a/index_ja.html b/index_ja.html index 929ae16..02565a0 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@ <h1>鬼車</h1> <p> -(c) K.Kosako, 最終更新: 2017/12/08 +(c) K.Kosako, 最終更新: 2018/03/19 </p> <dl> @@ -16,6 +16,9 @@ <dt><b>更新情報</b> </font> <ul> +<li>2018/03/19: Version 6.8.1 リリース</li> +<li>2018/03/16: Version 6.8.0 リリース</li> +<li>2018/01/26: Version 6.7.1 リリース</li> <li>2017/12/11: Version 6.7.0 リリース</li> <li>2017/08/30: Version 6.6.1 リリース</li> <li>2017/08/28: Version 6.6.0 リリース</li> @@ -1,7 +1,7 @@ #!/bin/sh # install - install a program, script, or datafile -scriptversion=2014-09-12.12; # UTC +scriptversion=2016-01-11.22; # UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the @@ -324,41 +324,34 @@ do # is incompatible with FreeBSD 'install' when (umask & 300) != 0. ;; *) - # $RANDOM is not portable (e.g. dash); use it when possible to - # lower collision chance tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ - trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 - # As "mkdir -p" follows symlinks and we work in /tmp possibly; so - # create the $tmpdir first (and fail if unsuccessful) to make sure - # that nobody tries to guess the $tmpdir name. if (umask $mkdir_umask && - $mkdirprog $mkdir_mode "$tmpdir" && - exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 then if test -z "$dir_arg" || { # Check for POSIX incompatibilities with -m. 
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or # other-writable bit of parent directory when it shouldn't. # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. - test_tmpdir="$tmpdir/a" - ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + ls_ld_tmpdir=`ls -ld "$tmpdir"` case $ls_ld_tmpdir in d????-?r-*) different_mode=700;; d????-?--*) different_mode=755;; *) false;; esac && - $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { - ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + $mkdirprog -m$different_mode -p -- "$tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$tmpdir"` test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" } } then posix_mkdir=: fi - rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + rmdir "$tmpdir/d" "$tmpdir" else # Remove any dirs left behind by ancient mkdir implementations. - rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null fi trap '' 0;; esac;; @@ -503,6 +496,6 @@ done # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-time-zone: "UTC" +# time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: diff --git a/make_win32.bat b/make_win32.bat index e628994..bd1a072 100644 --- a/make_win32.bat +++ b/make_win32.bat @@ -1,3 +1,5 @@ -cd src
-copy config.h.win32 config.h
-nmake -f Makefile.windows
+SET ONIG_DIR=%~dp0\src +set THIS_DIR=%~dp0 +set BUILD_DIR=%cd% +copy %ONIG_DIR%\config.h.win32 %BUILD_DIR%\config.h +nmake -f %ONIG_DIR%\Makefile.windows %1 diff --git a/make_win64.bat b/make_win64.bat index b8ca930..ee0a049 100644 --- a/make_win64.bat +++ b/make_win64.bat @@ -1,3 +1,5 @@ -cd src
-copy config.h.win64 config.h
-nmake -f Makefile.windows
+SET ONIG_DIR=%~dp0\src +set THIS_DIR=%~dp0 +set BUILD_DIR=%cd% +copy %ONIG_DIR%\config.h.win64 %BUILD_DIR%\config.h +nmake -f %ONIG_DIR%\Makefile.windows %1 @@ -1,9 +1,9 @@ #! /bin/sh # Common wrapper for a few potentially missing GNU programs. -scriptversion=2013-10-28.13; # UTC +scriptversion=2016-01-11.22; # UTC -# Copyright (C) 1996-2014 Free Software Foundation, Inc. +# Copyright (C) 1996-2017 Free Software Foundation, Inc. # Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996. # This program is free software; you can redistribute it and/or modify @@ -210,6 +210,6 @@ exit $st # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-time-zone: "UTC" +# time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: diff --git a/sample/.gitignore b/sample/.gitignore index 79fab44..b6b591b 100644 --- a/sample/.gitignore +++ b/sample/.gitignore @@ -7,5 +7,8 @@ /sql /syntax /user_property +/callout +/echo +/count /bug_fix /log* diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index de25ee7..58d6a99 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -5,6 +5,15 @@ project(oniguruma_sample C) add_executable(crnl crnl.c) target_link_libraries(crnl onig) +add_executable(callout callout.c) +target_link_libraries(callout onig) + +add_executable(echo echo.c) +target_link_libraries(echo onig) + +add_executable(count count.c) +target_link_libraries(count onig) + add_executable(encode encode.c) target_link_libraries(encode onig) diff --git a/sample/Makefile.am b/sample/Makefile.am index 7403384..fd809fa 100644 --- a/sample/Makefile.am +++ b/sample/Makefile.am @@ -6,9 +6,9 @@ LDADD = $(lib_onig) AM_LDFLAGS = -L$(prefix)/lib AM_CPPFLAGS = -I$(top_srcdir)/src -I$(includedir) -TESTS = encode listcap names posix simple sql syntax user_property bug_fix +TESTS = encode listcap names posix simple sql syntax user_property callout echo count bug_fix 
-check_PROGRAMS = encode listcap names posix simple sql syntax user_property bug_fix +check_PROGRAMS = $(TESTS) encode_SOURCES = encode.c listcap_SOURCES = listcap.c @@ -18,11 +18,14 @@ simple_SOURCES = simple.c sql_SOURCES = sql.c syntax_SOURCES = syntax.c user_property_SOURCES = user_property.c +callout_SOURCES = callout.c +echo_SOURCES = echo.c +count_SOURCES = count.c bug_fix = bug_fix.c sampledir = . -test: encode listcap names posix simple sql syntax user_property bug_fix +test: $(TESTS) $(sampledir)/encode $(sampledir)/listcap $(sampledir)/names @@ -31,4 +34,7 @@ test: encode listcap names posix simple sql syntax user_property bug_fix $(sampledir)/sql $(sampledir)/syntax $(sampledir)/user_property + $(sampledir)/callout + $(sampledir)/echo + $(sampledir)/count $(sampledir)/bug_fix diff --git a/sample/bug_fix.c b/sample/bug_fix.c index 9a45a78..44802ef 100644 --- a/sample/bug_fix.c +++ b/sample/bug_fix.c @@ -33,7 +33,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end) } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); fprintf(stderr, "ERROR: %s\n", s); fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); return -1; @@ -69,7 +69,7 @@ exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, &ci, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } @@ -99,7 +99,7 @@ exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr) options, enc, ONIG_SYNTAX_DEFAULT, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } diff --git a/sample/callout.c b/sample/callout.c new file mode 100644 index 0000000..ab89543 --- /dev/null +++ b/sample/callout.c 
@@ -0,0 +1,254 @@ +/* + * callout.c + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "oniguruma.h" + +static int +callout_body(OnigCalloutArgs* args, void* user_data) +{ + int r; + int i; + int n; + int begin, end; + int len; + int used_num; + int used_bytes; + OnigCalloutIn in; + int name_id; + const UChar* contents; + const UChar* start; + const UChar* current; + regex_t* regex; + + in = onig_get_callout_in_by_callout_args(args); + name_id = onig_get_name_id_by_callout_args(args); + start = onig_get_start_by_callout_args(args); + current = onig_get_current_by_callout_args(args); + regex = onig_get_regex_by_callout_args(args); + + contents = onig_get_contents_by_callout_args(args); + + if (name_id != ONIG_NON_NAME_ID) { + UChar* name = onig_get_callout_name_by_name_id(name_id); + fprintf(stdout, "name: %s\n", name); + } + fprintf(stdout, + "%s %s: contents: \"%s\", start: \"%s\", current: \"%s\"\n", + contents != 0 ? "CONTENTS" : "NAME", + in == ONIG_CALLOUT_IN_PROGRESS ? 
"PROGRESS" : "RETRACTION", + contents, start, current); + + (void )onig_get_used_stack_size_in_callout(args, &used_num, &used_bytes); + fprintf(stdout, "stack: used_num: %d, used_bytes: %d\n", used_num, used_bytes); + + n = onig_number_of_captures(regex); + for (i = 1; i <= n; i++) { + r = onig_get_capture_range_in_callout(args, i, &begin, &end); + if (r != ONIG_NORMAL) return r; + + fprintf(stdout, "capture %d: (%d-%d)\n", i, begin, end); + } + + fflush(stdout); + return ONIG_CALLOUT_SUCCESS; +} + +static int +progress_callout_func(OnigCalloutArgs* args, void* user_data) +{ + return callout_body(args, user_data); +} + +static int +retraction_callout_func(OnigCalloutArgs* args, void* user_data) +{ + return callout_body(args, user_data); +} + +static int +foo(OnigCalloutArgs* args, void* user_data) +{ + return callout_body(args, user_data); +} + +static int +bar(OnigCalloutArgs* args, void* user_data) +{ + int r; + int i; + int n; + OnigType type; + OnigValue val; + + fprintf(stdout, "bar called.\n"); + + n = onig_get_args_num_by_callout_args(args); + if (n < 0) { + fprintf(stderr, "FAIL: onig_get_args_num_by_callout_args(): %d\n", n); + return n; + } + + for (i = 0; i < n; i++) { + r = onig_get_arg_by_callout_args(args, i, &type, &val); + if (r != 0) { + fprintf(stderr, "FAIL: onig_get_arg_by_callout_args(): %d\n", r); + return r; + } + + fprintf(stdout, "arg[%d]: ", i); + switch (type) { + case ONIG_TYPE_LONG: + fprintf(stdout, "%ld\n", val.l); + break; + case ONIG_TYPE_CHAR: + fprintf(stdout, "0x%06x\n", val.c); + break; + case ONIG_TYPE_STRING: + fprintf(stdout, "'%s'\n", val.s.start); + break; + default: + /* Never come here. But escape warning. 
*/ + break; + }; + } + + return ONIG_CALLOUT_SUCCESS; +} + +static int +test(OnigEncoding enc, char* in_pattern, char* in_str) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + UChar* pattern; + UChar* str; + + pattern = (UChar* )in_pattern; + str = (UChar* )in_str; + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, enc, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stderr, "COMPILE ERROR: %d: %s\n", r, s); + return -1; + } + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stderr, "match at %d\n", r); + for (i = 0; i < region->num_regs; i++) { + fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail\n"); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(stderr, "SEARCH ERROR: %d: %s\n", r, s); + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + return r; +} + +extern int main(int argc, char* argv[]) +{ + int r; + int id; + UChar* name; + OnigEncoding use_encs[1]; + unsigned int arg_types[4]; + OnigValue opt_defaults[4]; + OnigEncoding enc; + + enc = ONIG_ENCODING_UTF8; + use_encs[0] = enc; + + r = onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); + if (r != ONIG_NORMAL) return -1; + + /* monitor on */ + r = onig_setup_builtin_monitors_by_ascii_encoded_name(stdout); + if (r != ONIG_NORMAL) return -1; + + name = (UChar* )"foo"; + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE, + name, name + strlen((char* )name), + ONIG_CALLOUT_IN_BOTH, foo, 0, 0, 0, 0, 0); + if (id < 0) { + 
fprintf(stderr, "ERROR: fail to set callout of name: %s\n", name); + //return -1; + } + + name = (UChar* )"bar"; + arg_types[0] = ONIG_TYPE_LONG; + arg_types[1] = ONIG_TYPE_STRING; + arg_types[2] = ONIG_TYPE_CHAR; + opt_defaults[0].s.start = (UChar* )"I am a option argument's default value."; + opt_defaults[0].s.end = opt_defaults[0].s.start + + strlen((char* )opt_defaults[0].s.start); + opt_defaults[1].c = 0x4422; + + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE, + name, name + strlen((char* )name), + ONIG_CALLOUT_IN_PROGRESS, bar, 0, + 3, arg_types, 2, opt_defaults); + if (id < 0) { + fprintf(stderr, "ERROR: fail to set callout of name: %s\n", name); + //return -1; + } + + (void)onig_set_progress_callout(progress_callout_func); + (void)onig_set_retraction_callout(retraction_callout_func); + + /* callout of contents */ + test(enc, "a+(?{foo bar baz...}X)$", "aaab"); + test(enc, "(?{{!{}#$%&'()=-~^|[_]`@*:+;<>?/.\\,}}[symbols])c", "abc"); + test(enc, "\\A(...)(?{{{booooooooooooo{{ooo}}ooooooooooz}}}<)", "aaab"); + test(enc, "\\A(?!a(?{in prec-read-not}[xxx]X)b)", "ac"); + test(enc, "(?<!a(?{in look-behind-not}X)c)c", "abc"); + + // callout of name + test(enc, "\\A(*foo)abc", "abc"); + test(enc, "abc(?:(*FAIL)|$)", "abcabc"); + test(enc, "abc(?:$|(*MISMATCH)|abc$)", "abcabc"); + test(enc, "abc(?:(*ERROR)|$)", "abcabc"); + test(enc, "ab(*foo{})(*FAIL)", "abc"); + test(enc, "abc(d|(*ERROR{-999}))", "abc"); + test(enc, "ab(*bar{372,I am a bar's argument,あ})c(*FAIL)", "abc"); + test(enc, "ab(*bar{1234567890})", "abc"); + test(enc, "(?:a(*MAX{2})|b)*", "abbabbabbabb"); + test(enc, "(?:(*MAX{2})a|b)*", "abbabbabbabb"); + test(enc, "(?:(*MAX{1})a|b)*", "bbbbbabbbbbabbbbb"); + test(enc, "(?:(*MAX{3})a|(*MAX{4})b)*", "bbbaabbab"); + test(enc, "(?:(*MAX[A]{3})a|(*MAX[B]{5})b)*(*CMP{A,<,B})", "abababc"); + test(enc, "(?:(*MAX[A]{7})a|(*MAX[B]{5})b)*(*CMP{A,>=,4})", "abababcabababaa"); + + /* callouts in condition */ + test(enc, "\\A(?(?{in 
condition})then|else)\\z", "then"); + test(enc, "\\A(?(*FAIL)then|else)\\z", "else"); + + /* monitor test */ + test(enc, "(?:(*MON{X})(*FAIL)|.{,3}(*MON[FOO])k)", "abcdefghijk"); + + onig_end(); + return 0; +} diff --git a/sample/count.c b/sample/count.c new file mode 100644 index 0000000..0f0e1f2 --- /dev/null +++ b/sample/count.c @@ -0,0 +1,129 @@ +/* + * count.c + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "oniguruma.h" + +#define ulen(enc, p) onigenc_str_bytelen_null(enc, (UChar* )p) + +static int +test(OnigEncoding enc, OnigMatchParam* mp, char* in_pattern, char* in_str) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + UChar* pattern; + UChar* str; + + pattern = (UChar* )in_pattern; + str = (UChar* )in_str; + + r = onig_new(®, pattern, pattern + ulen(enc, pattern), + ONIG_OPTION_DEFAULT, enc, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stderr, "COMPILE ERROR: %d: %s\n", r, s); + return -1; + } + + region = onig_region_new(); + + end = str + ulen(enc, str); + start = str; + range = end; + r = onig_search_with_param(reg, str, end, start, range, region, + ONIG_OPTION_NONE, mp); + if (r >= 0) { + int slot; + OnigValue val; + char* tag; + int tag_len; + + fprintf(stdout, "match at %d\n", r); + + show_count: + if (enc == ONIG_ENCODING_UTF16_BE) { + tag = "\000x\000\000"; + } + else if (enc == ONIG_ENCODING_UTF16_LE) { + tag = "x\000\000\000"; + } + else { + tag = "x"; + } + tag_len = ulen(enc, tag); + + slot = 0; + r = onig_get_callout_data_by_tag(reg, mp, (UChar* )tag, (UChar* )tag + tag_len, + slot, 0, &val); + if (r < ONIG_NORMAL) goto err; + else if (r > ONIG_NORMAL) { + fprintf(stdout, "COUNT[x]: NO DATA\n"); + } + else { + fprintf(stdout, "COUNT[x]: %ld\n", val.l); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stdout, "search fail\n"); + goto show_count; 
+ } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + err: + onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "SEARCH ERROR: %d: %s\n", r, s); + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + return r; +} + +extern int main(int argc, char* argv[]) +{ + int r; + int id; + UChar* name; + OnigMatchParam* mp; + OnigEncoding encs[3]; + OnigType arg_types[4]; + OnigValue opt_defaults[4]; + + encs[0] = ONIG_ENCODING_UTF8; + encs[1] = ONIG_ENCODING_UTF16_BE; + encs[2] = ONIG_ENCODING_UTF16_LE; + + r = onig_initialize(encs, sizeof(encs)/sizeof(encs[0])); + if (r != ONIG_NORMAL) { + fprintf(stderr, "FAIL: onig_initialize(): %d\n", r); + return -1; + } + + mp = onig_new_match_param(); + + test(encs[0], mp, "abc(.(*COUNT[x]))*(*FAIL)", "abcdefg"); + test(encs[0], mp, "abc(.(*COUNT[_any_]))*(.(*COUNT[x]))*d", "abcdefg"); + /* fail count */ + test(encs[0], mp, "abc(.(*COUNT[x]{<}))*f", "abcdefg"); + /* success count */ + test(encs[0], mp, "abc(.(*COUNT[x]{X}))*f", "abcdefg"); + /* passed count */ + test(encs[0], mp, "abc(.(*COUNT[x]))*f", "abcdefg"); + test(encs[0], mp, "a(.(*COUNT[x]))*z", "abcd\nabcdz"); + /* total count */ + test(encs[0], mp, "a(.(*TOTAL_COUNT[x]))*z", "abcd\nabcdz"); + + test(encs[1], mp, "\000a\000b\000c\000(\000.\000(\000*\000C\000O\000U\000N\000T\000[\000x\000]\000)\000)\000*\000(\000*\000F\000A\000I\000L\000)\000\000", "\000a\000b\000c\000d\000e\000f\000g\000\000"); + + test(encs[2], mp, "a\000b\000c\000(\000.\000(\000*\000C\000O\000U\000N\000T\000[\000x\000]\000)\000)\000*\000(\000*\000F\000A\000I\000L\000)\000\000\000", "a\000b\000c\000d\000e\000f\000g\000\000\000"); + + onig_free_match_param(mp); + onig_end(); + return 0; +} diff --git a/sample/echo.c b/sample/echo.c new file mode 100644 index 0000000..76df207 --- /dev/null +++ b/sample/echo.c @@ -0,0 +1,136 @@ +/* + * echo.c + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "oniguruma.h" + +static int 
+echo(OnigCalloutArgs* args, void* user_data) +{ + int r; + OnigCalloutIn in; + OnigType type; + OnigValue val; + FILE* fp; + + fp = stdout; + + in = onig_get_callout_in_by_callout_args(args); + + r = onig_get_arg_by_callout_args(args, 1, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (in == ONIG_CALLOUT_IN_PROGRESS) { + if (val.c == '<') + return ONIG_CALLOUT_SUCCESS; + } + else { + if (val.c != 'X' && val.c != '<') + return ONIG_CALLOUT_SUCCESS; + } + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + + fprintf(fp, "%s %s\n", + (in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<="), + val.s.start); + fflush(fp); + + return ONIG_CALLOUT_SUCCESS; +} + + +static int +test(OnigEncoding enc, char* in_pattern, char* in_str) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + UChar* pattern; + UChar* str; + + pattern = (UChar* )in_pattern; + str = (UChar* )in_str; + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, enc, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stderr, "COMPILE ERROR: %d: %s\n", r, s); + return -1; + } + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stderr, "match at %d\n", r); + for (i = 0; i < region->num_regs; i++) { + fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail\n"); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(stderr, "SEARCH ERROR: %d: %s\n", r, s); + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + return r; +} + +extern int main(int 
argc, char* argv[]) +{ + int r; + int id; + UChar* name; + OnigEncoding use_encs[1]; + OnigType arg_types[4]; + OnigValue opt_defaults[4]; + OnigEncoding enc; + + enc = ONIG_ENCODING_UTF8; + use_encs[0] = enc; + + r = onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); + if (r != ONIG_NORMAL) return -1; + + name = (UChar* )"echo"; + arg_types[0] = ONIG_TYPE_STRING; + arg_types[1] = ONIG_TYPE_CHAR; + opt_defaults[0].s.start = (UChar* )"echo"; + opt_defaults[0].s.end = opt_defaults[0].s.start + + strlen((char* )opt_defaults[0].s.start); + opt_defaults[1].c = '>'; + + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE, + name, name + strlen((char* )name), + ONIG_CALLOUT_IN_BOTH, echo, 0, + 2, arg_types, 2, opt_defaults); + if (id < 0) { + fprintf(stderr, "ERROR: fail to set callout of name: %s\n", name); + return -1; + } + + test(enc, "(?:(*echo{abc!!!})a|b)*", "abba"); + test(enc, "(?:(*echo{xyz,X})a|b)*", "abba"); + + onig_end(); + return 0; +} diff --git a/sample/encode.c b/sample/encode.c index 1daafa9..4eead6e 100644 --- a/sample/encode.c +++ b/sample/encode.c @@ -31,7 +31,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end) } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); fprintf(stderr, "ERROR: %s\n", s); fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); return -1; @@ -59,7 +59,7 @@ exec(OnigEncoding enc, OnigOptionType options, options, enc, ONIG_SYNTAX_DEFAULT, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } @@ -108,7 +108,7 @@ exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, &ci, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: 
%s\n", s); return -1; } diff --git a/sample/listcap.c b/sample/listcap.c index e283890..6f9880c 100644 --- a/sample/listcap.c +++ b/sample/listcap.c @@ -36,7 +36,7 @@ extern int ex(unsigned char* str, unsigned char* pattern, ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } @@ -68,7 +68,7 @@ extern int ex(unsigned char* str, unsigned char* pattern, } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); return -1; } diff --git a/sample/names.c b/sample/names.c index 2cf3a2f..e570402 100644 --- a/sample/names.c +++ b/sample/names.c @@ -42,7 +42,7 @@ extern int main(int argc, char* argv[]) ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } @@ -64,7 +64,7 @@ extern int main(int argc, char* argv[]) } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); return -1; } diff --git a/sample/simple.c b/sample/simple.c index e570a30..9860dbe 100644 --- a/sample/simple.c +++ b/sample/simple.c @@ -23,7 +23,7 @@ extern int main(int argc, char* argv[]) ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } @@ -47,7 +47,7 @@ extern int main(int argc, char* argv[]) } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); fprintf(stderr, 
"ERROR: %s\n", s); return -1; } diff --git a/sample/sql.c b/sample/sql.c index dfa176c..4f5283a 100644 --- a/sample/sql.c +++ b/sample/sql.c @@ -40,7 +40,7 @@ extern int main(int argc, char* argv[]) ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } @@ -64,7 +64,7 @@ extern int main(int argc, char* argv[]) } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); fprintf(stderr, "ERROR: %s\n", s); return -1; } diff --git a/sample/syntax.c b/sample/syntax.c index 6bf27c5..02af2ea 100644 --- a/sample/syntax.c +++ b/sample/syntax.c @@ -20,7 +20,7 @@ extern int exec(OnigSyntaxType* syntax, ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %s\n", s); return -1; } @@ -44,7 +44,7 @@ extern int exec(OnigSyntaxType* syntax, } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); fprintf(stderr, "ERROR: %s\n", s); return -1; } diff --git a/sample/user_property.c b/sample/user_property.c index fb604f6..99b3f2c 100644 --- a/sample/user_property.c +++ b/sample/user_property.c @@ -39,7 +39,7 @@ main(int argc, char* argv[]) } else { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); fprintf(stderr, "ERROR: %s\n", s); return -1; } @@ -51,7 +51,7 @@ main(int argc, char* argv[]) } else { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "onig_new: ERROR: %s\n", s); return -1; } @@ -75,7 +75,7 @@ main(int argc, 
char* argv[]) } else { /* error */ char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); fprintf(stderr, "ERROR: %s\n", s); return -1; } diff --git a/src/Makefile.am b/src/Makefile.am index be35b24..911aecd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -4,13 +4,24 @@ libname = libonig.la AM_CFLAGS = -Wall AM_CPPFLAGS = -I$(top_srcdir) -I$(includedir) -include_HEADERS = oniguruma.h oniggnu.h onigposix.h +include_HEADERS = oniguruma.h oniggnu.h + +posix_headers = onigposix.h + +if ENABLE_POSIX_API +posix_sources = regposix.c regposerr.c +include_HEADERS += $(posix_headers) +else +posix_sources = +endif + + lib_LTLIBRARIES = $(libname) libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ regenc.c regsyntax.c regtrav.c regversion.c st.c \ - regposix.c regposerr.c \ + $(posix_sources) \ unicode.c \ unicode_unfold_key.c \ unicode_fold1_key.c \ diff --git a/src/Makefile.windows b/src/Makefile.windows index 046345a..1ce8ce2 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -15,8 +15,7 @@ LINKFLAGS = -link -incremental:no -pdb:none INSTALL = install -c
CP = copy
CC = cl
-DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT
-RUBYDIR = ..
+DEFS = -DHAVE_CONFIG_H
subdirs =
@@ -25,44 +24,43 @@ libname = $(libbase)_s.lib dllname = $(libbase).dll
dlllib = $(libbase).lib
-onigheaders = oniguruma.h regint.h regparse.h regenc.h st.h
-posixheaders = onigposix.h
+onigheaders = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h
+posixheaders = $(ONIG_DIR)/onigposix.h
headers = $(posixheaders) $(onigheaders)
-onigobjs = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \
- regexec.obj regenc.obj regsyntax.obj regtrav.obj \
- regversion.obj st.obj onig_init.obj
-posixobjs = regposix.obj regposerr.obj
+onigobjs = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \
+ $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \
+ $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj
+posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj
libobjs = $(onigobjs) $(posixobjs)
-jp_objs = euc_jp.obj sjis.obj
-iso8859_objs = iso8859_1.obj iso8859_2.obj \
- iso8859_3.obj iso8859_4.obj \
- iso8859_5.obj iso8859_6.obj \
- iso8859_7.obj iso8859_8.obj \
- iso8859_9.obj iso8859_10.obj \
- iso8859_11.obj iso8859_13.obj \
- iso8859_14.obj iso8859_15.obj \
- iso8859_16.obj
-
-encobjs = ascii.obj utf8.obj \
- unicode.obj \
- utf16_be.obj utf16_le.obj \
- utf32_be.obj utf32_le.obj \
+jp_objs = $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj
+iso8859_objs = $(BUILD_DIR)/iso8859_1.obj $(BUILD_DIR)/iso8859_2.obj \
+ $(BUILD_DIR)/iso8859_3.obj $(BUILD_DIR)/iso8859_4.obj \
+ $(BUILD_DIR)/iso8859_5.obj $(BUILD_DIR)/iso8859_6.obj \
+ $(BUILD_DIR)/iso8859_7.obj $(BUILD_DIR)/iso8859_8.obj \
+ $(BUILD_DIR)/iso8859_9.obj $(BUILD_DIR)/iso8859_10.obj \
+ $(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \
+ $(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \
+ $(BUILD_DIR)/iso8859_16.obj
+
+encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \
+ $(BUILD_DIR)/unicode.obj \
+ $(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \
+ $(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \
$(jp_objs) $(iso8859_objs) \
- euc_tw.obj euc_kr.obj big5.obj \
- gb18030.obj \
- koi8_r.obj \
- cp1251.obj \
- euc_jp_prop.obj sjis_prop.obj \
- unicode_unfold_key.obj unicode_fold1_key.obj \
- unicode_fold2_key.obj unicode_fold3_key.obj # koi8.obj
-
-onigsources = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \
- regsyntax.c regtrav.c regversion.c reggnu.c st.c
-posixsources = regposix.c regposerr.c
+ $(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \
+ $(BUILD_DIR)/gb18030.obj \
+ $(BUILD_DIR)/koi8_r.obj \
+ $(BUILD_DIR)/cp1251.obj \
+ $(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \
+ $(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \
+ $(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj # $(BUILD_DIR)/koi8.obj
+
+onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
+ $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c
+posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c
libsources = $(posixsources) $(onigsources)
-rubysources = $(onigsources)
patchfiles = re.c.168.patch re.c.181.patch
distfiles = README COPYING HISTORY \
@@ -77,7 +75,7 @@ makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' .SUFFIXES:
.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
-.c.obj:
+{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj:
$(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
# targets
@@ -96,58 +94,58 @@ $(libname): $(libobjs) $(encobjs) $(dllname): $(libobjs) $(encobjs)
$(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
-regparse.obj: regparse.c $(onigheaders) config.h st.h
-regext.obj: regext.c $(onigheaders) config.h
-regtrav.obj: regtrav.c $(onigheaders) config.h
-regcomp.obj: regcomp.c $(onigheaders) config.h
-regexec.obj: regexec.c regint.h regenc.h oniguruma.h config.h
-reggnu.obj: reggnu.c regint.h regenc.h oniguruma.h config.h oniggnu.h
-regerror.obj: regerror.c regint.h regenc.h oniguruma.h config.h
-regenc.obj: regenc.c regenc.h oniguruma.h config.h
-regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h
-regversion.obj: regversion.c oniguruma.h config.h
-regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h
-regposerr.obj: regposerr.c $(posixheaders) config.h
-st.obj: st.c regint.h oniguruma.h config.h st.h
-onig_init.obj: onig_init.c oniguruma.h
-
-ascii.obj: ascii.c regenc.h config.h
-unicode.obj: unicode.c unicode_fold_data.c unicode_property_data.c regenc.h config.h
-utf8.obj: utf8.c regenc.h config.h
-utf16_be.obj: utf16_be.c regenc.h config.h
-utf16_le.obj: utf16_le.c regenc.h config.h
-utf32_be.obj: utf32_be.c regenc.h config.h
-utf32_le.obj: utf32_le.c regenc.h config.h
-euc_jp.obj: euc_jp.c regenc.h config.h
-euc_tw.obj: euc_tw.c regenc.h config.h
-euc_kr.obj: euc_kr.c regenc.h config.h
-sjis.obj: sjis.c regenc.h config.h
-iso8859_1.obj: iso8859_1.c regenc.h config.h
-iso8859_2.obj: iso8859_2.c regenc.h config.h
-iso8859_3.obj: iso8859_3.c regenc.h config.h
-iso8859_4.obj: iso8859_4.c regenc.h config.h
-iso8859_5.obj: iso8859_5.c regenc.h config.h
-iso8859_6.obj: iso8859_6.c regenc.h config.h
-iso8859_7.obj: iso8859_7.c regenc.h config.h
-iso8859_8.obj: iso8859_8.c regenc.h config.h
-iso8859_9.obj: iso8859_9.c regenc.h config.h
-iso8859_10.obj: iso8859_10.c regenc.h config.h
-iso8859_11.obj: iso8859_11.c regenc.h config.h
-iso8859_13.obj: iso8859_13.c regenc.h config.h
-iso8859_14.obj: iso8859_14.c regenc.h config.h
-iso8859_15.obj: iso8859_15.c regenc.h config.h
-iso8859_16.obj: iso8859_16.c regenc.h config.h
-koi8.obj: koi8.c regenc.h config.h
-koi8_r.obj: koi8_r.c regenc.h config.h
-cp1251.obj: cp1251.c regenc.h config.h
-big5.obj: big5.c regenc.h config.h
-gb18030.obj: gb18030.c regenc.h config.h
-euc_jp_prop.obj: euc_jp_prop.c regenc.h
-sjis_prop.obj: sjis_prop.c regenc.h
-unicode_unfold_key.obj: unicode_unfold_key.c regenc.h config.h
-unicode_fold1_key.obj: unicode_fold1_key.c regenc.h config.h
-unicode_fold2_key.obj: unicode_fold2_key.c regenc.h config.h
-unicode_fold3_key.obj: unicode_fold3_key.c regenc.h config.h
+$(BUILD_DIR)/regparse.obj: $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regext.obj: $(ONIG_DIR)/regext.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regtrav.obj: $(ONIG_DIR)/regtrav.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regcomp.obj: $(ONIG_DIR)/regcomp.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regexec.obj: $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/reggnu.obj: $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h
+$(BUILD_DIR)/regerror.obj: $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regenc.obj: $(ONIG_DIR)/regenc.c $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regposix.obj: $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/st.obj: $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h
+$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h
+
+$(BUILD_DIR)/ascii.obj: $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode.obj: $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf8.obj: $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf16_be.obj: $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf16_le.obj: $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf32_be.obj: $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf32_le.obj: $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_jp.obj: $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_tw.obj: $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_kr.obj: $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/sjis.obj: $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_1.obj: $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_2.obj: $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_3.obj: $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_4.obj: $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_5.obj: $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_6.obj: $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_7.obj: $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_8.obj: $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_9.obj: $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/koi8.obj: $(ONIG_DIR)/koi8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/koi8_r.obj: $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/cp1251.obj: $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/big5.obj: $(ONIG_DIR)/big5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/gb18030.obj: $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_jp_prop.obj: $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h
+$(BUILD_DIR)/sjis_prop.obj: $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h
+$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
# C library test
ctest: $(testc)
@@ -158,26 +156,23 @@ ptest: $(testp) .\$(testp)
$(testc): $(testc).c $(libname)
- $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
+ $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
$(testp): $(testc).c $(dlllib)
- $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib)
-
-#$(testc)u.c: test.rb testconvu.rb
-# ruby -Ke testconvu.rb test.rb > $@
+ $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib)
$(testc)u: $(testc)u.c $(libname)
- $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
+ $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
clean:
- del *.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj
+ del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj
samples: all
- $(CC) $(CFLAGS) -I. /Fe:simple sample\simple.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:posix sample\posix.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:names sample\names.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:listcap sample\listcap.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:sql sample\sql.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:encode sample\encode.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:syntax sample\syntax.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o simple $(ONIG_DIR)\sample\simple.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o posix $(ONIG_DIR)\sample\posix.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o names $(ONIG_DIR)\sample\names.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o listcap $(ONIG_DIR)\sample\listcap.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o sql $(ONIG_DIR)\sample\sql.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o encode $(ONIG_DIR)\sample\encode.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o syntax $(ONIG_DIR)\sample\syntax.c $(dlllib)
\ No newline at end of file diff --git a/src/ascii.c b/src/ascii.c index b21878d..7efaa26 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -2,7 +2,7 @@ ascii.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,59 @@ * SUCH DAMAGE. */ -#include "regenc.h" +#include "regint.h" /* for USE_CALLOUT */ + +static int +init(void) +{ +#ifdef USE_CALLOUT + + int id; + OnigEncoding enc; + char* name; + unsigned int t_long; + unsigned int args[4]; + OnigValue opts[4]; + + enc = ONIG_ENCODING_ASCII; + t_long = ONIG_TYPE_LONG; + + name = "FAIL"; BC0_P(name, fail); + name = "MISMATCH"; BC0_P(name, mismatch); + name = "MAX"; BC_B(name, max, 1, &t_long); + + name = "ERROR"; + args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; + BC_P_O(name, error, 1, args, 1, opts); + + name = "COUNT"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, count, 1, args, 1, opts); + + name = "TOTAL_COUNT"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, total_count, 1, args, 1, opts); + + name = "CMP"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_STRING; + args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + + return ONIG_NORMAL; +} + +#if 0 +static int +is_initialized(void) +{ + /* Don't use this function */ + /* can't answer, because builtin callout entries removed in onig_end() */ + return 0; +} +#endif static int ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -41,8 +93,8 @@ ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) OnigEncodingType OnigEncodingASCII = { onigenc_single_byte_mbc_enc_len, "US-ASCII", /* name */ - 1, /* max byte length */ - 1, /* min byte length */ + 1, /* max 
enc length */ + 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, @@ -55,7 +107,8 @@ OnigEncodingType OnigEncodingASCII = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, - NULL, /* init */ - NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + init, + 0, /* is_initialized */ + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; @@ -2,7 +2,7 @@ big5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -186,5 +186,6 @@ OnigEncodingType OnigEncodingBIG5 = { big5_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index e13fad1..b59cc8d 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -31,6 +31,9 @@ /* Define to 1 if you have the <string.h> header file. */ #cmakedefine HAVE_STRING_H ${HAVE_STRING_H} +/* Define to 1 if you have the <limits.h> header file. */ +#cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H} + /* Define to 1 if you have the <sys/times.h> header file. */ #cmakedefine HAVE_SYS_TIMES_H ${HAVE_SYS_TIMES_H} @@ -64,9 +67,6 @@ /* Define to 1 if you have the ANSI C header files. 
*/ #cmakedefine STDC_HEADERS ${STDC_HEADERS} -/* Define if combination explosion check */ -#cmakedefine USE_COMBINATION_EXPLOSION_CHECK ${USE_COMBINATION_EXPLOSION_CHECK} - /* Define if enable CR+NL as line terminator */ #cmakedefine USE_CRNL_AS_LINE_TERMINATOR ${USE_CRNL_AS_LINE_TERMINATOR} diff --git a/src/cp1251.c b/src/cp1251.c index 4d655bb..f7b43c3 100644 --- a/src/cp1251.c +++ b/src/cp1251.c @@ -2,7 +2,7 @@ cp1251.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2006-2016 Byte <byte AT mail DOT kna DOT ru> + * Copyright (c) 2006-2018 Byte <byte AT mail DOT kna DOT ru> * K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * @@ -199,5 +199,6 @@ OnigEncodingType OnigEncodingCP1251 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/euc_jp.c b/src/euc_jp.c index 42c3bce..8dd6ac1 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -2,7 +2,7 @@ euc_jp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -306,5 +306,6 @@ OnigEncodingType OnigEncodingEUC_JP = { is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/euc_kr.c b/src/euc_kr.c index 450caf1..08bfa1c 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -2,7 +2,7 @@ euc_kr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -184,5 +184,6 @@ OnigEncodingType OnigEncodingEUC_CN = { euckr_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/euc_tw.c b/src/euc_tw.c index b3ee628..dbf0eac 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -2,7 +2,7 @@ euc_tw.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -167,5 +167,6 @@ OnigEncodingType OnigEncodingEUC_TW = { euctw_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/gb18030.c b/src/gb18030.c index c8b5865..073c83b 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -2,7 +2,7 @@ gb18030.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2005-2016 KUBO Takehiro <kubo AT jiubao DOT org> + * Copyright (c) 2005-2018 KUBO Takehiro <kubo AT jiubao DOT org> * K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * @@ -534,5 +534,6 @@ OnigEncodingType OnigEncodingGB18030 = { gb18030_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_1.c b/src/iso8859_1.c index 573931f..bcd7e26 100644 --- a/src/iso8859_1.c +++ b/src/iso8859_1.c @@ -2,7 +2,7 @@ iso8859_1.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -271,5 +271,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_10.c b/src/iso8859_10.c index 91b18d4..a5946cc 100644 --- a/src/iso8859_10.c +++ b/src/iso8859_10.c @@ -2,7 +2,7 @@ iso8859_10.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -238,5 +238,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_11.c b/src/iso8859_11.c index 518be25..ec94fd1 100644 --- a/src/iso8859_11.c +++ b/src/iso8859_11.c @@ -2,7 +2,7 @@ iso8859_11.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_13.c b/src/iso8859_13.c index d1f39a2..fba7fd4 100644 --- a/src/iso8859_13.c +++ b/src/iso8859_13.c @@ -2,7 +2,7 @@ iso8859_13.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -227,5 +227,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_14.c b/src/iso8859_14.c index 3361b0d..e1f71f5 100644 --- a/src/iso8859_14.c +++ b/src/iso8859_14.c @@ -2,7 +2,7 @@ iso8859_14.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -240,5 +240,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_15.c b/src/iso8859_15.c index b09e876..236e9e7 100644 --- a/src/iso8859_15.c +++ b/src/iso8859_15.c @@ -2,7 +2,7 @@ iso8859_15.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_16.c b/src/iso8859_16.c index 29a350d..42045bd 100644 --- a/src/iso8859_16.c +++ b/src/iso8859_16.c @@ -2,7 +2,7 @@ iso8859_16.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -236,5 +236,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_2.c b/src/iso8859_2.c index 9eb3536..db93046 100644 --- a/src/iso8859_2.c +++ b/src/iso8859_2.c @@ -2,7 +2,7 @@ iso8859_2.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_3.c b/src/iso8859_3.c index 862823a..6fe5e6f 100644 --- a/src/iso8859_3.c +++ b/src/iso8859_3.c @@ -2,7 +2,7 @@ iso8859_3.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_4.c b/src/iso8859_4.c index db706da..ee1eb93 100644 --- a/src/iso8859_4.c +++ b/src/iso8859_4.c @@ -2,7 +2,7 @@ iso8859_4.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -236,5 +236,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_5.c b/src/iso8859_5.c index 0e03e9c..7d828e1 100644 --- a/src/iso8859_5.c +++ b/src/iso8859_5.c @@ -2,7 +2,7 @@ iso8859_5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -225,5 +225,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_6.c b/src/iso8859_6.c index 6289af5..a959e98 100644 --- a/src/iso8859_6.c +++ b/src/iso8859_6.c @@ -2,7 +2,7 @@ iso8859_6.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_7.c b/src/iso8859_7.c index 75b520f..e695523 100644 --- a/src/iso8859_7.c +++ b/src/iso8859_7.c @@ -2,7 +2,7 @@ iso8859_7.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -221,5 +221,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_8.c b/src/iso8859_8.c index 5f18345..66b63b8 100644 --- a/src/iso8859_8.c +++ b/src/iso8859_8.c @@ -2,7 +2,7 @@ iso8859_8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_9.c b/src/iso8859_9.c index d0c06bb..d780293 100644 --- a/src/iso8859_9.c +++ b/src/iso8859_9.c @@ -2,7 +2,7 @@ iso8859_9.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -227,5 +227,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; @@ -2,7 +2,7 @@ koi8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -249,5 +249,6 @@ OnigEncodingType OnigEncodingKOI8 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/koi8_r.c b/src/koi8_r.c index f8ef34f..e88cfe3 100644 --- a/src/koi8_r.c +++ b/src/koi8_r.c @@ -2,7 +2,7 @@ koi8_r.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -211,5 +211,6 @@ OnigEncodingType OnigEncodingKOI8_R = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/onig_init.c b/src/onig_init.c index 9f53568..7ad98b7 100644 --- a/src/onig_init.c +++ b/src/onig_init.c @@ -2,7 +2,7 @@ onig_init.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2016-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,7 @@ * SUCH DAMAGE. */ -#include "oniguruma.h" +#include "regint.h" /* onig_init(): deprecated function */ extern int diff --git a/src/oniggnu.h b/src/oniggnu.h index 3da9f23..d688883 100644 --- a/src/oniggnu.h +++ b/src/oniggnu.h @@ -35,10 +35,12 @@ extern "C" { #endif -#define RE_MBCTYPE_ASCII 0 -#define RE_MBCTYPE_EUC 1 -#define RE_MBCTYPE_SJIS 2 -#define RE_MBCTYPE_UTF8 3 +enum { + RE_MBCTYPE_ASCII = 0, + RE_MBCTYPE_EUC = 1, + RE_MBCTYPE_SJIS = 2, + RE_MBCTYPE_UTF8 = 3 +}; /* GNU regex options */ #ifndef RE_NREGS diff --git a/src/onigposix.h b/src/onigposix.h index 22211e4..da0f919 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -4,7 +4,7 @@ onigposix.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -97,7 +97,7 @@ typedef struct { #ifndef ONIG_EXTERN #if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) +#if defined(ONIGURUMA_EXPORT) #define ONIG_EXTERN extern __declspec(dllexport) #else #define ONIG_EXTERN extern __declspec(dllimport) diff --git a/src/oniguruma.h b/src/oniguruma.h index 5ad4469..349c00e 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,30 +35,10 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 -#define ONIGURUMA_VERSION_MINOR 7 -#define ONIGURUMA_VERSION_TEENY 0 +#define ONIGURUMA_VERSION_MINOR 8 +#define ONIGURUMA_VERSION_TEENY 1 -#ifdef __cplusplus -# ifndef HAVE_PROTOTYPES -# define HAVE_PROTOTYPES 1 -# endif -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ -#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifdef HAVE_STDARG_H -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif +#define ONIGURUMA_VERSION_INT 60801 #ifndef P_ #if defined(__STDC__) || defined(_WIN32) @@ -69,16 +49,12 @@ extern "C" { #endif #ifndef PV_ -#ifdef HAVE_STDARG_PROTOTYPES # define PV_(args) args -#else -# define PV_(args) () -#endif #endif #ifndef ONIG_EXTERN #if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) || defined(RUBY_EXPORT) +#if defined(ONIGURUMA_EXPORT) #define ONIG_EXTERN extern __declspec(dllexport) #else #define ONIG_EXTERN extern 
__declspec(dllimport) @@ -96,10 +72,6 @@ extern "C" { #define UChar OnigUChar #endif -#ifdef _WIN32 -#include <windows.h> -#endif - typedef unsigned int OnigCodePoint; typedef unsigned char OnigUChar; typedef unsigned int OnigCtype; @@ -166,6 +138,9 @@ typedef struct OnigEncodingTypeST { int (*init)(void); int (*is_initialized)(void); int (*is_valid_mbc_string)(const OnigUChar* s, const OnigUChar* end); + unsigned int flag; + OnigCodePoint sb_range; + int index; } OnigEncodingType; typedef OnigEncodingType* OnigEncoding; @@ -243,21 +218,24 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; /* 18: 6(max-byte) * 3(case-fold chars) */ /* character types */ -#define ONIGENC_CTYPE_NEWLINE 0 -#define ONIGENC_CTYPE_ALPHA 1 -#define ONIGENC_CTYPE_BLANK 2 -#define ONIGENC_CTYPE_CNTRL 3 -#define ONIGENC_CTYPE_DIGIT 4 -#define ONIGENC_CTYPE_GRAPH 5 -#define ONIGENC_CTYPE_LOWER 6 -#define ONIGENC_CTYPE_PRINT 7 -#define ONIGENC_CTYPE_PUNCT 8 -#define ONIGENC_CTYPE_SPACE 9 -#define ONIGENC_CTYPE_UPPER 10 -#define ONIGENC_CTYPE_XDIGIT 11 -#define ONIGENC_CTYPE_WORD 12 -#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ -#define ONIGENC_CTYPE_ASCII 14 +typedef enum { + ONIGENC_CTYPE_NEWLINE = 0, + ONIGENC_CTYPE_ALPHA = 1, + ONIGENC_CTYPE_BLANK = 2, + ONIGENC_CTYPE_CNTRL = 3, + ONIGENC_CTYPE_DIGIT = 4, + ONIGENC_CTYPE_GRAPH = 5, + ONIGENC_CTYPE_LOWER = 6, + ONIGENC_CTYPE_PRINT = 7, + ONIGENC_CTYPE_PUNCT = 8, + ONIGENC_CTYPE_SPACE = 9, + ONIGENC_CTYPE_UPPER = 10, + ONIGENC_CTYPE_XDIGIT = 11, + ONIGENC_CTYPE_WORD = 12, + ONIGENC_CTYPE_ALNUM = 13, /* alpha || digit */ + ONIGENC_CTYPE_ASCII = 14 +} OnigEncCtype; + #define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII @@ -365,7 +343,8 @@ ONIG_EXTERN int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); - +ONIG_EXTERN +UChar* onigenc_strdup P_((OnigEncoding enc, const UChar* s, const UChar* end)); /* PART: 
regular expression */ @@ -513,6 +492,8 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) */ #define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* \X \y \Y */ #define ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL (1U<<27) /* (?R), (?&name)... */ +#define ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (1U<<28) /* (?{...}) (?{{...}}) */ +#define ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (1U<<29) /* (*name) (*name{a,..}) */ /* syntax (behavior) */ #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ @@ -552,6 +533,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_NORMAL 0 #define ONIG_MISMATCH -1 #define ONIG_NO_SUPPORT_CONFIG -2 +#define ONIG_ABORT -3 /* internal error */ #define ONIGERR_MEMORY -5 @@ -562,6 +544,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_UNEXPECTED_BYTECODE -14 #define ONIGERR_MATCH_STACK_LIMIT_OVER -15 #define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16 +#define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER -17 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 #define ONIGERR_FAIL_TO_INITIALIZE -23 @@ -616,6 +599,12 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_INVALID_IF_ELSE_SYNTAX -224 #define ONIGERR_INVALID_ABSENT_GROUP_PATTERN -225 #define ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN -226 +#define ONIGERR_INVALID_CALLOUT_PATTERN -227 +#define ONIGERR_INVALID_CALLOUT_NAME -228 +#define ONIGERR_UNDEFINED_CALLOUT_NAME -229 +#define ONIGERR_INVALID_CALLOUT_BODY -230 +#define ONIGERR_INVALID_CALLOUT_TAG_NAME -231 +#define ONIGERR_INVALID_CALLOUT_ARG -232 #define ONIGERR_INVALID_CODE_POINT_VALUE -400 #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 @@ -681,49 +670,8 @@ extern void onig_null_warn P_((const char* s)); #define ONIG_CHAR_TABLE_SIZE 256 -typedef struct re_pattern_buffer { - /* common members of BBuf(bytes-buffer) */ - unsigned 
char* p; /* compiled pattern */ - unsigned int used; /* used space for p */ - unsigned int alloc; /* allocated space for p */ - - int num_mem; /* used memory(...) num counted from 1 */ - int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */ - int num_comb_exp_check; /* combination explosion check */ - int num_call; /* number of subexp call */ - unsigned int capture_history; /* (?@...) flag (1-31) */ - unsigned int bt_mem_start; /* need backtrack flag */ - unsigned int bt_mem_end; /* need backtrack flag */ - int stack_pop_level; - int repeat_range_alloc; - OnigRepeatRange* repeat_range; - - OnigEncoding enc; - OnigOptionType options; - OnigSyntaxType* syntax; - OnigCaseFoldType case_fold_flag; - void* name_table; - - /* optimization info (string search, char-map and anchors) */ - int optimize; /* optimize flag */ - int threshold_len; /* search str-length for apply optimize */ - int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */ - int sub_anchor; /* start-anchor for exact or map */ - unsigned char *exact; - unsigned char *exact_end; - unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ - int *int_map_backward; /* BM skip for backward search */ - OnigLen dmin; /* min-distance of exact or map */ - OnigLen dmax; /* max-distance of exact or map */ - - /* regex_t link chain */ - struct re_pattern_buffer* chain; /* escape compile-conflict */ -} OnigRegexType; - +struct re_pattern_buffer; +typedef struct re_pattern_buffer OnigRegexType; typedef OnigRegexType* OnigRegex; #ifndef ONIG_ESCAPE_REGEX_T_COLLISION @@ -740,10 +688,74 @@ typedef struct { OnigCaseFoldType case_fold_flag; } OnigCompileInfo; + +/* types for callout */ +typedef enum { + ONIG_CALLOUT_IN_PROGRESS = 1, /* 1<<0 */ + ONIG_CALLOUT_IN_RETRACTION = 2 /* 
1<<1 */ +} OnigCalloutIn; + +#define ONIG_CALLOUT_IN_BOTH (ONIG_CALLOUT_IN_PROGRESS | ONIG_CALLOUT_IN_RETRACTION) + +typedef enum { + ONIG_CALLOUT_OF_CONTENTS = 0, + ONIG_CALLOUT_OF_NAME = 1 +} OnigCalloutOf; + +typedef enum { + ONIG_CALLOUT_TYPE_SINGLE = 0, + ONIG_CALLOUT_TYPE_START_CALL = 1, + ONIG_CALLOUT_TYPE_BOTH_CALL = 2, + ONIG_CALLOUT_TYPE_START_MARK_END_CALL = 3, +} OnigCalloutType; + + +#define ONIG_NON_NAME_ID -1 +#define ONIG_NON_CALLOUT_NUM 0 + +#define ONIG_CALLOUT_MAX_ARGS_NUM 4 +#define ONIG_CALLOUT_DATA_SLOT_NUM 5 + +struct OnigCalloutArgsStruct; +typedef struct OnigCalloutArgsStruct OnigCalloutArgs; + +typedef int (*OnigCalloutFunc)(OnigCalloutArgs* args, void* user_data); + +/* callout function return values (less than -1: error code) */ +typedef enum { + ONIG_CALLOUT_FAIL = 1, + ONIG_CALLOUT_SUCCESS = 0 +} OnigCalloutResult; + +typedef enum { + ONIG_TYPE_VOID = 0, + ONIG_TYPE_LONG = 1<<0, + ONIG_TYPE_CHAR = 1<<1, + ONIG_TYPE_STRING = 1<<2, + ONIG_TYPE_POINTER = 1<<3, + ONIG_TYPE_TAG = 1<<4, +} OnigType; + +typedef union { + long l; + OnigCodePoint c; + struct { + OnigUChar* start; + OnigUChar* end; + } s; + void* p; + int tag; /* tag -> callout_num */ +} OnigValue; + + +struct OnigMatchParamStruct; +typedef struct OnigMatchParamStruct OnigMatchParam; + + /* Oniguruma Native API */ ONIG_EXTERN -int onig_initialize P_((OnigEncoding encodings[], int n)); +int onig_initialize P_((OnigEncoding encodings[], int number_of_encodings)); /* onig_init(): deprecated function. Use onig_initialize(). 
*/ ONIG_EXTERN int onig_init P_((void)); @@ -756,7 +768,7 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); +int onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); @@ -765,12 +777,16 @@ void onig_free P_((OnigRegex)); ONIG_EXTERN void onig_free_body P_((OnigRegex)); ONIG_EXTERN -int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); +int onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); ONIG_EXTERN int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN +int onig_search_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp)); +ONIG_EXTERN int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN +int 
onig_match_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp)); +ONIG_EXTERN OnigRegion* onig_region_new P_((void)); ONIG_EXTERN void onig_region_init P_((OnigRegion* region)); @@ -843,6 +859,10 @@ unsigned int onig_get_match_stack_limit_size P_((void)); ONIG_EXTERN int onig_set_match_stack_limit_size P_((unsigned int size)); ONIG_EXTERN +unsigned long onig_get_retry_limit_in_match P_((void)); +ONIG_EXTERN +int onig_set_retry_limit_in_match P_((unsigned long n)); +ONIG_EXTERN unsigned int onig_get_parse_depth_limit P_((void)); ONIG_EXTERN int onig_set_capture_num_limit P_((int num)); @@ -857,6 +877,121 @@ const char* onig_version P_((void)); ONIG_EXTERN const char* onig_copyright P_((void)); +/* for OnigMatchParam */ +ONIG_EXTERN +OnigMatchParam* onig_new_match_param P_((void)); +ONIG_EXTERN +void onig_free_match_param P_((OnigMatchParam* p)); +ONIG_EXTERN +void onig_free_match_param_content P_((OnigMatchParam* p)); +ONIG_EXTERN +int onig_initialize_match_param P_((OnigMatchParam* mp)); +ONIG_EXTERN +int onig_set_match_stack_limit_size_of_match_param P_((OnigMatchParam* param, unsigned int limit)); +ONIG_EXTERN +int onig_set_retry_limit_in_match_of_match_param P_((OnigMatchParam* param, unsigned long limit)); +ONIG_EXTERN +int onig_set_progress_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); +ONIG_EXTERN +int onig_set_retraction_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); + +/* for callout functions */ +ONIG_EXTERN +OnigCalloutFunc onig_get_progress_callout P_((void)); +ONIG_EXTERN +int onig_set_progress_callout P_((OnigCalloutFunc f)); +ONIG_EXTERN +OnigCalloutFunc onig_get_retraction_callout P_((void)); +ONIG_EXTERN +int onig_set_retraction_callout P_((OnigCalloutFunc f)); +ONIG_EXTERN +int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, 
OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[])); /* name: single-byte string */ +ONIG_EXTERN +OnigUChar* onig_get_callout_name_by_name_id P_((int id)); +ONIG_EXTERN +int onig_get_callout_num_by_tag P_((OnigRegex reg, const UChar* tag, const UChar* tag_end)); +ONIG_EXTERN +int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType type, OnigValue* val)); + +/* used in callout functions */ +ONIG_EXTERN +int onig_get_callout_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +OnigCalloutIn onig_get_callout_in_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_name_id_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_contents_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_contents_end_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_args_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_passed_args_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_arg_by_callout_args P_((OnigCalloutArgs* args, int index, OnigType* type, OnigValue* val)); +ONIG_EXTERN +const OnigUChar* onig_get_string_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_string_end_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_start_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_right_range_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_current_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +OnigRegex onig_get_regex_by_callout_args 
P_((OnigCalloutArgs* args)); +ONIG_EXTERN +unsigned long onig_get_retry_counter_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_callout_tag_is_exist_at_callout_num P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +const OnigUChar* onig_get_callout_tag_start P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +const OnigUChar* onig_get_callout_tag_end P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +int onig_get_callout_data_dont_clear_old P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args_self_dont_clear_old P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_get_capture_range_in_callout P_((OnigCalloutArgs* args, int mem_num, int* begin, int* end)); +ONIG_EXTERN +int onig_get_used_stack_size_in_callout P_((OnigCalloutArgs* args, int* used_num, int* used_bytes)); + +/* builtin callout functions */ +ONIG_EXTERN +int onig_builtin_fail P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_mismatch P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int 
onig_builtin_error P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_count P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_total_count P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_max P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_cmp P_((OnigCalloutArgs* args, void* user_data)); + +ONIG_EXTERN +int onig_setup_builtin_monitors_by_ascii_encoded_name P_((void* fp)); + #ifdef __cplusplus } #endif diff --git a/src/regcomp.c b/src/regcomp.c index 63df18b..a19109f 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -78,7 +78,7 @@ int_stack_push(int_stack* s, int v) { if (s->n >= s->alloc) { int new_size = s->alloc * 2; - int* nv = (int* )xrealloc(s->v, new_size); + int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size); if (IS_NULL(nv)) return ONIGERR_MEMORY; s->alloc = new_size; @@ -121,26 +121,28 @@ onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) return 0; } - -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; -#endif - -static UChar* -str_dup(UChar* s, UChar* end) +static int +int_multiply_cmp(int x, int y, int v) { - int len = (int )(end - s); + if (x == 0 || y == 0) return -1; - if (len > 0) { - UChar* r = (UChar* )xmalloc(len + 1); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, len); - r[len] = (UChar )0; - return r; + if (x < INT_MAX / y) { + int xy = x * y; + if (xy > v) return 1; + else { + if (xy == v) return 0; + else return -1; + } } - else return NULL; + else + return 1; } + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static 
unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif + static void swap_node(Node* a, Node* b) { @@ -200,20 +202,6 @@ bitset_is_empty(BitSetRef bs) return 1; } -#ifdef ONIG_DEBUG -static int -bitset_on_num(BitSetRef bs) -{ - int i, n; - - n = 0; - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(bs, i)) n++; - } - return n; -} -#endif - extern int onig_bbuf_init(BBuf* buf, int size) { @@ -282,17 +270,6 @@ add_opcode(regex_t* reg, int opcode) return 0; } -#ifdef USE_COMBINATION_EXPLOSION_CHECK -static int -add_state_check_num(regex_t* reg, int num) -{ - StateCheckNumType n = (StateCheckNumType )num; - - BB_ADD(reg, &n, SIZE_STATE_CHECK_NUM); - return 0; -} -#endif - static int add_rel_addr(regex_t* reg, int addr) { @@ -811,7 +788,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info, } static int -is_anychar_star_quantifier(QuantNode* qn) +is_anychar_infinite_greedy(QuantNode* qn) { if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) @@ -823,254 +800,21 @@ is_anychar_star_quantifier(QuantNode* qn) #define QUANTIFIER_EXPAND_LIMIT_SIZE 50 #define CKN_ON (ckn > 0) -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -static int -compile_length_quantifier_node(QuantNode* qn, regex_t* reg) -{ - int len, mod_tlen, cklen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->body_empty_info; - int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - cklen = (CKN_ON ? 
SIZE_STATE_CHECK_NUM: 0); - - /* anychar repeat */ - if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) { - if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; - else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; - } - } - - if (empty_info == QUANT_BODY_IS_NOT_EMPTY) - mod_tlen = tlen; - else - mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) - len = SIZE_OP_JUMP; - else - len = 0; - - len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; - } - else { - if (qn->lower == 0) - len = SIZE_OP_JUMP; - else - len = 0; - - len += mod_tlen + SIZE_OP_PUSH + cklen; - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) /* /(?<n>..){0}/ */ - len = SIZE_OP_JUMP + tlen; - else - len = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - len = SIZE_OP_STATE_CHECK_PUSH + tlen; - } - else { - len = SIZE_OP_PUSH + tlen; - } - } - else { - len = tlen; - } - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; - } - else { - len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; - if (CKN_ON) - len += SIZE_OP_STATE_CHECK; - } - - return len; -} - -static int -compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) -{ - int r, mod_tlen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->body_empty_info; - int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? 
qn->comb_exp_check_num : 0); - - if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); - if (r != 0) return r; - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { - if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); - else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r != 0) return r; - if (CKN_ON) { - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - } - - return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); - } - else { - if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_ML_STAR - : OP_ANYCHAR_ML_STAR)); - } - else { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_STAR - : OP_ANYCHAR_STAR)); - } - if (r != 0) return r; - if (CKN_ON) - r = add_state_check_num(reg, ckn); - - return r; - } - } - - if (empty_info == QUANT_BODY_IS_NOT_EMPTY) - mod_tlen = tlen; - else - mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) { - r = add_opcode_rel_addr(reg, OP_JUMP, - (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); - if (r != 0) return r; - } - - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - } - if (r != 0) return r; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); - if (r != 0) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP - + (int )(CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); - } - else { - if (qn->lower == 0) { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r != 0) return r; - } - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); - if (r != 0) return r; - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - r = add_rel_addr(reg, - -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); - } - else - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r != 0) return r; - r = compile_tree(NODE_QUANT_BODY(qn), reg, env); - } - else - r = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - r = add_rel_addr(reg, tlen); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, tlen); - } - if (r != 0) return r; - } - - r = compile_tree(NODE_QUANT_BODY(qn), reg, env); - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - r = add_rel_addr(reg, SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - } - - if (r != 0) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r != 0) return r; - r = compile_tree(NODE_QUANT_BODY(qn), reg, env); - } - else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env); - if (CKN_ON) { - if (r != 0) return r; - r = add_opcode(reg, OP_STATE_CHECK); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - } - } - return r; -} - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - static int compile_length_quantifier_node(QuantNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->body_empty_info; + enum QuantBodyEmpty empty_info = qn->body_empty_info; int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; + if (tlen == 0) return 0; /* anychar repeat */ - if (is_anychar_star_quantifier(qn)) { - if (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE) { + if (is_anychar_infinite_greedy(qn)) { + if (qn->lower <= 1 || + int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; else @@ -1084,7 +828,8 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + (qn->lower <= 1 || + int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { len = SIZE_OP_JUMP; } @@ -1107,8 +852,9 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) len = SIZE_OP_JUMP + tlen; } else if (!infinite && qn->greedy && - 
(qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + (qn->upper == 1 || + int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper, + QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { len = tlen * qn->lower; len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); } @@ -1128,13 +874,15 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) { int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->body_empty_info; + enum QuantBodyEmpty empty_info = qn->body_empty_info; int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; + if (tlen == 0) return 0; - if (is_anychar_star_quantifier(qn) && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (is_anychar_infinite_greedy(qn) && + (qn->lower <= 1 || + int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact)) { @@ -1159,7 +907,8 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + (qn->lower <= 1 || + int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { if (qn->greedy) { if (IS_NOT_NULL(qn->head_exact)) @@ -1223,8 +972,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } else if (! 
infinite && qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + (qn->upper == 1 || + int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper, + QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { int n = qn->upper - qn->lower; r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); @@ -1250,7 +1000,6 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) } return r; } -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ static int compile_length_option_node(EnclosureNode* node, regex_t* reg) @@ -1358,7 +1107,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) if (tlen < 0) return tlen; len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP; } else { len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END; @@ -1505,14 +1254,14 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) len = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP); if (r != 0) return r; r = compile_tree(NODE_QUANT_BODY(qn), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_POP); + r = add_opcode(reg, OP_POP_OUT); if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP)); } else { r = add_opcode(reg, OP_ATOMIC_START); @@ -1762,6 +1511,30 @@ compile_gimmick_node(GimmickNode* node, regex_t* reg) if (r != 0) return r; r = add_mem_num(reg, node->id); break; + +#ifdef USE_CALLOUT + case GIMMICK_CALLOUT: + switch (node->detail_type) { + case ONIG_CALLOUT_OF_CONTENTS: + case ONIG_CALLOUT_OF_NAME: + { + r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ? 
+ OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME); + if (r != 0) return r; + if (node->detail_type == ONIG_CALLOUT_OF_NAME) { + r = add_mem_num(reg, node->id); + if (r != 0) return r; + } + r = add_mem_num(reg, node->num); + if (r != 0) return r; + } + break; + + default: + r = ONIGERR_TYPE_BUG; + break; + } +#endif } return r; @@ -1785,6 +1558,23 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg) case GIMMICK_UPDATE_VAR: len = SIZE_OP_UPDATE_VAR; break; + +#ifdef USE_CALLOUT + case GIMMICK_CALLOUT: + switch (node->detail_type) { + case ONIG_CALLOUT_OF_CONTENTS: + len = SIZE_OP_CALLOUT_CONTENTS; + break; + case ONIG_CALLOUT_OF_NAME: + len = SIZE_OP_CALLOUT_NAME; + break; + + default: + len = ONIGERR_TYPE_BUG; + break; + } + break; +#endif } return len; @@ -2337,7 +2127,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) #ifdef USE_CALL static int -unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg) { int i, offset; EnclosureNode* en; @@ -3725,11 +3515,12 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* re } static int -expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], - UChar *p, int slen, UChar *end, regex_t* reg, - Node **rnode) +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, + int slen, UChar *end, regex_t* reg, Node **rnode) { - int r, i, j, len, varlen; + int r, i, j; + int len; + int varlen; Node *anode, *var_anode, *snode, *xnode, *an; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; @@ -3972,145 +3763,8 @@ expand_case_fold_string(Node* node, regex_t* reg) return r; } - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define CEC_THRES_NUM_BIG_REPEAT 512 -#define CEC_INFINITE_NUM 0x7fffffff - -#define CEC_IN_INFINITE_REPEAT (1<<0) -#define CEC_IN_FINITE_REPEAT (1<<1) -#define CEC_CONT_BIG_REPEAT (1<<2) - -static int -setup_comb_exp_check(Node* node, int state, ScanEnv* env) -{ - int r = state; - - 
switch (NODE_TYPE(node)) { - case NODE_LIST: - { - do { - r = setup_comb_exp_check(NODE_CAR(node), r, env); - } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); - } - break; - - case NODE_ALT: - { - int ret; - do { - ret = setup_comb_exp_check(NODE_CAR(node), state, env); - r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); - } - break; - - case NODE_QUANT: - { - int var_num; - int child_state = state; - int add_state = 0; - QuantNode* qn = QUANT_(node); - Node* target = NODE_QUANT_BODY(qn); - - if (! IS_REPEAT_INFINITE(qn->upper)) { - if (qn->upper > 1) { - /* {0,1}, {1,1} are allowed */ - child_state |= CEC_IN_FINITE_REPEAT; - - /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ - if (env->backrefed_mem == 0) { - if (NODE_TYPE(NODE_QUANT_BODY(qn)) == NODE_ENCLOSURE) { - EnclosureNode* en = ENCLOSURE_(NODE_QUANT_BODY(qn)); - if (en->type == ENCLOSURE_MEMORY) { - if (NODE_TYPE(NODE_ENCLOSURE_BODY(en)) == NODE_QUANT) { - QuantNode* q = QUANT_(NODE_ENCLOSURE_BODY(en)); - if (IS_REPEAT_INFINITE(q->upper) - && q->greedy == qn->greedy) { - qn->upper = (qn->lower == 0 ? 
1 : qn->lower); - if (qn->upper == 1) - child_state = state; - } - } - } - } - } - } - } - - if (state & CEC_IN_FINITE_REPEAT) { - qn->comb_exp_check_num = -1; - } - else { - if (IS_REPEAT_INFINITE(qn->upper)) { - var_num = CEC_INFINITE_NUM; - child_state |= CEC_IN_INFINITE_REPEAT; - } - else { - var_num = qn->upper - qn->lower; - } - - if (var_num >= CEC_THRES_NUM_BIG_REPEAT) - add_state |= CEC_CONT_BIG_REPEAT; - - if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || - ((state & CEC_CONT_BIG_REPEAT) != 0 && - var_num >= CEC_THRES_NUM_BIG_REPEAT)) { - if (qn->comb_exp_check_num == 0) { - env->num_comb_exp_check++; - qn->comb_exp_check_num = env->num_comb_exp_check; - if (env->curr_max_regnum > env->comb_exp_max_regnum) - env->comb_exp_max_regnum = env->curr_max_regnum; - } - } - } - - r = setup_comb_exp_check(target, child_state, env); - r |= add_state; - } - break; - - case NODE_ENCLOSURE: - { - EnclosureNode* en = ENCLOSURE_(node); - - switch (en->type) { - case ENCLOSURE_MEMORY: - { - if (env->curr_max_regnum < en->m.regnum) - env->curr_max_regnum = en->m.regnum; - - r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); - } - break; - - default: - r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); - break; - } - } - break; - -#ifdef USE_CALL - case NODE_CALL: - if (NODE_IS_RECURSION(node)) - env->has_recursion = 1; - else - r = setup_comb_exp_check(NODE_BODY(node), state, env); - break; -#endif - - default: - break; - } - - return r; -} -#endif - #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT -static int +static enum QuantBodyEmpty quantifiers_memory_node_info(Node* node) { int r = QUANT_BODY_IS_EMPTY; @@ -4638,7 +4292,7 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) #define ALLOWED_TYPE_IN_LB \ ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STRING | BIT_NODE_CCLASS \ | BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \ - | BIT_NODE_CALL ) + | BIT_NODE_CALL | BIT_NODE_GIMMICK) 
#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION ) #define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION) @@ -4765,7 +4419,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) } #ifdef USE_OP_PUSH_OR_JUMP_EXACT - if (qn->greedy && (qn->body_empty_info != 0)) { + if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) { if (NODE_TYPE(body) == NODE_QUANT) { QuantNode* tqn = QUANT_(body); if (IS_NOT_NULL(tqn->head_exact)) { @@ -4948,10 +4602,10 @@ set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, typedef struct { OnigLen min; /* min byte length */ OnigLen max; /* max byte length */ -} MinMaxLen; +} MinMax; typedef struct { - MinMaxLen mmd; + MinMax mmd; OnigEncoding enc; OnigOptionType options; OnigCaseFoldType case_fold_flag; @@ -4964,35 +4618,35 @@ typedef struct { } OptAnc; typedef struct { - MinMaxLen mmd; /* info position */ + MinMax mmd; /* position */ OptAnc anc; - int reach_end; - int ignore_case; - int len; - UChar s[OPT_EXACT_MAXLEN]; + int reach_end; + int ignore_case; + int len; + UChar s[OPT_EXACT_MAXLEN]; } OptExact; typedef struct { - MinMaxLen mmd; /* info position */ - OptAnc anc; - int value; /* weighted value */ - UChar map[ONIG_CHAR_TABLE_SIZE]; + MinMax mmd; /* position */ + OptAnc anc; + int value; /* weighted value */ + UChar map[ONIG_CHAR_TABLE_SIZE]; } OptMap; typedef struct { - MinMaxLen len; - OptAnc anc; - OptExact exb; /* boundary */ - OptExact exm; /* middle */ - OptExact expr; /* prec read (?=...) */ - OptMap map; /* boundary */ + MinMax len; + OptAnc anc; + OptExact exb; /* boundary */ + OptExact exm; /* middle */ + OptExact expr; /* prec read (?=...) 
*/ + OptMap map; /* boundary */ } NodeOpt; static int map_position_value(OnigEncoding enc, int i) { - static const short int ByteValTable[] = { + static const short int Vals[] = { 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, @@ -5003,18 +4657,18 @@ map_position_value(OnigEncoding enc, int i) 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 }; - if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { + if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) { if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) return 20; else - return (int )ByteValTable[i]; + return (int )Vals[i]; } else return 4; /* Take it easy. */ } static int -distance_value(MinMaxLen* mm) +distance_value(MinMax* mm) { /* 1000 / (min-max-dist + 1) */ static const short int dist_vals[] = { @@ -5043,7 +4697,7 @@ distance_value(MinMaxLen* mm) } static int -comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2) { if (v2 <= 0) return -1; if (v1 <= 0) return 1; @@ -5060,40 +4714,40 @@ comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) } static int -is_equal_mml(MinMaxLen* a, MinMaxLen* b) +is_equal_mml(MinMax* a, MinMax* b) { return (a->min == b->min && a->max == b->max) ? 
1 : 0; } static void -set_mml(MinMaxLen* mml, OnigLen min, OnigLen max) +set_mml(MinMax* l, OnigLen min, OnigLen max) { - mml->min = min; - mml->max = max; + l->min = min; + l->max = max; } static void -clear_mml(MinMaxLen* mml) +clear_mml(MinMax* l) { - mml->min = mml->max = 0; + l->min = l->max = 0; } static void -copy_mml(MinMaxLen* to, MinMaxLen* from) +copy_mml(MinMax* to, MinMax* from) { to->min = from->min; to->max = from->max; } static void -add_mml(MinMaxLen* to, MinMaxLen* from) +add_mml(MinMax* to, MinMax* from) { to->min = distance_add(to->min, from->min); to->max = distance_add(to->max, from->max); } static void -alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +alt_merge_mml(MinMax* to, MinMax* from) { if (to->min > from->min) to->min = from->min; if (to->max < from->max) to->max = from->max; @@ -5106,10 +4760,10 @@ copy_opt_env(OptEnv* to, OptEnv* from) } static void -clear_opt_anc_info(OptAnc* anc) +clear_opt_anc_info(OptAnc* a) { - anc->left = 0; - anc->right = 0; + a->left = 0; + a->right = 0; } static void @@ -5139,11 +4793,10 @@ concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right, } static int -is_left(int anc) +is_left(int a) { - if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || - anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || - anc == ANCHOR_PREC_READ_NOT) + if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF || + a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT) return 0; return 1; @@ -5183,20 +4836,20 @@ alt_merge_opt_anc_info(OptAnc* to, OptAnc* add) } static int -is_full_opt_exact(OptExact* ex) +is_full_opt_exact(OptExact* e) { - return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); + return (e->len >= OPT_EXACT_MAXLEN ? 
1 : 0); } static void -clear_opt_exact(OptExact* ex) +clear_opt_exact(OptExact* e) { - clear_mml(&ex->mmd); - clear_opt_anc_info(&ex->anc); - ex->reach_end = 0; - ex->ignore_case = 0; - ex->len = 0; - ex->s[0] = '\0'; + clear_mml(&e->mmd); + clear_opt_anc_info(&e->anc); + e->reach_end = 0; + e->ignore_case = 0; + e->len = 0; + e->s[0] = '\0'; } static void @@ -5205,24 +4858,28 @@ copy_opt_exact(OptExact* to, OptExact* from) *to = *from; } -static void +static int concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) { - int i, j, len; + int i, j, len, r; UChar *p, *end; OptAnc tanc; if (! to->ignore_case && add->ignore_case) { - if (to->len >= add->len) return ; /* avoid */ + if (to->len >= add->len) return 0; /* avoid */ to->ignore_case = 1; } + r = 0; p = add->s; end = p + add->len; for (i = to->len; p < end; ) { len = enclen(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; + if (i + len > OPT_EXACT_MAXLEN) { + r = 1; /* 1:full */ + break; + } for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; } @@ -5233,11 +4890,12 @@ concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); if (! 
to->reach_end) tanc.right = 0; copy_opt_anc_info(&to->anc, &tanc); + + return r; } static void -concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, - int raw ARG_UNUSED, OnigEncoding enc) +concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc) { int i, j, len; UChar *p; @@ -5291,31 +4949,31 @@ alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env) static void select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt) { - int v1, v2; + int vn, va; - v1 = now->len; - v2 = alt->len; + vn = now->len; + va = alt->len; - if (v2 == 0) { + if (va == 0) { return ; } - else if (v1 == 0) { + else if (vn == 0) { copy_opt_exact(now, alt); return ; } - else if (v1 <= 2 && v2 <= 2) { + else if (vn <= 2 && va <= 2) { /* ByteValTable[x] is big value --> low price */ - v2 = map_position_value(enc, now->s[0]); - v1 = map_position_value(enc, alt->s[0]); + va = map_position_value(enc, now->s[0]); + vn = map_position_value(enc, alt->s[0]); - if (now->len > 1) v1 += 5; - if (alt->len > 1) v2 += 5; + if (now->len > 1) vn += 5; + if (alt->len > 1) va += 5; } - if (now->ignore_case == 0) v1 *= 2; - if (alt->ignore_case == 0) v2 *= 2; + if (now->ignore_case == 0) vn *= 2; + if (alt->ignore_case == 0) va *= 2; - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0) copy_opt_exact(now, alt); } @@ -5354,17 +5012,17 @@ copy_opt_map(OptMap* to, OptMap* from) } static void -add_char_opt_map(OptMap* map, UChar c, OnigEncoding enc) +add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc) { - if (map->map[c] == 0) { - map->map[c] = 1; - map->value += map_position_value(enc, c); + if (m->map[c] == 0) { + m->map[c] = 1; + m->value += map_position_value(enc, c); } } static int add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end, - OnigEncoding enc, OnigCaseFoldType case_fold_flag) + OnigEncoding enc, OnigCaseFoldType fold_flag) { OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; UChar 
buf[ONIGENC_CODE_TO_MBC_MAXLEN]; @@ -5372,8 +5030,8 @@ add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end, add_char_opt_map(map, p[0], enc); - case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); + fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items); if (n < 0) return n; for (i = 0; i < n; i++) { @@ -5389,7 +5047,7 @@ select_opt_map(OptMap* now, OptMap* alt) { static int z = 1<<15; /* 32768: something big value */ - int v1, v2; + int vn, va; if (alt->value == 0) return ; if (now->value == 0) { @@ -5397,9 +5055,9 @@ select_opt_map(OptMap* now, OptMap* alt) return ; } - v1 = z / now->value; - v2 = z / alt->value; - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + vn = z / now->value; + va = z / alt->value; + if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0) copy_opt_map(now, alt); } @@ -5407,13 +5065,13 @@ static int comp_opt_exact_or_map(OptExact* e, OptMap* m) { #define COMP_EM_BASE 20 - int ve, vm; + int ae, am; if (m->value <= 0) return -1; - ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); - vm = COMP_EM_BASE * 5 * 2 / m->value; - return comp_distance_value(&e->mmd, &m->mmd, ve, vm); + ae = COMP_EM_BASE * e->len * (e->ignore_case ? 
1 : 2); + am = COMP_EM_BASE * 5 * 2 / m->value; + return comp_distance_value(&e->mmd, &m->mmd, ae, am); } static void @@ -5444,11 +5102,11 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add) } static void -set_bound_node_opt_info(NodeOpt* opt, MinMaxLen* mmd) +set_bound_node_opt_info(NodeOpt* opt, MinMax* plen) { - copy_mml(&(opt->exb.mmd), mmd); - copy_mml(&(opt->expr.mmd), mmd); - copy_mml(&(opt->map.mmd), mmd); + copy_mml(&(opt->exb.mmd), plen); + copy_mml(&(opt->expr.mmd), plen); + copy_mml(&(opt->map.mmd), plen); } static void @@ -5543,10 +5201,12 @@ alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env) static int optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) { - OnigEncoding enc; int i; - int r = 0; + int r; + NodeOpt xo; + OnigEncoding enc; + r = 0; enc = env->enc; clear_node_opt_info(opt); set_bound_node_opt_info(opt, &env->mmd); @@ -5555,15 +5215,14 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case NODE_LIST: { OptEnv nenv; - NodeOpt nopt; Node* nd = node; copy_opt_env(&nenv, env); do { - r = optimize_nodes(NODE_CAR(nd), &nopt, &nenv); + r = optimize_nodes(NODE_CAR(nd), &xo, &nenv); if (r == 0) { - add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(enc, opt, &nopt); + add_mml(&nenv.mmd, &xo.len); + concat_left_node_opt_info(enc, opt, &xo); } } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd))); } @@ -5571,14 +5230,13 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case NODE_ALT: { - NodeOpt nopt; Node* nd = node; do { - r = optimize_nodes(NODE_CAR(nd), &nopt, env); + r = optimize_nodes(NODE_CAR(nd), &xo, env); if (r == 0) { - if (nd == node) copy_node_opt_info(opt, &nopt); - else alt_merge_node_opt_info(opt, &nopt, env); + if (nd == node) copy_node_opt_info(opt, &xo); + else alt_merge_node_opt_info(opt, &xo, env); } } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd))); } @@ -5588,11 +5246,10 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) { StrNode* sn = STR_(node); int slen = (int )(sn->end 
- sn->s); - int is_raw = NODE_STRING_IS_RAW(node); + /* int is_raw = NODE_STRING_IS_RAW(node); */ if (! NODE_STRING_IS_AMBIG(node)) { - concat_opt_exact_str(&opt->exb, sn->s, sn->end, - NODE_STRING_IS_RAW(node), enc); + concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc); if (slen > 0) { add_char_opt_map(&opt->map, *(sn->s), enc); } @@ -5606,7 +5263,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) max = ONIGENC_MBC_MAXLEN_DIST(enc) * n; } else { - concat_opt_exact_str(&opt->exb, sn->s, sn->end, is_raw, enc); + concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc); opt->exb.ignore_case = 1; if (slen > 0) { @@ -5709,19 +5366,17 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case ANCHOR_PREC_READ: { - NodeOpt nopt; - - r = optimize_nodes(NODE_BODY(node), &nopt, env); + r = optimize_nodes(NODE_BODY(node), &xo, env); if (r == 0) { - if (nopt.exb.len > 0) - copy_opt_exact(&opt->expr, &nopt.exb); - else if (nopt.exm.len > 0) - copy_opt_exact(&opt->expr, &nopt.exm); + if (xo.exb.len > 0) + copy_opt_exact(&opt->expr, &xo.exb); + else if (xo.exm.len > 0) + copy_opt_exact(&opt->expr, &xo.exm); opt->expr.reach_end = 0; - if (nopt.map.value > 0) - copy_opt_map(&opt->map, &nopt.map); + if (xo.map.value > 0) + copy_opt_map(&opt->map, &xo.map); } } break; @@ -5771,48 +5426,47 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case NODE_QUANT: { OnigLen min, max; - NodeOpt nopt; QuantNode* qn = QUANT_(node); - r = optimize_nodes(NODE_BODY(node), &nopt, env); + r = optimize_nodes(NODE_BODY(node), &xo, env); if (r != 0) break; - if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { + if (qn->lower > 0) { + copy_node_opt_info(opt, &xo); + if (xo.exb.len > 0) { + if (xo.exb.reach_end) { + for (i = 2; i <= qn->lower && ! 
is_full_opt_exact(&opt->exb); i++) { + int rc = concat_opt_exact(&opt->exb, &xo.exb, enc); + if (rc > 0) break; + } + if (i < qn->lower) opt->exb.reach_end = 0; + } + } + + if (qn->lower != qn->upper) { + opt->exb.reach_end = 0; + opt->exm.reach_end = 0; + } + if (qn->lower > 1) + opt->exm.reach_end = 0; + } + + if (IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML); else - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF); } + + max = (xo.len.max > 0 ? INFINITE_LEN : 0); } else { - if (qn->lower > 0) { - copy_node_opt_info(opt, &nopt); - if (nopt.exb.len > 0) { - if (nopt.exb.reach_end) { - for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) { - concat_opt_exact(&opt->exb, &nopt.exb, enc); - } - if (i < qn->lower) opt->exb.reach_end = 0; - } - } - - if (qn->lower != qn->upper) { - opt->exb.reach_end = 0; - opt->exm.reach_end = 0; - } - if (qn->lower > 1) - opt->exm.reach_end = 0; - } + max = distance_multiply(xo.len.max, qn->upper); } - min = distance_multiply(nopt.len.min, qn->lower); - if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? 
INFINITE_LEN : 0); - else - max = distance_multiply(nopt.len.max, qn->upper); - + min = distance_multiply(xo.len.min, qn->lower); set_mml(&opt->len, min, max); } break; @@ -5848,9 +5502,9 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) #endif { r = optimize_nodes(NODE_BODY(node), opt, env); - if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { + if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) { if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum)) - remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); + remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK); } } break; @@ -5862,24 +5516,23 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case ENCLOSURE_IF_ELSE: { OptEnv nenv; - NodeOpt nopt; copy_opt_env(&nenv, env); - r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); + r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv); if (r == 0) { - add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(enc, opt, &nopt); + add_mml(&nenv.mmd, &xo.len); + concat_left_node_opt_info(enc, opt, &xo); if (IS_NOT_NULL(en->te.Then)) { - r = optimize_nodes(en->te.Then, &nopt, &nenv); + r = optimize_nodes(en->te.Then, &xo, &nenv); if (r == 0) { - concat_left_node_opt_info(enc, opt, &nopt); + concat_left_node_opt_info(enc, opt, &xo); } } if (IS_NOT_NULL(en->te.Else)) { - r = optimize_nodes(en->te.Else, &nopt, env); + r = optimize_nodes(en->te.Else, &xo, env); if (r == 0) - alt_merge_node_opt_info(opt, &nopt, env); + alt_merge_node_opt_info(opt, &xo, env); } } } @@ -5914,12 +5567,12 @@ set_optimize_exact(regex_t* reg, OptExact* e) CHECK_NULL_RETURN_MEMERR(reg->exact); xmemcpy(reg->exact, e->s, e->len); reg->exact_end = reg->exact + e->len; - reg->optimize = ONIG_OPTIMIZE_EXACT_IC; + reg->optimize = OPTIMIZE_EXACT_IC; } else { int allow_reverse; - reg->exact = str_dup(e->s, e->s + e->len); + reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len); CHECK_NULL_RETURN_MEMERR(reg->exact); reg->exact_end = reg->exact + 
e->len; @@ -5932,10 +5585,10 @@ set_optimize_exact(regex_t* reg, OptExact* e) if (r != 0) return r; reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV); } else { - reg->optimize = ONIG_OPTIMIZE_EXACT; + reg->optimize = OPTIMIZE_EXACT; } } @@ -5957,7 +5610,7 @@ set_optimize_map(regex_t* reg, OptMap* m) for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) reg->map[i] = m->map[i]; - reg->optimize = ONIG_OPTIMIZE_MAP; + reg->optimize = OPTIMIZE_MAP; reg->dmin = m->mmd.min; reg->dmax = m->mmd.max; @@ -5994,11 +5647,11 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (r != 0) return r; reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML | ANCHOR_LOOK_BEHIND); if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) - reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; + reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML; reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | ANCHOR_PREC_READ_NOT); @@ -6038,7 +5691,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) static void clear_optimize_info(regex_t* reg) { - reg->optimize = ONIG_OPTIMIZE_NONE; + reg->optimize = OPTIMIZE_NONE; reg->anchor = 0; reg->anchor_dmin = 0; reg->anchor_dmax = 0; @@ -6141,14 +5794,14 @@ print_anchor(FILE* f, int anchor) q = 1; fprintf(f, "end-line"); } - if (anchor & ANCHOR_ANYCHAR_STAR) { + if (anchor & ANCHOR_ANYCHAR_INF) { if (q) fprintf(f, ", "); q = 1; - fprintf(f, "anychar-star"); + fprintf(f, "anychar-inf"); } - if (anchor & ANCHOR_ANYCHAR_STAR_ML) { + if (anchor & ANCHOR_ANYCHAR_INF_ML) { if (q) fprintf(f, ", "); - fprintf(f, "anychar-star-ml"); + fprintf(f, "anychar-inf-ml"); } fprintf(f, "]"); @@ -6180,7 +5833,7 @@ print_optimize_info(FILE* f, regex_t* reg) } fprintf(f, "]: length: %ld\n", 
(reg->exact_end - reg->exact)); } - else if (reg->optimize & ONIG_OPTIMIZE_MAP) { + else if (reg->optimize & OPTIMIZE_MAP) { int c, i, n = 0; for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) @@ -6208,6 +5861,66 @@ print_optimize_info(FILE* f, regex_t* reg) #endif +extern RegexExt* +onig_get_regex_ext(regex_t* reg) +{ + if (IS_NULL(REG_EXTP(reg))) { + RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext)); + if (IS_NULL(ext)) return 0; + + ext->pattern = 0; + ext->pattern_end = 0; +#ifdef USE_CALLOUT + ext->tag_table = 0; + ext->callout_num = 0; + ext->callout_list_alloc = 0; + ext->callout_list = 0; +#endif + + REG_EXTPL(reg) = (void* )ext; + } + + return REG_EXTP(reg); +} + +static void +free_regex_ext(RegexExt* ext) +{ + if (IS_NOT_NULL(ext)) { + if (IS_NOT_NULL(ext->pattern)) + xfree((void* )ext->pattern); + +#ifdef USE_CALLOUT + if (IS_NOT_NULL(ext->tag_table)) + onig_callout_tag_table_free(ext->tag_table); + + if (IS_NOT_NULL(ext->callout_list)) + onig_free_reg_callout_list(ext->callout_num, ext->callout_list); +#endif + + xfree(ext); + } +} + +extern int +onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end) +{ + RegexExt* ext; + UChar* s; + + ext = onig_get_regex_ext(reg); + CHECK_NULL_RETURN_MEMERR(ext); + + s = onigenc_strdup(reg->enc, pattern, pattern_end); + CHECK_NULL_RETURN_MEMERR(s); + + ext->pattern = s; + ext->pattern_end = s + (pattern_end - pattern); + + return ONIG_NORMAL; +} + + extern void onig_free_body(regex_t* reg) { @@ -6217,7 +5930,10 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(REG_EXTP(reg))) xfree(REG_EXTP(reg)); + if (IS_NOT_NULL(REG_EXTP(reg))) { + free_regex_ext(REG_EXTP(reg)); + REG_EXTPL(reg) = 0; + } onig_names_free(reg); } @@ -6245,9 +5961,6 @@ onig_transfer(regex_t* to, regex_t* from) } -#ifdef ONIG_DEBUG_COMPILE -static void 
print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); -#endif #ifdef ONIG_DEBUG_PARSE static void print_tree P_((FILE* f, Node* node)); #endif @@ -6286,9 +5999,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->num_null_check = 0; reg->repeat_range_alloc = 0; reg->repeat_range = (OnigRepeatRange* )NULL; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - reg->num_comb_exp_check = 0; -#endif r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env); if (r != 0) goto err; @@ -6346,33 +6056,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } reg->bt_mem_start |= reg->bt_mem_end; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (scan_env.backrefed_mem == 0 -#ifdef USE_CALL - || scan_env.num_call == 0 -#endif - ) { - setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_CALL - if (scan_env.has_recursion != 0) { - scan_env.num_comb_exp_check = 0; - } - else -#endif - if (scan_env.comb_exp_max_regnum > 0) { - int i; - for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { - if (MEM_STATUS_AT(scan_env.backrefed_mem, i) != 0) { - scan_env.num_comb_exp_check = 0; - break; - } - } - } - } - - reg->num_comb_exp_check = scan_env.num_comb_exp_check; -#endif - clear_optimize_info(reg); #ifndef ONIG_DONT_OPTIMIZE r = set_optimize_info_from_tree(root, reg, &scan_env); @@ -6398,13 +6081,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = add_opcode(reg, OP_END); #ifdef USE_CALL if (scan_env.num_call > 0) { - r = unset_addr_list_fix(&uslist, reg); + r = fix_unset_addr_list(&uslist, reg); unset_addr_list_end(&uslist); if (r != 0) goto err; } #endif - if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) + if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0) +#ifdef USE_CALLOUT + || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0) +#endif + ) reg->stack_pop_level = STACK_POP_LEVEL_ALL; else { if (reg->bt_mem_start != 0) @@ -6422,7 +6109,7 @@ onig_compile(regex_t* reg, const 
UChar* pattern, const UChar* pattern_end, #ifdef ONIG_DEBUG_COMPILE onig_print_names(stderr, reg); - print_compiled_byte_code_list(stderr, reg); + onig_print_compiled_byte_code_list(stderr, reg); #endif end: @@ -6464,11 +6151,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl #if 0 return ONIGERR_LIBRARY_IS_NOT_INITIALIZED; #else - r = onig_initialize(NULL, 0); - if (r != 0) - return ONIGERR_FAIL_TO_INITIALIZE; - - r = onig_initialize_encoding(enc); + r = onig_initialize(&enc, 1); if (r != 0) return ONIGERR_FAIL_TO_INITIALIZE; @@ -6569,16 +6252,21 @@ onig_initialize(OnigEncoding encodings[], int n) return r; } - return 0; + return ONIG_NORMAL; } -static OnigEndCallListItemType* EndCallTop; +typedef struct EndCallListItem { + struct EndCallListItem* next; + void (*func)(void); +} EndCallListItemType; + +static EndCallListItemType* EndCallTop; extern void onig_add_end_call(void (*func)(void)) { - OnigEndCallListItemType* item; + EndCallListItemType* item; - item = (OnigEndCallListItemType* )xmalloc(sizeof(*item)); + item = (EndCallListItemType* )xmalloc(sizeof(*item)); if (item == 0) return ; item->next = EndCallTop; @@ -6590,7 +6278,7 @@ extern void onig_add_end_call(void (*func)(void)) static void exec_end_call_list(void) { - OnigEndCallListItemType* prev; + EndCallListItemType* prev; void (*func)(void); while (EndCallTop != 0) { @@ -6608,6 +6296,12 @@ onig_end(void) { exec_end_call_list(); +#ifdef USE_CALLOUT + onig_global_callout_names_free(); +#endif + + onigenc_end(); + onig_inited = 0; return 0; @@ -6673,144 +6367,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) } -#ifdef ONIG_DEBUG - -/* arguments type */ -#define ARG_SPECIAL -1 -#define ARG_NON 0 -#define ARG_RELADDR 1 -#define ARG_ABSADDR 2 -#define ARG_LENGTH 3 -#define ARG_MEMNUM 4 -#define ARG_OPTION 5 -#define ARG_STATE_CHECK 6 -#define ARG_MODE 7 - -OnigOpInfoType OnigOpInfo[] = { - { OP_FINISH, "finish", ARG_NON }, - { OP_END, "end", 
ARG_NON }, - { OP_EXACT1, "exact1", ARG_SPECIAL }, - { OP_EXACT2, "exact2", ARG_SPECIAL }, - { OP_EXACT3, "exact3", ARG_SPECIAL }, - { OP_EXACT4, "exact4", ARG_SPECIAL }, - { OP_EXACT5, "exact5", ARG_SPECIAL }, - { OP_EXACTN, "exactn", ARG_SPECIAL }, - { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, - { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, - { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, - { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, - { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, - { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, - { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, - { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, - { OP_CCLASS, "cclass", ARG_SPECIAL }, - { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, - { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, - { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, - { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, - { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, -#ifdef USE_OP_CCLASS_NODE - { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, -#endif - { OP_ANYCHAR, "anychar", ARG_NON }, - { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, - { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, - { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, - { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, - { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, - { OP_WORD, "word", ARG_NON }, - { OP_WORD_ASCII, "word-ascii", ARG_NON }, - { OP_NO_WORD, "not-word", ARG_NON }, - { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON }, - { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE }, - { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE }, - { OP_WORD_BEGIN, "word-begin", ARG_MODE }, - { OP_WORD_END, "word-end", ARG_MODE }, - { OP_BEGIN_BUF, "begin-buf", ARG_NON }, - { OP_END_BUF, "end-buf", ARG_NON }, - { OP_BEGIN_LINE, "begin-line", ARG_NON }, - { OP_END_LINE, "end-line", ARG_NON }, - { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, - { OP_BEGIN_POSITION, "begin-position", ARG_NON }, - { OP_BACKREF1, "backref1", ARG_NON }, - { 
OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREF_N, "backref-n", ARG_MEMNUM }, - { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL }, - { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, - { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL }, - { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL }, - { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, - { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, - { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, - { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, - { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, - { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, - { OP_SET_OPTION, "set-option", ARG_OPTION }, - { OP_FAIL, "fail", ARG_NON }, - { OP_JUMP, "jump", ARG_RELADDR }, - { OP_PUSH, "push", ARG_RELADDR }, - { OP_PUSH_SUPER, "push_SUPER", ARG_RELADDR }, - { OP_POP, "pop", ARG_NON }, - { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, - { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, - { OP_REPEAT, "repeat", ARG_SPECIAL }, - { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, - { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, - { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, - { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, - { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM }, - { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, - { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, - { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, - { OP_PREC_READ_START, "push-pos", ARG_NON }, - { OP_PREC_READ_END, "pop-pos", ARG_NON }, - { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR }, - { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON }, - { OP_ATOMIC_START, "atomic-start", ARG_NON }, - { 
OP_ATOMIC_END, "atomic-end", ARG_NON }, - { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, - { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, - { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON }, - { OP_CALL, "call", ARG_ABSADDR }, - { OP_RETURN, "return", ARG_NON }, - { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, - { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, - { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, - { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, - { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_ML_STAR, - "state-check-anychar-ml*", ARG_STATE_CHECK }, - { -1, "", ARG_NON } -}; - -static char* -op2name(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].name; - } - return ""; -} - -static int -op2arg_type(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].arg_type; - } - return ARG_SPECIAL; -} +#ifdef ONIG_DEBUG_PARSE static void p_string(FILE* f, int len, UChar* s) @@ -6820,326 +6377,6 @@ p_string(FILE* f, int len, UChar* s) } static void -p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) -{ - int x = len * mb_len; - - fprintf(f, ":%d:", len); - while (x-- > 0) { fputc(*s++, f); } -} - -static void -p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) -{ - RelAddrType curr = (RelAddrType )(p - start); - - fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); -} - -extern void -onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, - OnigEncoding enc) -{ - int i, n, arg_type; - RelAddrType addr; - LengthType len; - MemNumType mem; - StateCheckNumType scn; - OnigCodePoint code; - OnigOptionType option; - ModeType mode; - UChar *q; - - fprintf(f, "%s", op2name(*bp)); - arg_type 
= op2arg_type(*bp); - if (arg_type != ARG_SPECIAL) { - bp++; - switch (arg_type) { - case ARG_NON: - break; - case ARG_RELADDR: - GET_RELADDR_INC(addr, bp); - fputc(':', f); - p_rel_addr(f, addr, bp, start); - break; - case ARG_ABSADDR: - GET_ABSADDR_INC(addr, bp); - fprintf(f, ":{/%d}", addr); - break; - case ARG_LENGTH: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - case ARG_MEMNUM: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - case ARG_OPTION: - { - OnigOptionType option = *((OnigOptionType* )bp); - bp += SIZE_OPTION; - fprintf(f, ":%d", option); - } - break; - - case ARG_STATE_CHECK: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - fprintf(f, ":%d", scn); - break; - - case ARG_MODE: - mode = *((ModeType* )bp); - bp += SIZE_MODE; - fprintf(f, ":%d", mode); - break; - } - } - else { - switch (*bp++) { - case OP_EXACT1: - case OP_ANYCHAR_STAR_PEEK_NEXT: - case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p_string(f, 1, bp++); break; - case OP_EXACT2: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACT3: - p_string(f, 3, bp); bp += 3; break; - case OP_EXACT4: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACT5: - p_string(f, 5, bp); bp += 5; break; - case OP_EXACTN: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_EXACTMB2N1: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACTMB2N2: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACTMB2N3: - p_string(f, 6, bp); bp += 6; break; - case OP_EXACTMB2N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 2, bp); - bp += len * 2; - break; - case OP_EXACTMB3N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 3, bp); - bp += len * 3; - break; - case OP_EXACTMBN: - { - int mb_len; - - GET_LENGTH_INC(mb_len, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:%d:", mb_len, len); - n = len * mb_len; - while (n-- > 0) { fputc(*bp++, f); } - } - break; - - case OP_EXACT1_IC: - len = enclen(enc, bp); - p_string(f, len, 
bp); - bp += len; - break; - case OP_EXACTN_IC: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_CCLASS: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_MB: - case OP_CCLASS_MB_NOT: - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d", (int )code, len); - break; - - case OP_CCLASS_MIX: - case OP_CCLASS_MIX_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d:%d", n, (int )code, len); - break; - -#ifdef USE_OP_CCLASS_NODE - case OP_CCLASS_NODE: - { - CClassNode *cc; - - GET_POINTER_INC(cc, bp); - n = bitset_on_num(cc->bs); - fprintf(f, ":%p:%d", cc, n); - } - break; -#endif - - case OP_BACKREF_N_IC: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - - case OP_BACKREF_MULTI_IC: - case OP_BACKREF_MULTI: - case OP_BACKREF_CHECK: - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - break; - - case OP_BACKREF_WITH_LEVEL: - GET_OPTION_INC(option, bp); - fprintf(f, ":%d", option); - /* fall */ - case OP_BACKREF_CHECK_WITH_LEVEL: - { - LengthType level; - - GET_LENGTH_INC(level, bp); - fprintf(f, ":%d", level); - - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - } - break; - - case OP_REPEAT: - case OP_REPEAT_NG: - { - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - 
fprintf(f, ":%d:%d", mem, addr); - } - break; - - case OP_PUSH_OR_JUMP_EXACT1: - case OP_PUSH_IF_PEEK_NEXT: - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fputc(':', f); - p_rel_addr(f, addr, bp, start); - p_string(f, 1, bp); - bp += 1; - break; - - case OP_LOOK_BEHIND: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - - case OP_LOOK_BEHIND_NOT_START: - GET_RELADDR_INC(addr, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:", len); - p_rel_addr(f, addr, bp, start); - break; - - case OP_STATE_CHECK_PUSH: - case OP_STATE_CHECK_PUSH_OR_JUMP: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:", scn); - p_rel_addr(f, addr, bp, start); - break; - - case OP_PUSH_SAVE_VAL: - { - SaveType type; - GET_SAVE_TYPE_INC(type, bp); - GET_MEMNUM_INC(mem, bp); - fprintf(f, ":%d:%d", type, mem); - } - break; - - case OP_UPDATE_VAR: - { - UpdateVarType type; - GET_UPDATE_VAR_TYPE_INC(type, bp); - GET_MEMNUM_INC(mem, bp); - fprintf(f, ":%d:%d", type, mem); - } - break; - - default: - fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); - } - } - if (nextp) *nextp = bp; -} -#endif /* ONIG_DEBUG */ - -#ifdef ONIG_DEBUG_COMPILE -static void -print_compiled_byte_code_list(FILE* f, regex_t* reg) -{ - UChar* bp; - UChar* start = reg->p; - UChar* end = reg->p + reg->used; - - fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", - reg->bt_mem_start, reg->bt_mem_end); - fprintf(f, "code-length: %d\n", reg->used); - - bp = start; - while (bp < end) { - int pos = bp - start; - - fprintf(f, "%4d: ", pos); - onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); - fprintf(f, "\n"); - } - fprintf(f, "\n"); -} -#endif - -#ifdef ONIG_DEBUG_PARSE - -static void Indent(FILE* f, int indent) { int i; @@ -7334,6 +6571,17 @@ print_indent_tree(FILE* f, Node* node, int indent) case GIMMICK_UPDATE_VAR: fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, 
GIMMICK_(node)->id); break; +#ifdef USE_CALLOUT + case GIMMICK_CALLOUT: + switch (GIMMICK_(node)->detail_type) { + case ONIG_CALLOUT_OF_CONTENTS: + fprintf(f, "callout:contents:%d", GIMMICK_(node)->num); + break; + case ONIG_CALLOUT_OF_NAME: + fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num); + break; + } +#endif } break; diff --git a/src/regenc.c b/src/regenc.c index 7ded5a8..21f3536 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -2,7 +2,7 @@ regenc.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,66 @@ OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; +#define INITED_LIST_SIZE 20 + +static int InitedListNum; + +static struct { + OnigEncoding enc; + int inited; +} InitedList[INITED_LIST_SIZE]; + +static int +enc_inited_entry(OnigEncoding enc) +{ + int i; + + for (i = 0; i < InitedListNum; i++) { + if (InitedList[i].enc == enc) { + InitedList[i].inited = 1; + return i; + } + } + + i = InitedListNum; + if (i < INITED_LIST_SIZE - 1) { + InitedList[i].enc = enc; + InitedList[i].inited = 1; + InitedListNum++; + return i; + } + + return -1; +} + +static int +enc_is_inited(OnigEncoding enc) +{ + int i; + + for (i = 0; i < InitedListNum; i++) { + if (InitedList[i].enc == enc) { + return InitedList[i].inited; + } + } + + return 0; +} + +extern int +onigenc_end(void) +{ + int i; + + for (i = 0; i < InitedListNum; i++) { + InitedList[i].enc = 0; + InitedList[i].inited = 0; + } + + InitedListNum = 0; + return ONIG_NORMAL; +} + extern int onigenc_init(void) { @@ -40,8 +100,23 @@ onigenc_init(void) extern int onig_initialize_encoding(OnigEncoding enc) { - if (enc->init != 0 && (enc->is_initialized() == 0)) { - int r = 
(enc->init)(); + int r; + + if (enc != ONIG_ENCODING_ASCII && + ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) { + OnigEncoding ascii = ONIG_ENCODING_ASCII; + if (ascii->init != 0 && enc_is_inited(ascii) == 0) { + r = ascii->init(); + if (r != ONIG_NORMAL) return r; + enc_inited_entry(ascii); + } + } + + if (enc->init != 0 && + enc_is_inited(enc) == 0) { + r = (enc->init)(); + if (r == ONIG_NORMAL) + enc_inited_entry(enc); return r; } @@ -62,6 +137,25 @@ onigenc_set_default_encoding(OnigEncoding enc) } extern UChar* +onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end) +{ + int slen, term_len, i; + UChar *r; + + slen = (int )(end - s); + term_len = ONIGENC_MBC_MINLEN(enc); + + r = (UChar* )xmalloc(slen + term_len); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, slen); + + for (i = 0; i < term_len; i++) + r[slen + i] = (UChar )0; + + return r; +} + +extern UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) { UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); diff --git a/src/regenc.h b/src/regenc.h index cda3bcd..4dd89ba 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,11 +29,12 @@ * SUCH DAMAGE. 
*/ -#ifndef PACKAGE -/* PACKAGE is defined in config.h */ -#include "config.h" +#ifndef ONIGURUMA_EXPORT +#define ONIGURUMA_EXPORT #endif +#include "config.h" + #ifdef ONIG_ESCAPE_UCHAR_COLLISION #undef ONIG_ESCAPE_UCHAR_COLLISION #endif @@ -118,51 +119,53 @@ struct PropertyNameCtype { #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII + /* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); +extern int onigenc_end(void); +extern int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, 
OnigCaseFoldCodeItem items[])); +extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); +extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); /* methods for single byte encoding */ -ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); -ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); -ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); -ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end)); +extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +extern int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); +extern OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); +extern int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); +extern int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); +extern UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); +extern int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); +extern int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); +extern int 
onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end)); +extern int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end)); /* methods for multi byte encoding */ -ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); -ONIG_EXTERN struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); -//ONIG_EXTERN const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); +extern OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); +extern int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +extern int onigenc_mb2_code_to_mbclen 
P_((OnigCodePoint code)); +extern int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +extern int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +extern int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +extern int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end)); +extern int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +extern int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); +extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +extern struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); +extern struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); +//extern const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); /* in enc/unicode.c */ -ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); -ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_egcb_is_break_position P_((OnigEncoding enc, 
UChar* p, UChar* prev, const UChar* start, const UChar* end)); +extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); +extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); +extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[])); +extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); +extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end)); #define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) @@ -218,21 +221,21 @@ extern int onig_codes_byte_at(OnigCodePoint code[], int at); #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ OnigEncISO_8859_1_ToUpperCaseTable[c] -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; +extern const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +extern const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; -ONIG_EXTERN int +extern int onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); -ONIG_EXTERN UChar* +extern UChar* onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); /* defined in regexec.c, but used in enc/xxx.c */ extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); -ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; -ONIG_EXTERN const unsigned short 
OnigEncAsciiCtypeTable[]; +extern OnigEncoding OnigEncDefaultCharEncoding; +extern const UChar OnigEncAsciiToLowerCaseTable[]; +extern const UChar OnigEncAsciiToUpperCaseTable[]; +extern const unsigned short OnigEncAsciiCtypeTable[]; #define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) @@ -249,4 +252,6 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; #define ONIGENC_IS_UNICODE_ENCODING(enc) \ ((enc)->is_code_ctype == onigenc_unicode_is_code_ctype) +#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc) ((enc)->min_enc_len == 1) + #endif /* REGENC_H */ diff --git a/src/regerror.c b/src/regerror.c index e7d2570..70efe9a 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,19 +43,21 @@ onig_error_code_to_format(int code) { char *p; - if (code >= 0) return (UChar* )0; - switch (code) { case ONIG_MISMATCH: p = "mismatch"; break; case ONIG_NO_SUPPORT_CONFIG: p = "no support in this configuration"; break; + case ONIG_ABORT: + p = "abort"; break; case ONIGERR_MEMORY: p = "fail to memory allocation"; break; case ONIGERR_MATCH_STACK_LIMIT_OVER: p = "match-stack limit over"; break; case ONIGERR_PARSE_DEPTH_LIMIT_OVER: p = "parse depth limit over"; break; + case ONIGERR_RETRY_LIMIT_IN_MATCH_OVER: + p = "retry-limit-in-match over"; break; case ONIGERR_TYPE_BUG: p = "undefined type (bug)"; break; case ONIGERR_PARSER_BUG: @@ -172,6 +174,18 @@ onig_error_code_to_format(int code) p = "invalid absent group pattern"; break; case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN: p = "invalid absent group generator pattern"; break; + case ONIGERR_INVALID_CALLOUT_PATTERN: + p = "invalid callout pattern"; 
break; + case ONIGERR_INVALID_CALLOUT_NAME: + p = "invalid callout name"; break; + case ONIGERR_UNDEFINED_CALLOUT_NAME: + p = "undefined callout name"; break; + case ONIGERR_INVALID_CALLOUT_BODY: + p = "invalid callout body"; break; + case ONIGERR_INVALID_CALLOUT_TAG_NAME: + p = "invalid callout tag name"; break; + case ONIGERR_INVALID_CALLOUT_ARG: + p = "invalid callout arg"; break; case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: p = "not supported encoding combination"; break; case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: diff --git a/src/regexec.c b/src/regexec.c index 53f42ee..35e3698 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,11 +26,8 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ - #include "regint.h" -#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - #define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \ ((mode) == 0 ? 
ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end)) @@ -40,6 +37,565 @@ ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) #endif +#define CHECK_INTERRUPT_IN_MATCH + +#ifdef USE_CALLOUT +typedef struct { + int last_match_at_call_counter; + struct { + OnigType type; + OnigValue val; + } slot[ONIG_CALLOUT_DATA_SLOT_NUM]; +} CalloutData; +#endif + +struct OnigMatchParamStruct { + unsigned int match_stack_limit; + unsigned long retry_limit_in_match; + OnigCalloutFunc progress_callout_of_contents; + OnigCalloutFunc retraction_callout_of_contents; +#ifdef USE_CALLOUT + int match_at_call_counter; + void* callout_user_data; + CalloutData* callout_data; + int callout_data_alloc_num; +#endif +}; + +extern int +onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param, + unsigned int limit) +{ + param->match_stack_limit = limit; + return ONIG_NORMAL; +} + +extern int +onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param, + unsigned long limit) +{ + param->retry_limit_in_match = limit; + return ONIG_NORMAL; +} + +extern int +onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ + param->progress_callout_of_contents = f; + return ONIG_NORMAL; +} + +extern int +onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ + param->retraction_callout_of_contents = f; + return ONIG_NORMAL; +} + + + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + int ptr_num; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ + unsigned int match_stack_limit; + unsigned long retry_limit_in_match; + OnigMatchParam* mp; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +} MatchArg; + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL -1 +#define ARG_NON 0 +#define ARG_RELADDR 1 +#define ARG_ABSADDR 2 +#define ARG_LENGTH 3 +#define ARG_MEMNUM 
4 +#define ARG_OPTION 5 +#define ARG_MODE 6 + +typedef struct { + short int opcode; + char* name; + short int arg_type; +} OpInfoType; + +static OpInfoType OpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, +#ifdef USE_OP_CCLASS_NODE + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, +#endif + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { OP_WORD_ASCII, "word-ascii", ARG_NON }, + { OP_NO_WORD, "not-word", ARG_NON }, + { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON }, + { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE }, + { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE }, + { OP_WORD_BEGIN, "word-begin", ARG_MODE }, + { OP_WORD_END, "word-end", ARG_MODE }, + { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "extended-grapheme-cluster-boundary", ARG_NON }, 
+ { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREF_N, "backref-n", ARG_MEMNUM }, + { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL }, + { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL }, + { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_PUSH_SUPER, "push-super", ARG_RELADDR }, + { OP_POP_OUT, "pop-out", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, 
+ { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, + { OP_PREC_READ_START, "push-pos", ARG_NON }, + { OP_PREC_READ_END, "pop-pos", ARG_NON }, + { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR }, + { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON }, + { OP_ATOMIC_START, "atomic-start", ARG_NON }, + { OP_ATOMIC_END, "atomic-end", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, + { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, +#ifdef USE_CALLOUT + { OP_CALLOUT_CONTENTS, "callout-contents", ARG_SPECIAL }, + { OP_CALLOUT_NAME, "callout-name", ARG_SPECIAL }, +#endif + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + if (opcode == OpInfo[i].opcode) + return OpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + if (opcode == OpInfo[i].opcode) + return OpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +static void +p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) +{ + RelAddrType curr = (RelAddrType )(p - start); + + fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); +} + +static int +bitset_on_num(BitSetRef bs) +{ + int i, n; + + n = 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(bs, i)) n++; + } + return n; +} + +extern void 
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, + OnigEncoding enc) +{ + int i, n, arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + OnigCodePoint code; + OnigOptionType option; + ModeType mode; + UChar *q; + + fprintf(f, "%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fputc(':', f); + p_rel_addr(f, addr, bp, start); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":{/%d}", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + + case ARG_MODE: + mode = *((ModeType* )bp); + bp += SIZE_MODE; + fprintf(f, ":%d", mode); + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); 
+ fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, bp); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + +#ifdef USE_OP_CCLASS_NODE + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%p:%d", cc, n); + } + break; +#endif + + case OP_BACKREF_N_IC: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + case OP_BACKREF_CHECK: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_BACKREF_WITH_LEVEL: + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + /* fall */ + case OP_BACKREF_CHECK_WITH_LEVEL: + { + LengthType level; + + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, 
"%d", mem); + } + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fputc(':', f); + p_rel_addr(f, addr, bp, start); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_LOOK_BEHIND_NOT_START: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:", len); + p_rel_addr(f, addr, bp, start); + break; + + case OP_PUSH_SAVE_VAL: + { + SaveType type; + GET_SAVE_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + + case OP_UPDATE_VAR: + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + +#ifdef USE_CALLOUT + case OP_CALLOUT_CONTENTS: + { + GET_MEMNUM_INC(mem, bp); // number + fprintf(f, ":%d", mem); + } + break; + + case OP_CALLOUT_NAME: + { + int id; + + GET_MEMNUM_INC(id, bp); // id + GET_MEMNUM_INC(mem, bp); // number + + fprintf(f, ":%d:%d", id, mem); + } + break; +#endif + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); + } + } + if (nextp) *nextp = bp; +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_COMPILE +extern void +onig_print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + UChar* bp; + UChar* start = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", + reg->bt_mem_start, reg->bt_mem_end); + fprintf(f, "code-length: %d\n", reg->used); + + bp = start; + while (bp < end) { + int pos = bp - start; + + fprintf(f, "%4d: ", pos); + onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); + fprintf(f, "\n"); + } + fprintf(f, "\n"); +} +#endif + + #ifdef 
USE_CAPTURE_HISTORY static void history_tree_free(OnigCaptureTreeNode* node); @@ -304,6 +860,45 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #endif } +#ifdef USE_CALLOUT +#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \ + args.in = (ain);\ + args.name_id = (aname_id);\ + args.num = anum;\ + args.regex = reg;\ + args.string = str;\ + args.string_end = end;\ + args.start = sstart;\ + args.right_range = right_range;\ + args.current = s;\ + args.retry_in_match_counter = retry_in_match_counter;\ + args.msa = msa;\ + args.stk_base = stk_base;\ + args.stk = stk;\ + args.mem_start_stk = mem_start_stk;\ + args.mem_end_stk = mem_end_stk;\ + result = (func)(&args, user);\ +} while (0) + +#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\ + int result;\ + OnigCalloutArgs args;\ + CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\ + switch (result) {\ + case ONIG_CALLOUT_FAIL:\ + case ONIG_CALLOUT_SUCCESS:\ + break;\ + default:\ + if (result > 0) {\ + result = ONIGERR_INVALID_ARGUMENT;\ + }\ + best_len = result;\ + goto finish;\ + break;\ + }\ +} while(0) +#endif + /** stack **/ #define INVALID_STACK_INDEX -1 @@ -316,40 +911,43 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_ALT (0x0002 | STK_ALT_FLAG) #define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG) #define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG) + /* handled by normal-POP */ -#define STK_MEM_START 0x0100 -#define STK_MEM_END 0x8200 -#define STK_REPEAT_INC 0x0300 -#define STK_STATE_CHECK_MARK 0x1000 +#define STK_MEM_START 0x0010 +#define STK_MEM_END 0x8030 +#define STK_REPEAT_INC 0x0050 +#ifdef USE_CALLOUT +#define STK_CALLOUT 0x0070 +#endif + /* avoided by normal-POP */ #define STK_VOID 0x0000 /* for fill a blank */ #define STK_EMPTY_CHECK_START 0x3000 #define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */ -#define STK_MEM_END_MARK 0x8400 -#define STK_TO_VOID_START 0x0500 /* mark for "(?>...)" */ -#define 
STK_REPEAT 0x0600 -#define STK_CALL_FRAME 0x0700 -#define STK_RETURN 0x0800 -#define STK_SAVE_VAL 0x0900 +#define STK_MEM_END_MARK 0x8100 +#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0300 +#define STK_CALL_FRAME 0x0400 +#define STK_RETURN 0x0500 +#define STK_SAVE_VAL 0x0600 /* stack type check mask */ #define STK_MASK_POP_USED STK_ALT_FLAG -#define STK_MASK_TO_VOID_TARGET 0x10fe +#define STK_MASK_POP_HANDLED 0x0010 +#define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004) +#define STK_MASK_TO_VOID_TARGET 0x100e #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ typedef intptr_t StackIndex; typedef struct _StackType { unsigned int type; - int id; + int zid; union { struct { UChar *pcode; /* byte code position */ UChar *pstr; /* string position */ UChar *pstr_prev; /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif } state; struct { int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ @@ -378,67 +976,66 @@ typedef struct _StackType { UChar* v; UChar* v2; } val; +#ifdef USE_CALLOUT + struct { + int num; + OnigCalloutFunc func; + } callout; +#endif } u; } StackType; +#ifdef USE_CALLOUT + +struct OnigCalloutArgsStruct { + OnigCalloutIn in; + int name_id; /* name id or ONIG_NON_NAME_ID */ + int num; + OnigRegex regex; + const OnigUChar* string; + const OnigUChar* string_end; + const OnigUChar* start; + const OnigUChar* right_range; + const OnigUChar* current; // current matching position + unsigned long retry_in_match_counter; + + /* invisible to users */ + MatchArg* msa; + StackType* stk_base; + StackType* stk; + StackIndex* mem_start_stk; + StackIndex* mem_end_stk; +}; + +#endif + #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region 
= (arg_region);\ (msa).start = (arg_start);\ + (msa).match_stack_limit = (mp)->match_stack_limit;\ + (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ + (msa).mp = mp;\ (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #else -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ + (msa).match_stack_limit = (mp)->match_stack_limit;\ + (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ + (msa).mp = mp;\ (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 - -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\ - if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ - unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ - offset = ((offset) * (state_num)) >> 3;\ - if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ - if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ - (msa).state_check_buff = (void* )xmalloc(size);\ - else \ - (msa).state_check_buff = (void* )xalloca(size);\ - xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ - (size_t )(size - (offset))); \ - (msa).state_check_buff_size = size;\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ -} while(0) - -#define MATCH_ARG_FREE(msa) do {\ - if ((msa).stack_p) xfree((msa).stack_p);\ - if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ - if ((msa).state_check_buff) xfree((msa).state_check_buff);\ - }\ -} while(0) -#else -#define STATE_CHECK_BUFF_INIT(msa, 
str_len, offset, state_num) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) -#endif #define ALLOCA_PTR_NUM_LIMIT 50 @@ -495,25 +1092,303 @@ typedef struct _StackType { mem_end_stk = mem_start_stk + num_mem + 1;\ } while(0) -static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; +static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE; extern unsigned int onig_get_match_stack_limit_size(void) { - return MatchStackLimitSize; + return MatchStackLimit; } extern int onig_set_match_stack_limit_size(unsigned int size) { - MatchStackLimitSize = size; + MatchStackLimit = size; return 0; } +#ifdef USE_RETRY_LIMIT_IN_MATCH + +static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH; + +#define CHECK_RETRY_LIMIT_IN_MATCH do {\ + if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\ +} while (0) + +#else + +#define CHECK_RETRY_LIMIT_IN_MATCH + +#endif /* USE_RETRY_LIMIT_IN_MATCH */ + +extern unsigned long +onig_get_retry_limit_in_match(void) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH + return RetryLimitInMatch; +#else + //return ONIG_NO_SUPPORT_CONFIG; + return 0; +#endif +} + +extern int +onig_set_retry_limit_in_match(unsigned long size) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH + RetryLimitInMatch = size; + return 0; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +static OnigCalloutFunc DefaultProgressCallout; +static OnigCalloutFunc DefaultRetractionCallout; + +extern OnigMatchParam* +onig_new_match_param(void) +{ + OnigMatchParam* p; + + p = (OnigMatchParam* )xmalloc(sizeof(*p)); + if (IS_NOT_NULL(p)) { + onig_initialize_match_param(p); + } + + return p; +} + +extern void +onig_free_match_param_content(OnigMatchParam* p) +{ +#ifdef USE_CALLOUT + if (IS_NOT_NULL(p->callout_data)) { + xfree(p->callout_data); + p->callout_data = 0; + } +#endif +} + +extern void +onig_free_match_param(OnigMatchParam* p) +{ + if (IS_NOT_NULL(p)) { + onig_free_match_param_content(p); + xfree(p); + } +} + 
+extern int +onig_initialize_match_param(OnigMatchParam* mp) +{ + mp->match_stack_limit = MatchStackLimit; +#ifdef USE_RETRY_LIMIT_IN_MATCH + mp->retry_limit_in_match = RetryLimitInMatch; +#endif + mp->progress_callout_of_contents = DefaultProgressCallout; + mp->retraction_callout_of_contents = DefaultRetractionCallout; + +#ifdef USE_CALLOUT + mp->match_at_call_counter = 0; + mp->callout_user_data = 0; + mp->callout_data = 0; + mp->callout_data_alloc_num = 0; +#endif + + return ONIG_NORMAL; +} + +#ifdef USE_CALLOUT + +static int +adjust_match_param(regex_t* reg, OnigMatchParam* mp) +{ + RegexExt* ext = REG_EXTP(reg); + + mp->match_at_call_counter = 0; + + if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL; + + if (ext->callout_num > mp->callout_data_alloc_num) { + CalloutData* d; + size_t n = ext->callout_num * sizeof(*d); + if (IS_NOT_NULL(mp->callout_data)) + d = (CalloutData* )xrealloc(mp->callout_data, n); + else + d = (CalloutData* )xmalloc(n); + CHECK_NULL_RETURN_MEMERR(d); + + mp->callout_data = d; + mp->callout_data_alloc_num = ext->callout_num; + } + + xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData)); + return ONIG_NORMAL; +} + +#define ADJUST_MATCH_PARAM(reg, mp) \ + r = adjust_match_param(reg, mp);\ + if (r != ONIG_NORMAL) return r; + +#define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1)) + +extern int +onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args) +{ + OnigMatchParam* mp; + int num; + CalloutData* d; + + mp = args->msa->mp; + num = args->num; + + d = CALLOUT_DATA_AT_NUM(mp, num); + if (d->last_match_at_call_counter != mp->match_at_call_counter) { + xmemset(d, 0, sizeof(*d)); + d->last_match_at_call_counter = mp->match_at_call_counter; + return d->last_match_at_call_counter; + } + + return 0; +} + +extern int +onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + OnigType t; + CalloutData* 
d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + t = d->slot[slot].type; + if (IS_NOT_NULL(type)) *type = t; + if (IS_NOT_NULL(val)) *val = d->slot[slot].val; + return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args, + int slot, OnigType* type, + OnigValue* val) +{ + return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp, + args->num, slot, type, val); +} + +extern int +onig_get_callout_data(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + OnigType t; + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + if (d->last_match_at_call_counter != mp->match_at_call_counter) { + xmemset(d, 0, sizeof(*d)); + d->last_match_at_call_counter = mp->match_at_call_counter; + } + + t = d->slot[slot].type; + if (IS_NOT_NULL(type)) *type = t; + if (IS_NOT_NULL(val)) *val = d->slot[slot].val; + return (t == ONIG_TYPE_VOID ? 
1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, + const UChar* tag, const UChar* tag_end, int slot, + OnigType* type, OnigValue* val) +{ + int num; + + num = onig_get_callout_num_by_tag(reg, tag, tag_end); + if (num < 0) return num; + if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + return onig_get_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_get_callout_data_by_callout_args(OnigCalloutArgs* args, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot, + type, val); +} + +extern int +onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args, + int slot, OnigType* type, OnigValue* val) +{ + return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot, + type, val); +} + +extern int +onig_set_callout_data(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType type, OnigValue* val) +{ + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + d->slot[slot].type = type; + d->slot[slot].val = *val; + d->last_match_at_call_counter = mp->match_at_call_counter; + + return ONIG_NORMAL; +} + +extern int +onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, + const UChar* tag, const UChar* tag_end, int slot, + OnigType type, OnigValue* val) +{ + int num; + + num = onig_get_callout_num_by_tag(reg, tag, tag_end); + if (num < 0) return num; + if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + return onig_set_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_set_callout_data_by_callout_args(OnigCalloutArgs* args, + int callout_num, int slot, + OnigType type, OnigValue* val) +{ + return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot, + type, val); +} + +extern int +onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args, + int slot, OnigType 
type, OnigValue* val) +{ + return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot, + type, val); +} + +#else +#define ADJUST_MATCH_PARAM(reg, mp) +#endif /* USE_CALLOUT */ + + static int stack_double(int is_alloca, char** arg_alloc_base, StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, - OnigMatchArg* msa) + MatchArg* msa) { unsigned int n; int used; @@ -541,11 +1416,11 @@ stack_double(int is_alloca, char** arg_alloc_base, xmemcpy(new_alloc_base, alloc_base, size); } else { - if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { - if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) { + if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) return ONIGERR_MATCH_STACK_LIMIT_OVER; else - n = MatchStackLimitSize; + n = msa->match_stack_limit; } new_alloc_base = (char* )xrealloc(alloc_base, new_size); if (IS_NULL(new_alloc_base)) { @@ -584,80 +1459,36 @@ stack_double(int is_alloca, char** arg_alloc_base, #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ - (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ - if (IS_NOT_NULL(state_check_buff)) {\ - int x = STATE_CHECK_POS(s,snum);\ - (v) = state_check_buff[x/8] & (1<<(x%8));\ - }\ - else (v) = 0;\ -} while(0) - - -#define ELSE_IF_STATE_CHECK_MARK(stk) \ - else if ((stk)->type == STK_STATE_CHECK_MARK) { \ - int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ - state_check_buff[x/8] |= (1<<(x%8));\ - } - #define STACK_PUSH(stack_type,pat,s,sprev) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = 0;\ STACK_INC;\ } while(0) #define STACK_PUSH_ENSURED(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = 
(pat);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_ALT;\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = (IS_NOT_NULL(state_check_buff) ? (snum) : 0);\ STACK_INC;\ } while(0) -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ - if (IS_NOT_NULL(state_check_buff)) { \ - STACK_ENSURE(1);\ - stk->type = STK_STATE_CHECK_MARK;\ - stk->u.state.pstr = (s);\ - stk->u.state.state_check = (snum);\ - STACK_INC;\ - }\ -} while(0) - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define ELSE_IF_STATE_CHECK_MARK(stk) - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ +#ifdef ONIG_DEBUG_MATCH +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\ stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = s;\ + stk->u.state.pstr_prev = sprev;\ STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ +} while (0) +#else +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ STACK_INC;\ -} while(0) -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ +} while (0) +#endif #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) @@ -672,7 +1503,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_REPEAT(sid, pat) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT;\ - stk->id = (sid);\ + stk->zid = (sid);\ stk->u.repeat.pcode = (pat);\ stk->u.repeat.count = 0;\ STACK_INC;\ @@ -688,7 +1519,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_START(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_START;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ stk->u.mem.pstr = (s);\ 
stk->u.mem.start = mem_start_stk[mnum];\ stk->u.mem.end = mem_end_stk[mnum];\ @@ -700,7 +1531,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_END(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ stk->u.mem.end = mem_end_stk[mnum];\ @@ -711,7 +1542,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_END_MARK(mnum) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END_MARK;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ STACK_INC;\ } while(0) @@ -721,10 +1552,10 @@ stack_double(int is_alloca, char** arg_alloc_base, while (k > stk_base) {\ k--;\ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ - && k->id == (mnum)) {\ + && k->zid == (mnum)) {\ level++;\ }\ - else if (k->type == STK_MEM_START && k->id == (mnum)) {\ + else if (k->type == STK_MEM_START && k->zid == (mnum)) {\ if (level == 0) break;\ level--;\ }\ @@ -752,7 +1583,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_EMPTY_CHECK_START;\ - stk->id = (cnum);\ + stk->zid = (cnum);\ stk->u.empty_check.pstr = (s);\ STACK_INC;\ } while(0) @@ -760,7 +1591,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\ STACK_ENSURE(1);\ stk->type = STK_EMPTY_CHECK_END;\ - stk->id = (cnum);\ + stk->zid = (cnum);\ STACK_INC;\ } while(0) @@ -780,7 +1611,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ - stk->id = (sid);\ + stk->zid = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ STACK_INC;\ @@ -789,7 +1620,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ - stk->id = (sid);\ + stk->zid = 
(sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ stk->u.val.v2 = sprev;\ @@ -815,7 +1646,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->id == (sid)) {\ + && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ break;\ @@ -835,7 +1666,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->id == (sid)) {\ + && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ sprev = k->u.val.v2;\ @@ -869,6 +1700,24 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while (0) +#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALLOUT;\ + stk->zid = ONIG_NON_NAME_ID;\ + stk->u.callout.num = (anum);\ + stk->u.callout.func = (func);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALLOUT;\ + stk->zid = (aid);\ + stk->u.callout.num = (anum);\ + stk->u.callout.func = (func);\ + STACK_INC;\ +} while(0) + #ifdef ONIG_DEBUG #define STACK_BASE_CHECK(p, at) \ if ((p) < stk_base) {\ @@ -884,6 +1733,16 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ } while(0) + +#ifdef USE_CALLOUT +#define POP_CALLOUT_CASE \ + else if (stk->type == STK_CALLOUT) {\ + RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\ + } +#else +#define POP_CALLOUT_CASE +#endif + #define STACK_POP do {\ switch (pop_level) {\ case STACK_POP_LEVEL_FREE:\ @@ -891,7 +1750,6 @@ stack_double(int is_alloca, char** arg_alloc_base, stk--;\ STACK_BASE_CHECK(stk, "STACK_POP"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ case STACK_POP_LEVEL_MEM_START:\ @@ -900,10 
+1758,9 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ default:\ @@ -911,75 +1768,70 @@ stack_double(int is_alloca, char** arg_alloc_base, stk--;\ STACK_BASE_CHECK(stk, "STACK_POP 3"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + POP_CALLOUT_CASE\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ }\ } while(0) -#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\ +#define POP_TIL_BODY(aname, til_type) do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \ - if (stk->type == STK_ALT_PREC_READ_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = 
stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + STACK_BASE_CHECK(stk, (aname));\ + if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\ + if (stk->type == (til_type)) break;\ + else {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + /* Don't call callout here because negation of total success by (?!..) (?<!..) */\ + }\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ } while(0) +#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\ + POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\ +} while(0) + #define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \ - if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ + POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\ } while(0) + #define STACK_EXEC_TO_VOID(k) do {\ k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \ if (IS_TO_VOID_TARGET(k)) {\ + if (k->type == STK_TO_VOID_START) {\ + k->type = STK_VOID;\ + break;\ + }\ k->type = STK_VOID;\ }\ - else if (k->type == STK_TO_VOID_START) {\ - k->type = STK_VOID;\ - break;\ - }\ }\ } while(0) @@ -989,7 +1841,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ if 
(k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ (isnull) = (k->u.empty_check.pstr == (s));\ break;\ }\ @@ -1004,7 +1856,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ break;\ @@ -1017,7 +1869,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -1045,7 +1897,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ if (level == 0) {\ if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ @@ -1059,7 +1911,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -1081,7 +1933,7 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ }\ else if (k->type == STK_EMPTY_CHECK_END) {\ - if (k->id == (sid)) level++;\ + if (k->zid == (sid)) level++;\ }\ }\ } while(0) @@ -1116,7 +1968,7 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ if (k->type == STK_REPEAT) {\ if (level == 0) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ break;\ }\ }\ @@ -1208,11 +2060,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail -#ifdef 
USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range -#else -#define INIT_RIGHT_RANGE right_range = (UChar* )end -#endif #ifdef USE_CAPTURE_HISTORY static int @@ -1225,7 +2073,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, while (k < stk_top) { if (k->type == STK_MEM_START) { - n = k->id; + n = k->zid; if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && MEM_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); @@ -1243,7 +2091,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } } else if (k->type == STK_MEM_END) { - if (k->id == node->group) { + if (k->zid == node->group) { node->end = (int )(k->u.mem.pstr - str); *kp = k; return 0; @@ -1292,7 +2140,7 @@ backref_match_at_nested_level(regex_t* reg, } else if (level == nest) { if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { pstart = k->u.mem.pstr; if (IS_NOT_NULL(pend)) { if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ @@ -1316,7 +2164,7 @@ backref_match_at_nested_level(regex_t* reg, } } else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { pend = k->u.mem.pstr; } } @@ -1347,7 +2195,7 @@ backref_check_at_nested_level(regex_t* reg, } else if (level == nest) { if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { return 1; } } @@ -1391,14 +2239,14 @@ static int OpCurr = OP_FINISH; static int OpPrevTarget = OP_FAIL; static int MaxStackDepth = 0; -#define MOP_IN(opcode) do {\ +#define SOP_IN(opcode) do {\ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\ } while(0) -#define MOP_OUT do {\ +#define SOP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\ } while(0) @@ -1422,9 +2270,9 @@ 
onig_print_statistics(FILE* f) r = fprintf(f, " count prev time\n"); if (r < 0) return -1; - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + for (i = 0; OpInfo[i].opcode >= 0; i++) { r = fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name); if (r < 0) return -1; } r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); @@ -1442,8 +2290,8 @@ onig_print_statistics(FILE* f) #else #define STACK_INC stk++ -#define MOP_IN(opcode) -#define MOP_OUT +#define SOP_IN(opcode) +#define SOP_OUT #endif @@ -1459,10 +2307,8 @@ typedef struct { /* if sstart == str then set sprev to NULL. */ static int match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* in_right_range, -#endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) + const UChar* in_right_range, const UChar* sstart, UChar* sprev, + MatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1480,16 +2326,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, StackIndex *repeat_stk; StackIndex *mem_start_stk, *mem_end_stk; UChar* keep; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int scv; - unsigned char* state_check_buff = msa->state_check_buff; - int num_comb_exp_check = reg->num_comb_exp_check; +#ifdef USE_RETRY_LIMIT_IN_MATCH + unsigned long retry_limit_in_match; + unsigned long retry_in_match_counter; #endif + +#ifdef USE_CALLOUT + int of; +#endif + UChar *p = reg->p; OnigOptionType option = reg->options; OnigEncoding encode = reg->enc; OnigCaseFoldType case_fold_flag = reg->case_fold_flag; +#ifdef USE_CALLOUT + msa->mp->match_at_call_counter++; +#endif + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match = msa->retry_limit_in_match; +#endif + //n = reg->num_repeat + reg->num_mem * 2; pop_level = reg->stack_pop_level; num_mem = reg->num_mem; @@ -1506,11 +2364,15 @@ match_at(regex_t* reg, const UChar* str, 
const UChar* end, (int )(end - str), (int )(sstart - str)); #endif - STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; keep = s = (UChar* )sstart; + STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */ INIT_RIGHT_RANGE; +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_in_match_counter = 0; +#endif + while (1) { #ifdef ONIG_DEBUG_MATCH { @@ -1533,7 +2395,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fputs((char* )buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - fprintf(stderr, "%4d: ", (int )(p - reg->p)); + if (p == FinishCode) + fprintf(stderr, "----: "); + else + fprintf(stderr, "%4d: ", (int )(p - reg->p)); onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode); fprintf(stderr, "\n"); } @@ -1541,7 +2406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sbegin = s; switch (*p++) { - case OP_END: MOP_IN(OP_END); + case OP_END: SOP_IN(OP_END); n = (int )(s - sstart); if (n > best_len) { OnigRegion* region; @@ -1639,7 +2504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE end_best_len: #endif - MOP_OUT; + SOP_OUT; if (IS_FIND_CONDITION(option)) { if (IS_FIND_NOT_EMPTY(option) && s == sstart) { @@ -1655,14 +2520,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto finish; break; - case OP_EXACT1: MOP_IN(OP_EXACT1); + case OP_EXACT1: SOP_IN(OP_EXACT1); DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; break; - case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); + case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC); { int len; UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1681,21 +2546,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; q++; } } - MOP_OUT; + SOP_OUT; break; - case OP_EXACT2: MOP_IN(OP_EXACT2); + case OP_EXACT2: SOP_IN(OP_EXACT2); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; 
break; - case OP_EXACT3: MOP_IN(OP_EXACT3); + case OP_EXACT3: SOP_IN(OP_EXACT3); DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -1704,11 +2569,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT4: MOP_IN(OP_EXACT4); + case OP_EXACT4: SOP_IN(OP_EXACT4); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1719,11 +2584,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT5: MOP_IN(OP_EXACT5); + case OP_EXACT5: SOP_IN(OP_EXACT5); DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -1736,22 +2601,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTN: MOP_IN(OP_EXACTN); + case OP_EXACTN: SOP_IN(OP_EXACTN); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); + case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC); { int len; UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1775,20 +2640,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); + case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; break; - case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); + case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1799,11 +2664,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); + case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3); 
DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -1818,11 +2683,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); + case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { @@ -1832,11 +2697,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 2; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); + case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { @@ -1848,11 +2713,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 3; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); + case OP_EXACTMBN: SOP_IN(OP_EXACTMBN); GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; @@ -1862,19 +2727,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - tlen; - MOP_OUT; + SOP_OUT; continue; break; - case OP_CCLASS: MOP_IN(OP_CCLASS); + case OP_CCLASS: SOP_IN(OP_CCLASS); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); + case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB); if (! 
ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: @@ -1900,10 +2765,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } p += tlen; - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); + case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1918,18 +2783,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); + case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); + case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; @@ -1967,10 +2832,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; cc_mb_not_success: - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); + case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1985,11 +2850,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - MOP_OUT; + SOP_OUT; break; #ifdef USE_OP_CCLASS_NODE - case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); + case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE); { OnigCodePoint code; void *node; @@ -2005,28 +2870,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, code = ONIGENC_MBC_TO_CODE(encode, ss, s); if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - MOP_OUT; + SOP_OUT; break; #endif - case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); + case OP_ANYCHAR: SOP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); + case OP_ANYCHAR_ML: 
SOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); s += n; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2035,11 +2900,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; s += n; } - MOP_OUT; + SOP_OUT; continue; break; - case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2053,11 +2918,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2069,10 +2934,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } p++; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2089,87 +2954,46 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } p++; - MOP_OUT; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; + SOP_OUT; break; - case OP_STATE_CHECK_ANYCHAR_ML_STAR: - MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); - - GET_STATE_CHECK_NUM_INC(mem, 
p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_WORD: MOP_IN(OP_WORD); + case OP_WORD: SOP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_WORD_ASCII: MOP_IN(OP_WORD_ASCII); + case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_NO_WORD: MOP_IN(OP_NO_WORD); + case OP_NO_WORD: SOP_IN(OP_NO_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_NO_WORD_ASCII: MOP_IN(OP_NO_WORD_ASCII); + case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_WORD_BOUNDARY: MOP_IN(OP_WORD_BOUNDARY); + case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode @@ -2189,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_NO_WORD_BOUNDARY: MOP_IN(OP_NO_WORD_BOUNDARY); + case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode @@ -2212,20 +3036,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); + case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, 
mode)) { - if (ON_STR_BEGIN(s) || - ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { - MOP_OUT; + if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + SOP_OUT; continue; } } @@ -2233,14 +3056,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; break; - case OP_WORD_END: MOP_IN(OP_WORD_END); + case OP_WORD_END: SOP_IN(OP_WORD_END); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - MOP_OUT; + SOP_OUT; continue; } } @@ -2250,82 +3073,82 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { - MOP_OUT; + SOP_OUT; continue; } goto fail; break; case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); + case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_END_BUF: MOP_IN(OP_END_BUF); + case OP_END_BUF: SOP_IN(OP_END_BUF); if (! 
ON_STR_END(s)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); + case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - MOP_OUT; + SOP_OUT; continue; } goto fail; break; - case OP_END_LINE: MOP_IN(OP_END_LINE); + case OP_END_LINE: SOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - MOP_OUT; + SOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - MOP_OUT; + SOP_OUT; continue; } #endif goto fail; break; - case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); + case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } @@ -2333,7 +3156,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enclen(encode, s))) { - MOP_OUT; + SOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -2341,7 +3164,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, UChar* ss = s + enclen(encode, s); ss += enclen(encode, ss); if (ON_STR_END(ss)) { - MOP_OUT; + SOP_OUT; continue; } } @@ -2349,53 +3172,53 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; break; - case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); + case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION); if (s != 
msa->start) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); + case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); + case OP_MEMORY_START: SOP_IN(OP_MEMORY_START); GET_MEMNUM_INC(mem, p); mem_start_stk[mem] = (StackIndex )((void* )s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); + case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); + case OP_MEMORY_END: SOP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_CALL - case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); + case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. 
*/ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); + case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); @@ -2406,22 +3229,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_BACKREF1: MOP_IN(OP_BACKREF1); + case OP_BACKREF1: SOP_IN(OP_BACKREF1); mem = 1; goto backref; break; - case OP_BACKREF2: MOP_IN(OP_BACKREF2); + case OP_BACKREF2: SOP_IN(OP_BACKREF2); mem = 2; goto backref; break; - case OP_BACKREF_N: MOP_IN(OP_BACKREF_N); + case OP_BACKREF_N: SOP_IN(OP_BACKREF_N); GET_MEMNUM_INC(mem, p); backref: { @@ -2446,12 +3269,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_N_IC: MOP_IN(OP_BACKREF_N_IC); + case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC); GET_MEMNUM_INC(mem, p); { int len; @@ -2475,12 +3298,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); + case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2514,12 +3337,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); + case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2553,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == 
tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; @@ -2580,13 +3403,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else goto fail; - MOP_OUT; + SOP_OUT; continue; } break; #endif - case OP_BACKREF_CHECK: MOP_IN(OP_BACKREF_CHECK); + case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK); { GET_LENGTH_INC(tlen, p); for (i = 0; i < tlen; i++) { @@ -2599,7 +3422,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; @@ -2619,36 +3442,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else goto fail; - MOP_OUT; + SOP_OUT; continue; } break; #endif #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ - case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); + case OP_SET_OPTION_PUSH: SOP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); STACK_PUSH_ALT(p, s, sprev); p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - MOP_OUT; + SOP_OUT; continue; break; - case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); + case OP_SET_OPTION: SOP_IN(OP_SET_OPTION); GET_OPTION_INC(option, p); - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_EMPTY_CHECK_START: MOP_IN(OP_EMPTY_CHECK_START); + case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START); GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_PUSH_EMPTY_CHECK_START(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_EMPTY_CHECK_END: MOP_IN(OP_EMPTY_CHECK_END); + case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END); { int is_empty; @@ -2677,12 +3500,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT - case OP_EMPTY_CHECK_END_MEMST: MOP_IN(OP_EMPTY_CHECK_END_MEMST); + case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST); { int is_empty; @@ -2696,14 +3519,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto empty_check_found; } } - MOP_OUT; + SOP_OUT; continue; break; 
#endif #ifdef USE_CALL case OP_EMPTY_CHECK_END_MEMST_PUSH: - MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); + SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); { int is_empty; @@ -2725,103 +3548,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_EMPTY_CHECK_END(mem); } } - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_JUMP: MOP_IN(OP_JUMP); + case OP_JUMP: SOP_IN(OP_JUMP); GET_RELADDR_INC(addr, p); p += addr; - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_PUSH: MOP_IN(OP_PUSH); + case OP_PUSH: SOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_SUPER: MOP_IN(OP_PUSH_SUPER); + case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER); GET_RELADDR_INC(addr, p); STACK_PUSH_SUPER_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); - GET_STATE_CHECK_NUM_INC(mem, p); - GET_RELADDR_INC(addr, p); - STATE_CHECK_VAL(scv, mem); - if (scv) { - p += addr; - } - else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - } - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_STATE_CHECK(s, mem); - MOP_OUT; + SOP_OUT; continue; break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_POP: MOP_IN(OP_POP); + case OP_POP_OUT: SOP_IN(OP_POP_OUT); STACK_POP_ONE; - MOP_OUT; + // for stop backtrack + //CHECK_RETRY_LIMIT_IN_MATCH; + SOP_OUT; continue; break; - case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); + case 
OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; } p += (addr + 1); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); + case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); if (*p == *s) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; } p++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT: MOP_IN(OP_REPEAT); + case OP_REPEAT: SOP_IN(OP_REPEAT); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2834,11 +3622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p + addr, s, sprev); } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); + case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2852,11 +3640,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += addr; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); + case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2874,19 +3662,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); + case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; break; - case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); + case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2908,68 
+3696,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); + case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; break; - case OP_PREC_READ_START: MOP_IN(OP_PREC_READ_START); + case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START); STACK_PUSH_POS(s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_END: MOP_IN(OP_PREC_READ_END); + case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END); { STACK_EXEC_TO_VOID(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_NOT_START: MOP_IN(OP_PREC_READ_NOT_START); + case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_NOT_END: MOP_IN(OP_PREC_READ_NOT_END); + case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END); STACK_POP_TIL_ALT_PREC_READ_NOT; goto fail; break; - case OP_ATOMIC_START: MOP_IN(OP_ATOMIC_START); + case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START); STACK_PUSH_TO_VOID_START; - MOP_OUT; + SOP_OUT; continue; break; - case OP_ATOMIC_END: MOP_IN(OP_ATOMIC_END); + case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END); STACK_EXEC_TO_VOID(stkp); - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); + case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND_NOT_START: 
MOP_IN(OP_LOOK_BEHIND_NOT_START); + case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -2984,33 +3772,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND_NOT_END: MOP_IN(OP_LOOK_BEHIND_NOT_END); + case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END); STACK_POP_TIL_ALT_LOOK_BEHIND_NOT; goto fail; break; #ifdef USE_CALL - case OP_CALL: MOP_IN(OP_CALL); + case OP_CALL: SOP_IN(OP_CALL); GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); p = reg->p + addr; - MOP_OUT; + SOP_OUT; continue; break; - case OP_RETURN: MOP_IN(OP_RETURN); + case OP_RETURN: SOP_IN(OP_RETURN); STACK_RETURN(p); STACK_PUSH_RETURN; - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL); + case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL); { SaveType type; GET_SAVE_TYPE_INC(type, p); @@ -3029,11 +3817,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR); + case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR); { UpdateVarType type; enum SaveType save_type; @@ -3061,31 +3849,99 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - MOP_OUT; + SOP_OUT; + continue; + break; + +#ifdef USE_CALLOUT + case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS); + of = ONIG_CALLOUT_OF_CONTENTS; + goto callout_common_entry; + + SOP_OUT; continue; break; + case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME); + { + int call_result; + int name_id; + int num; + int in; + CalloutListEntry* e; + OnigCalloutFunc func; + OnigCalloutArgs args; + + of = ONIG_CALLOUT_OF_NAME; + GET_MEMNUM_INC(name_id, p); + + callout_common_entry: + GET_MEMNUM_INC(num, p); + e = onig_reg_callout_list_at(reg, num); + in = 
e->in; + if (of == ONIG_CALLOUT_OF_NAME) { + func = onig_get_callout_start_func(reg, num); + } + else { + name_id = ONIG_NON_NAME_ID; + func = msa->mp->progress_callout_of_contents; + } + + if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) { + CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id, + num, msa->mp->callout_user_data, args, call_result); + switch (call_result) { + case ONIG_CALLOUT_FAIL: + goto fail; + break; + case ONIG_CALLOUT_SUCCESS: + goto retraction_callout2; + break; + default: /* error code */ + if (call_result > 0) { + call_result = ONIGERR_INVALID_ARGUMENT; + } + best_len = call_result; + goto finish; + break; + } + } + else { + retraction_callout2: + if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) { + if (of == ONIG_CALLOUT_OF_NAME) { + if (IS_NOT_NULL(func)) { + STACK_PUSH_CALLOUT_NAME(name_id, num, func); + } + } + else { + func = msa->mp->retraction_callout_of_contents; + if (IS_NOT_NULL(func)) { + STACK_PUSH_CALLOUT_CONTENTS(num, func); + } + } + } + } + } + SOP_OUT; + continue; + break; +#endif + case OP_FINISH: goto finish; break; fail: - MOP_OUT; + SOP_OUT; /* fall */ - case OP_FAIL: MOP_IN(OP_FAIL); + case OP_FAIL: SOP_IN(OP_FAIL); STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; sprev = stk->u.state.pstr_prev; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; - } -#endif - - MOP_OUT; + CHECK_RETRY_LIMIT_IN_MATCH; + SOP_OUT; continue; break; @@ -3113,6 +3969,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, unexpected_bytecode_error: STACK_SAVE; return ONIGERR_UNEXPECTED_BYTECODE; + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match_over: + STACK_SAVE; + return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER; +#endif } @@ -3423,23 +4285,30 @@ map_search_backward(OnigEncoding enc, UChar map[], } return (UChar* )NULL; } - extern int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, 
OnigOptionType option) { int r; - UChar *prev; - OnigMatchArg msa; + OnigMatchParam mp; - MATCH_ARG_INIT(msa, reg, option, region, at); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = at - str; - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif + onig_initialize_match_param(&mp); + r = onig_match_with_param(reg, str, end, at, region, option, &mp); + onig_free_match_param_content(&mp); + return r; +} + +extern int +onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, OnigOptionType option, + OnigMatchParam* mp) +{ + int r; + UChar *prev; + MatchArg msa; + ADJUST_MATCH_PARAM(reg, mp); + MATCH_ARG_INIT(msa, reg, option, region, at, mp); if (region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3459,11 +4328,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, } prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, -#endif - at, prev, &msa); + r = match_at(reg, str, end, end, at, prev, &msa); } end: @@ -3497,23 +4362,23 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, retry: switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: + case OPTIMIZE_EXACT: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_IC: + case OPTIMIZE_EXACT_IC: p = slow_search_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_BM: + case OPTIMIZE_EXACT_BM: p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + case OPTIMIZE_EXACT_BM_NO_REV: p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_MAP: + case OPTIMIZE_MAP: p = map_search(reg->enc, reg->map, p, range); break; } @@ -3621,20 +4486,20 @@ 
backward_search_range(regex_t* reg, const UChar* str, const UChar* end, retry: switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: + case OPTIMIZE_EXACT: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case ONIG_OPTIMIZE_EXACT_IC: + case OPTIMIZE_EXACT_IC: p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case ONIG_OPTIMIZE_EXACT_BM: - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + case OPTIMIZE_EXACT_BM: + case OPTIMIZE_EXACT_BM_NO_REV: #ifdef USE_INT_MAP_BACKWARD if (IS_NULL(reg->int_map_backward)) { int r; @@ -3653,7 +4518,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, #endif break; - case ONIG_OPTIMIZE_MAP: + case OPTIMIZE_MAP: p = map_search_backward(reg->enc, reg->map, range, adjrange, p); break; } @@ -3725,12 +4590,25 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, OnigOptionType option) { int r; + OnigMatchParam mp; + + onig_initialize_match_param(&mp); + r = onig_search_with_param(reg, str, end, start, range, region, option, &mp); + onig_free_match_param_content(&mp); + return r; + +} + +extern int +onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) +{ + int r; UChar *s, *prev; - OnigMatchArg msa; + MatchArg msa; const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE const UChar *orig_range = range; -#endif #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, @@ -3738,6 +4616,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif + ADJUST_MATCH_PARAM(reg, mp); + if (region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3757,7 +4637,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } -#ifdef 
USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ @@ -3779,29 +4658,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else goto finish; /* error */ \ } #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ /* anchor optimize: resume search range */ @@ -3886,7 +4742,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, goto end_buf; } } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { + else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) { goto begin_position; } } @@ -3902,11 +4758,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, s = (UChar* )start; prev = (UChar* )NULL; - MATCH_ARG_INIT(msa, reg, option, region, start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ -#endif + MATCH_ARG_INIT(msa, reg, option, region, start, mp); MATCH_AND_RETURN_CHECK(end); goto mismatch; } @@ -3918,13 +4770,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(start - str), (int )(range - str)); #endif - MATCH_ARG_INIT(msa, reg, option, region, orig_start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = (MIN(start, range) - str); - STATE_CHECK_BUFF_INIT(msa, 
end - str, offset, reg->num_comb_exp_check); - } -#endif + MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp); s = (UChar* )start; if (range > start) { /* forward search */ @@ -3933,7 +4779,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else prev = (UChar* )NULL; - if (reg->optimize != ONIG_OPTIMIZE_NONE) { + if (reg->optimize != OPTIMIZE_NONE) { UChar *sch_range, *low, *high, *low_prev; sch_range = (UChar* )range; @@ -3969,7 +4815,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (! forward_search_range(reg, str, end, s, sch_range, &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) { do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; @@ -3998,12 +4844,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else { /* backward search */ -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE if (orig_start < end) orig_start += enclen(reg->enc, orig_start); /* is upper range */ -#endif - if (reg->optimize != ONIG_OPTIMIZE_NONE) { + if (reg->optimize != OPTIMIZE_NONE) { UChar *low, *high, *adjrange, *sch_start; if (range < end) @@ -4204,3 +5048,600 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from) *to = *from; } + +/* for callout functions */ + +#ifdef USE_CALLOUT + +extern OnigCalloutFunc +onig_get_progress_callout(void) +{ + return DefaultProgressCallout; +} + +extern int +onig_set_progress_callout(OnigCalloutFunc f) +{ + DefaultProgressCallout = f; + return ONIG_NORMAL; +} + +extern OnigCalloutFunc +onig_get_retraction_callout(void) +{ + return DefaultRetractionCallout; +} + +extern int +onig_set_retraction_callout(OnigCalloutFunc f) +{ + DefaultRetractionCallout = f; + return ONIG_NORMAL; +} + +extern int +onig_get_callout_num_by_callout_args(OnigCalloutArgs* args) +{ + return args->num; +} + +extern OnigCalloutIn +onig_get_callout_in_by_callout_args(OnigCalloutArgs* args) +{ + return args->in; +} + 
+extern int +onig_get_name_id_by_callout_args(OnigCalloutArgs* args) +{ + return args->name_id; +} + +extern const UChar* +onig_get_contents_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_CONTENTS) { + return e->u.content.start; + } + + return 0; +} + +extern const UChar* +onig_get_contents_end_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_CONTENTS) { + return e->u.content.end; + } + + return 0; +} + +extern int +onig_get_args_num_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + return e->u.arg.num; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + return e->u.arg.passed_num; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index, + OnigType* type, OnigValue* val) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + if (IS_NOT_NULL(type)) *type = e->u.arg.types[index]; + if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index]; + return ONIG_NORMAL; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern const UChar* +onig_get_string_by_callout_args(OnigCalloutArgs* args) +{ + return args->string; +} + +extern const UChar* 
+onig_get_string_end_by_callout_args(OnigCalloutArgs* args) +{ + return args->string_end; +} + +extern const UChar* +onig_get_start_by_callout_args(OnigCalloutArgs* args) +{ + return args->start; +} + +extern const UChar* +onig_get_right_range_by_callout_args(OnigCalloutArgs* args) +{ + return args->right_range; +} + +extern const UChar* +onig_get_current_by_callout_args(OnigCalloutArgs* args) +{ + return args->current; +} + +extern OnigRegex +onig_get_regex_by_callout_args(OnigCalloutArgs* args) +{ + return args->regex; +} + +extern unsigned long +onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args) +{ + return args->retry_in_match_counter; +} + + +extern int +onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end) +{ + OnigRegex reg; + const UChar* str; + StackType* stk_base; + int i; + + i = mem_num; + reg = a->regex; + str = a->string; + stk_base = a->stk_base; + + if (i > 0) { + if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { + if (MEM_STATUS_AT(reg->bt_mem_start, i)) + *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str); + else + *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str); + + *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) + ? 
STACK_AT(a->mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )a->mem_end_stk[i])) - str); + } + else { + *begin = *end = ONIG_REGION_NOTPOS; + } + } + else if (i == 0) { +#if 0 + *begin = a->start - str; + *end = a->current - str; +#else + return ONIGERR_INVALID_ARGUMENT; +#endif + } + else + return ONIGERR_INVALID_ARGUMENT; + + return ONIG_NORMAL; +} + +extern int +onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes) +{ + int n; + + n = (int )(a->stk - a->stk_base); + + if (used_num != 0) + *used_num = n; + + if (used_bytes != 0) + *used_bytes = n * sizeof(StackType); + + return ONIG_NORMAL; +} + + +/* builtin callout functions */ + +extern int +onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_CALLOUT_FAIL; +} + +extern int +onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_MISMATCH; +} + +#if 0 +extern int +onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_CALLOUT_SUCCESS; +} +#endif + +extern int +onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int n; + OnigValue val; + + r = onig_get_arg_by_callout_args(args, 0, 0, &val); + if (r != ONIG_NORMAL) return r; + + n = (int )val.l; + if (n >= 0) { + n = ONIGERR_INVALID_CALLOUT_BODY; + } + + return n; +} + +extern int +onig_builtin_count(OnigCalloutArgs* args, void* user_data) +{ + (void )onig_check_callout_data_and_clear_old_values(args); + + return onig_builtin_total_count(args, user_data); +} + +extern int +onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + OnigType type; + OnigValue val; + OnigValue aval; + OnigCodePoint count_type; + + r = onig_get_arg_by_callout_args(args, 0, &type, &aval); + if (r != ONIG_NORMAL) return r; + + count_type = aval.c; + if (count_type != '>' && count_type != 'X' && count_type != '<') + return 
ONIGERR_INVALID_CALLOUT_ARG; + + r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0, + &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + val.l = 0; + } + + if (args->in == ONIG_CALLOUT_IN_RETRACTION) { + slot = 2; + if (count_type == '<') + val.l++; + else if (count_type == 'X') + val.l--; + } + else { + slot = 1; + if (count_type != '<') + val.l++; + } + + r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + /* slot 1: in progress counter, slot 2: in retraction counter */ + r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot, + &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + val.l = 0; + } + + val.l++; + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + OnigType type; + OnigValue val; + OnigValue aval; + + (void )onig_check_callout_data_and_clear_old_values(args); + + slot = 0; + r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + type = ONIG_TYPE_LONG; + val.l = 0; + } + + r = onig_get_arg_by_callout_args(args, 0, &type, &aval); + if (r != ONIG_NORMAL) return r; + + if (args->in == ONIG_CALLOUT_IN_RETRACTION) { + val.l--; + } + else { + if (val.l >= aval.l) return ONIG_CALLOUT_FAIL; + val.l++; + } + + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + return ONIG_CALLOUT_SUCCESS; +} + +enum OP_CMP { + OP_EQ, + OP_NE, + OP_LT, + OP_GT, + OP_LE, + OP_GE +}; + +extern int +onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + long 
lv; + long rv; + OnigType type; + OnigValue val; + regex_t* reg; + enum OP_CMP op; + + reg = args->regex; + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + lv = 0L; + else + lv = val.l; + } + else { /* ONIG_TYPE_LONG */ + lv = val.l; + } + + r = onig_get_arg_by_callout_args(args, 2, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + rv = 0L; + else + rv = val.l; + } + else { /* ONIG_TYPE_LONG */ + rv = val.l; + } + + slot = 0; + r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + OnigCodePoint c1, c2; + UChar* p; + + r = onig_get_arg_by_callout_args(args, 1, &type, &val); + if (r != ONIG_NORMAL) return r; + + p = val.s.start; + c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); + p += ONIGENC_MBC_ENC_LEN(reg->enc, p); + if (p < val.s.end) { + c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); + p += ONIGENC_MBC_ENC_LEN(reg->enc, p); + if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG; + } + else + c2 = 0; + + switch (c1) { + case '=': + if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; + op = OP_EQ; + break; + case '!': + if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; + op = OP_NE; + break; + case '<': + if (c2 == '=') op = OP_LE; + else if (c2 == 0) op = OP_LT; + else return ONIGERR_INVALID_CALLOUT_ARG; + break; + case '>': + if (c2 == '=') op = OP_GE; + else if (c2 == 0) op = OP_GT; + else return ONIGERR_INVALID_CALLOUT_ARG; + break; + default: + return ONIGERR_INVALID_CALLOUT_ARG; + break; + } + val.l = (long )op; + r = 
onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + } + else { + op = (enum OP_CMP )val.l; + } + + switch (op) { + case OP_EQ: r = (lv == rv); break; + case OP_NE: r = (lv != rv); break; + case OP_LT: r = (lv < rv); break; + case OP_GT: r = (lv > rv); break; + case OP_LE: r = (lv <= rv); break; + case OP_GE: r = (lv >= rv); break; + } + + return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS; +} + + +#include <stdio.h> + +static FILE* OutFp; + +/* name start with "onig_" for macros. */ +static int +onig_builtin_monitor(OnigCalloutArgs* args, void* user_data) +{ + int r; + int num; + size_t tag_len; + const UChar* start; + const UChar* right; + const UChar* current; + const UChar* string; + const UChar* strend; + const UChar* tag_start; + const UChar* tag_end; + regex_t* reg; + OnigCalloutIn in; + OnigType type; + OnigValue val; + char buf[20]; + FILE* fp; + + fp = OutFp; + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + in = onig_get_callout_in_by_callout_args(args); + if (in == ONIG_CALLOUT_IN_PROGRESS) { + if (val.c == '<') + return ONIG_CALLOUT_SUCCESS; + } + else { + if (val.c != 'X' && val.c != '<') + return ONIG_CALLOUT_SUCCESS; + } + + num = onig_get_callout_num_by_callout_args(args); + start = onig_get_start_by_callout_args(args); + right = onig_get_right_range_by_callout_args(args); + current = onig_get_current_by_callout_args(args); + string = onig_get_string_by_callout_args(args); + strend = onig_get_string_end_by_callout_args(args); + reg = onig_get_regex_by_callout_args(args); + tag_start = onig_get_callout_tag_start(reg, num); + tag_end = onig_get_callout_tag_end(reg, num); + + if (tag_start == 0) + xsnprintf(buf, sizeof(buf), "#%d", num); + else { + /* CAUTION: tag string is not terminated with NULL. 
*/ + int i; + + tag_len = tag_end - tag_start; + if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1; + for (i = 0; i < tag_len; i++) buf[i] = tag_start[i]; + buf[tag_len] = '\0'; + } + + fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n", + buf, + in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=", + (int )(current - string), + (int )(start - string), + (int )(right - string), + (int )(strend - string)); + fflush(fp); + + return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */) +{ + int id; + char* name; + OnigEncoding enc; + unsigned int ts[4]; + OnigValue opts[4]; + + if (IS_NOT_NULL(fp)) + OutFp = (FILE* )fp; + else + OutFp = stdout; + + enc = ONIG_ENCODING_ASCII; + + name = "MON"; + ts[0] = ONIG_TYPE_CHAR; + opts[0].c = '>'; + BC_B_O(name, monitor, 1, ts, 1, opts); + + return ONIG_NORMAL; +} + +#endif /* USE_CALLOUT */ diff --git a/src/reggnu.c b/src/reggnu.c index 50eb9b4..37c7519 100644 --- a/src/reggnu.c +++ b/src/reggnu.c @@ -2,7 +2,7 @@ reggnu.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -28,10 +28,7 @@ */ #include "regint.h" - -#ifndef ONIGGNU_H #include "oniggnu.h" -#endif extern void re_free_registers(OnigRegion* r) @@ -140,8 +137,7 @@ re_mbcinit(int mb_code) break; } - onig_initialize(0, 0); - onig_initialize_encoding(enc); + onig_initialize(&enc, 1); onigenc_set_default_encoding(enc); } diff --git a/src/regint.h b/src/regint.h index 256b045..ba8407a 100644 --- a/src/regint.h +++ b/src/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,29 +57,48 @@ /* config */ /* spec. config */ #define USE_CALL +#define USE_CALLOUT #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ #define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +#define USE_RETRY_LIMIT_IN_MATCH + /* internal config */ #define USE_OP_PUSH_OR_JUMP_EXACT #define USE_QUANT_PEEK_NEXT #define USE_ST_LIBRARY +#include "regenc.h" + +#ifdef __cplusplus +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ +#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifdef HAVE_STDARG_H +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + + #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ +#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000 #define DEFAULT_PARSE_DEPTH_LIMIT 4096 -#if 
defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif - /* */ /* escape other system UChar definition */ -#include "config.h" #ifdef ONIG_ESCAPE_UCHAR_COLLISION #undef ONIG_ESCAPE_UCHAR_COLLISION #endif @@ -89,15 +108,12 @@ #define USE_VARIABLE_META_CHARS #define USE_POSIX_API_REGION_OPTION #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ #define xmalloc malloc #define xrealloc realloc #define xcalloc calloc #define xfree free -#define CHECK_INTERRUPT_IN_MATCH_AT - #define st_init_table onig_st_init_table #define st_init_table_with_size onig_st_init_table_with_size #define st_init_numtable onig_st_init_numtable @@ -118,9 +134,6 @@ /* */ #define onig_st_is_member st_is_member -#define STATE_CHECK_STRING_THRESHOLD_LEN 7 -#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 - #define xmemset memset #define xmemcpy memcpy #define xmemmove memmove @@ -140,6 +153,10 @@ #include <stddef.h> +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif + #ifdef HAVE_STDLIB_H #include <stdlib.h> #endif @@ -184,8 +201,6 @@ typedef unsigned int uintptr_t; #endif #endif -#include "regenc.h" - #ifdef MIN #undef MIN #endif @@ -237,14 +252,93 @@ typedef unsigned int uintptr_t; #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ + +#ifdef USE_CALLOUT + +typedef struct { + int flag; + OnigCalloutOf of; + int in; + int name_id; + const UChar* tag_start; + const UChar* tag_end; + OnigCalloutType type; + OnigCalloutFunc start_func; + OnigCalloutFunc end_func; + union { + struct { + const UChar* start; + const UChar* end; + } content; + struct { + int num; + int passed_num; + OnigType types[ONIG_CALLOUT_MAX_ARGS_NUM]; + OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM]; + } arg; + } u; +} CalloutListEntry; + +#endif + typedef struct { - int num_keeper; - int* keepers; -} RegExt; + const UChar* pattern; + const UChar* pattern_end; +#ifdef USE_CALLOUT + void* tag_table; + int callout_num; + int callout_list_alloc; + 
CalloutListEntry* callout_list; /* index: callout num */ +#endif +} RegexExt; -#define REG_EXTP(reg) (RegExt* )((reg)->chain) +#define REG_EXTP(reg) ((RegexExt* )((reg)->chain)) #define REG_EXTPL(reg) ((reg)->chain) +struct re_pattern_buffer { + /* common members of BBuf(bytes-buffer) */ + unsigned char* p; /* compiled pattern */ + unsigned int used; /* used space for p */ + unsigned int alloc; /* allocated space for p */ + + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */ + int num_comb_exp_check; /* no longer used (combination explosion check) */ + int num_call; /* number of subexp call */ + unsigned int capture_history; /* (?@...) flag (1-31) */ + unsigned int bt_mem_start; /* need backtrack flag */ + unsigned int bt_mem_end; /* need backtrack flag */ + int stack_pop_level; + int repeat_range_alloc; + OnigRepeatRange* repeat_range; + + OnigEncoding enc; + OnigOptionType options; + OnigSyntaxType* syntax; + OnigCaseFoldType case_fold_flag; + void* name_table; + + /* optimization info (string search, char-map and anchors) */ + int optimize; /* optimize flag */ + int threshold_len; /* search str-length for apply optimize */ + int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ + OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + int sub_anchor; /* start-anchor for exact or map */ + unsigned char *exact; + unsigned char *exact_end; + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ + int *int_map; /* BM skip for exact_len > 255 */ + int *int_map_backward; /* BM skip for backward search */ + OnigLen dmin; /* min-distance of exact or map */ + OnigLen dmax; /* max-distance of exact or map */ + + /* regex_t link chain */ + struct re_pattern_buffer* chain; /* escape compile-conflict */ +}; + + /* stack pop level */ enum StackPopLevel { 
STACK_POP_LEVEL_FREE = 0, @@ -253,12 +347,14 @@ enum StackPopLevel { }; /* optimize flags */ -#define ONIG_OPTIMIZE_NONE 0 -#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ -#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ -#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ -#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ -#define ONIG_OPTIMIZE_MAP 5 /* char map */ +enum OptimizeType { + OPTIMIZE_NONE = 0, + OPTIMIZE_EXACT = 1, /* Slow Search */ + OPTIMIZE_EXACT_BM = 2, /* Boyer Moore Search */ + OPTIMIZE_EXACT_BM_NO_REV = 3, /* BM (but not simple match) */ + OPTIMIZE_EXACT_IC = 4, /* Slow Search (ignore case) */ + OPTIMIZE_MAP = 5 /* char map */ +}; /* bit status */ typedef unsigned int MemStatusType; @@ -467,8 +563,8 @@ typedef struct _BBuf { #define ANCHOR_NO_WORD_BOUNDARY (1<<11) #define ANCHOR_WORD_BEGIN (1<<12) #define ANCHOR_WORD_END (1<<13) -#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ -#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ +#define ANCHOR_ANYCHAR_INF (1<<14) +#define ANCHOR_ANYCHAR_INF_ML (1<<15) #define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16) #define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17) @@ -557,7 +653,7 @@ enum OpCode { OP_JUMP, OP_PUSH, OP_PUSH_SUPER, - OP_POP, + OP_POP_OUT, OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ OP_REPEAT, /* {n,m} */ @@ -581,16 +677,14 @@ enum OpCode { OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */ OP_LOOK_BEHIND_NOT_END, /* (?<!...) 
end */ - OP_CALL, /* \g<name> */ + OP_CALL, /* \g<name> */ OP_RETURN, OP_PUSH_SAVE_VAL, OP_UPDATE_VAR, - - OP_STATE_CHECK_PUSH, /* combination explosion check and push */ - OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ - OP_STATE_CHECK, /* check only */ - OP_STATE_CHECK_ANYCHAR_STAR, - OP_STATE_CHECK_ANYCHAR_ML_STAR, +#ifdef USE_CALLOUT + OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */ + OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */ +#endif /* no need: IS_DYNAMIC_OPTION() == 0 */ OP_SET_OPTION_PUSH, /* set option and push recover option */ @@ -627,7 +721,6 @@ typedef int ModeType; #define SIZE_ABSADDR sizeof(AbsAddrType) #define SIZE_LENGTH sizeof(LengthType) #define SIZE_MEMNUM sizeof(MemNumType) -#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType) #define SIZE_REPEATNUM sizeof(RepeatNumType) #define SIZE_OPTION sizeof(OnigOptionType) #define SIZE_CODE_POINT sizeof(OnigCodePoint) @@ -643,7 +736,6 @@ typedef int ModeType; #define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) #define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) #define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) -#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) #define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType) #define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType) #define GET_MODE_INC(mode,p) PLATFORM_GET_INC(mode, p, ModeType) @@ -662,7 +754,7 @@ typedef int ModeType; #define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) #define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) #define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP SIZE_OPCODE +#define SIZE_OP_POP_OUT SIZE_OPCODE #define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) #define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) #define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) @@ -693,11 +785,9 @@ typedef int ModeType; #define 
SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM) #define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM) -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#ifdef USE_CALLOUT +#define SIZE_OP_CALLOUT_CONTENTS (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_CALLOUT_NAME (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM) #endif #define MC_ESC(syn) (syn)->meta_char_table.esc @@ -751,44 +841,14 @@ typedef int ModeType; #define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) #define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -typedef struct { - void* stack_p; - int stack_n; - OnigOptionType options; - OnigRegion* region; - int ptr_num; - const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - int best_len; /* for ONIG_OPTION_FIND_LONGEST */ - UChar* best_s; -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - void* state_check_buff; - int state_check_buff_size; -#endif -} OnigMatchArg; - - -typedef struct OnigEndCallListItem { - struct OnigEndCallListItem* next; - void (*func)(void); -} OnigEndCallListItemType; - extern void onig_add_end_call(void (*func)(void)); #ifdef ONIG_DEBUG -typedef struct { - short int opcode; - char* name; - short int arg_type; -} OnigOpInfoType; - -extern OnigOpInfoType OnigOpInfo[]; - - -extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc)); +#ifdef ONIG_DEBUG_COMPILE +extern void onig_print_compiled_byte_code_list(FILE* f, regex_t* reg); +#endif #ifdef ONIG_DEBUG_STATISTICS extern void onig_statistics_init P_((void)); @@ -803,6 +863,85 @@ extern int 
onig_bbuf_init P_((BBuf* buf, int size)); extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); extern void onig_transfer P_((regex_t* to, regex_t* from)); extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); +extern RegexExt* onig_get_regex_ext(regex_t* reg); +extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); + +#ifdef USE_CALLOUT + +extern OnigCalloutType onig_get_callout_type_by_name_id(int name_id); +extern OnigCalloutFunc onig_get_callout_start_func_by_name_id(int id); +extern OnigCalloutFunc onig_get_callout_end_func_by_name_id(int id); +extern int onig_callout_tag_table_free(void* table); +extern void onig_free_reg_callout_list(int n, CalloutListEntry* list); +extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num); +extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num); + +/* for definition of builtin callout */ +#define BC0_P(name, func) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_PROGRESS,\ + onig_builtin_ ## func, 0, 0, 0, 0, 0);\ + if (id < 0) return id;\ +} while(0) + +#define BC0_R(name, func) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_RETRACTION,\ + onig_builtin_ ## func, 0, 0, 0, 0, 0);\ + if (id < 0) return id;\ +} while(0) + +#define BC0_B(name, func) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_BOTH,\ + onig_builtin_ ## func, 0, 0, 0, 0, 0);\ + if (id < 0) return id;\ +} while(0) + +#define BC_P(name, func, na, ts) do 
{\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_PROGRESS,\ + onig_builtin_ ## func, 0, (na), (ts), 0, 0); \ + if (id < 0) return id;\ +} while(0) + +#define BC_P_O(name, func, nts, ts, nopts, opts) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_PROGRESS,\ + onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ + if (id < 0) return id;\ +} while(0) + +#define BC_B(name, func, na, ts) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_BOTH,\ + onig_builtin_ ## func, 0, (na), (ts), 0, 0);\ + if (id < 0) return id;\ +} while(0) + +#define BC_B_O(name, func, nts, ts, nopts, opts) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_BOTH,\ + onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ + if (id < 0) return id;\ +} while(0) + +#endif /* USE_CALLOUT */ + /* strend hash */ typedef void hash_table_type; diff --git a/src/regparse.c b/src/regparse.c index 1e4dc30..6e95a14 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,6 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ + #include "regparse.h" #include "st.h" @@ -33,10 +34,17 @@ #include <stdio.h> #endif +#define INIT_TAG_NAMES_ALLOC_NUM 5 + #define WARN_BUFSIZE 256 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS +#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \ + ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' /* || c == '!' */) +#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \ + ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + OnigSyntaxType OnigSyntaxOniguruma = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | @@ -50,6 +58,8 @@ OnigSyntaxType OnigSyntaxOniguruma = { ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | + ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT | @@ -188,6 +198,16 @@ onig_set_parse_depth_limit(unsigned int depth) return 0; } +static int +positive_int_multiply(int x, int y) +{ + if (x == 0 || y == 0) return 0; + + if (x < INT_MAX / y) + return x * y; + else + return -1; +} static void bbuf_free(BBuf* bbuf) @@ -331,25 +351,6 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end) } } -static UChar* -strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) -{ - int slen, term_len, i; - UChar *r; - - slen = (int )(end - s); - term_len = ONIGENC_MBC_MINLEN(enc); - - r = (UChar* )xmalloc(slen + term_len); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, slen); - - for (i = 0; i < term_len; i++) - r[slen + i] = (UChar )0; - - return r; -} - static int save_entry(ScanEnv* env, enum SaveType type, int* id) { @@ -521,6 +522,106 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, return result; } + +typedef struct { + OnigEncoding enc; + int type; // callout type: single or not + UChar* s; + UChar* end; +} 
st_callout_name_key; + +static int +callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y) +{ + UChar *p, *q; + int c; + + if (x->enc != y->enc) return 1; + if (x->type != y->type) return 1; + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +callout_name_table_hash(st_callout_name_key* x) +{ + UChar *p; + int val = 0; + + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + /* use intptr_t for escape warning in Windows */ + return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type; +} + +extern hash_table_type* +onig_st_init_callout_name_table_with_size(int size) +{ + static struct st_hash_type hashType = { + callout_name_table_cmp, + callout_name_table_hash, + }; + + return (hash_table_type* ) + onig_st_init_table_with_size(&hashType, size); +} + +extern int +onig_st_lookup_callout_name_table(hash_table_type* table, + OnigEncoding enc, + int type, + const UChar* str_key, + const UChar* end_key, + hash_data_type *value) +{ + st_callout_name_key key; + + key.enc = enc; + key.type = type; + key.s = (UChar* )str_key; + key.end = (UChar* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +static int +st_insert_callout_name_table(hash_table_type* table, + OnigEncoding enc, int type, + UChar* str_key, UChar* end_key, + hash_data_type value) +{ + st_callout_name_key* key; + int result; + + key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key)); + CHECK_NULL_RETURN_MEMERR(key); + + /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */ + key->enc = enc; + key->type = type; + key->s = str_key; + key->end = end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + #endif /* USE_ST_LIBRARY */ @@ -537,6 +638,8 @@ typedef struct { #ifdef USE_ST_LIBRARY +#define 
INIT_NAMES_ALLOC_NUM 5 + typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -862,13 +965,13 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (IS_NULL(e)) { #ifdef USE_ST_LIBRARY if (IS_NULL(t)) { - t = onig_st_init_strend_table_with_size(5); + t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM); reg->name_table = (void* )t; } e = (NameEntry* )xmalloc(sizeof(NameEntry)); CHECK_NULL_RETURN_MEMERR(e); - e->name = strdup_with_null(reg->enc, name, name_end); + e->name = onigenc_strdup(reg->enc, name, name_end); if (IS_NULL(e->name)) { xfree(e); return ONIGERR_MEMORY; } @@ -919,7 +1022,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) } e = &(t->e[t->num]); t->num++; - e->name = strdup_with_null(reg->enc, name, name_end); + e->name = onigenc_strdup(reg->enc, name, name_end); if (IS_NULL(e->name)) return ONIGERR_MEMORY; e->name_len = name_end - name; #endif @@ -1019,6 +1122,781 @@ onig_noname_group_capture_is_active(regex_t* reg) return 1; } +#ifdef USE_CALLOUT + +typedef struct { + OnigCalloutType type; + int in; + OnigCalloutFunc start_func; + OnigCalloutFunc end_func; + int arg_num; + int opt_arg_num; + unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM]; + OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM]; + UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */ +} CalloutNameListEntry; + +typedef struct { + int n; + int alloc; + CalloutNameListEntry* v; +} CalloutNameListType; + +static CalloutNameListType* GlobalCalloutNameList; + +static int +make_callout_func_list(CalloutNameListType** rs, int init_size) +{ + CalloutNameListType* s; + CalloutNameListEntry* v; + + *rs = 0; + + s = xmalloc(sizeof(*s)); + if (IS_NULL(s)) return ONIGERR_MEMORY; + + v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size); + if (IS_NULL(v)) { + xfree(s); + return ONIGERR_MEMORY; + } + + s->n = 0; + s->alloc = 
init_size; + s->v = v; + + *rs = s; + return ONIG_NORMAL; +} + +static void +free_callout_func_list(CalloutNameListType* s) +{ + if (IS_NOT_NULL(s)) { + if (IS_NOT_NULL(s->v)) { + int i, j; + + for (i = 0; i < s->n; i++) { + CalloutNameListEntry* e = s->v + i; + for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) { + if (e->arg_types[j] == ONIG_TYPE_STRING) { + UChar* p = e->opt_defaults[j].s.start; + if (IS_NOT_NULL(p)) xfree(p); + } + } + } + xfree(s->v); + } + xfree(s); + } +} + +static int +callout_func_list_add(CalloutNameListType* s, int* rid) +{ + if (s->n >= s->alloc) { + int new_size = s->alloc * 2; + CalloutNameListEntry* nv = (CalloutNameListEntry* ) + xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size); + if (IS_NULL(nv)) return ONIGERR_MEMORY; + + s->alloc = new_size; + s->v = nv; + } + + *rid = s->n; + + xmemset(&(s->v[s->n]), 0, sizeof(*(s->v))); + s->n++; + return ONIG_NORMAL; +} + + +typedef struct { + UChar* name; + int name_len; /* byte length */ + int id; +} CalloutNameEntry; + +#ifdef USE_ST_LIBRARY +typedef st_table CalloutNameTable; +#else +typedef struct { + CalloutNameEntry* e; + int num; + int alloc; +} CalloutNameTable; +#endif + +static CalloutNameTable* GlobalCalloutNameTable; +static int CalloutNameIDCounter; + +#ifdef USE_ST_LIBRARY + +static int +i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e, + void* arg ARG_UNUSED) +{ + xfree(e->name); + /*xfree(key->s); */ /* is same as e->name */ + xfree(key); + xfree(e); + return ST_DELETE; +} + +static int +callout_name_table_clear(CalloutNameTable* t) +{ + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_callout_name_entry, 0); + } + return 0; +} + +static int +global_callout_name_table_free(void) +{ + if (IS_NOT_NULL(GlobalCalloutNameTable)) { + int r = callout_name_table_clear(GlobalCalloutNameTable); + if (r != 0) return r; + + onig_st_free_table(GlobalCalloutNameTable); + GlobalCalloutNameTable = 0; + CalloutNameIDCounter = 0; + } + + return 0; +} + 
+static CalloutNameEntry* +callout_name_find(OnigEncoding enc, int is_not_single, + const UChar* name, const UChar* name_end) +{ + int r; + CalloutNameEntry* e; + CalloutNameTable* t = GlobalCalloutNameTable; + + e = (CalloutNameEntry* )NULL; + if (IS_NOT_NULL(t)) { + r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end, + (HashDataType* )((void* )(&e))); + if (r == 0) { /* not found */ + if (enc != ONIG_ENCODING_ASCII && + ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) { + enc = ONIG_ENCODING_ASCII; + onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end, + (HashDataType* )((void* )(&e))); + } + } + } + return e; +} + +#else + +static int +callout_name_table_clear(CalloutNameTable* t) +{ + int i; + CalloutNameEntry* e; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (IS_NOT_NULL(e->name)) { + xfree(e->name); + e->name = NULL; + e->name_len = 0; + e->id = 0; + e->func = 0; + } + } + if (IS_NOT_NULL(t->e)) { + xfree(t->e); + t->e = NULL; + } + t->num = 0; + } + return 0; +} + +static int +global_callout_name_table_free(void) +{ + if (IS_NOT_NULL(GlobalCalloutNameTable)) { + int r = callout_name_table_clear(GlobalCalloutNameTable); + if (r != 0) return r; + + xfree(GlobalCalloutNameTable); + GlobalCalloutNameTable = 0; + CalloutNameIDCounter = 0; + } + return 0; +} + +static CalloutNameEntry* +callout_name_find(UChar* name, UChar* name_end) +{ + int i, len; + CalloutNameEntry* e; + CalloutNameTable* t = Calloutnames; + + if (IS_NOT_NULL(t)) { + len = name_end - name; + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) + return e; + } + } + return (CalloutNameEntry* )NULL; +} + +#endif + +/* name string must be single byte char string. 
*/ +static int +callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc, + int is_not_single, UChar* name, UChar* name_end) +{ + int r; + CalloutNameEntry* e; + CalloutNameTable* t = GlobalCalloutNameTable; + + *rentry = 0; + if (name_end - name <= 0) + return ONIGERR_INVALID_CALLOUT_NAME; + + e = callout_name_find(enc, is_not_single, name, name_end); + if (IS_NULL(e)) { +#ifdef USE_ST_LIBRARY + if (IS_NULL(t)) { + t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM); + GlobalCalloutNameTable = t; + } + e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry)); + CHECK_NULL_RETURN_MEMERR(e); + + e->name = onigenc_strdup(enc, name, name_end); + if (IS_NULL(e->name)) { + xfree(e); return ONIGERR_MEMORY; + } + + r = st_insert_callout_name_table(t, enc, is_not_single, + e->name, (e->name + (name_end - name)), + (HashDataType )e); + if (r < 0) return r; + +#else + + int alloc; + + if (IS_NULL(t)) { + alloc = INIT_NAMES_ALLOC_NUM; + t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable)); + CHECK_NULL_RETURN_MEMERR(t); + t->e = NULL; + t->alloc = 0; + t->num = 0; + + t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc); + if (IS_NULL(t->e)) { + xfree(t); + return ONIGERR_MEMORY; + } + t->alloc = alloc; + GlobalCalloutNameTable = t; + goto clear; + } + else if (t->num == t->alloc) { + int i; + + alloc = t->alloc * 2; + t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc); + CHECK_NULL_RETURN_MEMERR(t->e); + t->alloc = alloc; + + clear: + for (i = t->num; i < t->alloc; i++) { + t->e[i].name = NULL; + t->e[i].name_len = 0; + t->e[i].id = 0; + } + } + e = &(t->e[t->num]); + t->num++; + e->name = onigenc_strdup(enc, name, name_end); + if (IS_NULL(e->name)) return ONIGERR_MEMORY; +#endif + + CalloutNameIDCounter++; + e->id = CalloutNameIDCounter; + e->name_len = (int )(name_end - name); + } + + *rentry = e; + return e->id; +} + +static int +is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end) +{ 
+ UChar* p; + OnigCodePoint c; + + if (name >= name_end) return 0; + + p = name; + while (p < name_end) { + c = ONIGENC_MBC_TO_CODE(enc, p, name_end); + if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c)) + return 0; + + if (p == name) { + if (c >= '0' && c <= '9') return 0; + } + + p += ONIGENC_MBC_ENC_LEN(enc, p); + } + + return 1; +} + +static int +is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end) +{ + UChar* p; + OnigCodePoint c; + + if (name >= name_end) return 0; + + p = name; + while (p < name_end) { + c = ONIGENC_MBC_TO_CODE(enc, p, name_end); + if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c)) + return 0; + + if (p == name) { + if (c >= '0' && c <= '9') return 0; + } + + p += ONIGENC_MBC_ENC_LEN(enc, p); + } + + return 1; +} + +extern int +onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type, + UChar* name, UChar* name_end, int in, + OnigCalloutFunc start_func, + OnigCalloutFunc end_func, + int arg_num, unsigned int arg_types[], + int opt_arg_num, OnigValue opt_defaults[]) +{ + int r; + int i; + int j; + int id; + int is_not_single; + CalloutNameEntry* e; + CalloutNameListEntry* fe; + + if (callout_type != ONIG_CALLOUT_TYPE_SINGLE) + return ONIGERR_INVALID_ARGUMENT; + + if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM) + return ONIGERR_INVALID_CALLOUT_ARG; + + if (opt_arg_num < 0 || opt_arg_num > arg_num) + return ONIGERR_INVALID_CALLOUT_ARG; + + if (start_func == 0 && end_func == 0) + return ONIGERR_INVALID_CALLOUT_ARG; + + if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0) + return ONIGERR_INVALID_CALLOUT_ARG; + + for (i = 0; i < arg_num; i++) { + unsigned int t = arg_types[i]; + if (t == ONIG_TYPE_VOID) + return ONIGERR_INVALID_CALLOUT_ARG; + else { + if (i >= arg_num - opt_arg_num) { + if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && + t != ONIG_TYPE_TAG) + return ONIGERR_INVALID_CALLOUT_ARG; + } + else { + if (t != ONIG_TYPE_LONG) { + t = t & 
~ONIG_TYPE_LONG; + if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG) + return ONIGERR_INVALID_CALLOUT_ARG; + } + } + } + } + + if (! is_allowed_callout_name(enc, name, name_end)) { + return ONIGERR_INVALID_CALLOUT_NAME; + } + + is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE); + id = callout_name_entry(&e, enc, is_not_single, name, name_end); + if (id < 0) return id; + + r = ONIG_NORMAL; + if (IS_NULL(GlobalCalloutNameList)) { + r = make_callout_func_list(&GlobalCalloutNameList, 10); + if (r != ONIG_NORMAL) return r; + } + + while (id >= GlobalCalloutNameList->n) { + int rid; + r = callout_func_list_add(GlobalCalloutNameList, &rid); + if (r != ONIG_NORMAL) return r; + } + + fe = GlobalCalloutNameList->v + id; + fe->type = callout_type; + fe->in = in; + fe->start_func = start_func; + fe->end_func = end_func; + fe->arg_num = arg_num; + fe->opt_arg_num = opt_arg_num; + fe->name = e->name; + + for (i = 0; i < arg_num; i++) { + fe->arg_types[i] = arg_types[i]; + } + for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) { + if (fe->arg_types[i] == ONIG_TYPE_STRING) { + OnigValue* val = opt_defaults + j; + UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end); + CHECK_NULL_RETURN_MEMERR(ds); + + fe->opt_defaults[i].s.start = ds; + fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start); + } + else { + fe->opt_defaults[i] = opt_defaults[j]; + } + } + + r = id; // return id + return r; +} + +static int +get_callout_name_id_by_name(OnigEncoding enc, int is_not_single, + UChar* name, UChar* name_end, int* rid) +{ + int r; + CalloutNameEntry* e; + + if (! 
is_allowed_callout_name(enc, name, name_end)) { + return ONIGERR_INVALID_CALLOUT_NAME; + } + + e = callout_name_find(enc, is_not_single, name, name_end); + if (IS_NULL(e)) { + return ONIGERR_UNDEFINED_CALLOUT_NAME; + } + + r = ONIG_NORMAL; + *rid = e->id; + + return r; +} + +extern OnigCalloutFunc +onig_get_callout_start_func(regex_t* reg, int callout_num) +{ + /* If used for callouts of contents, return 0. */ + CalloutListEntry* e; + + e = onig_reg_callout_list_at(reg, callout_num); + return e->start_func; +} + +extern const UChar* +onig_get_callout_tag_start(regex_t* reg, int callout_num) +{ + CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); + return e->tag_start; +} + +extern const UChar* +onig_get_callout_tag_end(regex_t* reg, int callout_num) +{ + CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); + return e->tag_end; +} + + +extern OnigCalloutType +onig_get_callout_type_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].type; +} + +extern OnigCalloutFunc +onig_get_callout_start_func_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].start_func; +} + +extern OnigCalloutFunc +onig_get_callout_end_func_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].end_func; +} + +extern int +onig_get_callout_in_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].in; +} + +static int +get_callout_arg_num_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].arg_num; +} + +static int +get_callout_opt_arg_num_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].opt_arg_num; +} + +static unsigned int +get_callout_arg_type_by_name_id(int name_id, int index) +{ + return GlobalCalloutNameList->v[name_id].arg_types[index]; +} + +static OnigValue +get_callout_opt_default_by_name_id(int name_id, int index) +{ + return GlobalCalloutNameList->v[name_id].opt_defaults[index]; +} + +extern UChar* +onig_get_callout_name_by_name_id(int name_id) +{ + 
return GlobalCalloutNameList->v[name_id].name; +} + +extern int +onig_global_callout_names_free(void) +{ + free_callout_func_list(GlobalCalloutNameList); + GlobalCalloutNameList = 0; + + global_callout_name_table_free(); + return ONIG_NORMAL; +} + + +typedef st_table CalloutTagTable; +typedef intptr_t CalloutTagVal; + +#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0) + +static int +i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg) +{ + int num; + RegexExt* ext = (RegexExt* )arg; + + num = (int )e - 1; + ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST; + return ST_CONTINUE; +} + +static int +setup_ext_callout_list_values(regex_t* reg) +{ + int i, j; + RegexExt* ext; + + ext = REG_EXTP(reg); + if (IS_NOT_NULL(ext->tag_table)) { + onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set, + (st_data_t )ext); + } + + for (i = 0; i < ext->callout_num; i++) { + CalloutListEntry* e = ext->callout_list + i; + if (e->of == ONIG_CALLOUT_OF_NAME) { + for (j = 0; j < e->u.arg.num; j++) { + if (e->u.arg.types[j] == ONIG_TYPE_TAG) { + UChar* start; + UChar* end; + int num; + start = e->u.arg.vals[j].s.start; + end = e->u.arg.vals[j].s.end; + num = onig_get_callout_num_by_tag(reg, start, end); + if (num < 0) return num; + e->u.arg.vals[j].tag = num; + } + } + } + } + + return ONIG_NORMAL; +} + +extern int +onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num) +{ + RegexExt* ext = REG_EXTP(reg); + + if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0; + if (callout_num > ext->callout_num) return 0; + + return (ext->callout_list[callout_num].flag & + CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 
1 : 0; +} + +static int +i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED) +{ + xfree(key); + return ST_DELETE; +} + +static int +callout_tag_table_clear(CalloutTagTable* t) +{ + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_callout_tag_entry, 0); + } + return 0; +} + +extern int +onig_callout_tag_table_free(void* table) +{ + CalloutTagTable* t = (CalloutTagTable* )table; + + if (IS_NOT_NULL(t)) { + int r = callout_tag_table_clear(t); + if (r != 0) return r; + + onig_st_free_table(t); + } + + return 0; +} + +extern int +onig_get_callout_num_by_tag(regex_t* reg, + const UChar* tag, const UChar* tag_end) +{ + int r; + RegexExt* ext; + CalloutTagVal e; + + ext = REG_EXTP(reg); + if (IS_NULL(ext) || IS_NULL(ext->tag_table)) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + r = onig_st_lookup_strend(ext->tag_table, tag, tag_end, + (HashDataType* )((void* )(&e))); + if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + return (int )e; +} + +static CalloutTagVal +callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end) +{ + CalloutTagVal e; + + e = -1; + if (IS_NOT_NULL(t)) { + onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); + } + return e; +} + +static int +callout_tag_table_new(CalloutTagTable** rt) +{ + CalloutTagTable* t; + + *rt = 0; + t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM); + CHECK_NULL_RETURN_MEMERR(t); + + *rt = t; + return ONIG_NORMAL; +} + +static int +callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end, + CalloutTagVal entry_val) +{ + int r; + CalloutTagVal val; + + if (name_end - name <= 0) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + val = callout_tag_find(t, name, name_end); + if (val >= 0) + return ONIGERR_MULTIPLEX_DEFINED_NAME; + + r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val); + if (r < 0) return r; + + return ONIG_NORMAL; +} + +static int +ext_ensure_tag_table(regex_t* reg) +{ + int r; + RegexExt* ext; + 
CalloutTagTable* t; + + ext = onig_get_regex_ext(reg); + CHECK_NULL_RETURN_MEMERR(ext); + + if (IS_NULL(ext->tag_table)) { + r = callout_tag_table_new(&t); + if (r != ONIG_NORMAL) return r; + + ext->tag_table = t; + } + + return ONIG_NORMAL; +} + +static int +callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end, + CalloutTagVal entry_val) +{ + int r; + RegexExt* ext; + CalloutListEntry* e; + + r = ext_ensure_tag_table(reg); + if (r != ONIG_NORMAL) return r; + + ext = onig_get_regex_ext(reg); + r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val); + + e = onig_reg_callout_list_at(reg, (int )entry_val); + e->tag_start = name; + e->tag_end = name_end; + + return r; +} + +#endif /* USE_CALLOUT */ + #define INIT_SCANENV_MEMENV_ALLOC_SIZE 16 @@ -1045,12 +1923,6 @@ scan_env_clear(ScanEnv* env) xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - env->num_comb_exp_check = 0; - env->comb_exp_max_regnum = 0; - env->curr_max_regnum = 0; - env->has_recursion = 0; -#endif env->parse_depth = 0; env->keep_num = 0; env->save_num = 0; @@ -1504,10 +2376,6 @@ node_new_quantifier(int lower, int upper, int by_number) if (by_number != 0) NODE_STATUS_ADD(node, NST_BY_NUMBER); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - QUANT_(node)->comb_exp_check_num = 0; -#endif - return node; } @@ -1642,6 +2510,116 @@ node_new_keep(Node** node, ScanEnv* env) return ONIG_NORMAL; } +#ifdef USE_CALLOUT + +extern void +onig_free_reg_callout_list(int n, CalloutListEntry* list) +{ + int i; + int j; + + if (IS_NULL(list)) return ; + + for (i = 0; i < n; i++) { + if (list[i].of == ONIG_CALLOUT_OF_NAME) { + for (j = 0; j < list[i].u.arg.passed_num; j++) { + if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) { + if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start)) + xfree(list[i].u.arg.vals[j].s.start); + } + } + } + else { /* ONIG_CALLOUT_OF_CONTENTS */ + if (IS_NOT_NULL(list[i].u.content.start)) { + xfree((void* )list[i].u.content.start); + } + 
} + } + + xfree(list); +} + +extern CalloutListEntry* +onig_reg_callout_list_at(regex_t* reg, int num) +{ + RegexExt* ext = REG_EXTP(reg); + CHECK_NULL_RETURN(ext); + + if (num <= 0 || num > ext->callout_num) + return 0; + + num--; + return ext->callout_list + num; +} + +static int +reg_callout_list_entry(ScanEnv* env, int* rnum) +{ +#define INIT_CALLOUT_LIST_NUM 3 + + int num; + CalloutListEntry* list; + CalloutListEntry* e; + RegexExt* ext; + + ext = onig_get_regex_ext(env->reg); + CHECK_NULL_RETURN_MEMERR(ext); + + if (IS_NULL(ext->callout_list)) { + list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM); + CHECK_NULL_RETURN_MEMERR(list); + + ext->callout_list = list; + ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM; + ext->callout_num = 0; + } + + num = ext->callout_num + 1; + if (num > ext->callout_list_alloc) { + int alloc = ext->callout_list_alloc * 2; + list = (CalloutListEntry* )xrealloc(ext->callout_list, + sizeof(CalloutListEntry) * alloc); + CHECK_NULL_RETURN_MEMERR(list); + + ext->callout_list = list; + ext->callout_list_alloc = alloc; + } + + e = ext->callout_list + (num - 1); + + e->flag = 0; + e->of = 0; + e->in = ONIG_CALLOUT_OF_CONTENTS; + e->type = 0; + e->tag_start = 0; + e->tag_end = 0; + e->start_func = 0; + e->end_func = 0; + e->u.arg.num = 0; + e->u.arg.passed_num = 0; + + ext->callout_num = num; + *rnum = num; + return ONIG_NORMAL; +} + +static int +node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id, + ScanEnv* env) +{ + *node = node_new(); + CHECK_NULL_RETURN_MEMERR(*node); + + NODE_SET_TYPE(*node, NODE_GIMMICK); + GIMMICK_(*node)->id = id; + GIMMICK_(*node)->num = num; + GIMMICK_(*node)->type = GIMMICK_CALLOUT; + GIMMICK_(*node)->detail_type = (int )callout_of; + + return ONIG_NORMAL; +} +#endif + static int make_extended_grapheme_cluster(Node** node, ScanEnv* env) { @@ -2838,7 +3816,7 @@ is_invalid_quantifier_target(Node* node) /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ static int 
-popular_quantifier_num(QuantNode* q) +quantifier_type_num(QuantNode* q) { if (q->greedy) { if (q->lower == 0) { @@ -2889,9 +3867,22 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) p = QUANT_(pnode); c = QUANT_(cnode); - pnum = popular_quantifier_num(p); - cnum = popular_quantifier_num(c); - if (pnum < 0 || cnum < 0) return ; + pnum = quantifier_type_num(p); + cnum = quantifier_type_num(c); + if (pnum < 0 || cnum < 0) { + if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) { + if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) { + int n = positive_int_multiply(p->lower, c->lower); + if (n >= 0) { + p->lower = p->upper = n; + NODE_BODY(pnode) = NODE_BODY(cnode); + goto remove_cnode; + } + } + } + + return ; + } switch(ReduceTypeTable[cnum][pnum]) { case RQ_DEL: @@ -2927,6 +3918,7 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) break; } + remove_cnode: NODE_BODY(cnode) = NULL_NODE; onig_node_free(cnode); } @@ -5508,6 +6500,452 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env); +#ifdef USE_CALLOUT + +/* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */ +static int +parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +{ + int r; + int i; + int in; + int num; + OnigCodePoint c; + UChar* code_start; + UChar* code_end; + UChar* contents; + UChar* tag_start; + UChar* tag_end; + int brace_nest; + CalloutListEntry* e; + RegexExt* ext; + OnigEncoding enc = env->enc; + UChar* p = *src; + + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + brace_nest = 0; + while (PPEEK_IS('{')) { + brace_nest++; + PINC_S; + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + } + + in = ONIG_CALLOUT_IN_PROGRESS; + code_start = p; + while (1) { + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + code_end = p; + PFETCH_S(c); + if (c == '}') { + i = brace_nest; + while (i > 0) { + if 
(PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + PFETCH_S(c); + if (c == '}') i--; + else break; + } + if (i == 0) break; + } + } + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + PFETCH_S(c); + if (c == '[') { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + tag_start = p; + while (! PEND) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + tag_end = p; + PFETCH_S(c); + if (c == ']') break; + } + if (! is_allowed_callout_tag_name(enc, tag_start, tag_end)) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else { + tag_start = tag_end = 0; + } + + if (c == 'X') { + in |= ONIG_CALLOUT_IN_RETRACTION; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else if (c == '<') { + in = ONIG_CALLOUT_IN_RETRACTION; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else if (c == '>') { /* no needs (default) */ + //in = ONIG_CALLOUT_IN_PROGRESS; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + + if (c != cterm) + return ONIGERR_INVALID_CALLOUT_PATTERN; + + r = reg_callout_list_entry(env, &num); + if (r != 0) return r; + + ext = onig_get_regex_ext(env->reg); + if (IS_NULL(ext->pattern)) { + r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); + if (r != ONIG_NORMAL) return r; + } + + if (tag_start != tag_end) { + r = callout_tag_entry(env->reg, tag_start, tag_end, num); + if (r != ONIG_NORMAL) return r; + } + + contents = onigenc_strdup(enc, code_start, code_end); + CHECK_NULL_RETURN_MEMERR(contents); + + r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); + if (r != 0) { + xfree(contents); + return r; + } + + e = onig_reg_callout_list_at(env->reg, num); + e->of = ONIG_CALLOUT_OF_CONTENTS; + e->in = in; + e->name_id = ONIG_NON_NAME_ID; + e->u.content.start = contents; + e->u.content.end = contents + (code_end - code_start); + + *src = p; + return 0; +} + +static long +parse_long(OnigEncoding enc, UChar* s, 
UChar* end, int sign_on, long max, long* rl) +{ + long v; + long d; + int flag; + UChar* p; + OnigCodePoint c; + + if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG; + + flag = 1; + v = 0; + p = s; + while (p < end) { + c = ONIGENC_MBC_TO_CODE(enc, p, end); + p += ONIGENC_MBC_ENC_LEN(enc, p); + if (c >= '0' && c <= '9') { + d = (long )(c - '0'); + if (v > (max - d) / 10) + return ONIGERR_INVALID_CALLOUT_ARG; + + v = v * 10 + d; + } + else if (sign_on != 0 && (c == '-' || c == '+')) { + if (c == '-') flag = -1; + } + else + return ONIGERR_INVALID_CALLOUT_ARG; + + sign_on = 0; + } + + *rl = flag * v; + return ONIG_NORMAL; +} + +static int +parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, + unsigned int types[], OnigValue vals[], ScanEnv* env) +{ +#define MAX_CALLOUT_ARG_BYTE_LENGTH 128 + + int r; + int n; + int esc; + int cn; + UChar* s; + UChar* e; + UChar* eesc; + OnigCodePoint c; + UChar* bufend; + UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH]; + OnigEncoding enc = env->enc; + UChar* p = *src; + + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + n = 0; + while (n < ONIG_CALLOUT_MAX_ARGS_NUM) { + c = 0; + cn = 0; + esc = 0; + eesc = 0; + bufend = buf; + s = e = p; + while (1) { + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + e = p; + PFETCH_S(c); + if (esc != 0) { + esc = 0; + if (c == '\\' || c == cterm || c == ',') { + /* */ + } + else { + e = eesc; + cn++; + } + goto add_char; + } + else { + if (c == '\\') { + esc = 1; + eesc = e; + } + else if (c == cterm || c == ',') + break; + else { + size_t clen; + + add_char: + if (skip_mode == 0) { + clen = p - e; + if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) + return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */ + + xmemcpy(bufend, e, clen); + bufend += clen; + } + cn++; + } + } + } + + if (cn != 0) { + if (skip_mode == 0) { + if ((types[n] & ONIG_TYPE_LONG) != 0) { + int fixed = 0; + if (cn > 0) { + long rl; + r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl); + if (r == 
ONIG_NORMAL) { + vals[n].l = rl; + fixed = 1; + types[n] = ONIG_TYPE_LONG; + } + } + + if (fixed == 0) { + types[n] = (types[n] & ~ONIG_TYPE_LONG); + if (types[n] == ONIG_TYPE_VOID) + return ONIGERR_INVALID_CALLOUT_ARG; + } + } + + switch (types[n]) { + case ONIG_TYPE_LONG: + break; + + case ONIG_TYPE_CHAR: + if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG; + vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend); + break; + + case ONIG_TYPE_STRING: + { + UChar* rs = onigenc_strdup(enc, buf, bufend); + CHECK_NULL_RETURN_MEMERR(rs); + vals[n].s.start = rs; + vals[n].s.end = rs + (e - s); + } + break; + + case ONIG_TYPE_TAG: + if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + vals[n].s.start = s; + vals[n].s.end = e; + break; + + case ONIG_TYPE_VOID: + case ONIG_TYPE_POINTER: + return ONIGERR_PARSER_BUG; + break; + } + } + + n++; + } + + if (c == cterm) break; + } + + if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN; + + *src = p; + return n; +} + +/* (*name[TAG]) (*name[TAG]{a,b,..}) */ +static int +parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +{ + int r; + int i; + int in; + int num; + int name_id; + int arg_num; + int max_arg_num; + int opt_arg_num; + int is_not_single; + OnigCodePoint c; + UChar* name_start; + UChar* name_end; + UChar* tag_start; + UChar* tag_end; + Node* node; + CalloutListEntry* e; + RegexExt* ext; + unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM]; + OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM]; + OnigEncoding enc = env->enc; + UChar* p = *src; + + //PFETCH_READY; + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + node = 0; + name_start = p; + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + name_end = p; + PFETCH_S(c); + if (c == cterm || c == '[' || c == '{') break; + } + + if (! 
is_allowed_callout_name(enc, name_start, name_end)) + return ONIGERR_INVALID_CALLOUT_NAME; + + if (c == '[') { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + tag_start = p; + while (! PEND) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + tag_end = p; + PFETCH_S(c); + if (c == ']') break; + } + if (! is_allowed_callout_tag_name(enc, tag_start, tag_end)) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else { + tag_start = tag_end = 0; + } + + if (c == '{') { + UChar* save; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + /* read for single check only */ + save = p; + arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env); + if (arg_num < 0) return arg_num; + + is_not_single = PPEEK_IS(cterm) ? 0 : 1; + p = save; + r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end, + &name_id); + if (r != ONIG_NORMAL) return r; + + max_arg_num = get_callout_arg_num_by_name_id(name_id); + for (i = 0; i < max_arg_num; i++) { + types[i] = get_callout_arg_type_by_name_id(name_id, i); + } + + arg_num = parse_callout_args(0, '}', &p, end, types, vals, env); + if (arg_num < 0) return arg_num; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else { + arg_num = 0; + + is_not_single = 0; + r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end, + &name_id); + if (r != ONIG_NORMAL) return r; + + max_arg_num = get_callout_arg_num_by_name_id(name_id); + for (i = 0; i < max_arg_num; i++) { + types[i] = get_callout_arg_type_by_name_id(name_id, i); + } + } + + in = onig_get_callout_in_by_name_id(name_id); + opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id); + if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) + return ONIGERR_INVALID_CALLOUT_ARG; + + if (c != cterm) + return ONIGERR_INVALID_CALLOUT_PATTERN; + + r = reg_callout_list_entry(env, &num); + if (r != 0) return r; + + ext = onig_get_regex_ext(env->reg); + if 
(IS_NULL(ext->pattern)) { + r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); + if (r != ONIG_NORMAL) return r; + } + + if (tag_start != tag_end) { + r = callout_tag_entry(env->reg, tag_start, tag_end, num); + if (r != ONIG_NORMAL) return r; + } + + r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); + if (r != ONIG_NORMAL) return r; + + e = onig_reg_callout_list_at(env->reg, num); + e->of = ONIG_CALLOUT_OF_NAME; + e->in = in; + e->name_id = name_id; + e->type = onig_get_callout_type_by_name_id(name_id); + e->start_func = onig_get_callout_start_func_by_name_id(name_id); + e->end_func = onig_get_callout_end_func_by_name_id(name_id); + e->u.arg.num = max_arg_num; + e->u.arg.passed_num = arg_num; + for (i = 0; i < max_arg_num; i++) { + e->u.arg.types[i] = types[i]; + if (i < arg_num) + e->u.arg.vals[i] = vals[i]; + else + e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i); + } + + *np = node; + *src = p; + return 0; +} +#endif + static int parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) @@ -5526,8 +6964,8 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; option = env->options; - if (PPEEK_IS('?') && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + c = PPEEK; + if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { PINC; if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; @@ -5673,6 +7111,18 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } break; +#ifdef USE_CALLOUT + case '{': + if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) + return ONIGERR_UNDEFINED_GROUP_OPTION; + + r = parse_callout_of_contents(np, ')', &p, end, env); + if (r != 0) return r; + + goto end; + break; +#endif + case '(': /* (?()...) 
*/ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) { @@ -5769,6 +7219,29 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (c != ')') goto err_if_else; } } +#ifdef USE_CALLOUT + else if (c == '?') { + if (IS_SYNTAX_OP2(env->syntax, + ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) { + if (! PEND && PPEEK_IS('{')) { + /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */ + condition_is_checker = 0; + PFETCH(c); + r = parse_callout_of_contents(&condition, ')', &p, end, env); + if (r != 0) return r; + goto end_condition; + } + } + goto any_condition; + } + else if (c == '*' && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) { + condition_is_checker = 0; + r = parse_callout_of_name(&condition, ')', &p, end, env); + if (r != 0) return r; + goto end_condition; + } +#endif else { any_condition: PUNFETCH; @@ -5782,6 +7255,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } + end_condition: CHECK_NULL_RETURN_MEMERR(condition); if (PEND) { @@ -5970,6 +7444,16 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return ONIGERR_UNDEFINED_GROUP_OPTION; } } +#ifdef USE_CALLOUT + else if (c == '*' && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) { + PINC; + r = parse_callout_of_name(np, ')', &p, end, env); + if (r != 0) return r; + + goto end; + } +#endif else { if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; @@ -6040,11 +7524,11 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ QuantNode* qnt = QUANT_(target); - int nestq_num = popular_quantifier_num(qn); - int targetq_num = popular_quantifier_num(qnt); + int nestq_num = quantifier_type_num(qn); + int targetq_num = quantifier_type_num(qnt); #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (! 
NODE_IS_BY_NUMBER(qnode) && ! NODE_IS_BY_NUMBER(target) && + if (targetq_num >= 0 && nestq_num >= 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { UChar buf[WARN_BUFSIZE]; @@ -6078,18 +7562,19 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) warn_exit: #endif - if (targetq_num >= 0) { - if (nestq_num >= 0) { - onig_reduce_nested_quantifier(qnode, target); - goto q_exit; - } - else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ + if (targetq_num >= 0 && nestq_num < 0) { + if (targetq_num == 1 || targetq_num == 2) { /* * or + */ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { qn->upper = (qn->lower == 0 ? 1 : qn->lower); } } } + else { + NODE_BODY(qnode) = target; + onig_reduce_nested_quantifier(qnode, target); + goto q_exit; + } } break; @@ -6717,6 +8202,9 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, { int r; UChar* p; +#ifdef USE_CALLOUT + RegexExt* ext; +#endif names_clear(reg); @@ -6750,6 +8238,14 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, #endif reg->num_mem = env->num_mem; + +#ifdef USE_CALLOUT + ext = REG_EXTP(reg); + if (IS_NOT_NULL(ext) && ext->callout_num > 0) { + r = setup_ext_callout_list_values(reg); + } +#endif + return r; } diff --git a/src/regparse.h b/src/regparse.h index 99fe7c9..3ffbea4 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -51,8 +51,12 @@ enum GimmickType { GIMMICK_KEEP = 1, GIMMICK_SAVE = 2, GIMMICK_UPDATE_VAR = 3, +#ifdef USE_CALLOUT + GIMMICK_CALLOUT = 4, +#endif }; + /* node type bit */ #define NODE_TYPE2BIT(type) (1<<(type)) @@ -97,7 +101,7 @@ enum GimmickType { (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) -#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) +#define ANCHOR_ANYCHAR_INF_MASK (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) enum EnclosureType { @@ -129,10 +133,12 @@ enum EnclosureType { #define BACKREFS_P(br) \ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) -#define QUANT_BODY_IS_NOT_EMPTY 0 -#define QUANT_BODY_IS_EMPTY 1 -#define QUANT_BODY_IS_EMPTY_MEM 2 -#define QUANT_BODY_IS_EMPTY_REC 3 +enum QuantBodyEmpty { + QUANT_BODY_IS_NOT_EMPTY = 0, + QUANT_BODY_IS_EMPTY = 1, + QUANT_BODY_IS_EMPTY_MEM = 2, + QUANT_BODY_IS_EMPTY_REC = 3 +}; /* node status bits */ #define NST_MIN_FIXED (1<<0) @@ -221,13 +227,10 @@ typedef struct { int lower; int upper; int greedy; - int body_empty_info; + enum QuantBodyEmpty body_empty_info; struct _Node* head_exact; struct _Node* next_head_exact; int is_refered; /* include called node. 
don't eliminate even if {0} */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ -#endif } QuantNode; typedef struct { @@ -330,6 +333,7 @@ typedef struct { enum GimmickType type; int detail_type; + int num; int id; } GimmickNode; @@ -398,15 +402,9 @@ typedef struct { int num_mem; int num_named; int mem_alloc; - MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; - MemEnv* mem_env_dynamic; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int num_comb_exp_check; - int comb_exp_max_regnum; - int curr_max_regnum; - int has_recursion; -#endif - unsigned int parse_depth; + MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; + MemEnv* mem_env_dynamic; + unsigned int parse_depth; int keep_num; int save_num; @@ -447,6 +445,10 @@ extern int onig_free_shared_cclass_table P_((void)); extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node); +#ifdef USE_CALLOUT +extern int onig_global_callout_names_free(void); +#endif + #ifdef ONIG_DEBUG extern int onig_print_names(FILE*, regex_t*); #endif diff --git a/src/regposerr.c b/src/regposerr.c index fc71eee..2e2a8e2 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -2,7 +2,7 @@ regposerr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +27,13 @@ * SUCH DAMAGE. */ +/* Can't include regint.h etc.. for conflict of regex_t. + Define ONIGURUMA_EXPORT here for onigposix.h. 
+ */ +#ifndef ONIGURUMA_EXPORT +#define ONIGURUMA_EXPORT +#endif + #include "config.h" #include "onigposix.h" diff --git a/src/regposix.c b/src/regposix.c index 0fdbcbb..895cf29 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,10 @@ onig2posix_error_code(int code) static const O2PERR o2p[] = { { ONIG_MISMATCH, REG_NOMATCH }, { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, + { ONIG_ABORT, REG_EONIG_INTERNAL }, { ONIGERR_MEMORY, REG_ESPACE }, { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, + { ONIGERR_RETRY_LIMIT_IN_MATCH_OVER, REG_EONIG_INTERNAL }, { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, @@ -117,6 +119,12 @@ onig2posix_error_code(int code) { ONIGERR_INVALID_IF_ELSE_SYNTAX, REG_BADPAT }, { ONIGERR_INVALID_ABSENT_GROUP_PATTERN, REG_BADPAT }, { ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_PATTERN, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_NAME, REG_BADPAT }, + { ONIGERR_UNDEFINED_CALLOUT_NAME, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_BODY, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_TAG_NAME, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_ARG, REG_BADPAT }, { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG }, { ONIGERR_LIBRARY_IS_NOT_INITIALIZED, REG_EONIG_INTERNAL } }; @@ -260,8 +268,7 @@ reg_set_encoding(int mb_code) break; } - onig_initialize(0, 0); - onig_initialize_encoding(enc); + onig_initialize(&enc, 1); onigenc_set_default_encoding(enc); } diff --git a/src/regsyntax.c b/src/regsyntax.c index 3817d38..aa95479 100644 --- 
a/src/regsyntax.c +++ b/src/regsyntax.c @@ -2,7 +2,7 @@ regsyntax.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -176,6 +176,8 @@ OnigSyntaxType OnigSyntaxPerl = { ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | + ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | @@ -207,6 +209,8 @@ OnigSyntaxType OnigSyntaxPerl_NG = { ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | + ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | diff --git a/src/regversion.c b/src/regversion.c index 245a001..594a52c 100644 --- a/src/regversion.c +++ b/src/regversion.c @@ -2,7 +2,7 @@ regversion.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,6 @@ * SUCH DAMAGE. 
*/ -#include "config.h" #include "regint.h" #include <stdio.h> @@ -49,7 +48,7 @@ onig_copyright(void) static char s[58]; xsnprintf(s, sizeof(s), - "Oniguruma %d.%d.%d : Copyright (C) 2002-2016 K.Kosako", + "Oniguruma %d.%d.%d : Copyright (C) 2002-2018 K.Kosako", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); @@ -2,7 +2,7 @@ sjis.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -321,8 +321,8 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, OnigEncodingType OnigEncodingSJIS = { mbc_enc_len, "Shift_JIS", /* name */ - 2, /* max byte length */ - 1, /* min byte length */ + 2, /* max enc length */ + 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, mbc_to_code, code_to_mbclen, diff --git a/src/utf16_be.c b/src/utf16_be.c index f220cca..098ab54 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,50 @@ * SUCH DAMAGE. 
*/ -#include "regenc.h" +#include "regint.h" /* for USE_CALLOUT */ + + +static int +init(void) +{ +#ifdef USE_CALLOUT + + int id; + OnigEncoding enc; + char* name; + unsigned int t_long; + unsigned int args[4]; + OnigValue opts[4]; + + enc = ONIG_ENCODING_UTF16_BE; + t_long = ONIG_TYPE_LONG; + + name = "\000F\000A\000I\000L\000\000"; BC0_P(name, fail); + name = "\000M\000I\000S\000M\000A\000T\000C\000H\000\000"; BC0_P(name, mismatch); + name = "\000M\000A\000X\000\000"; BC_B(name, max, 1, &t_long); + + name = "\000E\000R\000R\000O\000R\000\000"; + args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; + BC_P_O(name, error, 1, args, 1, opts); + + name = "\000C\000O\000U\000N\000T\000\000"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, count, 1, args, 1, opts); + + name = "\000T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, total_count, 1, args, 1, opts); + + name = "\000C\000M\000P\000\000"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_STRING; + args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + + return ONIG_NORMAL; +} static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -215,8 +258,8 @@ utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF16_BE = { utf16be_mbc_enc_len, "UTF-16BE", /* name */ - 4, /* max byte length */ - 2, /* min byte length */ + 4, /* max enc length */ + 2, /* min enc length */ utf16be_is_mbc_newline, utf16be_mbc_to_code, utf16be_code_to_mbclen, @@ -229,7 +272,7 @@ OnigEncodingType OnigEncodingUTF16_BE = { onigenc_utf16_32_get_ctype_code_range, utf16be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, - NULL, /* init */ - NULL, /* is_initialized */ + init, + 0, /* is_initialized */ is_valid_mbc_string }; diff --git a/src/utf16_le.c b/src/utf16_le.c index 89bc72f..dc0d3f1 100644 --- a/src/utf16_le.c +++ 
b/src/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,8 +26,49 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include "regint.h" /* for USE_CALLOUT */ -#include "regenc.h" +static int +init(void) +{ +#ifdef USE_CALLOUT + + int id; + OnigEncoding enc; + char* name; + unsigned int t_long; + unsigned int args[4]; + OnigValue opts[4]; + + enc = ONIG_ENCODING_UTF16_LE; + t_long = ONIG_TYPE_LONG; + + name = "F\000A\000I\000L\000\000\000"; BC0_P(name, fail); + name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch); + name = "M\000A\000X\000\000\000"; BC_B(name, max, 1, &t_long); + + name = "E\000R\000R\000O\000R\000\000\000"; + args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; + BC_P_O(name, error, 1, args, 1, opts); + + name = "C\000O\000U\000N\000T\000\000\000"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, count, 1, args, 1, opts); + + name = "T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000\000"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, total_count, 1, args, 1, opts); + + name = "C\000M\000P\000\000\000"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_STRING; + args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + + return ONIG_NORMAL; +} static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -225,8 +266,8 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF16_LE = { utf16le_mbc_enc_len, "UTF-16LE", /* name */ - 4, /* max byte length */ - 2, /* min byte length */ + 4, /* max 
enc length */ + 2, /* min enc length */ utf16le_is_mbc_newline, utf16le_mbc_to_code, utf16le_code_to_mbclen, @@ -239,7 +280,7 @@ OnigEncodingType OnigEncodingUTF16_LE = { onigenc_utf16_32_get_ctype_code_range, utf16le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, - NULL, /* init */ - NULL, /* is_initialized */ + init, + 0, /* is_initialized */ is_valid_mbc_string }; diff --git a/src/utf32_be.c b/src/utf32_be.c index d0c7f39..68760bb 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -174,8 +174,8 @@ utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF32_BE = { utf32be_mbc_enc_len, "UTF-32BE", /* name */ - 4, /* max byte length */ - 4, /* min byte length */ + 4, /* max enc length */ + 4, /* min enc length */ utf32be_is_mbc_newline, utf32be_mbc_to_code, utf32be_code_to_mbclen, diff --git a/src/utf32_le.c b/src/utf32_le.c index 33200d1..8208cd0 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -174,8 +174,8 @@ utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF32_LE = { utf32le_mbc_enc_len, "UTF-32LE", /* name */ - 4, /* max byte length */ - 4, /* min byte length */ + 4, /* max enc length */ + 4, /* min enc length */ utf32le_is_mbc_newline, utf32le_mbc_to_code, utf32le_code_to_mbclen, @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -280,8 +280,8 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF8 = { mbc_enc_len, "UTF-8", /* name */ - 6, /* max byte length */ - 1, /* min byte length */ + 6, /* max enc length */ + 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, mbc_to_code, code_to_mbclen, diff --git a/test-driver b/test-driver index 8e575b0..0218a01 100755 --- a/test-driver +++ b/test-driver @@ -1,9 +1,9 @@ #! /bin/sh # test-driver - basic testsuite driver script. -scriptversion=2013-07-13.22; # UTC +scriptversion=2016-01-11.22; # UTC -# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# Copyright (C) 2011-2017 Free Software Foundation, Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -143,6 +143,6 @@ echo ":copy-in-global-log: $gcopy" >> $trs_file # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-time-zone: "UTC" +# time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: diff --git a/test/Makefile.am b/test/Makefile.am index 9643bb0..6559a77 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -2,7 +2,7 @@ lib_onig = ../src/libonig.la AM_LDFLAGS = -L$(prefix)/lib -AM_CFLAGS = +AM_CFLAGS = -Wall -Wno-invalid-source-encoding AM_CPPFLAGS = -I$(top_srcdir)/src -I$(includedir) TESTS = test_utf8 testc testp testcu @@ -27,7 +27,8 @@ testc_LDADD = $(lib_onig) testp_SOURCES = testc.c testp_LDADD = $(lib_onig) -testp_CFLAGS = -DPOSIX_TEST +testp_CFLAGS = -DPOSIX_TEST -Wall -Wno-invalid-source-encoding + testcu_SOURCES = testu.c testcu_LDADD = $(lib_onig) diff --git a/test/test_utf8.c b/test/test_utf8.c index a14cacc..286158d 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -295,6 +295,7 @@ extern int main(int argc, char* argv[]) x2("(?m:a.)", "a\n", 0, 2); x2("(?m:.b)", "a\nb", 1, 3); x2(".*abc", "dddabdd\nddabc", 8, 13); + x2(".+abc", "dddabdd\nddabcaa\naaaabc", 8, 13); x2("(?m:.*abc)", "dddabddabc", 0, 10); n("(?i)(?-i)a", "A"); n("(?i)(?-i:a)", "A"); @@ -1064,9 +1065,19 @@ extern int main(int argc, char* argv[]) x2("c.*\\b", "abc", 2, 3); x2("\\b.*abc.*\\b", "abc", 0, 3); + n("(*FAIL)", "abcdefg"); + n("abcd(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)", 
"abcdefg"); + x2("(?:[ab]|(*MAX{2}).)*", "abcbaaccaaa", 0, 7); + x2("(?:(*COUNT[AB]{X})[ab]|(*COUNT[CD]{X})[cd])*(*CMP{AB,<,CD})", + "abababcdab", 5, 8); + x2("(?(?{....})123|456)", "123", 0, 3); + x2("(?(*FAIL)123|456)", "456", 0, 3); + + e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); e("(?<abc>\\g<abc>)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); e("(?<=(?>abc))", "abc", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + e("(*FOO)", "abcdefg", ONIGERR_UNDEFINED_CALLOUT_NAME); fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", diff --git a/test/testc.c b/test/testc.c index 1a60c8a..e37665a 100644 --- a/test/testc.c +++ b/test/testc.c @@ -2,9 +2,6 @@ * This program was generated by testconv.rb. */ #include "config.h" -#ifdef ONIG_ESCAPE_UCHAR_COLLISION -#undef ONIG_ESCAPE_UCHAR_COLLISION -#endif #include <stdio.h> #ifdef POSIX_TEST @@ -690,12 +687,6 @@ extern int main(int argc, char* argv[]) x2("^\\X$", "\x0d\x0a", 0, 2); x2("^\\X\\X\\X$", "ab\x0d\x0a", 0, 4); - /* - < ifndef IGNORE_EUC_JP > - for testcases print warnings #63 - warning: illegal character encoding in string literal [-Winvalid-source-encoding] - */ -#ifndef IGNORE_EUC_JP x2("", "", 0, 0); x2("", "", 0, 2); n("", ""); @@ -977,8 +968,6 @@ extern int main(int argc, char* argv[]) n("\\P{Hiragana}", ""); #endif -#endif /* IGNORE_EUC_JP */ - fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", nsucc, nfail, nerror, onig_version()); diff --git a/test/testu.c b/test/testu.c index d037194..d2bb9ee 100644 --- a/test/testu.c +++ b/test/testu.c @@ -139,7 +139,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not) if (r) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r, &einfo); + onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(err_file, "ERROR: %s\n", s); nerror++; return ; @@ -150,7 +150,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not) region, ONIG_OPTION_NONE); if (r < 
ONIG_MISMATCH) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(s, r); + onig_error_code_to_str((UChar* )s, r); fprintf(err_file, "ERROR: %s\n", s); nerror++; return ; diff --git a/windows/testc.c b/windows/testc.c index ef8085e..c09c6de 100644 --- a/windows/testc.c +++ b/windows/testc.c @@ -159,8 +159,9 @@ static void n(char* pattern, char* str) extern int main(int argc, char* argv[])
{
#ifndef POSIX_TEST
- static OnigEncoding use_encs[] = { ONIG_ENCODING_SJIS };
+ static OnigEncoding use_encs[1];
+ use_encs[0] = ONIG_ENCODING_SJIS;
onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));
#endif
|