diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2020-04-21 06:47:36 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2020-04-21 06:47:36 +0200 |
commit | a792c4308d32f68f34131ff89e124190e4513c38 (patch) | |
tree | 68d5ba9857cafd597ea84011077a83e43f2d1620 | |
parent | 9e629c8f43b43617fa5b7d3654f7d81e81b8a427 (diff) | |
parent | d1ffcb88ee95aded1bc4aef56f4f39951ad03ceb (diff) |
Merge branch 'release/debian/6.9.5-1' debian/6.9.5-1
89 files changed, 14402 insertions, 9540 deletions
@@ -48,6 +48,8 @@ m4/*.m4 /test/testcu /test/testp /test/test_regset +/test/test_syntax +/test/test_back /test/kofu-utf8.txt # sample/ @@ -68,8 +70,8 @@ m4/*.m4 /sample/log* /harnesses/utf16*.dict -/harnesses/*-libfuzzer -/harnesses/main-* +/harnesses/fuzzer-* +/harnesses/read-* /harnesses/libfuzzer-onig /harnesses/libfuzzer-onig-full /harnesses/slow-unit-* diff --git a/CMakeLists.txt b/CMakeLists.txt index bce888a..29a1417 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,13 +1,13 @@ cmake_minimum_required(VERSION 3.1) project(oniguruma - VERSION 6.9.4 + VERSION 6.9.5 LANGUAGES C) set(PACKAGE onig) set(PACKAGE_VERSION ${PROJECT_VERSION}) option(BUILD_SHARED_LIBS "Build shared libraries" ON) -option(ENABLE_POSIX_API "Include POSIX API" ON) +option(ENABLE_POSIX_API "Include POSIX API" OFF) if(MSVC) option(MSVC_STATIC_RUNTIME "Build with static runtime" OFF) endif() @@ -1,7 +1,7 @@ Oniguruma LICENSE ----------------- -Copyright (c) 2002-2019 K.Kosako <kkosako0@gmail.com> +Copyright (c) 2002-2020 K.Kosako <kkosako0@gmail.com> All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -1,9 +1,29 @@ History +2020/04/DD: Version 6.9.5 + +2020/04/12: Release Candidate 2 for Version 6.9.5 +2020/04/09: fix a problem (found by oss-fuzz test on my PC) +2020/04/05: Release Candidate 1 for Version 6.9.5 +2020/03/30: remove src/*.py and src/*.sh from distribution files +2020/03/27: NEW: Code point sequence notation \x{HHHH ...}, \o{OOOO ...} +2020/03/24: NEW API: maximum nesting level of subexp call +2020/03/22: #165: change enable-posix-api default from YES to NO +2020/03/15: update Unicode version to 13.0.0 +2020/03/10: add test_back.c +2020/03/08: tune output of debug in print_optimize_info() +2020/03/02: fix #186: Allow regset search to succeed at end of string +2020/02/13: NEW API: retry-limit-in-search functions +2020/01/20: add ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND flag +2019/12/27: add USE_REGSET switch +2019/12/20: remove OPTIMIZE_STR_CASE_FOLD +2019/12/13: add test/test_syntax.c +2019/12/13: add ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH flag + + 2019/11/29: Version 6.9.4 2019/11/22: Release Candidate 3 for Version 6.9.4 - 2019/11/20: fix a problem found by libFuzzer test 2019/11/14: Release Candidate 2 for Version 6.9.4 2019/11/12: fix integer overflow by nested quantifier diff --git a/Makefile.am b/Makefile.am index ac5e27f..fc3885b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -42,9 +42,22 @@ all-test: archive: git archive --format=tar --prefix=oniguruma/ HEAD | gzip > ../oniguruma-archive.tar.gz +tar: + cd ..; tar cvf oniguruma-`date +%Y%m%d`.tar oniguruma; gzip oniguruma-`date +%Y%m%d`.tar + +debug: + make clean + ./configure CFLAGS="-O0 -g" + make + +debug_out: + make clean + ./configure CFLAGS="-O0 -g -DONIG_DEBUG_PARSE -DONIG_DEBUG_COMPILE" + make + sanitize: make clean - ./configure --enable-posix-api=no CC=clang CFLAGS="-O -g -fsanitize=address" LDFLAGS="-fsanitize=address" + ./configure CFLAGS="-O -g -fsanitize=address" LDFLAGS="-fsanitize=address" make make all-test @@ 
-27,6 +27,17 @@ Supported character encodings: * doc/SYNTAX.md: contributed by seanofw +Version 6.9.5 +------------- + +* POSIX API disabled by default for Unix (* Enabled by: configure --enable-posix-api=yes) +* Update Unicode version 13.0.0 +* NEW: Code point sequence notation \x{HHHH HHHH ...}, \o{OOOO OOOO ...} +* NEW API: retry limit in search functions +* NEW API: maximum nesting level of subexp call +* Fixed behavior of isolated options in Perl and Java syntaxes. /...(?i).../ + + Version 6.9.4 ------------- @@ -216,7 +227,16 @@ Install (I have checked by Visual Studio Community 2015) +Alternatively, you can build and install oniguruma using [vcpkg](https://github.com/microsoft/vcpkg/) dependency manager: + + 1. git clone https://github.com/Microsoft/vcpkg.git + 2. cd vcpkg + 3. ./bootstrap-vcpkg.bat + 4. ./vcpkg integrate install + 5. ./vcpkg install oniguruma +The oniguruma port in vcpkg is kept up to date by microsoft team members and community contributors. +If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. Regular Expressions ------------------- diff --git a/configure.ac b/configure.ac index ac51e85..74c20e3 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. 
-AC_INIT(onig, 6.9.4) +AC_INIT(onig, 6.9.5) AC_CONFIG_MACRO_DIR([m4]) @@ -19,14 +19,14 @@ AC_SUBST(STATISTICS) dnl check for POSIX API AC_ARG_ENABLE([posix-api], [AS_HELP_STRING([--enable-posix-api], - [turn on to include POSIX API [default=yes]])], + [turn on to include POSIX API [default=no]])], [\ case "${enableval}" in yes) enable_posix_api=yes ;; no) enable_posix_api=no ;; *) AC_MSG_ERROR(bad value for --enable-posix-api) ;; esac], -enable_posix_api=yes) +enable_posix_api=no) AM_CONDITIONAL(ENABLE_POSIX_API, test x"${enable_posix_api}" = xyes) @@ -57,6 +57,8 @@ AC_CHECK_HEADERS(sys/time.h unistd.h sys/times.h) dnl Checks for typedefs, structures, and compiler characteristics. AC_CHECK_SIZEOF([int]) AC_CHECK_SIZEOF([long]) +AC_CHECK_SIZEOF([long long]) +AC_CHECK_SIZEOF([void*]) dnl Checks for library functions. AC_FUNC_ALLOCA diff --git a/debian/changelog b/debian/changelog index 8dada6e..85525aa 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,16 @@ +libonig (6.9.5-1) unstable; urgency=medium + + * New upstream release. + - Refresh symbols file. + * Declare compliance with Debian Policy 4.5.0 (No changes needed). + * debian/copyright: + - Add year 2020. + * Remove unused patches: + - debian/patches/0105-CVE-2019-13224.patch, + - debian/patches/0110-CVE-2019-13225.patch. + + -- Jörg Frings-Fürst <debian@jff.email> Mon, 20 Apr 2020 22:35:52 +0200 + libonig (6.9.4-1) unstable; urgency=medium * Neu upstream release. 
diff --git a/debian/control b/debian/control index a277d0f..fc0c05f 100644 --- a/debian/control +++ b/debian/control @@ -4,7 +4,7 @@ Priority: extra Maintainer: Jörg Frings-Fürst <debian@jff.email> Build-Depends: debhelper-compat (= 12) -Standards-Version: 4.4.1.1 +Standards-Version: 4.5.0 Rules-Requires-Root: no Homepage: https://github.com/kkos/oniguruma Vcs-Git: git://jff.email/opt/git/libonig.git diff --git a/debian/copyright b/debian/copyright index 6b10c03..291fcf8 100644 --- a/debian/copyright +++ b/debian/copyright @@ -2,7 +2,7 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0 Source: https://github.com/kkos/oniguruma Files: * -Copyright: 2002-2019 K.Kosako <kkosako0@gmail.com> +Copyright: 2002-2020 K.Kosako <kkosako0@gmail.com> License: BSD-2-clause License: BSD-2-clause @@ -30,7 +30,7 @@ License: BSD-2-clause Files: debian/* Copyright: 2006-2008 Max Kellermann <max@duempel.org> - 2014-2019 Jörg Frings-Fürst <debian@jff.email> + 2014-2020 Jörg Frings-Fürst <debian@jff.email> License: GPL-2+ License: GPL-2+ diff --git a/debian/patches/0105-CVE-2019-13224.patch b/debian/patches/0105-CVE-2019-13224.patch deleted file mode 100644 index 6ea4f95..0000000 --- a/debian/patches/0105-CVE-2019-13224.patch +++ /dev/null @@ -1,38 +0,0 @@ -Description: CVE-2019-13224 - don't allow different encodings for onig_new_deluxe() -Origin: upstream, https://github.com/kkos/oniguruma/commit/0f7f61ed1b7b697e283e37bd2d731d0bd57adb55 -Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=931878 -Last-Update: 2019-07-12 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ -Index: trunk/src/regext.c -=================================================================== ---- trunk.orig/src/regext.c -+++ trunk/src/regext.c -@@ -29,6 +29,7 @@ - - #include "regint.h" - -+#if 0 - static void - conv_ext0be32(const UChar* s, const UChar* end, UChar* conv) - { -@@ -158,6 +159,7 @@ conv_encoding(OnigEncoding from, OnigEnc - - return 
ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION; - } -+#endif - - extern int - onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, -@@ -169,9 +171,7 @@ onig_new_deluxe(regex_t** reg, const UCh - if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; - - if (ci->pattern_enc != ci->target_enc) { -- r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end, -- &cpat, &cpat_end); -- if (r != 0) return r; -+ return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION; - } - else { - cpat = (UChar* )pattern; diff --git a/debian/patches/0110-CVE-2019-13225.patch b/debian/patches/0110-CVE-2019-13225.patch deleted file mode 100644 index be9e152..0000000 --- a/debian/patches/0110-CVE-2019-13225.patch +++ /dev/null @@ -1,66 +0,0 @@ -Description: CVE-2019-13225 - problem in converting if-then-else pattern to bytecode. -Origin: upstream, https://github.com/kkos/oniguruma/commit/c509265c5f6ae7264f7b8a8aae1cfa5fc59d108c -Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=931878 -Last-Update: 2019-07-12 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ -Index: trunk/src/regcomp.c -=================================================================== ---- trunk.orig/src/regcomp.c -+++ trunk/src/regcomp.c -@@ -1307,8 +1307,9 @@ compile_length_bag_node(BagNode* node, r - len += tlen; - } - -+ len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END; -+ - if (IS_NOT_NULL(Else)) { -- len += SIZE_OP_JUMP; - tlen = compile_length_tree(Else, reg); - if (tlen < 0) return tlen; - len += tlen; -@@ -1455,7 +1456,7 @@ compile_bag_node(BagNode* node, regex_t* - - case BAG_IF_ELSE: - { -- int cond_len, then_len, jump_len; -+ int cond_len, then_len, else_len, jump_len; - Node* cond = NODE_BAG_BODY(node); - Node* Then = node->te.Then; - Node* Else = node->te.Else; -@@ -1472,8 +1473,7 @@ compile_bag_node(BagNode* node, regex_t* - else - then_len = 0; - -- jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END; -- if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP; -+ 
jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP; - - r = add_op(reg, OP_PUSH); - if (r != 0) return r; -@@ -1490,11 +1490,20 @@ compile_bag_node(BagNode* node, regex_t* - } - - if (IS_NOT_NULL(Else)) { -- int else_len = compile_length_tree(Else, reg); -- r = add_op(reg, OP_JUMP); -- if (r != 0) return r; -- COP(reg)->jump.addr = else_len + SIZE_INC_OP; -+ else_len = compile_length_tree(Else, reg); -+ if (else_len < 0) return else_len; -+ } -+ else -+ else_len = 0; - -+ r = add_op(reg, OP_JUMP); -+ if (r != 0) return r; -+ COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP; -+ -+ r = add_op(reg, OP_ATOMIC_END); -+ if (r != 0) return r; -+ -+ if (IS_NOT_NULL(Else)) { - r = compile_tree(Else, reg, env); - } - } diff --git a/debian/patches/series b/debian/patches/series index 1c34712..ea79fff 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1 @@ 0100-source_typos.patch -#0105-CVE-2019-13224.patch -#0110-CVE-2019-13225.patch diff --git a/debian/rules b/debian/rules index ee95689..833094f 100755 --- a/debian/rules +++ b/debian/rules @@ -1,19 +1,13 @@ #!/usr/bin/make -f -# -*- makefile -*- -# Sample debian/rules that uses debhelper. -# This file was originally written by Joey Hess and Craig Small. -# As a special exception, when this file is copied by dh-make into a -# dh-make output file, you may use that output file without restriction. -# This special exception was added by Craig Small in version 0.37 of dh-make. # Uncomment this to turn on verbose mode. 
#export DH_VERBOSE=1 # -# Test for gcc-6 support +# Test for gcc-10 support # -#export CC=gcc-6 -#export CXX=g++-6 +#export CC=gcc-10 +#export CXX=g++-10 export DEB_BUILD_MAINT_OPTIONS = hardening=+all diff --git a/debian/symbols b/debian/symbols index 19e8a59..1659176 100644 --- a/debian/symbols +++ b/debian/symbols @@ -117,10 +117,12 @@ libonig.so.5 libonig5 #MINVER# onig_get_retraction_callout@Base 6.8.1 onig_get_retry_counter_by_callout_args@Base 6.8.1 onig_get_retry_limit_in_match@Base 6.8.1 + onig_get_retry_limit_in_search@Base 6.9.5 onig_get_right_range_by_callout_args@Base 6.8.1 onig_get_start_by_callout_args@Base 6.8.1 onig_get_string_by_callout_args@Base 6.8.1 onig_get_string_end_by_callout_args@Base 6.8.1 + onig_get_subexp_call_max_nest_level@Base 6.9.5 onig_get_syntax@Base 6.8.1 onig_get_syntax_behavior@Base 6.8.1 onig_get_syntax_op2@Base 6.8.1 @@ -147,12 +149,14 @@ libonig.so.5 libonig5 #MINVER# onig_new_deluxe@Base 6.8.1 onig_new_match_param@Base 6.8.1 onig_new_without_alloc@Base 6.8.1 + onig_node_copy@Base 6.9.5 onig_node_free@Base 6.8.1 onig_node_new_alt@Base 6.8.1 - onig_node_new_anchor@Base 6.8.1 onig_node_new_bag@Base 6.9.1 onig_node_new_list@Base 6.8.1 onig_node_new_str@Base 6.8.1 + onig_node_reset_empty@Base 6.9.5 + onig_node_reset_fail@Base 6.9.5 onig_node_str_cat@Base 6.8.1 onig_node_str_clear@Base 6.8.1 onig_node_str_set@Base 6.8.1 @@ -206,6 +210,9 @@ libonig.so.5 libonig5 #MINVER# onig_set_retraction_callout_of_match_param@Base 6.8.1 onig_set_retry_limit_in_match@Base 6.8.1 onig_set_retry_limit_in_match_of_match_param@Base 6.8.1 + onig_set_retry_limit_in_search@Base 6.9.5 + onig_set_retry_limit_in_search_of_match_param@Base 6.9.5 + onig_set_subexp_call_max_nest_level@Base 6.9.5 onig_set_syntax_behavior@Base 6.8.1 onig_set_syntax_op2@Base 6.8.1 onig_set_syntax_op@Base 6.8.1 @@ -304,11 +311,3 @@ libonig.so.5 libonig5 #MINVER# re_mbcinit@Base 6.8.1 re_search@Base 6.8.1 re_set_casetable@Base 6.8.1 - reg_foreach_name@Base 6.8.1 - 
reg_name_to_group_numbers@Base 6.8.1 - reg_number_of_names@Base 6.8.1 - reg_set_encoding@Base 6.8.1 - regcomp@Base 6.8.1 - regerror@Base 6.8.1 - regexec@Base 6.8.1 - regfree@Base 6.8.1 @@ -1,4 +1,4 @@ -Oniguruma API Version 6.9.4 2019/09/30 +Oniguruma API Version 6.9.5 2020/03/25 #include <oniguruma.h> @@ -273,6 +273,18 @@ Oniguruma API Version 6.9.4 2019/09/30 normal return: ONIG_NORMAL +# int onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* mp, unsigned long limit) + + Set a retry limit count of a search process. + 0 means unlimited. + + arguments + 1 mp: match-param pointer + 2 limit: number of limit + + normal return: ONIG_NORMAL + + # int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) Set a function for callouts of contents in progress. @@ -333,7 +345,7 @@ Oniguruma API Version 6.9.4 2019/09/30 arguments 1-7: same as onig_search() - 8 mp: match parameter values (match_stack_limit, retry_limit_in_match) + 8 mp: match parameter values (match_stack_limit, retry_limit_in_match, retry_limit_in_search) # int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, @@ -368,7 +380,7 @@ Oniguruma API Version 6.9.4 2019/09/30 arguments 1-6: same as onig_match() - 7 mp: match parameter values (match_stack_limit, retry_limit_in_match) + 7 mp: match parameter values (match_stack_limit, retry_limit_in_match, retry_limit_in_search) # int onig_scan(regex_t* reg, const UChar* str, const UChar* end, @@ -599,8 +611,8 @@ Oniguruma API Version 6.9.4 2019/09/30 # int onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) Iterate function call for all names. @@ -866,19 +878,51 @@ Oniguruma API Version 6.9.4 2019/09/30 # unsigned long onig_get_retry_limit_in_match(void) - Return the limit of retry counts in matching process. + Return the limit of retry counts in a matching process. 
(default: 10000000) - normal return: limit value + normal return: current limit value + + +# unsigned long onig_get_retry_limit_in_search(void) + + Return the limit of retry counts in a search process. + 0 means unlimited. + (default: 0) + + normal return: current limit value -# int onig_set_retry_limit_in_match(unsigned long n) +# int onig_set_retry_limit_in_match(unsigned long limit) Set the limit of retry counts in matching process. normal return: ONIG_NORMAL +# int onig_set_retry_limit_in_search(unsigned long limit) + + Set a retry limit count of a search process. + 0 means unlimited. + + normal return: ONIG_NORMAL + + +# int onig_get_subexp_call_max_nest_level(void) + + Return the limit of subexp call nest level. + (default: 24) + + normal return: current limit value + + +# int onig_set_subexp_call_max_nest_level(int max_level) + + Set a limit level of subexp call nest level. + + normal return: ONIG_NORMAL + + # OnigCalloutFunc onig_get_progress_callout(void) Get a function for callouts of contents in progress. 
@@ -1,4 +1,4 @@ -鬼車インターフェース Version 6.9.4 2019/09/30 +鬼車インターフェース Version 6.9.5 2020/03/25 #include <oniguruma.h> @@ -263,7 +263,19 @@ # int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit) - 一回のマッチでのリトライ数の最大値をセットする。 + 一回のマッチでのリトライ数の制限値をセットする。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 limit: 制限回数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* mp, unsigned long limit) + + 一回の検索でのリトライ数の制限値をセットする。 + 0は無制限を意味する。 引数 1 mp: マッチパラメタオブジェクトアドレス @@ -331,7 +343,7 @@ 引数 1-7: onig_search()と同じ - 8 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match) + 8 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match, retry_limit_in_search) # int onig_match(regex_t* reg, const UChar* str, const UChar* end, @@ -365,7 +377,7 @@ 引数 1-6: onig_match()と同じ - 7 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match) + 7 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match, retry_limit_in_search) # int onig_scan(regex_t* reg, const UChar* str, const UChar* end, @@ -600,8 +612,8 @@ # int onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) 全ての名前に対してコールバック関数呼び出しを実行する。 @@ -872,15 +884,48 @@ # unsigned long onig_get_retry_limit_in_match(void) - マッチング関数内でのリトライ数の制限値を返す。 + 一回のマッチングでのリトライ数の制限値を返す。 (デフォルト: 10000000) 正常終了戻り値: 制限値 -# int onig_set_retry_limit_in_match(unsigned long n) +# unsigned long onig_get_retry_limit_in_search(void) + + 一回の検索でのリトライ数の制限値を返す。 + 0は無制限を意味する。 + (デフォルト: 0) + + 正常終了戻り値: 制限値 + + +# int onig_set_retry_limit_in_match(unsigned long limit) + + 一回のマッチング内でのリトライ数の制限値を指定する。 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retry_limit_in_search(unsigned long limit) + + 一回の検索でのリトライ数の制限値をセットする。 + 0は無制限を意味する。 + (デフォルト: 0) + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_get_subexp_call_max_nest_level(void) + + 部分式呼出しのネストレベルの最大値を返す。 + (デフォルト: 24) + + 正常終了戻り値: 制限値 + + +# int 
onig_set_subexp_call_max_nest_level(int max_level) - マッチング関数内でのリトライ数の制限値を指定する。 + 部分式呼出しのネストレベルの最大値を指定する。 正常終了戻り値: ONIG_NORMAL @@ -1,6 +1,6 @@ -Oniguruma Regular Expressions Version 6.9.4 2019/10/31 +Oniguruma Regular Expressions Version 6.9.5 2020/04/09 -syntax: ONIG_SYNTAX_ONIGURUMA (default) +syntax: ONIG_SYNTAX_ONIGURUMA (default syntax) 1. Syntax elements @@ -21,19 +21,28 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) \f form feed (0x0C) \a bell (0x07) \e escape (0x1B) - \nnn octal char (encoded byte value) - \o{17777777777} wide octal char (character code point value) - \uHHHH wide hexadecimal char (character code point value) - \xHH hexadecimal char (encoded byte value) - \x{7HHHHHHH} wide hexadecimal char (character code point value) - \cx control char (character code point value) - \C-x control char (character code point value) - \M-x meta (x|0x80) (character code point value) - \M-\C-x meta control char (character code point value) + \nnn octal char (encoded byte value) + \xHH hexadecimal char (encoded byte value) + \x{7HHHHHHH} (1-8 digits) hexadecimal char (code point value) + \o{17777777777} (1-11 digits) octal char (code point value) + \uHHHH hexadecimal char (code point value) + \cx control char (code point value) + \C-x control char (code point value) + \M-x meta (x|0x80) (code point value) + \M-\C-x meta control char (code point value) (* \b as backspace is effective in character class only) +2.1 Code point sequences + + Hexadecimal code point (1-8 digits) + \x{7HHHHHHH 7HHHHHHH ... 7HHHHHHH} + + Octal code point (1-11 digits) + \o{17777777777 17777777777 ... 17777777777} + + 3. Character types . any character (except newline) @@ -132,6 +141,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) {,n} at least 0 but no more than n times ({0,n}) {n} n times + reluctant ?? 0 or 1 times @@ -141,6 +151,11 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) {n,}? at least n times {,n}? at least 0 but not more than n times (== {0,n}?) + {n}? 
is reluctant operator in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL only. + (In that case, it doesn't make sense to write so.) + In default syntax, /a{n}?/ === /(?:a{n})?/ + + possessive (greedy and does not backtrack once match) ?+ 1 or 0 times @@ -148,8 +163,8 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) ++ 1 or more times {n,m} (n > m) at least m but not more than n times - {n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and - ONIG_SYNTAX_PERL only. + {n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and + ONIG_SYNTAX_PERL only. ex. /a*+/ === /(?>a*)/ @@ -279,15 +294,12 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) (?=subexp) look-ahead (?!subexp) negative look-ahead + (?<=subexp) look-behind (?<!subexp) negative look-behind - Subexp of look-behind must be fixed-width. - But top-level alternatives can be of various lengths. - ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed. - - In negative look-behind, capturing group isn't allowed, - but non-capturing group (?:) is allowed. + * Cannot use Absent stopper (?~|expr) and Range clear + (?~|) operators in look-behind and negative look-behind. * In look-behind and negative look-behind, support for ignore-case option is limited. 
Only supports conversion @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.9.4 2019/10/31 +鬼車 正規表現 Version 6.9.5 2020/04/09 使用文法: ONIG_SYNTAX_ONIGURUMA (既定値) @@ -21,19 +21,28 @@ \f 改頁 (0x0C) \a 鐘 (0x07) \e 退避修飾 (0x1B) - \nnn 八進数表現 符号化バイト値 - \o{17777777777} 拡張八進数表現 コードポイント値 - \uHHHH 拡張十六進数表現 コードポイント値 - \xHH 十六進数表現 符号化バイト値 - \x{7HHHHHHH} 拡張十六進数表現 コードポイント値 - \cx 制御文字表現 コードポイント値 - \C-x 制御文字表現 コードポイント値 - \M-x 超 (x|0x80) コードポイント値 - \M-\C-x 超 + 制御文字表現 コードポイント値 + \nnn 八進数表現 符号化バイト値 + \xHH 十六進数表現 符号化バイト値 + \x{7HHHHHHH} (1-8桁) 拡張十六進数表現 コードポイント値 + \o{17777777777} (1-11桁) 拡張八進数表現 コードポイント値 + \uHHHH 拡張十六進数表現 コードポイント値 + \cx 制御文字表現 コードポイント値 + \C-x 制御文字表現 コードポイント値 + \M-x 超 (x|0x80) コードポイント値 + \M-\C-x 超 + 制御文字表現 コードポイント値 ※ \bは、文字集合内でのみ有効 +2.1 コードポイント連続表記 + + 十六進数表現コードポイント (1-8桁) + \x{7HHHHHHH 7HHHHHHH ... 7HHHHHHH} + + 八進数表現コードポイント (1-11桁) + \o{17777777777 17777777777 ... 17777777777} + + 3. 文字種 . 任意文字 (改行を除く: オプションに依存) @@ -131,6 +140,7 @@ {,n} 零回以上n回以下 ({0,n}) {n} n回 + 無欲 ?? 零回または一回 @@ -140,6 +150,11 @@ {n,}? n回以上 {,n}? 零回以上n回以下 (== {0,n}?) + {n}? はONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ無欲な指定子 + (その場合には、態々そう書く意味はないが) + デフォルトの文法では、/a{n}?/ === /(?:a{n})?/ + + 強欲 (欲張りで、繰り返しに成功した後は回数を減らすような後退再試行をしない) ?+ 一回または零回 @@ -147,7 +162,8 @@ ++ 一回以上 {n,m} (n > m) m回以上 かつ n回以下 - {n,m}+, {n,}+, {n}+ は、ONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ強欲な指定子 + {n,m}+, {n,}+, {n}+ は、ONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ + 強欲な指定子 例. /a*+/ === /(?>a*)/ @@ -274,15 +290,12 @@ (?=式) 先読み (?!式) 否定先読み + (?<=式) 戻り読み (?<!式) 否定戻り読み - 戻り読みの式は固定文字長でなければならない。 - しかし、最上位の選択子だけは異なった文字長が許される。 - 例. (?<=a|bc) は許可. 
(?<=aaa(?:b|cd)) は不許可 - - 否定戻り読みでは、捕獲式集合は許されないが、 - 非捕獲式集合は許される。 + * 戻り読み、否定戻り読みの式の中では、不在停止演算子 + (?~|expr)と範囲消去演算子(?~|)を使用することはできない * 戻り読み、否定戻り読みの中では、ignore-caseオプションの 対応が制限される。一文字と一文字の間の変換しか対応しない。 diff --git a/doc/SYNTAX.md b/doc/SYNTAX.md index 69ecf3a..c38e5c8 100644 --- a/doc/SYNTAX.md +++ b/doc/SYNTAX.md @@ -1,7 +1,7 @@ # Oniguruma syntax (operator) configuration -_Documented for Oniguruma 6.9.3 (2019/08/08)_ +_Documented for Oniguruma 6.9.5 (2020/01/23)_ ---------- @@ -75,7 +75,7 @@ data set by `onig_set_meta_char()` will be ignored. ### 1. ONIG_SYN_OP_DOT_ANYCHAR (enable `.`) -_Set in: PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Java, Perl, Perl_NG, Ruby_ Enables support for the standard `.` metacharacter, meaning "any one character." You usually want this flag on unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -84,7 +84,7 @@ so that you can use a metacharacter other than `.` instead. ### 2. ONIG_SYN_OP_ASTERISK_ZERO_INF (enable `r*`) -_Set in: PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the standard `r*` metacharacter, meaning "zero or more r's." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -103,7 +103,7 @@ behavior. ### 4. ONIG_SYN_OP_PLUS_ONE_INF (enable `r+`) -_Set in: PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the standard `r+` metacharacter, meaning "one or more r's." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -122,7 +122,7 @@ behavior. ### 6. 
ONIG_SYN_OP_QMARK_ZERO_ONE (enable `r?`) -_Set in: PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the standard `r?` metacharacter, meaning "zero or one r" or "an optional r." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -141,7 +141,7 @@ you want `?` to simply match a literal `?` character, but you still want some wa ### 8. ONIG_SYN_OP_BRACE_INTERVAL (enable `r{l,u}`) -_Set in: PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the `r{lower,upper}` range form, common to more advanced regex engines, which lets you specify precisely a minimum and maximum range on how many r's @@ -168,7 +168,7 @@ match literal curly brace characters, but you still want some way of activating ### 10. ONIG_SYN_OP_VBAR_ALT (enable `r|s`) -_Set in: PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `r|s` alternation operator. You usually want this flag set. @@ -185,7 +185,7 @@ match a literal `|` character, but you still want some way of activating "altern ### 12. ONIG_SYN_OP_LPAREN_SUBEXP (enable `(r)`) -_Set in: PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `(...)` grouping-and-capturing operators. You usually want this flag set. @@ -203,7 +203,7 @@ activating "grouping" or "capturing" behavior. ### 14. 
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (enable `\A` and `\Z` and `\z`) -_Set in: GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the anchors `\A` (start-of-string), `\Z` (end-of-string or newline-at-end-of-string), and `\z` (end-of-string) escapes. @@ -214,7 +214,7 @@ option will recognize that metacharacter instead.) ### 15. ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (enable `\G`) -_Set in: GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the special anchor `\G` (start-of-previous-match). @@ -231,7 +231,7 @@ exactly the same as `\A`. ### 16. ONIG_SYN_OP_DECIMAL_BACKREF (enable `\num`) -_Set in: PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for subsequent matches to back references to prior capture groups `(...)` using the common `\num` syntax (like `\3`). @@ -244,7 +244,7 @@ You usually want this enabled, and it is enabled by default in every built-in sy ### 17. ONIG_SYN_OP_BRACKET_CC (enable `[...]`) -_Set in: PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for recognizing character classes, like `[a-z]`. If this flag is not set, `[` and `]` will be treated as ordinary literal characters instead of as metacharacters. @@ -254,7 +254,7 @@ You usually want this enabled, and it is enabled by default in every built-in sy ### 18. ONIG_SYN_OP_ESC_W_WORD (enable `\w` and `\W`) -_Set in: Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `\w` and `\W` shorthand forms. 
These match "word characters," whose meaning varies depending on the encoding being used. @@ -285,7 +285,7 @@ Most regex syntaxes do _not_ support these metacharacters. ### 20. ONIG_SYN_OP_ESC_B_WORD_BOUND (enable `\b` and `\B`) -_Set in: Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `\b` and `\B` word-boundary metacharacters. The `\b` metacharacter matches a zero-width position at a transition from word-characters to non-word-characters, or vice @@ -297,7 +297,7 @@ are considered "word characters." ### 21. ONIG_SYN_OP_ESC_S_WHITE_SPACE (enable `\s` and `\S`) -_Set in: GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `\s` and `\S` whitespace-matching metacharacters. @@ -319,7 +319,7 @@ Unicode-equivalent code points, and then matching according to Unicode rules. ### 22. ONIG_SYN_OP_ESC_D_DIGIT (enable `\d` and `\D`) -_Set in: GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `\d` and `\D` digit-matching metacharacters. @@ -337,7 +337,7 @@ Unicode-equivalent code points, and then matching according to Unicode rules. ### 23. ONIG_SYN_OP_LINE_ANCHOR (enable `^r` and `r$`) -_Set in: Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `^` and `$` line-anchor metacharacters. @@ -352,7 +352,7 @@ and not any other form.) ### 24. ONIG_SYN_OP_POSIX_BRACKET (enable POSIX `[:xxxx:]`) -_Set in: PosixBasic, PosixExtended, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the POSIX `[:xxxx:]` character classes, like `[:alpha:]` and `[:digit:]`. 
The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `digit`, @@ -361,7 +361,7 @@ The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `d ### 25. ONIG_SYN_OP_QMARK_NON_GREEDY (enable `r??`, `r*?`, `r+?`, and `r{n,m}?`) -_Set in: Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Java, Perl_NG, Ruby_ Enables support for lazy (non-greedy) quantifiers: That is, if you append a `?` after another quantifier such as `?`, `*`, `+`, or `{n,m}`, Oniguruma will try to match @@ -370,7 +370,7 @@ as _little_ as possible instead of as _much_ as possible. ### 26. ONIG_SYN_OP_ESC_CONTROL_CHARS (enable `\n`, `\r`, `\t`, etc.) -_Set in: PosixBasic, PosixExtended, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Java, Perl, Perl_NG, Ruby_ Enables support for C-style control-code escapes, like `\n` and `\r`. Specifically, this recognizes `\a` (7), `\b` (8), `\t` (9), `\n` (10), `\f` (12), `\r` (13), and @@ -380,7 +380,7 @@ support for recognizing `\v` as code point 11. ### 27. ONIG_SYN_OP_ESC_C_CONTROL (enable `\cx` control codes) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for named control-code escapes, like `\cm` or `\cM` for code-point 13. In this shorthand form, control codes may be specified by `\c` (for "Control") @@ -390,7 +390,7 @@ followed by an alphabetic letter, a-z or A-Z, indicating which code point to rep ### 28. ONIG_SYN_OP_ESC_OCTAL3 (enable `\OOO` octal codes) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for octal-style escapes of up to three digits, like `\1` for code point 1, and `\177` for code point 127. Octal values greater than 255 will result @@ -399,7 +399,7 @@ in an error message. ### 29. 
ONIG_SYN_OP_ESC_X_HEX2 (enable `\xHH` hex codes) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for hexadecimal-style escapes of up to two digits, like `\x1` for code point 1, and `\x7F` for code point 127. @@ -407,7 +407,7 @@ point 1, and `\x7F` for code point 127. ### 30. ONIG_SYN_OP_ESC_X_BRACE_HEX8 (enable `\x{7HHHHHHH}` hex codes) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for brace-wrapped hexadecimal-style escapes of up to eight digits, like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534. @@ -415,7 +415,7 @@ like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534. ### 31. ONIG_SYN_OP_ESC_O_BRACE_OCTAL (enable `\o{1OOOOOOOOOO}` octal codes) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for brace-wrapped octal-style escapes of up to eleven digits, like `\o{1}` for code point 1, and `\o{177776}` for code point 65534. @@ -444,7 +444,7 @@ longer be treated as metacharacters, and instead will be matched as literal ### 1. ONIG_SYN_OP2_QMARK_GROUP_EFFECT (enable `(?...)`) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for the fairly-common `(?...)` grouping operator, which controls precedence but which does _not_ capture its contents. @@ -465,7 +465,7 @@ The supported toggle-able options for this flag are: ### 3. ONIG_SYN_OP2_OPTION_RUBY (enable options `(?imx)` and `(?-imx)`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support of regex options. (i,m,x) The supported toggle-able options for this flag are: @@ -477,7 +477,7 @@ The supported toggle-able options for this flag are: ### 4. 
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (enable `r?+`, `r*+`, and `r++`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for the _possessive_ quantifiers `?+`, `*+`, and `++`, which work similarly to `?` and `*` and `+`, respectively, but which do not backtrack @@ -499,7 +499,7 @@ extent if subsequent parts of the pattern fail to match. ### 6. ONIG_SYN_OP2_CCLASS_SET_OP (enable `&&` within `[...]`) -_Set in: Java, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Ruby_ Enables support for character-class _intersection_. For example, with this feature enabled, you can write `[a-z&&[^aeiou]]` to produce a character class @@ -509,7 +509,7 @@ all control codes _except_ newlines. ### 7. ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (enable named captures `(?<name>...)`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ Enables support for _naming_ capture groups, so that instead of having to refer to captures by position (like `\3` or `$3`), you can refer to them by names @@ -519,7 +519,7 @@ and `(?'name'...)`, but not the Python `(?P<name>...)` syntax. ### 8. ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (enable named backreferences `\k<name>`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ Enables support for substituted backreferences by name, not just by position. This supports using `\k'name'` in addition to supporting `\k<name>`. This also @@ -530,7 +530,7 @@ the match, if the capture matched multiple times, by writing `\k<name+n>` or ### 9. ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (enable backreferences `\g<name>` and `\g<n>`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ Enables support for substituted backreferences by both name and position using the same syntax. This supports using `\g'name'` and `\g'1'` in addition to @@ -554,7 +554,7 @@ enabled by default in any syntax. ### 11. 
ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (enable `\C-x`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for Ruby legacy control-code escapes, like `\C-m` or `\C-M` for code-point 13. In this shorthand form, control codes may be specified by `\C-` (for "Control") @@ -567,7 +567,7 @@ See also ONIG_SYN_OP_ESC_C_CONTROL, which enables the more-common `\cx` syntax. ### 12. ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (enable `\M-x`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for Ruby legacy meta-code escapes. When you write `\M-x`, Oniguruma will match an `x` whose 8th bit is set (i.e., the character code of `x` will be or'ed @@ -577,7 +577,7 @@ with `0x80`). So, for example, you can match `\x81` using `\x81`, or you can wr ### 13. ONIG_SYN_OP2_ESC_V_VTAB (enable `\v` as vertical tab) -_Set in: Java, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Ruby_ Enables support for a C-style `\v` escape code, meaning "vertical tab." If enabled, `\v` will be equivalent to ASCII code point 11. @@ -585,7 +585,7 @@ Enables support for a C-style `\v` escape code, meaning "vertical tab." If enab ### 14. ONIG_SYN_OP2_ESC_U_HEX4 (enable `\uHHHH` for Unicode) -_Set in: Java, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Ruby_ Enables support for a Java-style `\uHHHH` escape code for representing Unicode code-points by number, using up to four hexadecimal digits (up to `\uFFFF`). So, @@ -611,7 +611,7 @@ These anchor forms are very obscure, and rarely supported by other regex librari ### 16. ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (enable `\p{...}` and `\P{...}`) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for an alternate syntax for POSIX character classes; instead of writing `[:alpha:]` when this is enabled, you can instead write `\p{alpha}`. @@ -621,7 +621,7 @@ See also ONIG_SYN_OP_POSIX_BRACKET for the classic POSIX form. ### 17. 
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (enable `\p{^...}` and `\P{^...}`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for an alternate syntax for POSIX character classes; instead of writing `[:^alpha:]` when this is enabled, you can instead write `\p{^alpha}`. @@ -636,7 +636,7 @@ _(not presently used)_ ### 19. ONIG_SYN_OP2_ESC_H_XDIGIT (enable `\h` and `\H`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for the Ruby-specific shorthand `\h` and `\H` metacharacters. Somewhat like `\d` matches decimal digits, `\h` matches hexadecimal digits — that is, @@ -658,7 +658,7 @@ You usually do not want this flag to be enabled. ### 21. ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE (enable `(?(...)then|else)`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for conditional inclusion of subsequent regex patterns based on whether a prior named or numbered capture matched, or based on whether a pattern will @@ -676,7 +676,7 @@ match. This supports many different forms, including: ### 22. ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (enable `\K`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for `\K`, which excludes all content before it from the overall regex match (i.e., capture #0). So, for example, pattern `foo\Kbar` would match @@ -687,7 +687,7 @@ regex match (i.e., capture #0). So, for example, pattern `foo\Kbar` would match ### 23. ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (enable `\R`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for `\R`, the "general newline" shorthand, which matches `(\r\n|[\n\v\f\r\u0085\u2028\u2029])` (obviously, the Unicode values cannot be @@ -698,7 +698,7 @@ matched in ASCII encodings). ### 24. 
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (enable `\N` and `\O`) -_Set in: Perl, Perl_NG, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG_ Enables support for `\N` and `\O`. `\N` is "not a line break," which is much like the standard `.` metacharacter, except that while `.` can be affected by @@ -713,7 +713,7 @@ multi-line mode are enabled or disabled. ### 25. ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (enable `(?~...)`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for the `(?~r)` "absent operator" syntax, which matches as much as possible as long as the result _doesn't_ match pattern `r`. This is @@ -731,7 +731,7 @@ excellent article about it is [available on Medium](https://medium.com/rubyinsid ### 26. ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT (enable `\X` and `\Y` and `\y`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ `\X` is another variation on `.`, designed to support Unicode, in that it matches a full _grapheme cluster_. In Unicode, `à` can be encoded as one code point, @@ -764,7 +764,7 @@ backreferences. ### 28. ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (enable `(?{...})`) -_Set in: Perl, Perl_NG, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG_ Enables support for Perl-style "callouts" — pattern substitutions that result from invoking a callback method. When `(?{foo})` is reached in a pattern, the callback @@ -779,7 +779,7 @@ Full documentation for this advanced feature can be found in the Oniguruma ### 29. ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (enable `(*name)`) -_Set in: Perl, Perl_NG, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG_ Enables support for Perl-style "callouts" — pattern substitutions that result from invoking a callback method. When `(*foo)` is reached in a pattern, the callback @@ -820,7 +820,7 @@ some syntaxes but not in others. ### 0. 
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (independent `?`, `*`, `+`, `{n,m}`) -_Set in: PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ This flag specifies how to handle operators like `?` and `*` when they aren't directly attached to an operand, as in `^*` or `(*)`: Are they an error, are @@ -830,7 +830,7 @@ determines if they are errors or if they are discarded. ### 1. ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (error or ignore independent operators) -_Set in: PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ If ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS is set, this flag controls what happens when independent operators appear in a pattern: If this flag is set, then independent @@ -847,7 +847,7 @@ character will produce an error message. ### 3. ONIG_SYN_ALLOW_INVALID_INTERVAL (allow `{???`) -_Set in: GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ This flag, if set, causes an invalid range, like `foo{bar}` or `foo{}`, to be silently discarded, as if `foo` had been written instead. If clear, an invalid @@ -855,7 +855,7 @@ range will produce an error message. ### 4. ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (allow `{,n}` to mean `{0,n}`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ If this flag is set, then `r{,n}` will be treated as equivalent to writing `{0,n}`. If this flag is clear, then `r{,n}` will produce an error message. @@ -876,7 +876,7 @@ No built-in syntax has this flag enabled. ### 6. ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (allow `(?<=a|bc)`) -_Set in: Java, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Ruby_ If this flag is set, lookbehind patterns with alternate options may have differing lengths among those options. If this flag is clear, lookbehind patterns with options @@ -888,7 +888,7 @@ depend on this rule. ### 7. 
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (prefer `\k<name>` over `\3`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ If this flag is set on the syntax *and* ONIG_OPTION_CAPTURE_GROUP is set when calling Oniguruma, then if a name is used on any capture, all captures must also use names: A @@ -896,20 +896,33 @@ single use of a named capture prohibits the use of numbered captures. ### 8. ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (allow `(?<x>)...(?<x>)`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ If this flag is set, multiple capture groups may use the same name. If this flag is clear, then reuse of a name will produce an error message. ### 9. ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (`a{n}?` is equivalent to `(?:a{n})?`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ If this flag is set, then intervals of a fixed size will ignore a lazy (non-greedy) `?` quantifier and treat it as an optional match (an ordinary `r?`), since "match as little as possible" is meaningless for a fixed-size interval. If this flag is clear, then `r{n}?` will mean the same as `r{n}`, and the useless `?` will be discarded. +### 10. ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (`..(?i)..`) + +_Set in: Perl, Perl_NG, Java_ + +If this flag is set, then an isolated option doesn't break the branch and remains in effect until the end of the group (or end of the pattern). +If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. /a(?i)b|c/ ==> /a(?i:b|c)/ + +### 11. ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND (`(?<=...a+...)`) + +_Set in: Oniguruma, Java_ + +If this flag is set, then variable-length expressions are allowed in look-behind. + ### 20. ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (add `\n` to `[^...]`) _Set in: Grep_ @@ -921,7 +934,7 @@ only exclude those characters and ranges written in them. ### 21. 
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (allow `[...\w...]`) -_Set in: GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ If this flag is set, shorthands like `\w` are allowed to describe characters in character classes. If this flag is clear, shorthands like `\w` are treated as a redundantly-escaped @@ -937,7 +950,7 @@ character ranges will produce an error message. ### 23. ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (treat `[0-9-a]` as `[0-9\-a]`) -_Set in: PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ If this flag is set, then a trailing `-` after a character range will be taken as a literal `-`, as if it had been escaped as `\-`. If this flag is clear, then a trailing @@ -945,7 +958,7 @@ literal `-`, as if it had been escaped as `\-`. If this flag is clear, then a t ### 24. ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (warn on `[[...]` and `[-x]`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ If this flag is set, Oniguruma will be stricter about warning for bad forms in character classes: `[[...]` will produce a warning, but `[\[...]` will not; @@ -955,7 +968,7 @@ will be silently discarded. ### 25. ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (warn on `(?:a*)+`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ If this flag is set, Oniguruma will warn about nested repeat operators that have no meaning, like `(?:a*)+`. If this flag is clear, Oniguruma will allow the nested repeat operators without warning about them. @@ -968,7 +981,7 @@ If this flag is set, then invalid code points at the end of range in character c ### 31. ONIG_SYN_CONTEXT_INDEP_ANCHORS -_Set in: PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ Not currently used, and does nothing. (But still set in several syntaxes for some reason.) 
@@ -1062,10 +1075,12 @@ These tables show which of the built-in syntaxes use which flags and options, fo | 3 | `ONIG_SYN_ALLOW_INVALID_INTERVAL` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | | 4 | `ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV` | - | - | - | - | - | - | - | - | Yes | Yes | | 5 | `ONIG_SYN_STRICT_CHECK_BACKREF` | - | - | - | - | - | - | - | - | - | - | -| 6 | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND` | - | - | - | - | - | - | - | Yes | Yes | Yes | +| 6 | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND` | - | - | - | - | - | Yes | - | - | Yes | Yes | | 7 | `ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP` | - | - | - | - | - | - | - | Yes | Yes | Yes | | 8 | `ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME` | - | - | - | - | - | - | - | Yes | Yes | Yes | | 9 | `ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY` | - | - | - | - | - | - | - | - | Yes | Yes | +| 10 | `ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH` | - | - | - | - | - | Yes | Yes | Yes | - | - | +| 11 | `ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND` | - | - | - | - | - | Yes | - | - | - | Yes | | 20 | `ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC` | - | - | - | Yes | - | - | - | - | - | - | | 21 | `ONIG_SYN_BACKSLASH_ESCAPE_IN_CC` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | | 22 | `ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC` | - | - | Yes | Yes | - | - | - | - | - | - | diff --git a/doc/UNICODE_PROPERTIES b/doc/UNICODE_PROPERTIES index 24c2031..2227ada 100644 --- a/doc/UNICODE_PROPERTIES +++ b/doc/UNICODE_PROPERTIES @@ -1,4 +1,4 @@ -Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1) +Unicode Properties (Unicode Version: 13.0.0, Emoji: 13.0) 15: ASCII_Hex_Digit 16: Adlam @@ -38,225 +38,229 @@ Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1) 50: Changes_When_Titlecased 51: Changes_When_Uppercased 52: Cherokee - 53: Cn - 54: Co - 55: Common - 56: Coptic - 57: Cs - 58: Cuneiform - 59: Cypriot - 60: Cyrillic - 61: Dash - 62: Default_Ignorable_Code_Point - 63: Deprecated - 64: Deseret - 65: Devanagari - 66: Diacritic - 67: Dogra - 68: Duployan - 
69: Egyptian_Hieroglyphs - 70: Elbasan - 71: Elymaic - 72: Emoji - 73: Emoji_Component - 74: Emoji_Modifier - 75: Emoji_Modifier_Base - 76: Emoji_Presentation - 77: Ethiopic - 78: Extended_Pictographic - 79: Extender - 80: Georgian - 81: Glagolitic - 82: Gothic - 83: Grantha - 84: Grapheme_Base - 85: Grapheme_Extend - 86: Grapheme_Link - 87: Greek - 88: Gujarati - 89: Gunjala_Gondi - 90: Gurmukhi - 91: Han - 92: Hangul - 93: Hanifi_Rohingya - 94: Hanunoo - 95: Hatran - 96: Hebrew - 97: Hex_Digit - 98: Hiragana - 99: Hyphen -100: IDS_Binary_Operator -101: IDS_Trinary_Operator -102: ID_Continue -103: ID_Start -104: Ideographic -105: Imperial_Aramaic -106: Inherited -107: Inscriptional_Pahlavi -108: Inscriptional_Parthian -109: Javanese -110: Join_Control -111: Kaithi -112: Kannada -113: Katakana -114: Kayah_Li -115: Kharoshthi -116: Khmer -117: Khojki -118: Khudawadi -119: L -120: LC -121: Lao -122: Latin -123: Lepcha -124: Limbu -125: Linear_A -126: Linear_B -127: Lisu -128: Ll -129: Lm -130: Lo -131: Logical_Order_Exception -132: Lowercase -133: Lt -134: Lu -135: Lycian -136: Lydian -137: M -138: Mahajani -139: Makasar -140: Malayalam -141: Mandaic -142: Manichaean -143: Marchen -144: Masaram_Gondi -145: Math -146: Mc -147: Me -148: Medefaidrin -149: Meetei_Mayek -150: Mende_Kikakui -151: Meroitic_Cursive -152: Meroitic_Hieroglyphs -153: Miao -154: Mn -155: Modi -156: Mongolian -157: Mro -158: Multani -159: Myanmar -160: N -161: Nabataean -162: Nandinagari -163: Nd -164: New_Tai_Lue -165: Newa -166: Nko -167: Nl -168: No -169: Noncharacter_Code_Point -170: Nushu -171: Nyiakeng_Puachue_Hmong -172: Ogham -173: Ol_Chiki -174: Old_Hungarian -175: Old_Italic -176: Old_North_Arabian -177: Old_Permic -178: Old_Persian -179: Old_Sogdian -180: Old_South_Arabian -181: Old_Turkic -182: Oriya -183: Osage -184: Osmanya -185: Other_Alphabetic -186: Other_Default_Ignorable_Code_Point -187: Other_Grapheme_Extend -188: Other_ID_Continue -189: Other_ID_Start -190: Other_Lowercase 
-191: Other_Math -192: Other_Uppercase -193: P -194: Pahawh_Hmong -195: Palmyrene -196: Pattern_Syntax -197: Pattern_White_Space -198: Pau_Cin_Hau -199: Pc -200: Pd -201: Pe -202: Pf -203: Phags_Pa -204: Phoenician -205: Pi -206: Po -207: Prepended_Concatenation_Mark -208: Ps -209: Psalter_Pahlavi -210: Quotation_Mark -211: Radical -212: Regional_Indicator -213: Rejang -214: Runic -215: S -216: Samaritan -217: Saurashtra -218: Sc -219: Sentence_Terminal -220: Sharada -221: Shavian -222: Siddham -223: SignWriting -224: Sinhala -225: Sk -226: Sm -227: So -228: Soft_Dotted -229: Sogdian -230: Sora_Sompeng -231: Soyombo -232: Sundanese -233: Syloti_Nagri -234: Syriac -235: Tagalog -236: Tagbanwa -237: Tai_Le -238: Tai_Tham -239: Tai_Viet -240: Takri -241: Tamil -242: Tangut -243: Telugu -244: Terminal_Punctuation -245: Thaana -246: Thai -247: Tibetan -248: Tifinagh -249: Tirhuta -250: Ugaritic -251: Unified_Ideograph -252: Unknown -253: Uppercase -254: Vai -255: Variation_Selector -256: Wancho -257: Warang_Citi -258: White_Space -259: XID_Continue -260: XID_Start -261: Yi -262: Z -263: Zanabazar_Square -264: Zl -265: Zp -266: Zs + 53: Chorasmian + 54: Cn + 55: Co + 56: Common + 57: Coptic + 58: Cs + 59: Cuneiform + 60: Cypriot + 61: Cyrillic + 62: Dash + 63: Default_Ignorable_Code_Point + 64: Deprecated + 65: Deseret + 66: Devanagari + 67: Diacritic + 68: Dives_Akuru + 69: Dogra + 70: Duployan + 71: Egyptian_Hieroglyphs + 72: Elbasan + 73: Elymaic + 74: Emoji + 75: Emoji_Component + 76: Emoji_Modifier + 77: Emoji_Modifier_Base + 78: Emoji_Presentation + 79: Ethiopic + 80: Extended_Pictographic + 81: Extender + 82: Georgian + 83: Glagolitic + 84: Gothic + 85: Grantha + 86: Grapheme_Base + 87: Grapheme_Extend + 88: Grapheme_Link + 89: Greek + 90: Gujarati + 91: Gunjala_Gondi + 92: Gurmukhi + 93: Han + 94: Hangul + 95: Hanifi_Rohingya + 96: Hanunoo + 97: Hatran + 98: Hebrew + 99: Hex_Digit +100: Hiragana +101: Hyphen +102: IDS_Binary_Operator +103: IDS_Trinary_Operator 
+104: ID_Continue +105: ID_Start +106: Ideographic +107: Imperial_Aramaic +108: Inherited +109: Inscriptional_Pahlavi +110: Inscriptional_Parthian +111: Javanese +112: Join_Control +113: Kaithi +114: Kannada +115: Katakana +116: Kayah_Li +117: Kharoshthi +118: Khitan_Small_Script +119: Khmer +120: Khojki +121: Khudawadi +122: L +123: LC +124: Lao +125: Latin +126: Lepcha +127: Limbu +128: Linear_A +129: Linear_B +130: Lisu +131: Ll +132: Lm +133: Lo +134: Logical_Order_Exception +135: Lowercase +136: Lt +137: Lu +138: Lycian +139: Lydian +140: M +141: Mahajani +142: Makasar +143: Malayalam +144: Mandaic +145: Manichaean +146: Marchen +147: Masaram_Gondi +148: Math +149: Mc +150: Me +151: Medefaidrin +152: Meetei_Mayek +153: Mende_Kikakui +154: Meroitic_Cursive +155: Meroitic_Hieroglyphs +156: Miao +157: Mn +158: Modi +159: Mongolian +160: Mro +161: Multani +162: Myanmar +163: N +164: Nabataean +165: Nandinagari +166: Nd +167: New_Tai_Lue +168: Newa +169: Nko +170: Nl +171: No +172: Noncharacter_Code_Point +173: Nushu +174: Nyiakeng_Puachue_Hmong +175: Ogham +176: Ol_Chiki +177: Old_Hungarian +178: Old_Italic +179: Old_North_Arabian +180: Old_Permic +181: Old_Persian +182: Old_Sogdian +183: Old_South_Arabian +184: Old_Turkic +185: Oriya +186: Osage +187: Osmanya +188: Other_Alphabetic +189: Other_Default_Ignorable_Code_Point +190: Other_Grapheme_Extend +191: Other_ID_Continue +192: Other_ID_Start +193: Other_Lowercase +194: Other_Math +195: Other_Uppercase +196: P +197: Pahawh_Hmong +198: Palmyrene +199: Pattern_Syntax +200: Pattern_White_Space +201: Pau_Cin_Hau +202: Pc +203: Pd +204: Pe +205: Pf +206: Phags_Pa +207: Phoenician +208: Pi +209: Po +210: Prepended_Concatenation_Mark +211: Ps +212: Psalter_Pahlavi +213: Quotation_Mark +214: Radical +215: Regional_Indicator +216: Rejang +217: Runic +218: S +219: Samaritan +220: Saurashtra +221: Sc +222: Sentence_Terminal +223: Sharada +224: Shavian +225: Siddham +226: SignWriting +227: Sinhala +228: Sk +229: Sm +230: So 
+231: Soft_Dotted +232: Sogdian +233: Sora_Sompeng +234: Soyombo +235: Sundanese +236: Syloti_Nagri +237: Syriac +238: Tagalog +239: Tagbanwa +240: Tai_Le +241: Tai_Tham +242: Tai_Viet +243: Takri +244: Tamil +245: Tangut +246: Telugu +247: Terminal_Punctuation +248: Thaana +249: Thai +250: Tibetan +251: Tifinagh +252: Tirhuta +253: Ugaritic +254: Unified_Ideograph +255: Unknown +256: Uppercase +257: Vai +258: Variation_Selector +259: Wancho +260: Warang_Citi +261: White_Space +262: XID_Continue +263: XID_Start +264: Yezidi +265: Yi +266: Z +267: Zanabazar_Square +268: Zl +269: Zp +270: Zs 16: Adlm 42: Aghb 15: AHex 21: Arab -105: Armi +107: Armi 22: Armn 24: Avst 25: Bali @@ -274,515 +278,532 @@ Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1) 45: Cakm 38: Cans 39: Cari -120: Cased_Letter +123: Cased_Letter 52: Cher + 53: Chrs 40: CI -201: Close_Punctuation -137: Combining_Mark -199: Connector_Punctuation +204: Close_Punctuation +140: Combining_Mark +202: Connector_Punctuation 43: Control - 56: Copt - 59: Cprt -218: Currency_Symbol + 57: Copt + 60: Cprt +221: Currency_Symbol 47: CWCF 48: CWCM 49: CWL 50: CWT 51: CWU - 60: Cyrl -200: Dash_Punctuation -163: Decimal_Number - 63: Dep - 65: Deva - 62: DI - 66: Dia - 67: Dogr - 64: Dsrt - 68: Dupl - 69: Egyp - 70: Elba - 71: Elym -147: Enclosing_Mark - 77: Ethi - 79: Ext -202: Final_Punctuation + 61: Cyrl +203: Dash_Punctuation +166: Decimal_Number + 64: Dep + 66: Deva + 63: DI + 67: Dia + 68: Diak + 69: Dogr + 65: Dsrt + 70: Dupl + 77: EBase + 75: EComp + 71: Egyp + 72: Elba + 73: Elym + 76: EMod +150: Enclosing_Mark + 78: EPres + 79: Ethi + 81: Ext + 80: ExtPict +205: Final_Punctuation 44: Format - 80: Geor - 81: Glag - 89: Gong -144: Gonm - 82: Goth - 83: Gran - 84: Gr_Base - 87: Grek - 85: Gr_Ext - 86: Gr_Link - 88: Gujr - 90: Guru - 92: Hang - 91: Hani - 94: Hano - 95: Hatr - 96: Hebr - 97: Hex - 98: Hira + 82: Geor + 83: Glag + 91: Gong +147: Gonm + 84: Goth + 85: Gran + 86: Gr_Base + 89: Grek + 87: 
Gr_Ext + 88: Gr_Link + 90: Gujr + 92: Guru + 94: Hang + 93: Hani + 96: Hano + 97: Hatr + 98: Hebr + 99: Hex +100: Hira 19: Hluw -194: Hmng -171: Hmnp -174: Hung -102: IDC -104: Ideo -103: IDS -100: IDSB -101: IDST -205: Initial_Punctuation -175: Ital -109: Java -110: Join_C -114: Kali -113: Kana -115: Khar -116: Khmr -117: Khoj -112: Knda -111: Kthi -238: Lana -121: Laoo -122: Latn -123: Lepc -119: Letter -167: Letter_Number -124: Limb -125: Lina -126: Linb -264: Line_Separator -131: LOE -128: Lowercase_Letter -135: Lyci -136: Lydi -138: Mahj -139: Maka -141: Mand -142: Mani -143: Marc -137: Mark -226: Math_Symbol -148: Medf -150: Mend -151: Merc -152: Mero -140: Mlym -129: Modifier_Letter -225: Modifier_Symbol -156: Mong -157: Mroo -149: Mtei -158: Mult -159: Mymr -162: Nand -176: Narb -161: Nbat -169: NChar -166: Nkoo -154: Nonspacing_Mark -170: Nshu -160: Number -185: OAlpha -186: ODI -172: Ogam -187: OGr_Ext -188: OIDC -189: OIDS -173: Olck -190: OLower -191: OMath -208: Open_Punctuation -181: Orkh -182: Orya -183: Osge -184: Osma +197: Hmng +174: Hmnp +177: Hung +104: IDC +106: Ideo +105: IDS +102: IDSB +103: IDST +208: Initial_Punctuation +178: Ital +111: Java +112: Join_C +116: Kali +115: Kana +117: Khar +119: Khmr +120: Khoj +118: Kits +114: Knda +113: Kthi +241: Lana +124: Laoo +125: Latn +126: Lepc +122: Letter +170: Letter_Number +127: Limb +128: Lina +129: Linb +268: Line_Separator +134: LOE +131: Lowercase_Letter +138: Lyci +139: Lydi +141: Mahj +142: Maka +144: Mand +145: Mani +146: Marc +140: Mark +229: Math_Symbol +151: Medf +153: Mend +154: Merc +155: Mero +143: Mlym +132: Modifier_Letter +228: Modifier_Symbol +159: Mong +160: Mroo +152: Mtei +161: Mult +162: Mymr +165: Nand +179: Narb +164: Nbat +172: NChar +169: Nkoo +157: Nonspacing_Mark +173: Nshu +163: Number +188: OAlpha +189: ODI +175: Ogam +190: OGr_Ext +191: OIDC +192: OIDS +176: Olck +193: OLower +194: OMath +211: Open_Punctuation +184: Orkh +185: Orya +186: Osge +187: Osma 37: Other 
-130: Other_Letter -168: Other_Number -206: Other_Punctuation -227: Other_Symbol -192: OUpper -195: Palm -265: Paragraph_Separator -196: Pat_Syn -197: Pat_WS -198: Pauc -207: PCM -177: Perm -203: Phag -107: Phli -209: Phlp -204: Phnx -153: Plrd - 54: Private_Use -108: Prti -193: Punctuation - 56: Qaac -106: Qaai -210: QMark -212: RI -213: Rjng - 93: Rohg -214: Runr -216: Samr -180: Sarb -217: Saur -228: SD -262: Separator -223: Sgnw -221: Shaw -220: Shrd -222: Sidd -118: Sind -224: Sinh -229: Sogd -179: Sogo -230: Sora -231: Soyo -266: Space_Separator -146: Spacing_Mark -219: STerm -232: Sund - 57: Surrogate -233: Sylo -215: Symbol -234: Syrc -236: Tagb -240: Takr -237: Tale -164: Talu -241: Taml -242: Tang -239: Tavt -243: Telu -244: Term -248: Tfng -235: Tglg -245: Thaa -247: Tibt -249: Tirh -133: Titlecase_Letter -250: Ugar -251: UIdeo - 53: Unassigned -134: Uppercase_Letter -254: Vaii -255: VS -257: Wara -256: Wcho -258: WSpace -259: XIDC -260: XIDS -178: Xpeo - 58: Xsux -261: Yiii -263: Zanb -106: Zinh - 55: Zyyy -252: Zzzz -267: In_Basic_Latin -268: In_Latin_1_Supplement -269: In_Latin_Extended_A -270: In_Latin_Extended_B -271: In_IPA_Extensions -272: In_Spacing_Modifier_Letters -273: In_Combining_Diacritical_Marks -274: In_Greek_and_Coptic -275: In_Cyrillic -276: In_Cyrillic_Supplement -277: In_Armenian -278: In_Hebrew -279: In_Arabic -280: In_Syriac -281: In_Arabic_Supplement -282: In_Thaana -283: In_NKo -284: In_Samaritan -285: In_Mandaic -286: In_Syriac_Supplement -287: In_Arabic_Extended_A -288: In_Devanagari -289: In_Bengali -290: In_Gurmukhi -291: In_Gujarati -292: In_Oriya -293: In_Tamil -294: In_Telugu -295: In_Kannada -296: In_Malayalam -297: In_Sinhala -298: In_Thai -299: In_Lao -300: In_Tibetan -301: In_Myanmar -302: In_Georgian -303: In_Hangul_Jamo -304: In_Ethiopic -305: In_Ethiopic_Supplement -306: In_Cherokee -307: In_Unified_Canadian_Aboriginal_Syllabics -308: In_Ogham -309: In_Runic -310: In_Tagalog -311: In_Hanunoo -312: In_Buhid -313: 
In_Tagbanwa -314: In_Khmer -315: In_Mongolian -316: In_Unified_Canadian_Aboriginal_Syllabics_Extended -317: In_Limbu -318: In_Tai_Le -319: In_New_Tai_Lue -320: In_Khmer_Symbols -321: In_Buginese -322: In_Tai_Tham -323: In_Combining_Diacritical_Marks_Extended -324: In_Balinese -325: In_Sundanese -326: In_Batak -327: In_Lepcha -328: In_Ol_Chiki -329: In_Cyrillic_Extended_C -330: In_Georgian_Extended -331: In_Sundanese_Supplement -332: In_Vedic_Extensions -333: In_Phonetic_Extensions -334: In_Phonetic_Extensions_Supplement -335: In_Combining_Diacritical_Marks_Supplement -336: In_Latin_Extended_Additional -337: In_Greek_Extended -338: In_General_Punctuation -339: In_Superscripts_and_Subscripts -340: In_Currency_Symbols -341: In_Combining_Diacritical_Marks_for_Symbols -342: In_Letterlike_Symbols -343: In_Number_Forms -344: In_Arrows -345: In_Mathematical_Operators -346: In_Miscellaneous_Technical -347: In_Control_Pictures -348: In_Optical_Character_Recognition -349: In_Enclosed_Alphanumerics -350: In_Box_Drawing -351: In_Block_Elements -352: In_Geometric_Shapes -353: In_Miscellaneous_Symbols -354: In_Dingbats -355: In_Miscellaneous_Mathematical_Symbols_A -356: In_Supplemental_Arrows_A -357: In_Braille_Patterns -358: In_Supplemental_Arrows_B -359: In_Miscellaneous_Mathematical_Symbols_B -360: In_Supplemental_Mathematical_Operators -361: In_Miscellaneous_Symbols_and_Arrows -362: In_Glagolitic -363: In_Latin_Extended_C -364: In_Coptic -365: In_Georgian_Supplement -366: In_Tifinagh -367: In_Ethiopic_Extended -368: In_Cyrillic_Extended_A -369: In_Supplemental_Punctuation -370: In_CJK_Radicals_Supplement -371: In_Kangxi_Radicals -372: In_Ideographic_Description_Characters -373: In_CJK_Symbols_and_Punctuation -374: In_Hiragana -375: In_Katakana -376: In_Bopomofo -377: In_Hangul_Compatibility_Jamo -378: In_Kanbun -379: In_Bopomofo_Extended -380: In_CJK_Strokes -381: In_Katakana_Phonetic_Extensions -382: In_Enclosed_CJK_Letters_and_Months -383: In_CJK_Compatibility -384: 
In_CJK_Unified_Ideographs_Extension_A -385: In_Yijing_Hexagram_Symbols -386: In_CJK_Unified_Ideographs -387: In_Yi_Syllables -388: In_Yi_Radicals -389: In_Lisu -390: In_Vai -391: In_Cyrillic_Extended_B -392: In_Bamum -393: In_Modifier_Tone_Letters -394: In_Latin_Extended_D -395: In_Syloti_Nagri -396: In_Common_Indic_Number_Forms -397: In_Phags_pa -398: In_Saurashtra -399: In_Devanagari_Extended -400: In_Kayah_Li -401: In_Rejang -402: In_Hangul_Jamo_Extended_A -403: In_Javanese -404: In_Myanmar_Extended_B -405: In_Cham -406: In_Myanmar_Extended_A -407: In_Tai_Viet -408: In_Meetei_Mayek_Extensions -409: In_Ethiopic_Extended_A -410: In_Latin_Extended_E -411: In_Cherokee_Supplement -412: In_Meetei_Mayek -413: In_Hangul_Syllables -414: In_Hangul_Jamo_Extended_B -415: In_High_Surrogates -416: In_High_Private_Use_Surrogates -417: In_Low_Surrogates -418: In_Private_Use_Area -419: In_CJK_Compatibility_Ideographs -420: In_Alphabetic_Presentation_Forms -421: In_Arabic_Presentation_Forms_A -422: In_Variation_Selectors -423: In_Vertical_Forms -424: In_Combining_Half_Marks -425: In_CJK_Compatibility_Forms -426: In_Small_Form_Variants -427: In_Arabic_Presentation_Forms_B -428: In_Halfwidth_and_Fullwidth_Forms -429: In_Specials -430: In_Linear_B_Syllabary -431: In_Linear_B_Ideograms -432: In_Aegean_Numbers -433: In_Ancient_Greek_Numbers -434: In_Ancient_Symbols -435: In_Phaistos_Disc -436: In_Lycian -437: In_Carian -438: In_Coptic_Epact_Numbers -439: In_Old_Italic -440: In_Gothic -441: In_Old_Permic -442: In_Ugaritic -443: In_Old_Persian -444: In_Deseret -445: In_Shavian -446: In_Osmanya -447: In_Osage -448: In_Elbasan -449: In_Caucasian_Albanian -450: In_Linear_A -451: In_Cypriot_Syllabary -452: In_Imperial_Aramaic -453: In_Palmyrene -454: In_Nabataean -455: In_Hatran -456: In_Phoenician -457: In_Lydian -458: In_Meroitic_Hieroglyphs -459: In_Meroitic_Cursive -460: In_Kharoshthi -461: In_Old_South_Arabian -462: In_Old_North_Arabian -463: In_Manichaean -464: In_Avestan -465: 
In_Inscriptional_Parthian -466: In_Inscriptional_Pahlavi -467: In_Psalter_Pahlavi -468: In_Old_Turkic -469: In_Old_Hungarian -470: In_Hanifi_Rohingya -471: In_Rumi_Numeral_Symbols -472: In_Old_Sogdian -473: In_Sogdian -474: In_Elymaic -475: In_Brahmi -476: In_Kaithi -477: In_Sora_Sompeng -478: In_Chakma -479: In_Mahajani -480: In_Sharada -481: In_Sinhala_Archaic_Numbers -482: In_Khojki -483: In_Multani -484: In_Khudawadi -485: In_Grantha -486: In_Newa -487: In_Tirhuta -488: In_Siddham -489: In_Modi -490: In_Mongolian_Supplement -491: In_Takri -492: In_Ahom -493: In_Dogra -494: In_Warang_Citi -495: In_Nandinagari -496: In_Zanabazar_Square -497: In_Soyombo -498: In_Pau_Cin_Hau -499: In_Bhaiksuki -500: In_Marchen -501: In_Masaram_Gondi -502: In_Gunjala_Gondi -503: In_Makasar -504: In_Tamil_Supplement -505: In_Cuneiform -506: In_Cuneiform_Numbers_and_Punctuation -507: In_Early_Dynastic_Cuneiform -508: In_Egyptian_Hieroglyphs -509: In_Egyptian_Hieroglyph_Format_Controls -510: In_Anatolian_Hieroglyphs -511: In_Bamum_Supplement -512: In_Mro -513: In_Bassa_Vah -514: In_Pahawh_Hmong -515: In_Medefaidrin -516: In_Miao -517: In_Ideographic_Symbols_and_Punctuation -518: In_Tangut -519: In_Tangut_Components -520: In_Kana_Supplement -521: In_Kana_Extended_A -522: In_Small_Kana_Extension -523: In_Nushu -524: In_Duployan -525: In_Shorthand_Format_Controls -526: In_Byzantine_Musical_Symbols -527: In_Musical_Symbols -528: In_Ancient_Greek_Musical_Notation -529: In_Mayan_Numerals -530: In_Tai_Xuan_Jing_Symbols -531: In_Counting_Rod_Numerals -532: In_Mathematical_Alphanumeric_Symbols -533: In_Sutton_SignWriting -534: In_Glagolitic_Supplement -535: In_Nyiakeng_Puachue_Hmong -536: In_Wancho -537: In_Mende_Kikakui -538: In_Adlam -539: In_Indic_Siyaq_Numbers -540: In_Ottoman_Siyaq_Numbers -541: In_Arabic_Mathematical_Alphabetic_Symbols -542: In_Mahjong_Tiles -543: In_Domino_Tiles -544: In_Playing_Cards -545: In_Enclosed_Alphanumeric_Supplement -546: In_Enclosed_Ideographic_Supplement 
-547: In_Miscellaneous_Symbols_and_Pictographs -548: In_Emoticons -549: In_Ornamental_Dingbats -550: In_Transport_and_Map_Symbols -551: In_Alchemical_Symbols -552: In_Geometric_Shapes_Extended -553: In_Supplemental_Arrows_C -554: In_Supplemental_Symbols_and_Pictographs -555: In_Chess_Symbols -556: In_Symbols_and_Pictographs_Extended_A -557: In_CJK_Unified_Ideographs_Extension_B -558: In_CJK_Unified_Ideographs_Extension_C -559: In_CJK_Unified_Ideographs_Extension_D -560: In_CJK_Unified_Ideographs_Extension_E -561: In_CJK_Unified_Ideographs_Extension_F -562: In_CJK_Compatibility_Ideographs_Supplement -563: In_Tags -564: In_Variation_Selectors_Supplement -565: In_Supplementary_Private_Use_Area_A -566: In_Supplementary_Private_Use_Area_B -567: In_No_Block +133: Other_Letter +171: Other_Number +209: Other_Punctuation +230: Other_Symbol +195: OUpper +198: Palm +269: Paragraph_Separator +199: Pat_Syn +200: Pat_WS +201: Pauc +210: PCM +180: Perm +206: Phag +109: Phli +212: Phlp +207: Phnx +156: Plrd + 55: Private_Use +110: Prti +196: Punctuation + 57: Qaac +108: Qaai +213: QMark +215: RI +216: Rjng + 95: Rohg +217: Runr +219: Samr +183: Sarb +220: Saur +231: SD +266: Separator +226: Sgnw +224: Shaw +223: Shrd +225: Sidd +121: Sind +227: Sinh +232: Sogd +182: Sogo +233: Sora +234: Soyo +270: Space_Separator +149: Spacing_Mark +222: STerm +235: Sund + 58: Surrogate +236: Sylo +218: Symbol +237: Syrc +239: Tagb +243: Takr +240: Tale +167: Talu +244: Taml +245: Tang +242: Tavt +246: Telu +247: Term +251: Tfng +238: Tglg +248: Thaa +250: Tibt +252: Tirh +136: Titlecase_Letter +253: Ugar +254: UIdeo + 54: Unassigned +137: Uppercase_Letter +257: Vaii +258: VS +260: Wara +259: Wcho +261: WSpace +262: XIDC +263: XIDS +181: Xpeo + 59: Xsux +264: Yezi +265: Yiii +267: Zanb +108: Zinh + 56: Zyyy +255: Zzzz +271: In_Basic_Latin +272: In_Latin_1_Supplement +273: In_Latin_Extended_A +274: In_Latin_Extended_B +275: In_IPA_Extensions +276: In_Spacing_Modifier_Letters +277: 
In_Combining_Diacritical_Marks +278: In_Greek_and_Coptic +279: In_Cyrillic +280: In_Cyrillic_Supplement +281: In_Armenian +282: In_Hebrew +283: In_Arabic +284: In_Syriac +285: In_Arabic_Supplement +286: In_Thaana +287: In_NKo +288: In_Samaritan +289: In_Mandaic +290: In_Syriac_Supplement +291: In_Arabic_Extended_A +292: In_Devanagari +293: In_Bengali +294: In_Gurmukhi +295: In_Gujarati +296: In_Oriya +297: In_Tamil +298: In_Telugu +299: In_Kannada +300: In_Malayalam +301: In_Sinhala +302: In_Thai +303: In_Lao +304: In_Tibetan +305: In_Myanmar +306: In_Georgian +307: In_Hangul_Jamo +308: In_Ethiopic +309: In_Ethiopic_Supplement +310: In_Cherokee +311: In_Unified_Canadian_Aboriginal_Syllabics +312: In_Ogham +313: In_Runic +314: In_Tagalog +315: In_Hanunoo +316: In_Buhid +317: In_Tagbanwa +318: In_Khmer +319: In_Mongolian +320: In_Unified_Canadian_Aboriginal_Syllabics_Extended +321: In_Limbu +322: In_Tai_Le +323: In_New_Tai_Lue +324: In_Khmer_Symbols +325: In_Buginese +326: In_Tai_Tham +327: In_Combining_Diacritical_Marks_Extended +328: In_Balinese +329: In_Sundanese +330: In_Batak +331: In_Lepcha +332: In_Ol_Chiki +333: In_Cyrillic_Extended_C +334: In_Georgian_Extended +335: In_Sundanese_Supplement +336: In_Vedic_Extensions +337: In_Phonetic_Extensions +338: In_Phonetic_Extensions_Supplement +339: In_Combining_Diacritical_Marks_Supplement +340: In_Latin_Extended_Additional +341: In_Greek_Extended +342: In_General_Punctuation +343: In_Superscripts_and_Subscripts +344: In_Currency_Symbols +345: In_Combining_Diacritical_Marks_for_Symbols +346: In_Letterlike_Symbols +347: In_Number_Forms +348: In_Arrows +349: In_Mathematical_Operators +350: In_Miscellaneous_Technical +351: In_Control_Pictures +352: In_Optical_Character_Recognition +353: In_Enclosed_Alphanumerics +354: In_Box_Drawing +355: In_Block_Elements +356: In_Geometric_Shapes +357: In_Miscellaneous_Symbols +358: In_Dingbats +359: In_Miscellaneous_Mathematical_Symbols_A +360: In_Supplemental_Arrows_A +361: 
In_Braille_Patterns +362: In_Supplemental_Arrows_B +363: In_Miscellaneous_Mathematical_Symbols_B +364: In_Supplemental_Mathematical_Operators +365: In_Miscellaneous_Symbols_and_Arrows +366: In_Glagolitic +367: In_Latin_Extended_C +368: In_Coptic +369: In_Georgian_Supplement +370: In_Tifinagh +371: In_Ethiopic_Extended +372: In_Cyrillic_Extended_A +373: In_Supplemental_Punctuation +374: In_CJK_Radicals_Supplement +375: In_Kangxi_Radicals +376: In_Ideographic_Description_Characters +377: In_CJK_Symbols_and_Punctuation +378: In_Hiragana +379: In_Katakana +380: In_Bopomofo +381: In_Hangul_Compatibility_Jamo +382: In_Kanbun +383: In_Bopomofo_Extended +384: In_CJK_Strokes +385: In_Katakana_Phonetic_Extensions +386: In_Enclosed_CJK_Letters_and_Months +387: In_CJK_Compatibility +388: In_CJK_Unified_Ideographs_Extension_A +389: In_Yijing_Hexagram_Symbols +390: In_CJK_Unified_Ideographs +391: In_Yi_Syllables +392: In_Yi_Radicals +393: In_Lisu +394: In_Vai +395: In_Cyrillic_Extended_B +396: In_Bamum +397: In_Modifier_Tone_Letters +398: In_Latin_Extended_D +399: In_Syloti_Nagri +400: In_Common_Indic_Number_Forms +401: In_Phags_pa +402: In_Saurashtra +403: In_Devanagari_Extended +404: In_Kayah_Li +405: In_Rejang +406: In_Hangul_Jamo_Extended_A +407: In_Javanese +408: In_Myanmar_Extended_B +409: In_Cham +410: In_Myanmar_Extended_A +411: In_Tai_Viet +412: In_Meetei_Mayek_Extensions +413: In_Ethiopic_Extended_A +414: In_Latin_Extended_E +415: In_Cherokee_Supplement +416: In_Meetei_Mayek +417: In_Hangul_Syllables +418: In_Hangul_Jamo_Extended_B +419: In_High_Surrogates +420: In_High_Private_Use_Surrogates +421: In_Low_Surrogates +422: In_Private_Use_Area +423: In_CJK_Compatibility_Ideographs +424: In_Alphabetic_Presentation_Forms +425: In_Arabic_Presentation_Forms_A +426: In_Variation_Selectors +427: In_Vertical_Forms +428: In_Combining_Half_Marks +429: In_CJK_Compatibility_Forms +430: In_Small_Form_Variants +431: In_Arabic_Presentation_Forms_B +432: 
In_Halfwidth_and_Fullwidth_Forms +433: In_Specials +434: In_Linear_B_Syllabary +435: In_Linear_B_Ideograms +436: In_Aegean_Numbers +437: In_Ancient_Greek_Numbers +438: In_Ancient_Symbols +439: In_Phaistos_Disc +440: In_Lycian +441: In_Carian +442: In_Coptic_Epact_Numbers +443: In_Old_Italic +444: In_Gothic +445: In_Old_Permic +446: In_Ugaritic +447: In_Old_Persian +448: In_Deseret +449: In_Shavian +450: In_Osmanya +451: In_Osage +452: In_Elbasan +453: In_Caucasian_Albanian +454: In_Linear_A +455: In_Cypriot_Syllabary +456: In_Imperial_Aramaic +457: In_Palmyrene +458: In_Nabataean +459: In_Hatran +460: In_Phoenician +461: In_Lydian +462: In_Meroitic_Hieroglyphs +463: In_Meroitic_Cursive +464: In_Kharoshthi +465: In_Old_South_Arabian +466: In_Old_North_Arabian +467: In_Manichaean +468: In_Avestan +469: In_Inscriptional_Parthian +470: In_Inscriptional_Pahlavi +471: In_Psalter_Pahlavi +472: In_Old_Turkic +473: In_Old_Hungarian +474: In_Hanifi_Rohingya +475: In_Rumi_Numeral_Symbols +476: In_Yezidi +477: In_Old_Sogdian +478: In_Sogdian +479: In_Chorasmian +480: In_Elymaic +481: In_Brahmi +482: In_Kaithi +483: In_Sora_Sompeng +484: In_Chakma +485: In_Mahajani +486: In_Sharada +487: In_Sinhala_Archaic_Numbers +488: In_Khojki +489: In_Multani +490: In_Khudawadi +491: In_Grantha +492: In_Newa +493: In_Tirhuta +494: In_Siddham +495: In_Modi +496: In_Mongolian_Supplement +497: In_Takri +498: In_Ahom +499: In_Dogra +500: In_Warang_Citi +501: In_Dives_Akuru +502: In_Nandinagari +503: In_Zanabazar_Square +504: In_Soyombo +505: In_Pau_Cin_Hau +506: In_Bhaiksuki +507: In_Marchen +508: In_Masaram_Gondi +509: In_Gunjala_Gondi +510: In_Makasar +511: In_Lisu_Supplement +512: In_Tamil_Supplement +513: In_Cuneiform +514: In_Cuneiform_Numbers_and_Punctuation +515: In_Early_Dynastic_Cuneiform +516: In_Egyptian_Hieroglyphs +517: In_Egyptian_Hieroglyph_Format_Controls +518: In_Anatolian_Hieroglyphs +519: In_Bamum_Supplement +520: In_Mro +521: In_Bassa_Vah +522: In_Pahawh_Hmong +523: 
In_Medefaidrin +524: In_Miao +525: In_Ideographic_Symbols_and_Punctuation +526: In_Tangut +527: In_Tangut_Components +528: In_Khitan_Small_Script +529: In_Tangut_Supplement +530: In_Kana_Supplement +531: In_Kana_Extended_A +532: In_Small_Kana_Extension +533: In_Nushu +534: In_Duployan +535: In_Shorthand_Format_Controls +536: In_Byzantine_Musical_Symbols +537: In_Musical_Symbols +538: In_Ancient_Greek_Musical_Notation +539: In_Mayan_Numerals +540: In_Tai_Xuan_Jing_Symbols +541: In_Counting_Rod_Numerals +542: In_Mathematical_Alphanumeric_Symbols +543: In_Sutton_SignWriting +544: In_Glagolitic_Supplement +545: In_Nyiakeng_Puachue_Hmong +546: In_Wancho +547: In_Mende_Kikakui +548: In_Adlam +549: In_Indic_Siyaq_Numbers +550: In_Ottoman_Siyaq_Numbers +551: In_Arabic_Mathematical_Alphabetic_Symbols +552: In_Mahjong_Tiles +553: In_Domino_Tiles +554: In_Playing_Cards +555: In_Enclosed_Alphanumeric_Supplement +556: In_Enclosed_Ideographic_Supplement +557: In_Miscellaneous_Symbols_and_Pictographs +558: In_Emoticons +559: In_Ornamental_Dingbats +560: In_Transport_and_Map_Symbols +561: In_Alchemical_Symbols +562: In_Geometric_Shapes_Extended +563: In_Supplemental_Arrows_C +564: In_Supplemental_Symbols_and_Pictographs +565: In_Chess_Symbols +566: In_Symbols_and_Pictographs_Extended_A +567: In_Symbols_for_Legacy_Computing +568: In_CJK_Unified_Ideographs_Extension_B +569: In_CJK_Unified_Ideographs_Extension_C +570: In_CJK_Unified_Ideographs_Extension_D +571: In_CJK_Unified_Ideographs_Extension_E +572: In_CJK_Unified_Ideographs_Extension_F +573: In_CJK_Compatibility_Ideographs_Supplement +574: In_CJK_Unified_Ideographs_Extension_G +575: In_Tags +576: In_Variation_Selectors_Supplement +577: In_Supplementary_Private_Use_Area_A +578: In_Supplementary_Private_Use_Area_B +579: In_No_Block diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict index e6e00db..a3e978b 100644 --- a/harnesses/ascii_compatible.dict +++ b/harnesses/ascii_compatible.dict @@ -109,3 +109,5 
@@ "\\N{name}" "\\p{Katakana}" "\\p{Emoji}" +"ss" +"SS" diff --git a/harnesses/base.c b/harnesses/base.c new file mode 100644 index 0000000..a88e6f2 --- /dev/null +++ b/harnesses/base.c @@ -0,0 +1,499 @@ +/* + * base.c contributed by Mark Griffin + * Copyright (c) 2019-2020 K.Kosako + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +#include "oniguruma.h" + +#define PARSE_DEPTH_LIMIT 8 +#define RETRY_LIMIT 5000 +#define CALL_MAX_NEST_LEVEL 8 +//#define EXEC_PRINT_INTERVAL 500000 +//#define DUMP_DATA_INTERVAL 100000 +//#define STAT_PATH "fuzzer.stat_log" + +typedef unsigned char uint8_t; + +#ifdef DUMP_INPUT +static void +dump_input(unsigned char* data, size_t len) +{ + static FILE* DumpFp; + static char end[] = { 'E', 'N', 'D' }; + + if (DumpFp == 0) + DumpFp = fopen("dump-input", "w"); + + fseek(DumpFp, 0, SEEK_SET); + fwrite(data, sizeof(unsigned char), len, DumpFp); + fwrite(end, sizeof(char), sizeof(end), DumpFp); + fflush(DumpFp); +} +#endif + +#ifdef DUMP_DATA_INTERVAL +static void +dump_file(char* path, unsigned char* data, size_t len) +{ + FILE* fp; + + fp = fopen(path, "w"); + fwrite(data, sizeof(unsigned char), len, fp); + fclose(fp); +} +#endif + +#ifdef STANDALONE +#include <ctype.h> + +static void +dump_data(FILE* fp, unsigned char* data, int len) +{ + int i; + + fprintf(fp, "{\n"); + for (i = 0; i < len; i++) { + unsigned char c = data[i]; + + if (isprint((int )c)) { + if (c == '\\') + fprintf(fp, " '\\\\'"); + else + fprintf(fp, " '%c'", c); + } + else { + fprintf(fp, "0x%02x", (int )c); + } + + if (i == len - 1) { + fprintf(fp, "\n"); + } + else { + if (i % 8 == 7) + fprintf(fp, ",\n"); + else + fprintf(fp, ", "); + } + } + fprintf(fp, "};\n"); +} + +#else + +static void +output_current_time(FILE* fp) +{ + char d[64]; + time_t t; + + t = time(NULL); + strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + + fprintf(fp, 
"%s", d); +} + +#endif + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end, int backward) +{ + int r; + unsigned char *start, *range; + OnigRegion *region; + + region = onig_region_new(); + + if (backward != 0) { + start = end; + range = str; + } + else { + start = str; + range = end; + } + + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { +#ifdef STANDALONE + int i; + + fprintf(stdout, "match at %d (%s)\n", r, + ONIGENC_NAME(onig_get_encoding(reg))); + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } +#endif + } + else if (r == ONIG_MISMATCH) { +#ifdef STANDALONE + fprintf(stdout, "search fail (%s)\n", + ONIGENC_NAME(onig_get_encoding(reg))); +#endif + } + else { /* error */ +#ifdef STANDALONE + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "ERROR: %s\n", s); + fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +#endif + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + + if (r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) + return -2; + + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return 0; +} + +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + +static int +exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, + char* apattern, char* apattern_end, char* astr, UChar* end, int backward) +{ + int r; + regex_t* reg; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + UChar* pattern_end = (UChar* )apattern_end; + + EXEC_COUNT++; + EXEC_COUNT_INTERVAL++; + + onig_initialize(&enc, 1); + onig_set_retry_limit_in_search(RETRY_LIMIT); +#ifdef PARSE_DEPTH_LIMIT + 
onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); +#endif + onig_set_subexp_call_max_nest_level(CALL_MAX_NEST_LEVEL); + + r = onig_new(&reg, pattern, pattern_end, + options, enc, syntax, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); +#ifdef STANDALONE + fprintf(stdout, "ERROR: %s\n", s); +#endif + onig_end(); + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } + else + return -1; + } + REGEX_SUCCESS_COUNT++; + + r = search(reg, pattern, pattern_end, backward); + if (r == -2) return -2; + + if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { + VALID_STRING_COUNT++; + r = search(reg, str, end, backward); + if (r == -2) return -2; + } + + onig_free(reg); + onig_end(); + return 0; +} + +static int +alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, + int backward, int pattern_size, size_t remaining_size, unsigned char *data) +{ + int r; + unsigned char *pattern_end; + unsigned char *str_null_end; + + // copy first PATTERN_SIZE bytes off to be the pattern + unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); + memcpy(pattern, data, pattern_size); + pattern_end = pattern + pattern_size; + data += pattern_size; + remaining_size -= pattern_size; + +#if defined(UTF16_BE) || defined(UTF16_LE) + if (remaining_size % 2 == 1) remaining_size--; +#endif + + unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? 
remaining_size : 1); + memcpy(str, data, remaining_size); + str_null_end = str + remaining_size; + + r = exec(enc, options, syntax, + (char *)pattern, (char *)pattern_end, + (char *)str, str_null_end, backward); + + free(pattern); + free(str); + return r; +} + +#define OPTIONS_MASK (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP) + + +#ifdef SYNTAX_TEST +#define NUM_CONTROL_BYTES 6 +#else +#define NUM_CONTROL_BYTES 5 +#endif + +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +#if !defined(UTF16_BE) && !defined(UTF16_LE) + static OnigEncoding encodings[] = { + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_ASCII, + ONIG_ENCODING_EUC_JP, + ONIG_ENCODING_EUC_TW, + ONIG_ENCODING_EUC_KR, + ONIG_ENCODING_EUC_CN, + ONIG_ENCODING_SJIS, + ONIG_ENCODING_KOI8_R, + ONIG_ENCODING_CP1251, + ONIG_ENCODING_BIG5, + ONIG_ENCODING_GB18030, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_ISO_8859_1, + ONIG_ENCODING_ISO_8859_2, + ONIG_ENCODING_ISO_8859_3, + ONIG_ENCODING_ISO_8859_4, + ONIG_ENCODING_ISO_8859_5, + ONIG_ENCODING_ISO_8859_6, + ONIG_ENCODING_ISO_8859_7, + ONIG_ENCODING_ISO_8859_8, + ONIG_ENCODING_ISO_8859_9, + ONIG_ENCODING_ISO_8859_10, + ONIG_ENCODING_ISO_8859_11, + ONIG_ENCODING_ISO_8859_13, + ONIG_ENCODING_ISO_8859_14, + ONIG_ENCODING_ISO_8859_15, + ONIG_ENCODING_ISO_8859_16 + }; + unsigned char encoding_choice; +#endif + +#ifdef SYNTAX_TEST + static OnigSyntaxType* syntaxes[] = { + ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_EMACS, + ONIG_SYNTAX_GREP, + ONIG_SYNTAX_GNU_REGEX, + 
ONIG_SYNTAX_JAVA, + ONIG_SYNTAX_PERL_NG, + ONIG_SYNTAX_ONIGURUMA + }; + +#ifdef STANDALONE + static char* syntax_names[] = { + "Posix Extended", + "Emacs", + "Grep", + "GNU Regex", + "Java", + "Perl+NG", + "Oniguruma" + }; +#endif + + unsigned char syntax_choice; +#endif + + int r; + int backward; + int pattern_size; + size_t remaining_size; + unsigned char *data; + unsigned char pattern_size_choice; + OnigOptionType options; + OnigEncoding enc; + OnigSyntaxType* syntax; + +#ifndef STANDALONE + static FILE* STAT_FP; +#endif + + INPUT_COUNT++; + +#ifdef DUMP_DATA_INTERVAL + if (INPUT_COUNT % DUMP_DATA_INTERVAL == 0) { + char path[20]; + sprintf(path, "dump-%ld", INPUT_COUNT); + dump_file(path, (unsigned char* )Data, Size); + } +#endif + + if (Size < NUM_CONTROL_BYTES) return 0; + + remaining_size = Size; + data = (unsigned char* )(Data); + +#ifdef UTF16_BE + enc = ONIG_ENCODING_UTF16_BE; +#else +#ifdef UTF16_LE + enc = ONIG_ENCODING_UTF16_LE; +#else + encoding_choice = data[0]; + data++; + remaining_size--; + + int num_encodings = sizeof(encodings)/sizeof(encodings[0]); + enc = encodings[encoding_choice % num_encodings]; +#endif +#endif + +#ifdef SYNTAX_TEST + syntax_choice = data[0]; + data++; + remaining_size--; + + int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); + syntax = syntaxes[syntax_choice % num_syntaxes]; +#else + syntax = ONIG_SYNTAX_DEFAULT; +#endif + + if ((data[1] & 0xc0) == 0) + options = (data[0] | (data[1] << 8)) & OPTIONS_MASK; + else + options = data[0] & ONIG_OPTION_IGNORECASE; + + data++; + remaining_size--; + data++; + remaining_size--; + + pattern_size_choice = data[0]; + data++; + remaining_size--; + + backward = (data[0] == 0xbb); + data++; + remaining_size--; + + if (remaining_size == 0) + pattern_size = 0; + else { + pattern_size = (int )pattern_size_choice % remaining_size; +#if defined(UTF16_BE) || defined(UTF16_LE) + if (pattern_size % 2 == 1) pattern_size--; +#endif + } + +#ifdef STANDALONE + dump_data(stdout, data, 
pattern_size); +#ifdef SYNTAX_TEST + fprintf(stdout, + "enc: %s, syntax: %s, options: %u, pattern_size: %d, back:%d\n", + ONIGENC_NAME(enc), + syntax_names[syntax_choice % num_syntaxes], + options, + pattern_size, backward); +#else + fprintf(stdout, "enc: %s, options: %u, pattern_size: %d, back:%d\n", + ONIGENC_NAME(enc), options, pattern_size, backward); +#endif +#endif + +#ifdef DUMP_INPUT + dump_input((unsigned char* )Data, Size); +#endif + + r = alloc_exec(enc, options, syntax, backward, pattern_size, + remaining_size, data); + if (r == -2) exit(-2); + +#ifndef STANDALONE +#ifdef EXEC_PRINT_INTERVAL + if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { + float fexec, freg, fvalid; + + if (STAT_FP == 0) { +#ifdef STAT_PATH + STAT_FP = fopen(STAT_PATH, "a"); +#else + STAT_FP = stdout; +#endif + } + + output_current_time(STAT_FP); + + if (INPUT_COUNT != 0) { // overflow check + fexec = (float )EXEC_COUNT / INPUT_COUNT; + freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; + fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + + fprintf(STAT_FP, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", + EXEC_COUNT, fexec, freg, fvalid); + fflush(STAT_FP); + } + else { + fprintf(STAT_FP, ": ignore (input count overflow)\n"); + } + + EXEC_COUNT_INTERVAL = 0; + } + else if (EXEC_COUNT == 1) { + output_current_time(stdout); + fprintf(stdout, ": ------------ START ------------\n"); + } +#endif +#endif + + return r; +} + +#ifdef STANDALONE + +extern int main(int argc, char* argv[]) +{ + size_t n; + uint8_t Data[10000]; + + n = read(0, Data, sizeof(Data)); + fprintf(stdout, "n: %ld\n", n); + LLVMFuzzerTestOneInput(Data, n); + + return 0; +} +#endif /* STANDALONE */ diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe.c index aabe916..5441de9 100644 --- a/harnesses/deluxe-encode-harness.c +++ b/harnesses/deluxe.c @@ -1,5 +1,5 @@ /* - * deluxe-encode-harness.c + * deluxe.c * contributed by Mark Griffin */ #include <stdio.h> @@ -8,7 +8,9 @@ #include <stdlib.h> #include 
<string.h> -#define DEFAULT_LIMIT 120 +#define RETRY_LIMIT 10000 +#define DEPTH_LIMIT 10 + typedef unsigned char uint8_t; static int @@ -66,8 +68,8 @@ exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, unsigned char* end = (unsigned char* )astr_end; onig_initialize(&str_enc, 1); - onig_set_retry_limit_in_match(DEFAULT_LIMIT); - onig_set_parse_depth_limit(DEFAULT_LIMIT); + onig_set_retry_limit_in_search(RETRY_LIMIT); + onig_set_parse_depth_limit(DEPTH_LIMIT); ci.num_of_elements = 5; ci.pattern_enc = pattern_enc; @@ -186,7 +188,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) } -#ifdef WITH_READ_MAIN +#ifdef STANDALONE #include <unistd.h> @@ -201,4 +203,4 @@ extern int main(int argc, char* argv[]) return 0; } -#endif /* WITH_READ_MAIN */ +#endif /* STANDALONE */ diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c deleted file mode 100644 index 5db0512..0000000 --- a/harnesses/encode-harness.c +++ /dev/null @@ -1,365 +0,0 @@ -/* - * encode-harness.c - * contributed by Mark Griffin - */ -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <time.h> - -#include "oniguruma.h" - - -//#define PARSE_DEPTH_LIMIT 120 -#define RETRY_LIMIT 3500 - -typedef unsigned char uint8_t; - -static int -search(regex_t* reg, unsigned char* str, unsigned char* end) -{ - int r; - unsigned char *start, *range; - OnigRegion *region; - - region = onig_region_new(); - - start = str; - range = end; - r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); - if (r >= 0) { -#ifdef WITH_READ_MAIN - int i; - - fprintf(stdout, "match at %d (%s)\n", r, - ONIGENC_NAME(onig_get_encoding(reg))); - for (i = 0; i < region->num_regs; i++) { - fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); - } -#endif - } - else if (r == ONIG_MISMATCH) { -#ifdef WITH_READ_MAIN - fprintf(stdout, "search fail (%s)\n", - 
ONIGENC_NAME(onig_get_encoding(reg))); -#endif - } - else { /* error */ -#ifdef WITH_READ_MAIN - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - - onig_error_code_to_str((UChar* )s, r); - fprintf(stdout, "ERROR: %s\n", s); - fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); -#endif - onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - - if (r == ONIGERR_STACK_BUG || - r == ONIGERR_UNDEFINED_BYTECODE || - r == ONIGERR_UNEXPECTED_BYTECODE) - return -2; - - return -1; - } - - onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - return 0; -} - -static long INPUT_COUNT; -static long EXEC_COUNT; -static long EXEC_COUNT_INTERVAL; -static long REGEX_SUCCESS_COUNT; -static long VALID_STRING_COUNT; - -static int -exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, - char* apattern, char* apattern_end, char* astr, UChar* end) -{ - int r; - regex_t* reg; - OnigErrorInfo einfo; - UChar* pattern = (UChar* )apattern; - UChar* str = (UChar* )astr; - UChar* pattern_end = (UChar* )apattern_end; - - EXEC_COUNT++; - EXEC_COUNT_INTERVAL++; - - onig_initialize(&enc, 1); - onig_set_retry_limit_in_match(RETRY_LIMIT); - //onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); - - r = onig_new(&reg, pattern, pattern_end, - options, enc, syntax, &einfo); - if (r != ONIG_NORMAL) { - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str((UChar* )s, r, &einfo); -#ifdef WITH_READ_MAIN - fprintf(stdout, "ERROR: %s\n", s); -#endif - onig_end(); - - if (r == ONIGERR_PARSER_BUG || - r == ONIGERR_STACK_BUG || - r == ONIGERR_UNDEFINED_BYTECODE || - r == ONIGERR_UNEXPECTED_BYTECODE) { - return -2; - } - else - return -1; - } - REGEX_SUCCESS_COUNT++; - - r = search(reg, pattern, pattern_end); - if (r == -2) return -2; - - if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { - VALID_STRING_COUNT++; - r = search(reg, str, end); - if (r == -2) return -2; - } - - onig_free(reg); - onig_end(); - return 0; -} - -#if 0 -static void -output_data(char* 
path, const uint8_t * data, size_t size) -{ - int fd; - ssize_t n; - - fd = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IRGRP|S_IROTH); - if (fd == -1) { - fprintf(stderr, "ERROR: output_data(): can't open(%s)\n", path); - return ; - } - - n = write(fd, (const void* )data, size); - if (n != size) { - fprintf(stderr, "ERROR: output_data(): n: %ld, size: %ld\n", n, size); - } - close(fd); -} -#endif - - -static int -alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, - int pattern_size, size_t remaining_size, unsigned char *data) -{ - int r; - unsigned char *pattern_end; - unsigned char *str_null_end; - - // copy first PATTERN_SIZE bytes off to be the pattern - unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); - memcpy(pattern, data, pattern_size); - pattern_end = pattern + pattern_size; - data += pattern_size; - remaining_size -= pattern_size; - -#if defined(UTF16_BE) || defined(UTF16_LE) - if (remaining_size % 2 == 1) remaining_size--; -#endif - - unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? 
remaining_size : 1); - memcpy(str, data, remaining_size); - str_null_end = str + remaining_size; - - r = exec(enc, options, syntax, - (char *)pattern, (char *)pattern_end, - (char *)str, str_null_end); - - free(pattern); - free(str); - return r; -} - - -#define EXEC_PRINT_INTERVAL 10000000 -#define MAX_PATTERN_SIZE 150 - -#ifdef SYNTAX_TEST -#define NUM_CONTROL_BYTES 3 -#else -#define NUM_CONTROL_BYTES 2 -#endif - -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) -{ -#if !defined(UTF16_BE) && !defined(UTF16_LE) - static OnigEncoding encodings[] = { - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_SJIS, - //ONIG_ENCODING_EUC_JP, - ONIG_ENCODING_ISO_8859_1, - ONIG_ENCODING_BIG5, - ONIG_ENCODING_GB18030, - ONIG_ENCODING_EUC_TW - }; - unsigned char encoding_choice; -#endif - -#ifdef SYNTAX_TEST - static OnigSyntaxType* syntaxes[] = { - ONIG_SYNTAX_POSIX_EXTENDED, - ONIG_SYNTAX_EMACS, - ONIG_SYNTAX_GREP, - ONIG_SYNTAX_GNU_REGEX, - ONIG_SYNTAX_JAVA, - ONIG_SYNTAX_PERL_NG, - ONIG_SYNTAX_ONIGURUMA - }; - unsigned char syntax_choice; -#endif - - int r; - int pattern_size; - size_t remaining_size; - unsigned char *data; - unsigned char options_choice; - OnigOptionType options; - OnigEncoding enc; - OnigSyntaxType* syntax; - - INPUT_COUNT++; - if (Size < NUM_CONTROL_BYTES) return 0; - - remaining_size = Size; - data = (unsigned char* )(Data); - -#ifdef UTF16_BE - enc = ONIG_ENCODING_UTF16_BE; -#else -#ifdef UTF16_LE - enc = ONIG_ENCODING_UTF16_LE; -#else - encoding_choice = data[0]; - data++; - remaining_size--; - - int num_encodings = sizeof(encodings)/sizeof(encodings[0]); - enc = encodings[encoding_choice % num_encodings]; -#endif -#endif - -#ifdef SYNTAX_TEST - syntax_choice = data[0]; - data++; - remaining_size--; - - int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); - syntax = syntaxes[syntax_choice % num_syntaxes]; -#else - syntax = ONIG_SYNTAX_DEFAULT; -#endif - - options_choice = data[0]; - options = 
(options_choice % 2 == 0) ? ONIG_OPTION_NONE : ONIG_OPTION_IGNORECASE; - data++; - remaining_size--; - -#ifdef WITH_READ_MAIN -#ifdef SYNTAX_TEST - fprintf(stdout, "enc: %s, syntax: %d, options: %u\n", - ONIGENC_NAME(enc), (int )(syntax_choice % num_syntaxes), options); -#else - fprintf(stdout, "enc: %s, options: %u\n", ONIGENC_NAME(enc), options); -#endif -#endif - -#ifdef WITH_READ_MAIN - int max_pattern_size; - - if (remaining_size == 0) - max_pattern_size = 0; - else { - max_pattern_size = remaining_size - 1; - if (max_pattern_size > MAX_PATTERN_SIZE) - max_pattern_size = MAX_PATTERN_SIZE; - -#if defined(UTF16_BE) || defined(UTF16_LE) - if (max_pattern_size % 2 == 1) max_pattern_size--; -#endif - } - - for (pattern_size = 0; pattern_size <= max_pattern_size; ) { - fprintf(stdout, "pattern_size: %d\n", pattern_size); - r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); - if (r == -2) { - //output_data("parser-bug", Data, Size); - exit(-2); - } - -#if defined(UTF16_BE) || defined(UTF16_LE) - pattern_size += 2; -#else - pattern_size++; -#endif - } - -#else /* WITH_READ_MAIN */ - - if (remaining_size == 0) - pattern_size = 0; - else { - pattern_size = INPUT_COUNT % remaining_size; - if (pattern_size > MAX_PATTERN_SIZE) - pattern_size = MAX_PATTERN_SIZE; - -#if defined(UTF16_BE) || defined(UTF16_LE) - if (pattern_size % 2 == 1) pattern_size--; -#endif - } - - r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); - if (r == -2) { - //output_data("parser-bug", Data, Size); - exit(-2); - } -#endif /* else WITH_READ_MAIN */ - - if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { - char d[64]; - time_t t; - float fexec, freg, fvalid; - - t = time(NULL); - strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); - - fexec = (float )EXEC_COUNT / INPUT_COUNT; - freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; - fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; - - fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", - d, 
EXEC_COUNT, fexec, freg, fvalid); - - EXEC_COUNT_INTERVAL = 0; - } - return r; -} - -#ifdef WITH_READ_MAIN - -extern int main(int argc, char* argv[]) -{ - size_t n; - uint8_t Data[10000]; - - n = read(0, Data, sizeof(Data)); - fprintf(stdout, "n: %ld\n", n); - LLVMFuzzerTestOneInput(Data, n); - - return 0; -} -#endif /* WITH_READ_MAIN */ diff --git a/harnesses/fuzzer.options b/harnesses/fuzzer.options new file mode 100644 index 0000000..ab44744 --- /dev/null +++ b/harnesses/fuzzer.options @@ -0,0 +1,2 @@ +[libfuzzer] +dict = ascii_compatible.dict diff --git a/harnesses/makefile b/harnesses/makefile index dfd84de..b324295 100644 --- a/harnesses/makefile +++ b/harnesses/makefile @@ -1,54 +1,53 @@ # makefile for harness SRC = ../src CFLAGS = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer -CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DWITH_READ_MAIN +CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DSTANDALONE ONIG_LIB = $(SRC)/.libs/libonig.a LIBS = $(ONIG_LIB) -TARGETS = encode-libfuzzer syntax-libfuzzer \ - utf16-be-libfuzzer utf16-le-libfuzzer main-encode main-syntax \ - main-utf16-be main-utf16-le main-regset regset-libfuzzer +TARGETS = fuzzer-encode fuzzer-syntax fuzzer-utf16-be fuzzer-utf16-le \ + fuzzer-regset \ + read-encode read-syntax read-utf16-be read-utf16-le read-regset -OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full \ - deluxe-encode-libfuzzer main-deluxe-encode +OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full fuzzer-deluxe read-deluxe default: $(TARGETS) -encode-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-encode: base.c $(ONIG_LIB) clang $(CFLAGS) $< $(LIBS) -o $@ -syntax-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-syntax: base.c $(ONIG_LIB) clang -DSYNTAX_TEST $(CFLAGS) $< $(LIBS) -o $@ -deluxe-encode-libfuzzer: deluxe-encode-harness.c $(ONIG_LIB) +fuzzer-deluxe: deluxe.c $(ONIG_LIB) clang $(CFLAGS) $< $(LIBS) -o $@ 
-utf16-be-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-utf16-be: base.c $(ONIG_LIB) clang -DUTF16_BE $(CFLAGS) $< $(LIBS) -o $@ -utf16-le-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-utf16-le: base.c $(ONIG_LIB) clang -DUTF16_LE $(CFLAGS) $< $(LIBS) -o $@ -regset-libfuzzer: regset-harness.c $(ONIG_LIB) +fuzzer-regset: regset.c $(ONIG_LIB) clang $(CFLAGS) $< $(LIBS) -o $@ -main-encode: encode-harness.c $(ONIG_LIB) +read-encode: base.c $(ONIG_LIB) clang $(CFLAGS_M) $< $(LIBS) -o $@ -main-syntax: encode-harness.c $(ONIG_LIB) +read-syntax: base.c $(ONIG_LIB) clang -DSYNTAX_TEST $(CFLAGS_M) $< $(LIBS) -o $@ -main-deluxe-encode: deluxe-encode-harness.c $(ONIG_LIB) +read-deluxe: deluxe.c $(ONIG_LIB) clang $(CFLAGS_M) $< $(LIBS) -o $@ -main-utf16-be: encode-harness.c $(ONIG_LIB) +read-utf16-be: base.c $(ONIG_LIB) clang -DUTF16_BE $(CFLAGS_M) $< $(LIBS) -o $@ -main-utf16-le: encode-harness.c $(ONIG_LIB) +read-utf16-le: base.c $(ONIG_LIB) clang -DUTF16_LE $(CFLAGS_M) $< $(LIBS) -o $@ -main-regset: regset-harness.c $(ONIG_LIB) +read-regset: regset.c $(ONIG_LIB) clang $(CFLAGS_M) $< $(LIBS) -o $@ libfuzzer-onig: libfuzzer-onig.cpp $(ONIG_LIB) diff --git a/harnesses/regset-harness.c b/harnesses/regset.c index b4b7e20..a8dd181 100644 --- a/harnesses/regset-harness.c +++ b/harnesses/regset.c @@ -1,5 +1,5 @@ /* - * regset-harness.c + * regset.c * Copyright (c) 2019 K.Kosako */ #include <stdio.h> @@ -14,9 +14,9 @@ #include "oniguruma.h" -#define RETRY_LIMIT 500 +#define RETRY_LIMIT 5000 -#ifdef WITH_READ_MAIN +#ifdef STANDALONE //#define CHECK_EACH_REGEX_SEARCH_TIME #endif @@ -25,6 +25,18 @@ typedef unsigned char uint8_t; static OnigEncoding ENC; +static void +output_current_time(FILE* fp) +{ + char d[64]; + time_t t; + + t = time(NULL); + strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + + fprintf(fp, "%s", d); +} + #ifdef CHECK_EACH_REGEX_SEARCH_TIME static double get_sec(struct timespec* ts, struct timespec* te) @@ -85,7 +97,7 @@ search(OnigRegSet* set, 
OnigRegSetLead lead, unsigned char* str, unsigned char* r = onig_regset_search(set, str, end, start, range, lead, ONIG_OPTION_NONE, &match_pos); if (r >= 0) { -#ifdef WITH_READ_MAIN +#ifdef STANDALONE int i; int match_index; OnigRegion* region; @@ -105,12 +117,12 @@ search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* #endif } else if (r == ONIG_MISMATCH) { -#ifdef WITH_READ_MAIN +#ifdef STANDALONE fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC)); #endif } else { /* error */ -#ifdef WITH_READ_MAIN +#ifdef STANDALONE char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r); @@ -148,13 +160,13 @@ exec(OnigEncoding enc, int reg_num, int init_reg_num, options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE; onig_initialize(&enc, 1); - onig_set_retry_limit_in_match(RETRY_LIMIT); + onig_set_retry_limit_in_search(RETRY_LIMIT); for (i = 0; i < init_reg_num; i++) { r = onig_new(&regs[i], pat[i], pat_end[i], options, ENC, ONIG_SYNTAX_DEFAULT, &einfo); if (r != 0) { -#ifdef WITH_READ_MAIN +#ifdef STANDALONE char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r, &einfo); @@ -189,7 +201,7 @@ exec(OnigEncoding enc, int reg_num, int init_reg_num, r = onig_new(&reg, pat[i], pat_end[i], options, ENC, ONIG_SYNTAX_DEFAULT, &einfo); if (r != 0) { -#ifdef WITH_READ_MAIN +#ifdef STANDALONE char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r, &einfo); @@ -311,7 +323,7 @@ LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) memcpy(str, data, remaining_size); str_null_end = str + remaining_size; -#ifdef WITH_READ_MAIN +#ifdef STANDALONE fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n", reg_num, pattern_size, lead == ONIG_REGSET_POSITION_LEAD ?
"position" : "regex"); @@ -344,26 +356,27 @@ LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) } if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { - char d[64]; - time_t t; float fexec, freg, fvalid; - t = time(NULL); - strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); - fexec = (float )EXEC_COUNT / INPUT_COUNT; freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; - fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", - d, EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); + output_current_time(stdout); + fprintf(stdout, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", + EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); EXEC_COUNT_INTERVAL = 0; } + else if (EXEC_COUNT == 1) { + output_current_time(stdout); + fprintf(stdout, ": ------------ START ------------\n"); + } + return r; } -#ifdef WITH_READ_MAIN +#ifdef STANDALONE extern int main(int argc, char* argv[]) { @@ -376,4 +389,4 @@ extern int main(int argc, char* argv[]) return 0; } -#endif /* WITH_READ_MAIN */ +#endif /* STANDALONE */ diff --git a/sample/Makefile.am b/sample/Makefile.am index 22a4989..2bf4697 100644 --- a/sample/Makefile.am +++ b/sample/Makefile.am @@ -3,6 +3,7 @@ lib_onig = ../src/libonig.la LDADD = $(lib_onig) +AM_CFLAGS = -Wall AM_LDFLAGS = -L$(prefix)/lib AM_CPPFLAGS = -I$(top_srcdir)/src diff --git a/sample/encode.c b/sample/encode.c index c5d4771..9e9a0c7 100644 --- a/sample/encode.c +++ b/sample/encode.c @@ -161,6 +161,9 @@ extern int main(int argc, char* argv[]) r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE, "(Aa\\d)+", "BaA5Aa0234"); + r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_NONE, + "[[^\\w]]+[^\xee\xef]\xee\xef", "[[^\\w]]+[^\xee\xef]\xee\xef"); + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, "\000[\000\337\000]\000\000", "\000S\000S\000\000"); diff --git a/sample/listcap.c b/sample/listcap.c index a73f7d4..c0d3014 100644 --- a/sample/listcap.c +++ 
b/sample/listcap.c @@ -24,7 +24,7 @@ node_callback(int group, int beg, int end, int level, int at, void* arg) } extern int ex(unsigned char* str, unsigned char* pattern, - OnigSyntaxType* syntax) + OnigSyntaxType* syntax, OnigOptionType options) { int r; unsigned char *start, *range, *end; @@ -33,7 +33,7 @@ extern int ex(unsigned char* str, unsigned char* pattern, OnigRegion *region; r = onig_new(&reg, pattern, pattern + strlen((char* )pattern), - ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); + options, ONIG_ENCODING_ASCII, syntax, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r, &einfo); @@ -84,6 +84,7 @@ extern int main(int argc, char* argv[]) { int r; OnigSyntaxType syn; + OnigEncoding use_encs[1]; static UChar* str1 = (UChar* )"((())())"; static UChar* pattern1 @@ -95,7 +96,11 @@ extern int main(int argc, char* argv[]) static UChar* str3 = (UChar* )"0123"; static UChar* pattern3 = (UChar* )"(?@.)(?@.)(?@.)(?@.)"; - OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; + static UChar* str4 = (UChar* )"(((a))(a)) ((((a))(a)))"; + static UChar* pattern4 + = (UChar* )"\\g<p>(?@<p>\\(\\g<s>\\)){0}(?@<s>(?:\\g<p>)*|a){0}"; + + use_encs[0] = ONIG_ENCODING_ASCII; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); /* enable capture hostory */ @@ -103,9 +108,10 @@ extern int main(int argc, char* argv[]) onig_set_syntax_op2(&syn, onig_get_syntax_op2(&syn) | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY); - r = ex(str1, pattern1, &syn); - r = ex(str2, pattern2, &syn); - r = ex(str3, pattern3, &syn); + r = ex(str1, pattern1, &syn, ONIG_OPTION_NONE); + r = ex(str2, pattern2, &syn, ONIG_OPTION_NONE); + r = ex(str3, pattern3, &syn, ONIG_OPTION_NONE); + r = ex(str4, pattern4, &syn, ONIG_OPTION_FIND_LONGEST); onig_end(); return r; diff --git a/sample/names.c b/sample/names.c index 9b1eb24..bf47f63 100644 --- a/sample/names.c +++ b/sample/names.c @@ -31,11 +31,12 @@ extern int main(int argc, char* argv[])
regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; + OnigEncoding use_encs[1]; static UChar* pattern = (UChar* )"(?<foo>a*)(?<bar>b*)(?<foo>c*)"; static UChar* str = (UChar* )"aaabbbbcc"; - OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; + use_encs[0] = ONIG_ENCODING_ASCII; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); r = onig_new(&reg, pattern, pattern + strlen((char* )pattern), diff --git a/sample/regset.c b/sample/regset.c index ca3a10c..6be511a 100644 --- a/sample/regset.c +++ b/sample/regset.c @@ -16,6 +16,7 @@ extern int main(int argc, char* argv[]) regex_t* reg; OnigErrorInfo einfo; char ebuf[ONIG_MAX_ERROR_MESSAGE_LEN]; + OnigEncoding use_encs[1]; static UChar* str = (UChar* )"aaaaaaaaaaaaaaaaaaaaaaca"; @@ -25,7 +26,7 @@ extern int main(int argc, char* argv[]) "a(.....)c" }; - OnigEncoding use_encs[] = { ONIG_ENCODING_UTF8 }; + use_encs[0] = ONIG_ENCODING_UTF8; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); r = onig_regset_new(&set, 0, NULL); diff --git a/sample/simple.c b/sample/simple.c index 5a14042..a08ea86 100644 --- a/sample/simple.c +++ b/sample/simple.c @@ -12,11 +12,12 @@ extern int main(int argc, char* argv[]) regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; + OnigEncoding use_encs[1]; static UChar* pattern = (UChar* )"a(.*)b|[e-f]+"; static UChar* str = (UChar* )"zzzzaffffffffb"; - OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; + use_encs[0] = ONIG_ENCODING_ASCII; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); r = onig_new(&reg, pattern, pattern + strlen((char* )pattern), diff --git a/sample/sql.c b/sample/sql.c index 1602ac9..21147aa 100644 --- a/sample/sql.c +++ b/sample/sql.c @@ -14,11 +14,12 @@ extern int main(int argc, char* argv[]) regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; + OnigEncoding use_encs[1]; static UChar* pattern = (UChar* )"\\_%\\\\__zz"; static UChar* str = (UChar* )"a_abcabcabc\\ppzz"; - OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; +
use_encs[0] = ONIG_ENCODING_ASCII; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS); diff --git a/sample/syntax.c b/sample/syntax.c index e034608..3a5a7cf 100644 --- a/sample/syntax.c +++ b/sample/syntax.c @@ -58,8 +58,9 @@ extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr) extern int main(int argc, char* argv[]) { int r; + OnigEncoding use_encs[1]; - OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; + use_encs[0] = ONIG_ENCODING_ASCII; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); r = exec(ONIG_SYNTAX_PERL, diff --git a/sample/user_property.c b/sample/user_property.c index d52adc0..ecb842c 100644 --- a/sample/user_property.c +++ b/sample/user_property.c @@ -13,6 +13,7 @@ main(int argc, char* argv[]) regex_t* reg; OnigErrorInfo einfo; OnigRegion *region; + OnigEncoding use_encs[1]; static OnigCodePoint handakuon_hiragana[] = { 5, /* number of ranges */ @@ -29,7 +30,7 @@ main(int argc, char* argv[]) /* "PA PI PU PE PO a" */ static UChar* str = (UChar* )"\343\201\261\343\201\264\343\201\267\343\201\272\343\201\275a"; - OnigEncoding use_encs[] = { ONIG_ENCODING_UTF8 }; + use_encs[0] = ONIG_ENCODING_UTF8; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); r = onig_unicode_define_user_property("HandakuonHiragana", handakuon_hiragana); diff --git a/src/Makefile.am b/src/Makefile.am index 1600311..36c2222 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -50,13 +50,7 @@ libonig_la_LDFLAGS = -version-info $(LTVERSION) EXTRA_DIST = koi8.c mktable.c \ unicode_fold_data.c unicode_property_data.c \ unicode_property_data_posix.c \ - unicode_egcb_data.c unicode_wb_data.c \ - make_unicode_egcb.sh make_unicode_egcb_data.py \ - make_unicode_wb.sh make_unicode_wb_data.py \ - make_unicode_fold.sh make_unicode_fold_data.py \ - make_unicode_property.sh make_unicode_property_data.py \ - make_property.sh \ - gperf_fold_key_conv.py 
gperf_unfold_key_conv.py + unicode_egcb_data.c unicode_wb_data.c dll: $(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \ diff --git a/src/Makefile.windows b/src/Makefile.windows index 1e87504..90ebf28 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -155,6 +155,10 @@ $(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/ $(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h $(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +all-test: test_syntax test_regset test_utf8 testc testp testu + +test_syntax: $(TEST_DIR)/test_syntax.c $(libname) + $(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern /utf-8 $(TEST_DIR)/test_syntax.c $(libname) test_regset: $(TEST_DIR)/test_regset.c $(libname) $(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern /utf-8 $(TEST_DIR)/test_regset.c $(libname) @@ -172,7 +176,7 @@ testu: $(TEST_DIR)/testu.c $(libname) $(CC) -nologo /Fe:$@ /I. 
/I$(ONIG_DIR) /DONIG_EXTERN=extern $(TEST_DIR)/testu.c $(libname) clean: - del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\test_regset.exe $(BUILD_DIR)\test_utf8.exe $(BUILD_DIR)\testp.exe $(BUILD_DIR)\testc.exe $(BUILD_DIR)\testu.exe + del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\test_regset.exe $(BUILD_DIR)\test_syntax.exe $(BUILD_DIR)\test_utf8.exe $(BUILD_DIR)\testp.exe $(BUILD_DIR)\testc.exe $(BUILD_DIR)\testu.exe samples: all diff --git a/src/euc_jp_prop.c b/src/euc_jp_prop.c index a816f48..cb884c4 100644 --- a/src/euc_jp_prop.c +++ b/src/euc_jp_prop.c @@ -31,8 +31,7 @@ #line 1 "euc_jp_prop.gperf" -#include <string.h> -#include "regenc.h" +#include "regint.h" #define TOTAL_KEYWORDS 16 #define MIN_WORD_LENGTH 4 @@ -89,50 +88,50 @@ onigenc_euc_jp_lookup_property_name (register const char *str, register size_t l static struct PropertyNameCtype wordlist[] = { {""}, {""}, {""}, {""}, -#line 23 "euc_jp_prop.gperf" +#line 22 "euc_jp_prop.gperf" {"Word", 12}, -#line 12 "euc_jp_prop.gperf" +#line 11 "euc_jp_prop.gperf" {"Alpha", 1}, {""}, {""}, -#line 26 "euc_jp_prop.gperf" +#line 25 "euc_jp_prop.gperf" {"Hiragana", 15}, {""}, -#line 24 "euc_jp_prop.gperf" +#line 23 "euc_jp_prop.gperf" {"Alnum", 13}, {""}, {""}, -#line 27 "euc_jp_prop.gperf" +#line 26 "euc_jp_prop.gperf" {"Katakana", 16}, {""}, -#line 25 "euc_jp_prop.gperf" +#line 24 "euc_jp_prop.gperf" {"ASCII", 14}, -#line 22 "euc_jp_prop.gperf" +#line 21 "euc_jp_prop.gperf" {"XDigit", 11}, {""}, {""}, {""}, -#line 14 "euc_jp_prop.gperf" +#line 13 "euc_jp_prop.gperf" {"Cntrl", 3}, {""}, {""}, -#line 13 "euc_jp_prop.gperf" +#line 12 "euc_jp_prop.gperf" {"Blank", 2}, {""}, -#line 19 "euc_jp_prop.gperf" +#line 18 "euc_jp_prop.gperf" {"Punct", 8}, {""}, {""}, {""}, {""}, -#line 18 "euc_jp_prop.gperf" +#line 17 "euc_jp_prop.gperf" {"Print", 7}, {""}, {""}, {""}, {""}, -#line 21 "euc_jp_prop.gperf" +#line 20 "euc_jp_prop.gperf" 
{"Upper", 10}, {""}, {""}, {""}, {""}, -#line 20 "euc_jp_prop.gperf" +#line 19 "euc_jp_prop.gperf" {"Space", 9}, {""}, {""}, {""}, {""}, -#line 17 "euc_jp_prop.gperf" +#line 16 "euc_jp_prop.gperf" {"Lower", 6}, {""}, {""}, {""}, {""}, -#line 16 "euc_jp_prop.gperf" +#line 15 "euc_jp_prop.gperf" {"Graph", 5}, {""}, {""}, {""}, {""}, -#line 15 "euc_jp_prop.gperf" +#line 14 "euc_jp_prop.gperf" {"Digit", 4} }; diff --git a/src/euc_jp_prop.gperf b/src/euc_jp_prop.gperf index 2cec8cf..842b61b 100644 --- a/src/euc_jp_prop.gperf +++ b/src/euc_jp_prop.gperf @@ -1,6 +1,5 @@ %{ -#include <string.h> -#include "regenc.h" +#include "regint.h" %} struct PropertyNameCtype { diff --git a/src/gb18030.c b/src/gb18030.c index 50898eb..1385a7f 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -2,7 +2,7 @@ gb18030.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2005-2019 KUBO Takehiro <kubo AT jiubao DOT org> + * Copyright (c) 2005-2020 KUBO Takehiro <kubo AT jiubao DOT org> * K.Kosako * All rights reserved. 
* @@ -31,10 +31,20 @@ #include "regenc.h" #if 1 + #define DEBUG_GB18030(arg) + #else -#include <stdio.h> + +#ifndef NEED_TO_INCLUDE_STDIO +#define NEED_TO_INCLUDE_STDIO +#endif + +/* for printf() */ +#include "regint.h" + #define DEBUG_GB18030(arg) printf arg + #endif enum { diff --git a/src/iso8859_1.c b/src/iso8859_1.c index e681c2a..d75509e 100644 --- a/src/iso8859_1.c +++ b/src/iso8859_1.c @@ -29,6 +29,9 @@ #include "regenc.h" +#define LARGE_S 0x53 +#define SMALL_S 0x73 + #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ ((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) @@ -115,33 +118,48 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) { + static OnigUChar sa[] = { LARGE_S, SMALL_S }; + int i, j, n; + if (0x41 <= *p && *p <= 0x5a) { + if (*p == LARGE_S && end > p + 1 + && (*(p+1) == LARGE_S || *(p+1) == SMALL_S)) { /* SS */ + ss_combination: + items[0].byte_len = 2; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )0xdf; + + n = 1; + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + if (sa[i] == *p && sa[j] == *(p+1)) + continue; + + items[n].byte_len = 2; + items[n].code_len = 2; + items[n].code[0] = (OnigCodePoint )sa[i]; + items[n].code[1] = (OnigCodePoint )sa[j]; + n++; + } + } + return 4; + } + items[0].byte_len = 1; items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p + 0x20); - if (*p == 0x53 && end > p + 1 - && (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; + return 1; } else if (0x61 <= *p && *p <= 0x7a) { + if (*p == SMALL_S && end > p + 1 + && (*(p+1) == SMALL_S || *(p+1) == LARGE_S)) { /* ss */ + goto ss_combination; + } + items[0].byte_len = 1; items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p - 0x20); - if (*p == 0x73 && end > p + 1 - && (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */ - 
items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; + return 1; } else if (0xc0 <= *p && *p <= 0xcf) { items[0].byte_len = 1; diff --git a/src/make_property.sh b/src/make_property.sh index cef0a96..e5f1244 100755 --- a/src/make_property.sh +++ b/src/make_property.sh @@ -1,6 +1,7 @@ #!/bin/sh GPERF=gperf +SED=gsed TMP1=gperf1.tmp TMP2=gperf2.tmp @@ -10,10 +11,10 @@ GPERF_OPT='-pt -T -L ANSI-C' ADD_CAST='s/return +len +\+ +asso_values/return (unsigned int )len + asso_values/' ${GPERF} ${GPERF_OPT} -N onigenc_euc_jp_lookup_property_name --output-file ${TMP1} euc_jp_prop.gperf -cat ${TMP1} | sed -r "${ADD_CAST}" > euc_jp_prop.c +cat ${TMP1} | ${SED} -r "${ADD_CAST}" > euc_jp_prop.c ${GPERF} ${GPERF_OPT} -N onigenc_sjis_lookup_property_name --output-file ${TMP2} sjis_prop.gperf -cat ${TMP2} | sed -r "${ADD_CAST}" > sjis_prop.c +cat ${TMP2} | ${SED} -r "${ADD_CAST}" > sjis_prop.c rm -f ${TMP1} ${TMP2} diff --git a/src/make_unicode_egcb_data.py b/src/make_unicode_egcb_data.py index 9c71796..e2982df 100755 --- a/src/make_unicode_egcb_data.py +++ b/src/make_unicode_egcb_data.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # make_unicode_egcb_data.py -# Copyright (c) 2017-2019 K.Kosako +# Copyright (c) 2017-2020 K.Kosako import sys import re @@ -195,7 +195,7 @@ PROPS = sorted(PROPS) print '/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */' COPYRIGHT = ''' /*- - * Copyright (c) 2017-2019 K.Kosako + * Copyright (c) 2017-2020 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/src/make_unicode_fold_data.py b/src/make_unicode_fold_data.py index 55d5b88..0e6c635 100755 --- a/src/make_unicode_fold_data.py +++ b/src/make_unicode_fold_data.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # make_unicode_fold_data.py -# Copyright (c) 2016-2019 K.Kosako +# Copyright (c) 2016-2020 K.Kosako import sys import re @@ -28,6 +28,35 @@ UNFOLDS = {} TURKISH_UNFOLDS = {} LOCALE_UNFOLDS = {} +COPYRIGHT = ''' +/*- + * Copyright (c) 2017-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +'''.strip() + + class Entry: def __init__(self, fold): self.fold = fold @@ -55,6 +84,11 @@ def form3bytes(x): x2 = (x>>16) & 0xff return "\\x%02x\\x%02x\\x%02x" % (x2, x1, x0) +def enc_len(code, encode): + u = unichr(code) + s = u.encode(encode) + return len(s) + def check_version_info(s): m = VERSION_REG.match(s) if m is not None: @@ -231,6 +265,8 @@ def output_macros(f, name): def output_fold_source(f, out_comment): print >> f, "/* This file was generated by make_unicode_fold_data.py. */" + print >> f, COPYRIGHT + print >> f, "\n" print >> f, '#include "regenc.h"' print >> f, '' if VERSION_INFO[0] < 0: @@ -244,42 +280,11 @@ def output_fold_source(f, out_comment): output_fold_data(f, DataName, out_comment) -HEAD = ''' -%{ -/* This gperf source file was generated by make_unicode_fold_data.py */ +def output_gperf_unfold_key(f): + head = "%{\n/* This gperf source file was generated by make_unicode_fold_data.py */\n\n" + COPYRIGHT + """\ -/*- - * Copyright (c) 2017-2019 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include <string.h> -#include "regenc.h" +#include "regint.h" %} -'''.strip() - -def output_gperf_unfold_key(f): - head = HEAD + """\ struct ByUnfoldKey { OnigCodePoint code; @@ -299,7 +304,10 @@ struct ByUnfoldKey { print >> f, '%%' def output_gperf_fold_key(f, key_len): - head = HEAD + """\ + head = "%{\n/* This gperf source file was generated by make_unicode_fold_data.py */\n\n" + COPYRIGHT + """\ + +#include "regint.h" +%} short int %% @@ -324,6 +332,138 @@ def output_gperf_source(): with open(GPERF_FOLD_KEY_FILES[i-1], 'w') as f: output_gperf_fold_key(f, i) +def unfolds_byte_length_check(encode): + l = UNFOLDS.items() + sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index)) + for unfold, e in sl: + key_len = enc_len(unfold, encode) + fold_len = sum(map(lambda c: enc_len(c, encode), e.fold)) + if key_len > fold_len: + sfolds = ' '.join(map(lambda c: "0x%06x" % c, e.fold)) + s = "%s byte length: %d > %d: 0x%06x => %s" % (encode, key_len, fold_len, unfold, sfolds) + print >> sys.stderr, s + +def double_fold_check(): + l = UNFOLDS.items() + sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index)) + for unfold, e in sl: + for f in e.fold: + #print >> sys.stderr, ("check 0x%06x" % f) + e2 = UNFOLDS.get(f) + if e2 is not None: + s = "double folds: 0x%06x => %s, 0x%06x => %s" % (unfold, e.fold, f, e2.fold) + print >> sys.stderr, s + +def unfold_is_multi_code_folds_head_check(): + l = UNFOLDS.items() + l2 = filter(lambda (k,e):e.fold_len == 
2, l) + l3 = filter(lambda (k,e):e.fold_len == 3, l) + sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index)) + for unfold, _ in sl: + for k, e in l2: + if e.fold[0] == unfold: + s = "unfold 0x%06x is multi-code fold head in %s" % (unfold, e.fold) + print >> sys.stderr, s + for k, e in l3: + if e.fold[0] == unfold: + s = "unfold 0x%06x is multi-code fold head in %s" % (unfold, e.fold) + print >> sys.stderr, s + +def make_one_folds(l): + h = {} + for unfold, e in l: + if e.fold_len != 1: + continue + fold = e.fold[0] + unfolds = h.get(fold) + if unfolds is None: + unfolds = [unfold] + h[fold] = unfolds + else: + unfolds.append(unfold) + + return h + +def make_foldn_heads(l, fold_len, one_folds): + h = {} + for unfold, e in l: + if e.fold_len != fold_len: + continue + unfolds = one_folds.get(e.fold[0]) + h[e.fold[0]] = (e, unfolds) + + return h + +def fold2_expansion_num(e, one_folds): + n = len(e.unfolds) + n0 = 1 + u0 = one_folds.get(e.fold[0]) + if u0 is not None: + n0 += len(u0) + n1 = 1 + u1 = one_folds.get(e.fold[1]) + if u1 is not None: + n1 += len(u1) + n += (n0 * n1) + return n + +def fold3_expansion_num(e, one_folds): + n = len(e.unfolds) + n0 = 1 + u0 = one_folds.get(e.fold[0]) + if u0 is not None: + n0 += len(u0) + n1 = 1 + u1 = one_folds.get(e.fold[1]) + if u1 is not None: + n1 += len(u1) + n2 = 1 + u2 = one_folds.get(e.fold[2]) + if u2 is not None: + n2 += len(u2) + n += (n0 * n1 * n2) + return n + +def get_all_folds_expansion_num(x, one_folds, fold2_heads, fold3_heads): + e = UNFOLDS[x] + n = 0 + if e.fold_len == 1: + n1 = len(e.unfolds) + 1 # +1: fold + fx = e.fold[0] + r = fold2_heads.get(fx) + n2 = n3 = 0 + if r is not None: + e2, _ = r + n2 = fold2_expansion_num(e2, one_folds) + r = fold3_heads.get(fx) + if r is not None: + e3, _ = r + n3 = fold3_expansion_num(e3, one_folds) + n = max(n1, n2, n3) + elif e.fold_len == 2: + n = fold2_expansion_num(e, one_folds) + elif e.fold_len == 3: + n = fold3_expansion_num(e, one_folds) + else: + raise 
RuntimeError("Invalid fold_len %d" % (e.fold_len)) + + return n + +def get_all_folds_expansion_max_num(): + l = UNFOLDS.items() + one_folds = make_one_folds(l) + fold2_heads = make_foldn_heads(l, 2, one_folds) + fold3_heads = make_foldn_heads(l, 3, one_folds) + sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index)) + nmax = 0 + max_unfold = None + for unfold, e in sl: + n = get_all_folds_expansion_num(unfold, one_folds, fold2_heads, fold3_heads) + if nmax < n: + nmax = n + max_unfold = unfold + + return (nmax, max_unfold) ## main ## with open(SOURCE_FILE, 'r') as f: @@ -335,3 +475,12 @@ out_comment = True output_fold_source(sys.stdout, out_comment) output_gperf_source() + +#unfolds_byte_length_check('utf-8') +#unfolds_byte_length_check('utf-16') +double_fold_check() +unfold_is_multi_code_folds_head_check() + +#max_num, max_code = get_all_folds_expansion_max_num() +#max_num -= 1 # remove self +#print >> sys.stderr, "max expansion: 0x%06x: %d" % (max_code, max_num) diff --git a/src/make_unicode_property.sh b/src/make_unicode_property.sh index 51c8951..5129376 100755 --- a/src/make_unicode_property.sh +++ b/src/make_unicode_property.sh @@ -1,6 +1,7 @@ #!/bin/sh GPERF=gperf +SED=gsed NAME=unicode_property_data TMP1=gperf1.tmp @@ -16,10 +17,10 @@ ADD_CAST='s/unsigned +int +hval *= *len/unsigned int hval = (unsigned int )len/' ./make_unicode_property_data.py -posix > ${NAME}_posix.gperf ${GPERF} ${GPERF_OPT} -N unicode_lookup_property_name --output-file ${TMP1} ${NAME}.gperf -cat ${TMP1} | sed -e 's/^#line.*$//g' | sed -r "${POOL_CAST}" | sed -r "${ADD_STATIC}" | sed -r "${ADD_CAST}" > ${NAME}.c +cat ${TMP1} | ${SED} -e 's/^#line.*$//g' | ${SED} -r "${POOL_CAST}" | ${SED} -r "${ADD_STATIC}" | ${SED} -r "${ADD_CAST}" > ${NAME}.c ${GPERF} ${GPERF_OPT} -N unicode_lookup_property_name --output-file ${TMP2} ${NAME}_posix.gperf -cat ${TMP2} | sed -e 's/^#line.*$//g' | sed -r "${POOL_CAST}" | sed -r "${ADD_STATIC}" > ${NAME}_posix.c +cat ${TMP2} | ${SED} -e 's/^#line.*$//g' | 
${SED} -r "${POOL_CAST}" | ${SED} -r "${ADD_STATIC}" > ${NAME}_posix.c rm -f ${NAME}.gperf ${NAME}_posix.gperf ${TMP1} ${TMP2} diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py index 9776628..285c462 100755 --- a/src/make_unicode_property_data.py +++ b/src/make_unicode_property_data.py @@ -418,10 +418,39 @@ def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = No merge_props(PROPS, props) return dic, props, ver_m + ### main ### argv = sys.argv argc = len(argv) +COPYRIGHT = ''' +/*- + * Copyright (c) 2016-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +'''.strip() + POSIX_ONLY = False INCLUDE_GRAPHEME_CLUSTER_DATA = False @@ -485,10 +514,14 @@ if INCLUDE_GRAPHEME_CLUSTER_DATA: add_posix_props(DIC) PROPS = sorted(PROPS) + s = '''%{ /* Generated by make_unicode_property_data.py. */ ''' print s +print COPYRIGHT +print '' + for prop in POSIX_LIST: print_property(prop, DIC[prop], "POSIX [[:%s:]]" % prop) diff --git a/src/make_unicode_wb_data.py b/src/make_unicode_wb_data.py index ddedd5d..fb0bf50 100755 --- a/src/make_unicode_wb_data.py +++ b/src/make_unicode_wb_data.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # make_unicode_wb_data.py -# Copyright (c) 2019 K.Kosako +# Copyright (c) 2019-2020 K.Kosako import sys import re @@ -195,7 +195,7 @@ PROPS = sorted(PROPS) print '/* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */' COPYRIGHT = ''' /*- - * Copyright (c) 2019 K.Kosako + * Copyright (c) 2019-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/mktable.c b/src/mktable.c index 318bac0..5f36ee3 100644 --- a/src/mktable.c +++ b/src/mktable.c @@ -1102,7 +1102,7 @@ static int IsAscii(int enc ARG_UNUSED, int c) static int IsNewline(int enc ARG_UNUSED, int c) { - if (c == 0x0a) return 1; + if (c == NEWLINE_CODE) return 1; return 0; } diff --git a/src/onigposix.h b/src/onigposix.h index 5ff779f..37e09ea 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -4,7 +4,7 @@ onigposix.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,7 +28,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ -#include <stdlib.h> +#ifndef ONIG_NO_STANDARD_C_HEADERS +#include <stddef.h> +#endif #ifdef __cplusplus extern "C" { diff --git a/src/oniguruma.h b/src/oniguruma.h index 08ac6f7..15f6ef0 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,9 +36,9 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 #define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 4 +#define ONIGURUMA_VERSION_TEENY 5 -#define ONIGURUMA_VERSION_INT 60904 +#define ONIGURUMA_VERSION_INT 60905 #ifndef P_ #if defined(__STDC__) || defined(_WIN32) @@ -70,6 +70,10 @@ extern "C" { #define ONIG_EXTERN extern #endif +#ifndef ONIG_VARIADIC_FUNC_ATTR +#define ONIG_VARIADIC_FUNC_ATTR +#endif + /* PART: character encoding */ #ifndef ONIG_ESCAPE_UCHAR_COLLISION @@ -515,6 +519,8 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ #define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */ #define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ +#define ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (1U<<10) /* ..(?i)...|... */ +#define ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND (1U<<11) /* (?<=a+|..) */ /* syntax (behavior) in char class [...] */ #define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] 
*/ @@ -554,6 +560,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_MATCH_STACK_LIMIT_OVER -15 #define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16 #define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER -17 +#define ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER -18 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 #define ONIGERR_FAIL_TO_INITIALIZE -23 @@ -777,7 +784,7 @@ int onig_initialize P_((OnigEncoding encodings[], int number_of_encodings)); ONIG_EXTERN int onig_init P_((void)); ONIG_EXTERN -int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...)); +int ONIG_VARIADIC_FUNC_ATTR onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...)); ONIG_EXTERN int onig_is_error_code_needs_param PV_((int code)); ONIG_EXTERN @@ -902,12 +909,20 @@ unsigned long onig_get_retry_limit_in_match P_((void)); ONIG_EXTERN int onig_set_retry_limit_in_match P_((unsigned long n)); ONIG_EXTERN +unsigned long onig_get_retry_limit_in_search P_((void)); +ONIG_EXTERN +int onig_set_retry_limit_in_search P_((unsigned long n)); +ONIG_EXTERN unsigned int onig_get_parse_depth_limit P_((void)); ONIG_EXTERN int onig_set_capture_num_limit P_((int num)); ONIG_EXTERN int onig_set_parse_depth_limit P_((unsigned int depth)); ONIG_EXTERN +int onig_get_subexp_call_max_nest_level P_((void)); +ONIG_EXTERN +int onig_set_subexp_call_max_nest_level P_((int level)); +ONIG_EXTERN int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges)); ONIG_EXTERN int onig_end P_((void)); @@ -930,6 +945,8 @@ int onig_set_match_stack_limit_size_of_match_param P_((OnigMatchParam* param, un ONIG_EXTERN int onig_set_retry_limit_in_match_of_match_param P_((OnigMatchParam* param, unsigned long limit)); ONIG_EXTERN +int onig_set_retry_limit_in_search_of_match_param P_((OnigMatchParam* param, unsigned long limit)); +ONIG_EXTERN int onig_set_progress_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); ONIG_EXTERN int 
onig_set_retraction_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); diff --git a/src/regcomp.c b/src/regcomp.c index 69d4b95..4d5b78f 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,8 +31,21 @@ #define OPS_INIT_SIZE 8 +typedef struct { + OnigLen min; + OnigLen max; +} MinMaxLen; + +typedef struct { + OnigLen min; + OnigLen max; + int min_is_sure; +} MinMaxCharLen; + OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; +static OnigLen node_min_byte_len(Node* node, ScanEnv* env); + #if 0 typedef struct { int n; @@ -99,7 +112,7 @@ int_stack_pop(int_stack* s) #ifdef ONIG_DEBUG if (s->n <= 0) { - fprintf(stderr, "int_stack_pop: fail empty. %p\n", s); + fprintf(DBGFP, "int_stack_pop: fail empty. %p\n", s); return 0; } #endif @@ -228,13 +241,13 @@ ops_free(regex_t* reg) if (! is_in_string_pool(reg, op->exact_len_n.s)) xfree(op->exact_len_n.s); break; - case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N: case OP_STR_N_IC: + case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N: if (! 
is_in_string_pool(reg, op->exact_n.s)) xfree(op->exact_n.s); break; case OP_STR_1: case OP_STR_2: case OP_STR_3: case OP_STR_4: case OP_STR_5: case OP_STR_MB2N1: case OP_STR_MB2N2: - case OP_STR_MB2N3: case OP_STR_1_IC: + case OP_STR_MB2N3: break; case OP_CCLASS_NOT: case OP_CCLASS: @@ -302,9 +315,6 @@ ops_calc_size_of_string_pool(regex_t* reg) total += op->exact_len_n.len * op->exact_len_n.n; break; case OP_STR_N: - case OP_STR_N_IC: - total += op->exact_n.n; - break; case OP_STR_MB2N: total += op->exact_n.n * 2; break; @@ -357,7 +367,6 @@ ops_make_string_pool(regex_t* reg) curr += len; break; case OP_STR_N: - case OP_STR_N_IC: len = op->exact_n.n; copy: xmemcpy(curr, op->exact_n.s, len); @@ -398,12 +407,12 @@ onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) } static int -int_multiply_cmp(int x, int y, int v) +len_multiply_cmp(OnigLen x, int y, OnigLen v) { if (x == 0 || y == 0) return -1; - if (x < INT_MAX / y) { - int xy = x * y; + if (x < INFINITE_LEN / y) { + OnigLen xy = x * (OnigLen )y; if (xy > v) return 1; else { if (xy == v) return 0; @@ -411,7 +420,7 @@ int_multiply_cmp(int x, int y, int v) } } else - return 1; + return v == INFINITE_LEN ? 0 : 1; } extern int @@ -419,7 +428,7 @@ onig_positive_int_multiply(int x, int y) { if (x == 0 || y == 0) return 0; - if (x < INT_MAX / y) + if (x < ONIG_INT_MAX / y) return x * y; else return -1; @@ -489,42 +498,29 @@ node_str_node_cat(Node* node, Node* add) { int r; - if (STR_(node)->flag != STR_(add)->flag) + if (NODE_STATUS(node) != NODE_STATUS(add)) return ONIGERR_TYPE_BUG; - r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end); - if (r != 0) return r; - - if (NODE_STRING_IS_CASE_FOLD_MATCH(node)) - STR_(node)->case_min_len += STR_(add)->case_min_len; - - return 0; -} - -static int -node_str_cat_case_fold(Node* node, const UChar* s, const UChar* end, int case_min_len) -{ - int r; - - if (! 
NODE_STRING_IS_CASE_FOLD_MATCH(node)) + if (STR_(node)->flag != STR_(add)->flag) return ONIGERR_TYPE_BUG; - r = onig_node_str_cat(node, s, end); + r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end); if (r != 0) return r; - STR_(node)->case_min_len += case_min_len; return 0; } static void -node_conv_to_str_node(Node* node, int flag) +node_conv_to_str_node(Node* node, Node* ref_node) { + xmemset(node, 0, sizeof(*node)); NODE_SET_TYPE(node, NODE_STRING); - STR_(node)->flag = flag; + NODE_STATUS(node) = NODE_STATUS(ref_node); + + STR_(node)->flag = STR_(ref_node)->flag; STR_(node)->s = STR_(node)->buf; STR_(node)->end = STR_(node)->buf; STR_(node)->capacity = 0; - STR_(node)->case_min_len = 0; } static OnigLen @@ -554,7 +550,7 @@ bitset_is_empty(BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { + for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { if (bs[i] != 0) return 0; } return 1; @@ -602,6 +598,351 @@ unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node) } #endif /* USE_CALL */ +enum CharLenReturnType { + CHAR_LEN_NORMAL = 0, /* fixed or variable */ + CHAR_LEN_TOP_ALT_FIXED = 1 +}; + +static int +mmcl_fixed(MinMaxCharLen* c) +{ + return (c->min == c->max && c->min != INFINITE_LEN); +} + +static void +mmcl_set(MinMaxCharLen* l, OnigLen len) +{ + l->min = len; + l->max = len; + l->min_is_sure = TRUE; +} + +static void +mmcl_set_min_max(MinMaxCharLen* l, OnigLen min, OnigLen max, int min_is_sure) +{ + l->min = min; + l->max = max; + l->min_is_sure = min_is_sure; +} + +static void +mmcl_add(MinMaxCharLen* to, MinMaxCharLen* add) +{ + to->min = distance_add(to->min, add->min); + to->max = distance_add(to->max, add->max); + + to->min_is_sure = add->min_is_sure != 0 && to->min_is_sure != 0; +} + +static void +mmcl_multiply(MinMaxCharLen* to, int m) +{ + to->min = distance_multiply(to->min, m); + to->max = distance_multiply(to->max, m); +} + +static void +mmcl_repeat_range_multiply(MinMaxCharLen* to, int mlow, int mhigh) +{ + to->min 
= distance_multiply(to->min, mlow); + + if (IS_INFINITE_REPEAT(mhigh)) + to->max = INFINITE_LEN; + else + to->max = distance_multiply(to->max, mhigh); +} + +static void +mmcl_alt_merge(MinMaxCharLen* to, MinMaxCharLen* alt) +{ + if (to->min > alt->min) { + to->min = alt->min; + if (alt->min_is_sure != 0) + to->min_is_sure = TRUE; + } + + if (to->max < alt->max) to->max = alt->max; +} + +static int +mml_is_equal(MinMaxLen* a, MinMaxLen* b) +{ + return a->min == b->min && a->max == b->max; +} + +static void +mml_set_min_max(MinMaxLen* l, OnigLen min, OnigLen max) +{ + l->min = min; + l->max = max; +} + +static void +mml_clear(MinMaxLen* l) +{ + l->min = l->max = 0; +} + +static void +mml_copy(MinMaxLen* to, MinMaxLen* from) +{ + to->min = from->min; + to->max = from->max; +} + +static void +mml_add(MinMaxLen* to, MinMaxLen* add) +{ + to->min = distance_add(to->min, add->min); + to->max = distance_add(to->max, add->max); +} + +static void +mml_alt_merge(MinMaxLen* to, MinMaxLen* alt) +{ + if (to->min > alt->min) to->min = alt->min; + if (to->max < alt->max) to->max = alt->max; +} + +/* fixed size pattern node only */ +static int +node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env, + int level) +{ + MinMaxCharLen tci; + int r = CHAR_LEN_NORMAL; + + level++; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + { + int first = TRUE; + do { + r = node_char_len1(NODE_CAR(node), reg, &tci, env, level); + if (r < 0) break; + if (first == TRUE) { + *ci = tci; + first = FALSE; + } + else + mmcl_add(ci, &tci); + } while (IS_NOT_NULL(node = NODE_CDR(node))); + } + break; + + case NODE_ALT: + { + int fixed; + + r = node_char_len1(NODE_CAR(node), reg, ci, env, level); + if (r < 0) break; + + fixed = TRUE; + while (IS_NOT_NULL(node = NODE_CDR(node))) { + r = node_char_len1(NODE_CAR(node), reg, &tci, env, level); + if (r < 0) break; + if (! 
mmcl_fixed(&tci)) + fixed = FALSE; + mmcl_alt_merge(ci, &tci); + } + if (r < 0) break; + + r = CHAR_LEN_NORMAL; + if (mmcl_fixed(ci)) break; + + if (fixed == TRUE && level == 1) { + r = CHAR_LEN_TOP_ALT_FIXED; + } + } + break; + + case NODE_STRING: + { + OnigLen clen; + StrNode* sn = STR_(node); + UChar *s = sn->s; + + if (NODE_IS_IGNORECASE(node) && ! NODE_STRING_IS_CRUDE(node)) { + /* Such a case is possible. + ex. /(?i)(?<=\1)(a)/ + Backref node refer to capture group, but it doesn't tune yet. + */ + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + break; + } + + clen = 0; + while (s < sn->end) { + s += enclen(reg->enc, s); + clen = distance_add(clen, 1); + } + mmcl_set(ci, clen); + } + break; + + case NODE_QUANT: + { + QuantNode* qn = QUANT_(node); + + if (qn->lower == qn->upper) { + if (qn->upper == 0) { + mmcl_set(ci, 0); + } + else { + r = node_char_len1(NODE_BODY(node), reg, ci, env, level); + if (r < 0) break; + mmcl_multiply(ci, qn->lower); + } + } + else { + r = node_char_len1(NODE_BODY(node), reg, ci, env, level); + if (r < 0) break; + mmcl_repeat_range_multiply(ci, qn->lower, qn->upper); + } + } + break; + +#ifdef USE_CALL + case NODE_CALL: + if (NODE_IS_RECURSION(node)) + mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE); + else + r = node_char_len1(NODE_BODY(node), reg, ci, env, level); + break; +#endif + + case NODE_CTYPE: + case NODE_CCLASS: + mmcl_set(ci, 1); + break; + + case NODE_BAG: + { + BagNode* en = BAG_(node); + + switch (en->type) { + case BAG_MEMORY: + if (NODE_IS_FIXED_CLEN(node)) { + mmcl_set_min_max(ci, en->min_char_len, en->max_char_len, + NODE_IS_FIXED_CLEN_MIN_SURE(node)); + } + else { + if (NODE_IS_MARK1(node)) { + mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE); + } + else { + NODE_STATUS_ADD(node, MARK1); + r = node_char_len1(NODE_BODY(node), reg, ci, env, level); + NODE_STATUS_REMOVE(node, MARK1); + if (r < 0) break; + + en->min_char_len = ci->min; + en->max_char_len = ci->max; + NODE_STATUS_ADD(node, FIXED_CLEN); + if (ci->min_is_sure != 0) 
+ NODE_STATUS_ADD(node, FIXED_CLEN_MIN_SURE); + } + } + /* can't optimize look-behind if capture exists. */ + ci->min_is_sure = FALSE; + break; + case BAG_OPTION: + case BAG_STOP_BACKTRACK: + r = node_char_len1(NODE_BODY(node), reg, ci, env, level); + break; + case BAG_IF_ELSE: + { + MinMaxCharLen eci; + + r = node_char_len1(NODE_BODY(node), reg, ci, env, level); + if (r < 0) break; + + if (IS_NOT_NULL(en->te.Then)) { + r = node_char_len1(en->te.Then, reg, &tci, env, level); + if (r < 0) break; + mmcl_add(ci, &tci); + } + + if (IS_NOT_NULL(en->te.Else)) { + r = node_char_len1(en->te.Else, reg, &eci, env, level); + if (r < 0) break; + } + else { + mmcl_set(&eci, 0); + } + + mmcl_alt_merge(ci, &eci); + } + break; + default: /* never come here */ + r = ONIGERR_PARSER_BUG; + break; + } + } + break; + + case NODE_ANCHOR: + mmcl_set(ci, 0); + /* can't optimize look-behind if anchor exists. */ + ci->min_is_sure = FALSE; + break; + + case NODE_GIMMICK: + zero: + mmcl_set(ci, 0); + break; + + case NODE_BACKREF: + if (NODE_IS_CHECKER(node)) + goto zero; + + if (NODE_IS_RECURSION(node)) { +#ifdef USE_BACKREF_WITH_LEVEL + if (NODE_IS_NEST_LEVEL(node)) { + mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE); + break; + } +#endif + + mmcl_set_min_max(ci, 0, 0, FALSE); + break; + } + + { + int i; + int* backs; + MemEnv* mem_env = SCANENV_MEMENV(env); + BackRefNode* br = BACKREF_(node); + + backs = BACKREFS_P(br); + r = node_char_len1(mem_env[backs[0]].mem_node, reg, ci, env, level); + if (r < 0) break; + if (! mmcl_fixed(ci)) ci->min_is_sure = FALSE; + + for (i = 1; i < br->back_num; i++) { + r = node_char_len1(mem_env[backs[i]].mem_node, reg, &tci, env, level); + if (r < 0) break; + if (! 
mmcl_fixed(&tci)) tci.min_is_sure = FALSE; + mmcl_alt_merge(ci, &tci); + } + } + break; + + default: /* never come here */ + r = ONIGERR_PARSER_BUG; + break; + } + + return r; +} + +static int +node_char_len(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env) +{ + return node_char_len1(node, reg, ci, env, 0); +} + static int add_op(regex_t* reg, int opcode) @@ -626,7 +967,7 @@ static int compile_tree(Node* node, regex_t* reg, ScanEnv* env); #define IS_NEED_STR_LEN_OP(op) \ ((op) == OP_STR_N || (op) == OP_STR_MB2N ||\ - (op) == OP_STR_MB3N || (op) == OP_STR_MBN || (op) == OP_STR_N_IC) + (op) == OP_STR_MB3N || (op) == OP_STR_MBN) static int select_str_opcode(int mb_len, int str_len) @@ -711,16 +1052,16 @@ compile_quant_body_with_empty_check(QuantNode* qn, regex_t* reg, ScanEnv* env) if (r != 0) return r; if (emptiness != BODY_IS_NOT_EMPTY) { - if (emptiness == BODY_IS_EMPTY_POSSIBILITY) + if (emptiness == BODY_MAY_BE_EMPTY) r = add_op(reg, OP_EMPTY_CHECK_END); - else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM) { + else if (emptiness == BODY_MAY_BE_EMPTY_MEM) { if (NODE_IS_EMPTY_STATUS_CHECK(qn) != 0) r = add_op(reg, OP_EMPTY_CHECK_END_MEMST); else r = add_op(reg, OP_EMPTY_CHECK_END); } #ifdef USE_CALL - else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) + else if (emptiness == BODY_MAY_BE_EMPTY_REC) r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH); #endif @@ -794,12 +1135,7 @@ add_compile_string(UChar* s, int mb_len, int str_len, regex_t* reg) else if (IS_NEED_STR_LEN_OP(op)) { p = onigenc_strdup(reg->enc, s, end); CHECK_NULL_RETURN_MEMERR(p); - - if (op == OP_STR_N_IC) - COP(reg)->exact_n.n = byte_len; - else - COP(reg)->exact_n.n = str_len; - + COP(reg)->exact_n.n = str_len; COP(reg)->exact_n.s = p; } else { @@ -822,8 +1158,6 @@ compile_length_string_node(Node* node, regex_t* reg) if (sn->end <= sn->s) return 0; - if (NODE_STRING_IS_CASE_FOLD_MATCH(node) != 0) return 1; - p = prev = sn->s; prev_len = enclen(enc, p); p += prev_len; @@ -861,40 +1195,6 @@ 
compile_length_string_crude_node(StrNode* sn, regex_t* reg) } static int -compile_ambig_string_node(Node* node, regex_t* reg) -{ - int r; - int len; - int byte_len; - UChar* p; - StrNode* sn; - OnigEncoding enc = reg->enc; - - sn = STR_(node); - len = enclen(enc, sn->s); - byte_len = (int )(sn->end - sn->s); - if (len == byte_len) { - r = add_op(reg, OP_STR_1_IC); - if (r != 0) return r; - - xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s)); - xmemcpy(COP(reg)->exact.s, sn->s, (size_t )byte_len); - } - else { - r = add_op(reg, OP_STR_N_IC); - if (r != 0) return r; - - p = onigenc_strdup(enc, sn->s, sn->end); - CHECK_NULL_RETURN_MEMERR(p); - - COP(reg)->exact_n.s = p; - COP(reg)->exact_n.n = byte_len; - } - - return 0; -} - -static int compile_string_node(Node* node, regex_t* reg) { int r, len, prev_len, slen; @@ -907,9 +1207,6 @@ compile_string_node(Node* node, regex_t* reg) return 0; end = sn->end; - if (NODE_STRING_IS_CASE_FOLD_MATCH(node) != 0) { - return compile_ambig_string_node(node, reg); - } p = prev = sn->s; prev_len = enclen(enc, p); @@ -1103,7 +1400,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) /* anychar repeat */ if (is_anychar_infinite_greedy(qn)) { if (qn->lower <= 1 || - int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) { + len_multiply_cmp((OnigLen )tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) { if (IS_NOT_NULL(qn->next_head_exact)) return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; else @@ -1117,7 +1414,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) if (infinite && (qn->lower <= 1 || - int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { + len_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { len = OPSIZE_JUMP; } @@ -1148,7 +1445,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) } else if (!infinite && qn->greedy && (qn->upper == 1 || - int_multiply_cmp(tlen 
+ OPSIZE_PUSH, qn->upper, + len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { len = tlen * qn->lower; len += (OPSIZE_PUSH + tlen) * (qn->upper - qn->lower); @@ -1176,12 +1473,12 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (is_anychar_infinite_greedy(qn) && (qn->lower <= 1 || - int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { + len_multiply_cmp((OnigLen )tlen, qn->lower, + QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact)) { - r = add_op(reg, - IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ? + r = add_op(reg, NODE_IS_MULTILINE(NODE_QUANT_BODY(qn)) ? OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT); if (r != 0) return r; @@ -1189,8 +1486,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) return 0; } else { - r = add_op(reg, - IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ? + r = add_op(reg, NODE_IS_MULTILINE(NODE_QUANT_BODY(qn)) ? OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR); return r; } @@ -1202,7 +1498,8 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (infinite && (qn->lower <= 1 || - int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { + len_multiply_cmp((OnigLen )tlen, qn->lower, + QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { int addr; if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { @@ -1297,7 +1594,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) } else if (! 
infinite && qn->greedy && (qn->upper == 1 || - int_multiply_cmp(tlen + OPSIZE_PUSH, qn->upper, + len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { int n = qn->upper - qn->lower; @@ -1337,11 +1634,8 @@ static int compile_length_option_node(BagNode* node, regex_t* reg) { int tlen; - OnigOptionType prev = reg->options; - reg->options = node->o.options; tlen = compile_length_tree(NODE_BAG_BODY(node), reg); - reg->options = prev; return tlen; } @@ -1350,11 +1644,8 @@ static int compile_option_node(BagNode* node, regex_t* reg, ScanEnv* env) { int r; - OnigOptionType prev = reg->options; - reg->options = node->o.options; r = compile_tree(NODE_BAG_BODY(node), reg, env); - reg->options = prev; return r; } @@ -1423,10 +1714,10 @@ compile_length_bag_node(BagNode* node, regex_t* reg) v = onig_positive_int_multiply(qn->lower, tlen); if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - len = v + OPSIZE_PUSH + tlen + OPSIZE_POP_OUT + OPSIZE_JUMP; + len = v + OPSIZE_PUSH + tlen + OPSIZE_POP + OPSIZE_JUMP; } else { - len = OPSIZE_ATOMIC_START + tlen + OPSIZE_ATOMIC_END; + len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK; } break; @@ -1438,8 +1729,7 @@ compile_length_bag_node(BagNode* node, regex_t* reg) len = compile_length_tree(cond, reg); if (len < 0) return len; - len += OPSIZE_PUSH; - len += OPSIZE_ATOMIC_START + OPSIZE_ATOMIC_END; + len += OPSIZE_PUSH + OPSIZE_MARK + OPSIZE_CUT_TO_MARK; if (IS_NOT_NULL(Then)) { tlen = compile_length_tree(Then, reg); @@ -1447,7 +1737,7 @@ compile_length_bag_node(BagNode* node, regex_t* reg) len += tlen; } - len += OPSIZE_JUMP + OPSIZE_ATOMIC_END; + len += OPSIZE_JUMP + OPSIZE_CUT_TO_MARK; if (IS_NOT_NULL(Else)) { tlen = compile_length_tree(Else, reg); @@ -1466,8 +1756,6 @@ compile_length_bag_node(BagNode* node, regex_t* reg) return len; } -static int get_char_len_node(Node* node, regex_t* reg, int* len); - static int compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) { @@ 
-1481,7 +1769,7 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) if (r != 0) return r; node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + OPSIZE_JUMP; - NODE_STATUS_ADD(node, ADDR_FIXED); + NODE_STATUS_ADD(node, FIXED_ADDR); COP(reg)->call.addr = (int )node->m.called_addr; if (node->m.regnum == 0) { @@ -1574,35 +1862,49 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) r = add_op(reg, OP_PUSH); if (r != 0) return r; - COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP_OUT + OPSIZE_JUMP; + COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP + OPSIZE_JUMP; r = compile_tree(NODE_QUANT_BODY(qn), reg, env); if (r != 0) return r; - r = add_op(reg, OP_POP_OUT); + r = add_op(reg, OP_POP); if (r != 0) return r; r = add_op(reg, OP_JUMP); if (r != 0) return r; - COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP_OUT); + COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP); } else { - r = add_op(reg, OP_ATOMIC_START); + MemNumType mid; + + ID_ENTRY(env, mid); + r = add_op(reg, OP_MARK); if (r != 0) return r; + COP(reg)->mark.id = mid; + COP(reg)->mark.save_pos = 0; + r = compile_tree(NODE_BAG_BODY(node), reg, env); if (r != 0) return r; - r = add_op(reg, OP_ATOMIC_END); + r = add_op(reg, OP_CUT_TO_MARK); + if (r != 0) return r; + COP(reg)->cut_to_mark.id = mid; + COP(reg)->cut_to_mark.restore_pos = 0; } break; case BAG_IF_ELSE: { int cond_len, then_len, else_len, jump_len; + MemNumType mid; Node* cond = NODE_BAG_BODY(node); Node* Then = node->te.Then; Node* Else = node->te.Else; - r = add_op(reg, OP_ATOMIC_START); + ID_ENTRY(env, mid); + + r = add_op(reg, OP_MARK); if (r != 0) return r; + COP(reg)->mark.id = mid; + COP(reg)->mark.save_pos = 0; cond_len = compile_length_tree(cond, reg); if (cond_len < 0) return cond_len; @@ -1613,7 +1915,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) else then_len = 0; - jump_len = cond_len + then_len + OPSIZE_ATOMIC_END + OPSIZE_JUMP; + jump_len = cond_len + then_len 
+ OPSIZE_CUT_TO_MARK + OPSIZE_JUMP; r = add_op(reg, OP_PUSH); if (r != 0) return r; @@ -1621,8 +1923,10 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) r = compile_tree(cond, reg, env); if (r != 0) return r; - r = add_op(reg, OP_ATOMIC_END); + r = add_op(reg, OP_CUT_TO_MARK); if (r != 0) return r; + COP(reg)->cut_to_mark.id = mid; + COP(reg)->cut_to_mark.restore_pos = 0; if (IS_NOT_NULL(Then)) { r = compile_tree(Then, reg, env); @@ -1638,10 +1942,12 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) r = add_op(reg, OP_JUMP); if (r != 0) return r; - COP(reg)->jump.addr = OPSIZE_ATOMIC_END + else_len + SIZE_INC; + COP(reg)->jump.addr = OPSIZE_CUT_TO_MARK + else_len + SIZE_INC; - r = add_op(reg, OP_ATOMIC_END); + r = add_op(reg, OP_CUT_TO_MARK); if (r != 0) return r; + COP(reg)->cut_to_mark.id = mid; + COP(reg)->cut_to_mark.restore_pos = 0; if (IS_NOT_NULL(Else)) { r = compile_tree(Else, reg, env); @@ -1666,16 +1972,38 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) switch (node->type) { case ANCR_PREC_READ: - len = OPSIZE_PREC_READ_START + tlen + OPSIZE_PREC_READ_END; + len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK; break; case ANCR_PREC_READ_NOT: - len = OPSIZE_PREC_READ_NOT_START + tlen + OPSIZE_PREC_READ_NOT_END; + len = OPSIZE_PUSH + OPSIZE_MARK + tlen + OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL; break; case ANCR_LOOK_BEHIND: - len = OPSIZE_LOOK_BEHIND + tlen; + if (node->char_min_len == node->char_max_len) + len = OPSIZE_MARK + OPSIZE_STEP_BACK_START + tlen + OPSIZE_CUT_TO_MARK; + else { + len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_UPDATE_VAR + OPSIZE_FAIL + OPSIZE_JUMP + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_CUT_TO_MARK + OPSIZE_UPDATE_VAR; + + if (IS_NOT_NULL(node->lead_node)) { + int llen = compile_length_tree(node->lead_node, reg); + if (llen < 0) return llen; + + len += OPSIZE_MOVE + llen; + } + } break; case 
ANCR_LOOK_BEHIND_NOT: - len = OPSIZE_LOOK_BEHIND_NOT_START + tlen + OPSIZE_LOOK_BEHIND_NOT_END; + if (node->char_min_len == node->char_max_len) + len = OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + tlen + OPSIZE_POP_TO_MARK + OPSIZE_FAIL + OPSIZE_POP; + else { + len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_POP; + + if (IS_NOT_NULL(node->lead_node)) { + int llen = compile_length_tree(node->lead_node, reg); + if (llen < 0) return llen; + + len += OPSIZE_MOVE + llen; + } + } break; case ANCR_WORD_BOUNDARY: @@ -1701,10 +2029,254 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) } static int +compile_anchor_look_behind_node(AnchorNode* node, regex_t* reg, ScanEnv* env) +{ + int r; + + if (node->char_min_len == node->char_max_len) { + MemNumType mid; + + ID_ENTRY(env, mid); + r = add_op(reg, OP_MARK); + if (r != 0) return r; + COP(reg)->mark.id = mid; + COP(reg)->mark.save_pos = FALSE; + + r = add_op(reg, OP_STEP_BACK_START); + if (r != 0) return r; + COP(reg)->step_back_start.initial = node->char_min_len; + COP(reg)->step_back_start.remaining = 0; + COP(reg)->step_back_start.addr = 1; + + r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); + if (r != 0) return r; + + r = add_op(reg, OP_CUT_TO_MARK); + if (r != 0) return r; + COP(reg)->cut_to_mark.id = mid; + COP(reg)->cut_to_mark.restore_pos = FALSE; + } + else { + MemNumType mid1, mid2; + OnigLen diff; + + if (IS_NOT_NULL(node->lead_node)) { + MinMaxCharLen ci; + + r = node_char_len(node->lead_node, reg, &ci, env); + if (r < 0) return r; + r = add_op(reg, OP_MOVE); + if (r != 0) return r; + COP(reg)->move.n = -((RelPositionType )ci.min); + r = compile_tree(node->lead_node, reg, env); + if (r != 0) return r; + } + + ID_ENTRY(env, mid1); + r = add_op(reg, OP_SAVE_VAL); + if (r != 0) return 
r; + COP(reg)->save_val.type = SAVE_RIGHT_RANGE; + COP(reg)->save_val.id = mid1; + + r = add_op(reg, OP_UPDATE_VAR); + if (r != 0) return r; + COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S; + + ID_ENTRY(env, mid2); + r = add_op(reg, OP_MARK); + if (r != 0) return r; + COP(reg)->mark.id = mid2; + COP(reg)->mark.save_pos = FALSE; + + r = add_op(reg, OP_PUSH); + if (r != 0) return r; + COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP; + + r = add_op(reg, OP_JUMP); + if (r != 0) return r; + COP(reg)->jump.addr = SIZE_INC + OPSIZE_UPDATE_VAR + OPSIZE_FAIL; + + r = add_op(reg, OP_UPDATE_VAR); + if (r != 0) return r; + COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK; + COP(reg)->update_var.id = mid1; + COP(reg)->update_var.clear = FALSE; + r = add_op(reg, OP_FAIL); + if (r != 0) return r; + + r = add_op(reg, OP_STEP_BACK_START); + if (r != 0) return r; + + if (node->char_max_len != INFINITE_LEN) + diff = node->char_max_len - node->char_min_len; + else + diff = INFINITE_LEN; + + COP(reg)->step_back_start.initial = node->char_min_len; + COP(reg)->step_back_start.remaining = diff; + COP(reg)->step_back_start.addr = 2; + + r = add_op(reg, OP_STEP_BACK_NEXT); + if (r != 0) return r; + + r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); + if (r != 0) return r; + + r = add_op(reg, OP_CHECK_POSITION); + if (r != 0) return r; + COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE; + + r = add_op(reg, OP_CUT_TO_MARK); + if (r != 0) return r; + COP(reg)->cut_to_mark.id = mid2; + COP(reg)->cut_to_mark.restore_pos = FALSE; + + r = add_op(reg, OP_UPDATE_VAR); + if (r != 0) return r; + COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK; + COP(reg)->update_var.id = mid1; + COP(reg)->update_var.clear = TRUE; + } + + return r; +} + +static int +compile_anchor_look_behind_not_node(AnchorNode* node, regex_t* reg, + ScanEnv* env) +{ + int r; + int len; + + len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); + + if (node->char_min_len == 
node->char_max_len) { + MemNumType mid; + + ID_ENTRY(env, mid); + r = add_op(reg, OP_MARK); + if (r != 0) return r; + COP(reg)->mark.id = mid; + COP(reg)->mark.save_pos = FALSE; + + r = add_op(reg, OP_PUSH); + if (r != 0) return r; + COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + len + OPSIZE_POP_TO_MARK + OPSIZE_FAIL; + + r = add_op(reg, OP_STEP_BACK_START); + if (r != 0) return r; + COP(reg)->step_back_start.initial = node->char_min_len; + COP(reg)->step_back_start.remaining = 0; + COP(reg)->step_back_start.addr = 1; + + r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); + if (r != 0) return r; + + r = add_op(reg, OP_POP_TO_MARK); + if (r != 0) return r; + COP(reg)->pop_to_mark.id = mid; + r = add_op(reg, OP_FAIL); + if (r != 0) return r; + r = add_op(reg, OP_POP); + } + else { + MemNumType mid1, mid2; + OnigLen diff; + + ID_ENTRY(env, mid1); + r = add_op(reg, OP_SAVE_VAL); + if (r != 0) return r; + COP(reg)->save_val.type = SAVE_RIGHT_RANGE; + COP(reg)->save_val.id = mid1; + + r = add_op(reg, OP_UPDATE_VAR); + if (r != 0) return r; + COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S; + + ID_ENTRY(env, mid2); + r = add_op(reg, OP_MARK); + if (r != 0) return r; + COP(reg)->mark.id = mid2; + COP(reg)->mark.save_pos = FALSE; + + r = add_op(reg, OP_PUSH); + if (r != 0) return r; + COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + len + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL; + + if (IS_NOT_NULL(node->lead_node)) { + int clen; + MinMaxCharLen ci; + + clen = compile_length_tree(node->lead_node, reg); + COP(reg)->push.addr += OPSIZE_MOVE + clen; + + r = node_char_len(node->lead_node, reg, &ci, env); + if (r < 0) return r; + r = add_op(reg, OP_MOVE); + if (r != 0) return r; + COP(reg)->move.n = -((RelPositionType )ci.min); + + r = compile_tree(node->lead_node, reg, env); + if (r != 0) return r; + } + + r = add_op(reg, OP_STEP_BACK_START); + if (r != 0) return r; + + if 
(node->char_max_len != INFINITE_LEN) + diff = node->char_max_len - node->char_min_len; + else + diff = INFINITE_LEN; + + COP(reg)->step_back_start.initial = node->char_min_len; + COP(reg)->step_back_start.remaining = diff; + COP(reg)->step_back_start.addr = 2; + + r = add_op(reg, OP_STEP_BACK_NEXT); + if (r != 0) return r; + + r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); + if (r != 0) return r; + + r = add_op(reg, OP_CHECK_POSITION); + if (r != 0) return r; + COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE; + + r = add_op(reg, OP_POP_TO_MARK); + if (r != 0) return r; + COP(reg)->pop_to_mark.id = mid2; + + r = add_op(reg, OP_UPDATE_VAR); + if (r != 0) return r; + COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK; + COP(reg)->update_var.id = mid1; + COP(reg)->update_var.clear = FALSE; + + r = add_op(reg, OP_POP); /* pop save val */ + if (r != 0) return r; + r = add_op(reg, OP_FAIL); + if (r != 0) return r; + + r = add_op(reg, OP_UPDATE_VAR); + if (r != 0) return r; + COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK; + COP(reg)->update_var.id = mid1; + COP(reg)->update_var.clear = FALSE; + + r = add_op(reg, OP_POP); /* pop mark */ + if (r != 0) return r; + r = add_op(reg, OP_POP); /* pop save val */ + } + + return r; +} + +static int compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) { int r, len; enum OpCode op; + MemNumType mid; switch (node->type) { case ANCR_BEGIN_BUF: r = add_op(reg, OP_BEGIN_BUF); break; @@ -1712,7 +2284,11 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) case ANCR_BEGIN_LINE: r = add_op(reg, OP_BEGIN_LINE); break; case ANCR_END_LINE: r = add_op(reg, OP_END_LINE); break; case ANCR_SEMI_END_BUF: r = add_op(reg, OP_SEMI_END_BUF); break; - case ANCR_BEGIN_POSITION: r = add_op(reg, OP_BEGIN_POSITION); break; + case ANCR_BEGIN_POSITION: + r = add_op(reg, OP_CHECK_POSITION); + if (r != 0) return r; + COP(reg)->check_position.type = CHECK_POSITION_SEARCH_START; + 
break; case ANCR_WORD_BOUNDARY: op = OP_WORD_BOUNDARY; @@ -1744,7 +2320,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; #ifdef USE_UNICODE_WORD_BREAK - if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_TEXT_SEGMENT_WORD)) + if (NODE_IS_TEXT_SEGMENT_WORD(node)) type = WORD_BOUNDARY; #endif @@ -1755,66 +2331,60 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) break; case ANCR_PREC_READ: - r = add_op(reg, OP_PREC_READ_START); - if (r != 0) return r; - r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); - if (r != 0) return r; - r = add_op(reg, OP_PREC_READ_END); - break; - - case ANCR_PREC_READ_NOT: - len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); - if (len < 0) return len; - - r = add_op(reg, OP_PREC_READ_NOT_START); - if (r != 0) return r; - COP(reg)->prec_read_not_start.addr = SIZE_INC + len + OPSIZE_PREC_READ_NOT_END; - r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); - if (r != 0) return r; - r = add_op(reg, OP_PREC_READ_NOT_END); - break; - - case ANCR_LOOK_BEHIND: { - int n; - r = add_op(reg, OP_LOOK_BEHIND); + ID_ENTRY(env, mid); + r = add_op(reg, OP_MARK); if (r != 0) return r; - if (node->char_len < 0) { - r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n); - if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - else - n = node->char_len; + COP(reg)->mark.id = mid; + COP(reg)->mark.save_pos = TRUE; - COP(reg)->look_behind.len = n; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); + if (r != 0) return r; + + r = add_op(reg, OP_CUT_TO_MARK); + if (r != 0) return r; + COP(reg)->cut_to_mark.id = mid; + COP(reg)->cut_to_mark.restore_pos = TRUE; } break; - case ANCR_LOOK_BEHIND_NOT: + case ANCR_PREC_READ_NOT: { - int n; - len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); - r = add_op(reg, OP_LOOK_BEHIND_NOT_START); - if (r != 0) return r; - COP(reg)->look_behind_not_start.addr = SIZE_INC + len + OPSIZE_LOOK_BEHIND_NOT_END; + if (len < 0) return len; 
- if (node->char_len < 0) { - r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n); - if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - else - n = node->char_len; + ID_ENTRY(env, mid); + r = add_op(reg, OP_PUSH); + if (r != 0) return r; + COP(reg)->push.addr = SIZE_INC + OPSIZE_MARK + len + + OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL; - COP(reg)->look_behind_not_start.len = n; + r = add_op(reg, OP_MARK); + if (r != 0) return r; + COP(reg)->mark.id = mid; + COP(reg)->mark.save_pos = FALSE; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_op(reg, OP_LOOK_BEHIND_NOT_END); + + r = add_op(reg, OP_POP_TO_MARK); + if (r != 0) return r; + COP(reg)->pop_to_mark.id = mid; + + r = add_op(reg, OP_POP); + if (r != 0) return r; + r = add_op(reg, OP_FAIL); } break; + case ANCR_LOOK_BEHIND: + r = compile_anchor_look_behind_node(node, reg, env); + break; + + case ANCR_LOOK_BEHIND_NOT: + r = compile_anchor_look_behind_not_node(node, reg, env); + break; + default: return ONIGERR_TYPE_BUG; break; @@ -1826,7 +2396,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) static int compile_gimmick_node(GimmickNode* node, regex_t* reg) { - int r; + int r = 0; switch (node->type) { case GIMMICK_FAIL: @@ -1834,10 +2404,10 @@ compile_gimmick_node(GimmickNode* node, regex_t* reg) break; case GIMMICK_SAVE: - r = add_op(reg, OP_PUSH_SAVE_VAL); + r = add_op(reg, OP_SAVE_VAL); if (r != 0) return r; - COP(reg)->push_save_val.type = node->detail_type; - COP(reg)->push_save_val.id = node->id; + COP(reg)->save_val.type = node->detail_type; + COP(reg)->save_val.id = node->id; break; case GIMMICK_UPDATE_VAR: @@ -1845,6 +2415,7 @@ compile_gimmick_node(GimmickNode* node, regex_t* reg) if (r != 0) return r; COP(reg)->update_var.type = node->detail_type; COP(reg)->update_var.id = node->id; + COP(reg)->update_var.clear = FALSE; break; #ifdef USE_CALLOUT @@ -1888,7 +2459,7 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg) 
break; case GIMMICK_SAVE: - len = OPSIZE_PUSH_SAVE_VAL; + len = OPSIZE_SAVE_VAL; break; case GIMMICK_UPDATE_VAR: @@ -2055,8 +2626,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) switch (CTYPE_(node)->ctype) { case CTYPE_ANYCHAR: - r = add_op(reg, IS_MULTILINE(CTYPE_OPTION(node, reg)) ? - OP_ANYCHAR_ML : OP_ANYCHAR); + r = add_op(reg, NODE_IS_MULTILINE(node) ? OP_ANYCHAR_ML : OP_ANYCHAR); break; case ONIGENC_CTYPE_WORD: @@ -2098,7 +2668,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) else { #ifdef USE_BACKREF_WITH_LEVEL if (NODE_IS_NEST_LEVEL(node)) { - if ((reg->options & ONIG_OPTION_IGNORECASE) != 0) + if (NODE_IS_IGNORECASE(node)) r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC); else r = add_op(reg, OP_BACKREF_WITH_LEVEL); @@ -2111,7 +2681,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) #endif if (br->back_num == 1) { n = br->back_static[0]; - if (IS_IGNORECASE(reg->options)) { + if (NODE_IS_IGNORECASE(node)) { r = add_op(reg, OP_BACKREF_N_IC); if (r != 0) return r; COP(reg)->backref_n.n1 = n; @@ -2132,7 +2702,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) int num; int* p; - r = add_op(reg, IS_IGNORECASE(reg->options) ? + r = add_op(reg, NODE_IS_IGNORECASE(node) ? 
OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI); if (r != 0) return r; @@ -2183,7 +2753,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) default: #ifdef ONIG_DEBUG - fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node)); + fprintf(DBGFP, "compile_tree: undefined node type %d\n", NODE_TYPE(node)); #endif break; } @@ -2192,7 +2762,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) } static int -noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) +make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter) { int r = 0; Node* node = *plink; @@ -2201,7 +2771,7 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) case NODE_LIST: case NODE_ALT: do { - r = noname_disable_map(&(NODE_CAR(node)), map, counter); + r = make_named_capture_number_map(&(NODE_CAR(node)), map, counter); } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; @@ -2209,7 +2779,7 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) { Node** ptarget = &(NODE_BODY(node)); Node* old = *ptarget; - r = noname_disable_map(ptarget, map, counter); + r = make_named_capture_number_map(ptarget, map, counter); if (r != 0) return r; if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) { r = onig_reduce_nested_quantifier(node); @@ -2225,35 +2795,35 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) (*counter)++; map[en->m.regnum].new_val = *counter; en->m.regnum = *counter; - r = noname_disable_map(&(NODE_BODY(node)), map, counter); + r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); } else { *plink = NODE_BODY(node); NODE_BODY(node) = NULL_NODE; onig_node_free(node); - r = noname_disable_map(plink, map, counter); + r = make_named_capture_number_map(plink, map, counter); } } else if (en->type == BAG_IF_ELSE) { - r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter); + r = make_named_capture_number_map(&(NODE_BAG_BODY(en)), map, counter); if (r != 0) return r; if 
(IS_NOT_NULL(en->te.Then)) { - r = noname_disable_map(&(en->te.Then), map, counter); + r = make_named_capture_number_map(&(en->te.Then), map, counter); if (r != 0) return r; } if (IS_NOT_NULL(en->te.Else)) { - r = noname_disable_map(&(en->te.Else), map, counter); + r = make_named_capture_number_map(&(en->te.Else), map, counter); if (r != 0) return r; } } else - r = noname_disable_map(&(NODE_BODY(node)), map, counter); + r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); } break; case NODE_ANCHOR: if (IS_NOT_NULL(NODE_BODY(node))) - r = noname_disable_map(&(NODE_BODY(node)), map, counter); + r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); break; default: @@ -2264,7 +2834,7 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) } static int -renumber_node_backref(Node* node, GroupNumRemap* map) +renumber_backref_node(Node* node, GroupNumMap* map) { int i, pos, n, old_num; int *backs; @@ -2292,7 +2862,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map) } static int -renumber_by_map(Node* node, GroupNumRemap* map) +renumber_backref_traverse(Node* node, GroupNumMap* map) { int r = 0; @@ -2300,28 +2870,28 @@ renumber_by_map(Node* node, GroupNumRemap* map) case NODE_LIST: case NODE_ALT: do { - r = renumber_by_map(NODE_CAR(node), map); + r = renumber_backref_traverse(NODE_CAR(node), map); } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; case NODE_QUANT: - r = renumber_by_map(NODE_BODY(node), map); + r = renumber_backref_traverse(NODE_BODY(node), map); break; case NODE_BAG: { BagNode* en = BAG_(node); - r = renumber_by_map(NODE_BODY(node), map); + r = renumber_backref_traverse(NODE_BODY(node), map); if (r != 0) return r; if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { - r = renumber_by_map(en->te.Then, map); + r = renumber_backref_traverse(en->te.Then, map); if (r != 0) return r; } if (IS_NOT_NULL(en->te.Else)) { - r = renumber_by_map(en->te.Else, map); + r = 
renumber_backref_traverse(en->te.Else, map); if (r != 0) return r; } } @@ -2329,12 +2899,12 @@ renumber_by_map(Node* node, GroupNumRemap* map) break; case NODE_BACKREF: - r = renumber_node_backref(node, map); + r = renumber_backref_node(node, map); break; case NODE_ANCHOR: if (IS_NOT_NULL(NODE_BODY(node))) - r = renumber_by_map(NODE_BODY(node), map); + r = renumber_backref_traverse(NODE_BODY(node), map); break; default: @@ -2403,18 +2973,18 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) { int r, i, pos, counter; MemStatusType loc; - GroupNumRemap* map; + GroupNumMap* map; - map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); + map = (GroupNumMap* )xalloca(sizeof(GroupNumMap) * (env->num_mem + 1)); CHECK_NULL_RETURN_MEMERR(map); for (i = 1; i <= env->num_mem; i++) { map[i].new_val = 0; } counter = 0; - r = noname_disable_map(root, map, &counter); + r = make_named_capture_number_map(root, map, &counter); if (r != 0) return r; - r = renumber_by_map(*root, map); + r = renumber_backref_traverse(*root, map); if (r != 0) return r; for (i = 1, pos = 1; i <= env->num_mem; i++) { @@ -2448,8 +3018,18 @@ fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg) AbsAddrType* paddr; for (i = 0; i < uslist->num; i++) { - if (! NODE_IS_ADDR_FIXED(uslist->us[i].target)) - return ONIGERR_PARSER_BUG; + if (! NODE_IS_FIXED_ADDR(uslist->us[i].target)) { + if (NODE_IS_CALLED(uslist->us[i].target)) + return ONIGERR_PARSER_BUG; + else { + /* CASE: called node doesn't have called address. + ex. /((|a\g<1>)(.){0}){0}\g<3>/ + group-1 doesn't called, but compiled into bytecodes, + because group-3 is referred from outside. 
+ */ + continue; + } + } en = BAG_(uslist->us[i].target); addr = en->m.called_addr; @@ -2462,173 +3042,6 @@ fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg) } #endif - -#define GET_CHAR_LEN_VARLEN -1 -#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 - -/* fixed size pattern node only */ -static int -get_char_len_node1(Node* node, regex_t* reg, int* len, int level) -{ - int tlen; - int r = 0; - - level++; - *len = 0; - switch (NODE_TYPE(node)) { - case NODE_LIST: - do { - r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level); - if (r == 0) - *len = distance_add(*len, tlen); - } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); - break; - - case NODE_ALT: - { - int tlen2; - int varlen = 0; - - r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level); - while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) { - r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level); - if (r == 0) { - if (tlen != tlen2) - varlen = 1; - } - } - if (r == 0) { - if (varlen != 0) { - if (level == 1) - r = GET_CHAR_LEN_TOP_ALT_VARLEN; - else - r = GET_CHAR_LEN_VARLEN; - } - else - *len = tlen; - } - } - break; - - case NODE_STRING: - { - StrNode* sn = STR_(node); - UChar *s = sn->s; - - while (s < sn->end) { - s += enclen(reg->enc, s); - (*len)++; - } - } - break; - - case NODE_QUANT: - { - QuantNode* qn = QUANT_(node); - - if (qn->lower == qn->upper) { - if (qn->upper == 0) { - *len = 0; - } - else { - r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level); - if (r == 0) - *len = distance_multiply(tlen, qn->lower); - } - } - else - r = GET_CHAR_LEN_VARLEN; - } - break; - -#ifdef USE_CALL - case NODE_CALL: - if (! 
NODE_IS_RECURSION(node)) - r = get_char_len_node1(NODE_BODY(node), reg, len, level); - else - r = GET_CHAR_LEN_VARLEN; - break; -#endif - - case NODE_CTYPE: - case NODE_CCLASS: - *len = 1; - break; - - case NODE_BAG: - { - BagNode* en = BAG_(node); - - switch (en->type) { - case BAG_MEMORY: -#ifdef USE_CALL - if (NODE_IS_CLEN_FIXED(node)) - *len = en->char_len; - else { - r = get_char_len_node1(NODE_BODY(node), reg, len, level); - if (r == 0) { - en->char_len = *len; - NODE_STATUS_ADD(node, CLEN_FIXED); - } - } - break; -#endif - case BAG_OPTION: - case BAG_STOP_BACKTRACK: - r = get_char_len_node1(NODE_BODY(node), reg, len, level); - break; - case BAG_IF_ELSE: - { - int clen, elen; - - r = get_char_len_node1(NODE_BODY(node), reg, &clen, level); - if (r == 0) { - if (IS_NOT_NULL(en->te.Then)) { - r = get_char_len_node1(en->te.Then, reg, &tlen, level); - if (r != 0) break; - } - else tlen = 0; - if (IS_NOT_NULL(en->te.Else)) { - r = get_char_len_node1(en->te.Else, reg, &elen, level); - if (r != 0) break; - } - else elen = 0; - - if (clen + tlen != elen) { - r = GET_CHAR_LEN_VARLEN; - } - else { - *len = elen; - } - } - } - break; - } - } - break; - - case NODE_ANCHOR: - case NODE_GIMMICK: - break; - - case NODE_BACKREF: - if (NODE_IS_CHECKER(node)) - break; - /* fall */ - default: - r = GET_CHAR_LEN_VARLEN; - break; - } - - return r; -} - -static int -get_char_len_node(Node* node, regex_t* reg, int* len) -{ - return get_char_len_node1(node, reg, len, 0); -} - /* x is not included y ==> 1 : 0 */ static int is_exclusive(Node* x, Node* y, regex_t* reg) @@ -2804,14 +3217,9 @@ is_exclusive(Node* x, Node* y, regex_t* reg) len = NODE_STRING_LEN(x); if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y); - if (NODE_STRING_IS_CASE_FOLD_MATCH(x) || NODE_STRING_IS_CASE_FOLD_MATCH(y)) { - /* tiny version */ - return 0; - } - else { - for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { - if (*p != *q) return 1; - } + + for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, 
q++) { + if (*p != *q) return 1; } } break; @@ -2830,7 +3238,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } static Node* -get_head_value_node(Node* node, int exact, regex_t* reg) +get_tree_head_literal(Node* node, int exact, regex_t* reg) { Node* n = NULL_NODE; @@ -2853,7 +3261,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) break; case NODE_LIST: - n = get_head_value_node(NODE_CAR(node), exact, reg); + n = get_tree_head_literal(NODE_CAR(node), exact, reg); break; case NODE_STRING: @@ -2864,7 +3272,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) break; if (exact == 0 || - ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_CRUDE(node)) { + ! NODE_IS_IGNORECASE(node) || NODE_STRING_IS_CRUDE(node)) { n = node; } } @@ -2877,7 +3285,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) if (IS_NOT_NULL(qn->head_exact)) n = qn->head_exact; else - n = get_head_value_node(NODE_BODY(node), exact, reg); + n = get_tree_head_literal(NODE_BODY(node), exact, reg); } } break; @@ -2887,19 +3295,10 @@ get_head_value_node(Node* node, int exact, regex_t* reg) BagNode* en = BAG_(node); switch (en->type) { case BAG_OPTION: - { - OnigOptionType options = reg->options; - - reg->options = BAG_(node)->o.options; - n = get_head_value_node(NODE_BODY(node), exact, reg); - reg->options = options; - } - break; - case BAG_MEMORY: case BAG_STOP_BACKTRACK: case BAG_IF_ELSE: - n = get_head_value_node(NODE_BODY(node), exact, reg); + n = get_tree_head_literal(NODE_BODY(node), exact, reg); break; } } @@ -2907,7 +3306,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) case NODE_ANCHOR: if (ANCHOR_(node)->type == ANCR_PREC_READ) - n = get_head_value_node(NODE_BODY(node), exact, reg); + n = get_tree_head_literal(NODE_BODY(node), exact, reg); break; case NODE_GIMMICK: @@ -2918,42 +3317,244 @@ get_head_value_node(Node* node, int exact, regex_t* reg) return n; } +enum GetValue { + GET_VALUE_NONE = -1, + GET_VALUE_IGNORE = 0, + GET_VALUE_FOUND = 1 +}; + +static 
int +get_tree_tail_literal(Node* node, Node** rnode, regex_t* reg) +{ + int r; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + if (IS_NULL(NODE_CDR(node))) { + r = get_tree_tail_literal(NODE_CAR(node), rnode, reg); + } + else { + r = get_tree_tail_literal(NODE_CDR(node), rnode, reg); + if (r == GET_VALUE_IGNORE) { + r = get_tree_tail_literal(NODE_CAR(node), rnode, reg); + } + } + break; + +#ifdef USE_CALL + case NODE_CALL: + r = get_tree_tail_literal(NODE_BODY(node), rnode, reg); + break; +#endif + + case NODE_CTYPE: + if (CTYPE_(node)->ctype == CTYPE_ANYCHAR) { + r = GET_VALUE_NONE; + break; + } + /* fall */ + case NODE_CCLASS: + *rnode = node; + r = GET_VALUE_FOUND; + break; + + case NODE_STRING: + { + StrNode* sn = STR_(node); + + if (sn->end <= sn->s) { + r = GET_VALUE_IGNORE; + break; + } + + if (NODE_IS_IGNORECASE(node) && ! NODE_STRING_IS_CRUDE(node)) { + r = GET_VALUE_NONE; + break; + } + + *rnode = node; + r = GET_VALUE_FOUND; + } + break; + + case NODE_QUANT: + { + QuantNode* qn = QUANT_(node); + if (qn->lower != 0) { + r = get_tree_tail_literal(NODE_BODY(node), rnode, reg); + } + else + r = GET_VALUE_NONE; + } + break; + + case NODE_BAG: + { + BagNode* en = BAG_(node); + + if (en->type == BAG_MEMORY) { + if (NODE_IS_MARK1(node)) + r = GET_VALUE_NONE; + else { + NODE_STATUS_ADD(node, MARK1); + r = get_tree_tail_literal(NODE_BODY(node), rnode, reg); + NODE_STATUS_REMOVE(node, MARK1); + } + } + else { + r = get_tree_tail_literal(NODE_BODY(node), rnode, reg); + } + } + break; + + case NODE_ANCHOR: + case NODE_GIMMICK: + r = GET_VALUE_IGNORE; + break; + + case NODE_ALT: + case NODE_BACKREF: + default: + r = GET_VALUE_NONE; + break; + } + + return r; +} + static int -check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask) +check_called_node_in_look_behind(Node* node, int not) { + int r; + + r = 0; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + do { + r = check_called_node_in_look_behind(NODE_CAR(node), not); + } while 
(r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_QUANT: + r = check_called_node_in_look_behind(NODE_BODY(node), not); + break; + + case NODE_BAG: + { + BagNode* en = BAG_(node); + + if (en->type == BAG_MEMORY) { + if (NODE_IS_MARK1(node)) + return 0; + else { + NODE_STATUS_ADD(node, MARK1); + r = check_called_node_in_look_behind(NODE_BODY(node), not); + NODE_STATUS_REMOVE(node, MARK1); + } + } + else { + r = check_called_node_in_look_behind(NODE_BODY(node), not); + if (r == 0 && en->type == BAG_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = check_called_node_in_look_behind(en->te.Then, not); + if (r != 0) break; + } + if (IS_NOT_NULL(en->te.Else)) { + r = check_called_node_in_look_behind(en->te.Else, not); + } + } + } + } + break; + + case NODE_ANCHOR: + if (IS_NOT_NULL(NODE_BODY(node))) + r = check_called_node_in_look_behind(NODE_BODY(node), not); + break; + + case NODE_GIMMICK: + if (NODE_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0) + return 1; + break; + + default: + break; + } + + return r; +} + +/* allowed node types in look-behind */ +#define ALLOWED_TYPE_IN_LB \ + ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \ + | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \ + | NODE_BIT_CALL | NODE_BIT_BACKREF | NODE_BIT_GIMMICK) + +#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE ) +#define ALLOWED_BAG_IN_LB_NOT ( 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE ) + +#define ALLOWED_ANCHOR_IN_LB \ + ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \ + | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \ + | ANCR_WORD_BEGIN | ANCR_WORD_END \ + | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY ) + +#define ALLOWED_ANCHOR_IN_LB_NOT \ + ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \ + | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \ + | ANCR_NO_WORD_BOUNDARY | 
ANCR_WORD_BEGIN | ANCR_WORD_END \ + | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY ) + + +static int +check_node_in_look_behind(Node* node, int not, int* used) +{ + static unsigned int + bag_mask[2] = { ALLOWED_BAG_IN_LB, ALLOWED_BAG_IN_LB_NOT }; + + static unsigned int + anchor_mask[2] = { ALLOWED_ANCHOR_IN_LB, ALLOWED_ANCHOR_IN_LB_NOT }; + NodeType type; int r = 0; type = NODE_TYPE(node); - if ((NODE_TYPE2BIT(type) & type_mask) == 0) + if ((NODE_TYPE2BIT(type) & ALLOWED_TYPE_IN_LB) == 0) return 1; switch (type) { case NODE_LIST: case NODE_ALT: do { - r = check_type_tree(NODE_CAR(node), type_mask, bag_mask, anchor_mask); + r = check_node_in_look_behind(NODE_CAR(node), not, used); } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; case NODE_QUANT: - r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask); + r = check_node_in_look_behind(NODE_BODY(node), not, used); break; case NODE_BAG: { BagNode* en = BAG_(node); - if (((1<<en->type) & bag_mask) == 0) + if (((1<<en->type) & bag_mask[not]) == 0) return 1; - r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask); - if (r == 0 && en->type == BAG_IF_ELSE) { + r = check_node_in_look_behind(NODE_BODY(node), not, used); + if (r != 0) break; + + if (en->type == BAG_MEMORY) { + if (NODE_IS_BACKREF(node) || NODE_IS_CALLED(node)) *used = TRUE; + } + else if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { - r = check_type_tree(en->te.Then, type_mask, bag_mask, anchor_mask); + r = check_node_in_look_behind(en->te.Then, not, used); if (r != 0) break; } if (IS_NOT_NULL(en->te.Else)) { - r = check_type_tree(en->te.Else, type_mask, bag_mask, anchor_mask); + r = check_node_in_look_behind(en->te.Else, not, used); } } } @@ -2961,14 +3562,22 @@ check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask) case NODE_ANCHOR: type = ANCHOR_(node)->type; - if ((type & anchor_mask) == 0) + if ((type & anchor_mask[not]) == 0) return 1; if 
(IS_NOT_NULL(NODE_BODY(node))) - r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask); + r = check_node_in_look_behind(NODE_BODY(node), not, used); break; case NODE_GIMMICK: + if (NODE_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0) + return 1; + break; + + case NODE_CALL: + r = check_called_node_in_look_behind(NODE_BODY(node), not); + break; + default: break; } @@ -2976,7 +3585,7 @@ check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask) } static OnigLen -tree_min_len(Node* node, ScanEnv* env) +node_min_byte_len(Node* node, ScanEnv* env) { OnigLen len; OnigLen tmin; @@ -2992,9 +3601,9 @@ tree_min_len(Node* node, ScanEnv* env) if (NODE_IS_RECURSION(node)) break; backs = BACKREFS_P(br); - len = tree_min_len(mem_env[backs[0]].mem_node, env); + len = node_min_byte_len(mem_env[backs[0]].mem_node, env); for (i = 1; i < br->back_num; i++) { - tmin = tree_min_len(mem_env[backs[i]].mem_node, env); + tmin = node_min_byte_len(mem_env[backs[i]].mem_node, env); if (len > tmin) len = tmin; } } @@ -3005,18 +3614,18 @@ tree_min_len(Node* node, ScanEnv* env) { Node* t = NODE_BODY(node); if (NODE_IS_RECURSION(node)) { - if (NODE_IS_MIN_FIXED(t)) + if (NODE_IS_FIXED_MIN(t)) len = BAG_(t)->min_len; } else - len = tree_min_len(t, env); + len = node_min_byte_len(t, env); } break; #endif case NODE_LIST: do { - tmin = tree_min_len(NODE_CAR(node), env); + tmin = node_min_byte_len(NODE_CAR(node), env); len = distance_add(len, tmin); } while (IS_NOT_NULL(node = NODE_CDR(node))); break; @@ -3027,7 +3636,7 @@ tree_min_len(Node* node, ScanEnv* env) y = node; do { x = NODE_CAR(y); - tmin = tree_min_len(x, env); + tmin = node_min_byte_len(x, env); if (y == node) len = tmin; else if (len > tmin) len = tmin; } while (IS_NOT_NULL(y = NODE_CDR(y))); @@ -3051,7 +3660,7 @@ tree_min_len(Node* node, ScanEnv* env) QuantNode* qn = QUANT_(node); if (qn->lower > 0) { - len = tree_min_len(NODE_BODY(node), env); + len = node_min_byte_len(NODE_BODY(node), env); len = 
distance_multiply(len, qn->lower); } } @@ -3062,35 +3671,35 @@ tree_min_len(Node* node, ScanEnv* env) BagNode* en = BAG_(node); switch (en->type) { case BAG_MEMORY: - if (NODE_IS_MIN_FIXED(node)) + if (NODE_IS_FIXED_MIN(node)) len = en->min_len; else { if (NODE_IS_MARK1(node)) len = 0; /* recursive */ else { NODE_STATUS_ADD(node, MARK1); - len = tree_min_len(NODE_BODY(node), env); + len = node_min_byte_len(NODE_BODY(node), env); NODE_STATUS_REMOVE(node, MARK1); en->min_len = len; - NODE_STATUS_ADD(node, MIN_FIXED); + NODE_STATUS_ADD(node, FIXED_MIN); } } break; case BAG_OPTION: case BAG_STOP_BACKTRACK: - len = tree_min_len(NODE_BODY(node), env); + len = node_min_byte_len(NODE_BODY(node), env); break; case BAG_IF_ELSE: { OnigLen elen; - len = tree_min_len(NODE_BODY(node), env); + len = node_min_byte_len(NODE_BODY(node), env); if (IS_NOT_NULL(en->te.Then)) - len += tree_min_len(en->te.Then, env); + len += node_min_byte_len(en->te.Then, env); if (IS_NOT_NULL(en->te.Else)) - elen = tree_min_len(en->te.Else, env); + elen = node_min_byte_len(en->te.Else, env); else elen = 0; if (elen < len) len = elen; @@ -3118,7 +3727,7 @@ tree_min_len(Node* node, ScanEnv* env) } static OnigLen -tree_max_len(Node* node, ScanEnv* env) +node_max_byte_len(Node* node, ScanEnv* env) { OnigLen len; OnigLen tmax; @@ -3127,14 +3736,14 @@ tree_max_len(Node* node, ScanEnv* env) switch (NODE_TYPE(node)) { case NODE_LIST: do { - tmax = tree_max_len(NODE_CAR(node), env); + tmax = node_max_byte_len(NODE_CAR(node), env); len = distance_add(len, tmax); } while (IS_NOT_NULL(node = NODE_CDR(node))); break; case NODE_ALT: do { - tmax = tree_max_len(NODE_CAR(node), env); + tmax = node_max_byte_len(NODE_CAR(node), env); if (len < tmax) len = tmax; } while (IS_NOT_NULL(node = NODE_CDR(node))); break; @@ -3158,12 +3767,16 @@ tree_max_len(Node* node, ScanEnv* env) MemEnv* mem_env = SCANENV_MEMENV(env); BackRefNode* br = BACKREF_(node); if (NODE_IS_RECURSION(node)) { - len = INFINITE_LEN; +#ifdef 
USE_BACKREF_WITH_LEVEL + if (NODE_IS_NEST_LEVEL(node)) { + len = INFINITE_LEN; + } +#endif break; } backs = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { - tmax = tree_max_len(mem_env[backs[i]].mem_node, env); + tmax = node_max_byte_len(mem_env[backs[i]].mem_node, env); if (len < tmax) len = tmax; } } @@ -3172,7 +3785,7 @@ tree_max_len(Node* node, ScanEnv* env) #ifdef USE_CALL case NODE_CALL: if (! NODE_IS_RECURSION(node)) - len = tree_max_len(NODE_BODY(node), env); + len = node_max_byte_len(NODE_BODY(node), env); else len = INFINITE_LEN; break; @@ -3183,7 +3796,7 @@ tree_max_len(Node* node, ScanEnv* env) QuantNode* qn = QUANT_(node); if (qn->upper != 0) { - len = tree_max_len(NODE_BODY(node), env); + len = node_max_byte_len(NODE_BODY(node), env); if (len != 0) { if (! IS_INFINITE_REPEAT(qn->upper)) len = distance_multiply(len, qn->upper); @@ -3199,37 +3812,37 @@ tree_max_len(Node* node, ScanEnv* env) BagNode* en = BAG_(node); switch (en->type) { case BAG_MEMORY: - if (NODE_IS_MAX_FIXED(node)) + if (NODE_IS_FIXED_MAX(node)) len = en->max_len; else { if (NODE_IS_MARK1(node)) len = INFINITE_LEN; else { NODE_STATUS_ADD(node, MARK1); - len = tree_max_len(NODE_BODY(node), env); + len = node_max_byte_len(NODE_BODY(node), env); NODE_STATUS_REMOVE(node, MARK1); en->max_len = len; - NODE_STATUS_ADD(node, MAX_FIXED); + NODE_STATUS_ADD(node, FIXED_MAX); } } break; case BAG_OPTION: case BAG_STOP_BACKTRACK: - len = tree_max_len(NODE_BODY(node), env); + len = node_max_byte_len(NODE_BODY(node), env); break; case BAG_IF_ELSE: { OnigLen tlen, elen; - len = tree_max_len(NODE_BODY(node), env); + len = node_max_byte_len(NODE_BODY(node), env); if (IS_NOT_NULL(en->te.Then)) { - tlen = tree_max_len(en->te.Then, env); + tlen = node_max_byte_len(en->te.Then, env); len = distance_add(len, tlen); } if (IS_NOT_NULL(en->te.Else)) - elen = tree_max_len(en->te.Else, env); + elen = node_max_byte_len(en->te.Else, env); else elen = 0; if (elen > len) len = elen; @@ -3537,7 +4150,7 @@ 
infinite_recursive_call_check(Node* node, ScanEnv* env, int head) if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; r |= ret; if (head != 0) { - min = tree_min_len(NODE_CAR(x), env); + min = node_min_byte_len(NODE_CAR(x), env); if (min != 0) head = 0; } } while (IS_NOT_NULL(x = NODE_CDR(x))); @@ -3602,7 +4215,7 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) if (IS_NOT_NULL(en->te.Then)) { OnigLen min; if (head != 0) { - min = tree_min_len(NODE_BODY(node), env); + min = node_min_byte_len(NODE_BODY(node), env); } else min = 0; @@ -3888,7 +4501,9 @@ reduce_string_list(Node* node) next_node = NODE_CDR(node); curr = NODE_CAR(node); if (NODE_TYPE(curr) == NODE_STRING) { - if (IS_NULL(prev) || STR_(curr)->flag != STR_(prev)->flag) { + if (IS_NULL(prev) + || STR_(curr)->flag != STR_(prev)->flag + || NODE_STATUS(curr) != NODE_STATUS(prev)) { prev = curr; prev_node = node; } @@ -3967,9 +4582,13 @@ reduce_string_list(Node* node) static int divide_look_behind_alternatives(Node* node) { + int r; + int anc_type; Node *head, *np, *insert_node; - AnchorNode* an = ANCHOR_(node); - int anc_type = an->type; + AnchorNode* an; + + an = ANCHOR_(node); + anc_type = an->type; head = NODE_ANCHOR_BODY(an); np = NODE_CAR(head); @@ -3979,7 +4598,8 @@ divide_look_behind_alternatives(Node* node) np = node; while (IS_NOT_NULL(np = NODE_CDR(np))) { - insert_node = onig_node_new_anchor(anc_type, an->ascii_mode); + r = onig_node_copy(&insert_node, head); + if (r != 0) return r; CHECK_NULL_RETURN_MEMERR(insert_node); NODE_BODY(insert_node) = NODE_CAR(np); NODE_CAR(np) = insert_node; @@ -3995,21 +4615,162 @@ divide_look_behind_alternatives(Node* node) } static int -tune_look_behind(Node* node, regex_t* reg, ScanEnv* env) +node_reduce_in_look_behind(Node* node) { - int r, len; + NodeType type; + Node* body; + + if (NODE_TYPE(node) != NODE_QUANT) return 0; + + body = NODE_BODY(node); + type = NODE_TYPE(body); + if (type == NODE_STRING || type == NODE_CTYPE || + type == 
NODE_CCLASS || type == NODE_BACKREF) { + QuantNode* qn = QUANT_(node); + qn->upper = qn->lower; + if (qn->upper == 0) + return 1; /* removed */ + } + + return 0; +} + +static int +list_reduce_in_look_behind(Node* node) +{ + int r; + + switch (NODE_TYPE(node)) { + case NODE_QUANT: + r = node_reduce_in_look_behind(node); + if (r > 0) r = 0; + break; + + case NODE_LIST: + do { + r = node_reduce_in_look_behind(NODE_CAR(node)); + if (r <= 0) break; + } while (IS_NOT_NULL(node = NODE_CDR(node))); + break; + + default: + r = 0; + break; + } + + return r; +} + +static int +alt_reduce_in_look_behind(Node* node, regex_t* reg, ScanEnv* env) +{ + int r; + + switch (NODE_TYPE(node)) { + case NODE_ALT: + do { + r = list_reduce_in_look_behind(NODE_CAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + break; + + default: + r = list_reduce_in_look_behind(node); + break; + } + + return r; +} + +static int tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env); + +static int +tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env) +{ + int r; + int state1; + int used; + MinMaxCharLen ci; + Node* body; AnchorNode* an = ANCHOR_(node); - r = get_char_len_node(NODE_ANCHOR_BODY(an), reg, &len); - if (r == 0) - an->char_len = len; - else if (r == GET_CHAR_LEN_VARLEN) - r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) - r = divide_look_behind_alternatives(node); - else - r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + used = FALSE; + r = check_node_in_look_behind(NODE_ANCHOR_BODY(an), + an->type == ANCR_LOOK_BEHIND_NOT ? 1 : 0, + &used); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + + if (an->type == ANCR_LOOK_BEHIND_NOT) + state1 = state | IN_NOT | IN_LOOK_BEHIND; + else + state1 = state | IN_LOOK_BEHIND; + + body = NODE_ANCHOR_BODY(an); + /* Execute tune_tree(body) before call node_char_len(). 
+ Because case-fold expansion must be done before node_char_len(). + */ + r = tune_tree(body, reg, state1, env); + if (r != 0) return r; + + r = alt_reduce_in_look_behind(body, reg, env); + if (r != 0) return r; + + r = node_char_len(body, reg, &ci, env); + if (r >= 0) { + /* #177: overflow in onigenc_step_back() */ + if ((ci.max != INFINITE_LEN && ci.max > LOOK_BEHIND_MAX_CHAR_LEN) + || ci.min > LOOK_BEHIND_MAX_CHAR_LEN) { + return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + + if (ci.min == 0 && ci.min_is_sure != 0 && used == FALSE) { + if (an->type == ANCR_LOOK_BEHIND_NOT) + r = onig_node_reset_fail(node); + else + r = onig_node_reset_empty(node); + + return r; + } + + if (r == CHAR_LEN_TOP_ALT_FIXED) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) { + r = divide_look_behind_alternatives(node); + if (r == 0) + r = tune_tree(node, reg, state, env); + } + else if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND)) + goto normal; + else + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else { /* CHAR_LEN_NORMAL */ + normal: + if (ci.min == INFINITE_LEN) { + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else { + if (ci.min != ci.max && + ! 
IS_SYNTAX_BV(env->syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND)) { + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else { + Node* tail; + + /* check lead_node is already set by double call after + divide_look_behind_alternatives() */ + if (IS_NULL(an->lead_node)) { + an->char_min_len = ci.min; + an->char_max_len = ci.max; + r = get_tree_tail_literal(body, &tail, reg); + if (r == GET_VALUE_FOUND) { + r = onig_node_copy(&(an->lead_node), tail); + if (r != 0) return r; + } + } + r = ONIG_NORMAL; + } + } + } } return r; @@ -4026,7 +4787,7 @@ tune_next(Node* node, Node* next_node, regex_t* reg) QuantNode* qn = QUANT_(node); if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) { #ifdef USE_QUANT_PEEK_NEXT - Node* n = get_head_value_node(next_node, 1, reg); + Node* n = get_tree_head_literal(next_node, 1, reg); /* '\0': for UTF-16BE etc... */ if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') { qn->next_head_exact = n; @@ -4036,9 +4797,9 @@ tune_next(Node* node, Node* next_node, regex_t* reg) if (qn->lower <= 1) { if (is_strict_real_node(NODE_BODY(node))) { Node *x, *y; - x = get_head_value_node(NODE_BODY(node), 0, reg); + x = get_tree_head_literal(NODE_BODY(node), 0, reg); if (IS_NOT_NULL(x)) { - y = get_head_value_node(next_node, 0, reg); + y = get_tree_head_literal(next_node, 0, reg); if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) { Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK); CHECK_NULL_RETURN_MEMERR(en); @@ -4076,11 +4837,13 @@ is_all_code_len_1_items(int n, OnigCaseFoldCodeItem items[]) } static int -get_min_max_byte_len_case_fold_items(int n, OnigCaseFoldCodeItem items[], int* rmin, int* rmax) +get_min_max_byte_len_case_fold_items(int n, OnigCaseFoldCodeItem items[], + OnigLen* rmin, OnigLen* rmax) { - int i, len, minlen, maxlen; + int i; + OnigLen len, minlen, maxlen; - minlen = INT_MAX; + minlen = INFINITE_LEN; maxlen = 0; for (i = 0; i < n; i++) { OnigCaseFoldCodeItem* item = items + i; @@ -4096,45 +4859,6 @@ get_min_max_byte_len_case_fold_items(int n, 
OnigCaseFoldCodeItem items[], int* r } static int -conv_string_case_fold(OnigEncoding enc, OnigCaseFoldType case_fold_flag, - UChar* s, UChar* end, UChar** rs, UChar** rend, int* rcase_min_len) -{ - UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar *sbuf, *ebuf, *sp; - int i, n, len, sbuf_size; - - *rs = NULL; - sbuf_size = (int )(end - s) * 2; - sbuf = (UChar* )xmalloc(sbuf_size); - CHECK_NULL_RETURN_MEMERR(sbuf); - ebuf = sbuf + sbuf_size; - - n = 0; - sp = sbuf; - p = s; - while (p < end) { - len = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, buf); - for (i = 0; i < len; i++) { - if (sp >= ebuf) { - sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); - CHECK_NULL_RETURN_MEMERR(sbuf); - sp = sbuf + sbuf_size; - sbuf_size *= 2; - ebuf = sbuf + sbuf_size; - } - - *sp++ = buf[i]; - } - n++; - } - - *rs = sbuf; - *rend = sp; - *rcase_min_len = n; - return 0; -} - -static int make_code_list_to_string(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]) { @@ -4186,7 +4910,7 @@ unravel_cf_node_add(Node** rlist, Node* add) static int unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end, - unsigned int flag, int case_min_len) + unsigned int flag) { int r; Node *sn, *list; @@ -4195,17 +4919,13 @@ unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end, sn = *rsn; if (IS_NOT_NULL(sn) && STR_(sn)->flag == flag) { - if (NODE_STRING_IS_CASE_FOLD_MATCH(sn)) - r = node_str_cat_case_fold(sn, s, end, case_min_len); - else - r = onig_node_str_cat(sn, s, end); + r = onig_node_str_cat(sn, s, end); } else { sn = onig_node_new_str(s, end); CHECK_NULL_RETURN_MEMERR(sn); STR_(sn)->flag = flag; - STR_(sn)->case_min_len = case_min_len; r = unravel_cf_node_add(&list, sn); } @@ -4217,27 +4937,8 @@ unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end, } static int -unravel_cf_string_fold_add(Node** rlist, Node** rsn, OnigEncoding enc, - OnigCaseFoldType case_fold_flag, UChar* s, UChar* end) -{ - int r; - int case_min_len; - UChar *rs, 
*rend; - - r = conv_string_case_fold(enc, case_fold_flag, s, end, - &rs, &rend, &case_min_len); - if (r != 0) return r; - - r = unravel_cf_string_add(rlist, rsn, rs, rend, - NODE_STRING_CASE_FOLD_MATCH, case_min_len); - xfree(rs); - - return r; -} - -static int unravel_cf_string_alt_or_cc_add(Node** rlist, int n, - OnigCaseFoldCodeItem items[], int byte_len, OnigEncoding enc, + OnigCaseFoldCodeItem items[], OnigEncoding enc, OnigCaseFoldType case_fold_flag, UChar* s, UChar* end) { int r, i; @@ -4294,7 +4995,7 @@ unravel_cf_string_alt_or_cc_add(Node** rlist, int n, static int unravel_cf_look_behind_add(Node** rlist, Node** rsn, int n, OnigCaseFoldCodeItem items[], OnigEncoding enc, - UChar* s, int one_len) + UChar* s, OnigLen one_len) { int r, i, found; @@ -4309,7 +5010,7 @@ unravel_cf_look_behind_add(Node** rlist, Node** rsn, } if (found == 0) { - r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */, 0); + r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */); } else { Node* node; @@ -4340,7 +5041,8 @@ unravel_cf_look_behind_add(Node** rlist, Node** rsn, static int unravel_case_fold_string(Node* node, regex_t* reg, int state) { - int r, n, one_len, min_len, max_len, in_look_behind; + int r, n, in_look_behind; + OnigLen min_len, max_len, one_len; UChar *start, *end, *p, *q; StrNode* snode; Node *sn, *list; @@ -4349,8 +5051,8 @@ unravel_case_fold_string(Node* node, regex_t* reg, int state) if (NODE_STRING_IS_CASE_EXPANDED(node)) return 0; + NODE_STATUS_REMOVE(node, IGNORECASE); snode = STR_(node); - start = snode->s; end = snode->end; if (start >= end) return 0; @@ -4368,32 +5070,36 @@ unravel_case_fold_string(Node* node, regex_t* reg, int state) goto err; } - one_len = enclen(enc, p); + one_len = (OnigLen )enclen(enc, p); if (n == 0) { q = p + one_len; - r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */, 0); + r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */); if (r != 0) goto err; } else { if (in_look_behind != 0) { q = p 
+ one_len; + if (items[0].byte_len != one_len) { + r = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, q, + items); + if (r < 0) goto err; + n = r; + } r = unravel_cf_look_behind_add(&list, &sn, n, items, enc, p, one_len); if (r != 0) goto err; } else { get_min_max_byte_len_case_fold_items(n, items, &min_len, &max_len); - q = p + max_len; - if (one_len == max_len && min_len == max_len) { - r = unravel_cf_string_alt_or_cc_add(&list, n, items, max_len, enc, - reg->case_fold_flag, p, q); - if (r != 0) goto err; - sn = NULL_NODE; - } - else { - r = unravel_cf_string_fold_add(&list, &sn, enc, reg->case_fold_flag, - p, q); - if (r != 0) goto err; + if (min_len != max_len) { + r = ONIGERR_PARSER_BUG; + goto err; } + + q = p + max_len; + r = unravel_cf_string_alt_or_cc_add(&list, n, items, enc, + reg->case_fold_flag, p, q); + if (r != 0) goto err; + sn = NULL_NODE; } } @@ -4428,7 +5134,7 @@ unravel_case_fold_string(Node* node, regex_t* reg, int state) static enum BodyEmptyType quantifiers_memory_node_info(Node* node) { - int r = BODY_IS_EMPTY_POSSIBILITY; + int r = BODY_MAY_BE_EMPTY; switch (NODE_TYPE(node)) { case NODE_LIST: @@ -4445,7 +5151,7 @@ quantifiers_memory_node_info(Node* node) #ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) { - return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */ + return BODY_MAY_BE_EMPTY_REC; /* tiny version */ } else r = quantifiers_memory_node_info(NODE_BODY(node)); @@ -4467,9 +5173,9 @@ quantifiers_memory_node_info(Node* node) switch (en->type) { case BAG_MEMORY: if (NODE_IS_RECURSION(node)) { - return BODY_IS_EMPTY_POSSIBILITY_REC; + return BODY_MAY_BE_EMPTY_REC; } - return BODY_IS_EMPTY_POSSIBILITY_MEM; + return BODY_MAY_BE_EMPTY_MEM; break; case BAG_OPTION: @@ -4524,7 +5230,7 @@ tune_call_node_call(CallNode* cn, ScanEnv* env, int state) if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) { + ! 
OPTON_CAPTURE_GROUP(env->options)) { return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; } @@ -4935,35 +5641,12 @@ tune_called_state(Node* node, int state) #endif /* USE_CALL */ -static int tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env); - #ifdef __GNUC__ __inline #endif static int tune_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) { -/* allowed node types in look-behind */ -#define ALLOWED_TYPE_IN_LB \ - ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \ - | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \ - | NODE_BIT_CALL | NODE_BIT_GIMMICK) - -#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_IF_ELSE ) -#define ALLOWED_BAG_IN_LB_NOT ( 1<<BAG_OPTION | 1<<BAG_IF_ELSE ) - -#define ALLOWED_ANCHOR_IN_LB \ - ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \ - | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \ - | ANCR_WORD_BEGIN | ANCR_WORD_END \ - | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY ) - -#define ALLOWED_ANCHOR_IN_LB_NOT \ - ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \ - | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \ - | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \ - | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY ) - int r; AnchorNode* an = ANCHOR_(node); @@ -4976,28 +5659,8 @@ tune_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) break; case ANCR_LOOK_BEHIND: - { - r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB, - ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env); - if (r != 0) return r; - r = tune_look_behind(node, reg, env); - } - break; - case ANCR_LOOK_BEHIND_NOT: - { - r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB, - ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); 
- if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND), - env); - if (r != 0) return r; - r = tune_look_behind(node, reg, env); - } + r = tune_look_behind(node, reg, state, env); break; default: @@ -5015,7 +5678,6 @@ static int tune_quant(Node* node, regex_t* reg, int state, ScanEnv* env) { int r; - OnigLen d; QuantNode* qn = QUANT_(node); Node* body = NODE_BODY(node); @@ -5027,12 +5689,12 @@ tune_quant(Node* node, regex_t* reg, int state, ScanEnv* env) } if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) { - d = tree_min_len(body, env); + OnigLen d = node_min_byte_len(body, env); if (d == 0) { #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT qn->emptiness = quantifiers_memory_node_info(body); #else - qn->emptiness = BODY_IS_EMPTY_POSSIBILITY; + qn->emptiness = BODY_MAY_BE_EMPTY; #endif } } @@ -5054,7 +5716,7 @@ tune_quant(Node* node, regex_t* reg, int state, ScanEnv* env) if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { int i, n = qn->lower; - node_conv_to_str_node(node, STR_(body)->flag); + node_conv_to_str_node(node, body); for (i = 0; i < n; i++) { r = node_str_node_cat(node, body); if (r != 0) return r; @@ -5074,7 +5736,7 @@ tune_quant(Node* node, regex_t* reg, int state, ScanEnv* env) } } else { - qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg); + qn->head_exact = get_tree_head_literal(NODE_BODY(node), 1, reg); } } @@ -5115,7 +5777,7 @@ tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env) break; case NODE_STRING: - if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_CRUDE(node)) { + if (NODE_IS_IGNORECASE(node) && ! 
NODE_STRING_IS_CRUDE(node)) { r = unravel_case_fold_string(node, reg, state); } break; @@ -5299,14 +5961,8 @@ set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand, #endif typedef struct { - OnigLen min; /* min byte length */ - OnigLen max; /* max byte length */ -} MinMax; - -typedef struct { - MinMax mm; + MinMaxLen mm; OnigEncoding enc; - OnigOptionType options; OnigCaseFoldType case_fold_flag; ScanEnv* scan_env; } OptEnv; @@ -5317,23 +5973,22 @@ typedef struct { } OptAnc; typedef struct { - MinMax mm; /* position */ + MinMaxLen mm; /* position */ OptAnc anc; int reach_end; - int case_fold; int len; UChar s[OPT_EXACT_MAXLEN]; } OptStr; typedef struct { - MinMax mm; /* position */ + MinMaxLen mm; /* position */ OptAnc anc; int value; /* weighted value */ UChar map[CHAR_MAP_SIZE]; } OptMap; typedef struct { - MinMax len; + MinMaxLen len; OptAnc anc; OptStr sb; /* boundary */ OptStr sm; /* middle */ @@ -5367,7 +6022,7 @@ map_position_value(OnigEncoding enc, int i) } static int -distance_value(MinMax* mm) +distance_value(MinMaxLen* mm) { /* 1000 / (min-max-dist + 1) */ static const short int dist_vals[] = { @@ -5396,7 +6051,7 @@ distance_value(MinMax* mm) } static int -comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2) +comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) { if (v2 <= 0) return -1; if (v1 <= 0) return 1; @@ -5412,46 +6067,6 @@ comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2) return 0; } -static int -is_equal_mml(MinMax* a, MinMax* b) -{ - return a->min == b->min && a->max == b->max; -} - -static void -set_mml(MinMax* l, OnigLen min, OnigLen max) -{ - l->min = min; - l->max = max; -} - -static void -clear_mml(MinMax* l) -{ - l->min = l->max = 0; -} - -static void -copy_mml(MinMax* to, MinMax* from) -{ - to->min = from->min; - to->max = from->max; -} - -static void -add_mml(MinMax* to, MinMax* from) -{ - to->min = distance_add(to->min, from->min); - to->max = distance_add(to->max, from->max); -} - 
-static void -alt_merge_mml(MinMax* to, MinMax* from) -{ - if (to->min > from->min) to->min = from->min; - if (to->max < from->max) to->max = from->max; -} - static void copy_opt_env(OptEnv* to, OptEnv* from) { @@ -5543,12 +6158,11 @@ is_full_opt_exact(OptStr* e) static void clear_opt_exact(OptStr* e) { - clear_mml(&e->mm); + mml_clear(&e->mm); clear_opt_anc_info(&e->anc); - e->reach_end = 0; - e->case_fold = 0; - e->len = 0; - e->s[0] = '\0'; + e->reach_end = 0; + e->len = 0; + e->s[0] = '\0'; } static void @@ -5564,14 +6178,6 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc) UChar *p, *end; OptAnc tanc; - if (add->case_fold != 0) { - if (! to->case_fold) { - if (to->len > 1 || to->len >= add->len) return 0; /* avoid */ - - to->case_fold = 1; - } - } - r = 0; p = add->s; end = p + add->len; @@ -5610,7 +6216,7 @@ concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc) to->len = i; - if (p >= end && to->len == (int )(end - s)) + if (p >= end) to->reach_end = 1; } @@ -5624,7 +6230,7 @@ alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env) return ; } - if (! is_equal_mml(&to->mm, &add->mm)) { + if (! mml_is_equal(&to->mm, &add->mm)) { clear_opt_exact(to); return ; } @@ -5644,8 +6250,6 @@ alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env) to->reach_end = 0; } to->len = i; - if (add->case_fold != 0) - to->case_fold = 1; alt_merge_opt_anc_info(&to->anc, &add->anc); if (! 
to->reach_end) to->anc.right = 0; @@ -5675,8 +6279,8 @@ select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt) if (alt->len > 1) va += 5; } - if (now->case_fold == 0) vn *= 2; - if (alt->case_fold == 0) va *= 2; + vn *= 2; + va *= 2; if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0) copy_opt_exact(now, alt); @@ -5725,28 +6329,6 @@ add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc) } } -static int -add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end, - OnigEncoding enc, OnigCaseFoldType fold_flag) -{ - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - int i, n; - - add_char_opt_map(map, p[0], enc); - - fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag); - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items); - if (n < 0) return n; - - for (i = 0; i < n; i++) { - ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); - add_char_opt_map(map, buf[0], enc); - } - - return 0; -} - static void select_opt_map(OptMap* now, OptMap* alt) { @@ -5775,12 +6357,7 @@ comp_opt_exact_or_map(OptStr* e, OptMap* m) if (m->value <= 0) return -1; - if (e->case_fold != 0) { - case_value = 1; - } - else - case_value = 3; - + case_value = 3; ae = COMP_EM_BASE * e->len * case_value; am = COMP_EM_BASE * 5 * 2 / m->value; return comp_distance_value(&e->mm, &m->mm, ae, am); @@ -5791,14 +6368,14 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add) { int i, val; - /* if (! is_equal_mml(&to->mm, &add->mm)) return ; */ + /* if (! 
mml_is_equal(&to->mm, &add->mm)) return ; */ if (to->value == 0) return ; if (add->value == 0 || to->mm.max < add->mm.min) { clear_opt_map(to); return ; } - alt_merge_mml(&to->mm, &add->mm); + mml_alt_merge(&to->mm, &add->mm); val = 0; for (i = 0; i < CHAR_MAP_SIZE; i++) { @@ -5814,17 +6391,17 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add) } static void -set_bound_node_opt_info(OptNode* opt, MinMax* plen) +set_bound_node_opt_info(OptNode* opt, MinMaxLen* plen) { - copy_mml(&(opt->sb.mm), plen); - copy_mml(&(opt->spr.mm), plen); - copy_mml(&(opt->map.mm), plen); + mml_copy(&(opt->sb.mm), plen); + mml_copy(&(opt->spr.mm), plen); + mml_copy(&(opt->map.mm), plen); } static void clear_node_opt_info(OptNode* opt) { - clear_mml(&opt->len); + mml_clear(&opt->len); clear_opt_anc_info(&opt->anc); clear_opt_exact(&opt->sb); clear_opt_exact(&opt->sm); @@ -5889,7 +6466,7 @@ concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add) } select_opt_map(&to->map, &add->map); - add_mml(&to->len, &add->len); + mml_add(&to->len, &add->len); } static void @@ -5901,7 +6478,7 @@ alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env) alt_merge_opt_exact(&to->spr, &add->spr, env); alt_merge_opt_map(env->enc, &to->map, &add->map); - alt_merge_mml(&to->len, &add->len); + mml_alt_merge(&to->len, &add->len); } @@ -5930,7 +6507,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) do { r = optimize_nodes(NODE_CAR(nd), &xo, &nenv); if (r == 0) { - add_mml(&nenv.mm, &xo.len); + mml_add(&nenv.mm, &xo.len); concat_left_node_opt_info(enc, opt, &xo); } } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd))); @@ -5956,29 +6533,11 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) StrNode* sn = STR_(node); int slen = (int )(sn->end - sn->s); - if (! 
NODE_STRING_IS_CASE_FOLD_MATCH(node)) { - concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc); - if (slen > 0) { - add_char_opt_map(&opt->map, *(sn->s), enc); - } - set_mml(&opt->len, slen, slen); - } - else { - int max, min; - - concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc); - opt->sb.case_fold = 1; - - if (slen > 0) { - r = add_char_amb_opt_map(&opt->map, sn->s, sn->end, - enc, env->case_fold_flag); - if (r != 0) break; - } - - max = slen; - min = sn->case_min_len * ONIGENC_MBC_MINLEN(enc); - set_mml(&opt->len, min, max); + concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc); + if (slen > 0) { + add_char_opt_map(&opt->map, *(sn->s), enc); } + mml_set_min_max(&opt->len, slen, slen); } break; @@ -5993,7 +6552,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) OnigLen min = ONIGENC_MBC_MINLEN(enc); OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc); - set_mml(&opt->len, min, max); + mml_set_min_max(&opt->len, min, max); } else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { @@ -6002,7 +6561,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) add_char_opt_map(&opt->map, (UChar )i, enc); } } - set_mml(&opt->len, 1, 1); + mml_set_min_max(&opt->len, 1, 1); } } break; @@ -6046,7 +6605,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) else { min = ONIGENC_MBC_MINLEN(enc); } - set_mml(&opt->len, min, max); + mml_set_min_max(&opt->len, min, max); } break; @@ -6087,37 +6646,20 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) case NODE_BACKREF: if (! 
NODE_IS_CHECKER(node)) { - int* backs; - OnigLen min, max, tmin, tmax; - MemEnv* mem_env = SCANENV_MEMENV(env->scan_env); - BackRefNode* br = BACKREF_(node); + OnigLen min, max; - if (NODE_IS_RECURSION(node)) { - set_mml(&opt->len, 0, INFINITE_LEN); - break; - } - backs = BACKREFS_P(br); - min = tree_min_len(mem_env[backs[0]].mem_node, env->scan_env); - max = tree_max_len(mem_env[backs[0]].mem_node, env->scan_env); - for (i = 1; i < br->back_num; i++) { - tmin = tree_min_len(mem_env[backs[i]].mem_node, env->scan_env); - tmax = tree_max_len(mem_env[backs[i]].mem_node, env->scan_env); - if (min > tmin) min = tmin; - if (max < tmax) max = tmax; - } - set_mml(&opt->len, min, max); + min = node_min_byte_len(node, env->scan_env); + max = node_max_byte_len(node, env->scan_env); + mml_set_min_max(&opt->len, min, max); } break; #ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) - set_mml(&opt->len, 0, INFINITE_LEN); + mml_set_min_max(&opt->len, 0, INFINITE_LEN); else { - OnigOptionType save = env->options; - env->options = BAG_(NODE_BODY(node))->o.options; r = optimize_nodes(NODE_BODY(node), opt, env); - env->options = save; } break; #endif @@ -6127,6 +6669,20 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) OnigLen min, max; QuantNode* qn = QUANT_(node); + /* Issue #175 + ex. /\g<1>{0}(?<=|())/ + + Empty and unused nodes in look-behind is removed in + tune_look_behind(). + Called group nodes are assigned to be not called if the caller side is + inside of zero-repetition. + As a result, the nodes are considered unused. 
+ */ + if (qn->upper == 0) { + mml_set_min_max(&opt->len, 0, 0); + break; + } + r = optimize_nodes(NODE_BODY(node), &xo, env); if (r != 0) break; @@ -6153,7 +6709,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) if (IS_INFINITE_REPEAT(qn->upper)) { if (env->mm.max == 0 && NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { - if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) + if (NODE_IS_MULTILINE(NODE_QUANT_BODY(qn))) add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML); else add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF); @@ -6166,7 +6722,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) } min = distance_multiply(xo.len.min, qn->lower); - set_mml(&opt->len, min, max); + mml_set_min_max(&opt->len, min, max); } break; @@ -6175,14 +6731,9 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) BagNode* en = BAG_(node); switch (en->type) { + case BAG_STOP_BACKTRACK: case BAG_OPTION: - { - OnigOptionType save = env->options; - - env->options = en->o.options; - r = optimize_nodes(NODE_BODY(node), opt, env); - env->options = save; - } + r = optimize_nodes(NODE_BODY(node), opt, env); break; case BAG_MEMORY: @@ -6193,9 +6744,9 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) min = 0; max = INFINITE_LEN; - if (NODE_IS_MIN_FIXED(node)) min = en->min_len; - if (NODE_IS_MAX_FIXED(node)) max = en->max_len; - set_mml(&opt->len, min, max); + if (NODE_IS_FIXED_MIN(node)) min = en->min_len; + if (NODE_IS_FIXED_MAX(node)) max = en->max_len; + mml_set_min_max(&opt->len, min, max); } else #endif @@ -6208,10 +6759,6 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) } break; - case BAG_STOP_BACKTRACK: - r = optimize_nodes(NODE_BODY(node), opt, env); - break; - case BAG_IF_ELSE: { OptEnv nenv; @@ -6219,7 +6766,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) copy_opt_env(&nenv, env); r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv); if (r == 0) { - add_mml(&nenv.mm, &xo.len); + mml_add(&nenv.mm, &xo.len); concat_left_node_opt_info(enc, 
opt, &xo); if (IS_NOT_NULL(en->te.Then)) { r = optimize_nodes(en->te.Then, &xo, &nenv); @@ -6245,7 +6792,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) default: #ifdef ONIG_DEBUG - fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node)); + fprintf(DBGFP, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node)); #endif r = ONIGERR_TYPE_BUG; break; @@ -6258,6 +6805,7 @@ static int set_optimize_exact(regex_t* reg, OptStr* e) { int r; + int allow_reverse; if (e->len == 0) return 0; @@ -6266,40 +6814,28 @@ set_optimize_exact(regex_t* reg, OptStr* e) xmemcpy(reg->exact, e->s, e->len); reg->exact_end = reg->exact + e->len; - if (e->case_fold) { - reg->optimize = OPTIMIZE_STR_CASE_FOLD; - } - else { - int allow_reverse; + allow_reverse = + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); - allow_reverse = - ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); - - if (e->len >= 2 || (e->len >= 1 && allow_reverse)) { - r = set_sunday_quick_search_or_bmh_skip_table(reg, 0, - reg->exact, reg->exact_end, - reg->map, &(reg->map_offset)); - if (r != 0) return r; + if (e->len >= 2 || (e->len >= 1 && allow_reverse)) { + r = set_sunday_quick_search_or_bmh_skip_table(reg, 0, + reg->exact, reg->exact_end, + reg->map, &(reg->map_offset)); + if (r != 0) return r; - reg->optimize = (allow_reverse != 0 - ? OPTIMIZE_STR_FAST - : OPTIMIZE_STR_FAST_STEP_FORWARD); - } - else { - reg->optimize = OPTIMIZE_STR; - } + reg->optimize = (allow_reverse != 0 + ? 
OPTIMIZE_STR_FAST + : OPTIMIZE_STR_FAST_STEP_FORWARD); + } + else { + reg->optimize = OPTIMIZE_STR; } reg->dist_min = e->mm.min; reg->dist_max = e->mm.max; if (reg->dist_min != INFINITE_LEN) { - int n; - if (e->case_fold != 0) - n = 1; - else - n = (int )(reg->exact_end - reg->exact); - + int n = (int )(reg->exact_end - reg->exact); reg->threshold_len = reg->dist_min + n; } @@ -6319,7 +6855,7 @@ set_optimize_map(regex_t* reg, OptMap* m) reg->dist_max = m->mm.max; if (reg->dist_min != INFINITE_LEN) { - reg->threshold_len = reg->dist_min + 1; + reg->threshold_len = reg->dist_min + ONIGENC_MBC_MINLEN(reg->enc); } } @@ -6342,10 +6878,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) OptEnv env; env.enc = reg->enc; - env.options = reg->options; env.case_fold_flag = reg->case_fold_flag; env.scan_env = scan_env; - clear_mml(&env.mm); + mml_clear(&env.mm); r = optimize_nodes(node, &opt, &env); if (r != 0) return r; @@ -6387,7 +6922,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) } #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) - print_optimize_info(stderr, reg); + print_optimize_info(DBGFP, reg); #endif return r; } @@ -6414,8 +6949,6 @@ clear_optimize_info(regex_t* reg) static void print_enc_string(FILE* fp, OnigEncoding enc, const UChar *s, const UChar *end) { - fprintf(fp, "\nPATTERN: /"); - if (ONIGENC_MBC_MINLEN(enc) > 1) { const UChar *p; OnigCodePoint code; @@ -6515,9 +7048,8 @@ print_anchor(FILE* f, int anchor) static void print_optimize_info(FILE* f, regex_t* reg) { - static const char* on[] = { "NONE", "STR", - "STR_FAST", "STR_FAST_STEP_FORWARD", - "STR_CASE_FOLD", "MAP" }; + static const char* on[] = + { "NONE", "STR", "STR_FAST", "STR_FAST_STEP_FORWARD", "MAP" }; fprintf(f, "optimize: %s\n", on[reg->optimize]); fprintf(f, " anchor: "); print_anchor(f, reg->anchor); @@ -6537,7 +7069,12 @@ print_optimize_info(FILE* f, regex_t* reg) for (p = reg->exact; p < reg->exact_end; p++) { fputc(*p, f); 
} - fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact)); + fprintf(f, "]: length: %ld, dmin: %u, ", + (reg->exact_end - reg->exact), reg->dist_min); + if (reg->dist_max == INFINITE_LEN) + fprintf(f, "dmax: inf.\n"); + else + fprintf(f, "dmax: %u\n", reg->dist_max); } else if (reg->optimize & OPTIMIZE_MAP) { int c, i, n = 0; @@ -6545,7 +7082,8 @@ print_optimize_info(FILE* f, regex_t* reg) for (i = 0; i < CHAR_MAP_SIZE; i++) if (reg->map[i]) n++; - fprintf(f, "map: n=%d\n", n); + fprintf(f, "map: n=%d, dmin: %u, dmax: %u\n", + n, reg->dist_min, reg->dist_max); if (n > 0) { c = 0; fputc('[', f); @@ -6680,7 +7218,8 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } #ifdef ONIG_DEBUG - print_enc_string(stderr, reg->enc, pattern, pattern_end); + fprintf(DBGFP, "\nPATTERN: /"); + print_enc_string(DBGFP, reg->enc, pattern, pattern_end); #endif if (reg->ops_alloc == 0) { @@ -6708,7 +7247,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, /* mixed use named group and no-named group */ if (scan_env.num_named > 0 && IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + ! 
OPTON_CAPTURE_GROUP(reg->options)) { if (scan_env.num_named != scan_env.num_mem) r = disable_noname_group_capture(&root, reg, &scan_env); else @@ -6741,10 +7280,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #endif #ifdef ONIG_DEBUG_PARSE - fprintf(stderr, "MAX PARSE DEPTH: %d\n", scan_env.max_parse_depth); - fprintf(stderr, "TREE (parsed)\n"); - print_tree(stderr, root); - fprintf(stderr, "\n"); + fprintf(DBGFP, "MAX PARSE DEPTH: %d\n", scan_env.max_parse_depth); + fprintf(DBGFP, "TREE (parsed)\n"); + print_tree(DBGFP, root); + fprintf(DBGFP, "\n"); #endif r = tune_tree(root, reg, 0, &scan_env); @@ -6758,13 +7297,13 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } #ifdef ONIG_DEBUG_PARSE - fprintf(stderr, "TREE (after tune)\n"); - print_tree(stderr, root); - fprintf(stderr, "\n"); + fprintf(DBGFP, "TREE (after tune)\n"); + print_tree(DBGFP, root); + fprintf(DBGFP, "\n"); #endif - reg->capture_history = scan_env.cap_history; - reg->push_mem_start = scan_env.backtrack_mem | scan_env.cap_history; + reg->capture_history = scan_env.cap_history; + reg->push_mem_start = scan_env.backtrack_mem | scan_env.cap_history; #ifdef USE_CALLOUT if (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) { @@ -6804,6 +7343,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST; COP(reg)->update_var.id = 0; /* not used */ + COP(reg)->update_var.clear = FALSE; } r = add_op(reg, OP_END); @@ -6827,6 +7367,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #ifdef USE_CALLOUT || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) #endif +#ifdef USE_CALL + || scan_env.num_call > 0 +#endif ) reg->stack_pop_level = STACK_POP_LEVEL_ALL; else { @@ -6847,8 +7390,8 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, onig_node_free(root); #ifdef ONIG_DEBUG_COMPILE - onig_print_names(stderr, 
reg); - onig_print_compiled_byte_code_list(stderr, reg); + onig_print_names(DBGFP, reg); + onig_print_compiled_byte_code_list(DBGFP, reg); #endif #ifdef USE_DIRECT_THREADED_CODE @@ -6920,20 +7463,18 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl else option |= syntax->options; - (reg)->enc = enc; - (reg)->options = option; - (reg)->syntax = syntax; - (reg)->optimize = 0; - (reg)->exact = (UChar* )NULL; - (reg)->extp = (RegexExt* )NULL; - - (reg)->ops = (Operation* )NULL; - (reg)->ops_curr = (Operation* )NULL; - (reg)->ops_used = 0; - (reg)->ops_alloc = 0; - (reg)->name_table = (void* )NULL; - - (reg)->case_fold_flag = case_fold_flag; + (reg)->enc = enc; + (reg)->options = option; + (reg)->syntax = syntax; + (reg)->optimize = 0; + (reg)->exact = (UChar* )NULL; + (reg)->extp = (RegexExt* )NULL; + (reg)->ops = (Operation* )NULL; + (reg)->ops_curr = (Operation* )NULL; + (reg)->ops_used = 0; + (reg)->ops_alloc = 0; + (reg)->name_table = (void* )NULL; + (reg)->case_fold_flag = case_fold_flag; return 0; } @@ -7171,8 +7712,8 @@ print_indent_tree(FILE* f, Node* node, int indent) if (NODE_STRING_IS_CRUDE(node)) mode = "-crude"; - else if (NODE_STRING_IS_CASE_FOLD_MATCH(node)) - mode = "-case_fold_match"; + else if (NODE_IS_IGNORECASE(node)) + mode = "-ignorecase"; else mode = ""; @@ -7208,7 +7749,7 @@ print_indent_tree(FILE* f, Node* node, int indent) fprintf(f, "<ctype:%p> ", node); switch (CTYPE_(node)->ctype) { case CTYPE_ANYCHAR: - fprintf(f, "<anychar:%p>", node); + fprintf(f, "anychar"); break; case ONIGENC_CTYPE_WORD: @@ -7295,9 +7836,10 @@ print_indent_tree(FILE* f, Node* node, int indent) #endif case NODE_QUANT: - fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node, + fprintf(f, "<quantifier:%p>{%d,%d}%s%s\n", node, QUANT_(node)->lower, QUANT_(node)->upper, - (QUANT_(node)->greedy ? "" : "?")); + (QUANT_(node)->greedy ? "" : "?"), + QUANT_(node)->include_referred == 0 ? 
"" : " referred"); print_indent_tree(f, NODE_BODY(node), indent + add); break; @@ -7337,6 +7879,10 @@ print_indent_tree(FILE* f, Node* node, int indent) break; case BAG_MEMORY: fprintf(f, "memory:%d", BAG_(node)->m.regnum); + if (NODE_IS_CALLED(node)) + fprintf(f, ", called"); + if (NODE_IS_FIXED_ADDR(node)) + fprintf(f, ", fixed-addr"); break; case BAG_STOP_BACKTRACK: fprintf(f, "stop-bt"); diff --git a/src/regenc.c b/src/regenc.c index 16ac313..dbfbc89 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -29,6 +29,9 @@ #include "regint.h" +#define LARGE_S 0x53 +#define SMALL_S 0x73 + OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; #define INITED_LIST_SIZE 20 @@ -549,7 +552,7 @@ static int ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigApplyAllCaseFoldFunc f, void* arg) { - static OnigCodePoint ss[] = { 0x73, 0x73 }; + static OnigCodePoint ss[] = { SMALL_S, SMALL_S }; return (*f)((OnigCodePoint )0xdf, ss, 2, arg); } @@ -588,35 +591,48 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) { - if (0x41 <= *p && *p <= 0x5a) { + int i, j, n; + static OnigUChar sa[] = { LARGE_S, SMALL_S }; + + if (0x41 <= *p && *p <= 0x5a) { /* A - Z */ + if (*p == LARGE_S && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == LARGE_S || *(p+1) == SMALL_S)) { /* SS */ + ss_combination: + items[0].byte_len = 2; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )0xdf; + + n = 1; + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + if (sa[i] == *p && sa[j] == *(p+1)) + continue; + + items[n].byte_len = 2; + items[n].code_len = 2; + items[n].code[0] = (OnigCodePoint )sa[i]; + items[n].code[1] = (OnigCodePoint )sa[j]; + n++; + } + } + return 4; + } + items[0].byte_len = 1; items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p + 0x20); - if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 
0x53 || *(p+1) == 0x73)) { - /* SS */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; + return 1; } - else if (0x61 <= *p && *p <= 0x7a) { + else if (0x61 <= *p && *p <= 0x7a) { /* a - z */ + if (*p == SMALL_S && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == SMALL_S || *(p+1) == LARGE_S)) { + goto ss_combination; + } + items[0].byte_len = 1; items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p - 0x20); - if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x73 || *(p+1) == 0x53)) { - /* ss */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; + return 1; } else if (*p == 0xdf && ess_tsett_flag != 0) { items[0].byte_len = 1; @@ -676,7 +692,7 @@ extern int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) { if (p < end) { - if (*p == 0x0a) return 1; + if (*p == NEWLINE_CODE) return 1; } return 0; } @@ -887,7 +903,7 @@ onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end) { OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end); - if (code > 127) return 0; + if (code > ASCII_LIMIT) return 0; return ONIGENC_IS_ASCII_CODE_WORD(code); } diff --git a/src/regenc.h b/src/regenc.h index db35841..d183b97 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -34,7 +34,10 @@ #endif #include "config.h" + +#ifndef ONIG_NO_STANDARD_C_HEADERS #include <stddef.h> +#endif #ifdef ONIG_ESCAPE_UCHAR_COLLISION #undef ONIG_ESCAPE_UCHAR_COLLISION @@ -74,6 +77,8 @@ typedef struct { #define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) #define MAX_CODE_POINT (~((OnigCodePoint )0)) +#define ASCII_LIMIT 127 +#define NEWLINE_CODE 0x0a #define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) diff --git a/src/regerror.c b/src/regerror.c index b57a276..58bc7fd 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,10 +27,12 @@ * SUCH DAMAGE. */ -#include "regint.h" -#include <stdio.h> /* for vsnprintf() */ +#ifndef NEED_TO_INCLUDE_STDIO +/* for vsnprintf() */ +#define NEED_TO_INCLUDE_STDIO +#endif -#include <stdarg.h> +#include "regint.h" extern UChar* onig_error_code_to_format(int code) @@ -52,6 +54,8 @@ onig_error_code_to_format(int code) p = "parse depth limit over"; break; case ONIGERR_RETRY_LIMIT_IN_MATCH_OVER: p = "retry-limit-in-match over"; break; + case ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER: + p = "retry-limit-in-search over"; break; case ONIGERR_TYPE_BUG: p = "undefined type (bug)"; break; case ONIGERR_PARSER_BUG: @@ -277,7 +281,8 @@ onig_is_error_code_needs_param(int code) /* for ONIG_MAX_ERROR_MESSAGE_LEN */ #define MAX_ERROR_PAR_LEN 30 -extern int onig_error_code_to_str(UChar* s, int code, ...) +extern int ONIG_VARIADIC_FUNC_ATTR +onig_error_code_to_str(UChar* s, int code, ...) { UChar *p, *q; OnigErrorInfo* einfo; @@ -337,8 +342,9 @@ extern int onig_error_code_to_str(UChar* s, int code, ...) 
} -void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, - UChar* pat, UChar* pat_end, const UChar *fmt, ...) +void ONIG_VARIADIC_FUNC_ATTR +onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, ...) { int n, need, len; UChar *p, *s, *bp; diff --git a/src/regexec.c b/src/regexec.c index ce498c6..1b6895d 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,6 +26,13 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + +#ifndef ONIG_NO_PRINT +#ifndef NEED_TO_INCLUDE_STDIO +#define NEED_TO_INCLUDE_STDIO +#endif +#endif + #include "regint.h" #define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \ @@ -65,7 +72,10 @@ typedef struct { struct OnigMatchParamStruct { unsigned int match_stack_limit; +#ifdef USE_RETRY_LIMIT unsigned long retry_limit_in_match; + unsigned long retry_limit_in_search; +#endif #ifdef USE_CALLOUT OnigCalloutFunc progress_callout_of_contents; OnigCalloutFunc retraction_callout_of_contents; @@ -88,8 +98,24 @@ extern int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param, unsigned long limit) { +#ifdef USE_RETRY_LIMIT param->retry_limit_in_match = limit; return ONIG_NORMAL; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +extern int +onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* param, + unsigned long limit) +{ +#ifdef USE_RETRY_LIMIT + param->retry_limit_in_search = limit; + return ONIG_NORMAL; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif } extern int @@ -134,7 +160,11 @@ typedef struct { int ptr_num; const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ unsigned int 
match_stack_limit; +#ifdef USE_RETRY_LIMIT unsigned long retry_limit_in_match; + unsigned long retry_limit_in_search; + unsigned long retry_limit_in_search_counter; +#endif OnigMatchParam* mp; #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE int best_len; /* for ONIG_OPTION_FIND_LONGEST */ @@ -177,8 +207,6 @@ static OpInfoType OpInfo[] = { { OP_STR_MB2N, "str_mb2-n"}, { OP_STR_MB3N, "str_mb3n"}, { OP_STR_MBN, "str_mbn"}, - { OP_STR_1_IC, "str_1-ic"}, - { OP_STR_N_IC, "str_n-ic"}, { OP_CCLASS, "cclass"}, { OP_CCLASS_MB, "cclass-mb"}, { OP_CCLASS_MIX, "cclass-mix"}, @@ -205,7 +233,7 @@ static OpInfoType OpInfo[] = { { OP_BEGIN_LINE, "begin-line"}, { OP_END_LINE, "end-line"}, { OP_SEMI_END_BUF, "semi-end-buf"}, - { OP_BEGIN_POSITION, "begin-position"}, + { OP_CHECK_POSITION, "check-position"}, { OP_BACKREF1, "backref1"}, { OP_BACKREF2, "backref2"}, { OP_BACKREF_N, "backref-n"}, @@ -230,7 +258,8 @@ static OpInfoType OpInfo[] = { { OP_JUMP, "jump"}, { OP_PUSH, "push"}, { OP_PUSH_SUPER, "push-super"}, - { OP_POP_OUT, "pop-out"}, + { OP_POP, "pop"}, + { OP_POP_TO_MARK, "pop-to-mark"}, #ifdef USE_OP_PUSH_OR_JUMP_EXACT { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1"}, #endif @@ -245,16 +274,12 @@ static OpInfoType OpInfo[] = { #ifdef USE_CALL { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"}, #endif - { OP_PREC_READ_START, "push-pos"}, - { OP_PREC_READ_END, "pop-pos"}, - { OP_PREC_READ_NOT_START, "prec-read-not-start"}, - { OP_PREC_READ_NOT_END, "prec-read-not-end"}, - { OP_ATOMIC_START, "atomic-start"}, - { OP_ATOMIC_END, "atomic-end"}, - { OP_LOOK_BEHIND, "look-behind"}, - { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start"}, - { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end"}, - { OP_PUSH_SAVE_VAL, "push-save-val"}, + { OP_MOVE, "move"}, + { OP_STEP_BACK_START, "step-back-start"}, + { OP_STEP_BACK_NEXT, "step-back-next"}, + { OP_CUT_TO_MARK, "cut-to-mark"}, + { OP_MARK, "mark"}, + { OP_SAVE_VAL, "save-val"}, { OP_UPDATE_VAR, "update-var"}, #ifdef USE_CALL { OP_CALL, 
"call"}, @@ -280,9 +305,14 @@ op2name(int opcode) } static void +p_after_op(FILE* f) +{ + fputs(" ", f); +} + +static void p_string(FILE* f, int len, UChar* s) { - fputs(":", f); while (len-- > 0) { fputc(*s++, f); } } @@ -291,16 +321,27 @@ p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) { int x = len * mb_len; - fprintf(f, ":%d:", len); + fprintf(f, "len:%d ", len); while (x-- > 0) { fputc(*s++, f); } } static void p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start) { - RelAddrType curr = (RelAddrType )(p - start); + char* flag; + char* space1; + char* space2; + RelAddrType curr; + AbsAddrType abs_addr; - fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); + curr = (RelAddrType )(p - start); + abs_addr = curr + rel_addr; + + flag = rel_addr < 0 ? "" : "+"; + space1 = rel_addr < 10 ? " " : ""; + space2 = abs_addr < 10 ? " " : ""; + + fprintf(f, "%s%s%d => %s%d", space1, flag, rel_addr, space2, abs_addr); } static int @@ -316,10 +357,32 @@ bitset_on_num(BitSetRef bs) return n; } + +#ifdef USE_DIRECT_THREADED_CODE +#define GET_OPCODE(reg,index) (reg)->ocs[index] +#else +#define GET_OPCODE(reg,index) (reg)->ops[index].opcode +#endif + static void print_compiled_byte_code(FILE* f, regex_t* reg, int index, Operation* start, OnigEncoding enc) { + static char* SaveTypeNames[] = { + "KEEP", + "S", + "RIGHT_RANGE" + }; + + static char* UpdateVarTypeNames[] = { + "KEEP_FROM_STACK_LAST", + "S_FROM_STACK", + "RIGHT_RANGE_FROM_STACK", + "RIGHT_RANGE_FROM_S_STACK", + "RIGHT_RANGE_TO_S", + "RIGHT_RANGE_INIT" + }; + int i, n; RelAddrType addr; LengthType len; @@ -332,13 +395,11 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, p = reg->ops + index; -#ifdef USE_DIRECT_THREADED_CODE - opcode = reg->ocs[index]; -#else - opcode = p->opcode; -#endif + opcode = GET_OPCODE(reg, index); fprintf(f, "%s", op2name(opcode)); + p_after_op(f); + switch (opcode) { case OP_STR_1: p_string(f, 1, p->exact.s); break; @@ -372,24 +433,16 @@ 
print_compiled_byte_code(FILE* f, regex_t* reg, int index, mb_len = p->exact_len_n.len; len = p->exact_len_n.n; q = p->exact_len_n.s; - fprintf(f, ":%d:%d:", mb_len, len); + fprintf(f, "mblen:%d len:%d ", mb_len, len); n = len * mb_len; while (n-- > 0) { fputc(*q++, f); } } break; - case OP_STR_1_IC: - len = enclen(enc, p->exact.s); - p_string(f, len, p->exact.s); - break; - case OP_STR_N_IC: - len = p->exact_n.n; - p_len_string(f, len, 1, p->exact_n.s); - break; case OP_CCLASS: case OP_CCLASS_NOT: n = bitset_on_num(p->cclass.bsp); - fprintf(f, ":%d", n); + fprintf(f, "n:%d", n); break; case OP_CCLASS_MB: case OP_CCLASS_MB_NOT: @@ -401,7 +454,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, GET_CODE_POINT(ncode, codes); codes++; GET_CODE_POINT(code, codes); - fprintf(f, ":%d:0x%x", ncode, code); + fprintf(f, "n:%d code:0x%x", ncode, code); } break; case OP_CCLASS_MIX: @@ -416,7 +469,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, GET_CODE_POINT(ncode, codes); codes++; GET_CODE_POINT(code, codes); - fprintf(f, ":%d:%u:%u", n, code, ncode); + fprintf(f, "nsg:%d code:%u nmb:%u", n, code, ncode); } break; @@ -430,19 +483,19 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, case OP_WORD_BEGIN: case OP_WORD_END: mode = p->word_boundary.mode; - fprintf(f, ":%d", mode); + fprintf(f, "mode:%d", mode); break; case OP_BACKREF_N: case OP_BACKREF_N_IC: mem = p->backref_n.n1; - fprintf(f, ":%d", mem); + fprintf(f, "n:%d", mem); break; case OP_BACKREF_MULTI_IC: case OP_BACKREF_MULTI: case OP_BACKREF_CHECK: - fputs(" ", f); n = p->backref_general.num; + fprintf(f, "n:%d ", n); for (i = 0; i < n; i++) { mem = (n == 1) ? 
p->backref_general.n1 : p->backref_general.ns[i]; if (i > 0) fputs(", ", f); @@ -456,8 +509,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, LengthType level; level = p->backref_general.nest_level; - fprintf(f, ":%d", level); - fputs(" ", f); + fprintf(f, "level:%d ", level); n = p->backref_general.num; for (i = 0; i < n; i++) { mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i]; @@ -470,7 +522,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, case OP_MEM_START: case OP_MEM_START_PUSH: mem = p->memory_start.num; - fprintf(f, ":%d", mem); + fprintf(f, "mem:%d", mem); break; case OP_MEM_END: @@ -480,35 +532,33 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, case OP_MEM_END_PUSH_REC: #endif mem = p->memory_end.num; - fprintf(f, ":%d", mem); + fprintf(f, "mem:%d", mem); break; case OP_JUMP: addr = p->jump.addr; - fputc(':', f); p_rel_addr(f, addr, p, start); break; case OP_PUSH: case OP_PUSH_SUPER: addr = p->push.addr; - fputc(':', f); p_rel_addr(f, addr, p, start); break; #ifdef USE_OP_PUSH_OR_JUMP_EXACT case OP_PUSH_OR_JUMP_EXACT1: addr = p->push_or_jump_exact1.addr; - fputc(':', f); p_rel_addr(f, addr, p, start); + fprintf(f, " c:"); p_string(f, 1, &(p->push_or_jump_exact1.c)); break; #endif case OP_PUSH_IF_PEEK_NEXT: addr = p->push_if_peek_next.addr; - fputc(':', f); p_rel_addr(f, addr, p, start); + fprintf(f, " c:"); p_string(f, 1, &(p->push_if_peek_next.c)); break; @@ -516,19 +566,19 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, case OP_REPEAT_NG: mem = p->repeat.id; addr = p->repeat.addr; - fprintf(f, ":%d:", mem); + fprintf(f, "id:%d ", mem); p_rel_addr(f, addr, p, start); break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: mem = p->repeat.id; - fprintf(f, ":%d", mem); + fprintf(f, "id:%d", mem); break; case OP_EMPTY_CHECK_START: mem = p->empty_check_start.mem; - fprintf(f, ":%d", mem); + fprintf(f, "id:%d", mem); break; case OP_EMPTY_CHECK_END: case OP_EMPTY_CHECK_END_MEMST: @@ -536,58 
+586,82 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, case OP_EMPTY_CHECK_END_MEMST_PUSH: #endif mem = p->empty_check_end.mem; - fprintf(f, ":%d", mem); + fprintf(f, "id:%d", mem); break; - case OP_PREC_READ_NOT_START: - addr = p->prec_read_not_start.addr; - fputc(':', f); - p_rel_addr(f, addr, p, start); +#ifdef USE_CALL + case OP_CALL: + addr = p->call.addr; + fprintf(f, "=> %d", addr); break; +#endif - case OP_LOOK_BEHIND: - len = p->look_behind.len; - fprintf(f, ":%d", len); + case OP_MOVE: + fprintf(f, "n:%d", p->move.n); break; - case OP_LOOK_BEHIND_NOT_START: - addr = p->look_behind_not_start.addr; - len = p->look_behind_not_start.len; - fprintf(f, ":%d:", len); + case OP_STEP_BACK_START: + addr = p->step_back_start.addr; + fprintf(f, "init:%d rem:%d ", + p->step_back_start.initial, + p->step_back_start.remaining); p_rel_addr(f, addr, p, start); break; -#ifdef USE_CALL - case OP_CALL: - addr = p->call.addr; - fprintf(f, ":{/%d}", addr); + case OP_POP_TO_MARK: + mem = p->pop_to_mark.id; + fprintf(f, "id:%d", mem); + break; + + case OP_CUT_TO_MARK: + { + int restore; + + mem = p->cut_to_mark.id; + restore = p->cut_to_mark.restore_pos; + fprintf(f, "id:%d restore:%d", mem, restore); + } break; -#endif - case OP_PUSH_SAVE_VAL: + case OP_MARK: + { + int save; + + mem = p->mark.id; + save = p->mark.save_pos; + fprintf(f, "id:%d save:%d", mem, save); + } + break; + + case OP_SAVE_VAL: { SaveType type; - type = p->push_save_val.type; - mem = p->push_save_val.id; - fprintf(f, ":%d:%d", type, mem); + type = p->save_val.type; + mem = p->save_val.id; + fprintf(f, "%s id:%d", SaveTypeNames[type], mem); } break; case OP_UPDATE_VAR: { UpdateVarType type; + int clear; type = p->update_var.type; mem = p->update_var.id; - fprintf(f, ":%d:%d", type, mem); + clear = p->update_var.clear; + fprintf(f, "%s id:%d", UpdateVarTypeNames[type], mem); + if (type == UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK || + type == UPDATE_VAR_RIGHT_RANGE_FROM_STACK) + fprintf(f, " 
clear:%d", clear); } break; #ifdef USE_CALLOUT case OP_CALLOUT_CONTENTS: mem = p->callout_contents.num; - fprintf(f, ":%d", mem); + fprintf(f, "num:%d", mem); break; case OP_CALLOUT_NAME: @@ -596,14 +670,25 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, id = p->callout_name.id; mem = p->callout_name.num; - fprintf(f, ":%d:%d", id, mem); + fprintf(f, "id:%d num:%d", id, mem); } break; #endif case OP_TEXT_SEGMENT_BOUNDARY: if (p->text_segment_boundary.not != 0) - fprintf(f, ":not"); + fprintf(f, " not"); + break; + + case OP_CHECK_POSITION: + switch (p->check_position.type) { + case CHECK_POSITION_SEARCH_START: + fprintf(f, "search-start"); break; + case CHECK_POSITION_CURRENT_RIGHT_RANGE: + fprintf(f, "current-right-range"); break; + default: + break; + }; break; case OP_FINISH: @@ -621,24 +706,18 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index, case OP_BEGIN_LINE: case OP_END_LINE: case OP_SEMI_END_BUF: - case OP_BEGIN_POSITION: case OP_BACKREF1: case OP_BACKREF2: case OP_FAIL: - case OP_POP_OUT: - case OP_PREC_READ_START: - case OP_PREC_READ_END: - case OP_PREC_READ_NOT_END: - case OP_ATOMIC_START: - case OP_ATOMIC_END: - case OP_LOOK_BEHIND_NOT_END: + case OP_POP: + case OP_STEP_BACK_NEXT: #ifdef USE_CALL case OP_RETURN: #endif break; default: - fprintf(stderr, "print_compiled_byte_code: undefined code %d\n", opcode); + fprintf(DBGFP, "print_compiled_byte_code: undefined code %d\n", opcode); break; } } @@ -986,8 +1065,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) /* used by normal-POP */ #define STK_SUPER_ALT STK_ALT_FLAG #define STK_ALT (0x0002 | STK_ALT_FLAG) -#define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG) -#define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG) /* handled by normal-POP */ #define STK_MEM_START 0x0010 @@ -1010,13 +1087,10 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #endif #define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */ #define STK_MEM_END_MARK 0x8100 -#define STK_TO_VOID_START 
0x1200 /* mark for "(?>...)" */ -/* #define STK_REPEAT 0x0300 */ -#define STK_CALL_FRAME 0x0400 -#define STK_RETURN 0x0500 +#define STK_CALL_FRAME (0x0400 | STK_MASK_POP_HANDLED) +#define STK_RETURN (0x0500 | STK_MASK_POP_HANDLED) #define STK_SAVE_VAL 0x0600 -#define STK_PREC_READ_START 0x0700 -#define STK_PREC_READ_END 0x0800 +#define STK_MARK 0x0704 /* stack type check mask */ #define STK_MASK_POP_USED STK_ALT_FLAG @@ -1134,6 +1208,21 @@ struct OnigCalloutArgsStruct { #endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */ +#ifdef USE_RETRY_LIMIT +#define RETRY_IN_MATCH_ARG_INIT(msa,mpv) \ + (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\ + (msa).retry_limit_in_search = (mpv)->retry_limit_in_search;\ + (msa).retry_limit_in_search_counter = 0; +#else +#define RETRY_IN_MATCH_ARG_INIT(msa,mpv) +#endif + +#if defined(USE_CALL) +#define POP_CALL else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;} +#else +#define POP_CALL +#endif + #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \ (msa).stack_p = (void* )0;\ @@ -1141,7 +1230,7 @@ struct OnigCalloutArgsStruct { (msa).region = (arg_region);\ (msa).start = (arg_start);\ (msa).match_stack_limit = (mpv)->match_stack_limit;\ - (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\ + RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ (msa).mp = mpv;\ (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ @@ -1153,7 +1242,7 @@ struct OnigCalloutArgsStruct { (msa).region = (arg_region);\ (msa).start = (arg_start);\ (msa).match_stack_limit = (mpv)->match_stack_limit;\ - (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\ + RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ (msa).mp = mpv;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ } while(0) @@ -1196,17 +1285,17 @@ struct OnigCalloutArgsStruct { } while(0); -#define STACK_SAVE do{\ - msa->stack_n = (int )(stk_end - stk_base);\ - if 
(is_alloca != 0) {\ - size_t size = sizeof(StackIndex) * msa->ptr_num \ - + sizeof(StackType) * msa->stack_n;\ - msa->stack_p = xmalloc(size);\ - CHECK_NULL_RETURN_MEMERR(msa->stack_p);\ - xmemcpy(msa->stack_p, alloc_base, size);\ +#define STACK_SAVE(msa,is_alloca,alloc_base) do{\ + (msa)->stack_n = (int )(stk_end - stk_base);\ + if ((is_alloca) != 0) {\ + size_t size = sizeof(StackIndex) * (msa)->ptr_num\ + + sizeof(StackType) * (msa)->stack_n;\ + (msa)->stack_p = xmalloc(size);\ + CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\ + xmemcpy((msa)->stack_p, (alloc_base), size);\ }\ else {\ - msa->stack_p = alloc_base;\ + (msa)->stack_p = (alloc_base);\ };\ } while(0) @@ -1225,13 +1314,14 @@ onig_set_match_stack_limit_size(unsigned int size) return 0; } -#ifdef USE_RETRY_LIMIT_IN_MATCH +#ifdef USE_RETRY_LIMIT -static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH; +static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH; +static unsigned long RetryLimitInSearch = DEFAULT_RETRY_LIMIT_IN_SEARCH; #define CHECK_RETRY_LIMIT_IN_MATCH do {\ - if (retry_in_match_counter++ > retry_limit_in_match) {\ - MATCH_AT_ERROR_RETURN(ONIGERR_RETRY_LIMIT_IN_MATCH_OVER);\ + if (++retry_in_match_counter > retry_limit_in_match) {\ + MATCH_AT_ERROR_RETURN(retry_in_match_counter > msa->retry_limit_in_match ? 
ONIGERR_RETRY_LIMIT_IN_MATCH_OVER : ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER); \ }\ } while (0) @@ -1239,24 +1329,44 @@ static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH; #define CHECK_RETRY_LIMIT_IN_MATCH -#endif /* USE_RETRY_LIMIT_IN_MATCH */ +#endif /* USE_RETRY_LIMIT */ extern unsigned long onig_get_retry_limit_in_match(void) { -#ifdef USE_RETRY_LIMIT_IN_MATCH +#ifdef USE_RETRY_LIMIT return RetryLimitInMatch; #else - /* return ONIG_NO_SUPPORT_CONFIG; */ return 0; #endif } extern int -onig_set_retry_limit_in_match(unsigned long size) +onig_set_retry_limit_in_match(unsigned long n) { -#ifdef USE_RETRY_LIMIT_IN_MATCH - RetryLimitInMatch = size; +#ifdef USE_RETRY_LIMIT + RetryLimitInMatch = n; + return 0; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +extern unsigned long +onig_get_retry_limit_in_search(void) +{ +#ifdef USE_RETRY_LIMIT + return RetryLimitInSearch; +#else + return 0; +#endif +} + +extern int +onig_set_retry_limit_in_search(unsigned long n) +{ +#ifdef USE_RETRY_LIMIT + RetryLimitInSearch = n; return 0; #else return ONIG_NO_SUPPORT_CONFIG; @@ -1305,8 +1415,9 @@ extern int onig_initialize_match_param(OnigMatchParam* mp) { mp->match_stack_limit = MatchStackLimit; -#ifdef USE_RETRY_LIMIT_IN_MATCH - mp->retry_limit_in_match = RetryLimitInMatch; +#ifdef USE_RETRY_LIMIT + mp->retry_limit_in_match = RetryLimitInMatch; + mp->retry_limit_in_search = RetryLimitInSearch; #endif #ifdef USE_CALLOUT @@ -1508,9 +1619,9 @@ onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args, static int -stack_double(int is_alloca, char** arg_alloc_base, - StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, - MatchArg* msa) +stack_double(int* is_alloca, char** arg_alloc_base, + StackType** arg_stk_base, StackType** arg_stk_end, + StackType** arg_stk, MatchArg* msa) { unsigned int n; int used; @@ -1529,24 +1640,27 @@ stack_double(int is_alloca, char** arg_alloc_base, size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; 
n *= 2; new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; - if (is_alloca != 0) { + if (*is_alloca != 0) { new_alloc_base = (char* )xmalloc(new_size); if (IS_NULL(new_alloc_base)) { - STACK_SAVE; + STACK_SAVE(msa, *is_alloca, alloc_base); return ONIGERR_MEMORY; } xmemcpy(new_alloc_base, alloc_base, size); + *is_alloca = 0; } else { if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) { - if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) + if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) { + STACK_SAVE(msa, *is_alloca, alloc_base); return ONIGERR_MATCH_STACK_LIMIT_OVER; + } else n = msa->match_stack_limit; } new_alloc_base = (char* )xrealloc(alloc_base, new_size); if (IS_NULL(new_alloc_base)) { - STACK_SAVE; + STACK_SAVE(msa, *is_alloca, alloc_base); return ONIGERR_MEMORY; } } @@ -1563,9 +1677,8 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_ENSURE(n) do {\ if ((int )(stk_end - stk) < (n)) {\ - int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\ - if (r != 0) { STACK_SAVE; return r; } \ - is_alloca = 0;\ + int r = stack_double(&is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\ + if (r != 0) return r;\ UPDATE_FOR_STACK_REALLOC;\ }\ } while(0) @@ -1590,6 +1703,16 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_INC;\ } while(0) +#define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->zid = (int )(id);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + STACK_INC;\ +} while(0) + #define STACK_PUSH_ENSURED(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ @@ -1614,13 +1737,8 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) -#define 
STACK_PUSH_PREC_READ_START(s,sprev) \ - STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev) -#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \ - STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev) -#define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START) -#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \ - STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev) +#define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \ + STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id) #if 0 #define STACK_PUSH_REPEAT(sid, pat) do {\ @@ -1736,6 +1854,22 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_INC;\ } while(0) +#define STACK_PUSH_MARK(sid) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MARK;\ + stk->zid = (sid);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MARK;\ + stk->zid = (sid);\ + stk->u.val.v = (UChar* )(s);\ + stk->u.val.v2 = (sprev);\ + STACK_INC;\ +} while(0) + #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ @@ -1767,7 +1901,7 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while (0) -#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \ +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval, clear) do {\ int level = 0;\ StackType *k = stk;\ while (k > stk_base) {\ @@ -1777,6 +1911,7 @@ stack_double(int is_alloca, char** arg_alloc_base, && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ + if (clear != 0) k->type = STK_VOID;\ break;\ }\ }\ @@ -1808,26 +1943,6 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while (0) -#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \ - int level = 0;\ - StackType *k = (stk_from);\ - while (k > stk_base) {\ - STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \ - if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->u.val.id == (sid)) {\ - if (level == 0) {\ - (sval) = k->u.val.v;\ - break;\ - }\ - }\ - else if 
(k->type == STK_CALL_FRAME)\ - level--;\ - else if (k->type == STK_RETURN)\ - level++;\ - k--;\ - }\ -} while (0) - #define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\ STACK_ENSURE(1);\ stk->type = STK_CALLOUT;\ @@ -1849,7 +1964,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #ifdef ONIG_DEBUG #define STACK_BASE_CHECK(p, at) \ if ((p) < stk_base) {\ - fprintf(stderr, "at %s\n", at);\ + fprintf(DBGFP, "at %s\n", at);\ MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\ } #else @@ -1907,6 +2022,7 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ POP_REPEAT_INC \ POP_EMPTY_CHECK_START \ + POP_CALL \ POP_CALLOUT_CASE\ }\ }\ @@ -1914,12 +2030,14 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#define POP_TIL_BODY(aname, til_type) do {\ +#define STACK_POP_TO_MARK(sid) do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk, (aname));\ + STACK_BASE_CHECK(stk, "STACK_POP_TO_MARK");\ if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\ - if (stk->type == (til_type)) break;\ + if (stk->type == STK_MARK) {\ + if (stk->zid == (sid)) break;\ + }\ else {\ if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ @@ -1931,58 +2049,57 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ POP_REPEAT_INC \ POP_EMPTY_CHECK_START \ + POP_CALL \ /* Don't call callout here because negation of total success by (?!..) (?<!..) 
*/\ }\ }\ }\ } while(0) -#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\ - POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\ -} while(0) - -#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\ - POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\ -} while(0) - -#define STACK_EXEC_TO_VOID(k) do {\ - k = stk;\ +#define POP_TIL_BODY(aname, til_type) do {\ while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \ - if (IS_TO_VOID_TARGET(k)) {\ - if (k->type == STK_TO_VOID_START) {\ - k->type = STK_VOID;\ - break;\ + stk--;\ + STACK_BASE_CHECK(stk, (aname));\ + if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\ + if (stk->type == (til_type)) break;\ + else {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ + }\ + POP_REPEAT_INC \ + POP_EMPTY_CHECK_START \ + POP_CALL \ + /* Don't call callout here because negation of total success by (?!..) (?<!..) 
*/\ }\ - k->type = STK_VOID;\ }\ }\ } while(0) -#define STACK_GET_PREC_READ_START(k) do {\ - int level = 0;\ + +#define STACK_TO_VOID_TO_MARK(k,sid) do {\ k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\ + STACK_BASE_CHECK(k, "STACK_TO_VOID_TO_MARK");\ if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_PREC_READ_START) {\ - if (level == 0) {\ - break;\ + if (k->type == STK_MARK) {\ + if (k->zid == (sid)) {\ + k->type = STK_VOID;\ + break;\ + } /* don't void different id mark */ \ }\ - level--;\ - }\ - else if (k->type == STK_PREC_READ_END) {\ - level++;\ + else\ + k->type = STK_VOID;\ }\ }\ } while(0) - #define EMPTY_CHECK_START_SEARCH(sid, k) do {\ k = stk;\ while (1) {\ @@ -2173,6 +2290,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c) #endif +#ifdef USE_CALL #define STACK_RETURN(addr) do {\ int level = 0;\ StackType* k = stk;\ @@ -2191,6 +2309,25 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) +#define GET_STACK_RETURN_CALL(k,addr) do {\ + int level = 0;\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "GET_STACK_RETURN_CALL");\ + if (k->type == STK_CALL_FRAME) {\ + if (level == 0) {\ + (addr) = k->u.call_frame.ret_addr;\ + break;\ + }\ + else level--;\ + }\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while(0) +#endif + #define STRING_CMP(s1,s2,len) do {\ while (len-- > 0) {\ @@ -2400,30 +2537,23 @@ backref_check_at_nested_level(regex_t* reg, } #endif /* USE_BACKREF_WITH_LEVEL */ +static int SubexpCallMaxNestLevel = DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL; #ifdef ONIG_DEBUG_STATISTICS -#define USE_TIMEOFDAY - #ifdef USE_TIMEOFDAY -#ifdef HAVE_SYS_TIME_H -#include <sys/time.h> -#endif -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif + static struct timeval ts, te; #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ 
(((te).tv_sec - (ts).tv_sec)*1000000)) #else -#ifdef HAVE_SYS_TIMES_H -#include <sys/times.h> -#endif + static struct tms ts, te; #define GETTIME(t) times(&(t)) #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) -#endif + +#endif /* USE_TIMEOFDAY */ static int OpCounter[256]; static int OpPrevCounter[256]; @@ -2531,9 +2661,9 @@ typedef struct { #endif /* USE_THREADED_CODE */ #define INC_OP p++ -#define NEXT_OUT SOP_OUT; NEXT_OP -#define JUMP_OUT SOP_OUT; JUMP_OP -#define BREAK_OUT SOP_OUT; BREAK_OP +#define JUMP_OUT_WITH_SPREV_SET SOP_OUT; NEXT_OP +#define JUMP_OUT SOP_OUT; JUMP_OP +#define BREAK_OUT SOP_OUT; BREAK_OP #define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP @@ -2544,7 +2674,7 @@ typedef struct { int len, spos;\ spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\ xp = p - (offset);\ - fprintf(stderr, "%7u: %7ld: %4d> \"",\ + fprintf(DBGFP, "%7u: %7ld: %4d> \"",\ counter, GET_STACK_INDEX(stk), spos);\ counter++;\ bp = buf;\ @@ -2560,21 +2690,33 @@ typedef struct { xmemcpy(bp, "\"", 1); bp += 1;\ }\ *bp = 0;\ - fputs((char* )buf, stderr);\ - for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\ + fputs((char* )buf, DBGFP);\ + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', DBGFP);\ if (xp == FinishCode)\ - fprintf(stderr, "----: finish");\ + fprintf(DBGFP, "----: finish");\ else {\ - fprintf(stderr, "%4d: ", (int )(xp - reg->ops));\ - print_compiled_byte_code(stderr, reg, (int )(xp - reg->ops), reg->ops, encode);\ + int index;\ + enum OpCode zopcode;\ + Operation* addr;\ + index = (int )(xp - reg->ops);\ + fprintf(DBGFP, "%4d: ", index);\ + print_compiled_byte_code(DBGFP, reg, index, reg->ops, encode); \ + zopcode = GET_OPCODE(reg, index);\ + if (zopcode == OP_RETURN) {\ + GET_STACK_RETURN_CALL(stkp, addr);\ + fprintf(DBGFP, " f:%ld -> %d", \ + GET_STACK_INDEX(stkp), (int )(addr - reg->ops));\ + }\ }\ - fprintf(stderr, "\n");\ + fprintf(DBGFP, "\n");\ } while(0); #else #define MATCH_DEBUG_OUT(offset) #endif -#define 
MATCH_AT_ERROR_RETURN(err_code) best_len = err_code; goto match_at_end +#define MATCH_AT_ERROR_RETURN(err_code) do {\ + best_len = err_code; goto match_at_end;\ +} while(0) /* match data(str - end) from position (sstart). */ @@ -2607,8 +2749,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_STR_MB2N, &&L_STR_MB3N, &&L_STR_MBN, - &&L_STR_1_IC, - &&L_STR_N_IC, &&L_CCLASS, &&L_CCLASS_MB, &&L_CCLASS_MIX, @@ -2635,7 +2775,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_BEGIN_LINE, &&L_END_LINE, &&L_SEMI_END_BUF, - &&L_BEGIN_POSITION, + &&L_CHECK_POSITION, &&L_BACKREF1, &&L_BACKREF2, &&L_BACKREF_N, @@ -2660,7 +2800,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_JUMP, &&L_PUSH, &&L_PUSH_SUPER, - &&L_POP_OUT, + &&L_POP, + &&L_POP_TO_MARK, #ifdef USE_OP_PUSH_OR_JUMP_EXACT &&L_PUSH_OR_JUMP_EXACT1, #endif @@ -2675,16 +2816,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_CALL &&L_EMPTY_CHECK_END_MEMST_PUSH, #endif - &&L_PREC_READ_START, - &&L_PREC_READ_END, - &&L_PREC_READ_NOT_START, - &&L_PREC_READ_NOT_END, - &&L_ATOMIC_START, - &&L_ATOMIC_END, - &&L_LOOK_BEHIND, - &&L_LOOK_BEHIND_NOT_START, - &&L_LOOK_BEHIND_NOT_END, - &&L_PUSH_SAVE_VAL, + &&L_MOVE, + &&L_STEP_BACK_START, + &&L_STEP_BACK_NEXT, + &&L_CUT_TO_MARK, + &&L_MARK, + &&L_SAVE_VAL, &&L_UPDATE_VAR, #ifdef USE_CALL &&L_CALL, @@ -2701,7 +2838,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, LengthType tlen, tlen2; MemNumType mem; RelAddrType addr; - UChar *s, *q, *ps, *sbegin; + UChar *s, *ps, *sbegin; UChar *right_range; int is_alloca; char *alloc_base; @@ -2714,7 +2851,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, StackIndex *repeat_stk; StackIndex *empty_check_stk; #endif -#ifdef USE_RETRY_LIMIT_IN_MATCH +#ifdef USE_RETRY_LIMIT unsigned long retry_limit_in_match; unsigned long retry_in_match_counter; #endif @@ -2727,6 +2864,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, 
OnigEncoding encode = reg->enc; OnigCaseFoldType case_fold_flag = reg->case_fold_flag; +#ifdef USE_CALL + unsigned long subexp_call_nest_counter = 0; +#endif + #ifdef ONIG_DEBUG_MATCH static unsigned int counter = 1; #endif @@ -2747,8 +2888,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, msa->mp->match_at_call_counter++; #endif -#ifdef USE_RETRY_LIMIT_IN_MATCH +#ifdef USE_RETRY_LIMIT retry_limit_in_match = msa->retry_limit_in_match; + if (msa->retry_limit_in_search != 0) { + unsigned long rem = msa->retry_limit_in_search + - msa->retry_limit_in_search_counter; + if (rem < retry_limit_in_match) + retry_limit_in_match = rem; + } #endif pop_level = reg->stack_pop_level; @@ -2760,9 +2907,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", + fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", str, end, sstart, sprev); - fprintf(stderr, "size: %d, start offset: %d\n", + fprintf(DBGFP, "size: %d, start offset: %d\n", (int )(end - str), (int )(sstart - str)); #endif @@ -2771,7 +2918,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */ INIT_RIGHT_RANGE; -#ifdef USE_RETRY_LIMIT_IN_MATCH +#ifdef USE_RETRY_LIMIT retry_in_match_counter = 0; #endif @@ -2781,7 +2928,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (n > best_len) { OnigRegion* region; #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(option)) { + if (OPTON_FIND_LONGEST(option)) { if (n > msa->best_len) { msa->best_len = n; msa->best_s = (UChar* )sstart; @@ -2796,7 +2943,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (keep > s) keep = s; #ifdef USE_POSIX_API_REGION_OPTION - if (IS_POSIX_REGION(msa->options)) { + if (OPTON_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; rmt[0].rm_so = (regoff_t )(keep - str); @@ -2850,7 
+2997,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } #endif /* USE_CAPTURE_HISTORY */ #ifdef USE_POSIX_API_REGION_OPTION - } /* else IS_POSIX_REGION() */ + } /* else OPTON_POSIX_REGION() */ #endif } /* if (region) */ } /* n > best_len */ @@ -2860,12 +3007,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif SOP_OUT; - if (IS_FIND_CONDITION(option)) { - if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + if (OPTON_FIND_CONDITION(option)) { + if (OPTON_FIND_NOT_EMPTY(option) && s == sstart) { best_len = ONIG_MISMATCH; goto fail; /* for retry */ } - if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { + if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { goto fail; /* for retry */ } } @@ -2879,28 +3026,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; s++; INC_OP; - NEXT_OUT; - - CASE_OP(STR_1_IC) - { - int len; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - ps = p->exact.s; - while (len-- > 0) { - if (*ps != *q) goto fail; - ps++; q++; - } - } - INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(STR_2) DATA_ENSURE(2); @@ -2969,34 +3095,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; JUMP_OUT; - CASE_OP(STR_N_IC) - { - int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - tlen = p->exact_n.n; - ps = p->exact_n.s; - endp = ps + tlen; - while (ps < endp) { - sprev = s; - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (ps >= endp) goto fail; - if (*ps != *q) goto fail; - ps++; q++; - } - } - } - - INC_OP; - JUMP_OUT; - CASE_OP(STR_MB2N1) DATA_ENSURE(2); ps = p->exact.s; @@ -3005,7 +3103,7 @@ match_at(regex_t* 
reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; s++; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(STR_MB2N2) DATA_ENSURE(4); @@ -3088,9 +3186,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(CCLASS) DATA_ENSURE(1); if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail; + if (ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; s++; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_MB) DATA_ENSURE(1); @@ -3110,7 +3209,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail; } INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_MIX) DATA_ENSURE(1); @@ -3124,14 +3223,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_NOT) DATA_ENSURE(1); if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail; s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_MB_NOT) DATA_ENSURE(1); @@ -3160,7 +3259,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, cc_mb_not_success: INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_MIX_NOT) DATA_ENSURE(1); @@ -3174,7 +3273,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(ANYCHAR) DATA_ENSURE(1); @@ -3183,7 +3282,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(ANYCHAR_ML) DATA_ENSURE(1); @@ -3191,7 +3290,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(n); s += n; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(ANYCHAR_STAR) INC_OP; @@ -3239,7 +3338,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } } - NEXT_OUT; + JUMP_OUT; CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT) { @@ -3263,7 +3362,7 @@ match_at(regex_t* reg, const UChar* str, 
const UChar* end, } } } - NEXT_OUT; + JUMP_OUT; CASE_OP(WORD) DATA_ENSURE(1); @@ -3272,7 +3371,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(WORD_ASCII) DATA_ENSURE(1); @@ -3281,7 +3380,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(NO_WORD) DATA_ENSURE(1); @@ -3290,7 +3389,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(NO_WORD_ASCII) DATA_ENSURE(1); @@ -3299,7 +3398,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(WORD_BOUNDARY) { @@ -3420,7 +3519,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BEGIN_LINE) if (ON_STR_BEGIN(s)) { - if (IS_NOTBOL(msa->options)) goto fail; + if (OPTON_NOTBOL(msa->options)) goto fail; INC_OP; JUMP_OUT; } @@ -3435,7 +3534,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; + if (OPTON_NOTEOL(msa->options)) goto fail; INC_OP; JUMP_OUT; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE @@ -3459,7 +3558,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; + if (OPTON_NOTEOL(msa->options)) goto fail; INC_OP; JUMP_OUT; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE @@ -3483,10 +3582,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif goto fail; - CASE_OP(BEGIN_POSITION) - if (s != msa->start) - goto fail; - + CASE_OP(CHECK_POSITION) + switch 
(p->check_position.type) { + case CHECK_POSITION_SEARCH_START: + if (s != msa->start) goto fail; + break; + case CHECK_POSITION_CURRENT_RIGHT_RANGE: + if (s != right_range) goto fail; + break; + default: + break; + } INC_OP; JUMP_OUT; @@ -3746,7 +3852,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s); + fprintf(DBGFP, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s); #endif empty_check_found: /* empty loop founded, skip next instruction */ @@ -3779,7 +3885,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s); + fprintf(DBGFP, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s); #endif if (is_empty == -1) goto fail; goto empty_check_found; @@ -3802,7 +3908,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n", + fprintf(DBGFP, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n", (int )mem, s); #endif if (is_empty == -1) goto fail; @@ -3832,10 +3938,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; JUMP_OUT; - CASE_OP(POP_OUT) + CASE_OP(POP) STACK_POP_ONE; - /* for stop backtrack */ - /* CHECK_RETRY_LIMIT_IN_MATCH; */ + INC_OP; + JUMP_OUT; + + CASE_OP(POP_TO_MARK) + STACK_POP_TO_MARK(p->pop_to_mark.id); INC_OP; JUMP_OUT; @@ -3933,89 +4042,100 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } CHECK_INTERRUPT_JUMP_OUT; - CASE_OP(PREC_READ_START) - STACK_PUSH_PREC_READ_START(s, sprev); - INC_OP; - JUMP_OUT; +#ifdef USE_CALL + CASE_OP(CALL) + if (subexp_call_nest_counter == SubexpCallMaxNestLevel) + goto fail; + subexp_call_nest_counter++; + addr = p->call.addr; + INC_OP; STACK_PUSH_CALL_FRAME(p); + p = reg->ops + addr; - CASE_OP(PREC_READ_END) - 
STACK_GET_PREC_READ_START(stkp); - s = stkp->u.state.pstr; - sprev = stkp->u.state.pstr_prev; - STACK_PUSH(STK_PREC_READ_END,0,0,0); - INC_OP; JUMP_OUT; - CASE_OP(PREC_READ_NOT_START) - addr = p->prec_read_not_start.addr; - STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); - INC_OP; + CASE_OP(RETURN) + STACK_RETURN(p); + STACK_PUSH_RETURN; + subexp_call_nest_counter--; JUMP_OUT; +#endif - CASE_OP(PREC_READ_NOT_END) - STACK_POP_TIL_ALT_PREC_READ_NOT; - goto fail; + CASE_OP(MOVE) + if (p->move.n < 0) { + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, -p->move.n); + if (IS_NULL(s)) goto fail; + } + else { + int len; - CASE_OP(ATOMIC_START) - STACK_PUSH_TO_VOID_START; + for (tlen = p->move.n; tlen > 0; tlen--) { + len = enclen(encode, s); + sprev = s; + s += len; + if (s > end) goto fail; + if (s == end) { + if (tlen != 1) goto fail; + else break; + } + } + } + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); INC_OP; JUMP_OUT; - CASE_OP(ATOMIC_END) - STACK_EXEC_TO_VOID(stkp); - INC_OP; + CASE_OP(STEP_BACK_START) + tlen = p->step_back_start.initial; + if (tlen != 0) { + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + if (IS_NULL(s)) goto fail; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + } + if (p->step_back_start.remaining != 0) { + STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining); + p += p->step_back_start.addr; + } + else + INC_OP; JUMP_OUT; - CASE_OP(LOOK_BEHIND) - tlen = p->look_behind.len; - s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + CASE_OP(STEP_BACK_NEXT) + tlen = (LengthType )stk->zid; /* remaining count */ + if (tlen != INFINITE_LEN) tlen--; + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (tlen != 0) { + STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen); + } INC_OP; JUMP_OUT; - CASE_OP(LOOK_BEHIND_NOT_START) - addr = p->look_behind_not_start.addr; - tlen = 
p->look_behind_not_start.len; - q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); - if (IS_NULL(q)) { - /* too short case -> success. ex. /(?<!XXX)a/.match("a") - If you want to change to fail, replace following line. */ - p += addr; - /* goto fail; */ + CASE_OP(CUT_TO_MARK) + mem = p->cut_to_mark.id; /* mem: mark id */ + STACK_TO_VOID_TO_MARK(stkp, mem); + if (p->cut_to_mark.restore_pos != 0) { + s = stkp->u.val.v; + sprev = stkp->u.val.v2; } - else { - STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev); - s = q; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - INC_OP; - } - JUMP_OUT; - - CASE_OP(LOOK_BEHIND_NOT_END) - STACK_POP_TIL_ALT_LOOK_BEHIND_NOT; INC_OP; - goto fail; - -#ifdef USE_CALL - CASE_OP(CALL) - addr = p->call.addr; - INC_OP; STACK_PUSH_CALL_FRAME(p); - p = reg->ops + addr; JUMP_OUT; - CASE_OP(RETURN) - STACK_RETURN(p); - STACK_PUSH_RETURN; + CASE_OP(MARK) + mem = p->mark.id; /* mem: mark id */ + if (p->mark.save_pos != 0) + STACK_PUSH_MARK_WITH_POS(mem, s, sprev); + else + STACK_PUSH_MARK(mem); + + INC_OP; JUMP_OUT; -#endif - CASE_OP(PUSH_SAVE_VAL) + CASE_OP(SAVE_VAL) { SaveType type; - type = p->push_save_val.type; - mem = p->push_save_val.id; /* mem: save id */ + type = p->save_val.type; + mem = p->save_val.id; /* mem: save id */ switch ((enum SaveType )type) { case SAVE_KEEP: STACK_PUSH_SAVE_VAL(mem, type, s); @@ -4039,13 +4159,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, enum SaveType save_type; type = p->update_var.type; - mem = p->update_var.id; /* mem: save id */ switch ((enum UpdateVarType )type) { case UPDATE_VAR_KEEP_FROM_STACK_LAST: STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep); break; case UPDATE_VAR_S_FROM_STACK: + mem = p->update_var.id; /* mem: save id */ STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s); break; case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK: @@ -4055,7 +4175,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case UPDATE_VAR_RIGHT_RANGE_FROM_STACK: 
save_type = SAVE_RIGHT_RANGE; get_save_val_type_last_id: - STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range); + mem = p->update_var.id; /* mem: save id */ + STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range, p->update_var.clear); + break; + case UPDATE_VAR_RIGHT_RANGE_TO_S: + right_range = s; break; case UPDATE_VAR_RIGHT_RANGE_INIT: INIT_RIGHT_RANGE; @@ -4163,10 +4287,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } BYTECODE_INTERPRETER_END; match_at_end: - STACK_SAVE; + if (msa->retry_limit_in_search != 0) { + msa->retry_limit_in_search_counter += retry_in_match_counter; + } + STACK_SAVE(msa, is_alloca, alloc_base); return best_len; } + +#ifdef USE_REGSET + typedef struct { regex_t* reg; OnigRegion* region; @@ -4433,7 +4563,7 @@ onig_regset_search_with_param(OnigRegSet* set, if (set->n == 0) return ONIG_MISMATCH; - if (IS_POSIX_REGION(option)) + if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT; r = 0; @@ -4453,11 +4583,11 @@ onig_regset_search_with_param(OnigRegSet* set, if (start > end || start < str) goto mismatch_no_msa; if (str < end) { /* forward search only */ - if (range <= start) + if (range < start) return ONIGERR_INVALID_ARGUMENT; } - if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (OPTON_CHECK_VALIDITY_OF_STRING(option)) { if (! 
ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) { r = ONIGERR_INVALID_WIDE_CHAR_VALUE; goto finish_no_msa; @@ -4535,7 +4665,18 @@ onig_regset_search_with_param(OnigRegSet* set, for (i = 0; i < set->n; i++) { reg = set->rs[i].reg; if (reg->threshold_len == 0) { - REGSET_MATCH_AND_RETURN_CHECK(end); + /* REGSET_MATCH_AND_RETURN_CHECK(end); */ + /* Can't use REGSET_MATCH_AND_RETURN_CHECK() + because r must be set regex index (i) + */ + r = match_at(reg, str, end, end, s, prev, msas + i); + if (r != ONIG_MISMATCH) { + if (r >= 0) { + r = i; + goto match; + } + else goto finish; /* error */ + } } } @@ -4567,7 +4708,7 @@ onig_regset_search_with_param(OnigRegSet* set, for (i = 0; i < set->n; i++) { if (IS_NOT_NULL(msas)) MATCH_ARG_FREE(msas[i]); - if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) && + if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) && IS_NOT_NULL(set->rs[i].region)) { onig_region_clear(set->rs[i].region); } @@ -4586,7 +4727,7 @@ onig_regset_search_with_param(OnigRegSet* set, for (i = 0; i < set->n; i++) { if (IS_NOT_NULL(msas)) MATCH_ARG_FREE(msas[i]); - if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) && + if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) && IS_NOT_NULL(set->rs[i].region)) { onig_region_clear(set->rs[i].region); } @@ -4625,6 +4766,9 @@ onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end, return r; } +#endif /* USE_REGSET */ + + static UChar* slow_search(OnigEncoding enc, UChar* target, UChar* target_end, const UChar* text, const UChar* text_end, UChar* text_range) @@ -4656,48 +4800,6 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, return (UChar* )NULL; } -static int -str_lower_case_match(OnigEncoding enc, int case_fold_flag, - const UChar* t, const UChar* tend, - const UChar* p, const UChar* end) -{ - int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - while (t < tend) { - if (p >= end) return 0; - lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); - q = lowbuf; - while 
(lowlen > 0) { - if (t >= tend) return 0; - if (*t++ != *q++) return 0; - lowlen--; - } - } - - return 1; -} - -static UChar* -slow_search_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) -{ - UChar *s; - - s = (UChar* )text; - - while (s < text_range) { - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, text_end)) - return s; - - s += enclen(enc, s); - } - - return (UChar* )NULL; -} - static UChar* slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, const UChar* text, const UChar* adjust_text, @@ -4731,33 +4833,6 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, } static UChar* -slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - UChar *s; - - s = (UChar* )text_end; - s -= (target_end - target); - if (s > text_start) - s = (UChar* )text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); - - while (s >= text) { - if (str_lower_case_match(enc, case_fold_flag, - target, target_end, s, text_end)) - return s; - - s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); - } - - return (UChar* )NULL; -} - - -static UChar* sunday_quick_search_step_forward(regex_t* reg, const UChar* target, const UChar* target_end, const UChar* text, const UChar* text_end, @@ -4770,8 +4845,9 @@ sunday_quick_search_step_forward(regex_t* reg, OnigEncoding enc; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, - "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range); + fprintf(DBGFP, + "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", + text, text_end, text_range); #endif enc = reg->enc; @@ -4894,7 +4970,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, 
MATCH_ARG_INIT(msa, reg, option, region, at, mp); if (region #ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) + && !OPTON_POSIX_REGION(option) #endif ) { r = onig_region_resize_clear(region, reg->num_mem + 1); @@ -4903,7 +4979,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, r = 0; if (r == 0) { - if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (OPTON_CHECK_VALIDITY_OF_STRING(option)) { if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { r = ONIGERR_INVALID_WIDE_CHAR_VALUE; goto end; @@ -4926,7 +5002,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar *p, *pprev = (UChar* )NULL; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n", + fprintf(DBGFP, "forward_search: str: %p, end: %p, start: %p, range: %p\n", str, end, start, range); #endif @@ -4949,10 +5025,6 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, case OPTIMIZE_STR: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case OPTIMIZE_STR_CASE_FOLD: - p = slow_search_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, p, end, range); - break; case OPTIMIZE_STR_FAST: p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range); @@ -5047,7 +5119,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, } #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, + fprintf(DBGFP, "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n", (int )(*low - str), (int )(*high - str), reg->dist_min, reg->dist_max); @@ -5075,12 +5147,6 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, range, adjrange, end, p); break; - case OPTIMIZE_STR_CASE_FOLD: - p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, - range, adjrange, end, p); - break; - case OPTIMIZE_STR_FAST: case 
OPTIMIZE_STR_FAST_STEP_FORWARD: goto exact_method; @@ -5150,7 +5216,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search: low: %d, high: %d\n", + fprintf(DBGFP, "backward_search: low: %d, high: %d\n", (int )(*low - str), (int )(*high - str)); #endif return 1; /* success */ @@ -5158,7 +5224,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, fail: #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search: fail.\n"); + fprintf(DBGFP, "backward_search: fail.\n"); #endif return 0; /* fail */ } @@ -5202,7 +5268,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar *orig_start = start; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, + fprintf(DBGFP, "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n", str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif @@ -5211,7 +5277,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (region #ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) + && ! OPTON_POSIX_REGION(option) #endif ) { r = onig_region_resize_clear(region, reg->num_mem + 1); @@ -5220,7 +5286,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; - if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (OPTON_CHECK_VALIDITY_OF_STRING(option)) { if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { r = ONIGERR_INVALID_WIDE_CHAR_VALUE; goto finish_no_msa; @@ -5233,7 +5299,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ + if (! 
OPTON_FIND_LONGEST(reg->options)) {\ goto match;\ }\ }\ @@ -5342,7 +5408,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, goto end_buf; } } - else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) { + else if ((reg->anchor & ANCR_ANYCHAR_INF_ML) && range > start) { goto begin_position; } } @@ -5350,7 +5416,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, static const UChar* address_for_empty_string = (UChar* )""; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search: empty string.\n"); + fprintf(DBGFP, "onig_search: empty string.\n"); #endif if (reg->threshold_len == 0) { @@ -5366,7 +5432,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, } #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", + fprintf(DBGFP, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", (int )(end - str), (int )(start - str), (int )(range - str)); #endif @@ -5419,17 +5485,16 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (! 
forward_search(reg, str, end, s, sch_range, &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) { + if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 && + (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { do { MATCH_AND_RETURN_CHECK(data_range); prev = s; s += enclen(reg->enc, s); - if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); - } + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { + prev = s; + s += enclen(reg->enc, s); } } while (s < range); goto mismatch; @@ -5509,7 +5574,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, mismatch: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(reg->options)) { + if (OPTON_FIND_LONGEST(reg->options)) { if (msa.best_len >= 0) { s = msa.best_s; goto match; @@ -5523,9 +5588,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not set in match_at(). 
*/ - if (IS_FIND_NOT_EMPTY(reg->options) && region + if (OPTON_FIND_NOT_EMPTY(reg->options) && region #ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) + && !OPTON_POSIX_REGION(option) #endif ) { onig_region_clear(region); @@ -5533,7 +5598,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) - fprintf(stderr, "onig_search: error %d\n", r); + fprintf(DBGFP, "onig_search: error %d\n", r); #endif return r; @@ -5542,7 +5607,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, finish_no_msa: #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) - fprintf(stderr, "onig_search: error %d\n", r); + fprintf(DBGFP, "onig_search: error %d\n", r); #endif return r; @@ -5578,7 +5643,7 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, int rs; const UChar* start; - if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (OPTON_CHECK_VALIDITY_OF_STRING(option)) { if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) return ONIGERR_INVALID_WIDE_CHAR_VALUE; @@ -5616,6 +5681,19 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, return n; } +extern int +onig_get_subexp_call_max_nest_level(void) +{ + return SubexpCallMaxNestLevel; +} + +extern int +onig_set_subexp_call_max_nest_level(int level) +{ + SubexpCallMaxNestLevel = level; + return 0; +} + extern OnigEncoding onig_get_encoding(regex_t* reg) { @@ -5669,6 +5747,8 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from) *to = *from; } +#ifdef USE_REGSET + extern int onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[]) { @@ -5759,7 +5839,7 @@ onig_regset_add(OnigRegSet* set, regex_t* reg) { OnigRegion* region; - if (IS_FIND_LONGEST(reg->options)) + if (OPTON_FIND_LONGEST(reg->options)) return ONIGERR_INVALID_ARGUMENT; if (set->n != 0 && reg->enc != set->enc) @@ -5805,7 +5885,7 @@ onig_regset_replace(OnigRegSet* set, int at, regex_t* reg) set->n--; } else { - if (IS_FIND_LONGEST(reg->options)) + if 
(OPTON_FIND_LONGEST(reg->options)) return ONIGERR_INVALID_ARGUMENT; if (set->n > 1 && reg->enc != set->enc) @@ -5864,6 +5944,8 @@ onig_regset_get_region(OnigRegSet* set, int at) return set->rs[at].region; } +#endif /* USE_REGSET */ + #ifdef USE_DIRECT_THREADED_CODE extern int @@ -6385,7 +6467,7 @@ onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED) } -#include <stdio.h> +#ifndef ONIG_NO_PRINT static FILE* OutFp; @@ -6483,4 +6565,6 @@ onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */) return ONIG_NORMAL; } +#endif /* ONIG_NO_PRINT */ + #endif /* USE_CALLOUT */ diff --git a/src/regint.h b/src/regint.h index cc540da..04ebe0a 100644 --- a/src/regint.h +++ b/src/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,6 +44,7 @@ defined(ONIG_DEBUG_STATISTICS) #ifndef ONIG_DEBUG #define ONIG_DEBUG +#define DBGFP stderr #endif #endif @@ -55,13 +56,14 @@ /* config */ /* spec. 
config */ +#define USE_REGSET #define USE_CALL #define USE_CALLOUT #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ #define USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR -#define USE_RETRY_LIMIT_IN_MATCH +#define USE_RETRY_LIMIT #ifdef USE_GOTO_LABELS_AS_VALUES #define USE_THREADED_CODE #define USE_DIRECT_THREADED_CODE @@ -71,73 +73,33 @@ #define USE_OP_PUSH_OR_JUMP_EXACT #define USE_QUANT_PEEK_NEXT #define USE_ST_LIBRARY +#define USE_TIMEOFDAY -#define USE_WORD_BEGIN_END /* "\<", "\>" */ +#define USE_WORD_BEGIN_END /* "\<", "\>" */ #define USE_CAPTURE_HISTORY #define USE_VARIABLE_META_CHARS #define USE_POSIX_API_REGION_OPTION #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE /* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */ +#define INIT_MATCH_STACK_SIZE 160 +#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ +#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000 +#define DEFAULT_RETRY_LIMIT_IN_SEARCH 0 /* unlimited */ +#define DEFAULT_PARSE_DEPTH_LIMIT 4096 +#define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL 20 -#include "regenc.h" - -#define INIT_MATCH_STACK_SIZE 160 -#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ -#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000 -#define DEFAULT_PARSE_DEPTH_LIMIT 4096 - -/* */ -/* escape other system UChar definition */ -#ifdef ONIG_ESCAPE_UCHAR_COLLISION -#undef ONIG_ESCAPE_UCHAR_COLLISION -#endif -#define xmalloc malloc -#define xrealloc realloc -#define xcalloc calloc -#define xfree free - -#define st_init_table onig_st_init_table -#define st_init_table_with_size onig_st_init_table_with_size -#define st_init_numtable onig_st_init_numtable -#define st_init_numtable_with_size onig_st_init_numtable_with_size -#define st_init_strtable onig_st_init_strtable -#define st_init_strtable_with_size onig_st_init_strtable_with_size -#define st_delete onig_st_delete -#define st_delete_safe 
onig_st_delete_safe -#define st_insert onig_st_insert -#define st_lookup onig_st_lookup -#define st_foreach onig_st_foreach -#define st_add_direct onig_st_add_direct -#define st_free_table onig_st_free_table -#define st_cleanup_safe onig_st_cleanup_safe -#define st_copy onig_st_copy -#define st_nothing_key_clone onig_st_nothing_key_clone -#define st_nothing_key_free onig_st_nothing_key_free -/* */ -#define onig_st_is_member st_is_member - -#define xmemset memset -#define xmemcpy memcpy -#define xmemmove memmove - -#if defined(_WIN32) && !defined(__GNUC__) -#define xalloca _alloca -#define xvsnprintf(buf,size,fmt,args) _vsnprintf_s(buf,size,_TRUNCATE,fmt,args) -#define xsnprintf sprintf_s -#define xstrcat(dest,src,size) strcat_s(dest,size,src) -#else -#define xalloca alloca -#define xvsnprintf vsnprintf -#define xsnprintf snprintf -#define xstrcat(dest,src,size) strcat(dest,src) -#endif +#include "regenc.h" +#ifndef ONIG_NO_STANDARD_C_HEADERS #include <stddef.h> +#include <stdarg.h> #include <limits.h> #include <stdlib.h> +#include <string.h> +#include <ctype.h> #ifdef HAVE_STDINT_H #include <stdint.h> @@ -147,9 +109,6 @@ #include <alloca.h> #endif -#include <string.h> - -#include <ctype.h> #ifdef HAVE_SYS_TYPES_H #ifndef __BORLANDC__ #include <sys/types.h> @@ -160,21 +119,52 @@ #include <inttypes.h> #endif -#ifdef __BORLANDC__ +#if defined(_WIN32) || defined(__BORLANDC__) #include <malloc.h> #endif -#ifdef ONIG_DEBUG +#if defined(ONIG_DEBUG) || defined(NEED_TO_INCLUDE_STDIO) # include <stdio.h> #endif -#ifdef _WIN32 -#if defined(_MSC_VER) && (_MSC_VER < 1300) -typedef int intptr_t; -typedef unsigned int uintptr_t; +#ifdef ONIG_DEBUG_STATISTICS +#ifdef USE_TIMEOFDAY + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#else /* USE_TIMEOFDAY */ + +#ifdef HAVE_SYS_TIMES_H +#include <sys/times.h> #endif + +#endif /* USE_TIMEOFDAY */ +#endif /* ONIG_DEBUG_STATISTICS */ + +/* I don't think these x....'s need to be 
included in + ONIG_NO_STANDARD_C_HEADERS, but they are required by Issue #170 + and do so since there is no problem. + */ +#ifndef xmemset +#define xmemset memset +#endif + +#ifndef xmemcpy +#define xmemcpy memcpy +#endif + +#ifndef xmemmove +#define xmemmove memmove #endif +#endif /* ONIG_NO_STANDARD_C_HEADERS */ + + #ifdef MIN #undef MIN #endif @@ -191,8 +181,94 @@ typedef unsigned int uintptr_t; #define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY #define NULL_UCHARP ((UChar* )0) -#define CHAR_MAP_SIZE 256 -#define INFINITE_LEN ONIG_INFINITE_DISTANCE +#ifndef ONIG_INT_MAX +#define ONIG_INT_MAX INT_MAX +#endif + +#define CHAR_MAP_SIZE 256 +#define INFINITE_LEN ONIG_INFINITE_DISTANCE +#define STEP_BACK_MAX_CHAR_LEN 65535 /* INT_MAX is too big */ +#define LOOK_BEHIND_MAX_CHAR_LEN STEP_BACK_MAX_CHAR_LEN + +/* escape other system UChar definition */ +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif + +#define xmalloc malloc +#define xrealloc realloc +#define xcalloc calloc +#define xfree free + +#define st_init_table onig_st_init_table +#define st_init_table_with_size onig_st_init_table_with_size +#define st_init_numtable onig_st_init_numtable +#define st_init_numtable_with_size onig_st_init_numtable_with_size +#define st_init_strtable onig_st_init_strtable +#define st_init_strtable_with_size onig_st_init_strtable_with_size +#define st_delete onig_st_delete +#define st_delete_safe onig_st_delete_safe +#define st_insert onig_st_insert +#define st_lookup onig_st_lookup +#define st_foreach onig_st_foreach +#define st_add_direct onig_st_add_direct +#define st_free_table onig_st_free_table +#define st_cleanup_safe onig_st_cleanup_safe +#define st_copy onig_st_copy +#define st_nothing_key_clone onig_st_nothing_key_clone +#define st_nothing_key_free onig_st_nothing_key_free +/* */ +#define onig_st_is_member st_is_member + + +#if defined(_WIN32) && !defined(__GNUC__) + +#ifndef xalloca +#define xalloca _alloca +#endif 
+#ifndef xvsnprintf +#define xvsnprintf(buf,size,fmt,args) _vsnprintf_s(buf,size,_TRUNCATE,fmt,args) +#endif +#ifndef xsnprintf +#define xsnprintf sprintf_s +#endif +#ifndef xstrcat +#define xstrcat(dest,src,size) strcat_s(dest,size,src) +#endif + +#else + +#ifndef xalloca +#define xalloca alloca +#endif +#ifndef xvsnprintf +#define xvsnprintf vsnprintf +#endif +#ifndef xsnprintf +#define xsnprintf snprintf +#endif +#ifndef xstrcat +#define xstrcat(dest,src,size) strcat(dest,src) +#endif + +#endif /* defined(_WIN32) && !defined(__GNUC__) */ + + +#ifdef _WIN32 +#if defined(_MSC_VER) && (_MSC_VER < 1300) +typedef int intptr_t; +typedef unsigned int uintptr_t; +#endif +#endif + +#if SIZEOF_VOIDP == SIZEOF_LONG +typedef unsigned long hash_data_type; +#elif SIZEOF_VOIDP == SIZEOF_LONG_LONG +typedef unsigned long long hash_data_type; +#endif + +/* strend hash */ +typedef void* hash_table_type; #ifdef USE_CALLOUT @@ -236,7 +312,6 @@ enum OptimizeType { OPTIMIZE_STR, /* Slow Search */ OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */ OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */ - OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */ OPTIMIZE_MAP /* char map */ }; @@ -289,32 +364,20 @@ typedef unsigned int MemStatusType; (IS_CODE_DIGIT_ASCII(enc,code) ? DIGITVAL(code) \ : (ONIGENC_IS_CODE_UPPER(enc,code) ? 
(code) - 'A' + 10 : (code) - 'a' + 10)) -#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) -#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) -#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) -#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) -#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) -#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) -#define IS_FIND_CONDITION(option) ((option) & \ +#define OPTON_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) +#define OPTON_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) +#define OPTON_FIND_CONDITION(option) ((option) & \ (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) -#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) -#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) -#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) - -#define IS_WORD_ASCII(option) \ - ((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) -#define IS_DIGIT_ASCII(option) \ - ((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) -#define IS_SPACE_ASCII(option) \ - ((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) -#define IS_POSIX_ASCII(option) ((option) & ONIG_OPTION_POSIX_IS_ASCII) - -#define IS_ASCII_MODE_CTYPE_OPTION(ctype, options) \ - ((ctype) >= 0 && \ - (((ctype) < ONIGENC_CTYPE_ASCII && IS_POSIX_ASCII(options)) ||\ - ((ctype) == ONIGENC_CTYPE_WORD && IS_WORD_ASCII(options)) ||\ - ((ctype) == ONIGENC_CTYPE_DIGIT && IS_DIGIT_ASCII(options)) ||\ - ((ctype) == ONIGENC_CTYPE_SPACE && IS_SPACE_ASCII(options)))) +#define OPTON_NEGATE_SINGLELINE(option) ((option) & \ + ONIG_OPTION_NEGATE_SINGLELINE) +#define OPTON_DONT_CAPTURE_GROUP(option) ((option) & \ + ONIG_OPTION_DONT_CAPTURE_GROUP) +#define OPTON_CAPTURE_GROUP(option) ((option) & ONIG_OPTION_CAPTURE_GROUP) +#define OPTON_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) +#define 
OPTON_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) +#define OPTON_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) +#define OPTON_CHECK_VALIDITY_OF_STRING(option) ((option) & \ + ONIG_OPTION_CHECK_VALIDITY_OF_STRING) #define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) @@ -326,17 +389,17 @@ typedef unsigned int MemStatusType; #define BITS_PER_BYTE 8 #define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) #define BITS_IN_ROOM 32 /* 4 * BITS_PER_BYTE */ -#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) +#define BITSET_REAL_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) typedef uint32_t Bits; -typedef Bits BitSet[BITSET_SIZE]; +typedef Bits BitSet[BITSET_REAL_SIZE]; typedef Bits* BitSetRef; -#define SIZE_BITSET sizeof(BitSet) +#define SIZE_BITSET sizeof(BitSet) #define BITSET_CLEAR(bs) do {\ int i;\ - for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ + for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { (bs)[i] = 0; } \ } while (0) #define BS_ROOM(bs,pos) (bs)[(unsigned int )(pos) >> 5] @@ -356,14 +419,6 @@ typedef struct _BBuf { #define BB_INIT(buf,size) bbuf_init((BBuf* )(buf), (size)) -/* -#define BB_SIZE_INC(buf,inc) do{\ - (buf)->alloc += (inc);\ - (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ -} while (0) -*/ - #define BB_EXPAND(buf,low) do{\ do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ @@ -460,22 +515,20 @@ typedef struct _BBuf { /* operation code */ enum OpCode { - OP_FINISH = 0, /* matching process terminator (no more alternative) */ - OP_END = 1, /* pattern code terminator (success end) */ - OP_STR_1 = 2, /* single byte, N = 1 */ - OP_STR_2, /* single byte, N = 2 */ - OP_STR_3, /* single byte, N = 3 */ - OP_STR_4, /* single byte, N = 4 */ - OP_STR_5, /* single byte, N = 5 */ - OP_STR_N, /* single byte */ - OP_STR_MB2N1, /* mb-length = 2 N = 1 */ - 
OP_STR_MB2N2, /* mb-length = 2 N = 2 */ - OP_STR_MB2N3, /* mb-length = 2 N = 3 */ - OP_STR_MB2N, /* mb-length = 2 */ - OP_STR_MB3N, /* mb-length = 3 */ - OP_STR_MBN, /* other length */ - OP_STR_1_IC, /* single byte, N = 1, ignore case */ - OP_STR_N_IC, /* single byte, ignore case */ + OP_FINISH = 0, /* matching process terminator (no more alternative) */ + OP_END = 1, /* pattern code terminator (success end) */ + OP_STR_1 = 2, /* single byte, N = 1 */ + OP_STR_2, /* single byte, N = 2 */ + OP_STR_3, /* single byte, N = 3 */ + OP_STR_4, /* single byte, N = 4 */ + OP_STR_5, /* single byte, N = 5 */ + OP_STR_N, /* single byte */ + OP_STR_MB2N1, /* mb-length = 2 N = 1 */ + OP_STR_MB2N2, /* mb-length = 2 N = 2 */ + OP_STR_MB2N3, /* mb-length = 2 N = 3 */ + OP_STR_MB2N, /* mb-length = 2 */ + OP_STR_MB3N, /* mb-length = 3 */ + OP_STR_MBN, /* other length */ OP_CCLASS, OP_CCLASS_MB, OP_CCLASS_MIX, @@ -502,7 +555,7 @@ enum OpCode { OP_BEGIN_LINE, OP_END_LINE, OP_SEMI_END_BUF, - OP_BEGIN_POSITION, + OP_CHECK_POSITION, OP_BACKREF1, OP_BACKREF2, OP_BACKREF_N, @@ -527,7 +580,8 @@ enum OpCode { OP_JUMP, OP_PUSH, OP_PUSH_SUPER, - OP_POP_OUT, + OP_POP, + OP_POP_TO_MARK, #ifdef USE_OP_PUSH_OR_JUMP_EXACT OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ #endif @@ -542,16 +596,12 @@ enum OpCode { #ifdef USE_CALL OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ #endif - OP_PREC_READ_START, /* (?=...) start */ - OP_PREC_READ_END, /* (?=...) end */ - OP_PREC_READ_NOT_START, /* (?!...) start */ - OP_PREC_READ_NOT_END, /* (?!...) end */ - OP_ATOMIC_START, /* (?>...) start */ - OP_ATOMIC_END, /* (?>...) end */ - OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ - OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */ - OP_LOOK_BEHIND_NOT_END, /* (?<!...) 
end */ - OP_PUSH_SAVE_VAL, + OP_MOVE, + OP_STEP_BACK_START, + OP_STEP_BACK_NEXT, + OP_CUT_TO_MARK, + OP_MARK, + OP_SAVE_VAL, OP_UPDATE_VAR, #ifdef USE_CALL OP_CALL, /* \g<name> */ @@ -574,7 +624,13 @@ enum UpdateVarType { UPDATE_VAR_S_FROM_STACK = 1, UPDATE_VAR_RIGHT_RANGE_FROM_STACK = 2, UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3, - UPDATE_VAR_RIGHT_RANGE_INIT = 4, + UPDATE_VAR_RIGHT_RANGE_TO_S = 4, + UPDATE_VAR_RIGHT_RANGE_INIT = 5, +}; + +enum CheckPositionType { + CHECK_POSITION_SEARCH_START = 0, + CHECK_POSITION_CURRENT_RIGHT_RANGE = 1, }; enum TextSegmentBoundaryType { @@ -585,6 +641,7 @@ enum TextSegmentBoundaryType { typedef int RelAddrType; typedef int AbsAddrType; typedef int LengthType; +typedef int RelPositionType; typedef int RepeatNumType; typedef int MemNumType; typedef void* PointerType; @@ -619,7 +676,8 @@ typedef int ModeType; #define OPSIZE_JUMP 1 #define OPSIZE_PUSH 1 #define OPSIZE_PUSH_SUPER 1 -#define OPSIZE_POP_OUT 1 +#define OPSIZE_POP 1 +#define OPSIZE_POP_TO_MARK 1 #ifdef USE_OP_PUSH_OR_JUMP_EXACT #define OPSIZE_PUSH_OR_JUMP_EXACT1 1 #endif @@ -628,10 +686,6 @@ typedef int ModeType; #define OPSIZE_REPEAT_INC 1 #define OPSIZE_REPEAT_INC_NG 1 #define OPSIZE_WORD_BOUNDARY 1 -#define OPSIZE_PREC_READ_START 1 -#define OPSIZE_PREC_READ_NOT_START 1 -#define OPSIZE_PREC_READ_END 1 -#define OPSIZE_PREC_READ_NOT_END 1 #define OPSIZE_BACKREF 1 #define OPSIZE_FAIL 1 #define OPSIZE_MEM_START 1 @@ -640,16 +694,17 @@ typedef int ModeType; #define OPSIZE_MEM_END_PUSH_REC 1 #define OPSIZE_MEM_END 1 #define OPSIZE_MEM_END_REC 1 -#define OPSIZE_ATOMIC_START 1 -#define OPSIZE_ATOMIC_END 1 #define OPSIZE_EMPTY_CHECK_START 1 #define OPSIZE_EMPTY_CHECK_END 1 -#define OPSIZE_LOOK_BEHIND 1 -#define OPSIZE_LOOK_BEHIND_NOT_START 1 -#define OPSIZE_LOOK_BEHIND_NOT_END 1 +#define OPSIZE_CHECK_POSITION 1 #define OPSIZE_CALL 1 #define OPSIZE_RETURN 1 -#define OPSIZE_PUSH_SAVE_VAL 1 +#define OPSIZE_MOVE 1 +#define OPSIZE_STEP_BACK_START 1 +#define OPSIZE_STEP_BACK_NEXT 1 
+#define OPSIZE_CUT_TO_MARK 1 +#define OPSIZE_MARK 1 +#define OPSIZE_SAVE_VAL 1 #define OPSIZE_UPDATE_VAR 1 #ifdef USE_CALLOUT @@ -750,6 +805,9 @@ typedef struct { int not; } text_segment_boundary; struct { + enum CheckPositionType type; + } check_position; + struct { union { MemNumType n1; /* num == 1 */ MemNumType* ns; /* num > 1 */ @@ -781,6 +839,9 @@ typedef struct { UChar c; } push_if_peek_next; struct { + MemNumType id; + } pop_to_mark; + struct { MemNumType id; RelAddrType addr; } repeat; /* REPEAT, REPEAT_NG */ @@ -804,16 +865,33 @@ typedef struct { RelAddrType addr; } look_behind_not_start; struct { - AbsAddrType addr; - } call; + RelPositionType n; /* char relative position */ + } move; + struct { + LengthType initial; /* char length */ + LengthType remaining; /* char length */ + RelAddrType addr; + } step_back_start; + struct { + MemNumType id; + int restore_pos; /* flag: restore current string position */ + } cut_to_mark; + struct { + MemNumType id; + int save_pos; /* flag: save current string position */ + } mark; struct { SaveType type; MemNumType id; - } push_save_val; + } save_val; struct { UpdateVarType type; MemNumType id; + int clear; /* UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK or UPDATE_VAR_RIGHT_RANGE_FROM_STACK */ } update_var; + struct { + AbsAddrType addr; + } call; #ifdef USE_CALLOUT struct { MemNumType num; @@ -899,6 +977,17 @@ struct re_pattern_buffer { extern void onig_add_end_call(void (*func)(void)); +extern void onig_warning(const char* s); +extern UChar* onig_error_code_to_format P_((int code)); +extern void ONIG_VARIADIC_FUNC_ATTR onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); +extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); +extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); +extern RegexExt* onig_get_regex_ext(regex_t* reg); +extern int 
onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); +extern int onig_positive_int_multiply(int x, int y); +extern hash_table_type onig_st_init_strend_table_with_size P_((int size)); +extern int onig_st_lookup_strend P_((hash_table_type table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); +extern int onig_st_insert_strend P_((hash_table_type table, const UChar* str_key, const UChar* end_key, hash_data_type value)); #ifdef ONIG_DEBUG @@ -910,16 +999,8 @@ extern void onig_print_compiled_byte_code_list(FILE* f, regex_t* reg); extern void onig_statistics_init P_((void)); extern int onig_print_statistics P_((FILE* f)); #endif -#endif -extern void onig_warning(const char* s); -extern UChar* onig_error_code_to_format P_((int code)); -extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); -extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); -extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); -extern RegexExt* onig_get_regex_ext(regex_t* reg); -extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); -extern int onig_positive_int_multiply(int x, int y); +#endif /* ONIG_DEBUG */ #ifdef USE_CALLOUT @@ -998,20 +1079,6 @@ extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num #endif /* USE_CALLOUT */ -/* strend hash */ -typedef void hash_table_type; - -#ifdef _WIN32 -# include <windows.h> -typedef ULONG_PTR hash_data_type; -#else -typedef unsigned long hash_data_type; -#endif - -extern hash_table_type* onig_st_init_strend_table_with_size P_((int size)); -extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); -extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, 
const UChar* end_key, hash_data_type value)); - typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); #endif /* REGINT_H */ diff --git a/src/regparse.c b/src/regparse.c index fed53f7..cc015a7 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,12 +27,14 @@ * SUCH DAMAGE. */ -#include "regparse.h" -#include "st.h" - #ifdef DEBUG_NODE_FREE -#include <stdio.h> +#ifndef NEED_TO_INCLUDE_STDIO +#define NEED_TO_INCLUDE_STDIO #endif +#endif + +#include "regparse.h" +#include "st.h" #define INIT_TAG_NAMES_ALLOC_NUM 5 @@ -45,6 +47,26 @@ #define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \ ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') +#define OPTON_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) +#define OPTON_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) +#define OPTON_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) +#define OPTON_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) +#define OPTON_WORD_ASCII(option) \ + ((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) +#define OPTON_DIGIT_ASCII(option) \ + ((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) +#define OPTON_SPACE_ASCII(option) \ + ((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) +#define OPTON_POSIX_ASCII(option) ((option) & ONIG_OPTION_POSIX_IS_ASCII) +#define OPTON_TEXT_SEGMENT_WORD(option) ((option) & ONIG_OPTION_TEXT_SEGMENT_WORD) + +#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \ + ((ctype) >= 0 && \ + (((ctype) < ONIGENC_CTYPE_ASCII && OPTON_POSIX_ASCII(options)) ||\ + ((ctype) == ONIGENC_CTYPE_WORD && OPTON_WORD_ASCII(options)) ||\ + ((ctype) == ONIGENC_CTYPE_DIGIT && 
OPTON_DIGIT_ASCII(options)) ||\ + ((ctype) == ONIGENC_CTYPE_SPACE && OPTON_SPACE_ASCII(options)))) + OnigSyntaxType OnigSyntaxOniguruma = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | @@ -74,6 +96,7 @@ OnigSyntaxType OnigSyntaxOniguruma = { , ( SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | @@ -136,6 +159,20 @@ OnigSyntaxType OnigSyntaxRuby = { OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA; +typedef enum { + CS_VALUE, + CS_RANGE, + CS_COMPLETE, + CS_START +} CSTATE; + +typedef enum { + CV_UNDEF, + CV_SB, + CV_MB, + CV_CPROP +} CVAL; + extern void onig_null_warn(const char* s ARG_UNUSED) { } #ifdef DEFAULT_WARN_FUNCTION @@ -295,7 +332,7 @@ backref_rel_to_abs(int rel_no, ScanEnv* env) #define BITSET_IS_EMPTY(bs,empty) do {\ int i;\ empty = 1;\ - for (i = 0; i < (int )BITSET_SIZE; i++) {\ + for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {\ if ((bs)[i] != 0) {\ empty = 0; break;\ }\ @@ -315,35 +352,35 @@ static void bitset_invert(BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } + for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); } } static void bitset_invert_to(BitSetRef from, BitSetRef to) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } + for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); } } static void bitset_and(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } + for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; } } static void bitset_or(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } + for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; } } static void bitset_copy(BitSetRef dest, BitSetRef bs) { int i; - for (i = 
0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } + for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; } } extern int @@ -368,16 +405,6 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end) } } -static int -save_entry(ScanEnv* env, enum SaveType type, int* id) -{ - int nid = env->save_num; - - env->save_num++; - *id = nid; - return 0; -} - /* scan pattern methods */ #define PEND_VALUE 0 @@ -478,7 +505,7 @@ str_end_hash(st_str_end_key* x) return (int) (val + (val >> 5)); } -extern hash_table_type* +extern hash_table_type onig_st_init_strend_table_with_size(int size) { static struct st_hash_type hashType = { @@ -486,12 +513,11 @@ onig_st_init_strend_table_with_size(int size) str_end_hash, }; - return (hash_table_type* ) - onig_st_init_table_with_size(&hashType, size); + return (hash_table_type )onig_st_init_table_with_size(&hashType, size); } extern int -onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, +onig_st_lookup_strend(hash_table_type table, const UChar* str_key, const UChar* end_key, hash_data_type *value) { st_str_end_key key; @@ -503,7 +529,7 @@ onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, } extern int -onig_st_insert_strend(hash_table_type* table, const UChar* str_key, +onig_st_insert_strend(hash_table_type table, const UChar* str_key, const UChar* end_key, hash_data_type value) { st_str_end_key* key; @@ -569,7 +595,7 @@ callout_name_table_hash(st_callout_name_key* x) return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type); } -extern hash_table_type* +extern hash_table_type onig_st_init_callout_name_table_with_size(int size) { static struct st_hash_type hashType = { @@ -577,12 +603,11 @@ onig_st_init_callout_name_table_with_size(int size) callout_name_table_hash, }; - return (hash_table_type* ) - onig_st_init_table_with_size(&hashType, size); + return (hash_table_type )onig_st_init_table_with_size(&hashType, size); } extern int 
-onig_st_lookup_callout_name_table(hash_table_type* table, +onig_st_lookup_callout_name_table(hash_table_type table, OnigEncoding enc, int type, const UChar* str_key, @@ -600,7 +625,7 @@ onig_st_lookup_callout_name_table(hash_table_type* table, } static int -st_insert_callout_name_table(hash_table_type* table, +st_insert_callout_name_table(hash_table_type table, OnigEncoding enc, int type, UChar* str_key, UChar* end_key, hash_data_type value) @@ -775,7 +800,7 @@ onig_foreach_name(regex_t* reg, } static int -i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) +i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map) { int i; @@ -792,7 +817,7 @@ i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) } extern int -onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) +onig_renumber_name_table(regex_t* reg, GroupNumMap* map) { NameTable* t = (NameTable* )reg->name_table; @@ -1142,12 +1167,12 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name, extern int onig_noname_group_capture_is_active(regex_t* reg) { - if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + if (OPTON_DONT_CAPTURE_GROUP(reg->options)) return 0; if (onig_number_of_names(reg) > 0 && IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + ! 
OPTON_CAPTURE_GROUP(reg->options)) { return 0; } @@ -1603,12 +1628,11 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type, fe->arg_types[i] = arg_types[i]; } for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) { + if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT; if (fe->arg_types[i] == ONIG_TYPE_STRING) { OnigValue* val; UChar* ds; - if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT; - val = opt_defaults + j; ds = onigenc_strdup(enc, val->s.start, val->s.end); CHECK_NULL_RETURN_MEMERR(ds); @@ -1983,15 +2007,15 @@ scan_env_clear(ScanEnv* env) xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); - env->parse_depth = 0; + env->parse_depth = 0; #ifdef ONIG_DEBUG_PARSE - env->max_parse_depth = 0; + env->max_parse_depth = 0; #endif - env->backref_num = 0; - env->keep_num = 0; - env->save_num = 0; - env->save_alloc_num = 0; - env->saves = 0; + env->backref_num = 0; + env->keep_num = 0; + env->id_num = 0; + env->save_alloc_num = 0; + env->saves = 0; } static int @@ -2042,16 +2066,11 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node) return 0; } -extern void -onig_node_free(Node* node) +static void +node_free_body(Node* node) { - start: if (IS_NULL(node)) return ; -#ifdef DEBUG_NODE_FREE - fprintf(stderr, "onig_node_free: %p\n", node); -#endif - switch (NODE_TYPE(node)) { case NODE_STRING: if (STR_(node)->capacity != 0 && @@ -2063,12 +2082,12 @@ onig_node_free(Node* node) case NODE_LIST: case NODE_ALT: onig_node_free(NODE_CAR(node)); - { - Node* next_node = NODE_CDR(node); - + node = NODE_CDR(node); + while (IS_NOT_NULL(node)) { + Node* next = NODE_CDR(node); + onig_node_free(NODE_CAR(node)); xfree(node); - node = next_node; - goto start; + node = next; } break; @@ -2100,9 +2119,15 @@ onig_node_free(Node* node) break; case NODE_QUANT: + if (NODE_BODY(node)) + onig_node_free(NODE_BODY(node)); + break; + case NODE_ANCHOR: if (NODE_BODY(node)) onig_node_free(NODE_BODY(node)); + if 
(IS_NOT_NULL(ANCHOR_(node)->lead_node)) + onig_node_free(ANCHOR_(node)->lead_node); break; case NODE_CTYPE: @@ -2110,7 +2135,18 @@ onig_node_free(Node* node) case NODE_GIMMICK: break; } +} +extern void +onig_node_free(Node* node) +{ + if (IS_NULL(node)) return ; + +#ifdef DEBUG_NODE_FREE + fprintf(stderr, "onig_node_free: %p\n", node); +#endif + + node_free_body(node); xfree(node); } @@ -2137,6 +2173,66 @@ node_new(void) return node; } +extern int +onig_node_copy(Node** rcopy, Node* from) +{ + int r; + Node* copy; + + *rcopy = NULL_NODE; + + switch (NODE_TYPE(from)) { + case NODE_LIST: + case NODE_ALT: + case NODE_ANCHOR: + /* These node's link to other nodes are processed by caller. */ + break; + case NODE_STRING: + case NODE_CCLASS: + case NODE_CTYPE: + /* Fixed contents after copy. */ + break; + default: + /* Not supported yet. */ + return ONIGERR_TYPE_BUG; + break; + } + + copy = node_new(); + CHECK_NULL_RETURN_MEMERR(copy); + xmemcpy(copy, from, sizeof(*copy)); + + switch (NODE_TYPE(copy)) { + case NODE_STRING: + r = onig_node_str_set(copy, STR_(from)->s, STR_(from)->end, FALSE); + if (r != 0) { + err: + onig_node_free(copy); + return r; + } + break; + + case NODE_CCLASS: + { + CClassNode *fcc, *tcc; + + fcc = CCLASS_(from); + tcc = CCLASS_(copy); + if (IS_NOT_NULL(fcc->mbuf)) { + r = bbuf_clone(&(tcc->mbuf), fcc->mbuf); + if (r != 0) goto err; + } + } + break; + + default: + break; + } + + *rcopy = copy; + return ONIG_NORMAL; +} + static void initialize_cclass(CClassNode* cc) @@ -2166,30 +2262,20 @@ node_new_ctype(int type, int not, OnigOptionType options) NODE_SET_TYPE(node, NODE_CTYPE); CTYPE_(node)->ctype = type; CTYPE_(node)->not = not; - CTYPE_(node)->options = options; - CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options); - return node; -} - -static Node* -node_new_anychar(void) -{ - Node* node = node_new_ctype(CTYPE_ANYCHAR, FALSE, ONIG_OPTION_NONE); + CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options); return node; } 
static Node* -node_new_anychar_with_fixed_option(OnigOptionType option) +node_new_anychar(OnigOptionType options) { - CtypeNode* ct; Node* node; - node = node_new_anychar(); + node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options); CHECK_NULL_RETURN(node); - ct = CTYPE_(node); - ct->options = option; - NODE_STATUS_ADD(node, FIXED_OPTION); + if (OPTON_MULTILINE(options)) + NODE_STATUS_ADD(node, MULTILINE); return node; } @@ -2198,18 +2284,18 @@ node_new_no_newline(Node** node, ScanEnv* env) { Node* n; - n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE); + n = node_new_anychar(ONIG_OPTION_NONE); CHECK_NULL_RETURN_MEMERR(n); *node = n; return 0; } static int -node_new_true_anychar(Node** node, ScanEnv* env) +node_new_true_anychar(Node** node) { Node* n; - n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE); + n = node_new_anychar(ONIG_OPTION_MULTILINE); CHECK_NULL_RETURN_MEMERR(n); *node = n; return 0; @@ -2291,16 +2377,41 @@ make_alt(int n, Node* ns[]) return make_list_or_alt(NODE_ALT, n, ns); } -extern Node* -onig_node_new_anchor(int type, int ascii_mode) +static Node* +node_new_anchor(int type) { - Node* node = node_new(); + Node* node; + + node = node_new(); CHECK_NULL_RETURN(node); NODE_SET_TYPE(node, NODE_ANCHOR); ANCHOR_(node)->type = type; - ANCHOR_(node)->char_len = -1; + ANCHOR_(node)->char_min_len = 0; + ANCHOR_(node)->char_max_len = INFINITE_LEN; + ANCHOR_(node)->ascii_mode = 0; + ANCHOR_(node)->lead_node = NULL_NODE; + return node; +} + +static Node* +node_new_anchor_with_options(int type, OnigOptionType options) +{ + int ascii_mode; + Node* node; + + node = node_new_anchor(type); + CHECK_NULL_RETURN(node); + + ascii_mode = OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type) ? 
1 : 0; ANCHOR_(node)->ascii_mode = ascii_mode; + + if (type == ANCR_TEXT_SEGMENT_BOUNDARY || + type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) { + if (OPTON_TEXT_SEGMENT_WORD(options)) + NODE_STATUS_ADD(node, TEXT_SEGMENT_WORD); + } + return node; } @@ -2312,8 +2423,9 @@ node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) { int i; - Node* node = node_new(); + Node* node; + node = node_new(); CHECK_NULL_RETURN(node); NODE_SET_TYPE(node, NODE_BACKREF); @@ -2322,6 +2434,9 @@ node_new_backref(int back_num, int* backrefs, int by_name, if (by_name != 0) NODE_STATUS_ADD(node, BY_NAME); + if (OPTON_IGNORECASE(env->options)) + NODE_STATUS_ADD(node, IGNORECASE); + #ifdef USE_BACKREF_WITH_LEVEL if (exist_level != 0) { NODE_STATUS_ADD(node, NEST_LEVEL); @@ -2512,24 +2627,35 @@ node_drop_group(Node* group) } static int +node_set_fail(Node* node) +{ + NODE_SET_TYPE(node, NODE_GIMMICK); + GIMMICK_(node)->type = GIMMICK_FAIL; + return ONIG_NORMAL; +} + +static int node_new_fail(Node** node, ScanEnv* env) { *node = node_new(); CHECK_NULL_RETURN_MEMERR(*node); - NODE_SET_TYPE(*node, NODE_GIMMICK); - GIMMICK_(*node)->type = GIMMICK_FAIL; - return ONIG_NORMAL; + return node_set_fail(*node); +} + +extern int +onig_node_reset_fail(Node* node) +{ + node_free_body(node); + return node_set_fail(node); } static int node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env) { int id; - int r; - r = save_entry(env, save_type, &id); - if (r != ONIG_NORMAL) return r; + ID_ENTRY(env, id); *node = node_new(); CHECK_NULL_RETURN_MEMERR(*node); @@ -2692,10 +2818,10 @@ make_text_segment(Node** node, ScanEnv* env) ns[1] = NULL_NODE; r = ONIGERR_MEMORY; - ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, FALSE); + ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options); if (IS_NULL(ns[0])) goto err; - r = node_new_true_anychar(&ns[1], env); + r = node_new_true_anychar(&ns[1]); if (r != 0) goto err1; x = make_list(2, ns); @@ -2710,7 +2836,7 
@@ make_text_segment(Node** node, ScanEnv* env) ns[0] = NULL_NODE; ns[1] = x; - r = node_new_true_anychar(&ns[0], env); + r = node_new_true_anychar(&ns[0]); if (r != 0) goto err1; x = make_list(2, ns); @@ -2757,6 +2883,9 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent, id, env); if (r != 0) goto err; + if (is_range_cutter != 0) + NODE_STATUS_ADD(ns[2], ABSENT_WITH_SIDE_EFFECTS); + r = node_new_fail(&ns[3], env); if (r != 0) goto err; @@ -2896,6 +3025,7 @@ make_range_clear(Node** node, ScanEnv* env) r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, ID_NOT_USED_DONT_CARE_ME, env); if (r != 0) goto err; + NODE_STATUS_ADD(ns[0], ABSENT_WITH_SIDE_EFFECTS); x = make_alt(2, ns); if (IS_NULL(x)) goto err0; @@ -3059,7 +3189,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE); if (IS_NULL(quant)) goto err0; - r = node_new_true_anychar(&body, env); + r = node_new_true_anychar(&body); if (r != 0) { onig_node_free(quant); goto err; @@ -3094,7 +3224,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, id2 = GIMMICK_(ns[1])->id; - r = node_new_true_anychar(&ns[3], env); + r = node_new_true_anychar(&ns[3]); if (r != 0) goto err; possessive = 1; @@ -3167,9 +3297,9 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) } extern int -onig_node_str_set(Node* node, const UChar* s, const UChar* end) +onig_node_str_set(Node* node, const UChar* s, const UChar* end, int need_free) { - onig_node_str_clear(node); + onig_node_str_clear(node, need_free); return onig_node_str_cat(node, s, end); } @@ -3183,9 +3313,10 @@ node_str_cat_char(Node* node, UChar c) } extern void -onig_node_str_clear(Node* node) +onig_node_str_clear(Node* node, int need_free) { - if (STR_(node)->capacity != 0 && + if (need_free != 0 && + STR_(node)->capacity != 0 && IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { xfree(STR_(node)->s); } 
@@ -3194,29 +3325,52 @@ onig_node_str_clear(Node* node) STR_(node)->s = STR_(node)->buf; STR_(node)->end = STR_(node)->buf; STR_(node)->capacity = 0; - STR_(node)->case_min_len = 0; } -static Node* -node_new_str(const UChar* s, const UChar* end) +static int +node_set_str(Node* node, const UChar* s, const UChar* end) { - Node* node = node_new(); - CHECK_NULL_RETURN(node); + int r; NODE_SET_TYPE(node, NODE_STRING); STR_(node)->flag = 0; STR_(node)->s = STR_(node)->buf; STR_(node)->end = STR_(node)->buf; STR_(node)->capacity = 0; - STR_(node)->case_min_len = 0; - if (onig_node_str_cat(node, s, end)) { + r = onig_node_str_cat(node, s, end); + return r; +} + +static Node* +node_new_str(const UChar* s, const UChar* end) +{ + int r; + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + r = node_set_str(node, s, end); + if (r != 0) { onig_node_free(node); return NULL; } + return node; } +static int +node_reset_str(Node* node, const UChar* s, const UChar* end) +{ + node_free_body(node); + return node_set_str(node, s, end); +} + +extern int +onig_node_reset_empty(Node* node) +{ + return node_reset_str(node, NULL, NULL); +} + extern Node* onig_node_new_str(const UChar* s, const UChar* end) { @@ -3224,9 +3378,22 @@ onig_node_new_str(const UChar* s, const UChar* end) } static Node* -node_new_str_crude(UChar* s, UChar* end) +node_new_str_with_options(const UChar* s, const UChar* end, + OnigOptionType options) +{ + Node* node; + node = node_new_str(s, end); + + if (OPTON_IGNORECASE(options)) + NODE_STATUS_ADD(node, IGNORECASE); + + return node; +} + +static Node* +node_new_str_crude(UChar* s, UChar* end, OnigOptionType options) { - Node* node = node_new_str(s, end); + Node* node = node_new_str_with_options(s, end, options); CHECK_NULL_RETURN(node); NODE_STRING_SET_CRUDE(node); return node; @@ -3239,14 +3406,14 @@ node_new_empty(void) } static Node* -node_new_str_crude_char(UChar c) +node_new_str_crude_char(UChar c, OnigOptionType options) { int i; UChar p[1]; Node* node; p[0] 
= c; - node = node_new_str_crude(p, p + 1); + node = node_new_str_crude(p, p + 1, options); /* clear buf tail */ for (i = 1; i < NODE_STRING_BUF_SIZE; i++) @@ -3269,12 +3436,13 @@ str_node_split_last_char(Node* node, OnigEncoding enc) if (p && p > sn->s) { /* can be split. */ rn = node_new_str(p, sn->end); CHECK_NULL_RETURN(rn); - if (NODE_STRING_IS_CRUDE(node)) - NODE_STRING_SET_CRUDE(rn); sn->end = (UChar* )p; + STR_(rn)->flag = sn->flag; + NODE_STATUS(rn) = NODE_STATUS(node); } } + return rn; } @@ -3301,7 +3469,7 @@ scan_number(UChar** src, const UChar* end, OnigEncoding enc) PFETCH(c); if (IS_CODE_DIGIT_ASCII(enc, c)) { val = (int )DIGITVAL(c); - if ((INT_MAX - val) / 10 < num) + if ((ONIG_INT_MAX - val) / 10 < num) return -1; /* overflow */ num = num * 10 + val; @@ -3389,6 +3557,179 @@ scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen, return ONIG_NORMAL; } +static int +scan_number_of_base(UChar** src, UChar* end, int minlen, + OnigEncoding enc, OnigCodePoint* rcode, int base) +{ + int r; + + if (base == 16) + r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode); + else if (base == 8) + r = scan_octal_number(src, end, minlen, 11, enc, rcode); + else + r = ONIGERR_INVALID_CODE_POINT_VALUE; + + return r; +} + +#define IS_CODE_POINT_DIVIDE(c) ((c) == ' ' || (c) == '\n') + +enum CPS_STATE { + CPS_EMPTY = 0, + CPS_START = 1, + CPS_RANGE = 2 +}; + +static int +check_code_point_sequence_cc(UChar* p, UChar* end, int base, + OnigEncoding enc, int state) +{ + int r; + int n; + int end_digit; + OnigCodePoint code; + OnigCodePoint c; + PFETCH_READY; + + end_digit = FALSE; + n = 0; + while (! PEND) { + start: + PFETCH(c); + if (c == '}') { + end_char: + if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE; + return n; + } + + if (IS_CODE_POINT_DIVIDE(c)) { + while (! PEND) { + PFETCH(c); + if (! 
IS_CODE_POINT_DIVIDE(c)) break; + } + if (IS_CODE_POINT_DIVIDE(c)) + return ONIGERR_INVALID_CODE_POINT_VALUE; + } + else if (c == '-') { + range: + if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE; + if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE; + end_digit = FALSE; + state = CPS_RANGE; + goto start; + } + else if (end_digit == TRUE) { + if (base == 16) { + if (IS_CODE_XDIGIT_ASCII(enc, c)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + else if (base == 8) { + if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + return ONIGERR_INVALID_CODE_POINT_VALUE; + } + + if (c == '}') goto end_char; + if (c == '-') goto range; + + PUNFETCH; + r = scan_number_of_base(&p, end, 1, enc, &code, base); + if (r != 0) return r; + n++; + end_digit = TRUE; + state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START; + } + + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static int +check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc) +{ + int r; + int n; + int end_digit; + OnigCodePoint code; + OnigCodePoint c; + PFETCH_READY; + + end_digit = FALSE; + n = 0; + while (! PEND) { + PFETCH(c); + if (c == '}') { + end_char: + return n; + } + + if (IS_CODE_POINT_DIVIDE(c)) { + while (! PEND) { + PFETCH(c); + if (! 
IS_CODE_POINT_DIVIDE(c)) break; + } + if (IS_CODE_POINT_DIVIDE(c)) + return ONIGERR_INVALID_CODE_POINT_VALUE; + } + else if (end_digit == TRUE) { + if (base == 16) { + if (IS_CODE_XDIGIT_ASCII(enc, c)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + else if (base == 8) { + if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + return ONIGERR_INVALID_CODE_POINT_VALUE; + } + + if (c == '}') goto end_char; + + PUNFETCH; + r = scan_number_of_base(&p, end, 1, enc, &code, base); + if (r != 0) return r; + n++; + end_digit = TRUE; + } + + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static int +get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode) +{ + int r; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + while (! PEND) { + PFETCH(c); + if (! IS_CODE_POINT_DIVIDE(c)) break; + } + if (IS_CODE_POINT_DIVIDE(c)) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + if (c == '}') { + *src = p; + return 1; /* end of sequence */ + } + else if (c == '-' && in_cc == TRUE) { + *src = p; + return 2; /* range */ + } + + PUNFETCH; + r = scan_number_of_base(&p, end, 1, enc, rcode, base); + if (r != 0) return r; + + *src = p; + return ONIG_NORMAL; +} + #define BB_WRITE_CODE_POINT(bbuf,pos,code) \ BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) @@ -4000,10 +4341,10 @@ node_new_general_newline(Node** node, ScanEnv* env) dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf); if (dlen < 0) return dlen; - alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen); + alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen); if (alen < 0) return alen; - crnl = node_new_str_crude(buf, buf + dlen + alen); + crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE); CHECK_NULL_RETURN_MEMERR(crnl); ncc = node_new_cclass(); @@ -4011,10 +4352,10 @@ node_new_general_newline(Node** node, ScanEnv* env) cc = CCLASS_(ncc); if (dlen == 1) { - bitset_set_range(cc->bs, 0x0a, 0x0d); + 
bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d); } else { - r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d); + r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d); if (r != 0) { err1: onig_node_free(ncc); @@ -4074,8 +4415,9 @@ enum TokenSyms { typedef struct { enum TokenSyms type; + int code_point_continue; int escaped; - int base; /* is number: 8, 16 (used in [....]) */ + int base_num; /* is number: 8, 16 (used in [....]) */ UChar* backp; union { UChar* s; @@ -4112,6 +4454,11 @@ typedef struct { } u; } PToken; +static void +ptoken_init(PToken* tok) +{ + tok->code_point_continue = 0; +} static int fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env) @@ -4681,7 +5028,7 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, } static int -fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) +fetch_token_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env, int state) { int r; OnigCodePoint code; @@ -4692,6 +5039,24 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) UChar* p = *src; PFETCH_READY; + if (tok->code_point_continue != 0) { + r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code); + if (r == 1) { + tok->code_point_continue = 0; + } + else if (r == 2) { + tok->type = TK_CC_RANGE; + goto end; + } + else if (r == 0) { + tok->type = TK_CODE_POINT; + tok->u.code = code; + goto end; + } + else + return r; /* error */ + } + if (PEND) { tok->type = TK_EOT; return tok->type; @@ -4699,9 +5064,9 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) PFETCH(c); tok->type = TK_CHAR; - tok->base = 0; - tok->u.code = c; - tok->escaped = 0; + tok->base_num = 0; + tok->u.code = c; + tok->escaped = 0; if (c == ']') { tok->type = TK_CC_CLOSE; @@ -4798,16 +5163,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { - PINC; - tok->type = 
TK_CODE_POINT; - tok->base = 8; - tok->u.code = code; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } + tok->base_num = 8; + goto brace_code_point_entry; } break; @@ -4825,10 +5182,24 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { - PINC; + tok->base_num = 16; + brace_code_point_entry: + if ((p > prev + enclen(enc, prev))) { + if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE; + if (PPEEK_IS('}')) { + PINC; + } + else { + int curr_state; + + curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START; + r = check_code_point_sequence_cc(p, end, tok->base_num, enc, + curr_state); + if (r < 0) return r; + if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE; + tok->code_point_continue = TRUE; + } tok->type = TK_CODE_POINT; - tok->base = 16; tok->u.code = code; } else { @@ -4843,8 +5214,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) code = 0; /* but, it's not error */ } tok->type = TK_CRUDE_BYTE; - tok->base = 16; - tok->u.byte = (UChar )code; + tok->base_num = 16; + tok->u.byte = (UChar )code; } break; @@ -4858,9 +5229,9 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (p == prev) { /* can't read nothing. 
*/ code = 0; /* but, it's not error */ } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = code; + tok->type = TK_CODE_POINT; + tok->base_num = 16; + tok->u.code = code; } break; @@ -4876,8 +5247,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) code = 0; /* but, it's not error */ } tok->type = TK_CRUDE_BYTE; - tok->base = 8; - tok->u.byte = (UChar )code; + tok->base_num = 8; + tok->u.byte = (UChar )code; } break; @@ -4941,15 +5312,29 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) UChar* p = *src; PFETCH_READY; + if (tok->code_point_continue != 0) { + r = get_next_code_point(&p, end, tok->base_num, enc, FALSE, &code); + if (r == 1) { + tok->code_point_continue = 0; + } + else if (r == 0) { + tok->type = TK_CODE_POINT; + tok->u.code = code; + goto out; + } + else + return r; /* error */ + } + start: if (PEND) { tok->type = TK_EOT; return tok->type; } - tok->type = TK_STRING; - tok->base = 0; - tok->backp = p; + tok->type = TK_STRING; + tok->base_num = 0; + tok->backp = p; PFETCH(c); if (IS_MC_ESC_CODE(c, syn)) { @@ -5204,15 +5589,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { - PINC; - tok->type = TK_CODE_POINT; - tok->u.code = code; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } + tok->base_num = 8; + goto brace_code_point_entry; } break; @@ -5229,8 +5607,19 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { - PINC; + tok->base_num = 16; + brace_code_point_entry: + if ((p > prev + enclen(enc, prev))) { + if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE; + if (PPEEK_IS('}')) { + PINC; + } + else { + r = check_code_point_sequence(p, end, tok->base_num, enc); + if (r < 0) return r; + if (r == 0) return 
ONIGERR_INVALID_CODE_POINT_VALUE; + tok->code_point_continue = TRUE; + } tok->type = TK_CODE_POINT; tok->u.code = code; } @@ -5246,8 +5635,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) code = 0; /* but, it's not error */ } tok->type = TK_CRUDE_BYTE; - tok->base = 16; - tok->u.byte = (UChar )code; + tok->base_num = 16; + tok->u.byte = (UChar )code; } break; @@ -5261,9 +5650,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (p == prev) { /* can't read nothing. */ code = 0; /* but, it's not error */ } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = code; + tok->type = TK_CODE_POINT; + tok->base_num = 16; + tok->u.code = code; } break; @@ -5311,8 +5700,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) code = 0; /* but, it's not error */ } tok->type = TK_CRUDE_BYTE; - tok->base = 8; - tok->u.byte = (UChar )code; + tok->base_num = 8; + tok->u.byte = (UChar )code; } else if (c != '0') { PINC; @@ -5484,7 +5873,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (c == MC_ANYCHAR(syn)) goto any_char; else if (c == MC_ANYTIME(syn)) - goto anytime; + goto any_time; else if (c == MC_ZERO_OR_ONE_TIME(syn)) goto zero_or_one_time; else if (c == MC_ONE_OR_MORE_TIME(syn)) @@ -5508,7 +5897,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) case '*': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; #ifdef USE_VARIABLE_META_CHARS - anytime: + any_time: #endif tok->type = TK_REPEAT; tok->u.repeat.lower = 0; @@ -5664,14 +6053,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) case '^': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->options) + tok->u.subtype = (OPTON_SINGLELINE(env->options) ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE); break; case '$': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->options) + tok->u.subtype = (OPTON_SINGLELINE(env->options) ? ANCR_SEMI_END_BUF : ANCR_END_LINE); break; @@ -5686,7 +6075,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '#': - if (IS_EXTEND(env->options)) { + if (OPTON_EXTEND(env->options)) { while (!PEND) { PFETCH(c); if (ONIGENC_IS_CODE_NEWLINE(enc, c)) @@ -5698,7 +6087,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case ' ': case '\t': case '\n': case '\r': case '\f': - if (IS_EXTEND(env->options)) + if (OPTON_EXTEND(env->options)) goto start; break; @@ -5708,9 +6097,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) } } -#ifdef USE_VARIABLE_META_CHARS out: -#endif *src = p; return tok->type; } @@ -5884,8 +6271,6 @@ add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not, static int add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) { -#define ASCII_LIMIT 127 - int c, r; int ascii_mode; int is_single; @@ -5894,7 +6279,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) OnigCodePoint sb_out; OnigEncoding enc = env->enc; - ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options); + ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(ctype, env->options); r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); if (r == 0) { @@ -6115,20 +6500,6 @@ parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* en } -typedef enum { - CS_VALUE, - CS_RANGE, - CS_COMPLETE, - CS_START -} CSTATE; - -typedef enum { - CV_UNDEF, - CV_SB, - CV_MB, - CV_CPROP -} CVAL; - static int cc_cprop_next(CClassNode* cc, OnigCodePoint* pcode, CVAL* val, CSTATE* state, ScanEnv* env) @@ -6262,11 +6633,12 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) *np = NULL_NODE; INC_PARSE_DEPTH(env->parse_depth); + state = CS_START; prev_cc = (CClassNode* )NULL; - r 
= fetch_token_in_cc(tok, src, end, env); + r = fetch_token_cc(tok, src, end, env, state); if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) { neg = 1; - r = fetch_token_in_cc(tok, src, end, env); + r = fetch_token_cc(tok, src, end, env, state); } else { neg = 0; @@ -6287,7 +6659,6 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) cc = CCLASS_(node); and_start = 0; - state = CS_START; curr_type = CV_UNDEF; p = *src; @@ -6308,19 +6679,19 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case TK_CRUDE_BYTE: - /* tok->base != 0 : octal or hexadec. */ - if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { + /* tok->base_num != 0 : octal or hexadec. */ + if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base_num != 0) { int i, j; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; UChar* psave = p; - int base = tok->base; + int base_num = tok->base_num; buf[0] = tok->u.byte; for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE); if (r < 0) goto err; - if (r != TK_CRUDE_BYTE || tok->base != base) { + if (r != TK_CRUDE_BYTE || tok->base_num != base_num) { fetched = 1; break; } @@ -6343,7 +6714,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) else if (i > len) { /* fetch back */ p = psave; for (i = 1; i < len; i++) { - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE); } fetched = 0; } @@ -6424,7 +6795,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) case TK_CC_RANGE: if (state == CS_VALUE) { - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env, CS_RANGE); if (r < 0) goto err; fetched = 1; @@ -6451,7 +6822,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) in_code = tok->u.code; in_raw = 0; - r = 
fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env, CS_VALUE); if (r < 0) goto err; fetched = 1; @@ -6466,7 +6837,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) goto any_char_in; /* [!--] is allowed */ } else { /* CS_COMPLETE */ - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env, CS_VALUE); if (r < 0) goto err; fetched = 1; @@ -6549,7 +6920,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (fetched) r = tok->type; else { - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env, state); if (r < 0) goto err; } } @@ -6578,8 +6949,6 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) BITSET_IS_EMPTY(cc->bs, is_empty); if (is_empty == 0) { -#define NEWLINE_CODE 0x0a - if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) BITSET_SET_BIT(cc->bs, NEWLINE_CODE); @@ -7095,10 +7464,10 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, break; case '=': - *np = onig_node_new_anchor(ANCR_PREC_READ, FALSE); + *np = node_new_anchor(ANCR_PREC_READ); break; case '!': /* preceding read */ - *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, FALSE); + *np = node_new_anchor(ANCR_PREC_READ_NOT); break; case '>': /* (?>...) 
stop backtrack */ *np = node_new_bag(BAG_STOP_BACKTRACK); @@ -7116,9 +7485,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; PFETCH(c); if (c == '=') - *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, FALSE); + *np = node_new_anchor(ANCR_LOOK_BEHIND); else if (c == '!') - *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, FALSE); + *np = node_new_anchor(ANCR_LOOK_BEHIND_NOT); else { if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { UChar *name; @@ -7131,7 +7500,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, named_group1: list_capture = 0; +#ifdef USE_CAPTURE_HISTORY named_group2: +#endif name = p; r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, &num_type, FALSE); @@ -7612,7 +7983,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, } #endif else { - if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + if (OPTON_DONT_CAPTURE_GROUP(env->options)) goto group; *np = node_new_memory(0); @@ -7883,7 +8254,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) else { len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); if (n == 0 || NODE_TYPE(ns[n-1]) != NODE_STRING) { - csnode = onig_node_new_str(buf, buf + len); + csnode = node_new_str(buf, buf + len); if (IS_NULL(csnode)) goto err_free_ns; NODE_STRING_SET_CASE_EXPANDED(csnode); @@ -7922,6 +8293,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, Node** tp; unsigned int parse_depth; + retry: group = 0; *np = NULL; if (tok->type == (enum TokenSyms )term) @@ -7955,19 +8327,28 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, } } else if (r == 2) { /* option only */ - Node* target; - OnigOptionType prev = env->options; + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH)) { + env->options = BAG_(*np)->o.options; + r = fetch_token(tok, src, end, env); + 
if (r < 0) return r; + onig_node_free(*np); + goto retry; + } + else { + Node* target; + OnigOptionType prev = env->options; - env->options = BAG_(*np)->o.options; - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - r = parse_alts(&target, tok, term, src, end, env, FALSE); - env->options = prev; - if (r < 0) { - onig_node_free(target); - return r; + env->options = BAG_(*np)->o.options; + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + r = parse_alts(&target, tok, term, src, end, env, FALSE); + env->options = prev; + if (r < 0) { + onig_node_free(target); + return r; + } + NODE_BODY(*np) = target; } - NODE_BODY(*np) = target; return tok->type; } break; @@ -7983,7 +8364,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, case TK_STRING: tk_byte: { - *np = node_new_str(tok->backp, *src); + *np = node_new_str_with_options(tok->backp, *src, env->options); CHECK_NULL_RETURN_MEMERR(*np); while (1) { @@ -8004,7 +8385,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, case TK_CRUDE_BYTE: tk_crude_byte: { - *np = node_new_str_crude_char(tok->u.byte); + *np = node_new_str_crude_char(tok->u.byte, env->options); CHECK_NULL_RETURN_MEMERR(*np); len = 1; while (1) { @@ -8041,9 +8422,9 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); if (len < 0) return len; #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG - *np = node_new_str_crude(buf, buf + len); + *np = node_new_str_crude(buf, buf + len, env->options); #else - *np = node_new_str(buf, buf + len); + *np = node_new_str_with_options(buf, buf + len, env->options); #endif CHECK_NULL_RETURN_MEMERR(*np); } @@ -8061,7 +8442,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (IS_NULL(qend)) { nextp = qend = end; } - *np = node_new_str(qstart, qend); + *np = node_new_str_with_options(qstart, qend, env->options); CHECK_NULL_RETURN_MEMERR(*np); *src = nextp; } @@ -8109,7 +8490,7 
@@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (r != 0) return r; cc = CCLASS_(*np); - if (IS_IGNORECASE(env->options)) { + if (OPTON_IGNORECASE(env->options)) { IApplyCaseFoldArg iarg; iarg.env = env; @@ -8136,12 +8517,12 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, break; case TK_ANYCHAR: - *np = node_new_anychar(); + *np = node_new_anychar(env->options); CHECK_NULL_RETURN_MEMERR(*np); break; case TK_ANYCHAR_ANYTIME: - *np = node_new_anychar(); + *np = node_new_anychar(env->options); CHECK_NULL_RETURN_MEMERR(*np); qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE); CHECK_NULL_RETURN_MEMERR(qn); @@ -8179,12 +8560,8 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, #endif case TK_ANCHOR: - { - int ascii_mode = - IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0; - *np = onig_node_new_anchor(tok->u.anchor, ascii_mode); - CHECK_NULL_RETURN_MEMERR(*np); - } + *np = node_new_anchor_with_options(tok->u.anchor, env->options); + CHECK_NULL_RETURN_MEMERR(*np); break; case TK_REPEAT: @@ -8218,7 +8595,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, break; case TK_TRUE_ANYCHAR: - r = node_new_true_anychar(np, env); + r = node_new_true_anychar(np); if (r < 0) return r; break; @@ -8364,9 +8741,11 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, { int r; Node *node, **headp; + OnigOptionType save_options; *top = NULL; INC_PARSE_DEPTH(env->parse_depth); + save_options = env->options; r = parse_branch(&node, tok, term, src, end, env, group_head); if (r < 0) { @@ -8415,6 +8794,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, return ONIGERR_PARSER_BUG; } + env->options = save_options; DEC_PARSE_DEPTH(env->parse_depth); return r; } @@ -8425,6 +8805,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) int r; PToken tok; + ptoken_init(&tok); r = fetch_token(&tok, src, end, env); if (r < 0) 
return r; r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE); @@ -8483,10 +8864,9 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, p = (UChar* )pattern; r = parse_regexp(root, &p, (UChar* )end, env); - -#ifdef USE_CALL if (r != 0) return r; +#ifdef USE_CALL if (env->has_call_zero != 0) { Node* zero_node; r = make_call_zero_body(*root, env, &zero_node); diff --git a/src/regparse.h b/src/regparse.h index 1525ccb..979e982 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,23 +31,23 @@ #include "regint.h" -#define NODE_STRING_MARGIN 16 -#define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE 6 +#define NODE_STRING_MARGIN 16 +#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_BACKREFS_SIZE 6 /* node type */ typedef enum { - NODE_STRING = 0, - NODE_CCLASS = 1, - NODE_CTYPE = 2, - NODE_BACKREF = 3, - NODE_QUANT = 4, - NODE_BAG = 5, - NODE_ANCHOR = 6, - NODE_LIST = 7, - NODE_ALT = 8, - NODE_CALL = 9, - NODE_GIMMICK = 10 + NODE_STRING = 0, + NODE_CCLASS = 1, + NODE_CTYPE = 2, + NODE_BACKREF = 3, + NODE_QUANT = 4, + NODE_BAG = 5, + NODE_ANCHOR = 6, + NODE_LIST = 7, + NODE_ALT = 8, + NODE_CALL = 9, + NODE_GIMMICK = 10 } NodeType; enum BagType { @@ -67,10 +67,10 @@ enum GimmickType { }; enum BodyEmptyType { - BODY_IS_NOT_EMPTY = 0, - BODY_IS_EMPTY_POSSIBILITY = 1, - BODY_IS_EMPTY_POSSIBILITY_MEM = 2, - BODY_IS_EMPTY_POSSIBILITY_REC = 3 + BODY_IS_NOT_EMPTY = 0, + BODY_MAY_BE_EMPTY = 1, + BODY_MAY_BE_EMPTY_MEM = 2, + BODY_MAY_BE_EMPTY_REC = 3 }; struct _Node; @@ -84,8 +84,7 @@ typedef struct { UChar* end; unsigned int flag; UChar buf[NODE_STRING_BUF_SIZE]; - 
int capacity; /* (allocated size - 1) or 0: use buf[] */ - int case_min_len; + int capacity; /* (allocated size - 1) or 0: use buf[] */ } StrNode; typedef struct { @@ -110,7 +109,7 @@ typedef struct { enum BodyEmptyType emptiness; struct _Node* head_exact; struct _Node* next_head_exact; - int include_referred; /* include called node. don't eliminate even if {0} */ + int include_referred; /* include called node. don't eliminate even if {0} */ } QuantNode; typedef struct { @@ -139,7 +138,8 @@ typedef struct { /* for multiple call reference */ OnigLen min_len; /* min length (byte) */ OnigLen max_len; /* max length (byte) */ - int char_len; /* character length */ + OnigLen min_char_len; + OnigLen max_char_len; int opt_count; /* referenced count in optimize_nodes() */ } BagNode; @@ -189,8 +189,10 @@ typedef struct { struct _Node* body; int type; - int char_len; + OnigLen char_min_len; + OnigLen char_max_len; int ascii_mode; + struct _Node* lead_node; } AnchorNode; typedef struct { @@ -209,7 +211,6 @@ typedef struct { int ctype; int not; - OnigOptionType options; int ascii_mode; } CtypeNode; @@ -248,6 +249,11 @@ typedef struct _Node { } u; } Node; +typedef struct { + int new_val; +} GroupNumMap; + + #define NULL_NODE ((Node* )0) @@ -280,62 +286,59 @@ typedef struct _Node { #define CALL_(node) (&((node)->u.call)) #define GIMMICK_(node) (&((node)->u.gimmick)) -#define NODE_CAR(node) (CONS_(node)->car) -#define NODE_CDR(node) (CONS_(node)->cdr) +#define NODE_CAR(node) (CONS_(node)->car) +#define NODE_CDR(node) (CONS_(node)->cdr) #define CTYPE_ANYCHAR -1 #define NODE_IS_ANYCHAR(node) \ (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) -#define CTYPE_OPTION(node, reg) \ - (NODE_IS_FIXED_OPTION(node) ? 
CTYPE_(node)->options : reg->options) - #define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML) #define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF) -#define NODE_STRING_CRUDE (1<<0) -#define NODE_STRING_CASE_EXPANDED (1<<1) -#define NODE_STRING_CASE_FOLD_MATCH (1<<2) +#define NODE_STRING_CRUDE (1<<0) +#define NODE_STRING_CASE_EXPANDED (1<<1) #define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s) #define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE #define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE #define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED -#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH #define NODE_STRING_IS_CRUDE(node) \ (((node)->u.str.flag & NODE_STRING_CRUDE) != 0) #define NODE_STRING_IS_CASE_EXPANDED(node) \ (((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0) -#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \ - (((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0) #define BACKREFS_P(br) \ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) /* node status bits */ -#define NODE_ST_MIN_FIXED (1<<0) -#define NODE_ST_MAX_FIXED (1<<1) -#define NODE_ST_CLEN_FIXED (1<<2) -#define NODE_ST_MARK1 (1<<3) -#define NODE_ST_MARK2 (1<<4) -#define NODE_ST_STRICT_REAL_REPEAT (1<<5) -#define NODE_ST_RECURSION (1<<6) -#define NODE_ST_CALLED (1<<7) -#define NODE_ST_ADDR_FIXED (1<<8) -#define NODE_ST_NAMED_GROUP (1<<9) -#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. 
*/ -#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */ -#define NODE_ST_IN_MULTI_ENTRY (1<<12) -#define NODE_ST_NEST_LEVEL (1<<13) -#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */ -#define NODE_ST_BY_NAME (1<<15) /* backref by name */ -#define NODE_ST_BACKREF (1<<16) -#define NODE_ST_CHECKER (1<<17) -#define NODE_ST_FIXED_OPTION (1<<18) -#define NODE_ST_PROHIBIT_RECURSION (1<<19) -#define NODE_ST_SUPER (1<<20) -#define NODE_ST_EMPTY_STATUS_CHECK (1<<21) +#define NODE_ST_FIXED_MIN (1<<0) +#define NODE_ST_FIXED_MAX (1<<1) +#define NODE_ST_FIXED_CLEN (1<<2) +#define NODE_ST_MARK1 (1<<3) +#define NODE_ST_MARK2 (1<<4) +#define NODE_ST_STRICT_REAL_REPEAT (1<<5) +#define NODE_ST_RECURSION (1<<6) +#define NODE_ST_CALLED (1<<7) +#define NODE_ST_FIXED_ADDR (1<<8) +#define NODE_ST_NAMED_GROUP (1<<9) +#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */ +#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */ +#define NODE_ST_IN_MULTI_ENTRY (1<<12) +#define NODE_ST_NEST_LEVEL (1<<13) +#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */ +#define NODE_ST_BY_NAME (1<<15) /* backref by name */ +#define NODE_ST_BACKREF (1<<16) +#define NODE_ST_CHECKER (1<<17) +#define NODE_ST_PROHIBIT_RECURSION (1<<18) +#define NODE_ST_SUPER (1<<19) +#define NODE_ST_EMPTY_STATUS_CHECK (1<<20) +#define NODE_ST_IGNORECASE (1<<21) +#define NODE_ST_MULTILINE (1<<22) +#define NODE_ST_TEXT_SEGMENT_WORD (1<<23) +#define NODE_ST_ABSENT_WITH_SIDE_EFFECTS (1<<24) /* stopper or clear */ +#define NODE_ST_FIXED_CLEN_MIN_SURE (1<<25) #define NODE_STATUS(node) (((Node* )node)->u.base.status) @@ -349,17 +352,16 @@ typedef struct _Node { #define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0) #define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0) #define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0) -#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0) -#define NODE_IS_CLEN_FIXED(node) 
((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0) -#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0) -#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0) +#define NODE_IS_FIXED_ADDR(node) ((NODE_STATUS(node) & NODE_ST_FIXED_ADDR) != 0) +#define NODE_IS_FIXED_CLEN(node) ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN) != 0) +#define NODE_IS_FIXED_MIN(node) ((NODE_STATUS(node) & NODE_ST_FIXED_MIN) != 0) +#define NODE_IS_FIXED_MAX(node) ((NODE_STATUS(node) & NODE_ST_FIXED_MAX) != 0) #define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0) #define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0) #define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0) #define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0) #define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0) #define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0) -#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0) #define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0) #define NODE_IS_PROHIBIT_RECURSION(node) \ ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0) @@ -367,6 +369,11 @@ typedef struct _Node { ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0) #define NODE_IS_EMPTY_STATUS_CHECK(node) \ ((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0) +#define NODE_IS_IGNORECASE(node) ((NODE_STATUS(node) & NODE_ST_IGNORECASE) != 0) +#define NODE_IS_MULTILINE(node) ((NODE_STATUS(node) & NODE_ST_MULTILINE) != 0) +#define NODE_IS_TEXT_SEGMENT_WORD(node) ((NODE_STATUS(node) & NODE_ST_TEXT_SEGMENT_WORD) != 0) +#define NODE_IS_ABSENT_WITH_SIDE_EFFECTS(node) ((NODE_STATUS(node) & NODE_ST_ABSENT_WITH_SIDE_EFFECTS) != 0) +#define NODE_IS_FIXED_CLEN_MIN_SURE(node) ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN_MIN_SURE) != 0) #define NODE_PARENT(node) ((node)->u.base.parent) #define NODE_BODY(node) ((node)->u.base.body) 
@@ -375,11 +382,20 @@ typedef struct _Node { #define NODE_CALL_BODY(node) ((node)->body) #define NODE_ANCHOR_BODY(node) ((node)->body) -#define SCANENV_MEMENV_SIZE 8 +#define SCANENV_MEMENV_SIZE 8 #define SCANENV_MEMENV(senv) \ (IS_NOT_NULL((senv)->mem_env_dynamic) ? \ (senv)->mem_env_dynamic : (senv)->mem_env_static) +#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) +#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) +#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) + +#define ID_ENTRY(env, id) do {\ + id = (env)->id_num++;\ +} while(0) + + typedef struct { Node* mem_node; Node* empty_repeat_node; @@ -403,50 +419,44 @@ typedef struct { UChar* error_end; regex_t* reg; /* for reg->names only */ int num_call; -#ifdef USE_CALL - UnsetAddrList* unset_addr_list; - int has_call_zero; -#endif int num_mem; int num_named; int mem_alloc; MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; MemEnv* mem_env_dynamic; + int backref_num; + int keep_num; + int id_num; + int save_alloc_num; + SaveItem* saves; +#ifdef USE_CALL + UnsetAddrList* unset_addr_list; + int has_call_zero; +#endif unsigned int parse_depth; #ifdef ONIG_DEBUG_PARSE unsigned int max_parse_depth; #endif - int backref_num; - int keep_num; - int save_num; - int save_alloc_num; - SaveItem* saves; } ScanEnv; -#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) -#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) -#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) - -typedef struct { - int new_val; -} GroupNumRemap; - -extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); +extern int onig_renumber_name_table P_((regex_t* reg, GroupNumMap* map)); extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end)); extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); extern int onig_reduce_nested_quantifier P_((Node* 
pnode)); +extern int onig_node_copy(Node** rcopy, Node* from); extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); -extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); +extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end, int need_free)); +extern void onig_node_str_clear P_((Node* node, int need_free)); extern void onig_node_free P_((Node* node)); +extern int onig_node_reset_empty P_((Node* node)); +extern int onig_node_reset_fail P_((Node* node)); extern Node* onig_node_new_bag P_((enum BagType type)); -extern Node* onig_node_new_anchor P_((int type, int ascii_mode)); extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); extern Node* onig_node_new_alt P_((Node* left, Node* right)); -extern void onig_node_str_clear P_((Node* node)); extern int onig_names_free P_((regex_t* reg)); extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); extern int onig_free_shared_cclass_table P_((void)); diff --git a/src/regposerr.c b/src/regposerr.c index e1747c5..12d95a9 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -2,7 +2,7 @@ regposerr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -37,7 +37,10 @@ #include "config.h" #include "onigposix.h" +#ifndef ONIG_NO_STANDARD_C_HEADERS #include <string.h> +#include <stdio.h> +#endif #if defined(__GNUC__) # define ARG_UNUSED __attribute__ ((unused)) @@ -46,13 +49,26 @@ #endif #if defined(_WIN32) && !defined(__GNUC__) + +#ifndef xsnprintf #define xsnprintf sprintf_s +#endif +#ifndef xstrncpy #define xstrncpy(dest,src,size) strncpy_s(dest,size,src,_TRUNCATE) +#endif + #else + +#ifndef xsnprintf #define xsnprintf snprintf +#endif +#ifndef xstrncpy #define xstrncpy strncpy #endif +#endif + + static char* ESTRING[] = { NULL, "failed to match", /* REG_NOMATCH */ @@ -75,7 +91,6 @@ static char* ESTRING[] = { "invalid argument" /* REG_EONIG_BADARG */ }; -#include <stdio.h> extern size_t diff --git a/src/regposix.c b/src/regposix.c index b3e78ff..4e523a4 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -63,6 +63,7 @@ onig2posix_error_code(int code) { ONIGERR_MEMORY, REG_ESPACE }, { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, { ONIGERR_RETRY_LIMIT_IN_MATCH_OVER, REG_EONIG_INTERNAL }, + { ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER, REG_EONIG_INTERNAL }, { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, diff --git a/src/regsyntax.c b/src/regsyntax.c index 513c7f7..984aac6 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -2,7 +2,7 @@ regsyntax.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -152,7 +152,9 @@ OnigSyntaxType OnigSyntaxJava = { ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) - , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) + , ( SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH | + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND ) , ONIG_OPTION_SINGLELINE , { @@ -185,7 +187,7 @@ OnigSyntaxType OnigSyntaxPerl = { ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT ) - , SYN_GNU_REGEX_BV + , SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH , ONIG_OPTION_SINGLELINE , { @@ -223,7 +225,7 @@ OnigSyntaxType OnigSyntaxPerl_NG = { ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT | ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL ) - , ( SYN_GNU_REGEX_BV | + , ( SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) , 
ONIG_OPTION_SINGLELINE diff --git a/src/regversion.c b/src/regversion.c index de993d3..1f0faa1 100644 --- a/src/regversion.c +++ b/src/regversion.c @@ -2,7 +2,7 @@ regversion.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,8 +27,11 @@ * SUCH DAMAGE. */ +#ifndef NEED_TO_INCLUDE_STDIO +#define NEED_TO_INCLUDE_STDIO +#endif + #include "regint.h" -#include <stdio.h> extern const char* onig_version(void) diff --git a/src/sjis_prop.c b/src/sjis_prop.c index e33fbb2..2597a9f 100644 --- a/src/sjis_prop.c +++ b/src/sjis_prop.c @@ -31,8 +31,7 @@ #line 1 "sjis_prop.gperf" -#include <string.h> -#include "regenc.h" +#include "regint.h" #define TOTAL_KEYWORDS 16 #define MIN_WORD_LENGTH 4 @@ -89,50 +88,50 @@ onigenc_sjis_lookup_property_name (register const char *str, register size_t len static struct PropertyNameCtype wordlist[] = { {""}, {""}, {""}, {""}, -#line 23 "sjis_prop.gperf" +#line 22 "sjis_prop.gperf" {"Word", 12}, -#line 12 "sjis_prop.gperf" +#line 11 "sjis_prop.gperf" {"Alpha", 1}, {""}, {""}, -#line 26 "sjis_prop.gperf" +#line 25 "sjis_prop.gperf" {"Hiragana", 15}, {""}, -#line 24 "sjis_prop.gperf" +#line 23 "sjis_prop.gperf" {"Alnum", 13}, {""}, {""}, -#line 27 "sjis_prop.gperf" +#line 26 "sjis_prop.gperf" {"Katakana", 16}, {""}, -#line 25 "sjis_prop.gperf" +#line 24 "sjis_prop.gperf" {"ASCII", 14}, -#line 22 "sjis_prop.gperf" +#line 21 "sjis_prop.gperf" {"XDigit", 11}, {""}, {""}, {""}, -#line 14 "sjis_prop.gperf" +#line 13 "sjis_prop.gperf" {"Cntrl", 3}, {""}, {""}, -#line 13 "sjis_prop.gperf" +#line 12 "sjis_prop.gperf" {"Blank", 2}, {""}, -#line 19 "sjis_prop.gperf" +#line 18 "sjis_prop.gperf" {"Punct", 8}, {""}, {""}, {""}, {""}, -#line 18 "sjis_prop.gperf" +#line 17 "sjis_prop.gperf" {"Print", 7}, {""}, {""}, {""}, 
{""}, -#line 21 "sjis_prop.gperf" +#line 20 "sjis_prop.gperf" {"Upper", 10}, {""}, {""}, {""}, {""}, -#line 20 "sjis_prop.gperf" +#line 19 "sjis_prop.gperf" {"Space", 9}, {""}, {""}, {""}, {""}, -#line 17 "sjis_prop.gperf" +#line 16 "sjis_prop.gperf" {"Lower", 6}, {""}, {""}, {""}, {""}, -#line 16 "sjis_prop.gperf" +#line 15 "sjis_prop.gperf" {"Graph", 5}, {""}, {""}, {""}, {""}, -#line 15 "sjis_prop.gperf" +#line 14 "sjis_prop.gperf" {"Digit", 4} }; diff --git a/src/sjis_prop.gperf b/src/sjis_prop.gperf index 2cec8cf..842b61b 100644 --- a/src/sjis_prop.gperf +++ b/src/sjis_prop.gperf @@ -1,6 +1,5 @@ %{ -#include <string.h> -#include "regenc.h" +#include "regint.h" %} struct PropertyNameCtype { @@ -2,17 +2,14 @@ /* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#ifdef _WIN32 -#include <malloc.h> +#ifndef NEED_TO_INCLUDE_STDIO +#define NEED_TO_INCLUDE_STDIO #endif #include "regint.h" #include "st.h" + typedef struct st_table_entry st_table_entry; struct st_table_entry { @@ -3,15 +3,14 @@ /* @(#) st.h 5.1 89/12/14 */ #ifndef ST_INCLUDED - #define ST_INCLUDED -#ifdef _WIN32 -# include <windows.h> -typedef ULONG_PTR st_data_t; -#else +#if SIZEOF_VOIDP == SIZEOF_LONG typedef unsigned long st_data_t; +#elif SIZEOF_VOIDP == SIZEOF_LONG_LONG +typedef unsigned long long st_data_t; #endif + #define ST_DATA_T_DEFINED typedef struct st_table st_table; diff --git a/src/unicode.c b/src/unicode.c index 474436a..080da74 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -278,9 +278,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) { - int n, m, i, j, k, len; - OnigCodePoint code, codes[3]; - const struct ByUnfoldKey* buk; + int n, m, i, j, k, len, lens[3]; + int index; + int fn, ncs[3]; + OnigCodePoint cs[3][4]; + OnigCodePoint code, codes[3], orig_codes[3]; + const struct ByUnfoldKey* buk1; n 
= 0; @@ -316,38 +319,161 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, } #endif - buk = onigenc_unicode_unfold_key(code); - if (buk != 0) { - if (buk->fold_len == 1) { + orig_codes[0] = code; + lens[0] = len; + p += len; + + buk1 = onigenc_unicode_unfold_key(orig_codes[0]); + if (buk1 != 0 && buk1->fold_len == 1) { + codes[0] = *FOLDS1_FOLD(buk1->index); + } + else + codes[0] = orig_codes[0]; + + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0) + goto fold1; + + if (p < end) { + const struct ByUnfoldKey* buk; + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + orig_codes[1] = code; + len = enclen(enc, p); + lens[1] = lens[0] + len; + buk = onigenc_unicode_unfold_key(orig_codes[1]); + if (buk != 0 && buk->fold_len == 1) { + codes[1] = *FOLDS1_FOLD(buk->index); + } + else + codes[1] = orig_codes[1]; + + p += len; + if (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + orig_codes[2] = code; + len = enclen(enc, p); + lens[2] = lens[1] + len; + buk = onigenc_unicode_unfold_key(orig_codes[2]); + if (buk != 0 && buk->fold_len == 1) { + codes[2] = *FOLDS1_FOLD(buk->index); + } + else + codes[2] = orig_codes[2]; + + index = onigenc_unicode_fold3_key(codes); + if (index >= 0) { + m = FOLDS3_UNFOLDS_NUM(index); + for (i = 0; i < m; i++) { + items[n].byte_len = lens[2]; + items[n].code_len = 1; + items[n].code[0] = FOLDS3_UNFOLDS(index)[i]; + n++; + } + + for (fn = 0; fn < 3; fn++) { + int sindex; + cs[fn][0] = FOLDS3_FOLD(index)[fn]; + ncs[fn] = 1; + sindex = onigenc_unicode_fold1_key(&cs[fn][0]); + if (sindex >= 0) { + int m = FOLDS1_UNFOLDS_NUM(sindex); + for (i = 0; i < m; i++) { + cs[fn][i+1] = FOLDS1_UNFOLDS(sindex)[i]; + } + ncs[fn] += m; + } + } + + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + for (k = 0; k < ncs[2]; k++) { + items[n].byte_len = lens[2]; + items[n].code_len = 3; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + items[n].code[2] = cs[2][k]; + if (items[n].code[0] == orig_codes[0] && + 
items[n].code[1] == orig_codes[1] && + items[n].code[2] == orig_codes[2]) + continue; + n++; + } + } + } + + return n; + } + } + + index = onigenc_unicode_fold2_key(codes); + if (index >= 0) { + m = FOLDS2_UNFOLDS_NUM(index); + for (i = 0; i < m; i++) { + items[n].byte_len = lens[1]; + items[n].code_len = 1; + items[n].code[0] = FOLDS2_UNFOLDS(index)[i]; + n++; + } + + for (fn = 0; fn < 2; fn++) { + int sindex; + cs[fn][0] = FOLDS2_FOLD(index)[fn]; + ncs[fn] = 1; + sindex = onigenc_unicode_fold1_key(&cs[fn][0]); + if (sindex >= 0) { + int m = FOLDS1_UNFOLDS_NUM(sindex); + for (i = 0; i < m; i++) { + cs[fn][i+1] = FOLDS1_UNFOLDS(sindex)[i]; + } + ncs[fn] += m; + } + } + + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + items[n].byte_len = lens[1]; + items[n].code_len = 2; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + if (items[n].code[0] == orig_codes[0] && + items[n].code[1] == orig_codes[1]) + continue; + n++; + } + } + + return n; + } + } + + fold1: + if (buk1 != 0) { + if (buk1->fold_len == 1) { int un; - items[0].byte_len = len; + items[0].byte_len = lens[0]; items[0].code_len = 1; - items[0].code[0] = *FOLDS1_FOLD(buk->index); + items[0].code[0] = *FOLDS1_FOLD(buk1->index); n++; - un = FOLDS1_UNFOLDS_NUM(buk->index); + un = FOLDS1_UNFOLDS_NUM(buk1->index); for (i = 0; i < un; i++) { - OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i]; - if (unfold != code) { - items[n].byte_len = len; + OnigCodePoint unfold = FOLDS1_UNFOLDS(buk1->index)[i]; + if (unfold != orig_codes[0]) { + items[n].byte_len = lens[0]; items[n].code_len = 1; items[n].code[0] = unfold; n++; } } - code = items[0].code[0]; /* for multi-code to unfold search. 
*/ } else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - OnigCodePoint cs[3][4]; - int fn, ncs[3]; - - if (buk->fold_len == 2) { - m = FOLDS2_UNFOLDS_NUM(buk->index); + if (buk1->fold_len == 2) { + m = FOLDS2_UNFOLDS_NUM(buk1->index); for (i = 0; i < m; i++) { - OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i]; - if (unfold == code) continue; + OnigCodePoint unfold = FOLDS2_UNFOLDS(buk1->index)[i]; + if (unfold == orig_codes[0]) continue; - items[n].byte_len = len; + items[n].byte_len = lens[0]; items[n].code_len = 1; items[n].code[0] = unfold; n++; @@ -355,7 +481,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, for (fn = 0; fn < 2; fn++) { int index; - cs[fn][0] = FOLDS2_FOLD(buk->index)[fn]; + cs[fn][0] = FOLDS2_FOLD(buk1->index)[fn]; ncs[fn] = 1; index = onigenc_unicode_fold1_key(&cs[fn][0]); if (index >= 0) { @@ -369,7 +495,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, for (i = 0; i < ncs[0]; i++) { for (j = 0; j < ncs[1]; j++) { - items[n].byte_len = len; + items[n].byte_len = lens[0]; items[n].code_len = 2; items[n].code[0] = cs[0][i]; items[n].code[1] = cs[1][j]; @@ -378,12 +504,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, } } else { /* fold_len == 3 */ - m = FOLDS3_UNFOLDS_NUM(buk->index); + m = FOLDS3_UNFOLDS_NUM(buk1->index); for (i = 0; i < m; i++) { - OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i]; - if (unfold == code) continue; + OnigCodePoint unfold = FOLDS3_UNFOLDS(buk1->index)[i]; + if (unfold == orig_codes[0]) continue; - items[n].byte_len = len; + items[n].byte_len = lens[0]; items[n].code_len = 1; items[n].code[0] = unfold; n++; @@ -391,7 +517,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, for (fn = 0; fn < 3; fn++) { int index; - cs[fn][0] = FOLDS3_FOLD(buk->index)[fn]; + cs[fn][0] = FOLDS3_FOLD(buk1->index)[fn]; ncs[fn] = 1; index = onigenc_unicode_fold1_key(&cs[fn][0]); if (index >= 0) { @@ -406,7 +532,7 @@ 
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, for (i = 0; i < ncs[0]; i++) { for (j = 0; j < ncs[1]; j++) { for (k = 0; k < ncs[2]; k++) { - items[n].byte_len = len; + items[n].byte_len = lens[0]; items[n].code_len = 3; items[n].code[0] = cs[0][i]; items[n].code[1] = cs[1][j]; @@ -416,17 +542,14 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, } } } - - /* multi char folded code is not head of another folded multi char */ - return n; } } else { - int index = onigenc_unicode_fold1_key(&code); + int index = onigenc_unicode_fold1_key(orig_codes); if (index >= 0) { int m = FOLDS1_UNFOLDS_NUM(index); for (i = 0; i < m; i++) { - items[n].byte_len = len; + items[n].byte_len = lens[0]; items[n].code_len = 1; items[n].code[0] = FOLDS1_UNFOLDS(index)[i]; n++; @@ -434,64 +557,6 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, } } - if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0) - return n; - - p += len; - if (p < end) { - int clen; - int index; - - codes[0] = code; - code = ONIGENC_MBC_TO_CODE(enc, p, end); - - buk = onigenc_unicode_unfold_key(code); - if (buk != 0 && buk->fold_len == 1) { - codes[1] = *FOLDS1_FOLD(buk->index); - } - else - codes[1] = code; - - clen = enclen(enc, p); - len += clen; - - index = onigenc_unicode_fold2_key(codes); - if (index >= 0) { - m = FOLDS2_UNFOLDS_NUM(index); - for (i = 0; i < m; i++) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = FOLDS2_UNFOLDS(index)[i]; - n++; - } - } - - p += clen; - if (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - buk = onigenc_unicode_unfold_key(code); - if (buk != 0 && buk->fold_len == 1) { - codes[2] = *FOLDS1_FOLD(buk->index); - } - else - codes[2] = code; - - clen = enclen(enc, p); - len += clen; - - index = onigenc_unicode_fold3_key(codes); - if (index >= 0) { - m = FOLDS3_UNFOLDS_NUM(index); - for (i = 0; i < m; i++) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = FOLDS3_UNFOLDS(index)[i]; - 
n++; - } - } - } - } - return n; } @@ -930,7 +995,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev, #ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER if (! ONIGENC_IS_UNICODE_ENCODING(enc)) { - return from != 0x000d || to != 0x000a; + return from != 0x000d || to != NEWLINE_CODE; } btype = unicode_egcb_is_break_2code(from, to); @@ -973,7 +1038,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev, return 1; #else - return from != 0x000d || to != 0x000a; + return from != 0x000d || to != NEWLINE_CODE; #endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */ } diff --git a/src/unicode_egcb_data.c b/src/unicode_egcb_data.c index 3c49422..0935bb4 100644 --- a/src/unicode_egcb_data.c +++ b/src/unicode_egcb_data.c @@ -1,6 +1,6 @@ /* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */ /*- - * Copyright (c) 2017-2019 K.Kosako + * Copyright (c) 2017-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,7 +25,7 @@ * SUCH DAMAGE. 
*/ -#define GRAPHEME_BREAK_PROPERTY_VERSION 120100 +#define GRAPHEME_BREAK_PROPERTY_VERSION 130000 /* CR @@ -43,7 +43,7 @@ V ZWJ */ -static int EGCB_RANGE_NUM = 1326; +static int EGCB_RANGE_NUM = 1344; static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x000000, 0x000009, EGCB_Control }, {0x00000a, 0x00000a, EGCB_LF }, @@ -136,7 +136,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x000b47, 0x000b48, EGCB_SpacingMark }, {0x000b4b, 0x000b4c, EGCB_SpacingMark }, {0x000b4d, 0x000b4d, EGCB_Extend }, - {0x000b56, 0x000b57, EGCB_Extend }, + {0x000b55, 0x000b57, EGCB_Extend }, {0x000b62, 0x000b63, EGCB_Extend }, {0x000b82, 0x000b82, EGCB_Extend }, {0x000bbe, 0x000bbe, EGCB_Extend }, @@ -182,6 +182,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x000d4e, 0x000d4e, EGCB_Prepend }, {0x000d57, 0x000d57, EGCB_Extend }, {0x000d62, 0x000d63, EGCB_Extend }, + {0x000d81, 0x000d81, EGCB_Extend }, {0x000d82, 0x000d83, EGCB_SpacingMark }, {0x000dca, 0x000dca, EGCB_Extend }, {0x000dcf, 0x000dcf, EGCB_Extend }, @@ -267,7 +268,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x001a6d, 0x001a72, EGCB_SpacingMark }, {0x001a73, 0x001a7c, EGCB_Extend }, {0x001a7f, 0x001a7f, EGCB_Extend }, - {0x001ab0, 0x001abe, EGCB_Extend }, + {0x001ab0, 0x001ac0, EGCB_Extend }, {0x001b00, 0x001b03, EGCB_Extend }, {0x001b04, 0x001b04, EGCB_SpacingMark }, {0x001b34, 0x001b3a, EGCB_Extend }, @@ -329,6 +330,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x00a823, 0x00a824, EGCB_SpacingMark }, {0x00a825, 0x00a826, EGCB_Extend }, {0x00a827, 0x00a827, EGCB_SpacingMark }, + {0x00a82c, 0x00a82c, EGCB_Extend }, {0x00a880, 0x00a881, EGCB_SpacingMark }, {0x00a8b4, 0x00a8c3, EGCB_SpacingMark }, {0x00a8c4, 0x00a8c5, EGCB_Extend }, @@ -1189,6 +1191,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x010a3f, 0x010a3f, EGCB_Extend }, {0x010ae5, 0x010ae6, EGCB_Extend }, {0x010d24, 0x010d27, EGCB_Extend }, + {0x010eab, 0x010eac, EGCB_Extend }, {0x010f46, 0x010f50, EGCB_Extend }, {0x011000, 0x011000, EGCB_SpacingMark }, {0x011001, 
0x011001, EGCB_Extend }, @@ -1215,6 +1218,8 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x0111bf, 0x0111c0, EGCB_SpacingMark }, {0x0111c2, 0x0111c3, EGCB_Prepend }, {0x0111c9, 0x0111cc, EGCB_Extend }, + {0x0111ce, 0x0111ce, EGCB_SpacingMark }, + {0x0111cf, 0x0111cf, EGCB_Extend }, {0x01122c, 0x01122e, EGCB_SpacingMark }, {0x01122f, 0x011231, EGCB_Extend }, {0x011232, 0x011233, EGCB_SpacingMark }, @@ -1286,6 +1291,17 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x01182f, 0x011837, EGCB_Extend }, {0x011838, 0x011838, EGCB_SpacingMark }, {0x011839, 0x01183a, EGCB_Extend }, + {0x011930, 0x011930, EGCB_Extend }, + {0x011931, 0x011935, EGCB_SpacingMark }, + {0x011937, 0x011938, EGCB_SpacingMark }, + {0x01193b, 0x01193c, EGCB_Extend }, + {0x01193d, 0x01193d, EGCB_SpacingMark }, + {0x01193e, 0x01193e, EGCB_Extend }, + {0x01193f, 0x01193f, EGCB_Prepend }, + {0x011940, 0x011940, EGCB_SpacingMark }, + {0x011941, 0x011941, EGCB_Prepend }, + {0x011942, 0x011942, EGCB_SpacingMark }, + {0x011943, 0x011943, EGCB_Extend }, {0x0119d1, 0x0119d3, EGCB_SpacingMark }, {0x0119d4, 0x0119d7, EGCB_Extend }, {0x0119da, 0x0119db, EGCB_Extend }, @@ -1337,6 +1353,8 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = { {0x016f4f, 0x016f4f, EGCB_Extend }, {0x016f51, 0x016f87, EGCB_SpacingMark }, {0x016f8f, 0x016f92, EGCB_Extend }, + {0x016fe4, 0x016fe4, EGCB_Extend }, + {0x016ff0, 0x016ff1, EGCB_SpacingMark }, {0x01bc9d, 0x01bc9e, EGCB_Extend }, {0x01bca0, 0x01bca3, EGCB_Control }, {0x01d165, 0x01d165, EGCB_Extend }, diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c index 171a0fa..3ffb2d1 100644 --- a/src/unicode_fold1_key.c +++ b/src/unicode_fold1_key.c @@ -9,7 +9,7 @@ /* This gperf source file was generated by make_unicode_fold_data.py */ /*- - * Copyright (c) 2017-2019 K.Kosako + * Copyright (c) 2017-2020 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,15 +33,14 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#include <string.h> -#include "regenc.h" +#include "regint.h" -#define TOTAL_KEYWORDS 1353 +#define TOTAL_KEYWORDS 1356 #define MIN_WORD_LENGTH 3 #define MAX_WORD_LENGTH 3 #define MIN_HASH_VALUE 3 -#define MAX_HASH_VALUE 1791 -/* maximum key range = 1789, duplicates = 0 */ +#define MAX_HASH_VALUE 1776 +/* maximum key range = 1774, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -56,32 +55,32 @@ hash(OnigCodePoint codes[]) { static const unsigned short asso_values[] = { - 4, 2, 147, 16, 1, 13, 1228, 725, 1224, 638, - 1204, 95, 9, 82, 1403, 80, 8, 68, 1391, 6, - 1203, 602, 1200, 320, 151, 67, 1385, 61, 1384, 158, - 0, 11, 1252, 3, 733, 278, 23, 425, 660, 424, - 640, 20, 635, 105, 5, 29, 916, 34, 630, 0, - 906, 1378, 899, 1377, 470, 1363, 623, 902, 610, 230, - 1193, 1358, 1190, 779, 986, 1336, 890, 883, 456, 597, - 285, 158, 590, 477, 583, 42, 576, 1447, 447, 1440, - 440, 1353, 426, 1429, 400, 1417, 327, 1407, 567, 1389, - 386, 1207, 458, 724, 854, 979, 1016, 66, 232, 413, - 204, 1431, 226, 1317, 190, 1167, 379, 1319, 198, 1270, - 175, 209, 561, 362, 528, 1323, 257, 1099, 240, 1309, - 508, 1172, 484, 1306, 699, 865, 850, 488, 679, 755, - 420, 639, 1149, 105, 691, 789, 1125, 46, 809, 652, - 1293, 3, 1262, 166, 1426, 0, 1424, 1792, 1220, 460, - 706, 1, 367, 1792, 1341, 1792, 1151, 56, 1235, 1792, - 384, 345, 965, 40, 1168, 33, 218, 180, 1159, 20, - 1296, 276, 1243, 14, 1401, 777, 1142, 682, 1284, 129, - 1135, 610, 1181, 871, 1123, 858, 1042, 510, 842, 499, - 999, 346, 830, 0, 823, 1288, 1115, 768, 732, 1272, - 1101, 1098, 1082, 1270, 811, 177, 1078, 1074, 1061, 1058, + 4, 2, 147, 16, 1, 13, 1251, 864, 1234, 725, + 1206, 95, 9, 82, 1390, 80, 8, 75, 1383, 6, + 1203, 643, 1202, 602, 151, 68, 1380, 67, 1376, 158, + 0, 11, 1375, 3, 733, 278, 23, 424, 660, 320, + 640, 20, 635, 34, 5, 29, 916, 27, 
630, 0, + 906, 1363, 899, 1360, 470, 1355, 623, 902, 610, 230, + 1192, 1335, 1134, 1090, 986, 1325, 890, 883, 456, 597, + 285, 158, 590, 477, 583, 42, 576, 1449, 447, 1448, + 440, 1317, 426, 1440, 400, 1438, 327, 1426, 567, 1412, + 386, 1454, 458, 797, 854, 1340, 965, 66, 232, 413, + 204, 1424, 226, 1419, 190, 1185, 379, 1293, 198, 1196, + 175, 209, 561, 362, 528, 1306, 257, 1174, 240, 1292, + 508, 1171, 484, 1237, 699, 865, 850, 488, 679, 755, + 420, 639, 1149, 105, 691, 857, 1168, 77, 809, 715, + 799, 3, 1286, 166, 1421, 0, 1415, 1777, 1228, 460, + 706, 1, 367, 1777, 1337, 1777, 1254, 57, 1271, 1777, + 384, 345, 1386, 52, 1159, 40, 218, 180, 1151, 33, + 1238, 276, 1220, 14, 1401, 682, 1142, 610, 1284, 129, + 1125, 510, 1277, 1436, 1122, 991, 1042, 499, 842, 448, + 999, 346, 830, 0, 823, 867, 1108, 768, 732, 177, + 1105, 1100, 1093, 1081, 811, 777, 1078, 1074, 1061, 1058, 1050, 789, 353, 1018, 548, 1006, 517, 988, 334, 765, 166, 753, 138, 743, 100, 969, 87, 720, 49, 975, - 75, 965, 62, 411, 126, 955, 113, 1262, 317, 943, - 304, 679, 293, 1252, 714, 937, 275, 490, 263, 469, - 395, 1345, 931, 208, 921, 80, 542, 11, 246 + 75, 965, 62, 411, 126, 955, 113, 1267, 317, 943, + 304, 679, 293, 1261, 714, 937, 275, 490, 263, 469, + 395, 1347, 931, 208, 921, 80, 542, 11, 246 }; return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+3] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)]; } @@ -93,7 +92,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) { -1, -1, -1, - 3429, + 3438, 1906, @@ -103,7 +102,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 171, - 3414, + 3423, 1927, @@ -147,11 +146,11 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 219, - 1618, + 3432, 2237, - 3417, + 3426, 2981, @@ -161,7 +160,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2990, - 3423, + 3420, 2282, @@ -173,11 +172,11 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2927, - 1606, + 1618, 2984, - 3507, + 
3516, 2906, @@ -187,11 +186,11 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2900, - 1600, + 1606, 2648, - 3558, + 3567, 2189, @@ -207,17 +206,17 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1795, - 3738, + 3747, 1513, - 378, + 1600, - 1459, + 378, 2438, - 3570, + 3579, 2195, @@ -233,7 +232,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1807, - 3750, + 3759, 1525, @@ -243,7 +242,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2441, - 3564, + 3573, 2192, @@ -259,7 +258,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1801, - 3744, + 3753, 1519, @@ -267,7 +266,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2561, - 3552, + 3561, 2186, @@ -275,7 +274,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 456, - 2972, + 1459, 863, @@ -283,17 +282,17 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1789, - 3732, + 3741, 1507, - 2966, + 2972, - 2942, + 2966, 2555, - 3546, + 3555, 2183, @@ -303,13 +302,13 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 857, - 3411, + 2942, 2879, 1783, - 3726, + 3735, 1501, @@ -319,7 +318,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2930, - 3582, + 3591, 2201, @@ -335,17 +334,17 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1819, - 3762, + 3771, 1537, - 3354, + 3363, 2537, 2924, - 3576, + 3585, 2198, @@ -361,7 +360,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1813, - 3756, + 3765, 1531, @@ -369,7 +368,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2573, - 3540, + 3549, 2180, @@ -384,7 +383,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1777, - 3720, + 3729, 1495, @@ -408,7 +407,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 514, - 3495, + 3504, 1864, @@ -424,7 +423,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1306, - 3534, + 3543, 2177, @@ -439,7 +438,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1771, - 3714, + 3723, 1489, @@ -449,7 +448,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 324, - 1192, + 1183, 27, @@ -459,9 +458,9 @@ 
onigenc_unicode_fold1_key(OnigCodePoint codes[]) 848, - 1726, + 1708, - 3669, + 3660, 3141, @@ -479,7 +478,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 315, - 830, + 810, 12, @@ -494,7 +493,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2372, - 4086, + 4095, 1360, @@ -504,7 +503,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 309, - 3633, + 3642, 0, @@ -545,7 +544,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 312, - 3459, + 3468, 6, @@ -559,11 +558,11 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2600, - 3885, + 3894, 2366, - 3342, + 3351, 1354, @@ -585,7 +584,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 499, - 3879, + 3888, 168, @@ -610,7 +609,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 46, - 3618, + 3627, 2219, @@ -632,7 +631,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 715, - 3612, + 3621, 2216, @@ -652,13 +651,13 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1567, - 3492, + 3501, 1966, 959, - 3873, + 3882, -1, 2279, @@ -667,7 +666,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 887, - 3600, + 3609, 2210, @@ -683,12 +682,12 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1837, - 3780, + 3789, 1555, -1, - 3594, + 3603, 2207, @@ -704,16 +703,16 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1831, - 3774, + 3783, 1549, 703, -1, - 3867, + 3876, - 3588, + 3597, 2204, @@ -729,11 +728,11 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1825, - 3768, + 3777, 1543, - 3822, + 3813, 1990, @@ -742,7 +741,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 288, -1, - 2273, + 2291, 2681, @@ -761,7 +760,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1765, - 3708, + 3717, 1483, -1, -1, @@ -772,9 +771,9 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 381, - 3921, + 3930, - 2960, + 3008, 845, @@ -796,17 +795,17 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1747, - 3690, + 3699, 1465, 785, - 3903, + 3912, 2432, - 3339, + 3348, 37, @@ -818,7 +817,7 @@ 
onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3228, - 3915, + 3924, 2771, @@ -829,7 +828,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2501, - 3891, + 3900, 3234, @@ -839,7 +838,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 318, - 3924, + 3933, 18, @@ -863,7 +862,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3084, - 3624, + 3633, 2222, @@ -873,7 +872,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3297, - 3897, + 3906, 1987, @@ -889,21 +888,21 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3225, - 3387, + 3396, 3075, - 4020, + 4029, - 3573, + 3582, 2345, 3219, - 3945, + 3954, - 4005, + 4014, 96, @@ -911,7 +910,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1810, - 3753, + 3762, 1528, @@ -925,7 +924,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2357, - 3528, + 3537, 1984, @@ -933,9 +932,9 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 282, - 3996, + 4005, - 3939, + 3948, 2669, -1, @@ -944,14 +943,14 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2564, - 2291, - 2285, + + 4023, -1, - 4008, + 4017, - 3522, + 3531, 1981, @@ -959,13 +958,13 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 279, - 4014, + 4011, 3072, 2663, - 3516, + 3525, 1978, @@ -978,11 +977,11 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2657, - 3008, - 3002, - 3486, + 1678, + + 3495, 1963, @@ -999,17 +998,18 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 369, 2699, - -1, + + 778, 3177, - 3399, + 3408, 2330, - 3621, + 3630, - 3444, + 3453, 1942, @@ -1023,7 +1023,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2585, - 3501, + 3510, 1576, @@ -1031,13 +1031,13 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 270, - 3381, + 3390, 2312, 2642, - 3861, + 3870, 2045, @@ -1049,7 +1049,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 65, - 3615, + 3624, 348, @@ -1074,15 +1074,15 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1429, - 3843, + 3852, 3023, 727, - 1678, + 1672, - 3375, + 3384, 3201, @@ -1095,13 +1095,11 
@@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2462, 59, - -1, - - 4002, + -1, -1, - 778, + 768, - 1672, + 1654, 2171, @@ -1109,7 +1107,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2396, - 3369, + 3378, 1408, @@ -1117,13 +1115,13 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1759, - 3702, + 3711, 1477, - 768, + 748, - 3363, + 3372, 2033, @@ -1138,12 +1136,12 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 622, -1, - 3930, + 3939, - 3333, + 3342, -1, - 3405, + 3414, -1, 2513, @@ -1169,17 +1167,17 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1591, -1, - 3909, + 3918, 2864, 1753, - 3696, + 3705, 1471, - 3348, + 3357, -1, 3231, @@ -1213,7 +1211,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1384, - 3510, + 3519, 1975, @@ -1226,7 +1224,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2651, - 3504, + 3513, 1972, @@ -1234,26 +1232,26 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3120, - 3888, + 3897, 598, 2645, - 3498, + 3507, 1969, 965, -1, - 3981, + 3990, 595, 2639, - 3489, + 3498, 3078, @@ -1267,7 +1265,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2630, - 3330, + 3339, 3054, @@ -1279,7 +1277,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2333, - 3456, + 3465, 1948, @@ -1287,12 +1285,12 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 249, - 2267, + 2273, -1, 2597, - 1654, + 1642, 574, @@ -1303,7 +1301,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2324, - 3450, + 3459, 1945, @@ -1311,17 +1309,17 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 246, - 748, + 739, 3189, 2591, - 3426, + 3435, 1933, - 2954, + 2960, 237, -1, @@ -1336,7 +1334,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3210, - 4050, + 4059, 1921, @@ -1351,11 +1349,11 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2300, - 3393, + 3402, 1333, - 2255, + 3336, 3153, @@ -1365,13 +1363,13 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1330, - 363, + 2267, 673, - 3357, + 3366, - 3957, + 3966, 3039, @@ -1379,28 +1377,28 
@@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1918, - 3972, + 3981, 222, - 3351, + 3360, 3014, 3035, - 3855, - - 2918, + 3864, + -1, 3249, 3011, - 3345, + 3354, 2288, - -1, + + 2954, 1327, @@ -1410,15 +1408,15 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3057, - 3336, + 3345, - 3597, + 3606, 2054, 1090, - 3951, + 3960, 2486, @@ -1428,11 +1426,11 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1834, - 3777, + 3786, 1552, - 1642, + 1636, 3005, @@ -1444,21 +1442,21 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 357, - 4083, + 4092, 730, 2744, - 3849, + 3858, - 739, + 733, 2048, 1084, - 3933, + 3942, 2474, @@ -1480,36 +1478,37 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1420, - 3606, + 3615, 2213, 1252, - -1, -1, + -1, + + 363, 129, - 3555, + 3564, 1843, - 3786, + 3795, 1561, 592, 77, - - 1029, + -1, 1792, - 3735, + 3744, 1510, - 2702, + 3999, 589, @@ -1519,22 +1518,22 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1912, - 3927, + 3936, 216, - 2249, + 2255, 2840, 1705, - 3648, + 3657, -1, 3240, - 3543, + 3552, 2546, @@ -1550,14 +1549,14 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1780, - 3723, + 3732, 1498, - 3537, + 3546, -1, - 2912, + 2918, 447, @@ -1569,7 +1568,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1774, - 3717, + 3726, 1492, @@ -1577,7 +1576,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3183, - 3531, + 3540, -1, 2534, @@ -1592,31 +1591,28 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1768, - 3711, + 3720, 1486, 1702, - 3645, + 3654, 2528, 565, - 3471, + 1192, 3087, - - 934, - - 255, + -1, -1, 556, - 1636, + 1726, - 2612, + 3678, 3051, @@ -1630,31 +1626,31 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 3159, 432, + -1, - 2495, - - 733, + 830, 3032, 1744, - 3687, + 3696, 1462, - -1, - 691, + 1029, + + 2072, 2429, 3029, - 1453, + 2702, 3156, - 3408, + 2753, -1, -1, 550, @@ -1679,7 +1675,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1723, - 3666, + 3675, -1, 3135, @@ -1705,9 +1701,8 
@@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 411, - 3990, - - 3984, + 3993, + -1, 2831, @@ -1740,7 +1735,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1087, - 3975, + 3984, 2480, @@ -1756,16 +1751,16 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2705, - 405, + 2495, 2414, - -1, - 1426, + 691, - 1666, + 1426, + -1, - 4077, + 4086, 724, @@ -1774,71 +1769,74 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2477, 74, - -1, + + 1453, 3258, - 3963, + 3972, - 402, + 1696, - 762, - -1, + 3648, + + 2249, 2411, - 1660, + 3417, 1423, - 4035, + 4044, 547, - 3801, + 3810, - 3483, - -1, + 3492, + + 798, 947, 261, 3273, - - 755, + -1, 2624, - 3480, + 3489, 1960, 944, - 3876, - -1, + 3885, + + 2912, 2327, 2621, -1, -1, - 3438, + 3447, 1939, 899, - 3453, + 3462, - 3870, + 3879, 916, 2579, - 3432, + 3441, 1936, @@ -1855,19 +1853,19 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2321, - 3864, + 3873, - 3420, + 3429, 1930, - 3798, + 3807, 234, 1345, - 3636, + 3645, 2228, @@ -1879,14 +1877,14 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 156, - 577, + 3831, -1, - 3978, + 3987, 1339, - 3630, + 3639, 2225, @@ -1897,25 +1895,26 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 150, - 3609, + 3618, 893, - 3840, + 3849, 1585, -1, 132, - 3591, + 3600, 1846, - 3789, + 3798, 1564, - -1, + + 694, 114, @@ -1923,70 +1922,72 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1828, - 3771, + 3780, 1546, -1, 884, - 3579, + 3588, -1, 688, - -1, -1, + + 3333, + -1, 102, - 3819, + 3828, 1816, - 3759, + 3768, 1534, - 3567, - -1, + 3576, - 1129, - -1, + 2005, - 3549, + 1038, + + 303, + + 3558, 89, - 2786, + 2711, 1804, - 3747, + 3756, 1522, - 3561, + 3570, 1786, - 3729, + 3738, 1504, 2570, 83, - - 1035, + -1, 1798, - 3741, + 3750, 1516, + -1, - 2708, - - 3474, + 3483, 1957, @@ -2001,14 +2002,16 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 2615, 2540, - -1, + + 405, 1762, - 3705, + 3714, 1480, - -1, + + 1666, 2552, @@ -2025,8 +2028,9 @@ 
onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1681, - 4080, - -1, + 4089, + + 762, 438, -1, @@ -2037,31 +2041,29 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1756, - 3699, + 3708, 1474, 782, - 2005, - - 1038, + 3801, - 303, + 3102, + -1, - 4047, + 4056, 435, - - 2711, + -1, 3315, - 3969, + 3978, 1750, - 3693, + 3702, 1468, @@ -2073,7 +2075,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 583, -1, - 4041, + 4050, -1, 3213, @@ -2081,9 +2083,9 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 580, - 3966, + 3975, - 4017, + 4026, -1, 2504, @@ -2092,7 +2094,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1165, - 3999, + 4008, 3246, @@ -2102,7 +2104,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1669, - 3993, + 4002, 2162, @@ -2116,7 +2118,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1741, - 3684, + 3693, 765, @@ -2131,15 +2133,15 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1738, - 3681, + 3690, 2855, 1735, - 3678, + 3687, - 3987, + 3996, 3171, -1, @@ -2160,110 +2162,117 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1732, - 3675, + 3684, - 2150, + 1189, 2852, 1729, - 3672, + 3681, 3150, - 2846, + 1720, - 1717, + 3672, - 3660, + 3480, 836, - -1, -1, -1, + + 934, + + 255, + + 2150, 833, - -1, + + 2612, 3144, - -1, - 820, + 823, + + 2846, + + 1717, + + 3669, 1186, 417, + -1, -1, 2147, - 2453, + 1714, - 49, + 3666, - 1714, + 2141, - 3657, + 820, 2843, 1711, - 3654, - -1, - - 2387, - -1, - - 1399, - -1, + 3663, - 816, + 2837, - 2141, - -1, + 1699, - 813, + 3651, - 3882, + 816, -1, - 2837, - - 1699, + 3891, + -1, -1, - 3642, + 813, 2123, 1162, - 2066, + 802, - 631, + 2117, - 360, + 1156, 2819, 1663, + -1, - 2747, + 2813, - 802, - -1, -1, -1, + 1651, + -1, - 2117, + 3477, - 1156, + 1954, - 3858, + 931, - 1456, + 2426, + + 3867, 758, - 2813, + 2609, + -1, - 1651, + 745, 2111, @@ -2271,9 +2280,8 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 396, - 3291, - - 2426, + 3216, + -1, 2807, @@ -2283,37 +2291,21 @@ 
onigenc_unicode_fold1_key(OnigCodePoint codes[]) 1096, - 745, - - 1120, - - 375, - - 2741, - - 3852, - - 2777, - -1, - - 736, - 2099, 1138, 390, - 1444, - -1, + 2741, + + 3861, 2795, 1615, - 3168, - - 3846, + 736, 2093, @@ -2321,620 +2313,637 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) 384, - 15, - - 2723, + 1444, + -1, 2789, 1603, + 3168, + + 3855, + + 2066, + -1, + + 360, + -1, -1, + + 2747, + 2465, 62, 2738, - 2375, - - 3318, + 2453, - 1363, + 49, - 2120, + 3318, + -1, - 1159, + 1456, 2399, - -1, - - 1411, - 2816, + 3063, - 1657, + 1411, - 4062, + 2387, + -1, - 3468, + 1399, + -1, - 1954, + 15, - 931, + 2723, - 3462, + 3471, 1951, 925, + -1, -1, - 2609, + 2375, - 3216, + 2603, - 752, + 1363, - 2603, + 3846, - 3837, + 24, + + 2729, 1894, - -1, + + 1891, 198, - 1891, + 195, 1876, - 195, + 3843, 180, - 3834, - - 1023, - -1, + 1375, - 3831, + 3840, + -1, -1, 2270, - 2696, + 2264, 1303, - 2264, + 1300, 2258, - - 1300, + -1, 1285, - -1, -1, - 2084, + 2105, - 1108, + 1144, 3312, - 3828, + 3837, + -1, - 1873, + 2801, - 2765, + 1627, - 177, + 3834, - 3825, + 2084, - 1870, + 1108, + + 3825, 2957, - 174, + 2951, - 3813, + 2765, - 2951, + 1873, 2921, - 3063, + 177, - 2252, + 577, - 1123, + 2102, - 1282, - -1, + 1141, - 2246, + 2471, - 2780, + 68, - 1279, - -1, + 3822, - 2105, + 2798, - 1144, + 1621, + + 2252, -1, - 670, + 1282, - 3810, + 2405, - 2801, + 3819, - 1627, + 1417, - 3807, + 1870, + -1, - 3603, + 174, - 1909, + 3816, - 2915, + 1120, - 213, + 375, + + 3804, -1, - 126, + 2777, - 2909, + 3612, - 1840, + 3330, - 3783, + 2246, - 1558, + 2915, - 3585, + 1279, - 2075, + 126, - 1099, + 3594, - 3795, + 1840, - 1318, + 3792, + + 1558, + -1, 108, - 2756, + 1123, 1822, - 3765, + 3774, 1540, - 1189, - - 3942, - - 1183, - - 24, - - 2729, - - 1720, - - 3663, + 2780, - 1708, + 2120, - 3651, + 1159, -1, - 2987, + 2909, -1, - 1375, + 2816, + + 1657, 2114, 1153, - 823, - - 3402, + 2075, - 810, + 1099, + -1, 2810, 1645, - -1, - 4074, + 2756, - 2072, + 4083, - 1696, + 752, + 
-1, - 3639, + 2459, - 2102, + 56, - 1141, + 21, - 2753, + 2726, - 4071, + 4080, 742, - - 2798, - - 1621, + -1, 3309, - 798, - - 3261, - -1, -1, + 2393, + -1, - 4029, + 1405, - 2471, + 1369, + -1, - 68, + 4038, - 4044, + 2447, - 2459, + 43, - 56, + 4053, + -1, -1, - 646, + 3165, - 4023, - -1, + 4032, - 2405, - -1, + 3534, - 1417, + 2381, - 2393, + 993, - 9, + 1393, - 1405, + 2444, - 21, + 670, - 2726, + 2666, + -1, - 4011, + 3486, - 2447, + 4020, - 43, + 940, - 2369, + 258, + -1, 3255, - 1357, + 2618, - 3282, + 3300, - 1369, + 646, -1, - 2381, + 3474, + -1, - 3477, + 928, - 1393, + 252, - 940, + 1117, - 258, + 3294, - 3300, + 2606, - 3906, + 1035, - 2618, + 2774, - 1117, + 655, - 3096, + 3963, - 3627, + 2708, - 3954, + 3636, - 2774, + 3951, + + 3261, 529, - 3294, + 526, 147, - - 526, + -1, 511, - 3525, + 3456, 1582, - 993, + 909, -1, - 2444, + 3915, - 3465, + 3450, - 2666, + 2588, - 928, + 902, - 252, + 3444, 890, - 3447, + 896, - 2606, + 2582, - 909, - -1, -1, + 2315, 3090, - 2588, - - 3126, - - 3165, + 2576, - 508, + 3927, -1, - 3162, - - 2315, - - 505, - - 3441, - - 3435, - - 902, + 2309, - 896, - -1, -1, + 3162, + -1, - 2582, + 2303, - 2576, + 1909, 1903, - 1900, + 213, 207, + -1, - 204, - - 2309, + 1900, - 2303, + 508, - 1017, + 204, 1888, -1, 192, - 2690, + 3096, - 4065, - -1, + 1129, + + 1318, 1312, + 1882, + + 2786, + + 186, + 1309, + -1, - 544, + 4074, - 3270, + 1297, + + 3381, + + 505, + -1, + + 3306, 2108, 1147, - 1882, + 1291, - 1297, + 2987, - 186, + 2975, 2804, 1633, - 1011, + 3270, - 700, - -1, -1, + 2969, - 2684, + 3969, + -1, - 2975, + 2945, - 2969, + 1017, + -1, - 3285, + 2081, - 1291, + 1105, - 655, + 2690, - 1005, + 3303, - 3816, + 2933, - 2945, + 2762, - 3804, + 2078, - 2678, + 1102, - 3306, + 9, + -1, -1, - 2081, + 2759, - 1105, + 1011, - 2078, + 3, + -1, - 1102, + 2369, + + 2684, + + 1357, + + 700, + + 3282, + + 2363, -1, - 2762, + 1351, - 999, + 402, - 2759, + 1005, - 2933, + 649, - 3372, + 999, - 2672, + 1660, - 3, + 2678, -1, - 3792, + 
2672, 3114, - 3519, + 3528, - 694, + 3522, 987, - 2363, + 980, - 3918, + 3285, - 1351, + 755, 2660, - 3513, - -1, + 2654, - 980, + 1023, -1, -1, -1, - 2654, + 2696, + -1, -1, -1, -1, -1, - 3129, + 3957, + + 3921, + -1, -1, -1, -1, -1, + + 3126, -1, -1, -1, -1, -1, -1, + + 3945, -1, -1, -1, -1, -1, -1, + -1, - 649, + 631, + -1, + + 3129, -1, -1, - 3396, + 3405, -1, -1, -1, -1, -1, -1, - 3303, - -1, -1, - - 3960, + 4071, + -1, -1, -1, -1, 3105, -1, - 3948, + 3399, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 3390, - -1, -1, + 3279, - 3900, + 3393, -1, - 3912, - -1, -1, -1, - - 3936, - - 3384, - -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, - - 3378, + 3387, -1, - 3102, - -1, -1, -1, -1, -1, -1, - -1, -1, + 544, - 3366, + 538, + -1, -1, -1, - 3279, - -1, -1, -1, -1, -1, + 535, - 3360, - -1, -1, -1, + 3375, - 538, + 3369, - 535, + 523, -1, -1, -1, - 619, - -1, + 3411, + -1, -1, - 523, + 517, + -1, -1, -1, -1, -1, + + 4068, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 517, + 3123, -1, -1, -1, - 616, - -1, -1, -1, -1, + 619, + -1, -1, -1, -1, -1, -1, - 3123, - -1, -1, -1, -1, + 3291, + -1, -1, -1, -1, -1, -1, - 610, + 616, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 610, + -1, + 604, - -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 4059, - -1, -1, + 3909, + -1, -1, -1, -1, - 4053, - -1, + 3903, + -1, -1, -1, - 3894, - -1, -1, -1, -1, -1, -1, + 4062, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -2943,7 +2952,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) -1, 3111, - -1, + -1, -1, -1, -1, -1, 3108, -1, -1, -1, -1, -1, -1, @@ -2961,26 +2970,25 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 4068, - -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, + 4077, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 4056, - -1, -1, -1, -1, - - 4038, + 4065, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 4032, + 4047, + -1, -1, -1, -1, + + 4041, + -1, -1, - 4026 + 4035 }; diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c index c39b19d..e06ba0b 100644 --- a/src/unicode_fold2_key.c +++ b/src/unicode_fold2_key.c @@ -9,7 +9,7 @@ /* This gperf source file was generated by make_unicode_fold_data.py */ /*- - * Copyright (c) 2017-2019 K.Kosako + * Copyright (c) 2017-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,8 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#include <string.h> -#include "regenc.h" +#include "regint.h" #define TOTAL_KEYWORDS 59 #define MIN_WORD_LENGTH 6 diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c index 295c447..7ab24d1 100644 --- a/src/unicode_fold3_key.c +++ b/src/unicode_fold3_key.c @@ -9,7 +9,7 @@ /* This gperf source file was generated by make_unicode_fold_data.py */ /*- - * Copyright (c) 2017-2019 K.Kosako + * Copyright (c) 2017-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,8 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#include <string.h> -#include "regenc.h" +#include "regint.h" #define TOTAL_KEYWORDS 14 #define MIN_WORD_LENGTH 9 diff --git a/src/unicode_fold_data.c b/src/unicode_fold_data.c index 68694b0..ca4998d 100644 --- a/src/unicode_fold_data.c +++ b/src/unicode_fold_data.c @@ -1,7 +1,34 @@ /* This file was generated by make_unicode_fold_data.py. */ +/*- + * Copyright (c) 2017-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + #include "regenc.h" -#define UNICODE_CASEFOLD_VERSION 120100 +#define UNICODE_CASEFOLD_VERSION 130000 OnigCodePoint OnigUnicodeFolds1[] = { @@ -1105,262 +1132,265 @@ OnigCodePoint OnigUnicodeFolds1[] = { /*3321*/ 0xa7bd, 1, 0xa7bc, /* LATIN CAPITAL LETTER GLOTTAL I */ /*3324*/ 0xa7bf, 1, 0xa7be, /* LATIN CAPITAL LETTER GLOTTAL U */ /*3327*/ 0xa7c3, 1, 0xa7c2, /* LATIN CAPITAL LETTER ANGLICANA W */ - /*3330*/ 0xab53, 1, 0xa7b3, /* LATIN CAPITAL LETTER CHI */ - /*3333*/ 0xff41, 1, 0xff21, /* FULLWIDTH LATIN CAPITAL LETTER A */ - /*3336*/ 0xff42, 1, 0xff22, /* FULLWIDTH LATIN CAPITAL LETTER B */ - /*3339*/ 0xff43, 1, 0xff23, /* FULLWIDTH LATIN CAPITAL LETTER C */ - /*3342*/ 0xff44, 1, 0xff24, /* FULLWIDTH LATIN CAPITAL LETTER D */ - /*3345*/ 0xff45, 1, 0xff25, /* FULLWIDTH LATIN CAPITAL LETTER E */ - /*3348*/ 0xff46, 1, 0xff26, /* FULLWIDTH LATIN CAPITAL LETTER F */ - /*3351*/ 0xff47, 1, 0xff27, /* FULLWIDTH LATIN CAPITAL LETTER G */ - /*3354*/ 0xff48, 1, 0xff28, /* FULLWIDTH LATIN 
CAPITAL LETTER H */ - /*3357*/ 0xff49, 1, 0xff29, /* FULLWIDTH LATIN CAPITAL LETTER I */ - /*3360*/ 0xff4a, 1, 0xff2a, /* FULLWIDTH LATIN CAPITAL LETTER J */ - /*3363*/ 0xff4b, 1, 0xff2b, /* FULLWIDTH LATIN CAPITAL LETTER K */ - /*3366*/ 0xff4c, 1, 0xff2c, /* FULLWIDTH LATIN CAPITAL LETTER L */ - /*3369*/ 0xff4d, 1, 0xff2d, /* FULLWIDTH LATIN CAPITAL LETTER M */ - /*3372*/ 0xff4e, 1, 0xff2e, /* FULLWIDTH LATIN CAPITAL LETTER N */ - /*3375*/ 0xff4f, 1, 0xff2f, /* FULLWIDTH LATIN CAPITAL LETTER O */ - /*3378*/ 0xff50, 1, 0xff30, /* FULLWIDTH LATIN CAPITAL LETTER P */ - /*3381*/ 0xff51, 1, 0xff31, /* FULLWIDTH LATIN CAPITAL LETTER Q */ - /*3384*/ 0xff52, 1, 0xff32, /* FULLWIDTH LATIN CAPITAL LETTER R */ - /*3387*/ 0xff53, 1, 0xff33, /* FULLWIDTH LATIN CAPITAL LETTER S */ - /*3390*/ 0xff54, 1, 0xff34, /* FULLWIDTH LATIN CAPITAL LETTER T */ - /*3393*/ 0xff55, 1, 0xff35, /* FULLWIDTH LATIN CAPITAL LETTER U */ - /*3396*/ 0xff56, 1, 0xff36, /* FULLWIDTH LATIN CAPITAL LETTER V */ - /*3399*/ 0xff57, 1, 0xff37, /* FULLWIDTH LATIN CAPITAL LETTER W */ - /*3402*/ 0xff58, 1, 0xff38, /* FULLWIDTH LATIN CAPITAL LETTER X */ - /*3405*/ 0xff59, 1, 0xff39, /* FULLWIDTH LATIN CAPITAL LETTER Y */ - /*3408*/ 0xff5a, 1, 0xff3a, /* FULLWIDTH LATIN CAPITAL LETTER Z */ - /*3411*/ 0x010428, 1, 0x010400, /* DESERET CAPITAL LETTER LONG I */ - /*3414*/ 0x010429, 1, 0x010401, /* DESERET CAPITAL LETTER LONG E */ - /*3417*/ 0x01042a, 1, 0x010402, /* DESERET CAPITAL LETTER LONG A */ - /*3420*/ 0x01042b, 1, 0x010403, /* DESERET CAPITAL LETTER LONG AH */ - /*3423*/ 0x01042c, 1, 0x010404, /* DESERET CAPITAL LETTER LONG O */ - /*3426*/ 0x01042d, 1, 0x010405, /* DESERET CAPITAL LETTER LONG OO */ - /*3429*/ 0x01042e, 1, 0x010406, /* DESERET CAPITAL LETTER SHORT I */ - /*3432*/ 0x01042f, 1, 0x010407, /* DESERET CAPITAL LETTER SHORT E */ - /*3435*/ 0x010430, 1, 0x010408, /* DESERET CAPITAL LETTER SHORT A */ - /*3438*/ 0x010431, 1, 0x010409, /* DESERET CAPITAL LETTER SHORT AH */ - /*3441*/ 0x010432, 1, 
0x01040a, /* DESERET CAPITAL LETTER SHORT O */ - /*3444*/ 0x010433, 1, 0x01040b, /* DESERET CAPITAL LETTER SHORT OO */ - /*3447*/ 0x010434, 1, 0x01040c, /* DESERET CAPITAL LETTER AY */ - /*3450*/ 0x010435, 1, 0x01040d, /* DESERET CAPITAL LETTER OW */ - /*3453*/ 0x010436, 1, 0x01040e, /* DESERET CAPITAL LETTER WU */ - /*3456*/ 0x010437, 1, 0x01040f, /* DESERET CAPITAL LETTER YEE */ - /*3459*/ 0x010438, 1, 0x010410, /* DESERET CAPITAL LETTER H */ - /*3462*/ 0x010439, 1, 0x010411, /* DESERET CAPITAL LETTER PEE */ - /*3465*/ 0x01043a, 1, 0x010412, /* DESERET CAPITAL LETTER BEE */ - /*3468*/ 0x01043b, 1, 0x010413, /* DESERET CAPITAL LETTER TEE */ - /*3471*/ 0x01043c, 1, 0x010414, /* DESERET CAPITAL LETTER DEE */ - /*3474*/ 0x01043d, 1, 0x010415, /* DESERET CAPITAL LETTER CHEE */ - /*3477*/ 0x01043e, 1, 0x010416, /* DESERET CAPITAL LETTER JEE */ - /*3480*/ 0x01043f, 1, 0x010417, /* DESERET CAPITAL LETTER KAY */ - /*3483*/ 0x010440, 1, 0x010418, /* DESERET CAPITAL LETTER GAY */ - /*3486*/ 0x010441, 1, 0x010419, /* DESERET CAPITAL LETTER EF */ - /*3489*/ 0x010442, 1, 0x01041a, /* DESERET CAPITAL LETTER VEE */ - /*3492*/ 0x010443, 1, 0x01041b, /* DESERET CAPITAL LETTER ETH */ - /*3495*/ 0x010444, 1, 0x01041c, /* DESERET CAPITAL LETTER THEE */ - /*3498*/ 0x010445, 1, 0x01041d, /* DESERET CAPITAL LETTER ES */ - /*3501*/ 0x010446, 1, 0x01041e, /* DESERET CAPITAL LETTER ZEE */ - /*3504*/ 0x010447, 1, 0x01041f, /* DESERET CAPITAL LETTER ESH */ - /*3507*/ 0x010448, 1, 0x010420, /* DESERET CAPITAL LETTER ZHEE */ - /*3510*/ 0x010449, 1, 0x010421, /* DESERET CAPITAL LETTER ER */ - /*3513*/ 0x01044a, 1, 0x010422, /* DESERET CAPITAL LETTER EL */ - /*3516*/ 0x01044b, 1, 0x010423, /* DESERET CAPITAL LETTER EM */ - /*3519*/ 0x01044c, 1, 0x010424, /* DESERET CAPITAL LETTER EN */ - /*3522*/ 0x01044d, 1, 0x010425, /* DESERET CAPITAL LETTER ENG */ - /*3525*/ 0x01044e, 1, 0x010426, /* DESERET CAPITAL LETTER OI */ - /*3528*/ 0x01044f, 1, 0x010427, /* DESERET CAPITAL LETTER EW */ - /*3531*/ 
0x0104d8, 1, 0x0104b0, /* OSAGE CAPITAL LETTER A */ - /*3534*/ 0x0104d9, 1, 0x0104b1, /* OSAGE CAPITAL LETTER AI */ - /*3537*/ 0x0104da, 1, 0x0104b2, /* OSAGE CAPITAL LETTER AIN */ - /*3540*/ 0x0104db, 1, 0x0104b3, /* OSAGE CAPITAL LETTER AH */ - /*3543*/ 0x0104dc, 1, 0x0104b4, /* OSAGE CAPITAL LETTER BRA */ - /*3546*/ 0x0104dd, 1, 0x0104b5, /* OSAGE CAPITAL LETTER CHA */ - /*3549*/ 0x0104de, 1, 0x0104b6, /* OSAGE CAPITAL LETTER EHCHA */ - /*3552*/ 0x0104df, 1, 0x0104b7, /* OSAGE CAPITAL LETTER E */ - /*3555*/ 0x0104e0, 1, 0x0104b8, /* OSAGE CAPITAL LETTER EIN */ - /*3558*/ 0x0104e1, 1, 0x0104b9, /* OSAGE CAPITAL LETTER HA */ - /*3561*/ 0x0104e2, 1, 0x0104ba, /* OSAGE CAPITAL LETTER HYA */ - /*3564*/ 0x0104e3, 1, 0x0104bb, /* OSAGE CAPITAL LETTER I */ - /*3567*/ 0x0104e4, 1, 0x0104bc, /* OSAGE CAPITAL LETTER KA */ - /*3570*/ 0x0104e5, 1, 0x0104bd, /* OSAGE CAPITAL LETTER EHKA */ - /*3573*/ 0x0104e6, 1, 0x0104be, /* OSAGE CAPITAL LETTER KYA */ - /*3576*/ 0x0104e7, 1, 0x0104bf, /* OSAGE CAPITAL LETTER LA */ - /*3579*/ 0x0104e8, 1, 0x0104c0, /* OSAGE CAPITAL LETTER MA */ - /*3582*/ 0x0104e9, 1, 0x0104c1, /* OSAGE CAPITAL LETTER NA */ - /*3585*/ 0x0104ea, 1, 0x0104c2, /* OSAGE CAPITAL LETTER O */ - /*3588*/ 0x0104eb, 1, 0x0104c3, /* OSAGE CAPITAL LETTER OIN */ - /*3591*/ 0x0104ec, 1, 0x0104c4, /* OSAGE CAPITAL LETTER PA */ - /*3594*/ 0x0104ed, 1, 0x0104c5, /* OSAGE CAPITAL LETTER EHPA */ - /*3597*/ 0x0104ee, 1, 0x0104c6, /* OSAGE CAPITAL LETTER SA */ - /*3600*/ 0x0104ef, 1, 0x0104c7, /* OSAGE CAPITAL LETTER SHA */ - /*3603*/ 0x0104f0, 1, 0x0104c8, /* OSAGE CAPITAL LETTER TA */ - /*3606*/ 0x0104f1, 1, 0x0104c9, /* OSAGE CAPITAL LETTER EHTA */ - /*3609*/ 0x0104f2, 1, 0x0104ca, /* OSAGE CAPITAL LETTER TSA */ - /*3612*/ 0x0104f3, 1, 0x0104cb, /* OSAGE CAPITAL LETTER EHTSA */ - /*3615*/ 0x0104f4, 1, 0x0104cc, /* OSAGE CAPITAL LETTER TSHA */ - /*3618*/ 0x0104f5, 1, 0x0104cd, /* OSAGE CAPITAL LETTER DHA */ - /*3621*/ 0x0104f6, 1, 0x0104ce, /* OSAGE CAPITAL LETTER U */ - 
/*3624*/ 0x0104f7, 1, 0x0104cf, /* OSAGE CAPITAL LETTER WA */ - /*3627*/ 0x0104f8, 1, 0x0104d0, /* OSAGE CAPITAL LETTER KHA */ - /*3630*/ 0x0104f9, 1, 0x0104d1, /* OSAGE CAPITAL LETTER GHA */ - /*3633*/ 0x0104fa, 1, 0x0104d2, /* OSAGE CAPITAL LETTER ZA */ - /*3636*/ 0x0104fb, 1, 0x0104d3, /* OSAGE CAPITAL LETTER ZHA */ - /*3639*/ 0x010cc0, 1, 0x010c80, /* OLD HUNGARIAN CAPITAL LETTER A */ - /*3642*/ 0x010cc1, 1, 0x010c81, /* OLD HUNGARIAN CAPITAL LETTER AA */ - /*3645*/ 0x010cc2, 1, 0x010c82, /* OLD HUNGARIAN CAPITAL LETTER EB */ - /*3648*/ 0x010cc3, 1, 0x010c83, /* OLD HUNGARIAN CAPITAL LETTER AMB */ - /*3651*/ 0x010cc4, 1, 0x010c84, /* OLD HUNGARIAN CAPITAL LETTER EC */ - /*3654*/ 0x010cc5, 1, 0x010c85, /* OLD HUNGARIAN CAPITAL LETTER ENC */ - /*3657*/ 0x010cc6, 1, 0x010c86, /* OLD HUNGARIAN CAPITAL LETTER ECS */ - /*3660*/ 0x010cc7, 1, 0x010c87, /* OLD HUNGARIAN CAPITAL LETTER ED */ - /*3663*/ 0x010cc8, 1, 0x010c88, /* OLD HUNGARIAN CAPITAL LETTER AND */ - /*3666*/ 0x010cc9, 1, 0x010c89, /* OLD HUNGARIAN CAPITAL LETTER E */ - /*3669*/ 0x010cca, 1, 0x010c8a, /* OLD HUNGARIAN CAPITAL LETTER CLOS.. 
*/ - /*3672*/ 0x010ccb, 1, 0x010c8b, /* OLD HUNGARIAN CAPITAL LETTER EE */ - /*3675*/ 0x010ccc, 1, 0x010c8c, /* OLD HUNGARIAN CAPITAL LETTER EF */ - /*3678*/ 0x010ccd, 1, 0x010c8d, /* OLD HUNGARIAN CAPITAL LETTER EG */ - /*3681*/ 0x010cce, 1, 0x010c8e, /* OLD HUNGARIAN CAPITAL LETTER EGY */ - /*3684*/ 0x010ccf, 1, 0x010c8f, /* OLD HUNGARIAN CAPITAL LETTER EH */ - /*3687*/ 0x010cd0, 1, 0x010c90, /* OLD HUNGARIAN CAPITAL LETTER I */ - /*3690*/ 0x010cd1, 1, 0x010c91, /* OLD HUNGARIAN CAPITAL LETTER II */ - /*3693*/ 0x010cd2, 1, 0x010c92, /* OLD HUNGARIAN CAPITAL LETTER EJ */ - /*3696*/ 0x010cd3, 1, 0x010c93, /* OLD HUNGARIAN CAPITAL LETTER EK */ - /*3699*/ 0x010cd4, 1, 0x010c94, /* OLD HUNGARIAN CAPITAL LETTER AK */ - /*3702*/ 0x010cd5, 1, 0x010c95, /* OLD HUNGARIAN CAPITAL LETTER UNK */ - /*3705*/ 0x010cd6, 1, 0x010c96, /* OLD HUNGARIAN CAPITAL LETTER EL */ - /*3708*/ 0x010cd7, 1, 0x010c97, /* OLD HUNGARIAN CAPITAL LETTER ELY */ - /*3711*/ 0x010cd8, 1, 0x010c98, /* OLD HUNGARIAN CAPITAL LETTER EM */ - /*3714*/ 0x010cd9, 1, 0x010c99, /* OLD HUNGARIAN CAPITAL LETTER EN */ - /*3717*/ 0x010cda, 1, 0x010c9a, /* OLD HUNGARIAN CAPITAL LETTER ENY */ - /*3720*/ 0x010cdb, 1, 0x010c9b, /* OLD HUNGARIAN CAPITAL LETTER O */ - /*3723*/ 0x010cdc, 1, 0x010c9c, /* OLD HUNGARIAN CAPITAL LETTER OO */ - /*3726*/ 0x010cdd, 1, 0x010c9d, /* OLD HUNGARIAN CAPITAL LETTER NIKO.. */ - /*3729*/ 0x010cde, 1, 0x010c9e, /* OLD HUNGARIAN CAPITAL LETTER RUDI.. */ - /*3732*/ 0x010cdf, 1, 0x010c9f, /* OLD HUNGARIAN CAPITAL LETTER OEE */ - /*3735*/ 0x010ce0, 1, 0x010ca0, /* OLD HUNGARIAN CAPITAL LETTER EP */ - /*3738*/ 0x010ce1, 1, 0x010ca1, /* OLD HUNGARIAN CAPITAL LETTER EMP */ - /*3741*/ 0x010ce2, 1, 0x010ca2, /* OLD HUNGARIAN CAPITAL LETTER ER */ - /*3744*/ 0x010ce3, 1, 0x010ca3, /* OLD HUNGARIAN CAPITAL LETTER SHOR.. 
*/ - /*3747*/ 0x010ce4, 1, 0x010ca4, /* OLD HUNGARIAN CAPITAL LETTER ES */ - /*3750*/ 0x010ce5, 1, 0x010ca5, /* OLD HUNGARIAN CAPITAL LETTER ESZ */ - /*3753*/ 0x010ce6, 1, 0x010ca6, /* OLD HUNGARIAN CAPITAL LETTER ET */ - /*3756*/ 0x010ce7, 1, 0x010ca7, /* OLD HUNGARIAN CAPITAL LETTER ENT */ - /*3759*/ 0x010ce8, 1, 0x010ca8, /* OLD HUNGARIAN CAPITAL LETTER ETY */ - /*3762*/ 0x010ce9, 1, 0x010ca9, /* OLD HUNGARIAN CAPITAL LETTER ECH */ - /*3765*/ 0x010cea, 1, 0x010caa, /* OLD HUNGARIAN CAPITAL LETTER U */ - /*3768*/ 0x010ceb, 1, 0x010cab, /* OLD HUNGARIAN CAPITAL LETTER UU */ - /*3771*/ 0x010cec, 1, 0x010cac, /* OLD HUNGARIAN CAPITAL LETTER NIKO.. */ - /*3774*/ 0x010ced, 1, 0x010cad, /* OLD HUNGARIAN CAPITAL LETTER RUDI.. */ - /*3777*/ 0x010cee, 1, 0x010cae, /* OLD HUNGARIAN CAPITAL LETTER EV */ - /*3780*/ 0x010cef, 1, 0x010caf, /* OLD HUNGARIAN CAPITAL LETTER EZ */ - /*3783*/ 0x010cf0, 1, 0x010cb0, /* OLD HUNGARIAN CAPITAL LETTER EZS */ - /*3786*/ 0x010cf1, 1, 0x010cb1, /* OLD HUNGARIAN CAPITAL LETTER ENT-.. 
*/ - /*3789*/ 0x010cf2, 1, 0x010cb2, /* OLD HUNGARIAN CAPITAL LETTER US */ - /*3792*/ 0x0118c0, 1, 0x0118a0, /* WARANG CITI CAPITAL LETTER NGAA */ - /*3795*/ 0x0118c1, 1, 0x0118a1, /* WARANG CITI CAPITAL LETTER A */ - /*3798*/ 0x0118c2, 1, 0x0118a2, /* WARANG CITI CAPITAL LETTER WI */ - /*3801*/ 0x0118c3, 1, 0x0118a3, /* WARANG CITI CAPITAL LETTER YU */ - /*3804*/ 0x0118c4, 1, 0x0118a4, /* WARANG CITI CAPITAL LETTER YA */ - /*3807*/ 0x0118c5, 1, 0x0118a5, /* WARANG CITI CAPITAL LETTER YO */ - /*3810*/ 0x0118c6, 1, 0x0118a6, /* WARANG CITI CAPITAL LETTER II */ - /*3813*/ 0x0118c7, 1, 0x0118a7, /* WARANG CITI CAPITAL LETTER UU */ - /*3816*/ 0x0118c8, 1, 0x0118a8, /* WARANG CITI CAPITAL LETTER E */ - /*3819*/ 0x0118c9, 1, 0x0118a9, /* WARANG CITI CAPITAL LETTER O */ - /*3822*/ 0x0118ca, 1, 0x0118aa, /* WARANG CITI CAPITAL LETTER ANG */ - /*3825*/ 0x0118cb, 1, 0x0118ab, /* WARANG CITI CAPITAL LETTER GA */ - /*3828*/ 0x0118cc, 1, 0x0118ac, /* WARANG CITI CAPITAL LETTER KO */ - /*3831*/ 0x0118cd, 1, 0x0118ad, /* WARANG CITI CAPITAL LETTER ENY */ - /*3834*/ 0x0118ce, 1, 0x0118ae, /* WARANG CITI CAPITAL LETTER YUJ */ - /*3837*/ 0x0118cf, 1, 0x0118af, /* WARANG CITI CAPITAL LETTER UC */ - /*3840*/ 0x0118d0, 1, 0x0118b0, /* WARANG CITI CAPITAL LETTER ENN */ - /*3843*/ 0x0118d1, 1, 0x0118b1, /* WARANG CITI CAPITAL LETTER ODD */ - /*3846*/ 0x0118d2, 1, 0x0118b2, /* WARANG CITI CAPITAL LETTER TTE */ - /*3849*/ 0x0118d3, 1, 0x0118b3, /* WARANG CITI CAPITAL LETTER NUNG */ - /*3852*/ 0x0118d4, 1, 0x0118b4, /* WARANG CITI CAPITAL LETTER DA */ - /*3855*/ 0x0118d5, 1, 0x0118b5, /* WARANG CITI CAPITAL LETTER AT */ - /*3858*/ 0x0118d6, 1, 0x0118b6, /* WARANG CITI CAPITAL LETTER AM */ - /*3861*/ 0x0118d7, 1, 0x0118b7, /* WARANG CITI CAPITAL LETTER BU */ - /*3864*/ 0x0118d8, 1, 0x0118b8, /* WARANG CITI CAPITAL LETTER PU */ - /*3867*/ 0x0118d9, 1, 0x0118b9, /* WARANG CITI CAPITAL LETTER HIYO */ - /*3870*/ 0x0118da, 1, 0x0118ba, /* WARANG CITI CAPITAL LETTER HOLO */ - /*3873*/ 0x0118db, 1, 
0x0118bb, /* WARANG CITI CAPITAL LETTER HORR */ - /*3876*/ 0x0118dc, 1, 0x0118bc, /* WARANG CITI CAPITAL LETTER HAR */ - /*3879*/ 0x0118dd, 1, 0x0118bd, /* WARANG CITI CAPITAL LETTER SSUU */ - /*3882*/ 0x0118de, 1, 0x0118be, /* WARANG CITI CAPITAL LETTER SII */ - /*3885*/ 0x0118df, 1, 0x0118bf, /* WARANG CITI CAPITAL LETTER VIYO */ - /*3888*/ 0x016e60, 1, 0x016e40, /* MEDEFAIDRIN CAPITAL LETTER M */ - /*3891*/ 0x016e61, 1, 0x016e41, /* MEDEFAIDRIN CAPITAL LETTER S */ - /*3894*/ 0x016e62, 1, 0x016e42, /* MEDEFAIDRIN CAPITAL LETTER V */ - /*3897*/ 0x016e63, 1, 0x016e43, /* MEDEFAIDRIN CAPITAL LETTER W */ - /*3900*/ 0x016e64, 1, 0x016e44, /* MEDEFAIDRIN CAPITAL LETTER ATIU */ - /*3903*/ 0x016e65, 1, 0x016e45, /* MEDEFAIDRIN CAPITAL LETTER Z */ - /*3906*/ 0x016e66, 1, 0x016e46, /* MEDEFAIDRIN CAPITAL LETTER KP */ - /*3909*/ 0x016e67, 1, 0x016e47, /* MEDEFAIDRIN CAPITAL LETTER P */ - /*3912*/ 0x016e68, 1, 0x016e48, /* MEDEFAIDRIN CAPITAL LETTER T */ - /*3915*/ 0x016e69, 1, 0x016e49, /* MEDEFAIDRIN CAPITAL LETTER G */ - /*3918*/ 0x016e6a, 1, 0x016e4a, /* MEDEFAIDRIN CAPITAL LETTER F */ - /*3921*/ 0x016e6b, 1, 0x016e4b, /* MEDEFAIDRIN CAPITAL LETTER I */ - /*3924*/ 0x016e6c, 1, 0x016e4c, /* MEDEFAIDRIN CAPITAL LETTER K */ - /*3927*/ 0x016e6d, 1, 0x016e4d, /* MEDEFAIDRIN CAPITAL LETTER A */ - /*3930*/ 0x016e6e, 1, 0x016e4e, /* MEDEFAIDRIN CAPITAL LETTER J */ - /*3933*/ 0x016e6f, 1, 0x016e4f, /* MEDEFAIDRIN CAPITAL LETTER E */ - /*3936*/ 0x016e70, 1, 0x016e50, /* MEDEFAIDRIN CAPITAL LETTER B */ - /*3939*/ 0x016e71, 1, 0x016e51, /* MEDEFAIDRIN CAPITAL LETTER C */ - /*3942*/ 0x016e72, 1, 0x016e52, /* MEDEFAIDRIN CAPITAL LETTER U */ - /*3945*/ 0x016e73, 1, 0x016e53, /* MEDEFAIDRIN CAPITAL LETTER YU */ - /*3948*/ 0x016e74, 1, 0x016e54, /* MEDEFAIDRIN CAPITAL LETTER L */ - /*3951*/ 0x016e75, 1, 0x016e55, /* MEDEFAIDRIN CAPITAL LETTER Q */ - /*3954*/ 0x016e76, 1, 0x016e56, /* MEDEFAIDRIN CAPITAL LETTER HP */ - /*3957*/ 0x016e77, 1, 0x016e57, /* MEDEFAIDRIN CAPITAL LETTER NY */ - 
/*3960*/ 0x016e78, 1, 0x016e58, /* MEDEFAIDRIN CAPITAL LETTER X */ - /*3963*/ 0x016e79, 1, 0x016e59, /* MEDEFAIDRIN CAPITAL LETTER D */ - /*3966*/ 0x016e7a, 1, 0x016e5a, /* MEDEFAIDRIN CAPITAL LETTER OE */ - /*3969*/ 0x016e7b, 1, 0x016e5b, /* MEDEFAIDRIN CAPITAL LETTER N */ - /*3972*/ 0x016e7c, 1, 0x016e5c, /* MEDEFAIDRIN CAPITAL LETTER R */ - /*3975*/ 0x016e7d, 1, 0x016e5d, /* MEDEFAIDRIN CAPITAL LETTER O */ - /*3978*/ 0x016e7e, 1, 0x016e5e, /* MEDEFAIDRIN CAPITAL LETTER AI */ - /*3981*/ 0x016e7f, 1, 0x016e5f, /* MEDEFAIDRIN CAPITAL LETTER Y */ - /*3984*/ 0x01e922, 1, 0x01e900, /* ADLAM CAPITAL LETTER ALIF */ - /*3987*/ 0x01e923, 1, 0x01e901, /* ADLAM CAPITAL LETTER DAALI */ - /*3990*/ 0x01e924, 1, 0x01e902, /* ADLAM CAPITAL LETTER LAAM */ - /*3993*/ 0x01e925, 1, 0x01e903, /* ADLAM CAPITAL LETTER MIIM */ - /*3996*/ 0x01e926, 1, 0x01e904, /* ADLAM CAPITAL LETTER BA */ - /*3999*/ 0x01e927, 1, 0x01e905, /* ADLAM CAPITAL LETTER SINNYIIYHE */ - /*4002*/ 0x01e928, 1, 0x01e906, /* ADLAM CAPITAL LETTER PE */ - /*4005*/ 0x01e929, 1, 0x01e907, /* ADLAM CAPITAL LETTER BHE */ - /*4008*/ 0x01e92a, 1, 0x01e908, /* ADLAM CAPITAL LETTER RA */ - /*4011*/ 0x01e92b, 1, 0x01e909, /* ADLAM CAPITAL LETTER E */ - /*4014*/ 0x01e92c, 1, 0x01e90a, /* ADLAM CAPITAL LETTER FA */ - /*4017*/ 0x01e92d, 1, 0x01e90b, /* ADLAM CAPITAL LETTER I */ - /*4020*/ 0x01e92e, 1, 0x01e90c, /* ADLAM CAPITAL LETTER O */ - /*4023*/ 0x01e92f, 1, 0x01e90d, /* ADLAM CAPITAL LETTER DHA */ - /*4026*/ 0x01e930, 1, 0x01e90e, /* ADLAM CAPITAL LETTER YHE */ - /*4029*/ 0x01e931, 1, 0x01e90f, /* ADLAM CAPITAL LETTER WAW */ - /*4032*/ 0x01e932, 1, 0x01e910, /* ADLAM CAPITAL LETTER NUN */ - /*4035*/ 0x01e933, 1, 0x01e911, /* ADLAM CAPITAL LETTER KAF */ - /*4038*/ 0x01e934, 1, 0x01e912, /* ADLAM CAPITAL LETTER YA */ - /*4041*/ 0x01e935, 1, 0x01e913, /* ADLAM CAPITAL LETTER U */ - /*4044*/ 0x01e936, 1, 0x01e914, /* ADLAM CAPITAL LETTER JIIM */ - /*4047*/ 0x01e937, 1, 0x01e915, /* ADLAM CAPITAL LETTER CHI */ - /*4050*/ 
0x01e938, 1, 0x01e916, /* ADLAM CAPITAL LETTER HA */ - /*4053*/ 0x01e939, 1, 0x01e917, /* ADLAM CAPITAL LETTER QAAF */ - /*4056*/ 0x01e93a, 1, 0x01e918, /* ADLAM CAPITAL LETTER GA */ - /*4059*/ 0x01e93b, 1, 0x01e919, /* ADLAM CAPITAL LETTER NYA */ - /*4062*/ 0x01e93c, 1, 0x01e91a, /* ADLAM CAPITAL LETTER TU */ - /*4065*/ 0x01e93d, 1, 0x01e91b, /* ADLAM CAPITAL LETTER NHA */ - /*4068*/ 0x01e93e, 1, 0x01e91c, /* ADLAM CAPITAL LETTER VA */ - /*4071*/ 0x01e93f, 1, 0x01e91d, /* ADLAM CAPITAL LETTER KHA */ - /*4074*/ 0x01e940, 1, 0x01e91e, /* ADLAM CAPITAL LETTER GBE */ - /*4077*/ 0x01e941, 1, 0x01e91f, /* ADLAM CAPITAL LETTER ZAL */ - /*4080*/ 0x01e942, 1, 0x01e920, /* ADLAM CAPITAL LETTER KPO */ - /*4083*/ 0x01e943, 1, 0x01e921, /* ADLAM CAPITAL LETTER SHA */ -#define FOLDS1_NORMAL_END_INDEX 4086 + /*3330*/ 0xa7c8, 1, 0xa7c7, /* LATIN CAPITAL LETTER D WITH SHORT.. */ + /*3333*/ 0xa7ca, 1, 0xa7c9, /* LATIN CAPITAL LETTER S WITH SHORT.. */ + /*3336*/ 0xa7f6, 1, 0xa7f5, /* LATIN CAPITAL LETTER REVERSED HAL.. 
*/ + /*3339*/ 0xab53, 1, 0xa7b3, /* LATIN CAPITAL LETTER CHI */ + /*3342*/ 0xff41, 1, 0xff21, /* FULLWIDTH LATIN CAPITAL LETTER A */ + /*3345*/ 0xff42, 1, 0xff22, /* FULLWIDTH LATIN CAPITAL LETTER B */ + /*3348*/ 0xff43, 1, 0xff23, /* FULLWIDTH LATIN CAPITAL LETTER C */ + /*3351*/ 0xff44, 1, 0xff24, /* FULLWIDTH LATIN CAPITAL LETTER D */ + /*3354*/ 0xff45, 1, 0xff25, /* FULLWIDTH LATIN CAPITAL LETTER E */ + /*3357*/ 0xff46, 1, 0xff26, /* FULLWIDTH LATIN CAPITAL LETTER F */ + /*3360*/ 0xff47, 1, 0xff27, /* FULLWIDTH LATIN CAPITAL LETTER G */ + /*3363*/ 0xff48, 1, 0xff28, /* FULLWIDTH LATIN CAPITAL LETTER H */ + /*3366*/ 0xff49, 1, 0xff29, /* FULLWIDTH LATIN CAPITAL LETTER I */ + /*3369*/ 0xff4a, 1, 0xff2a, /* FULLWIDTH LATIN CAPITAL LETTER J */ + /*3372*/ 0xff4b, 1, 0xff2b, /* FULLWIDTH LATIN CAPITAL LETTER K */ + /*3375*/ 0xff4c, 1, 0xff2c, /* FULLWIDTH LATIN CAPITAL LETTER L */ + /*3378*/ 0xff4d, 1, 0xff2d, /* FULLWIDTH LATIN CAPITAL LETTER M */ + /*3381*/ 0xff4e, 1, 0xff2e, /* FULLWIDTH LATIN CAPITAL LETTER N */ + /*3384*/ 0xff4f, 1, 0xff2f, /* FULLWIDTH LATIN CAPITAL LETTER O */ + /*3387*/ 0xff50, 1, 0xff30, /* FULLWIDTH LATIN CAPITAL LETTER P */ + /*3390*/ 0xff51, 1, 0xff31, /* FULLWIDTH LATIN CAPITAL LETTER Q */ + /*3393*/ 0xff52, 1, 0xff32, /* FULLWIDTH LATIN CAPITAL LETTER R */ + /*3396*/ 0xff53, 1, 0xff33, /* FULLWIDTH LATIN CAPITAL LETTER S */ + /*3399*/ 0xff54, 1, 0xff34, /* FULLWIDTH LATIN CAPITAL LETTER T */ + /*3402*/ 0xff55, 1, 0xff35, /* FULLWIDTH LATIN CAPITAL LETTER U */ + /*3405*/ 0xff56, 1, 0xff36, /* FULLWIDTH LATIN CAPITAL LETTER V */ + /*3408*/ 0xff57, 1, 0xff37, /* FULLWIDTH LATIN CAPITAL LETTER W */ + /*3411*/ 0xff58, 1, 0xff38, /* FULLWIDTH LATIN CAPITAL LETTER X */ + /*3414*/ 0xff59, 1, 0xff39, /* FULLWIDTH LATIN CAPITAL LETTER Y */ + /*3417*/ 0xff5a, 1, 0xff3a, /* FULLWIDTH LATIN CAPITAL LETTER Z */ + /*3420*/ 0x010428, 1, 0x010400, /* DESERET CAPITAL LETTER LONG I */ + /*3423*/ 0x010429, 1, 0x010401, /* DESERET CAPITAL LETTER LONG E */ + 
/*3426*/ 0x01042a, 1, 0x010402, /* DESERET CAPITAL LETTER LONG A */ + /*3429*/ 0x01042b, 1, 0x010403, /* DESERET CAPITAL LETTER LONG AH */ + /*3432*/ 0x01042c, 1, 0x010404, /* DESERET CAPITAL LETTER LONG O */ + /*3435*/ 0x01042d, 1, 0x010405, /* DESERET CAPITAL LETTER LONG OO */ + /*3438*/ 0x01042e, 1, 0x010406, /* DESERET CAPITAL LETTER SHORT I */ + /*3441*/ 0x01042f, 1, 0x010407, /* DESERET CAPITAL LETTER SHORT E */ + /*3444*/ 0x010430, 1, 0x010408, /* DESERET CAPITAL LETTER SHORT A */ + /*3447*/ 0x010431, 1, 0x010409, /* DESERET CAPITAL LETTER SHORT AH */ + /*3450*/ 0x010432, 1, 0x01040a, /* DESERET CAPITAL LETTER SHORT O */ + /*3453*/ 0x010433, 1, 0x01040b, /* DESERET CAPITAL LETTER SHORT OO */ + /*3456*/ 0x010434, 1, 0x01040c, /* DESERET CAPITAL LETTER AY */ + /*3459*/ 0x010435, 1, 0x01040d, /* DESERET CAPITAL LETTER OW */ + /*3462*/ 0x010436, 1, 0x01040e, /* DESERET CAPITAL LETTER WU */ + /*3465*/ 0x010437, 1, 0x01040f, /* DESERET CAPITAL LETTER YEE */ + /*3468*/ 0x010438, 1, 0x010410, /* DESERET CAPITAL LETTER H */ + /*3471*/ 0x010439, 1, 0x010411, /* DESERET CAPITAL LETTER PEE */ + /*3474*/ 0x01043a, 1, 0x010412, /* DESERET CAPITAL LETTER BEE */ + /*3477*/ 0x01043b, 1, 0x010413, /* DESERET CAPITAL LETTER TEE */ + /*3480*/ 0x01043c, 1, 0x010414, /* DESERET CAPITAL LETTER DEE */ + /*3483*/ 0x01043d, 1, 0x010415, /* DESERET CAPITAL LETTER CHEE */ + /*3486*/ 0x01043e, 1, 0x010416, /* DESERET CAPITAL LETTER JEE */ + /*3489*/ 0x01043f, 1, 0x010417, /* DESERET CAPITAL LETTER KAY */ + /*3492*/ 0x010440, 1, 0x010418, /* DESERET CAPITAL LETTER GAY */ + /*3495*/ 0x010441, 1, 0x010419, /* DESERET CAPITAL LETTER EF */ + /*3498*/ 0x010442, 1, 0x01041a, /* DESERET CAPITAL LETTER VEE */ + /*3501*/ 0x010443, 1, 0x01041b, /* DESERET CAPITAL LETTER ETH */ + /*3504*/ 0x010444, 1, 0x01041c, /* DESERET CAPITAL LETTER THEE */ + /*3507*/ 0x010445, 1, 0x01041d, /* DESERET CAPITAL LETTER ES */ + /*3510*/ 0x010446, 1, 0x01041e, /* DESERET CAPITAL LETTER ZEE */ + /*3513*/ 0x010447, 1, 
0x01041f, /* DESERET CAPITAL LETTER ESH */ + /*3516*/ 0x010448, 1, 0x010420, /* DESERET CAPITAL LETTER ZHEE */ + /*3519*/ 0x010449, 1, 0x010421, /* DESERET CAPITAL LETTER ER */ + /*3522*/ 0x01044a, 1, 0x010422, /* DESERET CAPITAL LETTER EL */ + /*3525*/ 0x01044b, 1, 0x010423, /* DESERET CAPITAL LETTER EM */ + /*3528*/ 0x01044c, 1, 0x010424, /* DESERET CAPITAL LETTER EN */ + /*3531*/ 0x01044d, 1, 0x010425, /* DESERET CAPITAL LETTER ENG */ + /*3534*/ 0x01044e, 1, 0x010426, /* DESERET CAPITAL LETTER OI */ + /*3537*/ 0x01044f, 1, 0x010427, /* DESERET CAPITAL LETTER EW */ + /*3540*/ 0x0104d8, 1, 0x0104b0, /* OSAGE CAPITAL LETTER A */ + /*3543*/ 0x0104d9, 1, 0x0104b1, /* OSAGE CAPITAL LETTER AI */ + /*3546*/ 0x0104da, 1, 0x0104b2, /* OSAGE CAPITAL LETTER AIN */ + /*3549*/ 0x0104db, 1, 0x0104b3, /* OSAGE CAPITAL LETTER AH */ + /*3552*/ 0x0104dc, 1, 0x0104b4, /* OSAGE CAPITAL LETTER BRA */ + /*3555*/ 0x0104dd, 1, 0x0104b5, /* OSAGE CAPITAL LETTER CHA */ + /*3558*/ 0x0104de, 1, 0x0104b6, /* OSAGE CAPITAL LETTER EHCHA */ + /*3561*/ 0x0104df, 1, 0x0104b7, /* OSAGE CAPITAL LETTER E */ + /*3564*/ 0x0104e0, 1, 0x0104b8, /* OSAGE CAPITAL LETTER EIN */ + /*3567*/ 0x0104e1, 1, 0x0104b9, /* OSAGE CAPITAL LETTER HA */ + /*3570*/ 0x0104e2, 1, 0x0104ba, /* OSAGE CAPITAL LETTER HYA */ + /*3573*/ 0x0104e3, 1, 0x0104bb, /* OSAGE CAPITAL LETTER I */ + /*3576*/ 0x0104e4, 1, 0x0104bc, /* OSAGE CAPITAL LETTER KA */ + /*3579*/ 0x0104e5, 1, 0x0104bd, /* OSAGE CAPITAL LETTER EHKA */ + /*3582*/ 0x0104e6, 1, 0x0104be, /* OSAGE CAPITAL LETTER KYA */ + /*3585*/ 0x0104e7, 1, 0x0104bf, /* OSAGE CAPITAL LETTER LA */ + /*3588*/ 0x0104e8, 1, 0x0104c0, /* OSAGE CAPITAL LETTER MA */ + /*3591*/ 0x0104e9, 1, 0x0104c1, /* OSAGE CAPITAL LETTER NA */ + /*3594*/ 0x0104ea, 1, 0x0104c2, /* OSAGE CAPITAL LETTER O */ + /*3597*/ 0x0104eb, 1, 0x0104c3, /* OSAGE CAPITAL LETTER OIN */ + /*3600*/ 0x0104ec, 1, 0x0104c4, /* OSAGE CAPITAL LETTER PA */ + /*3603*/ 0x0104ed, 1, 0x0104c5, /* OSAGE CAPITAL LETTER EHPA */ + 
/*3606*/ 0x0104ee, 1, 0x0104c6, /* OSAGE CAPITAL LETTER SA */ + /*3609*/ 0x0104ef, 1, 0x0104c7, /* OSAGE CAPITAL LETTER SHA */ + /*3612*/ 0x0104f0, 1, 0x0104c8, /* OSAGE CAPITAL LETTER TA */ + /*3615*/ 0x0104f1, 1, 0x0104c9, /* OSAGE CAPITAL LETTER EHTA */ + /*3618*/ 0x0104f2, 1, 0x0104ca, /* OSAGE CAPITAL LETTER TSA */ + /*3621*/ 0x0104f3, 1, 0x0104cb, /* OSAGE CAPITAL LETTER EHTSA */ + /*3624*/ 0x0104f4, 1, 0x0104cc, /* OSAGE CAPITAL LETTER TSHA */ + /*3627*/ 0x0104f5, 1, 0x0104cd, /* OSAGE CAPITAL LETTER DHA */ + /*3630*/ 0x0104f6, 1, 0x0104ce, /* OSAGE CAPITAL LETTER U */ + /*3633*/ 0x0104f7, 1, 0x0104cf, /* OSAGE CAPITAL LETTER WA */ + /*3636*/ 0x0104f8, 1, 0x0104d0, /* OSAGE CAPITAL LETTER KHA */ + /*3639*/ 0x0104f9, 1, 0x0104d1, /* OSAGE CAPITAL LETTER GHA */ + /*3642*/ 0x0104fa, 1, 0x0104d2, /* OSAGE CAPITAL LETTER ZA */ + /*3645*/ 0x0104fb, 1, 0x0104d3, /* OSAGE CAPITAL LETTER ZHA */ + /*3648*/ 0x010cc0, 1, 0x010c80, /* OLD HUNGARIAN CAPITAL LETTER A */ + /*3651*/ 0x010cc1, 1, 0x010c81, /* OLD HUNGARIAN CAPITAL LETTER AA */ + /*3654*/ 0x010cc2, 1, 0x010c82, /* OLD HUNGARIAN CAPITAL LETTER EB */ + /*3657*/ 0x010cc3, 1, 0x010c83, /* OLD HUNGARIAN CAPITAL LETTER AMB */ + /*3660*/ 0x010cc4, 1, 0x010c84, /* OLD HUNGARIAN CAPITAL LETTER EC */ + /*3663*/ 0x010cc5, 1, 0x010c85, /* OLD HUNGARIAN CAPITAL LETTER ENC */ + /*3666*/ 0x010cc6, 1, 0x010c86, /* OLD HUNGARIAN CAPITAL LETTER ECS */ + /*3669*/ 0x010cc7, 1, 0x010c87, /* OLD HUNGARIAN CAPITAL LETTER ED */ + /*3672*/ 0x010cc8, 1, 0x010c88, /* OLD HUNGARIAN CAPITAL LETTER AND */ + /*3675*/ 0x010cc9, 1, 0x010c89, /* OLD HUNGARIAN CAPITAL LETTER E */ + /*3678*/ 0x010cca, 1, 0x010c8a, /* OLD HUNGARIAN CAPITAL LETTER CLOS.. 
*/ + /*3681*/ 0x010ccb, 1, 0x010c8b, /* OLD HUNGARIAN CAPITAL LETTER EE */ + /*3684*/ 0x010ccc, 1, 0x010c8c, /* OLD HUNGARIAN CAPITAL LETTER EF */ + /*3687*/ 0x010ccd, 1, 0x010c8d, /* OLD HUNGARIAN CAPITAL LETTER EG */ + /*3690*/ 0x010cce, 1, 0x010c8e, /* OLD HUNGARIAN CAPITAL LETTER EGY */ + /*3693*/ 0x010ccf, 1, 0x010c8f, /* OLD HUNGARIAN CAPITAL LETTER EH */ + /*3696*/ 0x010cd0, 1, 0x010c90, /* OLD HUNGARIAN CAPITAL LETTER I */ + /*3699*/ 0x010cd1, 1, 0x010c91, /* OLD HUNGARIAN CAPITAL LETTER II */ + /*3702*/ 0x010cd2, 1, 0x010c92, /* OLD HUNGARIAN CAPITAL LETTER EJ */ + /*3705*/ 0x010cd3, 1, 0x010c93, /* OLD HUNGARIAN CAPITAL LETTER EK */ + /*3708*/ 0x010cd4, 1, 0x010c94, /* OLD HUNGARIAN CAPITAL LETTER AK */ + /*3711*/ 0x010cd5, 1, 0x010c95, /* OLD HUNGARIAN CAPITAL LETTER UNK */ + /*3714*/ 0x010cd6, 1, 0x010c96, /* OLD HUNGARIAN CAPITAL LETTER EL */ + /*3717*/ 0x010cd7, 1, 0x010c97, /* OLD HUNGARIAN CAPITAL LETTER ELY */ + /*3720*/ 0x010cd8, 1, 0x010c98, /* OLD HUNGARIAN CAPITAL LETTER EM */ + /*3723*/ 0x010cd9, 1, 0x010c99, /* OLD HUNGARIAN CAPITAL LETTER EN */ + /*3726*/ 0x010cda, 1, 0x010c9a, /* OLD HUNGARIAN CAPITAL LETTER ENY */ + /*3729*/ 0x010cdb, 1, 0x010c9b, /* OLD HUNGARIAN CAPITAL LETTER O */ + /*3732*/ 0x010cdc, 1, 0x010c9c, /* OLD HUNGARIAN CAPITAL LETTER OO */ + /*3735*/ 0x010cdd, 1, 0x010c9d, /* OLD HUNGARIAN CAPITAL LETTER NIKO.. */ + /*3738*/ 0x010cde, 1, 0x010c9e, /* OLD HUNGARIAN CAPITAL LETTER RUDI.. */ + /*3741*/ 0x010cdf, 1, 0x010c9f, /* OLD HUNGARIAN CAPITAL LETTER OEE */ + /*3744*/ 0x010ce0, 1, 0x010ca0, /* OLD HUNGARIAN CAPITAL LETTER EP */ + /*3747*/ 0x010ce1, 1, 0x010ca1, /* OLD HUNGARIAN CAPITAL LETTER EMP */ + /*3750*/ 0x010ce2, 1, 0x010ca2, /* OLD HUNGARIAN CAPITAL LETTER ER */ + /*3753*/ 0x010ce3, 1, 0x010ca3, /* OLD HUNGARIAN CAPITAL LETTER SHOR.. 
*/ + /*3756*/ 0x010ce4, 1, 0x010ca4, /* OLD HUNGARIAN CAPITAL LETTER ES */ + /*3759*/ 0x010ce5, 1, 0x010ca5, /* OLD HUNGARIAN CAPITAL LETTER ESZ */ + /*3762*/ 0x010ce6, 1, 0x010ca6, /* OLD HUNGARIAN CAPITAL LETTER ET */ + /*3765*/ 0x010ce7, 1, 0x010ca7, /* OLD HUNGARIAN CAPITAL LETTER ENT */ + /*3768*/ 0x010ce8, 1, 0x010ca8, /* OLD HUNGARIAN CAPITAL LETTER ETY */ + /*3771*/ 0x010ce9, 1, 0x010ca9, /* OLD HUNGARIAN CAPITAL LETTER ECH */ + /*3774*/ 0x010cea, 1, 0x010caa, /* OLD HUNGARIAN CAPITAL LETTER U */ + /*3777*/ 0x010ceb, 1, 0x010cab, /* OLD HUNGARIAN CAPITAL LETTER UU */ + /*3780*/ 0x010cec, 1, 0x010cac, /* OLD HUNGARIAN CAPITAL LETTER NIKO.. */ + /*3783*/ 0x010ced, 1, 0x010cad, /* OLD HUNGARIAN CAPITAL LETTER RUDI.. */ + /*3786*/ 0x010cee, 1, 0x010cae, /* OLD HUNGARIAN CAPITAL LETTER EV */ + /*3789*/ 0x010cef, 1, 0x010caf, /* OLD HUNGARIAN CAPITAL LETTER EZ */ + /*3792*/ 0x010cf0, 1, 0x010cb0, /* OLD HUNGARIAN CAPITAL LETTER EZS */ + /*3795*/ 0x010cf1, 1, 0x010cb1, /* OLD HUNGARIAN CAPITAL LETTER ENT-.. 
*/ + /*3798*/ 0x010cf2, 1, 0x010cb2, /* OLD HUNGARIAN CAPITAL LETTER US */ + /*3801*/ 0x0118c0, 1, 0x0118a0, /* WARANG CITI CAPITAL LETTER NGAA */ + /*3804*/ 0x0118c1, 1, 0x0118a1, /* WARANG CITI CAPITAL LETTER A */ + /*3807*/ 0x0118c2, 1, 0x0118a2, /* WARANG CITI CAPITAL LETTER WI */ + /*3810*/ 0x0118c3, 1, 0x0118a3, /* WARANG CITI CAPITAL LETTER YU */ + /*3813*/ 0x0118c4, 1, 0x0118a4, /* WARANG CITI CAPITAL LETTER YA */ + /*3816*/ 0x0118c5, 1, 0x0118a5, /* WARANG CITI CAPITAL LETTER YO */ + /*3819*/ 0x0118c6, 1, 0x0118a6, /* WARANG CITI CAPITAL LETTER II */ + /*3822*/ 0x0118c7, 1, 0x0118a7, /* WARANG CITI CAPITAL LETTER UU */ + /*3825*/ 0x0118c8, 1, 0x0118a8, /* WARANG CITI CAPITAL LETTER E */ + /*3828*/ 0x0118c9, 1, 0x0118a9, /* WARANG CITI CAPITAL LETTER O */ + /*3831*/ 0x0118ca, 1, 0x0118aa, /* WARANG CITI CAPITAL LETTER ANG */ + /*3834*/ 0x0118cb, 1, 0x0118ab, /* WARANG CITI CAPITAL LETTER GA */ + /*3837*/ 0x0118cc, 1, 0x0118ac, /* WARANG CITI CAPITAL LETTER KO */ + /*3840*/ 0x0118cd, 1, 0x0118ad, /* WARANG CITI CAPITAL LETTER ENY */ + /*3843*/ 0x0118ce, 1, 0x0118ae, /* WARANG CITI CAPITAL LETTER YUJ */ + /*3846*/ 0x0118cf, 1, 0x0118af, /* WARANG CITI CAPITAL LETTER UC */ + /*3849*/ 0x0118d0, 1, 0x0118b0, /* WARANG CITI CAPITAL LETTER ENN */ + /*3852*/ 0x0118d1, 1, 0x0118b1, /* WARANG CITI CAPITAL LETTER ODD */ + /*3855*/ 0x0118d2, 1, 0x0118b2, /* WARANG CITI CAPITAL LETTER TTE */ + /*3858*/ 0x0118d3, 1, 0x0118b3, /* WARANG CITI CAPITAL LETTER NUNG */ + /*3861*/ 0x0118d4, 1, 0x0118b4, /* WARANG CITI CAPITAL LETTER DA */ + /*3864*/ 0x0118d5, 1, 0x0118b5, /* WARANG CITI CAPITAL LETTER AT */ + /*3867*/ 0x0118d6, 1, 0x0118b6, /* WARANG CITI CAPITAL LETTER AM */ + /*3870*/ 0x0118d7, 1, 0x0118b7, /* WARANG CITI CAPITAL LETTER BU */ + /*3873*/ 0x0118d8, 1, 0x0118b8, /* WARANG CITI CAPITAL LETTER PU */ + /*3876*/ 0x0118d9, 1, 0x0118b9, /* WARANG CITI CAPITAL LETTER HIYO */ + /*3879*/ 0x0118da, 1, 0x0118ba, /* WARANG CITI CAPITAL LETTER HOLO */ + /*3882*/ 0x0118db, 1, 
0x0118bb, /* WARANG CITI CAPITAL LETTER HORR */ + /*3885*/ 0x0118dc, 1, 0x0118bc, /* WARANG CITI CAPITAL LETTER HAR */ + /*3888*/ 0x0118dd, 1, 0x0118bd, /* WARANG CITI CAPITAL LETTER SSUU */ + /*3891*/ 0x0118de, 1, 0x0118be, /* WARANG CITI CAPITAL LETTER SII */ + /*3894*/ 0x0118df, 1, 0x0118bf, /* WARANG CITI CAPITAL LETTER VIYO */ + /*3897*/ 0x016e60, 1, 0x016e40, /* MEDEFAIDRIN CAPITAL LETTER M */ + /*3900*/ 0x016e61, 1, 0x016e41, /* MEDEFAIDRIN CAPITAL LETTER S */ + /*3903*/ 0x016e62, 1, 0x016e42, /* MEDEFAIDRIN CAPITAL LETTER V */ + /*3906*/ 0x016e63, 1, 0x016e43, /* MEDEFAIDRIN CAPITAL LETTER W */ + /*3909*/ 0x016e64, 1, 0x016e44, /* MEDEFAIDRIN CAPITAL LETTER ATIU */ + /*3912*/ 0x016e65, 1, 0x016e45, /* MEDEFAIDRIN CAPITAL LETTER Z */ + /*3915*/ 0x016e66, 1, 0x016e46, /* MEDEFAIDRIN CAPITAL LETTER KP */ + /*3918*/ 0x016e67, 1, 0x016e47, /* MEDEFAIDRIN CAPITAL LETTER P */ + /*3921*/ 0x016e68, 1, 0x016e48, /* MEDEFAIDRIN CAPITAL LETTER T */ + /*3924*/ 0x016e69, 1, 0x016e49, /* MEDEFAIDRIN CAPITAL LETTER G */ + /*3927*/ 0x016e6a, 1, 0x016e4a, /* MEDEFAIDRIN CAPITAL LETTER F */ + /*3930*/ 0x016e6b, 1, 0x016e4b, /* MEDEFAIDRIN CAPITAL LETTER I */ + /*3933*/ 0x016e6c, 1, 0x016e4c, /* MEDEFAIDRIN CAPITAL LETTER K */ + /*3936*/ 0x016e6d, 1, 0x016e4d, /* MEDEFAIDRIN CAPITAL LETTER A */ + /*3939*/ 0x016e6e, 1, 0x016e4e, /* MEDEFAIDRIN CAPITAL LETTER J */ + /*3942*/ 0x016e6f, 1, 0x016e4f, /* MEDEFAIDRIN CAPITAL LETTER E */ + /*3945*/ 0x016e70, 1, 0x016e50, /* MEDEFAIDRIN CAPITAL LETTER B */ + /*3948*/ 0x016e71, 1, 0x016e51, /* MEDEFAIDRIN CAPITAL LETTER C */ + /*3951*/ 0x016e72, 1, 0x016e52, /* MEDEFAIDRIN CAPITAL LETTER U */ + /*3954*/ 0x016e73, 1, 0x016e53, /* MEDEFAIDRIN CAPITAL LETTER YU */ + /*3957*/ 0x016e74, 1, 0x016e54, /* MEDEFAIDRIN CAPITAL LETTER L */ + /*3960*/ 0x016e75, 1, 0x016e55, /* MEDEFAIDRIN CAPITAL LETTER Q */ + /*3963*/ 0x016e76, 1, 0x016e56, /* MEDEFAIDRIN CAPITAL LETTER HP */ + /*3966*/ 0x016e77, 1, 0x016e57, /* MEDEFAIDRIN CAPITAL LETTER NY */ + 
/*3969*/ 0x016e78, 1, 0x016e58, /* MEDEFAIDRIN CAPITAL LETTER X */ + /*3972*/ 0x016e79, 1, 0x016e59, /* MEDEFAIDRIN CAPITAL LETTER D */ + /*3975*/ 0x016e7a, 1, 0x016e5a, /* MEDEFAIDRIN CAPITAL LETTER OE */ + /*3978*/ 0x016e7b, 1, 0x016e5b, /* MEDEFAIDRIN CAPITAL LETTER N */ + /*3981*/ 0x016e7c, 1, 0x016e5c, /* MEDEFAIDRIN CAPITAL LETTER R */ + /*3984*/ 0x016e7d, 1, 0x016e5d, /* MEDEFAIDRIN CAPITAL LETTER O */ + /*3987*/ 0x016e7e, 1, 0x016e5e, /* MEDEFAIDRIN CAPITAL LETTER AI */ + /*3990*/ 0x016e7f, 1, 0x016e5f, /* MEDEFAIDRIN CAPITAL LETTER Y */ + /*3993*/ 0x01e922, 1, 0x01e900, /* ADLAM CAPITAL LETTER ALIF */ + /*3996*/ 0x01e923, 1, 0x01e901, /* ADLAM CAPITAL LETTER DAALI */ + /*3999*/ 0x01e924, 1, 0x01e902, /* ADLAM CAPITAL LETTER LAAM */ + /*4002*/ 0x01e925, 1, 0x01e903, /* ADLAM CAPITAL LETTER MIIM */ + /*4005*/ 0x01e926, 1, 0x01e904, /* ADLAM CAPITAL LETTER BA */ + /*4008*/ 0x01e927, 1, 0x01e905, /* ADLAM CAPITAL LETTER SINNYIIYHE */ + /*4011*/ 0x01e928, 1, 0x01e906, /* ADLAM CAPITAL LETTER PE */ + /*4014*/ 0x01e929, 1, 0x01e907, /* ADLAM CAPITAL LETTER BHE */ + /*4017*/ 0x01e92a, 1, 0x01e908, /* ADLAM CAPITAL LETTER RA */ + /*4020*/ 0x01e92b, 1, 0x01e909, /* ADLAM CAPITAL LETTER E */ + /*4023*/ 0x01e92c, 1, 0x01e90a, /* ADLAM CAPITAL LETTER FA */ + /*4026*/ 0x01e92d, 1, 0x01e90b, /* ADLAM CAPITAL LETTER I */ + /*4029*/ 0x01e92e, 1, 0x01e90c, /* ADLAM CAPITAL LETTER O */ + /*4032*/ 0x01e92f, 1, 0x01e90d, /* ADLAM CAPITAL LETTER DHA */ + /*4035*/ 0x01e930, 1, 0x01e90e, /* ADLAM CAPITAL LETTER YHE */ + /*4038*/ 0x01e931, 1, 0x01e90f, /* ADLAM CAPITAL LETTER WAW */ + /*4041*/ 0x01e932, 1, 0x01e910, /* ADLAM CAPITAL LETTER NUN */ + /*4044*/ 0x01e933, 1, 0x01e911, /* ADLAM CAPITAL LETTER KAF */ + /*4047*/ 0x01e934, 1, 0x01e912, /* ADLAM CAPITAL LETTER YA */ + /*4050*/ 0x01e935, 1, 0x01e913, /* ADLAM CAPITAL LETTER U */ + /*4053*/ 0x01e936, 1, 0x01e914, /* ADLAM CAPITAL LETTER JIIM */ + /*4056*/ 0x01e937, 1, 0x01e915, /* ADLAM CAPITAL LETTER CHI */ + /*4059*/ 
0x01e938, 1, 0x01e916, /* ADLAM CAPITAL LETTER HA */ + /*4062*/ 0x01e939, 1, 0x01e917, /* ADLAM CAPITAL LETTER QAAF */ + /*4065*/ 0x01e93a, 1, 0x01e918, /* ADLAM CAPITAL LETTER GA */ + /*4068*/ 0x01e93b, 1, 0x01e919, /* ADLAM CAPITAL LETTER NYA */ + /*4071*/ 0x01e93c, 1, 0x01e91a, /* ADLAM CAPITAL LETTER TU */ + /*4074*/ 0x01e93d, 1, 0x01e91b, /* ADLAM CAPITAL LETTER NHA */ + /*4077*/ 0x01e93e, 1, 0x01e91c, /* ADLAM CAPITAL LETTER VA */ + /*4080*/ 0x01e93f, 1, 0x01e91d, /* ADLAM CAPITAL LETTER KHA */ + /*4083*/ 0x01e940, 1, 0x01e91e, /* ADLAM CAPITAL LETTER GBE */ + /*4086*/ 0x01e941, 1, 0x01e91f, /* ADLAM CAPITAL LETTER ZAL */ + /*4089*/ 0x01e942, 1, 0x01e920, /* ADLAM CAPITAL LETTER KPO */ + /*4092*/ 0x01e943, 1, 0x01e921, /* ADLAM CAPITAL LETTER SHA */ +#define FOLDS1_NORMAL_END_INDEX 4095 /* ----- LOCALE ----- */ - /*4086*/ 0x0069, 1, 0x0049, /* LATIN CAPITAL LETTER I */ -#define FOLDS1_END_INDEX 4089 + /*4095*/ 0x0069, 1, 0x0049, /* LATIN CAPITAL LETTER I */ +#define FOLDS1_END_INDEX 4098 }; OnigCodePoint OnigUnicodeFolds2[] = { diff --git a/src/unicode_property_data.c b/src/unicode_property_data.c index 0083dd6..3b645c6 100644 --- a/src/unicode_property_data.c +++ b/src/unicode_property_data.c @@ -33,6 +33,32 @@ /* Generated by make_unicode_property_data.py. */ +/*- + * Copyright (c) 2016-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + /* PROPERTY: 'NEWLINE': POSIX [[:NEWLINE:]] */ static const OnigCodePoint @@ -42,7 +68,7 @@ CR_NEWLINE[] = { 1, /* PROPERTY: 'Alpha': POSIX [[:Alpha:]] */ static const OnigCodePoint -CR_Alpha[] = { 679, +CR_Alpha[] = { 695, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -96,7 +122,7 @@ CR_Alpha[] = { 679, 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d4, 0x08df, 0x08e3, 0x08e9, 0x08f0, 0x093b, @@ -200,8 +226,7 @@ CR_Alpha[] = { 679, 0x0cde, 0x0cde, 0x0ce0, 0x0ce3, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d44, @@ -211,7 +236,7 @@ CR_Alpha[] = { 679, 0x0d54, 0x0d57, 0x0d5f, 0x0d63, 0x0d7a, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -299,6 +324,7 @@ CR_Alpha[] = { 679, 0x1a20, 0x1a5e, 0x1a61, 0x1a74, 0x1aa7, 0x1aa7, +0x1abf, 0x1ac0, 0x1b00, 0x1b33, 0x1b35, 0x1b43, 0x1b45, 0x1b4b, @@ -386,10 +412,10 @@ CR_Alpha[] = { 679, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, 
@@ -401,8 +427,8 @@ CR_Alpha[] = { 679, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa805, +0xa7c2, 0xa7ca, +0xa7f5, 0xa805, 0xa807, 0xa827, 0xa840, 0xa873, 0xa880, 0xa8c3, @@ -433,7 +459,7 @@ CR_Alpha[] = { 679, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabea, 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, @@ -518,19 +544,24 @@ CR_Alpha[] = { 679, 0x10c80, 0x10cb2, 0x10cc0, 0x10cf2, 0x10d00, 0x10d27, +0x10e80, 0x10ea9, +0x10eab, 0x10eac, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f45, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11000, 0x11045, 0x11082, 0x110b8, 0x110d0, 0x110e8, 0x11100, 0x11132, -0x11144, 0x11146, +0x11144, 0x11147, 0x11150, 0x11172, 0x11176, 0x11176, 0x11180, 0x111bf, 0x111c1, 0x111c4, +0x111ce, 0x111cf, 0x111da, 0x111da, 0x111dc, 0x111dc, 0x11200, 0x11211, @@ -559,7 +590,7 @@ CR_Alpha[] = { 679, 0x11400, 0x11441, 0x11443, 0x11445, 0x11447, 0x1144a, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114c1, 0x114c4, 0x114c5, 0x114c7, 0x114c7, @@ -575,7 +606,14 @@ CR_Alpha[] = { 679, 0x1171d, 0x1172a, 0x11800, 0x11838, 0x118a0, 0x118df, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x1193c, +0x1193f, 0x11942, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119df, @@ -608,6 +646,7 @@ CR_Alpha[] = { 679, 0x11d93, 0x11d96, 0x11d98, 0x11d98, 0x11ee0, 0x11ef6, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -626,8 +665,10 @@ CR_Alpha[] = { 679, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, 0x16fe3, 0x16fe3, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -716,12 +757,13 @@ CR_Alpha[] = { 679, 0x1f130, 0x1f149, 0x1f150, 0x1f169, 0x1f170, 0x1f189, -0x20000, 0x2a6d6, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 
0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_Alpha */ /* PROPERTY: 'Blank': POSIX [[:Blank:]] */ @@ -746,7 +788,7 @@ CR_Cntrl[] = { 2, /* PROPERTY: 'Digit': POSIX [[:Digit:]] */ static const OnigCodePoint -CR_Digit[] = { 59, +CR_Digit[] = { 61, 0x0030, 0x0039, 0x0660, 0x0669, 0x06f0, 0x06f9, @@ -797,6 +839,7 @@ CR_Digit[] = { 59, 0x116c0, 0x116c9, 0x11730, 0x11739, 0x118e0, 0x118e9, +0x11950, 0x11959, 0x11c50, 0x11c59, 0x11d50, 0x11d59, 0x11da0, 0x11da9, @@ -806,11 +849,12 @@ CR_Digit[] = { 59, 0x1e140, 0x1e149, 0x1e2f0, 0x1e2f9, 0x1e950, 0x1e959, +0x1fbf0, 0x1fbf9, }; /* END of CR_Digit */ /* PROPERTY: 'Graph': POSIX [[:Graph:]] */ static const OnigCodePoint -CR_Graph[] = { 671, +CR_Graph[] = { 682, 0x0021, 0x007e, 0x00a1, 0x0377, 0x037a, 0x037f, @@ -835,7 +879,7 @@ CR_Graph[] = { 671, 0x085e, 0x085e, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x0983, 0x0985, 0x098c, 0x098f, 0x0990, @@ -890,7 +934,7 @@ CR_Graph[] = { 671, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b77, @@ -934,15 +978,14 @@ CR_Graph[] = { 671, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, 0x0d4a, 0x0d4f, 0x0d54, 0x0d63, 0x0d66, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -1029,7 +1072,7 @@ CR_Graph[] = { 671, 0x1a7f, 0x1a89, 0x1a90, 0x1a99, 0x1aa0, 0x1aad, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b7c, 0x1b80, 0x1bf3, @@ -1070,7 +1113,7 @@ CR_Graph[] = { 671, 0x2440, 0x244a, 0x2460, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2c2e, +0x2b97, 0x2c2e, 0x2c30, 0x2c5e, 0x2c60, 0x2cf3, 0x2cf9, 0x2d25, @@ -1087,7 +1130,7 @@ CR_Graph[] = { 671, 0x2dc8, 0x2dce, 0x2dd0, 0x2dd6, 0x2dd8, 0x2dde, -0x2de0, 0x2e4f, +0x2de0, 0x2e52, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -1097,18 +1140,16 @@ CR_Graph[] = { 671, 0x3099, 
0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x3190, 0x31ba, -0x31c0, 0x31e3, +0x3190, 0x31e3, 0x31f0, 0x321e, -0x3220, 0x4db5, -0x4dc0, 0x9fef, +0x3220, 0x9ffc, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, 0xa640, 0xa6f7, 0xa700, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa82b, +0xa7c2, 0xa7ca, +0xa7f5, 0xa82c, 0xa830, 0xa839, 0xa840, 0xa877, 0xa880, 0xa8c5, @@ -1128,7 +1169,7 @@ CR_Graph[] = { 671, 0xab11, 0xab16, 0xab20, 0xab26, 0xab28, 0xab2e, -0xab30, 0xab67, +0xab30, 0xab6b, 0xab70, 0xabed, 0xabf0, 0xabf9, 0xac00, 0xd7a3, @@ -1173,7 +1214,7 @@ CR_Graph[] = { 671, 0x10100, 0x10102, 0x10107, 0x10133, 0x10137, 0x1018e, -0x10190, 0x1019b, +0x10190, 0x1019c, 0x101a0, 0x101a0, 0x101d0, 0x101fd, 0x10280, 0x1029c, @@ -1233,8 +1274,12 @@ CR_Graph[] = { 671, 0x10cfa, 0x10d27, 0x10d30, 0x10d39, 0x10e60, 0x10e7e, +0x10e80, 0x10ea9, +0x10eab, 0x10ead, +0x10eb0, 0x10eb1, 0x10f00, 0x10f27, 0x10f30, 0x10f59, +0x10fb0, 0x10fcb, 0x10fe0, 0x10ff6, 0x11000, 0x1104d, 0x11052, 0x1106f, @@ -1243,10 +1288,9 @@ CR_Graph[] = { 671, 0x110d0, 0x110e8, 0x110f0, 0x110f9, 0x11100, 0x11134, -0x11136, 0x11146, +0x11136, 0x11147, 0x11150, 0x11176, -0x11180, 0x111cd, -0x111d0, 0x111df, +0x11180, 0x111df, 0x111e1, 0x111f4, 0x11200, 0x11211, 0x11213, 0x1123e, @@ -1272,9 +1316,8 @@ CR_Graph[] = { 671, 0x1135d, 0x11363, 0x11366, 0x1136c, 0x11370, 0x11374, -0x11400, 0x11459, -0x1145b, 0x1145b, -0x1145d, 0x1145f, +0x11400, 0x1145b, +0x1145d, 0x11461, 0x11480, 0x114c7, 0x114d0, 0x114d9, 0x11580, 0x115b5, @@ -1289,7 +1332,14 @@ CR_Graph[] = { 671, 0x11730, 0x1173f, 0x11800, 0x1183b, 0x118a0, 0x118f2, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11946, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e4, @@ -1317,6 +1367,7 @@ CR_Graph[] = { 671, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef8, +0x11fb0, 0x11fb0, 0x11fc0, 0x11ff1, 0x11fff, 0x12399, 0x12400, 0x1246e, @@ -1340,9 +1391,11 @@ 
CR_Graph[] = { 671, 0x16f00, 0x16f4a, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, -0x16fe0, 0x16fe3, +0x16fe0, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -1440,17 +1493,15 @@ CR_Graph[] = { 671, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f100, 0x1f10c, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f100, 0x1f1ad, 0x1f1e6, 0x1f202, 0x1f210, 0x1f23b, 0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, -0x1f300, 0x1f6d5, +0x1f300, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 0x1f7eb, @@ -1459,24 +1510,28 @@ CR_Graph[] = { 671, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, -0x20000, 0x2a6d6, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, @@ -1486,7 +1541,7 @@ CR_Graph[] = { 671, /* PROPERTY: 'Lower': POSIX [[:Lower:]] */ static const OnigCodePoint -CR_Lower[] = { 649, +CR_Lower[] = { 652, 0x0061, 0x007a, 0x00aa, 0x00aa, 0x00b5, 0x00b5, @@ -2095,9 +2150,12 @@ CR_Lower[] = { 649, 0xa7bd, 0xa7bd, 0xa7bf, 0xa7bf, 0xa7c3, 0xa7c3, +0xa7c8, 0xa7c8, +0xa7ca, 0xa7ca, +0xa7f6, 0xa7f6, 0xa7f8, 0xa7fa, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab68, 0xab70, 0xabbf, 0xfb00, 0xfb06, 0xfb13, 0xfb17, @@ -2140,7 +2198,7 @@ CR_Lower[] = { 649, /* PROPERTY: 'Print': POSIX [[:Print:]] */ static const OnigCodePoint 
-CR_Print[] = { 668, +CR_Print[] = { 679, 0x0020, 0x007e, 0x00a0, 0x0377, 0x037a, 0x037f, @@ -2165,7 +2223,7 @@ CR_Print[] = { 668, 0x085e, 0x085e, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x0983, 0x0985, 0x098c, 0x098f, 0x0990, @@ -2220,7 +2278,7 @@ CR_Print[] = { 668, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b77, @@ -2264,15 +2322,14 @@ CR_Print[] = { 668, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, 0x0d4a, 0x0d4f, 0x0d54, 0x0d63, 0x0d66, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -2358,7 +2415,7 @@ CR_Print[] = { 668, 0x1a7f, 0x1a89, 0x1a90, 0x1a99, 0x1aa0, 0x1aad, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b7c, 0x1b80, 0x1bf3, @@ -2397,7 +2454,7 @@ CR_Print[] = { 668, 0x2440, 0x244a, 0x2460, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2c2e, +0x2b97, 0x2c2e, 0x2c30, 0x2c5e, 0x2c60, 0x2cf3, 0x2cf9, 0x2d25, @@ -2414,7 +2471,7 @@ CR_Print[] = { 668, 0x2dc8, 0x2dce, 0x2dd0, 0x2dd6, 0x2dd8, 0x2dde, -0x2de0, 0x2e4f, +0x2de0, 0x2e52, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -2424,18 +2481,16 @@ CR_Print[] = { 668, 0x3099, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x3190, 0x31ba, -0x31c0, 0x31e3, +0x3190, 0x31e3, 0x31f0, 0x321e, -0x3220, 0x4db5, -0x4dc0, 0x9fef, +0x3220, 0x9ffc, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, 0xa640, 0xa6f7, 0xa700, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa82b, +0xa7c2, 0xa7ca, +0xa7f5, 0xa82c, 0xa830, 0xa839, 0xa840, 0xa877, 0xa880, 0xa8c5, @@ -2455,7 +2510,7 @@ CR_Print[] = { 668, 0xab11, 0xab16, 0xab20, 0xab26, 0xab28, 0xab2e, -0xab30, 0xab67, +0xab30, 0xab6b, 0xab70, 0xabed, 0xabf0, 0xabf9, 0xac00, 0xd7a3, @@ -2500,7 +2555,7 @@ CR_Print[] = { 668, 0x10100, 0x10102, 0x10107, 0x10133, 0x10137, 0x1018e, -0x10190, 0x1019b, +0x10190, 0x1019c, 0x101a0, 
0x101a0, 0x101d0, 0x101fd, 0x10280, 0x1029c, @@ -2560,8 +2615,12 @@ CR_Print[] = { 668, 0x10cfa, 0x10d27, 0x10d30, 0x10d39, 0x10e60, 0x10e7e, +0x10e80, 0x10ea9, +0x10eab, 0x10ead, +0x10eb0, 0x10eb1, 0x10f00, 0x10f27, 0x10f30, 0x10f59, +0x10fb0, 0x10fcb, 0x10fe0, 0x10ff6, 0x11000, 0x1104d, 0x11052, 0x1106f, @@ -2570,10 +2629,9 @@ CR_Print[] = { 668, 0x110d0, 0x110e8, 0x110f0, 0x110f9, 0x11100, 0x11134, -0x11136, 0x11146, +0x11136, 0x11147, 0x11150, 0x11176, -0x11180, 0x111cd, -0x111d0, 0x111df, +0x11180, 0x111df, 0x111e1, 0x111f4, 0x11200, 0x11211, 0x11213, 0x1123e, @@ -2599,9 +2657,8 @@ CR_Print[] = { 668, 0x1135d, 0x11363, 0x11366, 0x1136c, 0x11370, 0x11374, -0x11400, 0x11459, -0x1145b, 0x1145b, -0x1145d, 0x1145f, +0x11400, 0x1145b, +0x1145d, 0x11461, 0x11480, 0x114c7, 0x114d0, 0x114d9, 0x11580, 0x115b5, @@ -2616,7 +2673,14 @@ CR_Print[] = { 668, 0x11730, 0x1173f, 0x11800, 0x1183b, 0x118a0, 0x118f2, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11946, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e4, @@ -2644,6 +2708,7 @@ CR_Print[] = { 668, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef8, +0x11fb0, 0x11fb0, 0x11fc0, 0x11ff1, 0x11fff, 0x12399, 0x12400, 0x1246e, @@ -2667,9 +2732,11 @@ CR_Print[] = { 668, 0x16f00, 0x16f4a, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, -0x16fe0, 0x16fe3, +0x16fe0, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -2767,17 +2834,15 @@ CR_Print[] = { 668, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f100, 0x1f10c, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f100, 0x1f1ad, 0x1f1e6, 0x1f202, 0x1f210, 0x1f23b, 0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, -0x1f300, 0x1f6d5, +0x1f300, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 
0x1f7eb, @@ -2786,24 +2851,28 @@ CR_Print[] = { 668, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, -0x20000, 0x2a6d6, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, @@ -2813,7 +2882,7 @@ CR_Print[] = { 668, /* PROPERTY: 'Punct': POSIX [[:Punct:]] */ static const OnigCodePoint -CR_Punct[] = { 182, +CR_Punct[] = { 185, 0x0021, 0x0023, 0x0025, 0x002a, 0x002c, 0x002f, @@ -2904,6 +2973,7 @@ CR_Punct[] = { 182, 0x2d70, 0x2d70, 0x2e00, 0x2e2e, 0x2e30, 0x2e4f, +0x2e52, 0x2e52, 0x3001, 0x3003, 0x3008, 0x3011, 0x3014, 0x301f, @@ -2957,6 +3027,7 @@ CR_Punct[] = { 182, 0x10af0, 0x10af6, 0x10b39, 0x10b3f, 0x10b99, 0x10b9c, +0x10ead, 0x10ead, 0x10f55, 0x10f59, 0x11047, 0x1104d, 0x110bb, 0x110bc, @@ -2970,7 +3041,7 @@ CR_Punct[] = { 182, 0x11238, 0x1123d, 0x112a9, 0x112a9, 0x1144b, 0x1144f, -0x1145b, 0x1145b, +0x1145a, 0x1145b, 0x1145d, 0x1145d, 0x114c6, 0x114c6, 0x115c1, 0x115d7, @@ -2978,6 +3049,7 @@ CR_Punct[] = { 182, 0x11660, 0x1166c, 0x1173c, 0x1173e, 0x1183b, 0x1183b, +0x11944, 0x11946, 0x119e2, 0x119e2, 0x11a3f, 0x11a46, 0x11a9a, 0x11a9c, @@ -3015,7 +3087,7 @@ CR_Space[] = { 10, /* PROPERTY: 'Upper': POSIX [[:Upper:]] */ static const OnigCodePoint -CR_Upper[] = { 641, +CR_Upper[] = { 643, 0x0041, 0x005a, 0x00c0, 0x00d6, 0x00d8, 0x00de, @@ -3615,7 +3687,9 @@ CR_Upper[] = { 641, 0xa7bc, 0xa7bc, 0xa7be, 0xa7be, 0xa7c2, 0xa7c2, -0xa7c4, 0xa7c6, +0xa7c4, 0xa7c7, +0xa7c9, 0xa7c9, +0xa7f5, 
0xa7f5, 0xff21, 0xff3a, 0x10400, 0x10427, 0x104b0, 0x104d3, @@ -3669,7 +3743,7 @@ CR_XDigit[] = { 3, /* PROPERTY: 'Word': POSIX [[:Word:]] */ static const OnigCodePoint -CR_Word[] = { 716, +CR_Word[] = { 732, 0x0030, 0x0039, 0x0041, 0x005a, 0x005f, 0x005f, @@ -3721,7 +3795,7 @@ CR_Word[] = { 716, 0x0840, 0x085b, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x08e1, 0x08e3, 0x0963, 0x0966, 0x096f, @@ -3781,7 +3855,7 @@ CR_Word[] = { 716, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b6f, @@ -3827,8 +3901,7 @@ CR_Word[] = { 716, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, @@ -3837,7 +3910,7 @@ CR_Word[] = { 716, 0x0d5f, 0x0d63, 0x0d66, 0x0d6f, 0x0d7a, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -3936,7 +4009,7 @@ CR_Word[] = { 716, 0x1a7f, 0x1a89, 0x1a90, 0x1a99, 0x1aa7, 0x1aa7, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b59, 0x1b6b, 0x1b73, @@ -4021,10 +4094,10 @@ CR_Word[] = { 716, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -4035,8 +4108,9 @@ CR_Word[] = { 716, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa827, +0xa7c2, 0xa7ca, +0xa7f5, 0xa827, +0xa82c, 0xa82c, 0xa840, 0xa873, 0xa880, 0xa8c5, 0xa8d0, 0xa8d9, @@ -4062,7 +4136,7 @@ CR_Word[] = { 716, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, @@ -4161,9 +4235,13 @@ CR_Word[] = { 716, 0x10cc0, 0x10cf2, 0x10d00, 0x10d27, 0x10d30, 0x10d39, +0x10e80, 0x10ea9, +0x10eab, 0x10eac, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f50, +0x10fb0, 
0x10fc4, 0x10fe0, 0x10ff6, 0x11000, 0x11046, 0x11066, 0x1106f, @@ -4172,12 +4250,12 @@ CR_Word[] = { 716, 0x110f0, 0x110f9, 0x11100, 0x11134, 0x11136, 0x1113f, -0x11144, 0x11146, +0x11144, 0x11147, 0x11150, 0x11173, 0x11176, 0x11176, 0x11180, 0x111c4, 0x111c9, 0x111cc, -0x111d0, 0x111da, +0x111ce, 0x111da, 0x111dc, 0x111dc, 0x11200, 0x11211, 0x11213, 0x11237, @@ -4206,7 +4284,7 @@ CR_Word[] = { 716, 0x11370, 0x11374, 0x11400, 0x1144a, 0x11450, 0x11459, -0x1145e, 0x1145f, +0x1145e, 0x11461, 0x11480, 0x114c5, 0x114c7, 0x114c7, 0x114d0, 0x114d9, @@ -4223,7 +4301,14 @@ CR_Word[] = { 716, 0x11730, 0x11739, 0x11800, 0x1183a, 0x118a0, 0x118e9, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11943, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e1, @@ -4254,6 +4339,7 @@ CR_Word[] = { 716, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef6, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -4274,9 +4360,11 @@ CR_Word[] = { 716, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, -0x16fe3, 0x16fe3, +0x16fe3, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -4379,18 +4467,20 @@ CR_Word[] = { 716, 0x1f130, 0x1f149, 0x1f150, 0x1f169, 0x1f170, 0x1f189, -0x20000, 0x2a6d6, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0100, 0xe01ef, }; /* END of CR_Word */ /* PROPERTY: 'Alnum': POSIX [[:Alnum:]] */ static const OnigCodePoint -CR_Alnum[] = { 715, +CR_Alnum[] = { 732, 0x0030, 0x0039, 0x0041, 0x005a, 0x0061, 0x007a, @@ -4444,7 +4534,7 @@ CR_Alnum[] = { 715, 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d4, 0x08df, 0x08e3, 0x08e9, 0x08f0, 0x093b, @@ -4554,8 +4644,7 @@ CR_Alnum[] = { 715, 
0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d44, @@ -4566,7 +4655,7 @@ CR_Alnum[] = { 715, 0x0d5f, 0x0d63, 0x0d66, 0x0d6f, 0x0d7a, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -4662,6 +4751,7 @@ CR_Alnum[] = { 715, 0x1a80, 0x1a89, 0x1a90, 0x1a99, 0x1aa7, 0x1aa7, +0x1abf, 0x1ac0, 0x1b00, 0x1b33, 0x1b35, 0x1b43, 0x1b45, 0x1b4b, @@ -4749,10 +4839,10 @@ CR_Alnum[] = { 715, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -4763,8 +4853,8 @@ CR_Alnum[] = { 715, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa805, +0xa7c2, 0xa7ca, +0xa7f5, 0xa805, 0xa807, 0xa827, 0xa840, 0xa873, 0xa880, 0xa8c3, @@ -4795,7 +4885,7 @@ CR_Alnum[] = { 715, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabea, 0xabf0, 0xabf9, 0xac00, 0xd7a3, @@ -4884,9 +4974,13 @@ CR_Alnum[] = { 715, 0x10cc0, 0x10cf2, 0x10d00, 0x10d27, 0x10d30, 0x10d39, +0x10e80, 0x10ea9, +0x10eab, 0x10eac, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f45, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11000, 0x11045, 0x11066, 0x1106f, @@ -4895,12 +4989,12 @@ CR_Alnum[] = { 715, 0x110f0, 0x110f9, 0x11100, 0x11132, 0x11136, 0x1113f, -0x11144, 0x11146, +0x11144, 0x11147, 0x11150, 0x11172, 0x11176, 0x11176, 0x11180, 0x111bf, 0x111c1, 0x111c4, -0x111d0, 0x111da, +0x111ce, 0x111da, 0x111dc, 0x111dc, 0x11200, 0x11211, 0x11213, 0x11234, @@ -4930,7 +5024,7 @@ CR_Alnum[] = { 715, 0x11443, 0x11445, 0x11447, 0x1144a, 0x11450, 0x11459, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114c1, 0x114c4, 0x114c5, 0x114c7, 0x114c7, @@ -4950,7 +5044,15 @@ CR_Alnum[] = { 715, 0x11730, 0x11739, 0x11800, 0x11838, 0x118a0, 0x118e9, -0x118ff, 0x118ff, 
+0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x1193c, +0x1193f, 0x11942, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119df, @@ -4986,6 +5088,7 @@ CR_Alnum[] = { 715, 0x11d98, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef6, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -5006,8 +5109,10 @@ CR_Alnum[] = { 715, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, 0x16fe3, 0x16fe3, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -5100,12 +5205,14 @@ CR_Alnum[] = { 715, 0x1f130, 0x1f149, 0x1f150, 0x1f169, 0x1f170, 0x1f189, -0x20000, 0x2a6d6, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_Alnum */ /* PROPERTY: 'ASCII': POSIX [[:ASCII:]] */ @@ -5164,7 +5271,7 @@ CR_Arabic[] = { 57, 0x06de, 0x06ff, 0x0750, 0x077f, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x08e1, 0x08e3, 0x08ff, 0xfb50, 0xfbc1, @@ -5213,17 +5320,16 @@ CR_Arabic[] = { 57, /* PROPERTY: 'Armenian': Script */ static const OnigCodePoint -CR_Armenian[] = { 5, +CR_Armenian[] = { 4, 0x0531, 0x0556, -0x0559, 0x0588, -0x058a, 0x058a, +0x0559, 0x058a, 0x058d, 0x058f, 0xfb13, 0xfb17, }; /* END of CR_Armenian */ /* PROPERTY: 'Assigned': - */ static const OnigCodePoint -CR_Assigned[] = { 666, +CR_Assigned[] = { 677, 0x0000, 0x0377, 0x037a, 0x037f, 0x0384, 0x038a, @@ -5247,7 +5353,7 @@ CR_Assigned[] = { 666, 0x085e, 0x085e, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x0983, 0x0985, 0x098c, 0x098f, 0x0990, @@ -5302,7 +5408,7 @@ CR_Assigned[] = { 666, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b77, @@ -5346,15 +5452,14 @@ CR_Assigned[] = { 666, 0x0ce0, 0x0ce3, 0x0ce6, 
0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, 0x0d4a, 0x0d4f, 0x0d54, 0x0d63, 0x0d66, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -5440,7 +5545,7 @@ CR_Assigned[] = { 666, 0x1a7f, 0x1a89, 0x1a90, 0x1a99, 0x1aa0, 0x1aad, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b7c, 0x1b80, 0x1bf3, @@ -5478,7 +5583,7 @@ CR_Assigned[] = { 666, 0x2440, 0x244a, 0x2460, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2c2e, +0x2b97, 0x2c2e, 0x2c30, 0x2c5e, 0x2c60, 0x2cf3, 0x2cf9, 0x2d25, @@ -5495,7 +5600,7 @@ CR_Assigned[] = { 666, 0x2dc8, 0x2dce, 0x2dd0, 0x2dd6, 0x2dd8, 0x2dde, -0x2de0, 0x2e4f, +0x2de0, 0x2e52, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -5505,18 +5610,16 @@ CR_Assigned[] = { 666, 0x3099, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x3190, 0x31ba, -0x31c0, 0x31e3, +0x3190, 0x31e3, 0x31f0, 0x321e, -0x3220, 0x4db5, -0x4dc0, 0x9fef, +0x3220, 0x9ffc, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, 0xa640, 0xa6f7, 0xa700, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa82b, +0xa7c2, 0xa7ca, +0xa7f5, 0xa82c, 0xa830, 0xa839, 0xa840, 0xa877, 0xa880, 0xa8c5, @@ -5536,7 +5639,7 @@ CR_Assigned[] = { 666, 0xab11, 0xab16, 0xab20, 0xab26, 0xab28, 0xab2e, -0xab30, 0xab67, +0xab30, 0xab6b, 0xab70, 0xabed, 0xabf0, 0xabf9, 0xac00, 0xd7a3, @@ -5581,7 +5684,7 @@ CR_Assigned[] = { 666, 0x10100, 0x10102, 0x10107, 0x10133, 0x10137, 0x1018e, -0x10190, 0x1019b, +0x10190, 0x1019c, 0x101a0, 0x101a0, 0x101d0, 0x101fd, 0x10280, 0x1029c, @@ -5641,8 +5744,12 @@ CR_Assigned[] = { 666, 0x10cfa, 0x10d27, 0x10d30, 0x10d39, 0x10e60, 0x10e7e, +0x10e80, 0x10ea9, +0x10eab, 0x10ead, +0x10eb0, 0x10eb1, 0x10f00, 0x10f27, 0x10f30, 0x10f59, +0x10fb0, 0x10fcb, 0x10fe0, 0x10ff6, 0x11000, 0x1104d, 0x11052, 0x1106f, @@ -5651,10 +5758,9 @@ CR_Assigned[] = { 666, 0x110d0, 0x110e8, 0x110f0, 0x110f9, 0x11100, 0x11134, -0x11136, 0x11146, +0x11136, 0x11147, 0x11150, 0x11176, -0x11180, 0x111cd, 
-0x111d0, 0x111df, +0x11180, 0x111df, 0x111e1, 0x111f4, 0x11200, 0x11211, 0x11213, 0x1123e, @@ -5680,9 +5786,8 @@ CR_Assigned[] = { 666, 0x1135d, 0x11363, 0x11366, 0x1136c, 0x11370, 0x11374, -0x11400, 0x11459, -0x1145b, 0x1145b, -0x1145d, 0x1145f, +0x11400, 0x1145b, +0x1145d, 0x11461, 0x11480, 0x114c7, 0x114d0, 0x114d9, 0x11580, 0x115b5, @@ -5697,7 +5802,14 @@ CR_Assigned[] = { 666, 0x11730, 0x1173f, 0x11800, 0x1183b, 0x118a0, 0x118f2, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11946, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e4, @@ -5725,6 +5837,7 @@ CR_Assigned[] = { 666, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef8, +0x11fb0, 0x11fb0, 0x11fc0, 0x11ff1, 0x11fff, 0x12399, 0x12400, 0x1246e, @@ -5748,9 +5861,11 @@ CR_Assigned[] = { 666, 0x16f00, 0x16f4a, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, -0x16fe0, 0x16fe3, +0x16fe0, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -5848,17 +5963,15 @@ CR_Assigned[] = { 666, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f100, 0x1f10c, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f100, 0x1f1ad, 0x1f1e6, 0x1f202, 0x1f210, 0x1f23b, 0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, -0x1f300, 0x1f6d5, +0x1f300, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 0x1f7eb, @@ -5867,24 +5980,28 @@ CR_Assigned[] = { 666, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, -0x20000, 0x2a6d6, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, 
+0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, @@ -5969,7 +6086,7 @@ static const OnigCodePoint CR_Bopomofo[] = { 3, 0x02ea, 0x02eb, 0x3105, 0x312f, -0x31a0, 0x31ba, +0x31a0, 0x31bf, }; /* END of CR_Bopomofo */ /* PROPERTY: 'Brahmi': Script */ @@ -6001,7 +6118,7 @@ CR_Buhid[] = { 1, /* PROPERTY: 'C': Major Category */ static const OnigCodePoint -CR_C[] = { 668, +CR_C[] = { 679, 0x0000, 0x001f, 0x007f, 0x009f, 0x00ad, 0x00ad, @@ -6029,7 +6146,7 @@ CR_C[] = { 668, 0x085f, 0x085f, 0x086b, 0x089f, 0x08b5, 0x08b5, -0x08be, 0x08d2, +0x08c8, 0x08d2, 0x08e2, 0x08e2, 0x0984, 0x0984, 0x098d, 0x098e, @@ -6084,7 +6201,7 @@ CR_C[] = { 668, 0x0b3a, 0x0b3b, 0x0b45, 0x0b46, 0x0b49, 0x0b4a, -0x0b4e, 0x0b55, +0x0b4e, 0x0b54, 0x0b58, 0x0b5b, 0x0b5e, 0x0b5e, 0x0b64, 0x0b65, @@ -6129,14 +6246,13 @@ CR_C[] = { 668, 0x0ce4, 0x0ce5, 0x0cf0, 0x0cf0, 0x0cf3, 0x0cff, -0x0d04, 0x0d04, 0x0d0d, 0x0d0d, 0x0d11, 0x0d11, 0x0d45, 0x0d45, 0x0d49, 0x0d49, 0x0d50, 0x0d53, 0x0d64, 0x0d65, -0x0d80, 0x0d81, +0x0d80, 0x0d80, 0x0d84, 0x0d84, 0x0d97, 0x0d99, 0x0db2, 0x0db2, @@ -6223,7 +6339,7 @@ CR_C[] = { 668, 0x1a8a, 0x1a8f, 0x1a9a, 0x1a9f, 0x1aae, 0x1aaf, -0x1abf, 0x1aff, +0x1ac1, 0x1aff, 0x1b4c, 0x1b4f, 0x1b7d, 0x1b7f, 0x1bf4, 0x1bfb, @@ -6262,7 +6378,7 @@ CR_C[] = { 668, 0x2427, 0x243f, 0x244b, 0x245f, 0x2b74, 0x2b75, -0x2b96, 0x2b97, +0x2b96, 0x2b96, 0x2c2f, 0x2c2f, 0x2c5f, 0x2c5f, 0x2cf4, 0x2cf8, @@ -6280,7 +6396,7 @@ CR_C[] = { 668, 0x2dcf, 0x2dcf, 0x2dd7, 0x2dd7, 0x2ddf, 0x2ddf, -0x2e50, 0x2e7f, +0x2e53, 0x2e7f, 0x2e9a, 0x2e9a, 0x2ef4, 0x2eff, 0x2fd6, 0x2fef, @@ -6290,18 +6406,16 @@ CR_C[] = { 668, 0x3100, 0x3104, 0x3130, 0x3130, 0x318f, 0x318f, -0x31bb, 0x31bf, 0x31e4, 0x31ef, 0x321f, 0x321f, -0x4db6, 0x4dbf, -0x9ff0, 0x9fff, +0x9ffd, 0x9fff, 0xa48d, 
0xa48f, 0xa4c7, 0xa4cf, 0xa62c, 0xa63f, 0xa6f8, 0xa6ff, 0xa7c0, 0xa7c1, -0xa7c7, 0xa7f6, -0xa82c, 0xa82f, +0xa7cb, 0xa7f4, +0xa82d, 0xa82f, 0xa83a, 0xa83f, 0xa878, 0xa87f, 0xa8c6, 0xa8cd, @@ -6321,7 +6435,7 @@ CR_C[] = { 668, 0xab17, 0xab1f, 0xab27, 0xab27, 0xab2f, 0xab2f, -0xab68, 0xab6f, +0xab6c, 0xab6f, 0xabee, 0xabef, 0xabfa, 0xabff, 0xd7a4, 0xd7af, @@ -6365,7 +6479,7 @@ CR_C[] = { 668, 0x10103, 0x10106, 0x10134, 0x10136, 0x1018f, 0x1018f, -0x1019c, 0x1019f, +0x1019d, 0x1019f, 0x101a1, 0x101cf, 0x101fe, 0x1027f, 0x1029d, 0x1029f, @@ -6424,9 +6538,13 @@ CR_C[] = { 668, 0x10cf3, 0x10cf9, 0x10d28, 0x10d2f, 0x10d3a, 0x10e5f, -0x10e7f, 0x10eff, +0x10e7f, 0x10e7f, +0x10eaa, 0x10eaa, +0x10eae, 0x10eaf, +0x10eb2, 0x10eff, 0x10f28, 0x10f2f, -0x10f5a, 0x10fdf, +0x10f5a, 0x10faf, +0x10fcc, 0x10fdf, 0x10ff7, 0x10fff, 0x1104e, 0x11051, 0x11070, 0x1107e, @@ -6435,9 +6553,8 @@ CR_C[] = { 668, 0x110e9, 0x110ef, 0x110fa, 0x110ff, 0x11135, 0x11135, -0x11147, 0x1114f, +0x11148, 0x1114f, 0x11177, 0x1117f, -0x111ce, 0x111cf, 0x111e0, 0x111e0, 0x111f5, 0x111ff, 0x11212, 0x11212, @@ -6464,9 +6581,8 @@ CR_C[] = { 668, 0x11364, 0x11365, 0x1136d, 0x1136f, 0x11375, 0x113ff, -0x1145a, 0x1145a, 0x1145c, 0x1145c, -0x11460, 0x1147f, +0x11462, 0x1147f, 0x114c8, 0x114cf, 0x114da, 0x1157f, 0x115b6, 0x115b7, @@ -6481,7 +6597,14 @@ CR_C[] = { 668, 0x11740, 0x117ff, 0x1183c, 0x1189f, 0x118f3, 0x118fe, -0x11900, 0x1199f, +0x11907, 0x11908, +0x1190a, 0x1190b, +0x11914, 0x11914, +0x11917, 0x11917, +0x11936, 0x11936, +0x11939, 0x1193a, +0x11947, 0x1194f, +0x1195a, 0x1199f, 0x119a8, 0x119a9, 0x119d8, 0x119d9, 0x119e5, 0x119ff, @@ -6508,7 +6631,8 @@ CR_C[] = { 668, 0x11d92, 0x11d92, 0x11d99, 0x11d9f, 0x11daa, 0x11edf, -0x11ef9, 0x11fbf, +0x11ef9, 0x11faf, +0x11fb1, 0x11fbf, 0x11ff2, 0x11ffe, 0x1239a, 0x123ff, 0x1246f, 0x1246f, @@ -6531,9 +6655,11 @@ CR_C[] = { 668, 0x16f4b, 0x16f4e, 0x16f88, 0x16f8e, 0x16fa0, 0x16fdf, -0x16fe4, 0x16fff, +0x16fe5, 0x16fef, +0x16ff2, 0x16fff, 0x187f8, 0x187ff, -0x18af3, 
0x1afff, +0x18cd6, 0x18cff, +0x18d09, 0x1afff, 0x1b11f, 0x1b14f, 0x1b153, 0x1b163, 0x1b168, 0x1b16f, @@ -6632,17 +6758,15 @@ CR_C[] = { 668, 0x1f0c0, 0x1f0c0, 0x1f0d0, 0x1f0d0, 0x1f0f6, 0x1f0ff, -0x1f10d, 0x1f10f, -0x1f16d, 0x1f16f, -0x1f1ad, 0x1f1e5, +0x1f1ae, 0x1f1e5, 0x1f203, 0x1f20f, 0x1f23c, 0x1f23f, 0x1f249, 0x1f24f, 0x1f252, 0x1f25f, 0x1f266, 0x1f2ff, -0x1f6d6, 0x1f6df, +0x1f6d8, 0x1f6df, 0x1f6ed, 0x1f6ef, -0x1f6fb, 0x1f6ff, +0x1f6fd, 0x1f6ff, 0x1f774, 0x1f77f, 0x1f7d9, 0x1f7df, 0x1f7ec, 0x1f7ff, @@ -6650,25 +6774,29 @@ CR_C[] = { 668, 0x1f848, 0x1f84f, 0x1f85a, 0x1f85f, 0x1f888, 0x1f88f, -0x1f8ae, 0x1f8ff, -0x1f90c, 0x1f90c, -0x1f972, 0x1f972, -0x1f977, 0x1f979, -0x1f9a3, 0x1f9a4, -0x1f9ab, 0x1f9ad, -0x1f9cb, 0x1f9cc, +0x1f8ae, 0x1f8af, +0x1f8b2, 0x1f8ff, +0x1f979, 0x1f979, +0x1f9cc, 0x1f9cc, 0x1fa54, 0x1fa5f, 0x1fa6e, 0x1fa6f, -0x1fa74, 0x1fa77, +0x1fa75, 0x1fa77, 0x1fa7b, 0x1fa7f, -0x1fa83, 0x1fa8f, -0x1fa96, 0x1ffff, -0x2a6d7, 0x2a6ff, +0x1fa87, 0x1fa8f, +0x1faa9, 0x1faaf, +0x1fab7, 0x1fabf, +0x1fac3, 0x1facf, +0x1fad7, 0x1faff, +0x1fb93, 0x1fb93, +0x1fbcb, 0x1fbef, +0x1fbfa, 0x1ffff, +0x2a6de, 0x2a6ff, 0x2b735, 0x2b73f, 0x2b81e, 0x2b81f, 0x2cea2, 0x2ceaf, 0x2ebe1, 0x2f7ff, -0x2fa1e, 0xe00ff, +0x2fa1e, 0x2ffff, +0x3134b, 0xe00ff, 0xe01f0, 0x10ffff, }; /* END of CR_C */ @@ -6687,7 +6815,7 @@ CR_Carian[] = { 1, /* PROPERTY: 'Case_Ignorable': Derived Property */ static const OnigCodePoint -CR_Case_Ignorable[] = { 401, +CR_Case_Ignorable[] = { 410, 0x0027, 0x0027, 0x002e, 0x002e, 0x003a, 0x003a, @@ -6705,6 +6833,7 @@ CR_Case_Ignorable[] = { 401, 0x0387, 0x0387, 0x0483, 0x0489, 0x0559, 0x0559, +0x055f, 0x055f, 0x0591, 0x05bd, 0x05bf, 0x05bf, 0x05c1, 0x05c2, @@ -6763,7 +6892,7 @@ CR_Case_Ignorable[] = { 401, 0x0b3f, 0x0b3f, 0x0b41, 0x0b44, 0x0b4d, 0x0b4d, -0x0b56, 0x0b56, +0x0b55, 0x0b56, 0x0b62, 0x0b63, 0x0b82, 0x0b82, 0x0bc0, 0x0bc0, @@ -6786,6 +6915,7 @@ CR_Case_Ignorable[] = { 401, 0x0d41, 0x0d44, 0x0d4d, 0x0d4d, 0x0d62, 0x0d63, +0x0d81, 0x0d81, 0x0dca, 
0x0dca, 0x0dd2, 0x0dd4, 0x0dd6, 0x0dd6, @@ -6847,7 +6977,7 @@ CR_Case_Ignorable[] = { 401, 0x1a73, 0x1a7c, 0x1a7f, 0x1a7f, 0x1aa7, 0x1aa7, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b03, 0x1b34, 0x1b34, 0x1b36, 0x1b3a, @@ -6920,6 +7050,7 @@ CR_Case_Ignorable[] = { 401, 0xa806, 0xa806, 0xa80b, 0xa80b, 0xa825, 0xa826, +0xa82c, 0xa82c, 0xa8c4, 0xa8c5, 0xa8e0, 0xa8f1, 0xa8ff, 0xa8ff, @@ -6948,6 +7079,7 @@ CR_Case_Ignorable[] = { 401, 0xaaf3, 0xaaf4, 0xaaf6, 0xaaf6, 0xab5b, 0xab5f, +0xab69, 0xab6b, 0xabe5, 0xabe5, 0xabe8, 0xabe8, 0xabed, 0xabed, @@ -6978,6 +7110,7 @@ CR_Case_Ignorable[] = { 401, 0x10a3f, 0x10a3f, 0x10ae5, 0x10ae6, 0x10d24, 0x10d27, +0x10eab, 0x10eac, 0x10f46, 0x10f50, 0x11001, 0x11001, 0x11038, 0x11046, @@ -6993,6 +7126,7 @@ CR_Case_Ignorable[] = { 401, 0x11180, 0x11181, 0x111b6, 0x111be, 0x111c9, 0x111cc, +0x111cf, 0x111cf, 0x1122f, 0x11231, 0x11234, 0x11234, 0x11236, 0x11237, @@ -7028,6 +7162,9 @@ CR_Case_Ignorable[] = { 401, 0x11727, 0x1172b, 0x1182f, 0x11837, 0x11839, 0x1183a, +0x1193b, 0x1193c, +0x1193e, 0x1193e, +0x11943, 0x11943, 0x119d4, 0x119d7, 0x119da, 0x119db, 0x119e0, 0x119e0, @@ -7062,7 +7199,7 @@ CR_Case_Ignorable[] = { 401, 0x16f4f, 0x16f4f, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, -0x16fe3, 0x16fe3, +0x16fe3, 0x16fe4, 0x1bc9d, 0x1bc9e, 0x1bca0, 0x1bca3, 0x1d167, 0x1d169, @@ -7093,7 +7230,7 @@ CR_Case_Ignorable[] = { 401, /* PROPERTY: 'Cased': Derived Property */ static const OnigCodePoint -CR_Cased[] = { 140, +CR_Cased[] = { 141, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -7184,10 +7321,11 @@ CR_Cased[] = { 140, 0xa722, 0xa787, 0xa78b, 0xa78e, 0xa790, 0xa7bf, -0xa7c2, 0xa7c6, +0xa7c2, 0xa7ca, +0xa7f5, 0xa7f6, 0xa7f8, 0xa7fa, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab68, 0xab70, 0xabbf, 0xfb00, 0xfb06, 0xfb13, 0xfb17, @@ -7275,7 +7413,7 @@ CR_Cf[] = { 20, static const OnigCodePoint CR_Chakma[] = { 2, 0x11100, 0x11134, -0x11136, 0x11146, +0x11136, 0x11147, }; /* END of CR_Chakma */ /* PROPERTY: 'Cham': Script */ @@ -7289,7 
+7427,7 @@ CR_Cham[] = { 4, /* PROPERTY: 'Changes_When_Casefolded': Derived Property */ static const OnigCodePoint -CR_Changes_When_Casefolded[] = { 612, +CR_Changes_When_Casefolded[] = { 614, 0x0041, 0x005a, 0x00b5, 0x00b5, 0x00c0, 0x00d6, @@ -7891,7 +8029,9 @@ CR_Changes_When_Casefolded[] = { 612, 0xa7bc, 0xa7bc, 0xa7be, 0xa7be, 0xa7c2, 0xa7c2, -0xa7c4, 0xa7c6, +0xa7c4, 0xa7c7, +0xa7c9, 0xa7c9, +0xa7f5, 0xa7f5, 0xab70, 0xabbf, 0xfb00, 0xfb06, 0xfb13, 0xfb17, @@ -7906,7 +8046,7 @@ CR_Changes_When_Casefolded[] = { 612, /* PROPERTY: 'Changes_When_Casemapped': Derived Property */ static const OnigCodePoint -CR_Changes_When_Casemapped[] = { 123, +CR_Changes_When_Casemapped[] = { 124, 0x0041, 0x005a, 0x0061, 0x007a, 0x00b5, 0x00b5, @@ -8015,7 +8155,8 @@ CR_Changes_When_Casemapped[] = { 123, 0xa790, 0xa794, 0xa796, 0xa7ae, 0xa7b0, 0xa7bf, -0xa7c2, 0xa7c6, +0xa7c2, 0xa7ca, +0xa7f5, 0xa7f6, 0xab53, 0xab53, 0xab70, 0xabbf, 0xfb00, 0xfb06, @@ -8034,7 +8175,7 @@ CR_Changes_When_Casemapped[] = { 123, /* PROPERTY: 'Changes_When_Lowercased': Derived Property */ static const OnigCodePoint -CR_Changes_When_Lowercased[] = { 599, +CR_Changes_When_Lowercased[] = { 601, 0x0041, 0x005a, 0x00c0, 0x00d6, 0x00d8, 0x00de, @@ -8626,7 +8767,9 @@ CR_Changes_When_Lowercased[] = { 599, 0xa7bc, 0xa7bc, 0xa7be, 0xa7be, 0xa7c2, 0xa7c2, -0xa7c4, 0xa7c6, +0xa7c4, 0xa7c7, +0xa7c9, 0xa7c9, +0xa7f5, 0xa7f5, 0xff21, 0xff3a, 0x10400, 0x10427, 0x104b0, 0x104d3, @@ -8638,7 +8781,7 @@ CR_Changes_When_Lowercased[] = { 599, /* PROPERTY: 'Changes_When_Titlecased': Derived Property */ static const OnigCodePoint -CR_Changes_When_Titlecased[] = { 615, +CR_Changes_When_Titlecased[] = { 618, 0x0061, 0x007a, 0x00b5, 0x00b5, 0x00df, 0x00f6, @@ -9243,6 +9386,9 @@ CR_Changes_When_Titlecased[] = { 615, 0xa7bd, 0xa7bd, 0xa7bf, 0xa7bf, 0xa7c3, 0xa7c3, +0xa7c8, 0xa7c8, +0xa7ca, 0xa7ca, +0xa7f6, 0xa7f6, 0xab53, 0xab53, 0xab70, 0xabbf, 0xfb00, 0xfb06, @@ -9258,7 +9404,7 @@ CR_Changes_When_Titlecased[] = { 615, /* PROPERTY: 
'Changes_When_Uppercased': Derived Property */ static const OnigCodePoint -CR_Changes_When_Uppercased[] = { 616, +CR_Changes_When_Uppercased[] = { 619, 0x0061, 0x007a, 0x00b5, 0x00b5, 0x00df, 0x00f6, @@ -9864,6 +10010,9 @@ CR_Changes_When_Uppercased[] = { 616, 0xa7bd, 0xa7bd, 0xa7bf, 0xa7bf, 0xa7c3, 0xa7c3, +0xa7c8, 0xa7c8, +0xa7ca, 0xa7ca, +0xa7f6, 0xa7f6, 0xab53, 0xab53, 0xab70, 0xabbf, 0xfb00, 0xfb06, @@ -9885,9 +10034,15 @@ CR_Cherokee[] = { 3, 0xab70, 0xabbf, }; /* END of CR_Cherokee */ +/* PROPERTY: 'Chorasmian': Script */ +static const OnigCodePoint +CR_Chorasmian[] = { 1, +0x10fb0, 0x10fcb, +}; /* END of CR_Chorasmian */ + /* PROPERTY: 'Cn': General Category */ static const OnigCodePoint -CR_Cn[] = { 666, +CR_Cn[] = { 677, 0x0378, 0x0379, 0x0380, 0x0383, 0x038b, 0x038b, @@ -9911,7 +10066,7 @@ CR_Cn[] = { 666, 0x085f, 0x085f, 0x086b, 0x089f, 0x08b5, 0x08b5, -0x08be, 0x08d2, +0x08c8, 0x08d2, 0x0984, 0x0984, 0x098d, 0x098e, 0x0991, 0x0992, @@ -9965,7 +10120,7 @@ CR_Cn[] = { 666, 0x0b3a, 0x0b3b, 0x0b45, 0x0b46, 0x0b49, 0x0b4a, -0x0b4e, 0x0b55, +0x0b4e, 0x0b54, 0x0b58, 0x0b5b, 0x0b5e, 0x0b5e, 0x0b64, 0x0b65, @@ -10010,14 +10165,13 @@ CR_Cn[] = { 666, 0x0ce4, 0x0ce5, 0x0cf0, 0x0cf0, 0x0cf3, 0x0cff, -0x0d04, 0x0d04, 0x0d0d, 0x0d0d, 0x0d11, 0x0d11, 0x0d45, 0x0d45, 0x0d49, 0x0d49, 0x0d50, 0x0d53, 0x0d64, 0x0d65, -0x0d80, 0x0d81, +0x0d80, 0x0d80, 0x0d84, 0x0d84, 0x0d97, 0x0d99, 0x0db2, 0x0db2, @@ -10104,7 +10258,7 @@ CR_Cn[] = { 666, 0x1a8a, 0x1a8f, 0x1a9a, 0x1a9f, 0x1aae, 0x1aaf, -0x1abf, 0x1aff, +0x1ac1, 0x1aff, 0x1b4c, 0x1b4f, 0x1b7d, 0x1b7f, 0x1bf4, 0x1bfb, @@ -10141,7 +10295,7 @@ CR_Cn[] = { 666, 0x2427, 0x243f, 0x244b, 0x245f, 0x2b74, 0x2b75, -0x2b96, 0x2b97, +0x2b96, 0x2b96, 0x2c2f, 0x2c2f, 0x2c5f, 0x2c5f, 0x2cf4, 0x2cf8, @@ -10159,7 +10313,7 @@ CR_Cn[] = { 666, 0x2dcf, 0x2dcf, 0x2dd7, 0x2dd7, 0x2ddf, 0x2ddf, -0x2e50, 0x2e7f, +0x2e53, 0x2e7f, 0x2e9a, 0x2e9a, 0x2ef4, 0x2eff, 0x2fd6, 0x2fef, @@ -10169,18 +10323,16 @@ CR_Cn[] = { 666, 0x3100, 0x3104, 0x3130, 
0x3130, 0x318f, 0x318f, -0x31bb, 0x31bf, 0x31e4, 0x31ef, 0x321f, 0x321f, -0x4db6, 0x4dbf, -0x9ff0, 0x9fff, +0x9ffd, 0x9fff, 0xa48d, 0xa48f, 0xa4c7, 0xa4cf, 0xa62c, 0xa63f, 0xa6f8, 0xa6ff, 0xa7c0, 0xa7c1, -0xa7c7, 0xa7f6, -0xa82c, 0xa82f, +0xa7cb, 0xa7f4, +0xa82d, 0xa82f, 0xa83a, 0xa83f, 0xa878, 0xa87f, 0xa8c6, 0xa8cd, @@ -10200,7 +10352,7 @@ CR_Cn[] = { 666, 0xab17, 0xab1f, 0xab27, 0xab27, 0xab2f, 0xab2f, -0xab68, 0xab6f, +0xab6c, 0xab6f, 0xabee, 0xabef, 0xabfa, 0xabff, 0xd7a4, 0xd7af, @@ -10245,7 +10397,7 @@ CR_Cn[] = { 666, 0x10103, 0x10106, 0x10134, 0x10136, 0x1018f, 0x1018f, -0x1019c, 0x1019f, +0x1019d, 0x1019f, 0x101a1, 0x101cf, 0x101fe, 0x1027f, 0x1029d, 0x1029f, @@ -10304,9 +10456,13 @@ CR_Cn[] = { 666, 0x10cf3, 0x10cf9, 0x10d28, 0x10d2f, 0x10d3a, 0x10e5f, -0x10e7f, 0x10eff, +0x10e7f, 0x10e7f, +0x10eaa, 0x10eaa, +0x10eae, 0x10eaf, +0x10eb2, 0x10eff, 0x10f28, 0x10f2f, -0x10f5a, 0x10fdf, +0x10f5a, 0x10faf, +0x10fcc, 0x10fdf, 0x10ff7, 0x10fff, 0x1104e, 0x11051, 0x11070, 0x1107e, @@ -10315,9 +10471,8 @@ CR_Cn[] = { 666, 0x110e9, 0x110ef, 0x110fa, 0x110ff, 0x11135, 0x11135, -0x11147, 0x1114f, +0x11148, 0x1114f, 0x11177, 0x1117f, -0x111ce, 0x111cf, 0x111e0, 0x111e0, 0x111f5, 0x111ff, 0x11212, 0x11212, @@ -10344,9 +10499,8 @@ CR_Cn[] = { 666, 0x11364, 0x11365, 0x1136d, 0x1136f, 0x11375, 0x113ff, -0x1145a, 0x1145a, 0x1145c, 0x1145c, -0x11460, 0x1147f, +0x11462, 0x1147f, 0x114c8, 0x114cf, 0x114da, 0x1157f, 0x115b6, 0x115b7, @@ -10361,7 +10515,14 @@ CR_Cn[] = { 666, 0x11740, 0x117ff, 0x1183c, 0x1189f, 0x118f3, 0x118fe, -0x11900, 0x1199f, +0x11907, 0x11908, +0x1190a, 0x1190b, +0x11914, 0x11914, +0x11917, 0x11917, +0x11936, 0x11936, +0x11939, 0x1193a, +0x11947, 0x1194f, +0x1195a, 0x1199f, 0x119a8, 0x119a9, 0x119d8, 0x119d9, 0x119e5, 0x119ff, @@ -10388,7 +10549,8 @@ CR_Cn[] = { 666, 0x11d92, 0x11d92, 0x11d99, 0x11d9f, 0x11daa, 0x11edf, -0x11ef9, 0x11fbf, +0x11ef9, 0x11faf, +0x11fb1, 0x11fbf, 0x11ff2, 0x11ffe, 0x1239a, 0x123ff, 0x1246f, 0x1246f, @@ -10412,9 +10574,11 @@ 
CR_Cn[] = { 666, 0x16f4b, 0x16f4e, 0x16f88, 0x16f8e, 0x16fa0, 0x16fdf, -0x16fe4, 0x16fff, +0x16fe5, 0x16fef, +0x16ff2, 0x16fff, 0x187f8, 0x187ff, -0x18af3, 0x1afff, +0x18cd6, 0x18cff, +0x18d09, 0x1afff, 0x1b11f, 0x1b14f, 0x1b153, 0x1b163, 0x1b168, 0x1b16f, @@ -10512,17 +10676,15 @@ CR_Cn[] = { 666, 0x1f0c0, 0x1f0c0, 0x1f0d0, 0x1f0d0, 0x1f0f6, 0x1f0ff, -0x1f10d, 0x1f10f, -0x1f16d, 0x1f16f, -0x1f1ad, 0x1f1e5, +0x1f1ae, 0x1f1e5, 0x1f203, 0x1f20f, 0x1f23c, 0x1f23f, 0x1f249, 0x1f24f, 0x1f252, 0x1f25f, 0x1f266, 0x1f2ff, -0x1f6d6, 0x1f6df, +0x1f6d8, 0x1f6df, 0x1f6ed, 0x1f6ef, -0x1f6fb, 0x1f6ff, +0x1f6fd, 0x1f6ff, 0x1f774, 0x1f77f, 0x1f7d9, 0x1f7df, 0x1f7ec, 0x1f7ff, @@ -10530,25 +10692,29 @@ CR_Cn[] = { 666, 0x1f848, 0x1f84f, 0x1f85a, 0x1f85f, 0x1f888, 0x1f88f, -0x1f8ae, 0x1f8ff, -0x1f90c, 0x1f90c, -0x1f972, 0x1f972, -0x1f977, 0x1f979, -0x1f9a3, 0x1f9a4, -0x1f9ab, 0x1f9ad, -0x1f9cb, 0x1f9cc, +0x1f8ae, 0x1f8af, +0x1f8b2, 0x1f8ff, +0x1f979, 0x1f979, +0x1f9cc, 0x1f9cc, 0x1fa54, 0x1fa5f, 0x1fa6e, 0x1fa6f, -0x1fa74, 0x1fa77, +0x1fa75, 0x1fa77, 0x1fa7b, 0x1fa7f, -0x1fa83, 0x1fa8f, -0x1fa96, 0x1ffff, -0x2a6d7, 0x2a6ff, +0x1fa87, 0x1fa8f, +0x1faa9, 0x1faaf, +0x1fab7, 0x1fabf, +0x1fac3, 0x1facf, +0x1fad7, 0x1faff, +0x1fb93, 0x1fb93, +0x1fbcb, 0x1fbef, +0x1fbfa, 0x1ffff, +0x2a6de, 0x2a6ff, 0x2b735, 0x2b73f, 0x2b81e, 0x2b81f, 0x2cea2, 0x2ceaf, 0x2ebe1, 0x2f7ff, -0x2fa1e, 0xe0000, +0x2fa1e, 0x2ffff, +0x3134b, 0xe0000, 0xe0002, 0xe001f, 0xe0080, 0xe00ff, 0xe01f0, 0xeffff, @@ -10566,7 +10732,7 @@ CR_Co[] = { 3, /* PROPERTY: 'Common': Script */ static const OnigCodePoint -CR_Common[] = { 172, +CR_Common[] = { 173, 0x0000, 0x0040, 0x005b, 0x0060, 0x007b, 0x00a9, @@ -10581,7 +10747,6 @@ CR_Common[] = { 172, 0x037e, 0x037e, 0x0385, 0x0385, 0x0387, 0x0387, -0x0589, 0x0589, 0x0605, 0x0605, 0x060c, 0x060c, 0x061b, 0x061b, @@ -10620,8 +10785,8 @@ CR_Common[] = { 172, 0x2460, 0x27ff, 0x2900, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2bff, -0x2e00, 0x2e4f, +0x2b97, 0x2bff, +0x2e00, 0x2e52, 0x2ff0, 
0x2ffb, 0x3000, 0x3004, 0x3006, 0x3006, @@ -10644,6 +10809,7 @@ CR_Common[] = { 172, 0xa92e, 0xa92e, 0xa9cf, 0xa9cf, 0xab5b, 0xab5b, +0xab6a, 0xab6b, 0xfd3e, 0xfd3f, 0xfe10, 0xfe19, 0xfe30, 0xfe52, @@ -10661,7 +10827,7 @@ CR_Common[] = { 172, 0x10100, 0x10102, 0x10107, 0x10133, 0x10137, 0x1013f, -0x10190, 0x1019b, +0x10190, 0x1019c, 0x101d0, 0x101fc, 0x102e1, 0x102fb, 0x16fe2, 0x16fe3, @@ -10705,18 +10871,16 @@ CR_Common[] = { 172, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f100, 0x1f10c, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f100, 0x1f1ad, 0x1f1e6, 0x1f1ff, 0x1f201, 0x1f202, 0x1f210, 0x1f23b, 0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, -0x1f300, 0x1f6d5, +0x1f300, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 0x1f7eb, @@ -10725,18 +10889,21 @@ CR_Common[] = { 172, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, +0x1fbf0, 0x1fbf9, 0xe0001, 0xe0001, 0xe0020, 0xe007f, }; /* END of CR_Common */ @@ -10790,7 +10957,7 @@ CR_Cyrillic[] = { 8, /* PROPERTY: 'Dash': Binary Property */ static const OnigCodePoint -CR_Dash[] = { 21, +CR_Dash[] = { 22, 0x002d, 0x002d, 0x058a, 0x058a, 0x05be, 0x05be, @@ -10812,6 +10979,7 @@ CR_Dash[] = { 21, 0xfe58, 0xfe58, 0xfe63, 0xfe63, 0xff0d, 0xff0d, +0x10ead, 0x10ead, }; /* END of CR_Dash */ /* PROPERTY: 'Default_Ignorable_Code_Point': Derived Property */ @@ -10866,7 +11034,7 @@ CR_Devanagari[] = { 4, /* PROPERTY: 'Diacritic': Binary Property */ static const OnigCodePoint -CR_Diacritic[] = { 171, +CR_Diacritic[] = { 
176, 0x005e, 0x005e, 0x0060, 0x0060, 0x00a8, 0x00a8, @@ -10909,6 +11077,7 @@ CR_Diacritic[] = { 171, 0x0afd, 0x0aff, 0x0b3c, 0x0b3c, 0x0b4d, 0x0b4d, +0x0b55, 0x0b55, 0x0bcd, 0x0bcd, 0x0c4d, 0x0c4d, 0x0cbc, 0x0cbc, @@ -10986,6 +11155,7 @@ CR_Diacritic[] = { 171, 0xaabf, 0xaac2, 0xaaf6, 0xaaf6, 0xab5b, 0xab5f, +0xab69, 0xab6b, 0xabec, 0xabed, 0xfb1e, 0xfb1e, 0xfe20, 0xfe2f, @@ -11017,6 +11187,8 @@ CR_Diacritic[] = { 171, 0x116b6, 0x116b7, 0x1172b, 0x1172b, 0x11839, 0x1183a, +0x1193d, 0x1193e, +0x11943, 0x11943, 0x119e0, 0x119e0, 0x11a34, 0x11a34, 0x11a47, 0x11a47, @@ -11028,6 +11200,7 @@ CR_Diacritic[] = { 171, 0x16af0, 0x16af4, 0x16b30, 0x16b36, 0x16f8f, 0x16f9f, +0x16ff0, 0x16ff1, 0x1d167, 0x1d169, 0x1d16d, 0x1d172, 0x1d17b, 0x1d182, @@ -11040,6 +11213,19 @@ CR_Diacritic[] = { 171, 0x1e948, 0x1e94a, }; /* END of CR_Diacritic */ +/* PROPERTY: 'Dives_Akuru': Script */ +static const OnigCodePoint +CR_Dives_Akuru[] = { 8, +0x11900, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11946, +0x11950, 0x11959, +}; /* END of CR_Dives_Akuru */ + /* PROPERTY: 'Dogra': Script */ static const OnigCodePoint CR_Dogra[] = { 1, @@ -11077,7 +11263,7 @@ CR_Elymaic[] = { 1, /* PROPERTY: 'Emoji': Emoji Property */ static const OnigCodePoint -CR_Emoji[] = { 151, +CR_Emoji[] = { 152, 0x0023, 0x0023, 0x002a, 0x002a, 0x0030, 0x0039, @@ -11124,6 +11310,7 @@ CR_Emoji[] = { 151, 0x2699, 0x2699, 0x269b, 0x269c, 0x26a0, 0x26a1, +0x26a7, 0x26a7, 0x26aa, 0x26ab, 0x26b0, 0x26b1, 0x26bd, 0x26be, @@ -11210,25 +11397,25 @@ CR_Emoji[] = { 151, 0x1f5fa, 0x1f64f, 0x1f680, 0x1f6c5, 0x1f6cb, 0x1f6d2, -0x1f6d5, 0x1f6d5, +0x1f6d5, 0x1f6d7, 0x1f6e0, 0x1f6e5, 0x1f6e9, 0x1f6e9, 0x1f6eb, 0x1f6ec, 0x1f6f0, 0x1f6f0, -0x1f6f3, 0x1f6fa, +0x1f6f3, 0x1f6fc, 0x1f7e0, 0x1f7eb, -0x1f90d, 0x1f93a, +0x1f90c, 0x1f93a, 0x1f93c, 0x1f945, -0x1f947, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f947, 0x1f978, +0x1f97a, 
0x1f9cb, 0x1f9cd, 0x1f9ff, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, }; /* END of CR_Emoji */ /* PROPERTY: 'Emoji_Component': Emoji Property */ @@ -11254,7 +11441,7 @@ CR_Emoji_Modifier[] = { 1, /* PROPERTY: 'Emoji_Modifier_Base': Emoji Property */ static const OnigCodePoint -CR_Emoji_Modifier_Base[] = { 36, +CR_Emoji_Modifier_Base[] = { 38, 0x261d, 0x261d, 0x26f9, 0x26f9, 0x270a, 0x270d, @@ -11281,11 +11468,13 @@ CR_Emoji_Modifier_Base[] = { 36, 0x1f6b4, 0x1f6b6, 0x1f6c0, 0x1f6c0, 0x1f6cc, 0x1f6cc, +0x1f90c, 0x1f90c, 0x1f90f, 0x1f90f, 0x1f918, 0x1f91f, 0x1f926, 0x1f926, 0x1f930, 0x1f939, 0x1f93c, 0x1f93e, +0x1f977, 0x1f977, 0x1f9b5, 0x1f9b6, 0x1f9b8, 0x1f9b9, 0x1f9bb, 0x1f9bb, @@ -11361,22 +11550,22 @@ CR_Emoji_Presentation[] = { 81, 0x1f680, 0x1f6c5, 0x1f6cc, 0x1f6cc, 0x1f6d0, 0x1f6d2, -0x1f6d5, 0x1f6d5, +0x1f6d5, 0x1f6d7, 0x1f6eb, 0x1f6ec, -0x1f6f4, 0x1f6fa, +0x1f6f4, 0x1f6fc, 0x1f7e0, 0x1f7eb, -0x1f90d, 0x1f93a, +0x1f90c, 0x1f93a, 0x1f93c, 0x1f945, -0x1f947, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f947, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1f9ff, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, }; /* END of CR_Emoji_Presentation */ /* PROPERTY: 'Ethiopic': Script */ @@ -11418,7 +11607,7 @@ CR_Ethiopic[] = { 32, /* PROPERTY: 'Extended_Pictographic': Emoji Property */ static const OnigCodePoint -CR_Extended_Pictographic[] = { 77, +CR_Extended_Pictographic[] = { 78, 0x00a9, 0x00a9, 0x00ae, 0x00ae, 0x203c, 0x203c, @@ -11495,16 +11684,18 @@ CR_Extended_Pictographic[] = { 77, 0x1f8ae, 0x1f8ff, 0x1f90c, 0x1f93a, 0x1f93c, 0x1f945, -0x1f947, 0x1fffd, +0x1f947, 0x1faff, +0x1fc00, 0x1fffd, }; /* END of CR_Extended_Pictographic */ /* 
PROPERTY: 'Extender': Binary Property */ static const OnigCodePoint -CR_Extender[] = { 31, +CR_Extender[] = { 32, 0x00b7, 0x00b7, 0x02d0, 0x02d1, 0x0640, 0x0640, 0x07fa, 0x07fa, +0x0b55, 0x0b55, 0x0e46, 0x0e46, 0x0ec6, 0x0ec6, 0x180a, 0x180a, @@ -11589,7 +11780,7 @@ CR_Grantha[] = { 15, /* PROPERTY: 'Grapheme_Base': Derived Property */ static const OnigCodePoint -CR_Grapheme_Base[] = { 819, +CR_Grapheme_Base[] = { 834, 0x0020, 0x007e, 0x00a0, 0x00ac, 0x00ae, 0x02ff, @@ -11633,7 +11824,7 @@ CR_Grapheme_Base[] = { 819, 0x085e, 0x085e, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x0903, 0x0939, 0x093b, 0x093b, 0x093d, 0x0940, @@ -11739,8 +11930,7 @@ CR_Grapheme_Base[] = { 819, 0x0ce0, 0x0ce1, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d02, 0x0d03, -0x0d05, 0x0d0c, +0x0d02, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d3d, @@ -11922,7 +12112,7 @@ CR_Grapheme_Base[] = { 819, 0x2440, 0x244a, 0x2460, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2c2e, +0x2b97, 0x2c2e, 0x2c30, 0x2c5e, 0x2c60, 0x2cee, 0x2cf2, 0x2cf3, @@ -11940,7 +12130,7 @@ CR_Grapheme_Base[] = { 819, 0x2dc8, 0x2dce, 0x2dd0, 0x2dd6, 0x2dd8, 0x2dde, -0x2e00, 0x2e4f, +0x2e00, 0x2e52, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -11951,11 +12141,9 @@ CR_Grapheme_Base[] = { 819, 0x309b, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x3190, 0x31ba, -0x31c0, 0x31e3, +0x3190, 0x31e3, 0x31f0, 0x321e, -0x3220, 0x4db5, -0x4dc0, 0x9fef, +0x3220, 0x9ffc, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -11965,8 +12153,8 @@ CR_Grapheme_Base[] = { 819, 0xa6a0, 0xa6ef, 0xa6f2, 0xa6f7, 0xa700, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa801, +0xa7c2, 0xa7ca, +0xa7f5, 0xa801, 0xa803, 0xa805, 0xa807, 0xa80a, 0xa80c, 0xa824, @@ -12008,7 +12196,7 @@ CR_Grapheme_Base[] = { 819, 0xab11, 0xab16, 0xab20, 0xab26, 0xab28, 0xab2e, -0xab30, 0xab67, +0xab30, 0xab6b, 0xab70, 0xabe4, 0xabe6, 0xabe7, 0xabe9, 0xabec, @@ -12056,7 +12244,7 @@ CR_Grapheme_Base[] = { 819, 0x10100, 0x10102, 0x10107, 0x10133, 0x10137, 0x1018e, -0x10190, 
0x1019b, +0x10190, 0x1019c, 0x101a0, 0x101a0, 0x101d0, 0x101fc, 0x10280, 0x1029c, @@ -12114,9 +12302,13 @@ CR_Grapheme_Base[] = { 819, 0x10cfa, 0x10d23, 0x10d30, 0x10d39, 0x10e60, 0x10e7e, +0x10e80, 0x10ea9, +0x10ead, 0x10ead, +0x10eb0, 0x10eb1, 0x10f00, 0x10f27, 0x10f30, 0x10f45, 0x10f51, 0x10f59, +0x10fb0, 0x10fcb, 0x10fe0, 0x10ff6, 0x11000, 0x11000, 0x11002, 0x11037, @@ -12130,12 +12322,12 @@ CR_Grapheme_Base[] = { 819, 0x110f0, 0x110f9, 0x11103, 0x11126, 0x1112c, 0x1112c, -0x11136, 0x11146, +0x11136, 0x11147, 0x11150, 0x11172, 0x11174, 0x11176, 0x11182, 0x111b5, 0x111bf, 0x111c8, -0x111cd, 0x111cd, +0x111cd, 0x111ce, 0x111d0, 0x111df, 0x111e1, 0x111f4, 0x11200, 0x11211, @@ -12168,10 +12360,9 @@ CR_Grapheme_Base[] = { 819, 0x11400, 0x11437, 0x11440, 0x11441, 0x11445, 0x11445, -0x11447, 0x11459, -0x1145b, 0x1145b, +0x11447, 0x1145b, 0x1145d, 0x1145d, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114af, 0x114b1, 0x114b2, 0x114b9, 0x114b9, @@ -12205,7 +12396,17 @@ CR_Grapheme_Base[] = { 819, 0x11838, 0x11838, 0x1183b, 0x1183b, 0x118a0, 0x118f2, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x1192f, +0x11931, 0x11935, +0x11937, 0x11938, +0x1193d, 0x1193d, +0x1193f, 0x11942, +0x11944, 0x11946, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d3, 0x119dc, 0x119df, @@ -12243,6 +12444,7 @@ CR_Grapheme_Base[] = { 819, 0x11da0, 0x11da9, 0x11ee0, 0x11ef2, 0x11ef5, 0x11ef8, +0x11fb0, 0x11fb0, 0x11fc0, 0x11ff1, 0x11fff, 0x12399, 0x12400, 0x1246e, @@ -12267,8 +12469,10 @@ CR_Grapheme_Base[] = { 819, 0x16f50, 0x16f87, 0x16f93, 0x16f9f, 0x16fe0, 0x16fe3, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -12372,17 +12576,15 @@ CR_Grapheme_Base[] = { 819, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f100, 0x1f10c, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f100, 0x1f1ad, 0x1f1e6, 0x1f202, 0x1f210, 0x1f23b, 
0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, -0x1f300, 0x1f6d5, +0x1f300, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 0x1f7eb, @@ -12391,29 +12593,33 @@ CR_Grapheme_Base[] = { 819, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, -0x20000, 0x2a6d6, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_Grapheme_Base */ /* PROPERTY: 'Grapheme_Extend': Derived Property */ static const OnigCodePoint -CR_Grapheme_Extend[] = { 335, +CR_Grapheme_Extend[] = { 344, 0x0300, 0x036f, 0x0483, 0x0489, 0x0591, 0x05bd, @@ -12474,7 +12680,7 @@ CR_Grapheme_Extend[] = { 335, 0x0b3e, 0x0b3f, 0x0b41, 0x0b44, 0x0b4d, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b62, 0x0b63, 0x0b82, 0x0b82, 0x0bbe, 0x0bbe, @@ -12503,6 +12709,7 @@ CR_Grapheme_Extend[] = { 335, 0x0d4d, 0x0d4d, 0x0d57, 0x0d57, 0x0d62, 0x0d63, +0x0d81, 0x0d81, 0x0dca, 0x0dca, 0x0dcf, 0x0dcf, 0x0dd2, 0x0dd4, @@ -12561,7 +12768,7 @@ CR_Grapheme_Extend[] = { 335, 0x1a65, 0x1a6c, 0x1a73, 0x1a7c, 0x1a7f, 0x1a7f, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b03, 0x1b34, 0x1b3a, 0x1b3c, 0x1b3c, @@ -12600,6 +12807,7 @@ CR_Grapheme_Extend[] = { 335, 0xa806, 0xa806, 0xa80b, 0xa80b, 0xa825, 0xa826, +0xa82c, 0xa82c, 0xa8c4, 0xa8c5, 0xa8e0, 0xa8f1, 0xa8ff, 0xa8ff, @@ -12640,6 +12848,7 @@ CR_Grapheme_Extend[] = { 335, 0x10a3f, 0x10a3f, 0x10ae5, 0x10ae6, 0x10d24, 0x10d27, +0x10eab, 0x10eac, 0x10f46, 0x10f50, 0x11001, 0x11001, 
0x11038, 0x11046, @@ -12653,6 +12862,7 @@ CR_Grapheme_Extend[] = { 335, 0x11180, 0x11181, 0x111b6, 0x111be, 0x111c9, 0x111cc, +0x111cf, 0x111cf, 0x1122f, 0x11231, 0x11234, 0x11234, 0x11236, 0x11237, @@ -12693,6 +12903,10 @@ CR_Grapheme_Extend[] = { 335, 0x11727, 0x1172b, 0x1182f, 0x11837, 0x11839, 0x1183a, +0x11930, 0x11930, +0x1193b, 0x1193c, +0x1193e, 0x1193e, +0x11943, 0x11943, 0x119d4, 0x119d7, 0x119da, 0x119db, 0x119e0, 0x119e0, @@ -12724,6 +12938,7 @@ CR_Grapheme_Extend[] = { 335, 0x16b30, 0x16b36, 0x16f4f, 0x16f4f, 0x16f8f, 0x16f92, +0x16fe4, 0x16fe4, 0x1bc9d, 0x1bc9e, 0x1d165, 0x1d165, 0x1d167, 0x1d169, @@ -12753,7 +12968,7 @@ CR_Grapheme_Extend[] = { 335, /* PROPERTY: 'Grapheme_Link': Derived Property */ static const OnigCodePoint -CR_Grapheme_Link[] = { 52, +CR_Grapheme_Link[] = { 54, 0x094d, 0x094d, 0x09cd, 0x09cd, 0x0a4d, 0x0a4d, @@ -12778,6 +12993,7 @@ CR_Grapheme_Link[] = { 52, 0x1bf2, 0x1bf3, 0x2d7f, 0x2d7f, 0xa806, 0xa806, +0xa82c, 0xa82c, 0xa8c4, 0xa8c4, 0xa953, 0xa953, 0xa9c0, 0xa9c0, @@ -12799,6 +13015,7 @@ CR_Grapheme_Link[] = { 52, 0x116b6, 0x116b6, 0x1172b, 0x1172b, 0x11839, 0x11839, +0x1193d, 0x1193e, 0x119e0, 0x119e0, 0x11a34, 0x11a34, 0x11a47, 0x11a47, @@ -12902,7 +13119,7 @@ CR_Gurmukhi[] = { 16, /* PROPERTY: 'Han': Script */ static const OnigCodePoint -CR_Han[] = { 17, +CR_Han[] = { 19, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -12910,16 +13127,18 @@ CR_Han[] = { 17, 0x3007, 0x3007, 0x3021, 0x3029, 0x3038, 0x303b, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xf900, 0xfa6d, 0xfa70, 0xfad9, -0x20000, 0x2a6d6, +0x16ff0, 0x16ff1, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_Han */ /* PROPERTY: 'Hangul': Script */ @@ -13027,7 +13246,7 @@ CR_IDS_Trinary_Operator[] = { 1, /* PROPERTY: 'ID_Continue': Derived Property */ static const OnigCodePoint -CR_ID_Continue[] = { 713, +CR_ID_Continue[] = { 730, 0x0030, 0x0039, 
0x0041, 0x005a, 0x005f, 0x005f, @@ -13080,7 +13299,7 @@ CR_ID_Continue[] = { 713, 0x0840, 0x085b, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x08e1, 0x08e3, 0x0963, 0x0966, 0x096f, @@ -13140,7 +13359,7 @@ CR_ID_Continue[] = { 713, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b6f, @@ -13186,8 +13405,7 @@ CR_ID_Continue[] = { 713, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, @@ -13196,7 +13414,7 @@ CR_ID_Continue[] = { 713, 0x0d5f, 0x0d63, 0x0d66, 0x0d6f, 0x0d7a, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -13297,6 +13515,7 @@ CR_ID_Continue[] = { 713, 0x1a90, 0x1a99, 0x1aa7, 0x1aa7, 0x1ab0, 0x1abd, +0x1abf, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b59, 0x1b6b, 0x1b73, @@ -13379,10 +13598,10 @@ CR_ID_Continue[] = { 713, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -13393,8 +13612,9 @@ CR_ID_Continue[] = { 713, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa827, +0xa7c2, 0xa7ca, +0xa7f5, 0xa827, +0xa82c, 0xa82c, 0xa840, 0xa873, 0xa880, 0xa8c5, 0xa8d0, 0xa8d9, @@ -13420,7 +13640,7 @@ CR_ID_Continue[] = { 713, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, @@ -13519,9 +13739,13 @@ CR_ID_Continue[] = { 713, 0x10cc0, 0x10cf2, 0x10d00, 0x10d27, 0x10d30, 0x10d39, +0x10e80, 0x10ea9, +0x10eab, 0x10eac, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f50, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11000, 0x11046, 0x11066, 0x1106f, @@ -13530,12 +13754,12 @@ CR_ID_Continue[] = { 713, 0x110f0, 0x110f9, 0x11100, 0x11134, 0x11136, 
0x1113f, -0x11144, 0x11146, +0x11144, 0x11147, 0x11150, 0x11173, 0x11176, 0x11176, 0x11180, 0x111c4, 0x111c9, 0x111cc, -0x111d0, 0x111da, +0x111ce, 0x111da, 0x111dc, 0x111dc, 0x11200, 0x11211, 0x11213, 0x11237, @@ -13564,7 +13788,7 @@ CR_ID_Continue[] = { 713, 0x11370, 0x11374, 0x11400, 0x1144a, 0x11450, 0x11459, -0x1145e, 0x1145f, +0x1145e, 0x11461, 0x11480, 0x114c5, 0x114c7, 0x114c7, 0x114d0, 0x114d9, @@ -13581,7 +13805,14 @@ CR_ID_Continue[] = { 713, 0x11730, 0x11739, 0x11800, 0x1183a, 0x118a0, 0x118e9, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11943, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e1, @@ -13612,6 +13843,7 @@ CR_ID_Continue[] = { 713, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef6, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -13632,9 +13864,11 @@ CR_ID_Continue[] = { 713, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, -0x16fe3, 0x16fe3, +0x16fe3, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -13734,18 +13968,20 @@ CR_ID_Continue[] = { 713, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, -0x20000, 0x2a6d6, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0100, 0xe01ef, }; /* END of CR_ID_Continue */ /* PROPERTY: 'ID_Start': Derived Property */ static const OnigCodePoint -CR_ID_Start[] = { 609, +CR_ID_Start[] = { 622, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -13796,7 +14032,7 @@ CR_ID_Start[] = { 609, 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x0904, 0x0939, 0x093d, 0x093d, 0x0950, 0x0950, @@ -13872,7 +14108,7 @@ CR_ID_Start[] = { 609, 0x0cde, 0x0cde, 0x0ce0, 0x0ce1, 0x0cf1, 0x0cf2, -0x0d05, 0x0d0c, +0x0d04, 0x0d0c, 
0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d3d, @@ -14041,10 +14277,10 @@ CR_ID_Start[] = { 609, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -14056,8 +14292,8 @@ CR_ID_Start[] = { 609, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa801, +0xa7c2, 0xa7ca, +0xa7f5, 0xa801, 0xa803, 0xa805, 0xa807, 0xa80a, 0xa80c, 0xa822, @@ -14094,7 +14330,7 @@ CR_ID_Start[] = { 609, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabe2, 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, @@ -14179,15 +14415,19 @@ CR_ID_Start[] = { 609, 0x10c80, 0x10cb2, 0x10cc0, 0x10cf2, 0x10d00, 0x10d23, +0x10e80, 0x10ea9, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f45, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11003, 0x11037, 0x11083, 0x110af, 0x110d0, 0x110e8, 0x11103, 0x11126, 0x11144, 0x11144, +0x11147, 0x11147, 0x11150, 0x11172, 0x11176, 0x11176, 0x11183, 0x111b2, @@ -14213,7 +14453,7 @@ CR_ID_Start[] = { 609, 0x1135d, 0x11361, 0x11400, 0x11434, 0x11447, 0x1144a, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114af, 0x114c4, 0x114c5, 0x114c7, 0x114c7, @@ -14226,7 +14466,13 @@ CR_ID_Start[] = { 609, 0x11700, 0x1171a, 0x11800, 0x1182b, 0x118a0, 0x118df, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x1192f, +0x1193f, 0x1193f, +0x11941, 0x11941, 0x119a0, 0x119a7, 0x119aa, 0x119d0, 0x119e1, 0x119e1, @@ -14251,6 +14497,7 @@ CR_ID_Start[] = { 609, 0x11d6a, 0x11d89, 0x11d98, 0x11d98, 0x11ee0, 0x11ef2, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -14270,7 +14517,8 @@ CR_ID_Start[] = { 609, 0x16fe0, 0x16fe1, 0x16fe3, 0x16fe3, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -14349,33 
+14597,37 @@ CR_ID_Start[] = { 609, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, -0x20000, 0x2a6d6, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_ID_Start */ /* PROPERTY: 'Ideographic': Binary Property */ static const OnigCodePoint -CR_Ideographic[] = { 16, +CR_Ideographic[] = { 19, 0x3006, 0x3007, 0x3021, 0x3029, 0x3038, 0x303a, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xf900, 0xfa6d, 0xfa70, 0xfad9, +0x16fe4, 0x16fe4, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b170, 0x1b2fb, -0x20000, 0x2a6d6, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_Ideographic */ /* PROPERTY: 'Imperial_Aramaic': Script */ @@ -14393,7 +14645,7 @@ CR_Inherited[] = { 28, 0x064b, 0x0655, 0x0670, 0x0670, 0x0951, 0x0954, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1cd0, 0x1cd2, 0x1cd4, 0x1ce0, 0x1ce2, 0x1ce8, @@ -14505,6 +14757,13 @@ CR_Kharoshthi[] = { 8, 0x10a50, 0x10a58, }; /* END of CR_Kharoshthi */ +/* PROPERTY: 'Khitan_Small_Script': Script */ +static const OnigCodePoint +CR_Khitan_Small_Script[] = { 2, +0x16fe4, 0x16fe4, +0x18b00, 0x18cd5, +}; /* END of CR_Khitan_Small_Script */ + /* PROPERTY: 'Khmer': Script */ static const OnigCodePoint CR_Khmer[] = { 4, @@ -14530,7 +14789,7 @@ CR_Khudawadi[] = { 2, /* PROPERTY: 'L': Major Category */ static const OnigCodePoint -CR_L[] = { 609, +CR_L[] = { 622, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -14581,7 +14840,7 @@ CR_L[] = { 609, 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x0904, 0x0939, 0x093d, 0x093d, 0x0950, 0x0950, @@ -14657,7 +14916,7 @@ CR_L[] = { 609, 0x0cde, 0x0cde, 0x0ce0, 0x0ce1, 0x0cf1, 0x0cf2, -0x0d05, 0x0d0c, +0x0d04, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d3d, @@ -14828,10 +15087,10 @@ CR_L[] = { 609, 0x30fc, 0x30ff, 
0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -14843,8 +15102,8 @@ CR_L[] = { 609, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa801, +0xa7c2, 0xa7ca, +0xa7f5, 0xa801, 0xa803, 0xa805, 0xa807, 0xa80a, 0xa80c, 0xa822, @@ -14881,7 +15140,7 @@ CR_L[] = { 609, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabe2, 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, @@ -14965,15 +15224,19 @@ CR_L[] = { 609, 0x10c80, 0x10cb2, 0x10cc0, 0x10cf2, 0x10d00, 0x10d23, +0x10e80, 0x10ea9, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f45, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11003, 0x11037, 0x11083, 0x110af, 0x110d0, 0x110e8, 0x11103, 0x11126, 0x11144, 0x11144, +0x11147, 0x11147, 0x11150, 0x11172, 0x11176, 0x11176, 0x11183, 0x111b2, @@ -14999,7 +15262,7 @@ CR_L[] = { 609, 0x1135d, 0x11361, 0x11400, 0x11434, 0x11447, 0x1144a, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114af, 0x114c4, 0x114c5, 0x114c7, 0x114c7, @@ -15012,7 +15275,13 @@ CR_L[] = { 609, 0x11700, 0x1171a, 0x11800, 0x1182b, 0x118a0, 0x118df, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x1192f, +0x1193f, 0x1193f, +0x11941, 0x11941, 0x119a0, 0x119a7, 0x119aa, 0x119d0, 0x119e1, 0x119e1, @@ -15037,6 +15306,7 @@ CR_L[] = { 609, 0x11d6a, 0x11d89, 0x11d98, 0x11d98, 0x11ee0, 0x11ef2, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12480, 0x12543, 0x13000, 0x1342e, @@ -15055,7 +15325,8 @@ CR_L[] = { 609, 0x16fe0, 0x16fe1, 0x16fe3, 0x16fe3, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -15134,17 +15405,18 @@ CR_L[] = { 609, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, -0x20000, 0x2a6d6, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 
0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_L */ /* PROPERTY: 'LC': General Category */ static const OnigCodePoint -CR_LC[] = { 131, +CR_LC[] = { 132, 0x0041, 0x005a, 0x0061, 0x007a, 0x00b5, 0x00b5, @@ -15229,10 +15501,11 @@ CR_LC[] = { 131, 0xa771, 0xa787, 0xa78b, 0xa78e, 0xa790, 0xa7bf, -0xa7c2, 0xa7c6, +0xa7c2, 0xa7ca, +0xa7f5, 0xa7f6, 0xa7fa, 0xa7fa, 0xab30, 0xab5a, -0xab60, 0xab67, +0xab60, 0xab68, 0xab70, 0xabbf, 0xfb00, 0xfb06, 0xfb13, 0xfb17, @@ -15321,11 +15594,11 @@ CR_Latin[] = { 32, 0x2c60, 0x2c7f, 0xa722, 0xa787, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa7ff, +0xa7c2, 0xa7ca, +0xa7f5, 0xa7ff, 0xab30, 0xab5a, 0xab5c, 0xab64, -0xab66, 0xab67, +0xab66, 0xab69, 0xfb00, 0xfb06, 0xff21, 0xff3a, 0xff41, 0xff5a, @@ -15371,13 +15644,14 @@ CR_Linear_B[] = { 7, /* PROPERTY: 'Lisu': Script */ static const OnigCodePoint -CR_Lisu[] = { 1, +CR_Lisu[] = { 2, 0xa4d0, 0xa4ff, +0x11fb0, 0x11fb0, }; /* END of CR_Lisu */ /* PROPERTY: 'Ll': General Category */ static const OnigCodePoint -CR_Ll[] = { 642, +CR_Ll[] = { 645, 0x0061, 0x007a, 0x00b5, 0x00b5, 0x00df, 0x00f6, @@ -15979,9 +16253,12 @@ CR_Ll[] = { 642, 0xa7bd, 0xa7bd, 0xa7bf, 0xa7bf, 0xa7c3, 0xa7c3, +0xa7c8, 0xa7c8, +0xa7ca, 0xa7ca, +0xa7f6, 0xa7f6, 0xa7fa, 0xa7fa, 0xab30, 0xab5a, -0xab60, 0xab67, +0xab60, 0xab68, 0xab70, 0xabbf, 0xfb00, 0xfb06, 0xfb13, 0xfb17, @@ -16024,7 +16301,7 @@ CR_Ll[] = { 642, /* PROPERTY: 'Lm': General Category */ static const OnigCodePoint -CR_Lm[] = { 60, +CR_Lm[] = { 61, 0x02b0, 0x02c1, 0x02c6, 0x02d1, 0x02e0, 0x02e4, @@ -16077,6 +16354,7 @@ CR_Lm[] = { 60, 0xaadd, 0xaadd, 0xaaf3, 0xaaf4, 0xab5c, 0xab5f, +0xab69, 0xab69, 0xff70, 0xff70, 0xff9e, 0xff9f, 0x16b40, 0x16b43, @@ -16089,7 +16367,7 @@ CR_Lm[] = { 60, /* PROPERTY: 'Lo': General Category */ static const OnigCodePoint -CR_Lo[] = { 476, +CR_Lo[] = { 489, 0x00aa, 0x00aa, 0x00ba, 0x00ba, 0x01bb, 0x01bb, @@ -16114,7 +16392,7 @@ CR_Lo[] = { 476, 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, 
-0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x0904, 0x0939, 0x093d, 0x093d, 0x0950, 0x0950, @@ -16190,7 +16468,7 @@ CR_Lo[] = { 476, 0x0cde, 0x0cde, 0x0ce0, 0x0ce1, 0x0cf1, 0x0cf2, -0x0d05, 0x0d0c, +0x0d04, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d3d, @@ -16303,10 +16581,10 @@ CR_Lo[] = { 476, 0x30ff, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa014, 0xa016, 0xa48c, 0xa4d0, 0xa4f7, @@ -16430,15 +16708,19 @@ CR_Lo[] = { 476, 0x10b80, 0x10b91, 0x10c00, 0x10c48, 0x10d00, 0x10d23, +0x10e80, 0x10ea9, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f45, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11003, 0x11037, 0x11083, 0x110af, 0x110d0, 0x110e8, 0x11103, 0x11126, 0x11144, 0x11144, +0x11147, 0x11147, 0x11150, 0x11172, 0x11176, 0x11176, 0x11183, 0x111b2, @@ -16464,7 +16746,7 @@ CR_Lo[] = { 476, 0x1135d, 0x11361, 0x11400, 0x11434, 0x11447, 0x1144a, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114af, 0x114c4, 0x114c5, 0x114c7, 0x114c7, @@ -16476,7 +16758,13 @@ CR_Lo[] = { 476, 0x116b8, 0x116b8, 0x11700, 0x1171a, 0x11800, 0x1182b, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x1192f, +0x1193f, 0x1193f, +0x11941, 0x11941, 0x119a0, 0x119a7, 0x119aa, 0x119d0, 0x119e1, 0x119e1, @@ -16501,6 +16789,7 @@ CR_Lo[] = { 476, 0x11d6a, 0x11d89, 0x11d98, 0x11d98, 0x11ee0, 0x11ef2, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12480, 0x12543, 0x13000, 0x1342e, @@ -16514,7 +16803,8 @@ CR_Lo[] = { 476, 0x16f00, 0x16f4a, 0x16f50, 0x16f50, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -16560,12 +16850,13 @@ CR_Lo[] = { 476, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, -0x20000, 0x2a6d6, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 
0x3134a, }; /* END of CR_Lo */ /* PROPERTY: 'Logical_Order_Exception': Binary Property */ @@ -16600,7 +16891,7 @@ CR_Lt[] = { 10, /* PROPERTY: 'Lu': General Category */ static const OnigCodePoint -CR_Lu[] = { 636, +CR_Lu[] = { 638, 0x0041, 0x005a, 0x00c0, 0x00d6, 0x00d8, 0x00de, @@ -17198,7 +17489,9 @@ CR_Lu[] = { 636, 0xa7bc, 0xa7bc, 0xa7be, 0xa7be, 0xa7c2, 0xa7c2, -0xa7c4, 0xa7c6, +0xa7c4, 0xa7c7, +0xa7c9, 0xa7c9, +0xa7f5, 0xa7f5, 0xff21, 0xff3a, 0x10400, 0x10427, 0x104b0, 0x104d3, @@ -17254,7 +17547,7 @@ CR_Lydian[] = { 2, /* PROPERTY: 'M': Major Category */ static const OnigCodePoint -CR_M[] = { 280, +CR_M[] = { 290, 0x0300, 0x036f, 0x0483, 0x0489, 0x0591, 0x05bd, @@ -17313,7 +17606,7 @@ CR_M[] = { 280, 0x0b3e, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b62, 0x0b63, 0x0b82, 0x0b82, 0x0bbe, 0x0bc2, @@ -17340,7 +17633,7 @@ CR_M[] = { 280, 0x0d4a, 0x0d4d, 0x0d57, 0x0d57, 0x0d62, 0x0d63, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0dca, 0x0dca, 0x0dcf, 0x0dd4, 0x0dd6, 0x0dd6, @@ -17387,7 +17680,7 @@ CR_M[] = { 280, 0x1a55, 0x1a5e, 0x1a60, 0x1a7c, 0x1a7f, 0x1a7f, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b04, 0x1b34, 0x1b44, 0x1b6b, 0x1b73, @@ -17416,6 +17709,7 @@ CR_M[] = { 280, 0xa806, 0xa806, 0xa80b, 0xa80b, 0xa823, 0xa827, +0xa82c, 0xa82c, 0xa880, 0xa881, 0xa8b4, 0xa8c5, 0xa8e0, 0xa8f1, @@ -17451,6 +17745,7 @@ CR_M[] = { 280, 0x10a3f, 0x10a3f, 0x10ae5, 0x10ae6, 0x10d24, 0x10d27, +0x10eab, 0x10eac, 0x10f46, 0x10f50, 0x11000, 0x11002, 0x11038, 0x11046, @@ -17463,6 +17758,7 @@ CR_M[] = { 280, 0x11180, 0x11182, 0x111b3, 0x111c0, 0x111c9, 0x111cc, +0x111ce, 0x111cf, 0x1122c, 0x11237, 0x1123e, 0x1123e, 0x112df, 0x112ea, @@ -17485,6 +17781,11 @@ CR_M[] = { 280, 0x116ab, 0x116b7, 0x1171d, 0x1172b, 0x1182c, 0x1183a, +0x11930, 0x11935, +0x11937, 0x11938, +0x1193b, 0x1193e, +0x11940, 0x11940, +0x11942, 0x11943, 0x119d1, 0x119d7, 0x119da, 0x119e0, 0x119e4, 0x119e4, @@ -17512,6 +17813,8 @@ CR_M[] = { 280, 0x16f4f, 0x16f4f, 0x16f51, 0x16f87, 
0x16f8f, 0x16f92, +0x16fe4, 0x16fe4, +0x16ff0, 0x16ff1, 0x1bc9d, 0x1bc9e, 0x1d165, 0x1d169, 0x1d16d, 0x1d172, @@ -17551,9 +17854,8 @@ CR_Makasar[] = { 1, /* PROPERTY: 'Malayalam': Script */ static const OnigCodePoint -CR_Malayalam[] = { 8, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +CR_Malayalam[] = { 7, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, @@ -17741,7 +18043,7 @@ CR_Math[] = { 138, /* PROPERTY: 'Mc': General Category */ static const OnigCodePoint -CR_Mc[] = { 168, +CR_Mc[] = { 175, 0x0903, 0x0903, 0x093b, 0x093b, 0x093e, 0x0940, @@ -17861,6 +18163,7 @@ CR_Mc[] = { 168, 0x11182, 0x11182, 0x111b3, 0x111b5, 0x111bf, 0x111c0, +0x111ce, 0x111ce, 0x1122c, 0x1122e, 0x11232, 0x11233, 0x11235, 0x11235, @@ -17892,6 +18195,11 @@ CR_Mc[] = { 168, 0x11726, 0x11726, 0x1182c, 0x1182e, 0x11838, 0x11838, +0x11930, 0x11935, +0x11937, 0x11938, +0x1193d, 0x1193d, +0x11940, 0x11940, +0x11942, 0x11942, 0x119d1, 0x119d3, 0x119dc, 0x119df, 0x119e4, 0x119e4, @@ -17908,6 +18216,7 @@ CR_Mc[] = { 168, 0x11d96, 0x11d96, 0x11ef5, 0x11ef6, 0x16f51, 0x16f87, +0x16ff0, 0x16ff1, 0x1d165, 0x1d166, 0x1d16d, 0x1d172, }; /* END of CR_Mc */ @@ -17967,7 +18276,7 @@ CR_Miao[] = { 3, /* PROPERTY: 'Mn': General Category */ static const OnigCodePoint -CR_Mn[] = { 318, +CR_Mn[] = { 327, 0x0300, 0x036f, 0x0483, 0x0487, 0x0591, 0x05bd, @@ -18026,7 +18335,7 @@ CR_Mn[] = { 318, 0x0b3f, 0x0b3f, 0x0b41, 0x0b44, 0x0b4d, 0x0b4d, -0x0b56, 0x0b56, +0x0b55, 0x0b56, 0x0b62, 0x0b63, 0x0b82, 0x0b82, 0x0bc0, 0x0bc0, @@ -18049,6 +18358,7 @@ CR_Mn[] = { 318, 0x0d41, 0x0d44, 0x0d4d, 0x0d4d, 0x0d62, 0x0d63, +0x0d81, 0x0d81, 0x0dca, 0x0dca, 0x0dd2, 0x0dd4, 0x0dd6, 0x0dd6, @@ -18106,6 +18416,7 @@ CR_Mn[] = { 318, 0x1a73, 0x1a7c, 0x1a7f, 0x1a7f, 0x1ab0, 0x1abd, +0x1abf, 0x1ac0, 0x1b00, 0x1b03, 0x1b34, 0x1b34, 0x1b36, 0x1b3a, @@ -18146,6 +18457,7 @@ CR_Mn[] = { 318, 0xa806, 0xa806, 0xa80b, 0xa80b, 0xa825, 0xa826, +0xa82c, 0xa82c, 0xa8c4, 0xa8c5, 0xa8e0, 0xa8f1, 0xa8ff, 0xa8ff, @@ -18185,6 +18497,7 @@ CR_Mn[] = { 
318, 0x10a3f, 0x10a3f, 0x10ae5, 0x10ae6, 0x10d24, 0x10d27, +0x10eab, 0x10eac, 0x10f46, 0x10f50, 0x11001, 0x11001, 0x11038, 0x11046, @@ -18198,6 +18511,7 @@ CR_Mn[] = { 318, 0x11180, 0x11181, 0x111b6, 0x111be, 0x111c9, 0x111cc, +0x111cf, 0x111cf, 0x1122f, 0x11231, 0x11234, 0x11234, 0x11236, 0x11237, @@ -18233,6 +18547,9 @@ CR_Mn[] = { 318, 0x11727, 0x1172b, 0x1182f, 0x11837, 0x11839, 0x1183a, +0x1193b, 0x1193c, +0x1193e, 0x1193e, +0x11943, 0x11943, 0x119d4, 0x119d7, 0x119da, 0x119db, 0x119e0, 0x119e0, @@ -18264,6 +18581,7 @@ CR_Mn[] = { 318, 0x16b30, 0x16b36, 0x16f4f, 0x16f4f, 0x16f8f, 0x16f92, +0x16fe4, 0x16fe4, 0x1bc9d, 0x1bc9e, 0x1d167, 0x1d169, 0x1d17b, 0x1d182, @@ -18335,7 +18653,7 @@ CR_Myanmar[] = { 3, /* PROPERTY: 'N': Major Category */ static const OnigCodePoint -CR_N[] = { 130, +CR_N[] = { 133, 0x0030, 0x0039, 0x00b2, 0x00b3, 0x00b9, 0x00b9, @@ -18432,6 +18750,7 @@ CR_N[] = { 130, 0x10e60, 0x10e7e, 0x10f1d, 0x10f26, 0x10f51, 0x10f54, +0x10fc5, 0x10fcb, 0x11052, 0x1106f, 0x110f0, 0x110f9, 0x11136, 0x1113f, @@ -18444,6 +18763,7 @@ CR_N[] = { 130, 0x116c0, 0x116c9, 0x11730, 0x1173b, 0x118e0, 0x118f2, +0x11950, 0x11959, 0x11c50, 0x11c6c, 0x11d50, 0x11d59, 0x11da0, 0x11da9, @@ -18466,6 +18786,7 @@ CR_N[] = { 130, 0x1ed01, 0x1ed2d, 0x1ed2f, 0x1ed3d, 0x1f100, 0x1f10c, +0x1fbf0, 0x1fbf9, }; /* END of CR_N */ /* PROPERTY: 'Nabataean': Script */ @@ -18497,10 +18818,9 @@ CR_New_Tai_Lue[] = { 4, /* PROPERTY: 'Newa': Script */ static const OnigCodePoint -CR_Newa[] = { 3, -0x11400, 0x11459, -0x1145b, 0x1145b, -0x1145d, 0x1145f, +CR_Newa[] = { 2, +0x11400, 0x1145b, +0x1145d, 0x11461, }; /* END of CR_Newa */ /* PROPERTY: 'Nko': Script */ @@ -18529,7 +18849,7 @@ CR_Nl[] = { 12, /* PROPERTY: 'No': General Category */ static const OnigCodePoint -CR_No[] = { 70, +CR_No[] = { 71, 0x00b2, 0x00b3, 0x00b9, 0x00b9, 0x00bc, 0x00be, @@ -18583,6 +18903,7 @@ CR_No[] = { 70, 0x10e60, 0x10e7e, 0x10f1d, 0x10f26, 0x10f51, 0x10f54, +0x10fc5, 0x10fcb, 0x11052, 0x11065, 0x111e1, 0x111f4, 
0x1173a, 0x1173b, @@ -18718,7 +19039,7 @@ CR_Oriya[] = { 14, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b77, @@ -18740,7 +19061,7 @@ CR_Osmanya[] = { 2, /* PROPERTY: 'Other_Alphabetic': Binary Property */ static const OnigCodePoint -CR_Other_Alphabetic[] = { 221, +CR_Other_Alphabetic[] = { 230, 0x0345, 0x0345, 0x05b0, 0x05bd, 0x05bf, 0x05bf, @@ -18818,7 +19139,7 @@ CR_Other_Alphabetic[] = { 221, 0x0d4a, 0x0d4c, 0x0d57, 0x0d57, 0x0d62, 0x0d63, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0dcf, 0x0dd4, 0x0dd6, 0x0dd6, 0x0dd8, 0x0ddf, @@ -18856,6 +19177,7 @@ CR_Other_Alphabetic[] = { 221, 0x1a17, 0x1a1b, 0x1a55, 0x1a5e, 0x1a61, 0x1a74, +0x1abf, 0x1ac0, 0x1b00, 0x1b04, 0x1b35, 0x1b43, 0x1b80, 0x1b82, @@ -18897,6 +19219,7 @@ CR_Other_Alphabetic[] = { 221, 0x10a05, 0x10a06, 0x10a0c, 0x10a0f, 0x10d24, 0x10d27, +0x10eab, 0x10eac, 0x11000, 0x11002, 0x11038, 0x11045, 0x11082, 0x11082, @@ -18906,6 +19229,7 @@ CR_Other_Alphabetic[] = { 221, 0x11145, 0x11146, 0x11180, 0x11182, 0x111b3, 0x111bf, +0x111ce, 0x111cf, 0x1122c, 0x11234, 0x11237, 0x11237, 0x1123e, 0x1123e, @@ -18927,6 +19251,11 @@ CR_Other_Alphabetic[] = { 221, 0x116ab, 0x116b5, 0x1171d, 0x1172a, 0x1182c, 0x11838, +0x11930, 0x11935, +0x11937, 0x11938, +0x1193b, 0x1193c, +0x11940, 0x11940, +0x11942, 0x11942, 0x119d1, 0x119d7, 0x119da, 0x119df, 0x119e4, 0x119e4, @@ -18952,6 +19281,7 @@ CR_Other_Alphabetic[] = { 221, 0x16f4f, 0x16f4f, 0x16f51, 0x16f87, 0x16f8f, 0x16f92, +0x16ff0, 0x16ff1, 0x1bc9e, 0x1bc9e, 0x1e000, 0x1e006, 0x1e008, 0x1e018, @@ -18982,7 +19312,7 @@ CR_Other_Default_Ignorable_Code_Point[] = { 11, /* PROPERTY: 'Other_Grapheme_Extend': Binary Property */ static const OnigCodePoint -CR_Other_Grapheme_Extend[] = { 24, +CR_Other_Grapheme_Extend[] = { 25, 0x09be, 0x09be, 0x09d7, 0x09d7, 0x0b3e, 0x0b3e, @@ -19004,6 +19334,7 @@ CR_Other_Grapheme_Extend[] = { 24, 0x114b0, 0x114b0, 0x114bd, 0x114bd, 0x115af, 0x115af, +0x11930, 0x11930, 
0x1d165, 0x1d165, 0x1d16e, 0x1d172, 0xe0020, 0xe007f, @@ -19282,7 +19613,7 @@ CR_Pc[] = { 6, /* PROPERTY: 'Pd': General Category */ static const OnigCodePoint -CR_Pd[] = { 17, +CR_Pd[] = { 18, 0x002d, 0x002d, 0x058a, 0x058a, 0x05be, 0x05be, @@ -19300,6 +19631,7 @@ CR_Pd[] = { 17, 0xfe58, 0xfe58, 0xfe63, 0xfe63, 0xff0d, 0xff0d, +0x10ead, 0x10ead, }; /* END of CR_Pd */ /* PROPERTY: 'Pe': General Category */ @@ -19425,7 +19757,7 @@ CR_Pi[] = { 11, /* PROPERTY: 'Po': General Category */ static const OnigCodePoint -CR_Po[] = { 179, +CR_Po[] = { 181, 0x0021, 0x0023, 0x0025, 0x0027, 0x002a, 0x002a, @@ -19514,6 +19846,7 @@ CR_Po[] = { 179, 0x2e3c, 0x2e3f, 0x2e41, 0x2e41, 0x2e43, 0x2e4f, +0x2e52, 0x2e52, 0x3001, 0x3003, 0x303d, 0x303d, 0x30fb, 0x30fb, @@ -19579,7 +19912,7 @@ CR_Po[] = { 179, 0x11238, 0x1123d, 0x112a9, 0x112a9, 0x1144b, 0x1144f, -0x1145b, 0x1145b, +0x1145a, 0x1145b, 0x1145d, 0x1145d, 0x114c6, 0x114c6, 0x115c1, 0x115d7, @@ -19587,6 +19920,7 @@ CR_Po[] = { 179, 0x11660, 0x1166c, 0x1173c, 0x1173e, 0x1183b, 0x1183b, +0x11944, 0x11946, 0x119e2, 0x119e2, 0x11a3f, 0x11a46, 0x11a9a, 0x11a9c, @@ -19754,7 +20088,7 @@ CR_Runic[] = { 2, /* PROPERTY: 'S': Major Category */ static const OnigCodePoint -CR_S[] = { 226, +CR_S[] = { 229, 0x0024, 0x0024, 0x002b, 0x002b, 0x003c, 0x003e, @@ -19855,8 +20189,9 @@ CR_S[] = { 226, 0x29dc, 0x29fb, 0x29fe, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2bff, +0x2b97, 0x2bff, 0x2ce5, 0x2cea, +0x2e50, 0x2e51, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -19885,6 +20220,7 @@ CR_S[] = { 226, 0xa836, 0xa839, 0xaa77, 0xaa79, 0xab5b, 0xab5b, +0xab6a, 0xab6b, 0xfb29, 0xfb29, 0xfbb2, 0xfbc1, 0xfdfc, 0xfdfd, @@ -19904,7 +20240,7 @@ CR_S[] = { 226, 0x10137, 0x1013f, 0x10179, 0x10189, 0x1018c, 0x1018e, -0x10190, 0x1019b, +0x10190, 0x1019c, 0x101a0, 0x101a0, 0x101d0, 0x101fc, 0x10877, 0x10878, @@ -19951,16 +20287,15 @@ CR_S[] = { 226, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f10d, 0x1f1ad, 0x1f1e6, 
0x1f202, 0x1f210, 0x1f23b, 0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, -0x1f300, 0x1f6d5, +0x1f300, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 0x1f7eb, @@ -19969,18 +20304,20 @@ CR_S[] = { 226, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, }; /* END of CR_S */ /* PROPERTY: 'Samaritan': Script */ @@ -20025,7 +20362,7 @@ CR_Sc[] = { 21, /* PROPERTY: 'Sentence_Terminal': Binary Property */ static const OnigCodePoint -CR_Sentence_Terminal[] = { 74, +CR_Sentence_Terminal[] = { 76, 0x0021, 0x0021, 0x002e, 0x002e, 0x003f, 0x003f, @@ -20089,6 +20426,8 @@ CR_Sentence_Terminal[] = { 74, 0x115c9, 0x115d7, 0x11641, 0x11642, 0x1173c, 0x1173e, +0x11944, 0x11944, +0x11946, 0x11946, 0x11a42, 0x11a43, 0x11a9b, 0x11a9c, 0x11c41, 0x11c42, @@ -20104,9 +20443,8 @@ CR_Sentence_Terminal[] = { 74, /* PROPERTY: 'Sharada': Script */ static const OnigCodePoint -CR_Sharada[] = { 2, -0x11180, 0x111cd, -0x111d0, 0x111df, +CR_Sharada[] = { 1, +0x11180, 0x111df, }; /* END of CR_Sharada */ /* PROPERTY: 'Shavian': Script */ @@ -20133,7 +20471,7 @@ CR_SignWriting[] = { 3, /* PROPERTY: 'Sinhala': Script */ static const OnigCodePoint CR_Sinhala[] = { 13, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -20150,7 +20488,7 @@ CR_Sinhala[] = { 13, /* PROPERTY: 'Sk': General Category */ static const OnigCodePoint -CR_Sk[] = { 29, +CR_Sk[] = { 30, 0x005e, 0x005e, 0x0060, 0x0060, 0x00a8, 0x00a8, @@ -20175,6 +20513,7 @@ CR_Sk[] = { 29, 0xa720, 0xa721, 0xa789, 
0xa78a, 0xab5b, 0xab5b, +0xab6a, 0xab6b, 0xfbb2, 0xfbc1, 0xff3e, 0xff3e, 0xff40, 0xff40, @@ -20253,7 +20592,7 @@ CR_Sm[] = { 64, /* PROPERTY: 'So': General Category */ static const OnigCodePoint -CR_So[] = { 180, +CR_So[] = { 182, 0x00a6, 0x00a6, 0x00a9, 0x00a9, 0x00ae, 0x00ae, @@ -20334,8 +20673,9 @@ CR_So[] = { 180, 0x2b45, 0x2b46, 0x2b4d, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2bff, +0x2b97, 0x2bff, 0x2ce5, 0x2cea, +0x2e50, 0x2e51, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -20368,7 +20708,7 @@ CR_So[] = { 180, 0x10137, 0x1013f, 0x10179, 0x10189, 0x1018c, 0x1018e, -0x10190, 0x1019b, +0x10190, 0x1019c, 0x101a0, 0x101a0, 0x101d0, 0x101fc, 0x10877, 0x10878, @@ -20403,17 +20743,16 @@ CR_So[] = { 180, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f10d, 0x1f1ad, 0x1f1e6, 0x1f202, 0x1f210, 0x1f23b, 0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, 0x1f300, 0x1f3fa, -0x1f400, 0x1f6d5, +0x1f400, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 0x1f7eb, @@ -20422,18 +20761,20 @@ CR_So[] = { 180, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, }; /* END of CR_So */ /* PROPERTY: 'Soft_Dotted': Binary Property */ @@ -20501,7 +20842,7 @@ CR_Sundanese[] = { 2, /* PROPERTY: 'Syloti_Nagri': Script */ static const OnigCodePoint CR_Syloti_Nagri[] = { 1, -0xa800, 0xa82b, +0xa800, 0xa82c, }; /* END of CR_Syloti_Nagri */ /* PROPERTY: 'Syriac': Script */ @@ -20584,10 +20925,11 @@ CR_Tamil[] = { 18, /* PROPERTY: 'Tangut': Script */ 
static const OnigCodePoint -CR_Tangut[] = { 3, +CR_Tangut[] = { 4, 0x16fe0, 0x16fe0, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18aff, +0x18d00, 0x18d08, }; /* END of CR_Tangut */ /* PROPERTY: 'Telugu': Script */ @@ -20609,7 +20951,7 @@ CR_Telugu[] = { 12, /* PROPERTY: 'Terminal_Punctuation': Binary Property */ static const OnigCodePoint -CR_Terminal_Punctuation[] = { 102, +CR_Terminal_Punctuation[] = { 104, 0x0021, 0x0021, 0x002c, 0x002c, 0x002e, 0x002e, @@ -20693,11 +21035,13 @@ CR_Terminal_Punctuation[] = { 102, 0x11238, 0x1123c, 0x112a9, 0x112a9, 0x1144b, 0x1144d, -0x1145b, 0x1145b, +0x1145a, 0x1145b, 0x115c2, 0x115c5, 0x115c9, 0x115d7, 0x11641, 0x11642, 0x1173c, 0x1173e, +0x11944, 0x11944, +0x11946, 0x11946, 0x11a42, 0x11a43, 0x11a9b, 0x11a9c, 0x11aa1, 0x11aa2, @@ -20763,9 +21107,9 @@ CR_Ugaritic[] = { 2, /* PROPERTY: 'Unified_Ideograph': Binary Property */ static const OnigCodePoint -CR_Unified_Ideograph[] = { 14, -0x3400, 0x4db5, -0x4e00, 0x9fef, +CR_Unified_Ideograph[] = { 15, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xfa0e, 0xfa0f, 0xfa11, 0xfa11, 0xfa13, 0xfa14, @@ -20773,16 +21117,17 @@ CR_Unified_Ideograph[] = { 14, 0xfa21, 0xfa21, 0xfa23, 0xfa24, 0xfa27, 0xfa29, -0x20000, 0x2a6d6, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, +0x30000, 0x3134a, }; /* END of CR_Unified_Ideograph */ /* PROPERTY: 'Unknown': Script */ static const OnigCodePoint -CR_Unknown[] = { 664, +CR_Unknown[] = { 675, 0x0378, 0x0379, 0x0380, 0x0383, 0x038b, 0x038b, @@ -20806,7 +21151,7 @@ CR_Unknown[] = { 664, 0x085f, 0x085f, 0x086b, 0x089f, 0x08b5, 0x08b5, -0x08be, 0x08d2, +0x08c8, 0x08d2, 0x0984, 0x0984, 0x098d, 0x098e, 0x0991, 0x0992, @@ -20860,7 +21205,7 @@ CR_Unknown[] = { 664, 0x0b3a, 0x0b3b, 0x0b45, 0x0b46, 0x0b49, 0x0b4a, -0x0b4e, 0x0b55, +0x0b4e, 0x0b54, 0x0b58, 0x0b5b, 0x0b5e, 0x0b5e, 0x0b64, 0x0b65, @@ -20905,14 +21250,13 @@ CR_Unknown[] = { 664, 0x0ce4, 0x0ce5, 0x0cf0, 0x0cf0, 0x0cf3, 0x0cff, -0x0d04, 0x0d04, 0x0d0d, 0x0d0d, 
0x0d11, 0x0d11, 0x0d45, 0x0d45, 0x0d49, 0x0d49, 0x0d50, 0x0d53, 0x0d64, 0x0d65, -0x0d80, 0x0d81, +0x0d80, 0x0d80, 0x0d84, 0x0d84, 0x0d97, 0x0d99, 0x0db2, 0x0db2, @@ -20999,7 +21343,7 @@ CR_Unknown[] = { 664, 0x1a8a, 0x1a8f, 0x1a9a, 0x1a9f, 0x1aae, 0x1aaf, -0x1abf, 0x1aff, +0x1ac1, 0x1aff, 0x1b4c, 0x1b4f, 0x1b7d, 0x1b7f, 0x1bf4, 0x1bfb, @@ -21036,7 +21380,7 @@ CR_Unknown[] = { 664, 0x2427, 0x243f, 0x244b, 0x245f, 0x2b74, 0x2b75, -0x2b96, 0x2b97, +0x2b96, 0x2b96, 0x2c2f, 0x2c2f, 0x2c5f, 0x2c5f, 0x2cf4, 0x2cf8, @@ -21054,7 +21398,7 @@ CR_Unknown[] = { 664, 0x2dcf, 0x2dcf, 0x2dd7, 0x2dd7, 0x2ddf, 0x2ddf, -0x2e50, 0x2e7f, +0x2e53, 0x2e7f, 0x2e9a, 0x2e9a, 0x2ef4, 0x2eff, 0x2fd6, 0x2fef, @@ -21064,18 +21408,16 @@ CR_Unknown[] = { 664, 0x3100, 0x3104, 0x3130, 0x3130, 0x318f, 0x318f, -0x31bb, 0x31bf, 0x31e4, 0x31ef, 0x321f, 0x321f, -0x4db6, 0x4dbf, -0x9ff0, 0x9fff, +0x9ffd, 0x9fff, 0xa48d, 0xa48f, 0xa4c7, 0xa4cf, 0xa62c, 0xa63f, 0xa6f8, 0xa6ff, 0xa7c0, 0xa7c1, -0xa7c7, 0xa7f6, -0xa82c, 0xa82f, +0xa7cb, 0xa7f4, +0xa82d, 0xa82f, 0xa83a, 0xa83f, 0xa878, 0xa87f, 0xa8c6, 0xa8cd, @@ -21095,7 +21437,7 @@ CR_Unknown[] = { 664, 0xab17, 0xab1f, 0xab27, 0xab27, 0xab2f, 0xab2f, -0xab68, 0xab6f, +0xab6c, 0xab6f, 0xabee, 0xabef, 0xabfa, 0xabff, 0xd7a4, 0xd7af, @@ -21140,7 +21482,7 @@ CR_Unknown[] = { 664, 0x10103, 0x10106, 0x10134, 0x10136, 0x1018f, 0x1018f, -0x1019c, 0x1019f, +0x1019d, 0x1019f, 0x101a1, 0x101cf, 0x101fe, 0x1027f, 0x1029d, 0x1029f, @@ -21199,9 +21541,13 @@ CR_Unknown[] = { 664, 0x10cf3, 0x10cf9, 0x10d28, 0x10d2f, 0x10d3a, 0x10e5f, -0x10e7f, 0x10eff, +0x10e7f, 0x10e7f, +0x10eaa, 0x10eaa, +0x10eae, 0x10eaf, +0x10eb2, 0x10eff, 0x10f28, 0x10f2f, -0x10f5a, 0x10fdf, +0x10f5a, 0x10faf, +0x10fcc, 0x10fdf, 0x10ff7, 0x10fff, 0x1104e, 0x11051, 0x11070, 0x1107e, @@ -21210,9 +21556,8 @@ CR_Unknown[] = { 664, 0x110e9, 0x110ef, 0x110fa, 0x110ff, 0x11135, 0x11135, -0x11147, 0x1114f, +0x11148, 0x1114f, 0x11177, 0x1117f, -0x111ce, 0x111cf, 0x111e0, 0x111e0, 0x111f5, 0x111ff, 0x11212, 
0x11212, @@ -21239,9 +21584,8 @@ CR_Unknown[] = { 664, 0x11364, 0x11365, 0x1136d, 0x1136f, 0x11375, 0x113ff, -0x1145a, 0x1145a, 0x1145c, 0x1145c, -0x11460, 0x1147f, +0x11462, 0x1147f, 0x114c8, 0x114cf, 0x114da, 0x1157f, 0x115b6, 0x115b7, @@ -21256,7 +21600,14 @@ CR_Unknown[] = { 664, 0x11740, 0x117ff, 0x1183c, 0x1189f, 0x118f3, 0x118fe, -0x11900, 0x1199f, +0x11907, 0x11908, +0x1190a, 0x1190b, +0x11914, 0x11914, +0x11917, 0x11917, +0x11936, 0x11936, +0x11939, 0x1193a, +0x11947, 0x1194f, +0x1195a, 0x1199f, 0x119a8, 0x119a9, 0x119d8, 0x119d9, 0x119e5, 0x119ff, @@ -21283,7 +21634,8 @@ CR_Unknown[] = { 664, 0x11d92, 0x11d92, 0x11d99, 0x11d9f, 0x11daa, 0x11edf, -0x11ef9, 0x11fbf, +0x11ef9, 0x11faf, +0x11fb1, 0x11fbf, 0x11ff2, 0x11ffe, 0x1239a, 0x123ff, 0x1246f, 0x1246f, @@ -21307,9 +21659,11 @@ CR_Unknown[] = { 664, 0x16f4b, 0x16f4e, 0x16f88, 0x16f8e, 0x16fa0, 0x16fdf, -0x16fe4, 0x16fff, +0x16fe5, 0x16fef, +0x16ff2, 0x16fff, 0x187f8, 0x187ff, -0x18af3, 0x1afff, +0x18cd6, 0x18cff, +0x18d09, 0x1afff, 0x1b11f, 0x1b14f, 0x1b153, 0x1b163, 0x1b168, 0x1b16f, @@ -21407,17 +21761,15 @@ CR_Unknown[] = { 664, 0x1f0c0, 0x1f0c0, 0x1f0d0, 0x1f0d0, 0x1f0f6, 0x1f0ff, -0x1f10d, 0x1f10f, -0x1f16d, 0x1f16f, -0x1f1ad, 0x1f1e5, +0x1f1ae, 0x1f1e5, 0x1f203, 0x1f20f, 0x1f23c, 0x1f23f, 0x1f249, 0x1f24f, 0x1f252, 0x1f25f, 0x1f266, 0x1f2ff, -0x1f6d6, 0x1f6df, +0x1f6d8, 0x1f6df, 0x1f6ed, 0x1f6ef, -0x1f6fb, 0x1f6ff, +0x1f6fd, 0x1f6ff, 0x1f774, 0x1f77f, 0x1f7d9, 0x1f7df, 0x1f7ec, 0x1f7ff, @@ -21425,25 +21777,29 @@ CR_Unknown[] = { 664, 0x1f848, 0x1f84f, 0x1f85a, 0x1f85f, 0x1f888, 0x1f88f, -0x1f8ae, 0x1f8ff, -0x1f90c, 0x1f90c, -0x1f972, 0x1f972, -0x1f977, 0x1f979, -0x1f9a3, 0x1f9a4, -0x1f9ab, 0x1f9ad, -0x1f9cb, 0x1f9cc, +0x1f8ae, 0x1f8af, +0x1f8b2, 0x1f8ff, +0x1f979, 0x1f979, +0x1f9cc, 0x1f9cc, 0x1fa54, 0x1fa5f, 0x1fa6e, 0x1fa6f, -0x1fa74, 0x1fa77, +0x1fa75, 0x1fa77, 0x1fa7b, 0x1fa7f, -0x1fa83, 0x1fa8f, -0x1fa96, 0x1ffff, -0x2a6d7, 0x2a6ff, +0x1fa87, 0x1fa8f, +0x1faa9, 0x1faaf, +0x1fab7, 0x1fabf, 
+0x1fac3, 0x1facf, +0x1fad7, 0x1faff, +0x1fb93, 0x1fb93, +0x1fbcb, 0x1fbef, +0x1fbfa, 0x1ffff, +0x2a6de, 0x2a6ff, 0x2b735, 0x2b73f, 0x2b81e, 0x2b81f, 0x2cea2, 0x2ceaf, 0x2ebe1, 0x2f7ff, -0x2fa1e, 0xe0000, +0x2fa1e, 0x2ffff, +0x3134b, 0xe0000, 0xe0002, 0xe001f, 0xe0080, 0xe00ff, 0xe01f0, 0x10ffff, @@ -21485,7 +21841,7 @@ CR_Warang_Citi[] = { 2, /* PROPERTY: 'XID_Continue': Derived Property */ static const OnigCodePoint -CR_XID_Continue[] = { 720, +CR_XID_Continue[] = { 737, 0x0030, 0x0039, 0x0041, 0x005a, 0x005f, 0x005f, @@ -21538,7 +21894,7 @@ CR_XID_Continue[] = { 720, 0x0840, 0x085b, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x08e1, 0x08e3, 0x0963, 0x0966, 0x096f, @@ -21598,7 +21954,7 @@ CR_XID_Continue[] = { 720, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b6f, @@ -21644,8 +22000,7 @@ CR_XID_Continue[] = { 720, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, @@ -21654,7 +22009,7 @@ CR_XID_Continue[] = { 720, 0x0d5f, 0x0d63, 0x0d66, 0x0d6f, 0x0d7a, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -21755,6 +22110,7 @@ CR_XID_Continue[] = { 720, 0x1a90, 0x1a99, 0x1aa7, 0x1aa7, 0x1ab0, 0x1abd, +0x1abf, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b59, 0x1b6b, 0x1b73, @@ -21838,10 +22194,10 @@ CR_XID_Continue[] = { 720, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -21852,8 +22208,9 @@ CR_XID_Continue[] = { 720, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa827, +0xa7c2, 0xa7ca, +0xa7f5, 0xa827, +0xa82c, 0xa82c, 0xa840, 0xa873, 0xa880, 0xa8c5, 0xa8d0, 0xa8d9, @@ -21879,7 +22236,7 @@ CR_XID_Continue[] = { 720, 0xab20, 0xab26, 
0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, @@ -21984,9 +22341,13 @@ CR_XID_Continue[] = { 720, 0x10cc0, 0x10cf2, 0x10d00, 0x10d27, 0x10d30, 0x10d39, +0x10e80, 0x10ea9, +0x10eab, 0x10eac, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f50, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11000, 0x11046, 0x11066, 0x1106f, @@ -21995,12 +22356,12 @@ CR_XID_Continue[] = { 720, 0x110f0, 0x110f9, 0x11100, 0x11134, 0x11136, 0x1113f, -0x11144, 0x11146, +0x11144, 0x11147, 0x11150, 0x11173, 0x11176, 0x11176, 0x11180, 0x111c4, 0x111c9, 0x111cc, -0x111d0, 0x111da, +0x111ce, 0x111da, 0x111dc, 0x111dc, 0x11200, 0x11211, 0x11213, 0x11237, @@ -22029,7 +22390,7 @@ CR_XID_Continue[] = { 720, 0x11370, 0x11374, 0x11400, 0x1144a, 0x11450, 0x11459, -0x1145e, 0x1145f, +0x1145e, 0x11461, 0x11480, 0x114c5, 0x114c7, 0x114c7, 0x114d0, 0x114d9, @@ -22046,7 +22407,14 @@ CR_XID_Continue[] = { 720, 0x11730, 0x11739, 0x11800, 0x1183a, 0x118a0, 0x118e9, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11943, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e1, @@ -22077,6 +22445,7 @@ CR_XID_Continue[] = { 720, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef6, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -22097,9 +22466,11 @@ CR_XID_Continue[] = { 720, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, -0x16fe3, 0x16fe3, +0x16fe3, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -22199,18 +22570,20 @@ CR_XID_Continue[] = { 720, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, -0x20000, 0x2a6d6, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0100, 
0xe01ef, }; /* END of CR_XID_Continue */ /* PROPERTY: 'XID_Start': Derived Property */ static const OnigCodePoint -CR_XID_Start[] = { 616, +CR_XID_Start[] = { 629, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -22261,7 +22634,7 @@ CR_XID_Start[] = { 616, 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x0904, 0x0939, 0x093d, 0x093d, 0x0950, 0x0950, @@ -22337,7 +22710,7 @@ CR_XID_Start[] = { 616, 0x0cde, 0x0cde, 0x0ce0, 0x0ce1, 0x0cf1, 0x0cf2, -0x0d05, 0x0d0c, +0x0d04, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d3d, @@ -22506,10 +22879,10 @@ CR_XID_Start[] = { 616, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -22521,8 +22894,8 @@ CR_XID_Start[] = { 616, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa801, +0xa7c2, 0xa7ca, +0xa7f5, 0xa801, 0xa803, 0xa805, 0xa807, 0xa80a, 0xa80c, 0xa822, @@ -22559,7 +22932,7 @@ CR_XID_Start[] = { 616, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabe2, 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, @@ -22651,15 +23024,19 @@ CR_XID_Start[] = { 616, 0x10c80, 0x10cb2, 0x10cc0, 0x10cf2, 0x10d00, 0x10d23, +0x10e80, 0x10ea9, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f45, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11003, 0x11037, 0x11083, 0x110af, 0x110d0, 0x110e8, 0x11103, 0x11126, 0x11144, 0x11144, +0x11147, 0x11147, 0x11150, 0x11172, 0x11176, 0x11176, 0x11183, 0x111b2, @@ -22685,7 +23062,7 @@ CR_XID_Start[] = { 616, 0x1135d, 0x11361, 0x11400, 0x11434, 0x11447, 0x1144a, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114af, 0x114c4, 0x114c5, 0x114c7, 0x114c7, @@ -22698,7 +23075,13 @@ CR_XID_Start[] = { 616, 0x11700, 0x1171a, 0x11800, 0x1182b, 0x118a0, 0x118df, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, 
+0x11915, 0x11916, +0x11918, 0x1192f, +0x1193f, 0x1193f, +0x11941, 0x11941, 0x119a0, 0x119a7, 0x119aa, 0x119d0, 0x119e1, 0x119e1, @@ -22723,6 +23106,7 @@ CR_XID_Start[] = { 616, 0x11d6a, 0x11d89, 0x11d98, 0x11d98, 0x11ee0, 0x11ef2, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -22742,7 +23126,8 @@ CR_XID_Start[] = { 616, 0x16fe0, 0x16fe1, 0x16fe3, 0x16fe3, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -22821,14 +23206,23 @@ CR_XID_Start[] = { 616, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, -0x20000, 0x2a6d6, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_XID_Start */ +/* PROPERTY: 'Yezidi': Script */ +static const OnigCodePoint +CR_Yezidi[] = { 3, +0x10e80, 0x10ea9, +0x10eab, 0x10ead, +0x10eb0, 0x10eb1, +}; /* END of CR_Yezidi */ + /* PROPERTY: 'Yi': Script */ static const OnigCodePoint CR_Yi[] = { 2, @@ -23603,7 +23997,10 @@ CR_In_Yi_Radicals[] = { 1, }; /* END of CR_In_Yi_Radicals */ /* PROPERTY: 'In_Lisu': Block */ -#define CR_In_Lisu CR_Lisu +static const OnigCodePoint +CR_In_Lisu[] = { 1, +0xa4d0, 0xa4ff, +}; /* END of CR_In_Lisu */ /* PROPERTY: 'In_Vai': Block */ static const OnigCodePoint @@ -24079,6 +24476,12 @@ CR_In_Rumi_Numeral_Symbols[] = { 1, 0x10e60, 0x10e7f, }; /* END of CR_In_Rumi_Numeral_Symbols */ +/* PROPERTY: 'In_Yezidi': Block */ +static const OnigCodePoint +CR_In_Yezidi[] = { 1, +0x10e80, 0x10ebf, +}; /* END of CR_In_Yezidi */ + /* PROPERTY: 'In_Old_Sogdian': Block */ static const OnigCodePoint CR_In_Old_Sogdian[] = { 1, @@ -24091,6 +24494,12 @@ CR_In_Sogdian[] = { 1, 0x10f30, 0x10f6f, }; /* END of CR_In_Sogdian */ +/* PROPERTY: 'In_Chorasmian': Block */ +static const OnigCodePoint +CR_In_Chorasmian[] = { 1, +0x10fb0, 0x10fdf, +}; /* END of CR_In_Chorasmian */ + /* PROPERTY: 'In_Elymaic': Block */ static const OnigCodePoint 
CR_In_Elymaic[] = { 1, @@ -24128,10 +24537,7 @@ CR_In_Mahajani[] = { 1, }; /* END of CR_In_Mahajani */ /* PROPERTY: 'In_Sharada': Block */ -static const OnigCodePoint -CR_In_Sharada[] = { 1, -0x11180, 0x111df, -}; /* END of CR_In_Sharada */ +#define CR_In_Sharada CR_Sharada /* PROPERTY: 'In_Sinhala_Archaic_Numbers': Block */ static const OnigCodePoint @@ -24217,6 +24623,12 @@ CR_In_Warang_Citi[] = { 1, 0x118a0, 0x118ff, }; /* END of CR_In_Warang_Citi */ +/* PROPERTY: 'In_Dives_Akuru': Block */ +static const OnigCodePoint +CR_In_Dives_Akuru[] = { 1, +0x11900, 0x1195f, +}; /* END of CR_In_Dives_Akuru */ + /* PROPERTY: 'In_Nandinagari': Block */ static const OnigCodePoint CR_In_Nandinagari[] = { 1, @@ -24271,6 +24683,12 @@ CR_In_Makasar[] = { 1, 0x11ee0, 0x11eff, }; /* END of CR_In_Makasar */ +/* PROPERTY: 'In_Lisu_Supplement': Block */ +static const OnigCodePoint +CR_In_Lisu_Supplement[] = { 1, +0x11fb0, 0x11fbf, +}; /* END of CR_In_Lisu_Supplement */ + /* PROPERTY: 'In_Tamil_Supplement': Block */ static const OnigCodePoint CR_In_Tamil_Supplement[] = { 1, @@ -24367,6 +24785,18 @@ CR_In_Tangut_Components[] = { 1, 0x18800, 0x18aff, }; /* END of CR_In_Tangut_Components */ +/* PROPERTY: 'In_Khitan_Small_Script': Block */ +static const OnigCodePoint +CR_In_Khitan_Small_Script[] = { 1, +0x18b00, 0x18cff, +}; /* END of CR_In_Khitan_Small_Script */ + +/* PROPERTY: 'In_Tangut_Supplement': Block */ +static const OnigCodePoint +CR_In_Tangut_Supplement[] = { 1, +0x18d00, 0x18d8f, +}; /* END of CR_In_Tangut_Supplement */ + /* PROPERTY: 'In_Kana_Supplement': Block */ static const OnigCodePoint CR_In_Kana_Supplement[] = { 1, @@ -24589,6 +25019,12 @@ CR_In_Symbols_and_Pictographs_Extended_A[] = { 1, 0x1fa70, 0x1faff, }; /* END of CR_In_Symbols_and_Pictographs_Extended_A */ +/* PROPERTY: 'In_Symbols_for_Legacy_Computing': Block */ +static const OnigCodePoint +CR_In_Symbols_for_Legacy_Computing[] = { 1, +0x1fb00, 0x1fbff, +}; /* END of CR_In_Symbols_for_Legacy_Computing */ + /* 
PROPERTY: 'In_CJK_Unified_Ideographs_Extension_B': Block */ static const OnigCodePoint CR_In_CJK_Unified_Ideographs_Extension_B[] = { 1, @@ -24625,6 +25061,12 @@ CR_In_CJK_Compatibility_Ideographs_Supplement[] = { 1, 0x2f800, 0x2fa1f, }; /* END of CR_In_CJK_Compatibility_Ideographs_Supplement */ +/* PROPERTY: 'In_CJK_Unified_Ideographs_Extension_G': Block */ +static const OnigCodePoint +CR_In_CJK_Unified_Ideographs_Extension_G[] = { 1, +0x30000, 0x3134f, +}; /* END of CR_In_CJK_Unified_Ideographs_Extension_G */ + /* PROPERTY: 'In_Tags': Block */ static const OnigCodePoint CR_In_Tags[] = { 1, @@ -24651,7 +25093,7 @@ CR_In_Supplementary_Private_Use_Area_B[] = { 1, /* PROPERTY: 'In_No_Block': Block */ static const OnigCodePoint -CR_In_No_Block[] = { 53, +CR_In_No_Block[] = { 54, 0x0870, 0x089f, 0x2fe0, 0x2fef, 0x10200, 0x1027f, @@ -24664,20 +25106,20 @@ CR_In_No_Block[] = { 53, 0x10bb0, 0x10bff, 0x10c50, 0x10c7f, 0x10d40, 0x10e5f, -0x10e80, 0x10eff, -0x10f70, 0x10fdf, +0x10ec0, 0x10eff, +0x10f70, 0x10faf, 0x11250, 0x1127f, 0x11380, 0x113ff, 0x114e0, 0x1157f, 0x116d0, 0x116ff, 0x11740, 0x117ff, 0x11850, 0x1189f, -0x11900, 0x1199f, +0x11960, 0x1199f, 0x11ab0, 0x11abf, 0x11b00, 0x11bff, 0x11cc0, 0x11cff, 0x11db0, 0x11edf, -0x11f00, 0x11fbf, +0x11f00, 0x11faf, 0x12550, 0x12fff, 0x13440, 0x143ff, 0x14680, 0x167ff, @@ -24685,7 +25127,7 @@ CR_In_No_Block[] = { 53, 0x16b90, 0x16e3f, 0x16ea0, 0x16eff, 0x16fa0, 0x16fdf, -0x18b00, 0x1afff, +0x18d90, 0x1afff, 0x1b300, 0x1bbff, 0x1bcb0, 0x1cfff, 0x1d250, 0x1d2df, @@ -24699,10 +25141,11 @@ CR_In_No_Block[] = { 53, 0x1ecc0, 0x1ecff, 0x1ed50, 0x1edff, 0x1ef00, 0x1efff, -0x1fb00, 0x1ffff, +0x1fc00, 0x1ffff, 0x2a6e0, 0x2a6ff, 0x2ebf0, 0x2f7ff, -0x2fa20, 0xdffff, +0x2fa20, 0x2ffff, +0x31350, 0xdffff, 0xe0080, 0xe00ff, 0xe01f0, 0xeffff, }; /* END of CR_In_No_Block */ @@ -24762,6 +25205,7 @@ const CodeRanges[] = { CR_Changes_When_Titlecased, CR_Changes_When_Uppercased, CR_Cherokee, + CR_Chorasmian, CR_Cn, CR_Co, CR_Common, @@ -24776,6 
+25220,7 @@ const CodeRanges[] = { CR_Deseret, CR_Devanagari, CR_Diacritic, + CR_Dives_Akuru, CR_Dogra, CR_Duployan, CR_Egyptian_Hieroglyphs, @@ -24825,6 +25270,7 @@ const CodeRanges[] = { CR_Katakana, CR_Kayah_Li, CR_Kharoshthi, + CR_Khitan_Small_Script, CR_Khmer, CR_Khojki, CR_Khudawadi, @@ -24970,6 +25416,7 @@ const CodeRanges[] = { CR_White_Space, CR_XID_Continue, CR_XID_Start, + CR_Yezidi, CR_Yi, CR_Z, CR_Zanabazar_Square, @@ -25181,8 +25628,10 @@ const CodeRanges[] = { CR_In_Old_Hungarian, CR_In_Hanifi_Rohingya, CR_In_Rumi_Numeral_Symbols, + CR_In_Yezidi, CR_In_Old_Sogdian, CR_In_Sogdian, + CR_In_Chorasmian, CR_In_Elymaic, CR_In_Brahmi, CR_In_Kaithi, @@ -25204,6 +25653,7 @@ const CodeRanges[] = { CR_In_Ahom, CR_In_Dogra, CR_In_Warang_Citi, + CR_In_Dives_Akuru, CR_In_Nandinagari, CR_In_Zanabazar_Square, CR_In_Soyombo, @@ -25213,6 +25663,7 @@ const CodeRanges[] = { CR_In_Masaram_Gondi, CR_In_Gunjala_Gondi, CR_In_Makasar, + CR_In_Lisu_Supplement, CR_In_Tamil_Supplement, CR_In_Cuneiform, CR_In_Cuneiform_Numbers_and_Punctuation, @@ -25229,6 +25680,8 @@ const CodeRanges[] = { CR_In_Ideographic_Symbols_and_Punctuation, CR_In_Tangut, CR_In_Tangut_Components, + CR_In_Khitan_Small_Script, + CR_In_Tangut_Supplement, CR_In_Kana_Supplement, CR_In_Kana_Extended_A, CR_In_Small_Kana_Extension, @@ -25266,12 +25719,14 @@ const CodeRanges[] = { CR_In_Supplemental_Symbols_and_Pictographs, CR_In_Chess_Symbols, CR_In_Symbols_and_Pictographs_Extended_A, + CR_In_Symbols_for_Legacy_Computing, CR_In_CJK_Unified_Ideographs_Extension_B, CR_In_CJK_Unified_Ideographs_Extension_C, CR_In_CJK_Unified_Ideographs_Extension_D, CR_In_CJK_Unified_Ideographs_Extension_E, CR_In_CJK_Unified_Ideographs_Extension_F, CR_In_CJK_Compatibility_Ideographs_Supplement, + CR_In_CJK_Unified_Ideographs_Extension_G, CR_In_Tags, CR_In_Variation_Selectors_Supplement, CR_In_Supplementary_Private_Use_Area_A, @@ -25282,12 +25737,12 @@ const CodeRanges[] = { #define pool_offset(s) offsetof(struct 
unicode_prop_name_pool_t, unicode_prop_name_pool_str##s) -#define TOTAL_KEYWORDS 801 +#define TOTAL_KEYWORDS 822 #define MIN_WORD_LENGTH 1 #define MAX_WORD_LENGTH 44 #define MIN_HASH_VALUE 10 -#define MAX_HASH_VALUE 5809 -/* maximum key range = 5800, duplicates = 0 */ +#define MAX_HASH_VALUE 5519 +/* maximum key range = 5510, duplicates = 0 */ #ifndef GPERF_DOWNCASE #define GPERF_DOWNCASE 1 @@ -25346,32 +25801,32 @@ hash (register const char *str, register size_t len) { static const unsigned short asso_values[] = { - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 0, 1121, 136, 394, 19, - 417, 1048, 958, 7, 856, 8, 415, 103, 3, 16, - 1380, 1068, 56, 181, 326, 631, 1151, 930, 358, 1083, - 8, 0, 5, 5810, 5810, 5810, 5810, 0, 1121, 136, - 394, 19, 417, 1048, 958, 7, 856, 8, 415, 103, - 3, 16, 1380, 1068, 56, 181, 326, 631, 1151, 930, - 358, 1083, 8, 0, 5, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810, - 
5810, 5810, 5810, 5810, 5810, 5810, 5810, 5810 + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 0, 1570, 136, 394, 19, + 985, 826, 1044, 7, 1389, 8, 415, 103, 3, 16, + 1080, 636, 56, 181, 326, 631, 1781, 1220, 786, 1639, + 12, 6, 0, 5520, 5520, 5520, 5520, 0, 1570, 136, + 394, 19, 985, 826, 1044, 7, 1389, 8, 415, 103, + 3, 16, 1080, 636, 56, 181, 326, 631, 1781, 1220, + 786, 1639, 12, 6, 0, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520 }; register unsigned int hval = (unsigned int )len; @@ -25414,21 +25869,22 @@ hash (register const char *str, register size_t len) struct unicode_prop_name_pool_t { char unicode_prop_name_pool_str10[sizeof("lana")]; - char unicode_prop_name_pool_str14[sizeof("z")]; - char unicode_prop_name_pool_str16[sizeof("yi")]; + char unicode_prop_name_pool_str13[sizeof("z")]; char unicode_prop_name_pool_str17[sizeof("lina")]; + char 
unicode_prop_name_pool_str22[sizeof("yi")]; char unicode_prop_name_pool_str24[sizeof("mn")]; - char unicode_prop_name_pool_str25[sizeof("yiii")]; char unicode_prop_name_pool_str27[sizeof("cn")]; char unicode_prop_name_pool_str28[sizeof("maka")]; char unicode_prop_name_pool_str30[sizeof("mani")]; - char unicode_prop_name_pool_str33[sizeof("zzzz")]; + char unicode_prop_name_pool_str31[sizeof("yiii")]; char unicode_prop_name_pool_str34[sizeof("inkannada")]; char unicode_prop_name_pool_str35[sizeof("ci")]; char unicode_prop_name_pool_str37[sizeof("lo")]; char unicode_prop_name_pool_str38[sizeof("lao")]; char unicode_prop_name_pool_str39[sizeof("laoo")]; + char unicode_prop_name_pool_str40[sizeof("zzzz")]; char unicode_prop_name_pool_str43[sizeof("miao")]; + char unicode_prop_name_pool_str48[sizeof("yezi")]; char unicode_prop_name_pool_str51[sizeof("innko")]; char unicode_prop_name_pool_str53[sizeof("co")]; char unicode_prop_name_pool_str56[sizeof("me")]; @@ -25479,6 +25935,7 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str225[sizeof("combiningmark")]; char unicode_prop_name_pool_str226[sizeof("incuneiformnumbersandpunctuation")]; char unicode_prop_name_pool_str231[sizeof("merc")]; + char unicode_prop_name_pool_str237[sizeof("inchorasmian")]; char unicode_prop_name_pool_str238[sizeof("perm")]; char unicode_prop_name_pool_str239[sizeof("inahom")]; char unicode_prop_name_pool_str240[sizeof("inipaextensions")]; @@ -25491,8 +25948,8 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str283[sizeof("incuneiform")]; char unicode_prop_name_pool_str290[sizeof("mc")]; char unicode_prop_name_pool_str293[sizeof("cc")]; - char unicode_prop_name_pool_str295[sizeof("inzanabazarsquare")]; char unicode_prop_name_pool_str298[sizeof("lineseparator")]; + char unicode_prop_name_pool_str299[sizeof("inzanabazarsquare")]; char unicode_prop_name_pool_str302[sizeof("armn")]; char unicode_prop_name_pool_str305[sizeof("qmark")]; char 
unicode_prop_name_pool_str306[sizeof("armi")]; @@ -25512,11 +25969,9 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str356[sizeof("incyrillic")]; char unicode_prop_name_pool_str357[sizeof("inthai")]; char unicode_prop_name_pool_str359[sizeof("incham")]; + char unicode_prop_name_pool_str364[sizeof("zs")]; char unicode_prop_name_pool_str367[sizeof("inkaithi")]; - char unicode_prop_name_pool_str369[sizeof("zs")]; char unicode_prop_name_pool_str372[sizeof("mtei")]; - char unicode_prop_name_pool_str375[sizeof("vai")]; - char unicode_prop_name_pool_str376[sizeof("vaii")]; char unicode_prop_name_pool_str379[sizeof("initialpunctuation")]; char unicode_prop_name_pool_str383[sizeof("cs")]; char unicode_prop_name_pool_str399[sizeof("insyriac")]; @@ -25525,18 +25980,17 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str417[sizeof("mand")]; char unicode_prop_name_pool_str419[sizeof("l")]; char unicode_prop_name_pool_str420[sizeof("ps")]; - char unicode_prop_name_pool_str427[sizeof("dia")]; char unicode_prop_name_pool_str431[sizeof("inkanaextendeda")]; - char unicode_prop_name_pool_str433[sizeof("di")]; char unicode_prop_name_pool_str436[sizeof("mend")]; char unicode_prop_name_pool_str437[sizeof("modi")]; char unicode_prop_name_pool_str441[sizeof("ideo")]; char unicode_prop_name_pool_str445[sizeof("katakana")]; char unicode_prop_name_pool_str449[sizeof("prti")]; + char unicode_prop_name_pool_str451[sizeof("yezidi")]; char unicode_prop_name_pool_str456[sizeof("inideographicdescriptioncharacters")]; char unicode_prop_name_pool_str457[sizeof("inlineara")]; - char unicode_prop_name_pool_str458[sizeof("xidcontinue")]; char unicode_prop_name_pool_str461[sizeof("brai")]; + char unicode_prop_name_pool_str462[sizeof("xidcontinue")]; char unicode_prop_name_pool_str463[sizeof("inlao")]; char unicode_prop_name_pool_str472[sizeof("ascii")]; char unicode_prop_name_pool_str474[sizeof("privateuse")]; @@ -25561,8 +26015,8 @@ struct unicode_prop_name_pool_t char 
unicode_prop_name_pool_str540[sizeof("inmendekikakui")]; char unicode_prop_name_pool_str545[sizeof("intransportandmapsymbols")]; char unicode_prop_name_pool_str547[sizeof("letternumber")]; - char unicode_prop_name_pool_str549[sizeof("xidc")]; char unicode_prop_name_pool_str550[sizeof("inmedefaidrin")]; + char unicode_prop_name_pool_str553[sizeof("xidc")]; char unicode_prop_name_pool_str558[sizeof("inchesssymbols")]; char unicode_prop_name_pool_str564[sizeof("inemoticons")]; char unicode_prop_name_pool_str573[sizeof("brahmi")]; @@ -25570,11 +26024,11 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str578[sizeof("palm")]; char unicode_prop_name_pool_str580[sizeof("inlycian")]; char unicode_prop_name_pool_str588[sizeof("inmiscellaneousmathematicalsymbolsa")]; - char unicode_prop_name_pool_str594[sizeof("xids")]; + char unicode_prop_name_pool_str598[sizeof("xids")]; char unicode_prop_name_pool_str603[sizeof("psalterpahlavi")]; char unicode_prop_name_pool_str619[sizeof("insundanese")]; char unicode_prop_name_pool_str620[sizeof("inoldsogdian")]; - char unicode_prop_name_pool_str632[sizeof("diacritic")]; + char unicode_prop_name_pool_str621[sizeof("kits")]; char unicode_prop_name_pool_str634[sizeof("gothic")]; char unicode_prop_name_pool_str635[sizeof("inancientsymbols")]; char unicode_prop_name_pool_str639[sizeof("meroiticcursive")]; @@ -25592,13 +26046,13 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str678[sizeof("inoldsoutharabian")]; char unicode_prop_name_pool_str699[sizeof("insylotinagri")]; char unicode_prop_name_pool_str701[sizeof("idsbinaryoperator")]; + char unicode_prop_name_pool_str704[sizeof("oriya")]; char unicode_prop_name_pool_str707[sizeof("sora")]; char unicode_prop_name_pool_str708[sizeof("bamum")]; char unicode_prop_name_pool_str709[sizeof("inkanasupplement")]; char unicode_prop_name_pool_str710[sizeof("incjkstrokes")]; char unicode_prop_name_pool_str715[sizeof("joinc")]; char 
unicode_prop_name_pool_str718[sizeof("inopticalcharacterrecognition")]; - char unicode_prop_name_pool_str722[sizeof("vs")]; char unicode_prop_name_pool_str728[sizeof("indominotiles")]; char unicode_prop_name_pool_str732[sizeof("batk")]; char unicode_prop_name_pool_str739[sizeof("grext")]; @@ -25607,7 +26061,6 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str752[sizeof("inmodifiertoneletters")]; char unicode_prop_name_pool_str753[sizeof("ital")]; char unicode_prop_name_pool_str760[sizeof("bass")]; - char unicode_prop_name_pool_str763[sizeof("decimalnumber")]; char unicode_prop_name_pool_str765[sizeof("alnum")]; char unicode_prop_name_pool_str767[sizeof("ids")]; char unicode_prop_name_pool_str776[sizeof("print")]; @@ -25616,19 +26069,20 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str783[sizeof("inmusicalsymbols")]; char unicode_prop_name_pool_str785[sizeof("intaile")]; char unicode_prop_name_pool_str794[sizeof("samr")]; + char unicode_prop_name_pool_str803[sizeof("vai")]; + char unicode_prop_name_pool_str804[sizeof("vaii")]; char unicode_prop_name_pool_str809[sizeof("samaritan")]; char unicode_prop_name_pool_str813[sizeof("s")]; char unicode_prop_name_pool_str816[sizeof("inlatinextendede")]; char unicode_prop_name_pool_str820[sizeof("bali")]; char unicode_prop_name_pool_str826[sizeof("lisu")]; char unicode_prop_name_pool_str827[sizeof("pauc")]; - char unicode_prop_name_pool_str828[sizeof("patternsyntax")]; char unicode_prop_name_pool_str829[sizeof("incontrolpictures")]; char unicode_prop_name_pool_str830[sizeof("blank")]; + char unicode_prop_name_pool_str832[sizeof("zl")]; char unicode_prop_name_pool_str834[sizeof("inmiscellaneoussymbols")]; char unicode_prop_name_pool_str835[sizeof("ll")]; char unicode_prop_name_pool_str836[sizeof("inancientgreekmusicalnotation")]; - char unicode_prop_name_pool_str837[sizeof("zl")]; char unicode_prop_name_pool_str838[sizeof("inlydian")]; char unicode_prop_name_pool_str839[sizeof("sm")]; char 
unicode_prop_name_pool_str843[sizeof("inmiscellaneoussymbolsandarrows")]; @@ -25636,603 +26090,626 @@ struct unicode_prop_name_pool_t char unicode_prop_name_pool_str845[sizeof("bengali")]; char unicode_prop_name_pool_str846[sizeof("pd")]; char unicode_prop_name_pool_str848[sizeof("inmiscellaneoussymbolsandpictographs")]; - char unicode_prop_name_pool_str850[sizeof("medf")]; - char unicode_prop_name_pool_str855[sizeof("cf")]; + char unicode_prop_name_pool_str849[sizeof("ingrantha")]; + char unicode_prop_name_pool_str856[sizeof("gong")]; char unicode_prop_name_pool_str858[sizeof("balinese")]; - char unicode_prop_name_pool_str860[sizeof("medefaidrin")]; - char unicode_prop_name_pool_str865[sizeof("han")]; + char unicode_prop_name_pool_str860[sizeof("osage")]; + char unicode_prop_name_pool_str865[sizeof("mong")]; char unicode_prop_name_pool_str869[sizeof("intamil")]; - char unicode_prop_name_pool_str870[sizeof("hani")]; char unicode_prop_name_pool_str871[sizeof("inmultani")]; - char unicode_prop_name_pool_str879[sizeof("hano")]; - char unicode_prop_name_pool_str887[sizeof("inshorthandformatcontrols")]; + char unicode_prop_name_pool_str879[sizeof("inosage")]; + char unicode_prop_name_pool_str885[sizeof("ingeneralpunctuation")]; + char unicode_prop_name_pool_str886[sizeof("georgian")]; char unicode_prop_name_pool_str891[sizeof("insaurashtra")]; - char unicode_prop_name_pool_str892[sizeof("pf")]; char unicode_prop_name_pool_str894[sizeof("inoldturkic")]; char unicode_prop_name_pool_str896[sizeof("idcontinue")]; - char unicode_prop_name_pool_str901[sizeof("hanunoo")]; char unicode_prop_name_pool_str905[sizeof("sc")]; - char unicode_prop_name_pool_str906[sizeof("inkhojki")]; char unicode_prop_name_pool_str913[sizeof("idst")]; char unicode_prop_name_pool_str915[sizeof("canadianaboriginal")]; - char unicode_prop_name_pool_str923[sizeof("hira")]; + char unicode_prop_name_pool_str922[sizeof("ingeorgian")]; + char unicode_prop_name_pool_str924[sizeof("osma")]; char 
unicode_prop_name_pool_str925[sizeof("plrd")]; char unicode_prop_name_pool_str939[sizeof("incaucasianalbanian")]; char unicode_prop_name_pool_str940[sizeof("indeseret")]; char unicode_prop_name_pool_str945[sizeof("inearlydynasticcuneiform")]; char unicode_prop_name_pool_str946[sizeof("inspacingmodifierletters")]; - char unicode_prop_name_pool_str950[sizeof("innewa")]; + char unicode_prop_name_pool_str957[sizeof("inmongolian")]; + char unicode_prop_name_pool_str964[sizeof("emoji")]; char unicode_prop_name_pool_str972[sizeof("idstart")]; - char unicode_prop_name_pool_str977[sizeof("zinh")]; - char unicode_prop_name_pool_str981[sizeof("incyrillicextendeda")]; - char unicode_prop_name_pool_str984[sizeof("dsrt")]; char unicode_prop_name_pool_str993[sizeof("cased")]; + char unicode_prop_name_pool_str995[sizeof("dia")]; char unicode_prop_name_pool_str999[sizeof("glagolitic")]; - char unicode_prop_name_pool_str1002[sizeof("inhanifirohingya")]; + char unicode_prop_name_pool_str1001[sizeof("di")]; + char unicode_prop_name_pool_str1004[sizeof("diak")]; char unicode_prop_name_pool_str1006[sizeof("inancientgreeknumbers")]; char unicode_prop_name_pool_str1008[sizeof("inmeeteimayekextensions")]; - char unicode_prop_name_pool_str1009[sizeof("intaixuanjingsymbols")]; + char unicode_prop_name_pool_str1013[sizeof("ingurmukhi")]; char unicode_prop_name_pool_str1016[sizeof("joincontrol")]; char unicode_prop_name_pool_str1020[sizeof("runr")]; - char unicode_prop_name_pool_str1023[sizeof("inwarangciti")]; - char unicode_prop_name_pool_str1025[sizeof("deseret")]; - char unicode_prop_name_pool_str1035[sizeof("inhiragana")]; char unicode_prop_name_pool_str1039[sizeof("sind")]; - char unicode_prop_name_pool_str1047[sizeof("cherokee")]; + char unicode_prop_name_pool_str1047[sizeof("odi")]; char unicode_prop_name_pool_str1050[sizeof("inlatinextendedc")]; char unicode_prop_name_pool_str1052[sizeof("adlm")]; - char unicode_prop_name_pool_str1053[sizeof("phoenician")]; - char 
unicode_prop_name_pool_str1056[sizeof("cher")]; - char unicode_prop_name_pool_str1059[sizeof("marchen")]; + char unicode_prop_name_pool_str1058[sizeof("zinh")]; char unicode_prop_name_pool_str1062[sizeof("inkhudawadi")]; char unicode_prop_name_pool_str1063[sizeof("sinhala")]; - char unicode_prop_name_pool_str1066[sizeof("lower")]; - char unicode_prop_name_pool_str1068[sizeof("graphemelink")]; - char unicode_prop_name_pool_str1069[sizeof("xidstart")]; - char unicode_prop_name_pool_str1071[sizeof("ingrantha")]; char unicode_prop_name_pool_str1072[sizeof("bidic")]; - char unicode_prop_name_pool_str1074[sizeof("xdigit")]; + char unicode_prop_name_pool_str1073[sizeof("xidstart")]; char unicode_prop_name_pool_str1076[sizeof("casedletter")]; - char unicode_prop_name_pool_str1078[sizeof("gong")]; + char unicode_prop_name_pool_str1078[sizeof("xdigit")]; char unicode_prop_name_pool_str1079[sizeof("multani")]; + char unicode_prop_name_pool_str1080[sizeof("logicalorderexception")]; char unicode_prop_name_pool_str1082[sizeof("gunjalagondi")]; - char unicode_prop_name_pool_str1084[sizeof("cham")]; - char unicode_prop_name_pool_str1086[sizeof("chakma")]; - char unicode_prop_name_pool_str1087[sizeof("mong")]; - char unicode_prop_name_pool_str1088[sizeof("kaithi")]; - char unicode_prop_name_pool_str1089[sizeof("inmahajani")]; - char unicode_prop_name_pool_str1090[sizeof("graphemebase")]; + char unicode_prop_name_pool_str1084[sizeof("n")]; + char unicode_prop_name_pool_str1088[sizeof("inhanifirohingya")]; + char unicode_prop_name_pool_str1090[sizeof("insorasompeng")]; char unicode_prop_name_pool_str1092[sizeof("insiddham")]; - char unicode_prop_name_pool_str1095[sizeof("inogham")]; - char unicode_prop_name_pool_str1101[sizeof("inosage")]; char unicode_prop_name_pool_str1102[sizeof("incountingrodnumerals")]; - char unicode_prop_name_pool_str1104[sizeof("inwancho")]; - char unicode_prop_name_pool_str1105[sizeof("khojki")]; - char 
unicode_prop_name_pool_str1107[sizeof("ingeneralpunctuation")]; - char unicode_prop_name_pool_str1108[sizeof("georgian")]; - char unicode_prop_name_pool_str1117[sizeof("incyrillicextendedc")]; - char unicode_prop_name_pool_str1118[sizeof("inkayahli")]; - char unicode_prop_name_pool_str1121[sizeof("khar")]; - char unicode_prop_name_pool_str1124[sizeof("inoriya")]; - char unicode_prop_name_pool_str1126[sizeof("manichaean")]; + char unicode_prop_name_pool_str1111[sizeof("nandinagari")]; + char unicode_prop_name_pool_str1114[sizeof("no")]; + char unicode_prop_name_pool_str1121[sizeof("inhiragana")]; + char unicode_prop_name_pool_str1123[sizeof("nko")]; + char unicode_prop_name_pool_str1124[sizeof("nkoo")]; + char unicode_prop_name_pool_str1131[sizeof("xpeo")]; char unicode_prop_name_pool_str1132[sizeof("bamu")]; - char unicode_prop_name_pool_str1133[sizeof("zanb")]; - char unicode_prop_name_pool_str1136[sizeof("oriya")]; - char unicode_prop_name_pool_str1137[sizeof("inolchiki")]; - char unicode_prop_name_pool_str1138[sizeof("linb")]; - char unicode_prop_name_pool_str1141[sizeof("inethiopic")]; - char unicode_prop_name_pool_str1143[sizeof("wara")]; - char unicode_prop_name_pool_str1144[sizeof("ingeorgian")]; - char unicode_prop_name_pool_str1149[sizeof("innabataean")]; - char unicode_prop_name_pool_str1154[sizeof("inkanbun")]; + char unicode_prop_name_pool_str1133[sizeof("cherokee")]; + char unicode_prop_name_pool_str1137[sizeof("p")]; + char unicode_prop_name_pool_str1139[sizeof("phoenician")]; + char unicode_prop_name_pool_str1142[sizeof("cher")]; + char unicode_prop_name_pool_str1143[sizeof("emojimodifier")]; + char unicode_prop_name_pool_str1144[sizeof("inphoenician")]; + char unicode_prop_name_pool_str1145[sizeof("marchen")]; + char unicode_prop_name_pool_str1150[sizeof("vs")]; + char unicode_prop_name_pool_str1154[sizeof("graphemelink")]; char unicode_prop_name_pool_str1156[sizeof("adlam")]; - char unicode_prop_name_pool_str1157[sizeof("inbhaiksuki")]; - char 
unicode_prop_name_pool_str1162[sizeof("insinhala")]; - char unicode_prop_name_pool_str1163[sizeof("inelbasan")]; - char unicode_prop_name_pool_str1169[sizeof("lowercase")]; - char unicode_prop_name_pool_str1178[sizeof("takri")]; - char unicode_prop_name_pool_str1179[sizeof("inmongolian")]; - char unicode_prop_name_pool_str1181[sizeof("invai")]; - char unicode_prop_name_pool_str1182[sizeof("xsux")]; + char unicode_prop_name_pool_str1168[sizeof("inkhitansmallscript")]; + char unicode_prop_name_pool_str1170[sizeof("cham")]; + char unicode_prop_name_pool_str1172[sizeof("chakma")]; + char unicode_prop_name_pool_str1174[sizeof("kaithi")]; + char unicode_prop_name_pool_str1175[sizeof("inmahajani")]; + char unicode_prop_name_pool_str1176[sizeof("graphemebase")]; + char unicode_prop_name_pool_str1177[sizeof("oidc")]; + char unicode_prop_name_pool_str1179[sizeof("ingreekandcoptic")]; + char unicode_prop_name_pool_str1181[sizeof("inogham")]; char unicode_prop_name_pool_str1183[sizeof("cntrl")]; - char unicode_prop_name_pool_str1186[sizeof("emoji")]; char unicode_prop_name_pool_str1187[sizeof("sterm")]; - char unicode_prop_name_pool_str1189[sizeof("cuneiform")]; - char unicode_prop_name_pool_str1192[sizeof("cwcm")]; - char unicode_prop_name_pool_str1194[sizeof("hatran")]; - char unicode_prop_name_pool_str1197[sizeof("linearb")]; - char unicode_prop_name_pool_str1201[sizeof("taile")]; + char unicode_prop_name_pool_str1191[sizeof("khojki")]; + char unicode_prop_name_pool_str1199[sizeof("olck")]; + char unicode_prop_name_pool_str1200[sizeof("diacritic")]; char unicode_prop_name_pool_str1202[sizeof("inlatinextendedadditional")]; char unicode_prop_name_pool_str1203[sizeof("inenclosedalphanumerics")]; + char unicode_prop_name_pool_str1204[sizeof("olower")]; char unicode_prop_name_pool_str1205[sizeof("anatolianhieroglyphs")]; char unicode_prop_name_pool_str1206[sizeof("incyrillicsupplement")]; + char unicode_prop_name_pool_str1207[sizeof("khar")]; char 
unicode_prop_name_pool_str1208[sizeof("intamilsupplement")]; + char unicode_prop_name_pool_str1212[sizeof("manichaean")]; char unicode_prop_name_pool_str1215[sizeof("inmiscellaneoustechnical")]; - char unicode_prop_name_pool_str1217[sizeof("ahom")]; + char unicode_prop_name_pool_str1216[sizeof("olchiki")]; char unicode_prop_name_pool_str1218[sizeof("incherokeesupplement")]; - char unicode_prop_name_pool_str1219[sizeof("takr")]; - char unicode_prop_name_pool_str1224[sizeof("khmr")]; + char unicode_prop_name_pool_str1221[sizeof("inmeroitichieroglyphs")]; + char unicode_prop_name_pool_str1222[sizeof("oids")]; + char unicode_prop_name_pool_str1223[sizeof("inolchiki")]; + char unicode_prop_name_pool_str1227[sizeof("inethiopic")]; char unicode_prop_name_pool_str1228[sizeof("incjkunifiedideographsextensiona")]; char unicode_prop_name_pool_str1231[sizeof("quotationmark")]; - char unicode_prop_name_pool_str1233[sizeof("lyci")]; - char unicode_prop_name_pool_str1234[sizeof("lycian")]; - char unicode_prop_name_pool_str1235[sizeof("ingurmukhi")]; + char unicode_prop_name_pool_str1233[sizeof("inkangxiradicals")]; char unicode_prop_name_pool_str1237[sizeof("runic")]; - char unicode_prop_name_pool_str1238[sizeof("limb")]; - char unicode_prop_name_pool_str1240[sizeof("inscriptionalpahlavi")]; - char unicode_prop_name_pool_str1242[sizeof("hatr")]; - char unicode_prop_name_pool_str1244[sizeof("variationselector")]; - char unicode_prop_name_pool_str1246[sizeof("modifierletter")]; + char unicode_prop_name_pool_str1239[sizeof("enclosingmark")]; + char unicode_prop_name_pool_str1240[sizeof("innewa")]; + char unicode_prop_name_pool_str1242[sizeof("lepc")]; + char unicode_prop_name_pool_str1246[sizeof("beng")]; char unicode_prop_name_pool_str1247[sizeof("incjkunifiedideographsextensione")]; - char unicode_prop_name_pool_str1262[sizeof("mymr")]; - char unicode_prop_name_pool_str1265[sizeof("myanmar")]; + char unicode_prop_name_pool_str1248[sizeof("insinhala")]; + char 
unicode_prop_name_pool_str1252[sizeof("glag")]; + char unicode_prop_name_pool_str1255[sizeof("inphaistosdisc")]; + char unicode_prop_name_pool_str1256[sizeof("patternsyntax")]; char unicode_prop_name_pool_str1267[sizeof("lu")]; - char unicode_prop_name_pool_str1275[sizeof("kharoshthi")]; - char unicode_prop_name_pool_str1276[sizeof("inarabic")]; + char unicode_prop_name_pool_str1273[sizeof("chorasmian")]; char unicode_prop_name_pool_str1278[sizeof("radical")]; - char unicode_prop_name_pool_str1281[sizeof("khmer")]; - char unicode_prop_name_pool_str1286[sizeof("inunifiedcanadianaboriginalsyllabics")]; - char unicode_prop_name_pool_str1292[sizeof("osage")]; + char unicode_prop_name_pool_str1286[sizeof("regionalindicator")]; + char unicode_prop_name_pool_str1287[sizeof("ingreekextended")]; + char unicode_prop_name_pool_str1291[sizeof("emojimodifierbase")]; + char unicode_prop_name_pool_str1294[sizeof("indogra")]; char unicode_prop_name_pool_str1296[sizeof("sundanese")]; - char unicode_prop_name_pool_str1300[sizeof("innewtailue")]; - char unicode_prop_name_pool_str1302[sizeof("logicalorderexception")]; - char unicode_prop_name_pool_str1304[sizeof("math")]; + char unicode_prop_name_pool_str1303[sizeof("ahom")]; + char unicode_prop_name_pool_str1304[sizeof("chrs")]; char unicode_prop_name_pool_str1306[sizeof("braille")]; - char unicode_prop_name_pool_str1311[sizeof("goth")]; - char unicode_prop_name_pool_str1312[sizeof("insorasompeng")]; - char unicode_prop_name_pool_str1316[sizeof("insoyombo")]; - char unicode_prop_name_pool_str1317[sizeof("arab")]; + char unicode_prop_name_pool_str1310[sizeof("khmr")]; + char unicode_prop_name_pool_str1313[sizeof("inwarangciti")]; char unicode_prop_name_pool_str1322[sizeof("saur")]; + char unicode_prop_name_pool_str1326[sizeof("inscriptionalpahlavi")]; char unicode_prop_name_pool_str1329[sizeof("guru")]; - char unicode_prop_name_pool_str1333[sizeof("term")]; + char unicode_prop_name_pool_str1331[sizeof("decimalnumber")]; + char 
unicode_prop_name_pool_str1333[sizeof("inimperialaramaic")]; char unicode_prop_name_pool_str1337[sizeof("paucinhau")]; - char unicode_prop_name_pool_str1338[sizeof("inbasiclatin")]; - char unicode_prop_name_pool_str1339[sizeof("inarabicpresentationformsa")]; + char unicode_prop_name_pool_str1343[sizeof("emod")]; char unicode_prop_name_pool_str1347[sizeof("punct")]; char unicode_prop_name_pool_str1348[sizeof("gurmukhi")]; - char unicode_prop_name_pool_str1354[sizeof("grantha")]; - char unicode_prop_name_pool_str1355[sizeof("inshavian")]; - char unicode_prop_name_pool_str1356[sizeof("osma")]; + char unicode_prop_name_pool_str1354[sizeof("ingeometricshapes")]; + char unicode_prop_name_pool_str1356[sizeof("lower")]; char unicode_prop_name_pool_str1357[sizeof("inenclosedalphanumericsupplement")]; - char unicode_prop_name_pool_str1360[sizeof("intirhuta")]; - char unicode_prop_name_pool_str1361[sizeof("inelymaic")]; - char unicode_prop_name_pool_str1362[sizeof("inhatran")]; + char unicode_prop_name_pool_str1361[sizeof("kharoshthi")]; char unicode_prop_name_pool_str1364[sizeof("incjkunifiedideographsextensionc")]; - char unicode_prop_name_pool_str1365[sizeof("emojimodifier")]; - char unicode_prop_name_pool_str1368[sizeof("uideo")]; + char unicode_prop_name_pool_str1367[sizeof("khmer")]; char unicode_prop_name_pool_str1373[sizeof("bidicontrol")]; - char unicode_prop_name_pool_str1379[sizeof("phnx")]; char unicode_prop_name_pool_str1380[sizeof("limbu")]; - char unicode_prop_name_pool_str1384[sizeof("n")]; char unicode_prop_name_pool_str1388[sizeof("inenclosedideographicsupplement")]; + char unicode_prop_name_pool_str1390[sizeof("math")]; char unicode_prop_name_pool_str1392[sizeof("mult")]; - char unicode_prop_name_pool_str1398[sizeof("kthi")]; + char unicode_prop_name_pool_str1394[sizeof("inwancho")]; + char unicode_prop_name_pool_str1397[sizeof("goth")]; + char unicode_prop_name_pool_str1398[sizeof("han")]; char 
unicode_prop_name_pool_str1399[sizeof("incjkunifiedideographs")]; - char unicode_prop_name_pool_str1401[sizeof("ingreekandcoptic")]; - char unicode_prop_name_pool_str1403[sizeof("inoldhungarian")]; - char unicode_prop_name_pool_str1406[sizeof("incjkcompatibility")]; - char unicode_prop_name_pool_str1409[sizeof("grbase")]; - char unicode_prop_name_pool_str1411[sizeof("nandinagari")]; - char unicode_prop_name_pool_str1412[sizeof("brah")]; - char unicode_prop_name_pool_str1414[sizeof("no")]; + char unicode_prop_name_pool_str1400[sizeof("coptic")]; + char unicode_prop_name_pool_str1403[sizeof("hani")]; + char unicode_prop_name_pool_str1409[sizeof("incyrillicextendeda")]; + char unicode_prop_name_pool_str1412[sizeof("hano")]; + char unicode_prop_name_pool_str1418[sizeof("medf")]; char unicode_prop_name_pool_str1421[sizeof("sd")]; - char unicode_prop_name_pool_str1423[sizeof("nko")]; - char unicode_prop_name_pool_str1424[sizeof("nkoo")]; - char unicode_prop_name_pool_str1427[sizeof("xpeo")]; + char unicode_prop_name_pool_str1424[sizeof("insogdian")]; + char unicode_prop_name_pool_str1425[sizeof("indingbats")]; + char unicode_prop_name_pool_str1428[sizeof("medefaidrin")]; char unicode_prop_name_pool_str1430[sizeof("sidd")]; - char unicode_prop_name_pool_str1435[sizeof("inherited")]; - char unicode_prop_name_pool_str1437[sizeof("p")]; - char unicode_prop_name_pool_str1440[sizeof("phli")]; - char unicode_prop_name_pool_str1443[sizeof("inmeroitichieroglyphs")]; - char unicode_prop_name_pool_str1444[sizeof("inphoenician")]; - char unicode_prop_name_pool_str1449[sizeof("inmayannumerals")]; + char unicode_prop_name_pool_str1434[sizeof("hanunoo")]; + char unicode_prop_name_pool_str1437[sizeof("intaixuanjingsymbols")]; + char unicode_prop_name_pool_str1439[sizeof("inkhojki")]; + char unicode_prop_name_pool_str1440[sizeof("grantha")]; + char unicode_prop_name_pool_str1445[sizeof("copt")]; + char unicode_prop_name_pool_str1446[sizeof("intirhuta")]; + char 
unicode_prop_name_pool_str1447[sizeof("emojicomponent")]; + char unicode_prop_name_pool_str1448[sizeof("inhatran")]; + char unicode_prop_name_pool_str1450[sizeof("ideographic")]; char unicode_prop_name_pool_str1453[sizeof("saurashtra")]; - char unicode_prop_name_pool_str1455[sizeof("inkangxiradicals")]; - char unicode_prop_name_pool_str1461[sizeof("enclosingmark")]; - char unicode_prop_name_pool_str1467[sizeof("graphemeextend")]; - char unicode_prop_name_pool_str1468[sizeof("beng")]; - char unicode_prop_name_pool_str1473[sizeof("inbatak")]; - char unicode_prop_name_pool_str1474[sizeof("glag")]; - char unicode_prop_name_pool_str1475[sizeof("ahex")]; - char unicode_prop_name_pool_str1477[sizeof("patsyn")]; - char unicode_prop_name_pool_str1479[sizeof("odi")]; - char unicode_prop_name_pool_str1486[sizeof("dogra")]; - char unicode_prop_name_pool_str1489[sizeof("intibetan")]; - char unicode_prop_name_pool_str1491[sizeof("lydi")]; - char unicode_prop_name_pool_str1492[sizeof("lydian")]; - char unicode_prop_name_pool_str1499[sizeof("inblockelements")]; - char unicode_prop_name_pool_str1506[sizeof("cwcf")]; - char unicode_prop_name_pool_str1507[sizeof("inunifiedcanadianaboriginalsyllabicsextended")]; - char unicode_prop_name_pool_str1508[sizeof("regionalindicator")]; - char unicode_prop_name_pool_str1509[sizeof("ingreekextended")]; - char unicode_prop_name_pool_str1513[sizeof("emojimodifierbase")]; - char unicode_prop_name_pool_str1514[sizeof("inanatolianhieroglyphs")]; - char unicode_prop_name_pool_str1516[sizeof("indogra")]; - char unicode_prop_name_pool_str1517[sizeof("taiviet")]; - char unicode_prop_name_pool_str1524[sizeof("inyiradicals")]; - char unicode_prop_name_pool_str1525[sizeof("unassigned")]; + char unicode_prop_name_pool_str1455[sizeof("inshorthandformatcontrols")]; + char unicode_prop_name_pool_str1456[sizeof("hira")]; + char unicode_prop_name_pool_str1457[sizeof("ininscriptionalparthian")]; + char unicode_prop_name_pool_str1459[sizeof("lowercase")]; + char 
unicode_prop_name_pool_str1460[sizeof("ininscriptionalpahlavi")]; + char unicode_prop_name_pool_str1472[sizeof("inornamentaldingbats")]; + char unicode_prop_name_pool_str1480[sizeof("caseignorable")]; + char unicode_prop_name_pool_str1481[sizeof("nand")]; + char unicode_prop_name_pool_str1482[sizeof("cwcm")]; + char unicode_prop_name_pool_str1484[sizeof("kthi")]; + char unicode_prop_name_pool_str1485[sizeof("cprt")]; + char unicode_prop_name_pool_str1489[sizeof("inoldhungarian")]; + char unicode_prop_name_pool_str1490[sizeof("sogdian")]; + char unicode_prop_name_pool_str1493[sizeof("sogo")]; + char unicode_prop_name_pool_str1498[sizeof("brah")]; + char unicode_prop_name_pool_str1500[sizeof("intangut")]; + char unicode_prop_name_pool_str1508[sizeof("imperialaramaic")]; + char unicode_prop_name_pool_str1510[sizeof("bopo")]; + char unicode_prop_name_pool_str1519[sizeof("inoldpersian")]; + char unicode_prop_name_pool_str1521[sizeof("inherited")]; + char unicode_prop_name_pool_str1526[sizeof("phli")]; + char unicode_prop_name_pool_str1531[sizeof("intags")]; char unicode_prop_name_pool_str1532[sizeof("insundanesesupplement")]; - char unicode_prop_name_pool_str1535[sizeof("innumberforms")]; - char unicode_prop_name_pool_str1538[sizeof("lowercaseletter")]; - char unicode_prop_name_pool_str1541[sizeof("dogr")]; - char unicode_prop_name_pool_str1542[sizeof("lepc")]; - char unicode_prop_name_pool_str1545[sizeof("bhks")]; - char unicode_prop_name_pool_str1553[sizeof("word")]; - char unicode_prop_name_pool_str1554[sizeof("gujr")]; - char unicode_prop_name_pool_str1555[sizeof("inphaistosdisc")]; - char unicode_prop_name_pool_str1557[sizeof("bhaiksuki")]; - char unicode_prop_name_pool_str1560[sizeof("dash")]; - char unicode_prop_name_pool_str1562[sizeof("inarabicextendeda")]; - char unicode_prop_name_pool_str1565[sizeof("gujarati")]; + char unicode_prop_name_pool_str1533[sizeof("oldpersian")]; + char unicode_prop_name_pool_str1545[sizeof("incyrillicextendedc")]; + char 
unicode_prop_name_pool_str1552[sizeof("dsrt")]; + char unicode_prop_name_pool_str1553[sizeof("graphemeextend")]; char unicode_prop_name_pool_str1566[sizeof("inlatinextendedd")]; - char unicode_prop_name_pool_str1567[sizeof("innoblock")]; - char unicode_prop_name_pool_str1570[sizeof("java")]; - char unicode_prop_name_pool_str1576[sizeof("ingeometricshapes")]; - char unicode_prop_name_pool_str1577[sizeof("cyrl")]; - char unicode_prop_name_pool_str1582[sizeof("indevanagari")]; - char unicode_prop_name_pool_str1583[sizeof("inbalinese")]; - char unicode_prop_name_pool_str1585[sizeof("inbyzantinemusicalsymbols")]; - char unicode_prop_name_pool_str1589[sizeof("tale")]; - char unicode_prop_name_pool_str1591[sizeof("deva")]; - char unicode_prop_name_pool_str1594[sizeof("hex")]; - char unicode_prop_name_pool_str1601[sizeof("inmyanmarextendedb")]; - char unicode_prop_name_pool_str1603[sizeof("sinh")]; - char unicode_prop_name_pool_str1604[sizeof("cwt")]; - char unicode_prop_name_pool_str1606[sizeof("shavian")]; - char unicode_prop_name_pool_str1607[sizeof("devanagari")]; - char unicode_prop_name_pool_str1609[sizeof("oidc")]; - char unicode_prop_name_pool_str1615[sizeof("javanese")]; - char unicode_prop_name_pool_str1621[sizeof("mlym")]; + char unicode_prop_name_pool_str1569[sizeof("ogam")]; + char unicode_prop_name_pool_str1571[sizeof("closepunctuation")]; + char unicode_prop_name_pool_str1575[sizeof("ingeometricshapesextended")]; + char unicode_prop_name_pool_str1577[sizeof("zanb")]; + char unicode_prop_name_pool_str1587[sizeof("linb")]; + char unicode_prop_name_pool_str1590[sizeof("innewtailue")]; + char unicode_prop_name_pool_str1593[sizeof("deseret")]; + char unicode_prop_name_pool_str1598[sizeof("innabataean")]; + char unicode_prop_name_pool_str1600[sizeof("inanatolianhieroglyphs")]; + char unicode_prop_name_pool_str1603[sizeof("inkanbun")]; + char unicode_prop_name_pool_str1606[sizeof("inbhaiksuki")]; + char unicode_prop_name_pool_str1612[sizeof("inelbasan")]; + char 
unicode_prop_name_pool_str1614[sizeof("xsux")]; char unicode_prop_name_pool_str1622[sizeof("incjkunifiedideographsextensiond")]; char unicode_prop_name_pool_str1624[sizeof("sentenceterminal")]; - char unicode_prop_name_pool_str1626[sizeof("malayalam")]; - char unicode_prop_name_pool_str1628[sizeof("inhanunoo")]; - char unicode_prop_name_pool_str1629[sizeof("insinhalaarchaicnumbers")]; - char unicode_prop_name_pool_str1631[sizeof("olck")]; - char unicode_prop_name_pool_str1633[sizeof("inimperialaramaic")]; - char unicode_prop_name_pool_str1636[sizeof("olower")]; - char unicode_prop_name_pool_str1638[sizeof("palmyrene")]; - char unicode_prop_name_pool_str1642[sizeof("avestan")]; - char unicode_prop_name_pool_str1645[sizeof("incjkunifiedideographsextensionf")]; - char unicode_prop_name_pool_str1646[sizeof("insogdian")]; - char unicode_prop_name_pool_str1647[sizeof("indingbats")]; - char unicode_prop_name_pool_str1648[sizeof("olchiki")]; - char unicode_prop_name_pool_str1654[sizeof("oids")]; + char unicode_prop_name_pool_str1626[sizeof("incjksymbolsandpunctuation")]; + char unicode_prop_name_pool_str1631[sizeof("bhks")]; + char unicode_prop_name_pool_str1633[sizeof("bopomofo")]; + char unicode_prop_name_pool_str1636[sizeof("alpha")]; + char unicode_prop_name_pool_str1639[sizeof("inpalmyrene")]; + char unicode_prop_name_pool_str1643[sizeof("bhaiksuki")]; + char unicode_prop_name_pool_str1646[sizeof("linearb")]; + char unicode_prop_name_pool_str1648[sizeof("inoldpermic")]; + char unicode_prop_name_pool_str1650[sizeof("inlepcha")]; + char unicode_prop_name_pool_str1658[sizeof("uideo")]; char unicode_prop_name_pool_str1661[sizeof("punctuation")]; char unicode_prop_name_pool_str1663[sizeof("sund")]; - char unicode_prop_name_pool_str1666[sizeof("inbraillepatterns")]; - char unicode_prop_name_pool_str1669[sizeof("emojicomponent")]; - char unicode_prop_name_pool_str1672[sizeof("ideographic")]; - char unicode_prop_name_pool_str1673[sizeof("taml")]; - char 
unicode_prop_name_pool_str1688[sizeof("format")]; - char unicode_prop_name_pool_str1694[sizeof("inornamentaldingbats")]; - char unicode_prop_name_pool_str1700[sizeof("coptic")]; - char unicode_prop_name_pool_str1702[sizeof("caseignorable")]; - char unicode_prop_name_pool_str1708[sizeof("idsb")]; - char unicode_prop_name_pool_str1709[sizeof("inmiscellaneousmathematicalsymbolsb")]; - char unicode_prop_name_pool_str1712[sizeof("sogdian")]; - char unicode_prop_name_pool_str1715[sizeof("sogo")]; - char unicode_prop_name_pool_str1720[sizeof("terminalpunctuation")]; - char unicode_prop_name_pool_str1722[sizeof("intangut")]; - char unicode_prop_name_pool_str1729[sizeof("intifinagh")]; - char unicode_prop_name_pool_str1733[sizeof("inlowsurrogates")]; - char unicode_prop_name_pool_str1743[sizeof("invariationselectors")]; - char unicode_prop_name_pool_str1745[sizeof("copt")]; - char unicode_prop_name_pool_str1750[sizeof("soyo")]; - char unicode_prop_name_pool_str1753[sizeof("intags")]; - char unicode_prop_name_pool_str1756[sizeof("inverticalforms")]; - char unicode_prop_name_pool_str1757[sizeof("ininscriptionalparthian")]; - char unicode_prop_name_pool_str1760[sizeof("ininscriptionalpahlavi")]; - char unicode_prop_name_pool_str1763[sizeof("meroitichieroglyphs")]; - char unicode_prop_name_pool_str1764[sizeof("asciihexdigit")]; - char unicode_prop_name_pool_str1766[sizeof("inethiopicextendeda")]; - char unicode_prop_name_pool_str1767[sizeof("invedicextensions")]; - char unicode_prop_name_pool_str1781[sizeof("nand")]; - char unicode_prop_name_pool_str1782[sizeof("cwl")]; - char unicode_prop_name_pool_str1785[sizeof("cprt")]; - char unicode_prop_name_pool_str1791[sizeof("innushu")]; - char unicode_prop_name_pool_str1794[sizeof("zanabazarsquare")]; - char unicode_prop_name_pool_str1797[sizeof("ingeometricshapesextended")]; - char unicode_prop_name_pool_str1798[sizeof("avst")]; - char unicode_prop_name_pool_str1808[sizeof("imperialaramaic")]; - char 
unicode_prop_name_pool_str1810[sizeof("bopo")]; - char unicode_prop_name_pool_str1812[sizeof("sarb")]; - char unicode_prop_name_pool_str1819[sizeof("inoldpersian")]; - char unicode_prop_name_pool_str1830[sizeof("intaiviet")]; - char unicode_prop_name_pool_str1834[sizeof("mahj")]; - char unicode_prop_name_pool_str1838[sizeof("inkatakanaphoneticextensions")]; - char unicode_prop_name_pool_str1845[sizeof("mahajani")]; - char unicode_prop_name_pool_str1871[sizeof("closepunctuation")]; + char unicode_prop_name_pool_str1665[sizeof("oldpermic")]; + char unicode_prop_name_pool_str1666[sizeof("osge")]; + char unicode_prop_name_pool_str1672[sizeof("variationselector")]; + char unicode_prop_name_pool_str1674[sizeof("inkayahli")]; + char unicode_prop_name_pool_str1680[sizeof("inoriya")]; + char unicode_prop_name_pool_str1684[sizeof("inyezidi")]; + char unicode_prop_name_pool_str1687[sizeof("limb")]; + char unicode_prop_name_pool_str1689[sizeof("sinh")]; + char unicode_prop_name_pool_str1692[sizeof("shavian")]; + char unicode_prop_name_pool_str1697[sizeof("incoptic")]; + char unicode_prop_name_pool_str1698[sizeof("insyriacsupplement")]; + char unicode_prop_name_pool_str1699[sizeof("wara")]; + char unicode_prop_name_pool_str1714[sizeof("inhanunoo")]; + char unicode_prop_name_pool_str1715[sizeof("insinhalaarchaicnumbers")]; + char unicode_prop_name_pool_str1725[sizeof("inarabic")]; + char unicode_prop_name_pool_str1727[sizeof("hatran")]; + char unicode_prop_name_pool_str1729[sizeof("assigned")]; + char unicode_prop_name_pool_str1737[sizeof("sorasompeng")]; + char unicode_prop_name_pool_str1740[sizeof("spacingmark")]; + char unicode_prop_name_pool_str1748[sizeof("orkh")]; + char unicode_prop_name_pool_str1754[sizeof("space")]; + char unicode_prop_name_pool_str1757[sizeof("cuneiform")]; + char unicode_prop_name_pool_str1766[sizeof("arab")]; + char unicode_prop_name_pool_str1774[sizeof("format")]; + char unicode_prop_name_pool_str1775[sizeof("hatr")]; + char 
unicode_prop_name_pool_str1787[sizeof("inbasiclatin")]; + char unicode_prop_name_pool_str1788[sizeof("inarabicpresentationformsa")]; + char unicode_prop_name_pool_str1789[sizeof("lyci")]; + char unicode_prop_name_pool_str1790[sizeof("lycian")]; + char unicode_prop_name_pool_str1808[sizeof("takri")]; + char unicode_prop_name_pool_str1811[sizeof("invai")]; + char unicode_prop_name_pool_str1814[sizeof("modifierletter")]; + char unicode_prop_name_pool_str1815[sizeof("unassigned")]; + char unicode_prop_name_pool_str1818[sizeof("mymr")]; + char unicode_prop_name_pool_str1821[sizeof("myanmar")]; + char unicode_prop_name_pool_str1828[sizeof("lowercaseletter")]; + char unicode_prop_name_pool_str1831[sizeof("taile")]; + char unicode_prop_name_pool_str1832[sizeof("dogra")]; + char unicode_prop_name_pool_str1849[sizeof("takr")]; + char unicode_prop_name_pool_str1850[sizeof("asciihexdigit")]; + char unicode_prop_name_pool_str1851[sizeof("separator")]; + char unicode_prop_name_pool_str1854[sizeof("inunifiedcanadianaboriginalsyllabics")]; + char unicode_prop_name_pool_str1858[sizeof("grbase")]; + char unicode_prop_name_pool_str1862[sizeof("bugi")]; + char unicode_prop_name_pool_str1870[sizeof("nd")]; + char unicode_prop_name_pool_str1871[sizeof("sogd")]; + char unicode_prop_name_pool_str1872[sizeof("insoyombo")]; char unicode_prop_name_pool_str1875[sizeof("inlisu")]; - char unicode_prop_name_pool_str1878[sizeof("softdotted")]; - char unicode_prop_name_pool_str1897[sizeof("unknown")]; - char unicode_prop_name_pool_str1898[sizeof("invariationselectorssupplement")]; - char unicode_prop_name_pool_str1910[sizeof("syrc")]; - char unicode_prop_name_pool_str1911[sizeof("hang")]; - char unicode_prop_name_pool_str1926[sizeof("incjksymbolsandpunctuation")]; - char unicode_prop_name_pool_str1932[sizeof("ingujarati")]; - char unicode_prop_name_pool_str1933[sizeof("bopomofo")]; - char unicode_prop_name_pool_str1936[sizeof("alpha")]; - char unicode_prop_name_pool_str1937[sizeof("khoj")]; - char 
unicode_prop_name_pool_str1939[sizeof("inpalmyrene")]; - char unicode_prop_name_pool_str1942[sizeof("ingunjalagondi")]; - char unicode_prop_name_pool_str1948[sizeof("inoldpermic")]; - char unicode_prop_name_pool_str1950[sizeof("inlepcha")]; - char unicode_prop_name_pool_str1951[sizeof("assigned")]; - char unicode_prop_name_pool_str1955[sizeof("incurrencysymbols")]; - char unicode_prop_name_pool_str1958[sizeof("insmallformvariants")]; - char unicode_prop_name_pool_str1959[sizeof("sorasompeng")]; - char unicode_prop_name_pool_str1964[sizeof("inlinearbsyllabary")]; - char unicode_prop_name_pool_str1965[sizeof("oldpersian")]; - char unicode_prop_name_pool_str1972[sizeof("caucasianalbanian")]; - char unicode_prop_name_pool_str1973[sizeof("inenclosedcjklettersandmonths")]; - char unicode_prop_name_pool_str1975[sizeof("hiragana")]; - char unicode_prop_name_pool_str1976[sizeof("inbamum")]; - char unicode_prop_name_pool_str1979[sizeof("inrejang")]; - char unicode_prop_name_pool_str1984[sizeof("graph")]; - char unicode_prop_name_pool_str1990[sizeof("sharada")]; - char unicode_prop_name_pool_str1991[sizeof("inethiopicsupplement")]; - char unicode_prop_name_pool_str1996[sizeof("indevanagariextended")]; - char unicode_prop_name_pool_str1997[sizeof("incoptic")]; - char unicode_prop_name_pool_str1998[sizeof("insyriacsupplement")]; - char unicode_prop_name_pool_str2004[sizeof("inmeroiticcursive")]; - char unicode_prop_name_pool_str2014[sizeof("hmng")]; + char unicode_prop_name_pool_str1877[sizeof("innushu")]; + char unicode_prop_name_pool_str1886[sizeof("inmongoliansupplement")]; + char unicode_prop_name_pool_str1887[sizeof("dogr")]; + char unicode_prop_name_pool_str1893[sizeof("phnx")]; + char unicode_prop_name_pool_str1894[sizeof("cwt")]; + char unicode_prop_name_pool_str1900[sizeof("buginese")]; + char unicode_prop_name_pool_str1905[sizeof("ingeorgiansupplement")]; + char unicode_prop_name_pool_str1912[sizeof("nl")]; + char unicode_prop_name_pool_str1913[sizeof("induployan")]; 
+ char unicode_prop_name_pool_str1916[sizeof("olditalic")]; + char unicode_prop_name_pool_str1917[sizeof("inelymaic")]; + char unicode_prop_name_pool_str1922[sizeof("inbatak")]; + char unicode_prop_name_pool_str1924[sizeof("inkatakanaphoneticextensions")]; + char unicode_prop_name_pool_str1925[sizeof("inlisusupplement")]; + char unicode_prop_name_pool_str1926[sizeof("inphagspa")]; + char unicode_prop_name_pool_str1928[sizeof("inhanguljamo")]; + char unicode_prop_name_pool_str1930[sizeof("phag")]; + char unicode_prop_name_pool_str1938[sizeof("intibetan")]; + char unicode_prop_name_pool_str1940[sizeof("inhanguljamoextendeda")]; + char unicode_prop_name_pool_str1947[sizeof("inglagolitic")]; + char unicode_prop_name_pool_str1948[sizeof("inblockelements")]; + char unicode_prop_name_pool_str1951[sizeof("number")]; + char unicode_prop_name_pool_str1962[sizeof("incjkcompatibility")]; + char unicode_prop_name_pool_str1963[sizeof("term")]; + char unicode_prop_name_pool_str1984[sizeof("innumberforms")]; + char unicode_prop_name_pool_str1985[sizeof("inshavian")]; + char unicode_prop_name_pool_str1989[sizeof("ahex")]; + char unicode_prop_name_pool_str1991[sizeof("cf")]; + char unicode_prop_name_pool_str1993[sizeof("incopticepactnumbers")]; + char unicode_prop_name_pool_str1998[sizeof("intagalog")]; + char unicode_prop_name_pool_str2005[sizeof("inmayannumerals")]; + char unicode_prop_name_pool_str2011[sizeof("inarabicextendeda")]; + char unicode_prop_name_pool_str2016[sizeof("innoblock")]; + char unicode_prop_name_pool_str2018[sizeof("insuttonsignwriting")]; char unicode_prop_name_pool_str2022[sizeof("intelugu")]; - char unicode_prop_name_pool_str2029[sizeof("incombiningdiacriticalmarks")]; - char unicode_prop_name_pool_str2031[sizeof("mathsymbol")]; - char unicode_prop_name_pool_str2036[sizeof("titlecaseletter")]; - char unicode_prop_name_pool_str2038[sizeof("ugar")]; - char unicode_prop_name_pool_str2039[sizeof("incombiningdiacriticalmarksforsymbols")]; - char 
unicode_prop_name_pool_str2040[sizeof("spacingmark")]; - char unicode_prop_name_pool_str2043[sizeof("shrd")]; - char unicode_prop_name_pool_str2047[sizeof("injavanese")]; - char unicode_prop_name_pool_str2048[sizeof("syriac")]; - char unicode_prop_name_pool_str2054[sizeof("space")]; - char unicode_prop_name_pool_str2056[sizeof("hebr")]; - char unicode_prop_name_pool_str2061[sizeof("ext")]; - char unicode_prop_name_pool_str2064[sizeof("inhanguljamo")]; - char unicode_prop_name_pool_str2066[sizeof("phag")]; - char unicode_prop_name_pool_str2076[sizeof("inhanguljamoextendeda")]; - char unicode_prop_name_pool_str2082[sizeof("wancho")]; - char unicode_prop_name_pool_str2084[sizeof("bugi")]; - char unicode_prop_name_pool_str2089[sizeof("tamil")]; - char unicode_prop_name_pool_str2093[sizeof("sogd")]; - char unicode_prop_name_pool_str2094[sizeof("orkh")]; - char unicode_prop_name_pool_str2097[sizeof("oldpermic")]; - char unicode_prop_name_pool_str2100[sizeof("siddham")]; - char unicode_prop_name_pool_str2102[sizeof("incyrillicextendedb")]; - char unicode_prop_name_pool_str2105[sizeof("inbrahmi")]; - char unicode_prop_name_pool_str2108[sizeof("inmongoliansupplement")]; - char unicode_prop_name_pool_str2113[sizeof("thaa")]; - char unicode_prop_name_pool_str2118[sizeof("thaana")]; - char unicode_prop_name_pool_str2120[sizeof("thai")]; - char unicode_prop_name_pool_str2122[sizeof("buginese")]; - char unicode_prop_name_pool_str2127[sizeof("ingeorgiansupplement")]; - char unicode_prop_name_pool_str2129[sizeof("digit")]; - char unicode_prop_name_pool_str2132[sizeof("cyrillic")]; - char unicode_prop_name_pool_str2149[sizeof("sylo")]; - char unicode_prop_name_pool_str2150[sizeof("inphoneticextensions")]; - char unicode_prop_name_pool_str2151[sizeof("separator")]; - char unicode_prop_name_pool_str2159[sizeof("inethiopicextended")]; - char unicode_prop_name_pool_str2163[sizeof("inmathematicalalphanumericsymbols")]; - char 
unicode_prop_name_pool_str2168[sizeof("insymbolsandpictographsextendeda")]; - char unicode_prop_name_pool_str2170[sizeof("nd")]; - char unicode_prop_name_pool_str2176[sizeof("tirh")]; - char unicode_prop_name_pool_str2178[sizeof("tirhuta")]; - char unicode_prop_name_pool_str2184[sizeof("incombiningdiacriticalmarkssupplement")]; - char unicode_prop_name_pool_str2193[sizeof("extender")]; - char unicode_prop_name_pool_str2197[sizeof("wcho")]; - char unicode_prop_name_pool_str2199[sizeof("inbengali")]; - char unicode_prop_name_pool_str2201[sizeof("talu")]; - char unicode_prop_name_pool_str2206[sizeof("tang")]; - char unicode_prop_name_pool_str2207[sizeof("warangciti")]; - char unicode_prop_name_pool_str2210[sizeof("tagbanwa")]; - char unicode_prop_name_pool_str2211[sizeof("orya")]; - char unicode_prop_name_pool_str2212[sizeof("nl")]; - char unicode_prop_name_pool_str2213[sizeof("induployan")]; - char unicode_prop_name_pool_str2214[sizeof("cwu")]; - char unicode_prop_name_pool_str2216[sizeof("inbuginese")]; - char unicode_prop_name_pool_str2220[sizeof("telu")]; - char unicode_prop_name_pool_str2223[sizeof("ogam")]; - char unicode_prop_name_pool_str2226[sizeof("taitham")]; - char unicode_prop_name_pool_str2237[sizeof("rjng")]; - char unicode_prop_name_pool_str2240[sizeof("insuttonsignwriting")]; - char unicode_prop_name_pool_str2250[sizeof("incombiningdiacriticalmarksextended")]; - char unicode_prop_name_pool_str2251[sizeof("number")]; - char unicode_prop_name_pool_str2264[sizeof("inyijinghexagramsymbols")]; - char unicode_prop_name_pool_str2277[sizeof("whitespace")]; - char unicode_prop_name_pool_str2278[sizeof("currencysymbol")]; - char unicode_prop_name_pool_str2285[sizeof("inottomansiyaqnumbers")]; - char unicode_prop_name_pool_str2288[sizeof("inlimbu")]; - char unicode_prop_name_pool_str2293[sizeof("incopticepactnumbers")]; - char unicode_prop_name_pool_str2295[sizeof("ingeorgianextended")]; - char 
unicode_prop_name_pool_str2305[sizeof("inphoneticextensionssupplement")]; - char unicode_prop_name_pool_str2308[sizeof("any")]; - char unicode_prop_name_pool_str2320[sizeof("osge")]; - char unicode_prop_name_pool_str2325[sizeof("defaultignorablecodepoint")]; - char unicode_prop_name_pool_str2326[sizeof("hangul")]; - char unicode_prop_name_pool_str2333[sizeof("newa")]; - char unicode_prop_name_pool_str2343[sizeof("ethi")]; - char unicode_prop_name_pool_str2345[sizeof("hanifirohingya")]; - char unicode_prop_name_pool_str2346[sizeof("hmnp")]; - char unicode_prop_name_pool_str2348[sizeof("olditalic")]; - char unicode_prop_name_pool_str2349[sizeof("incjkunifiedideographsextensionb")]; - char unicode_prop_name_pool_str2352[sizeof("rohg")]; - char unicode_prop_name_pool_str2353[sizeof("innyiakengpuachuehmong")]; - char unicode_prop_name_pool_str2355[sizeof("incjkradicalssupplement")]; - char unicode_prop_name_pool_str2364[sizeof("newtailue")]; - char unicode_prop_name_pool_str2365[sizeof("newline")]; - char unicode_prop_name_pool_str2366[sizeof("lepcha")]; - char unicode_prop_name_pool_str2375[sizeof("deprecated")]; - char unicode_prop_name_pool_str2381[sizeof("buhd")]; - char unicode_prop_name_pool_str2391[sizeof("inglagolitic")]; - char unicode_prop_name_pool_str2395[sizeof("inaegeannumbers")]; - char unicode_prop_name_pool_str2400[sizeof("modifiersymbol")]; - char unicode_prop_name_pool_str2410[sizeof("surrogate")]; - char unicode_prop_name_pool_str2418[sizeof("inletterlikesymbols")]; - char unicode_prop_name_pool_str2426[sizeof("idstrinaryoperator")]; - char unicode_prop_name_pool_str2442[sizeof("intagalog")]; - char unicode_prop_name_pool_str2443[sizeof("tangut")]; - char unicode_prop_name_pool_str2445[sizeof("osmanya")]; - char unicode_prop_name_pool_str2447[sizeof("oalpha")]; - char unicode_prop_name_pool_str2448[sizeof("inphagspa")]; - char unicode_prop_name_pool_str2455[sizeof("ugaritic")]; - char unicode_prop_name_pool_str2456[sizeof("otheridcontinue")]; - char 
unicode_prop_name_pool_str2460[sizeof("inarabicpresentationformsb")]; - char unicode_prop_name_pool_str2462[sizeof("inbassavah")]; - char unicode_prop_name_pool_str2469[sizeof("other")]; - char unicode_prop_name_pool_str2478[sizeof("othernumber")]; - char unicode_prop_name_pool_str2480[sizeof("sylotinagri")]; - char unicode_prop_name_pool_str2487[sizeof("ingothic")]; - char unicode_prop_name_pool_str2498[sizeof("inbuhid")]; - char unicode_prop_name_pool_str2502[sizeof("inlatin1supplement")]; - char unicode_prop_name_pool_str2516[sizeof("intagbanwa")]; - char unicode_prop_name_pool_str2523[sizeof("shaw")]; - char unicode_prop_name_pool_str2529[sizeof("oldhungarian")]; - char unicode_prop_name_pool_str2538[sizeof("inmahjongtiles")]; - char unicode_prop_name_pool_str2542[sizeof("hung")]; - char unicode_prop_name_pool_str2544[sizeof("tifinagh")]; - char unicode_prop_name_pool_str2554[sizeof("inbopomofo")]; - char unicode_prop_name_pool_str2561[sizeof("narb")]; - char unicode_prop_name_pool_str2571[sizeof("inyisyllables")]; - char unicode_prop_name_pool_str2573[sizeof("kayahli")]; - char unicode_prop_name_pool_str2578[sizeof("otheralphabetic")]; - char unicode_prop_name_pool_str2582[sizeof("phagspa")]; - char unicode_prop_name_pool_str2584[sizeof("inprivateusearea")]; - char unicode_prop_name_pool_str2588[sizeof("elba")]; - char unicode_prop_name_pool_str2591[sizeof("nchar")]; - char unicode_prop_name_pool_str2607[sizeof("spaceseparator")]; - char unicode_prop_name_pool_str2609[sizeof("tibt")]; - char unicode_prop_name_pool_str2611[sizeof("extendedpictographic")]; - char unicode_prop_name_pool_str2615[sizeof("tibetan")]; - char unicode_prop_name_pool_str2616[sizeof("sgnw")]; - char unicode_prop_name_pool_str2622[sizeof("hexdigit")]; - char unicode_prop_name_pool_str2623[sizeof("tfng")]; - char unicode_prop_name_pool_str2627[sizeof("inbamumsupplement")]; - char unicode_prop_name_pool_str2632[sizeof("tavt")]; - char 
unicode_prop_name_pool_str2633[sizeof("incombininghalfmarks")]; - char unicode_prop_name_pool_str2634[sizeof("inarabicmathematicalalphabeticsymbols")]; - char unicode_prop_name_pool_str2638[sizeof("khudawadi")]; - char unicode_prop_name_pool_str2641[sizeof("inhalfwidthandfullwidthforms")]; - char unicode_prop_name_pool_str2652[sizeof("inhangulsyllables")]; - char unicode_prop_name_pool_str2653[sizeof("elym")]; - char unicode_prop_name_pool_str2669[sizeof("symbol")]; - char unicode_prop_name_pool_str2692[sizeof("bassavah")]; - char unicode_prop_name_pool_str2696[sizeof("elymaic")]; - char unicode_prop_name_pool_str2698[sizeof("changeswhencasemapped")]; - char unicode_prop_name_pool_str2709[sizeof("oldturkic")]; - char unicode_prop_name_pool_str2725[sizeof("inplayingcards")]; - char unicode_prop_name_pool_str2767[sizeof("zp")]; - char unicode_prop_name_pool_str2775[sizeof("elbasan")]; - char unicode_prop_name_pool_str2776[sizeof("buhid")]; - char unicode_prop_name_pool_str2779[sizeof("noncharactercodepoint")]; - char unicode_prop_name_pool_str2785[sizeof("finalpunctuation")]; - char unicode_prop_name_pool_str2794[sizeof("inpsalterpahlavi")]; - char unicode_prop_name_pool_str2800[sizeof("inpaucinhau")]; - char unicode_prop_name_pool_str2801[sizeof("nonspacingmark")]; - char unicode_prop_name_pool_str2804[sizeof("changeswhentitlecased")]; - char unicode_prop_name_pool_str2808[sizeof("inindicsiyaqnumbers")]; - char unicode_prop_name_pool_str2813[sizeof("phlp")]; - char unicode_prop_name_pool_str2824[sizeof("wspace")]; - char unicode_prop_name_pool_str2831[sizeof("nbat")]; - char unicode_prop_name_pool_str2836[sizeof("hluw")]; - char unicode_prop_name_pool_str2838[sizeof("cypriot")]; - char unicode_prop_name_pool_str2839[sizeof("nabataean")]; - char unicode_prop_name_pool_str2841[sizeof("inalchemicalsymbols")]; - char unicode_prop_name_pool_str2847[sizeof("dupl")]; - char unicode_prop_name_pool_str2856[sizeof("otherlowercase")]; - char 
unicode_prop_name_pool_str2875[sizeof("inarabicsupplement")]; - char unicode_prop_name_pool_str2876[sizeof("inalphabeticpresentationforms")]; - char unicode_prop_name_pool_str2890[sizeof("otherletter")]; - char unicode_prop_name_pool_str2916[sizeof("emojipresentation")]; - char unicode_prop_name_pool_str2930[sizeof("changeswhenlowercased")]; - char unicode_prop_name_pool_str2953[sizeof("dashpunctuation")]; - char unicode_prop_name_pool_str2954[sizeof("oldsogdian")]; - char unicode_prop_name_pool_str2970[sizeof("intangutcomponents")]; - char unicode_prop_name_pool_str2977[sizeof("soyombo")]; - char unicode_prop_name_pool_str2996[sizeof("paragraphseparator")]; - char unicode_prop_name_pool_str3012[sizeof("changeswhencasefolded")]; - char unicode_prop_name_pool_str3015[sizeof("othersymbol")]; - char unicode_prop_name_pool_str3020[sizeof("inlatinextendedb")]; - char unicode_prop_name_pool_str3072[sizeof("otheruppercase")]; - char unicode_prop_name_pool_str3079[sizeof("otheridstart")]; - char unicode_prop_name_pool_str3084[sizeof("inhebrew")]; - char unicode_prop_name_pool_str3088[sizeof("oldnortharabian")]; - char unicode_prop_name_pool_str3092[sizeof("omath")]; - char unicode_prop_name_pool_str3099[sizeof("incypriotsyllabary")]; - char unicode_prop_name_pool_str3118[sizeof("inmathematicaloperators")]; - char unicode_prop_name_pool_str3146[sizeof("changeswhenuppercased")]; - char unicode_prop_name_pool_str3154[sizeof("nshu")]; - char unicode_prop_name_pool_str3182[sizeof("insuperscriptsandsubscripts")]; - char unicode_prop_name_pool_str3188[sizeof("ogrext")]; - char unicode_prop_name_pool_str3197[sizeof("inhanguljamoextendedb")]; - char unicode_prop_name_pool_str3198[sizeof("alphabetic")]; - char unicode_prop_name_pool_str3199[sizeof("dep")]; - char unicode_prop_name_pool_str3216[sizeof("oupper")]; - char unicode_prop_name_pool_str3258[sizeof("zyyy")]; - char unicode_prop_name_pool_str3267[sizeof("aghb")]; - char unicode_prop_name_pool_str3285[sizeof("ogham")]; - char 
unicode_prop_name_pool_str3306[sizeof("rejang")]; - char unicode_prop_name_pool_str3324[sizeof("tagb")]; - char unicode_prop_name_pool_str3350[sizeof("hyphen")]; - char unicode_prop_name_pool_str3355[sizeof("insupplementalarrowsa")]; - char unicode_prop_name_pool_str3413[sizeof("inpahawhhmong")]; - char unicode_prop_name_pool_str3459[sizeof("nushu")]; - char unicode_prop_name_pool_str3478[sizeof("othermath")]; - char unicode_prop_name_pool_str3491[sizeof("insupplementalarrowsc")]; - char unicode_prop_name_pool_str3494[sizeof("insupplementalmathematicaloperators")]; - char unicode_prop_name_pool_str3500[sizeof("otherdefaultignorablecodepoint")]; - char unicode_prop_name_pool_str3527[sizeof("inhighprivateusesurrogates")]; - char unicode_prop_name_pool_str3538[sizeof("duployan")]; - char unicode_prop_name_pool_str3663[sizeof("oldsoutharabian")]; - char unicode_prop_name_pool_str3666[sizeof("tglg")]; - char unicode_prop_name_pool_str3685[sizeof("tagalog")]; - char unicode_prop_name_pool_str3692[sizeof("inbopomofoextended")]; - char unicode_prop_name_pool_str3731[sizeof("signwriting")]; - char unicode_prop_name_pool_str3750[sizeof("inegyptianhieroglyphs")]; - char unicode_prop_name_pool_str3763[sizeof("inegyptianhieroglyphformatcontrols")]; - char unicode_prop_name_pool_str3807[sizeof("upper")]; - char unicode_prop_name_pool_str3810[sizeof("otherpunctuation")]; - char unicode_prop_name_pool_str3872[sizeof("ethiopic")]; - char unicode_prop_name_pool_str3881[sizeof("hebrew")]; - char unicode_prop_name_pool_str3901[sizeof("telugu")]; - char unicode_prop_name_pool_str3910[sizeof("uppercase")]; - char unicode_prop_name_pool_str3937[sizeof("insupplementalpunctuation")]; - char unicode_prop_name_pool_str3946[sizeof("unifiedideograph")]; - char unicode_prop_name_pool_str3961[sizeof("pahawhhmong")]; - char unicode_prop_name_pool_str3971[sizeof("inglagoliticsupplement")]; - char unicode_prop_name_pool_str3992[sizeof("inboxdrawing")]; - char 
unicode_prop_name_pool_str4279[sizeof("uppercaseletter")]; - char unicode_prop_name_pool_str4298[sizeof("othergraphemeextend")]; - char unicode_prop_name_pool_str4401[sizeof("inhighsurrogates")]; - char unicode_prop_name_pool_str4476[sizeof("insupplementalarrowsb")]; - char unicode_prop_name_pool_str4562[sizeof("inhangulcompatibilityjamo")]; - char unicode_prop_name_pool_str4563[sizeof("egyp")]; - char unicode_prop_name_pool_str4577[sizeof("insupplementalsymbolsandpictographs")]; - char unicode_prop_name_pool_str4659[sizeof("nyiakengpuachuehmong")]; - char unicode_prop_name_pool_str4688[sizeof("insupplementaryprivateuseareaa")]; - char unicode_prop_name_pool_str4822[sizeof("openpunctuation")]; - char unicode_prop_name_pool_str4851[sizeof("egyptianhieroglyphs")]; - char unicode_prop_name_pool_str5809[sizeof("insupplementaryprivateuseareab")]; + char unicode_prop_name_pool_str2023[sizeof("inlowsurrogates")]; + char unicode_prop_name_pool_str2028[sizeof("pf")]; + char unicode_prop_name_pool_str2032[sizeof("inbalinese")]; + char unicode_prop_name_pool_str2033[sizeof("patsyn")]; + char unicode_prop_name_pool_str2038[sizeof("inbyzantinemusicalsymbols")]; + char unicode_prop_name_pool_str2047[sizeof("lydi")]; + char unicode_prop_name_pool_str2048[sizeof("lydian")]; + char unicode_prop_name_pool_str2050[sizeof("inmyanmarextendedb")]; + char unicode_prop_name_pool_str2054[sizeof("incjkunifiedideographsextensiong")]; + char unicode_prop_name_pool_str2055[sizeof("incjkradicalssupplement")]; + char unicode_prop_name_pool_str2072[sizeof("cwl")]; + char unicode_prop_name_pool_str2075[sizeof("inunifiedcanadianaboriginalsyllabicsextended")]; + char unicode_prop_name_pool_str2076[sizeof("sharada")]; + char unicode_prop_name_pool_str2077[sizeof("inethiopicsupplement")]; + char unicode_prop_name_pool_str2080[sizeof("inyiradicals")]; + char unicode_prop_name_pool_str2087[sizeof("gujr")]; + char unicode_prop_name_pool_str2097[sizeof("oldhungarian")]; + char 
unicode_prop_name_pool_str2098[sizeof("gujarati")]; + char unicode_prop_name_pool_str2101[sizeof("oalpha")]; + char unicode_prop_name_pool_str2106[sizeof("ugar")]; + char unicode_prop_name_pool_str2109[sizeof("word")]; + char unicode_prop_name_pool_str2110[sizeof("otheridcontinue")]; + char unicode_prop_name_pool_str2115[sizeof("inbraillepatterns")]; + char unicode_prop_name_pool_str2123[sizeof("other")]; + char unicode_prop_name_pool_str2126[sizeof("idstrinaryoperator")]; + char unicode_prop_name_pool_str2129[sizeof("shrd")]; + char unicode_prop_name_pool_str2131[sizeof("innyiakengpuachuehmong")]; + char unicode_prop_name_pool_str2132[sizeof("othernumber")]; + char unicode_prop_name_pool_str2133[sizeof("cyrl")]; + char unicode_prop_name_pool_str2147[sizeof("taiviet")]; + char unicode_prop_name_pool_str2152[sizeof("lepcha")]; + char unicode_prop_name_pool_str2156[sizeof("graph")]; + char unicode_prop_name_pool_str2157[sizeof("idsb")]; + char unicode_prop_name_pool_str2158[sizeof("inmiscellaneousmathematicalsymbolsb")]; + char unicode_prop_name_pool_str2162[sizeof("zp")]; + char unicode_prop_name_pool_str2177[sizeof("mlym")]; + char unicode_prop_name_pool_str2182[sizeof("malayalam")]; + char unicode_prop_name_pool_str2186[sizeof("siddham")]; + char unicode_prop_name_pool_str2188[sizeof("surrogate")]; + char unicode_prop_name_pool_str2194[sizeof("palmyrene")]; + char unicode_prop_name_pool_str2200[sizeof("java")]; + char unicode_prop_name_pool_str2201[sizeof("nonspacingmark")]; + char unicode_prop_name_pool_str2202[sizeof("inlatin1supplement")]; + char unicode_prop_name_pool_str2207[sizeof("ethi")]; + char unicode_prop_name_pool_str2212[sizeof("indevanagari")]; + char unicode_prop_name_pool_str2213[sizeof("incjkunifiedideographsextensionf")]; + char unicode_prop_name_pool_str2214[sizeof("dash")]; + char unicode_prop_name_pool_str2216[sizeof("rohg")]; + char unicode_prop_name_pool_str2219[sizeof("tale")]; + char unicode_prop_name_pool_str2222[sizeof("hang")]; + char 
unicode_prop_name_pool_str2232[sizeof("otheralphabetic")]; + char unicode_prop_name_pool_str2238[sizeof("zanabazarsquare")]; + char unicode_prop_name_pool_str2243[sizeof("ingujarati")]; + char unicode_prop_name_pool_str2245[sizeof("javanese")]; + char unicode_prop_name_pool_str2249[sizeof("inmathematicalalphanumericsymbols")]; + char unicode_prop_name_pool_str2253[sizeof("ingunjalagondi")]; + char unicode_prop_name_pool_str2261[sizeof("sarb")]; + char unicode_prop_name_pool_str2267[sizeof("ext")]; + char unicode_prop_name_pool_str2272[sizeof("avestan")]; + char unicode_prop_name_pool_str2277[sizeof("oldturkic")]; + char unicode_prop_name_pool_str2278[sizeof("inphoneticextensions")]; + char unicode_prop_name_pool_str2280[sizeof("inethiopicextendeda")]; + char unicode_prop_name_pool_str2286[sizeof("hiragana")]; + char unicode_prop_name_pool_str2290[sizeof("inrejang")]; + char unicode_prop_name_pool_str2300[sizeof("oldsogdian")]; + char unicode_prop_name_pool_str2303[sizeof("taml")]; + char unicode_prop_name_pool_str2306[sizeof("soyo")]; + char unicode_prop_name_pool_str2307[sizeof("spaceseparator")]; + char unicode_prop_name_pool_str2319[sizeof("meroitichieroglyphs")]; + char unicode_prop_name_pool_str2323[sizeof("newa")]; + char unicode_prop_name_pool_str2325[sizeof("hmng")]; + char unicode_prop_name_pool_str2329[sizeof("epres")]; + char unicode_prop_name_pool_str2335[sizeof("orya")]; + char unicode_prop_name_pool_str2350[sizeof("terminalpunctuation")]; + char unicode_prop_name_pool_str2351[sizeof("ingothic")]; + char unicode_prop_name_pool_str2354[sizeof("newtailue")]; + char unicode_prop_name_pool_str2355[sizeof("newline")]; + char unicode_prop_name_pool_str2364[sizeof("cwcf")]; + char unicode_prop_name_pool_str2368[sizeof("phagspa")]; + char unicode_prop_name_pool_str2373[sizeof("invariationselectors")]; + char unicode_prop_name_pool_str2377[sizeof("nchar")]; + char unicode_prop_name_pool_str2383[sizeof("intifinagh")]; + char 
unicode_prop_name_pool_str2386[sizeof("inverticalforms")]; + char unicode_prop_name_pool_str2394[sizeof("emojipresentation")]; + char unicode_prop_name_pool_str2397[sizeof("invedicextensions")]; + char unicode_prop_name_pool_str2399[sizeof("extender")]; + char unicode_prop_name_pool_str2414[sizeof("extpict")]; + char unicode_prop_name_pool_str2421[sizeof("caucasianalbanian")]; + char unicode_prop_name_pool_str2425[sizeof("inbamum")]; + char unicode_prop_name_pool_str2426[sizeof("inbengali")]; + char unicode_prop_name_pool_str2428[sizeof("avst")]; + char unicode_prop_name_pool_str2433[sizeof("inphoneticextensionssupplement")]; + char unicode_prop_name_pool_str2439[sizeof("ebase")]; + char unicode_prop_name_pool_str2443[sizeof("inbuginese")]; + char unicode_prop_name_pool_str2446[sizeof("softdotted")]; + char unicode_prop_name_pool_str2448[sizeof("intangutcomponents")]; + char unicode_prop_name_pool_str2453[sizeof("mahj")]; + char unicode_prop_name_pool_str2460[sizeof("intaiviet")]; + char unicode_prop_name_pool_str2464[sizeof("mahajani")]; + char unicode_prop_name_pool_str2466[sizeof("syrc")]; + char unicode_prop_name_pool_str2467[sizeof("buhd")]; + char unicode_prop_name_pool_str2474[sizeof("paragraphseparator")]; + char unicode_prop_name_pool_str2475[sizeof("digit")]; + char unicode_prop_name_pool_str2477[sizeof("unknown")]; + char unicode_prop_name_pool_str2478[sizeof("incombiningdiacriticalmarks")]; + char unicode_prop_name_pool_str2484[sizeof("oupper")]; + char unicode_prop_name_pool_str2488[sizeof("incombiningdiacriticalmarksforsymbols")]; + char unicode_prop_name_pool_str2500[sizeof("inpaucinhau")]; + char unicode_prop_name_pool_str2501[sizeof("ingeorgianextended")]; + char unicode_prop_name_pool_str2504[sizeof("cwu")]; + char unicode_prop_name_pool_str2506[sizeof("inenclosedcjklettersandmonths")]; + char unicode_prop_name_pool_str2510[sizeof("otherlowercase")]; + char unicode_prop_name_pool_str2511[sizeof("incurrencysymbols")]; + char 
unicode_prop_name_pool_str2516[sizeof("inhangulsyllables")]; + char unicode_prop_name_pool_str2520[sizeof("inlinearbsyllabary")]; + char unicode_prop_name_pool_str2523[sizeof("ugaritic")]; + char unicode_prop_name_pool_str2528[sizeof("invariationselectorssupplement")]; + char unicode_prop_name_pool_str2541[sizeof("warangciti")]; + char unicode_prop_name_pool_str2544[sizeof("otherletter")]; + char unicode_prop_name_pool_str2548[sizeof("rjng")]; + char unicode_prop_name_pool_str2556[sizeof("khoj")]; + char unicode_prop_name_pool_str2562[sizeof("changeswhencasemapped")]; + char unicode_prop_name_pool_str2565[sizeof("noncharactercodepoint")]; + char unicode_prop_name_pool_str2569[sizeof("osmanya")]; + char unicode_prop_name_pool_str2571[sizeof("finalpunctuation")]; + char unicode_prop_name_pool_str2579[sizeof("hmnp")]; + char unicode_prop_name_pool_str2580[sizeof("inpsalterpahlavi")]; + char unicode_prop_name_pool_str2582[sizeof("insuperscriptsandsubscripts")]; + char unicode_prop_name_pool_str2587[sizeof("mathsymbol")]; + char unicode_prop_name_pool_str2588[sizeof("insmallformvariants")]; + char unicode_prop_name_pool_str2599[sizeof("phlp")]; + char unicode_prop_name_pool_str2604[sizeof("syriac")]; + char unicode_prop_name_pool_str2609[sizeof("intangutsupplement")]; + char unicode_prop_name_pool_str2614[sizeof("tang")]; + char unicode_prop_name_pool_str2617[sizeof("insymbolsandpictographsextendeda")]; + char unicode_prop_name_pool_str2618[sizeof("tagbanwa")]; + char unicode_prop_name_pool_str2622[sizeof("inaegeannumbers")]; + char unicode_prop_name_pool_str2626[sizeof("indevanagariextended")]; + char unicode_prop_name_pool_str2633[sizeof("incombiningdiacriticalmarkssupplement")]; + char unicode_prop_name_pool_str2634[sizeof("inmeroiticcursive")]; + char unicode_prop_name_pool_str2640[sizeof("inbrahmi")]; + char unicode_prop_name_pool_str2643[sizeof("deprecated")]; + char unicode_prop_name_pool_str2662[sizeof("inalphabeticpresentationforms")]; + char 
unicode_prop_name_pool_str2666[sizeof("titlecaseletter")]; + char unicode_prop_name_pool_str2668[sizeof("changeswhentitlecased")]; + char unicode_prop_name_pool_str2669[sizeof("othersymbol")]; + char unicode_prop_name_pool_str2673[sizeof("inethiopicextended")]; + char unicode_prop_name_pool_str2684[sizeof("sgnw")]; + char unicode_prop_name_pool_str2688[sizeof("cyrillic")]; + char unicode_prop_name_pool_str2699[sizeof("incombiningdiacriticalmarksextended")]; + char unicode_prop_name_pool_str2703[sizeof("inbopomofo")]; + char unicode_prop_name_pool_str2705[sizeof("sylo")]; + char unicode_prop_name_pool_str2710[sizeof("narb")]; + char unicode_prop_name_pool_str2717[sizeof("ogham")]; + char unicode_prop_name_pool_str2719[sizeof("tamil")]; + char unicode_prop_name_pool_str2724[sizeof("wancho")]; + char unicode_prop_name_pool_str2726[sizeof("otheruppercase")]; + char unicode_prop_name_pool_str2727[sizeof("currencysymbol")]; + char unicode_prop_name_pool_str2733[sizeof("otheridstart")]; + char unicode_prop_name_pool_str2737[sizeof("inlimbu")]; + char unicode_prop_name_pool_str2743[sizeof("intagbanwa")]; + char unicode_prop_name_pool_str2755[sizeof("insupplementalarrowsa")]; + char unicode_prop_name_pool_str2761[sizeof("khitansmallscript")]; + char unicode_prop_name_pool_str2774[sizeof("insymbolsforlegacycomputing")]; + char unicode_prop_name_pool_str2789[sizeof("deva")]; + char unicode_prop_name_pool_str2794[sizeof("changeswhenlowercased")]; + char unicode_prop_name_pool_str2798[sizeof("incjkunifiedideographsextensionb")]; + char unicode_prop_name_pool_str2805[sizeof("devanagari")]; + char unicode_prop_name_pool_str2808[sizeof("tirhuta")]; + char unicode_prop_name_pool_str2815[sizeof("elba")]; + char unicode_prop_name_pool_str2817[sizeof("extendedpictographic")]; + char unicode_prop_name_pool_str2825[sizeof("inindicsiyaqnumbers")]; + char unicode_prop_name_pool_str2829[sizeof("thaa")]; + char unicode_prop_name_pool_str2831[sizeof("talu")]; + char 
unicode_prop_name_pool_str2832[sizeof("omath")]; + char unicode_prop_name_pool_str2834[sizeof("thaana")]; + char unicode_prop_name_pool_str2836[sizeof("thai")]; + char unicode_prop_name_pool_str2839[sizeof("wcho")]; + char unicode_prop_name_pool_str2841[sizeof("inottomansiyaqnumbers")]; + char unicode_prop_name_pool_str2850[sizeof("telu")]; + char unicode_prop_name_pool_str2853[sizeof("hung")]; + char unicode_prop_name_pool_str2859[sizeof("hangul")]; + char unicode_prop_name_pool_str2862[sizeof("buhid")]; + char unicode_prop_name_pool_str2867[sizeof("inletterlikesymbols")]; + char unicode_prop_name_pool_str2891[sizeof("insupplementalarrowsc")]; + char unicode_prop_name_pool_str2892[sizeof("tirh")]; + char unicode_prop_name_pool_str2894[sizeof("insupplementalmathematicaloperators")]; + char unicode_prop_name_pool_str2899[sizeof("shaw")]; + char unicode_prop_name_pool_str2904[sizeof("inmathematicaloperators")]; + char unicode_prop_name_pool_str2914[sizeof("inprivateusearea")]; + char unicode_prop_name_pool_str2919[sizeof("whitespace")]; + char unicode_prop_name_pool_str2940[sizeof("nshu")]; + char unicode_prop_name_pool_str2942[sizeof("taitham")]; + char unicode_prop_name_pool_str2962[sizeof("ogrext")]; + char unicode_prop_name_pool_str2977[sizeof("inpahawhhmong")]; + char unicode_prop_name_pool_str2979[sizeof("incyrillicextendedb")]; + char unicode_prop_name_pool_str2980[sizeof("nbat")]; + char unicode_prop_name_pool_str2981[sizeof("inplayingcards")]; + char unicode_prop_name_pool_str2983[sizeof("hex")]; + char unicode_prop_name_pool_str2987[sizeof("elym")]; + char unicode_prop_name_pool_str2988[sizeof("nabataean")]; + char unicode_prop_name_pool_str2997[sizeof("inbassavah")]; + char unicode_prop_name_pool_str3002[sizeof("elbasan")]; + char unicode_prop_name_pool_str3010[sizeof("changeswhenuppercased")]; + char unicode_prop_name_pool_str3014[sizeof("khudawadi")]; + char unicode_prop_name_pool_str3024[sizeof("inarabicsupplement")]; + char 
unicode_prop_name_pool_str3030[sizeof("elymaic")]; + char unicode_prop_name_pool_str3033[sizeof("inbuhid")]; + char unicode_prop_name_pool_str3036[sizeof("sylotinagri")]; + char unicode_prop_name_pool_str3038[sizeof("hebr")]; + char unicode_prop_name_pool_str3073[sizeof("tangut")]; + char unicode_prop_name_pool_str3076[sizeof("inbamumsupplement")]; + char unicode_prop_name_pool_str3080[sizeof("wspace")]; + char unicode_prop_name_pool_str3094[sizeof("cypriot")]; + char unicode_prop_name_pool_str3105[sizeof("oldnortharabian")]; + char unicode_prop_name_pool_str3115[sizeof("dupl")]; + char unicode_prop_name_pool_str3143[sizeof("ecomp")]; + char unicode_prop_name_pool_str3154[sizeof("otherdefaultignorablecodepoint")]; + char unicode_prop_name_pool_str3157[sizeof("inmahjongtiles")]; + char unicode_prop_name_pool_str3159[sizeof("nushu")]; + char unicode_prop_name_pool_str3164[sizeof("otherpunctuation")]; + char unicode_prop_name_pool_str3167[sizeof("dep")]; + char unicode_prop_name_pool_str3168[sizeof("incombininghalfmarks")]; + char unicode_prop_name_pool_str3169[sizeof("inarabicmathematicalalphabeticsymbols")]; + char unicode_prop_name_pool_str3210[sizeof("injavanese")]; + char unicode_prop_name_pool_str3215[sizeof("kayahli")]; + char unicode_prop_name_pool_str3218[sizeof("othermath")]; + char unicode_prop_name_pool_str3221[sizeof("dashpunctuation")]; + char unicode_prop_name_pool_str3224[sizeof("hanifirohingya")]; + char unicode_prop_name_pool_str3225[sizeof("symbol")]; + char unicode_prop_name_pool_str3227[sizeof("inglagoliticsupplement")]; + char unicode_prop_name_pool_str3295[sizeof("inhalfwidthandfullwidthforms")]; + char unicode_prop_name_pool_str3337[sizeof("insupplementalpunctuation")]; + char unicode_prop_name_pool_str3347[sizeof("alphabetic")]; + char unicode_prop_name_pool_str3353[sizeof("inyijinghexagramsymbols")]; + char unicode_prop_name_pool_str3355[sizeof("incypriotsyllabary")]; + char 
unicode_prop_name_pool_str3358[sizeof("inarabicpresentationformsb")]; + char unicode_prop_name_pool_str3361[sizeof("hexdigit")]; + char unicode_prop_name_pool_str3376[sizeof("inalchemicalsymbols")]; + char unicode_prop_name_pool_str3395[sizeof("rejang")]; + char unicode_prop_name_pool_str3408[sizeof("bassavah")]; + char unicode_prop_name_pool_str3417[sizeof("modifiersymbol")]; + char unicode_prop_name_pool_str3420[sizeof("any")]; + char unicode_prop_name_pool_str3436[sizeof("ethiopic")]; + char unicode_prop_name_pool_str3444[sizeof("changeswhencasefolded")]; + char unicode_prop_name_pool_str3461[sizeof("defaultignorablecodepoint")]; + char unicode_prop_name_pool_str3477[sizeof("inhighprivateusesurrogates")]; + char unicode_prop_name_pool_str3479[sizeof("indivesakuru")]; + char unicode_prop_name_pool_str3497[sizeof("upper")]; + char unicode_prop_name_pool_str3510[sizeof("inhanguljamoextendedb")]; + char unicode_prop_name_pool_str3577[sizeof("signwriting")]; + char unicode_prop_name_pool_str3580[sizeof("aghb")]; + char unicode_prop_name_pool_str3595[sizeof("divesakuru")]; + char unicode_prop_name_pool_str3599[sizeof("tfng")]; + char unicode_prop_name_pool_str3600[sizeof("uppercase")]; + char unicode_prop_name_pool_str3659[sizeof("hluw")]; + char unicode_prop_name_pool_str3680[sizeof("oldsoutharabian")]; + char unicode_prop_name_pool_str3683[sizeof("inyisyllables")]; + char unicode_prop_name_pool_str3688[sizeof("tibt")]; + char unicode_prop_name_pool_str3694[sizeof("tibetan")]; + char unicode_prop_name_pool_str3730[sizeof("othergraphemeextend")]; + char unicode_prop_name_pool_str3784[sizeof("inegyptianhieroglyphs")]; + char unicode_prop_name_pool_str3788[sizeof("insupplementaryprivateuseareaa")]; + char unicode_prop_name_pool_str3790[sizeof("openpunctuation")]; + char unicode_prop_name_pool_str3797[sizeof("inegyptianhieroglyphformatcontrols")]; + char unicode_prop_name_pool_str3828[sizeof("tifinagh")]; + char unicode_prop_name_pool_str3852[sizeof("tglg")]; + char 
unicode_prop_name_pool_str3871[sizeof("tagalog")]; + char unicode_prop_name_pool_str3892[sizeof("tavt")]; + char unicode_prop_name_pool_str3909[sizeof("inhebrew")]; + char unicode_prop_name_pool_str3918[sizeof("inlatinextendedb")]; + char unicode_prop_name_pool_str3969[sizeof("uppercaseletter")]; + char unicode_prop_name_pool_str3982[sizeof("soyombo")]; + char unicode_prop_name_pool_str4129[sizeof("inhighsurrogates")]; + char unicode_prop_name_pool_str4139[sizeof("hyphen")]; + char unicode_prop_name_pool_str4181[sizeof("tagb")]; + char unicode_prop_name_pool_str4186[sizeof("unifiedideograph")]; + char unicode_prop_name_pool_str4201[sizeof("pahawhhmong")]; + char unicode_prop_name_pool_str4269[sizeof("inbopomofoextended")]; + char unicode_prop_name_pool_str4309[sizeof("telugu")]; + char unicode_prop_name_pool_str4325[sizeof("insupplementalarrowsb")]; + char unicode_prop_name_pool_str4362[sizeof("duployan")]; + char unicode_prop_name_pool_str4375[sizeof("egyp")]; + char unicode_prop_name_pool_str4425[sizeof("inboxdrawing")]; + char unicode_prop_name_pool_str4533[sizeof("insupplementalsymbolsandpictographs")]; + char unicode_prop_name_pool_str4575[sizeof("inhangulcompatibilityjamo")]; + char unicode_prop_name_pool_str4779[sizeof("nyiakengpuachuehmong")]; + char unicode_prop_name_pool_str4921[sizeof("zyyy")]; + char unicode_prop_name_pool_str5358[sizeof("insupplementaryprivateuseareab")]; + char unicode_prop_name_pool_str5443[sizeof("hebrew")]; + char unicode_prop_name_pool_str5519[sizeof("egyptianhieroglyphs")]; }; static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = { "lana", "z", - "yi", "lina", + "yi", "mn", - "yiii", "cn", "maka", "mani", - "zzzz", + "yiii", "inkannada", "ci", "lo", "lao", "laoo", + "zzzz", "miao", + "yezi", "innko", "co", "me", @@ -26283,6 +26760,7 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "combiningmark", "incuneiformnumbersandpunctuation", "merc", + "inchorasmian", "perm", 
"inahom", "inipaextensions", @@ -26295,8 +26773,8 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "incuneiform", "mc", "cc", - "inzanabazarsquare", "lineseparator", + "inzanabazarsquare", "armn", "qmark", "armi", @@ -26316,11 +26794,9 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "incyrillic", "inthai", "incham", - "inkaithi", "zs", + "inkaithi", "mtei", - "vai", - "vaii", "initialpunctuation", "cs", "insyriac", @@ -26329,18 +26805,17 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "mand", "l", "ps", - "dia", "inkanaextendeda", - "di", "mend", "modi", "ideo", "katakana", "prti", + "yezidi", "inideographicdescriptioncharacters", "inlineara", - "xidcontinue", "brai", + "xidcontinue", "inlao", "ascii", "privateuse", @@ -26365,8 +26840,8 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "inmendekikakui", "intransportandmapsymbols", "letternumber", - "xidc", "inmedefaidrin", + "xidc", "inchesssymbols", "inemoticons", "brahmi", @@ -26378,7 +26853,7 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "psalterpahlavi", "insundanese", "inoldsogdian", - "diacritic", + "kits", "gothic", "inancientsymbols", "meroiticcursive", @@ -26396,13 +26871,13 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "inoldsoutharabian", "insylotinagri", "idsbinaryoperator", + "oriya", "sora", "bamum", "inkanasupplement", "incjkstrokes", "joinc", "inopticalcharacterrecognition", - "vs", "indominotiles", "batk", "grext", @@ -26411,7 +26886,6 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "inmodifiertoneletters", "ital", "bass", - "decimalnumber", "alnum", "ids", "print", @@ -26420,19 +26894,20 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "inmusicalsymbols", "intaile", "samr", + "vai", + "vaii", "samaritan", "s", "inlatinextendede", 
"bali", "lisu", "pauc", - "patternsyntax", "incontrolpictures", "blank", + "zl", "inmiscellaneoussymbols", "ll", "inancientgreekmusicalnotation", - "zl", "inlydian", "sm", "inmiscellaneoussymbolsandarrows", @@ -26440,584 +26915,606 @@ static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = "bengali", "pd", "inmiscellaneoussymbolsandpictographs", - "medf", - "cf", + "ingrantha", + "gong", "balinese", - "medefaidrin", - "han", + "osage", + "mong", "intamil", - "hani", "inmultani", - "hano", - "inshorthandformatcontrols", + "inosage", + "ingeneralpunctuation", + "georgian", "insaurashtra", - "pf", "inoldturkic", "idcontinue", - "hanunoo", "sc", - "inkhojki", "idst", "canadianaboriginal", - "hira", + "ingeorgian", + "osma", "plrd", "incaucasianalbanian", "indeseret", "inearlydynasticcuneiform", "inspacingmodifierletters", - "innewa", + "inmongolian", + "emoji", "idstart", - "zinh", - "incyrillicextendeda", - "dsrt", "cased", + "dia", "glagolitic", - "inhanifirohingya", + "di", + "diak", "inancientgreeknumbers", "inmeeteimayekextensions", - "intaixuanjingsymbols", + "ingurmukhi", "joincontrol", "runr", - "inwarangciti", - "deseret", - "inhiragana", "sind", - "cherokee", + "odi", "inlatinextendedc", "adlm", - "phoenician", - "cher", - "marchen", + "zinh", "inkhudawadi", "sinhala", - "lower", - "graphemelink", - "xidstart", - "ingrantha", "bidic", - "xdigit", + "xidstart", "casedletter", - "gong", + "xdigit", "multani", + "logicalorderexception", "gunjalagondi", + "n", + "inhanifirohingya", + "insorasompeng", + "insiddham", + "incountingrodnumerals", + "nandinagari", + "no", + "inhiragana", + "nko", + "nkoo", + "xpeo", + "bamu", + "cherokee", + "p", + "phoenician", + "cher", + "emojimodifier", + "inphoenician", + "marchen", + "vs", + "graphemelink", + "adlam", + "inkhitansmallscript", "cham", "chakma", - "mong", "kaithi", "inmahajani", "graphemebase", - "insiddham", + "oidc", + "ingreekandcoptic", "inogham", - "inosage", - "incountingrodnumerals", - 
"inwancho", - "khojki", - "ingeneralpunctuation", - "georgian", - "incyrillicextendedc", - "inkayahli", - "khar", - "inoriya", - "manichaean", - "bamu", - "zanb", - "oriya", - "inolchiki", - "linb", - "inethiopic", - "wara", - "ingeorgian", - "innabataean", - "inkanbun", - "adlam", - "inbhaiksuki", - "insinhala", - "inelbasan", - "lowercase", - "takri", - "inmongolian", - "invai", - "xsux", "cntrl", - "emoji", "sterm", - "cuneiform", - "cwcm", - "hatran", - "linearb", - "taile", + "khojki", + "olck", + "diacritic", "inlatinextendedadditional", "inenclosedalphanumerics", + "olower", "anatolianhieroglyphs", "incyrillicsupplement", + "khar", "intamilsupplement", + "manichaean", "inmiscellaneoustechnical", - "ahom", + "olchiki", "incherokeesupplement", - "takr", - "khmr", + "inmeroitichieroglyphs", + "oids", + "inolchiki", + "inethiopic", "incjkunifiedideographsextensiona", "quotationmark", - "lyci", - "lycian", - "ingurmukhi", + "inkangxiradicals", "runic", - "limb", - "inscriptionalpahlavi", - "hatr", - "variationselector", - "modifierletter", + "enclosingmark", + "innewa", + "lepc", + "beng", "incjkunifiedideographsextensione", - "mymr", - "myanmar", + "insinhala", + "glag", + "inphaistosdisc", + "patternsyntax", "lu", - "kharoshthi", - "inarabic", + "chorasmian", "radical", - "khmer", - "inunifiedcanadianaboriginalsyllabics", - "osage", + "regionalindicator", + "ingreekextended", + "emojimodifierbase", + "indogra", "sundanese", - "innewtailue", - "logicalorderexception", - "math", + "ahom", + "chrs", "braille", - "goth", - "insorasompeng", - "insoyombo", - "arab", + "khmr", + "inwarangciti", "saur", + "inscriptionalpahlavi", "guru", - "term", + "decimalnumber", + "inimperialaramaic", "paucinhau", - "inbasiclatin", - "inarabicpresentationformsa", + "emod", "punct", "gurmukhi", - "grantha", - "inshavian", - "osma", + "ingeometricshapes", + "lower", "inenclosedalphanumericsupplement", - "intirhuta", - "inelymaic", - "inhatran", + "kharoshthi", 
"incjkunifiedideographsextensionc", - "emojimodifier", - "uideo", + "khmer", "bidicontrol", - "phnx", "limbu", - "n", "inenclosedideographicsupplement", + "math", "mult", - "kthi", + "inwancho", + "goth", + "han", "incjkunifiedideographs", - "ingreekandcoptic", - "inoldhungarian", - "incjkcompatibility", - "grbase", - "nandinagari", - "brah", - "no", + "coptic", + "hani", + "incyrillicextendeda", + "hano", + "medf", "sd", - "nko", - "nkoo", - "xpeo", - "sidd", - "inherited", - "p", - "phli", - "inmeroitichieroglyphs", - "inphoenician", - "inmayannumerals", - "saurashtra", - "inkangxiradicals", - "enclosingmark", - "graphemeextend", - "beng", - "inbatak", - "glag", - "ahex", - "patsyn", - "odi", - "dogra", - "intibetan", - "lydi", - "lydian", - "inblockelements", - "cwcf", - "inunifiedcanadianaboriginalsyllabicsextended", - "regionalindicator", - "ingreekextended", - "emojimodifierbase", - "inanatolianhieroglyphs", - "indogra", - "taiviet", - "inyiradicals", - "unassigned", - "insundanesesupplement", - "innumberforms", - "lowercaseletter", - "dogr", - "lepc", - "bhks", - "word", - "gujr", - "inphaistosdisc", - "bhaiksuki", - "dash", - "inarabicextendeda", - "gujarati", - "inlatinextendedd", - "innoblock", - "java", - "ingeometricshapes", - "cyrl", - "indevanagari", - "inbalinese", - "inbyzantinemusicalsymbols", - "tale", - "deva", - "hex", - "inmyanmarextendedb", - "sinh", - "cwt", - "shavian", - "devanagari", - "oidc", - "javanese", - "mlym", - "incjkunifiedideographsextensiond", - "sentenceterminal", - "malayalam", - "inhanunoo", - "insinhalaarchaicnumbers", - "olck", - "inimperialaramaic", - "olower", - "palmyrene", - "avestan", - "incjkunifiedideographsextensionf", "insogdian", "indingbats", - "olchiki", - "oids", - "punctuation", - "sund", - "inbraillepatterns", + "medefaidrin", + "sidd", + "hanunoo", + "intaixuanjingsymbols", + "inkhojki", + "grantha", + "copt", + "intirhuta", "emojicomponent", + "inhatran", "ideographic", - "taml", - "format", + "saurashtra", 
+ "inshorthandformatcontrols", + "hira", + "ininscriptionalparthian", + "lowercase", + "ininscriptionalpahlavi", "inornamentaldingbats", - "coptic", "caseignorable", - "idsb", - "inmiscellaneousmathematicalsymbolsb", + "nand", + "cwcm", + "kthi", + "cprt", + "inoldhungarian", "sogdian", "sogo", - "terminalpunctuation", + "brah", "intangut", - "intifinagh", - "inlowsurrogates", - "invariationselectors", - "copt", - "soyo", - "intags", - "inverticalforms", - "ininscriptionalparthian", - "ininscriptionalpahlavi", - "meroitichieroglyphs", - "asciihexdigit", - "inethiopicextendeda", - "invedicextensions", - "nand", - "cwl", - "cprt", - "innushu", - "zanabazarsquare", - "ingeometricshapesextended", - "avst", "imperialaramaic", "bopo", - "sarb", "inoldpersian", - "intaiviet", - "mahj", - "inkatakanaphoneticextensions", - "mahajani", + "inherited", + "phli", + "intags", + "insundanesesupplement", + "oldpersian", + "incyrillicextendedc", + "dsrt", + "graphemeextend", + "inlatinextendedd", + "ogam", "closepunctuation", - "inlisu", - "softdotted", - "unknown", - "invariationselectorssupplement", - "syrc", - "hang", + "ingeometricshapesextended", + "zanb", + "linb", + "innewtailue", + "deseret", + "innabataean", + "inanatolianhieroglyphs", + "inkanbun", + "inbhaiksuki", + "inelbasan", + "xsux", + "incjkunifiedideographsextensiond", + "sentenceterminal", "incjksymbolsandpunctuation", - "ingujarati", + "bhks", "bopomofo", "alpha", - "khoj", "inpalmyrene", - "ingunjalagondi", + "bhaiksuki", + "linearb", "inoldpermic", "inlepcha", - "assigned", - "incurrencysymbols", - "insmallformvariants", - "sorasompeng", - "inlinearbsyllabary", - "oldpersian", - "caucasianalbanian", - "inenclosedcjklettersandmonths", - "hiragana", - "inbamum", - "inrejang", - "graph", - "sharada", - "inethiopicsupplement", - "indevanagariextended", + "uideo", + "punctuation", + "sund", + "oldpermic", + "osge", + "variationselector", + "inkayahli", + "inoriya", + "inyezidi", + "limb", + "sinh", + "shavian", 
"incoptic", "insyriacsupplement", - "inmeroiticcursive", - "hmng", - "intelugu", - "incombiningdiacriticalmarks", - "mathsymbol", - "titlecaseletter", - "ugar", - "incombiningdiacriticalmarksforsymbols", + "wara", + "inhanunoo", + "insinhalaarchaicnumbers", + "inarabic", + "hatran", + "assigned", + "sorasompeng", "spacingmark", - "shrd", - "injavanese", - "syriac", + "orkh", "space", - "hebr", - "ext", - "inhanguljamo", - "phag", - "inhanguljamoextendeda", - "wancho", + "cuneiform", + "arab", + "format", + "hatr", + "inbasiclatin", + "inarabicpresentationformsa", + "lyci", + "lycian", + "takri", + "invai", + "modifierletter", + "unassigned", + "mymr", + "myanmar", + "lowercaseletter", + "taile", + "dogra", + "takr", + "asciihexdigit", + "separator", + "inunifiedcanadianaboriginalsyllabics", + "grbase", "bugi", - "tamil", + "nd", "sogd", - "orkh", - "oldpermic", - "siddham", - "incyrillicextendedb", - "inbrahmi", + "insoyombo", + "inlisu", + "innushu", "inmongoliansupplement", - "thaa", - "thaana", - "thai", + "dogr", + "phnx", + "cwt", "buginese", "ingeorgiansupplement", - "digit", - "cyrillic", - "sylo", - "inphoneticextensions", - "separator", - "inethiopicextended", - "inmathematicalalphanumericsymbols", - "insymbolsandpictographsextendeda", - "nd", - "tirh", - "tirhuta", - "incombiningdiacriticalmarkssupplement", - "extender", - "wcho", - "inbengali", - "talu", - "tang", - "warangciti", - "tagbanwa", - "orya", "nl", "induployan", - "cwu", - "inbuginese", - "telu", - "ogam", - "taitham", - "rjng", - "insuttonsignwriting", - "incombiningdiacriticalmarksextended", - "number", - "inyijinghexagramsymbols", - "whitespace", - "currencysymbol", - "inottomansiyaqnumbers", - "inlimbu", - "incopticepactnumbers", - "ingeorgianextended", - "inphoneticextensionssupplement", - "any", - "osge", - "defaultignorablecodepoint", - "hangul", - "newa", - "ethi", - "hanifirohingya", - "hmnp", "olditalic", - "incjkunifiedideographsextensionb", - "rohg", - "innyiakengpuachuehmong", - 
"incjkradicalssupplement", - "newtailue", - "newline", - "lepcha", - "deprecated", - "buhd", + "inelymaic", + "inbatak", + "inkatakanaphoneticextensions", + "inlisusupplement", + "inphagspa", + "inhanguljamo", + "phag", + "intibetan", + "inhanguljamoextendeda", "inglagolitic", - "inaegeannumbers", - "modifiersymbol", - "surrogate", - "inletterlikesymbols", - "idstrinaryoperator", + "inblockelements", + "number", + "incjkcompatibility", + "term", + "innumberforms", + "inshavian", + "ahex", + "cf", + "incopticepactnumbers", "intagalog", - "tangut", - "osmanya", + "inmayannumerals", + "inarabicextendeda", + "innoblock", + "insuttonsignwriting", + "intelugu", + "inlowsurrogates", + "pf", + "inbalinese", + "patsyn", + "inbyzantinemusicalsymbols", + "lydi", + "lydian", + "inmyanmarextendedb", + "incjkunifiedideographsextensiong", + "incjkradicalssupplement", + "cwl", + "inunifiedcanadianaboriginalsyllabicsextended", + "sharada", + "inethiopicsupplement", + "inyiradicals", + "gujr", + "oldhungarian", + "gujarati", "oalpha", - "inphagspa", - "ugaritic", + "ugar", + "word", "otheridcontinue", - "inarabicpresentationformsb", - "inbassavah", + "inbraillepatterns", "other", + "idstrinaryoperator", + "shrd", + "innyiakengpuachuehmong", "othernumber", - "sylotinagri", - "ingothic", - "inbuhid", + "cyrl", + "taiviet", + "lepcha", + "graph", + "idsb", + "inmiscellaneousmathematicalsymbolsb", + "zp", + "mlym", + "malayalam", + "siddham", + "surrogate", + "palmyrene", + "java", + "nonspacingmark", "inlatin1supplement", - "intagbanwa", - "shaw", - "oldhungarian", - "inmahjongtiles", - "hung", - "tifinagh", - "inbopomofo", - "narb", - "inyisyllables", - "kayahli", + "ethi", + "indevanagari", + "incjkunifiedideographsextensionf", + "dash", + "rohg", + "tale", + "hang", "otheralphabetic", + "zanabazarsquare", + "ingujarati", + "javanese", + "inmathematicalalphanumericsymbols", + "ingunjalagondi", + "sarb", + "ext", + "avestan", + "oldturkic", + "inphoneticextensions", + 
"inethiopicextendeda", + "hiragana", + "inrejang", + "oldsogdian", + "taml", + "soyo", + "spaceseparator", + "meroitichieroglyphs", + "newa", + "hmng", + "epres", + "orya", + "terminalpunctuation", + "ingothic", + "newtailue", + "newline", + "cwcf", "phagspa", - "inprivateusearea", - "elba", + "invariationselectors", "nchar", - "spaceseparator", - "tibt", - "extendedpictographic", - "tibetan", - "sgnw", - "hexdigit", - "tfng", - "inbamumsupplement", - "tavt", - "incombininghalfmarks", - "inarabicmathematicalalphabeticsymbols", - "khudawadi", - "inhalfwidthandfullwidthforms", + "intifinagh", + "inverticalforms", + "emojipresentation", + "invedicextensions", + "extender", + "extpict", + "caucasianalbanian", + "inbamum", + "inbengali", + "avst", + "inphoneticextensionssupplement", + "ebase", + "inbuginese", + "softdotted", + "intangutcomponents", + "mahj", + "intaiviet", + "mahajani", + "syrc", + "buhd", + "paragraphseparator", + "digit", + "unknown", + "incombiningdiacriticalmarks", + "oupper", + "incombiningdiacriticalmarksforsymbols", + "inpaucinhau", + "ingeorgianextended", + "cwu", + "inenclosedcjklettersandmonths", + "otherlowercase", + "incurrencysymbols", "inhangulsyllables", - "elym", - "symbol", - "bassavah", - "elymaic", + "inlinearbsyllabary", + "ugaritic", + "invariationselectorssupplement", + "warangciti", + "otherletter", + "rjng", + "khoj", "changeswhencasemapped", - "oldturkic", - "inplayingcards", - "zp", - "elbasan", - "buhid", "noncharactercodepoint", + "osmanya", "finalpunctuation", + "hmnp", "inpsalterpahlavi", - "inpaucinhau", - "nonspacingmark", - "changeswhentitlecased", - "inindicsiyaqnumbers", + "insuperscriptsandsubscripts", + "mathsymbol", + "insmallformvariants", "phlp", - "wspace", - "nbat", - "hluw", - "cypriot", - "nabataean", - "inalchemicalsymbols", - "dupl", - "otherlowercase", - "inarabicsupplement", + "syriac", + "intangutsupplement", + "tang", + "insymbolsandpictographsextendeda", + "tagbanwa", + "inaegeannumbers", + 
"indevanagariextended", + "incombiningdiacriticalmarkssupplement", + "inmeroiticcursive", + "inbrahmi", + "deprecated", "inalphabeticpresentationforms", - "otherletter", - "emojipresentation", - "changeswhenlowercased", - "dashpunctuation", - "oldsogdian", - "intangutcomponents", - "soyombo", - "paragraphseparator", - "changeswhencasefolded", + "titlecaseletter", + "changeswhentitlecased", "othersymbol", - "inlatinextendedb", + "inethiopicextended", + "sgnw", + "cyrillic", + "incombiningdiacriticalmarksextended", + "inbopomofo", + "sylo", + "narb", + "ogham", + "tamil", + "wancho", "otheruppercase", + "currencysymbol", "otheridstart", - "inhebrew", - "oldnortharabian", + "inlimbu", + "intagbanwa", + "insupplementalarrowsa", + "khitansmallscript", + "insymbolsforlegacycomputing", + "deva", + "changeswhenlowercased", + "incjkunifiedideographsextensionb", + "devanagari", + "tirhuta", + "elba", + "extendedpictographic", + "inindicsiyaqnumbers", + "thaa", + "talu", "omath", - "incypriotsyllabary", + "thaana", + "thai", + "wcho", + "inottomansiyaqnumbers", + "telu", + "hung", + "hangul", + "buhid", + "inletterlikesymbols", + "insupplementalarrowsc", + "tirh", + "insupplementalmathematicaloperators", + "shaw", "inmathematicaloperators", - "changeswhenuppercased", + "inprivateusearea", + "whitespace", "nshu", - "insuperscriptsandsubscripts", + "taitham", "ogrext", - "inhanguljamoextendedb", - "alphabetic", - "dep", - "oupper", - "zyyy", - "aghb", - "ogham", - "rejang", - "tagb", - "hyphen", - "insupplementalarrowsa", "inpahawhhmong", + "incyrillicextendedb", + "nbat", + "inplayingcards", + "hex", + "elym", + "nabataean", + "inbassavah", + "elbasan", + "changeswhenuppercased", + "khudawadi", + "inarabicsupplement", + "elymaic", + "inbuhid", + "sylotinagri", + "hebr", + "tangut", + "inbamumsupplement", + "wspace", + "cypriot", + "oldnortharabian", + "dupl", + "ecomp", + "otherdefaultignorablecodepoint", + "inmahjongtiles", "nushu", + "otherpunctuation", + "dep", + 
"incombininghalfmarks", + "inarabicmathematicalalphabeticsymbols", + "injavanese", + "kayahli", "othermath", - "insupplementalarrowsc", - "insupplementalmathematicaloperators", - "otherdefaultignorablecodepoint", + "dashpunctuation", + "hanifirohingya", + "symbol", + "inglagoliticsupplement", + "inhalfwidthandfullwidthforms", + "insupplementalpunctuation", + "alphabetic", + "inyijinghexagramsymbols", + "incypriotsyllabary", + "inarabicpresentationformsb", + "hexdigit", + "inalchemicalsymbols", + "rejang", + "bassavah", + "modifiersymbol", + "any", + "ethiopic", + "changeswhencasefolded", + "defaultignorablecodepoint", "inhighprivateusesurrogates", - "duployan", - "oldsoutharabian", - "tglg", - "tagalog", - "inbopomofoextended", + "indivesakuru", + "upper", + "inhanguljamoextendedb", "signwriting", + "aghb", + "divesakuru", + "tfng", + "uppercase", + "hluw", + "oldsoutharabian", + "inyisyllables", + "tibt", + "tibetan", + "othergraphemeextend", "inegyptianhieroglyphs", + "insupplementaryprivateuseareaa", + "openpunctuation", "inegyptianhieroglyphformatcontrols", - "upper", - "otherpunctuation", - "ethiopic", - "hebrew", - "telugu", - "uppercase", - "insupplementalpunctuation", - "unifiedideograph", - "pahawhhmong", - "inglagoliticsupplement", - "inboxdrawing", + "tifinagh", + "tglg", + "tagalog", + "tavt", + "inhebrew", + "inlatinextendedb", "uppercaseletter", - "othergraphemeextend", + "soyombo", "inhighsurrogates", + "hyphen", + "tagb", + "unifiedideograph", + "pahawhhmong", + "inbopomofoextended", + "telugu", "insupplementalarrowsb", - "inhangulcompatibilityjamo", + "duployan", "egyp", + "inboxdrawing", "insupplementalsymbolsandpictographs", + "inhangulcompatibilityjamo", "nyiakengpuachuehmong", - "insupplementaryprivateuseareaa", - "openpunctuation", - "egyptianhieroglyphs", - "insupplementaryprivateuseareab" + "zyyy", + "insupplementaryprivateuseareab", + "hebrew", + "egyptianhieroglyphs" }; #define unicode_prop_name_pool ((const char *) 
&unicode_prop_name_pool_contents) static const struct PoolPropertyNameCtype * @@ -27028,69 +27525,73 @@ unicode_lookup_property_name (register const char *str, register size_t len) {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(10), 238}, + {pool_offset(10), 241}, + {-1}, {-1}, + + {pool_offset(13), 266}, {-1}, {-1}, {-1}, - {pool_offset(14), 262}, - {-1}, + {pool_offset(17), 128}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(16), 261}, + {pool_offset(22), 265}, + {-1}, - {pool_offset(17), 125}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(24), 157}, + {-1}, {-1}, - {pool_offset(24), 154}, + {pool_offset(27), 54}, - {pool_offset(25), 261}, + {pool_offset(28), 142}, {-1}, - {pool_offset(27), 53}, - - {pool_offset(28), 139}, - {-1}, + {pool_offset(30), 145}, - {pool_offset(30), 142}, + {pool_offset(31), 265}, {-1}, {-1}, - {pool_offset(33), 252}, - - {pool_offset(34), 295}, + {pool_offset(34), 299}, {pool_offset(35), 40}, {-1}, - {pool_offset(37), 130}, + {pool_offset(37), 133}, - {pool_offset(38), 121}, + {pool_offset(38), 124}, - {pool_offset(39), 121}, - {-1}, {-1}, {-1}, + {pool_offset(39), 124}, - {pool_offset(43), 153}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(40), 255}, + {-1}, {-1}, - {pool_offset(51), 283}, + {pool_offset(43), 156}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(48), 264}, + {-1}, {-1}, + + {pool_offset(51), 287}, {-1}, - {pool_offset(53), 54}, + {pool_offset(53), 55}, {-1}, {-1}, - {pool_offset(56), 147}, + {pool_offset(56), 150}, {-1}, {-1}, {-1}, - {pool_offset(60), 131}, + {pool_offset(60), 134}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(70), 83}, + {pool_offset(70), 85}, {-1}, - {pool_offset(72), 205}, + {pool_offset(72), 208}, {-1}, {-1}, {-1}, - {pool_offset(76), 125}, + {pool_offset(76), 128}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(84), 137}, + {pool_offset(84), 140}, {-1}, {pool_offset(86), 39}, @@ -27098,2250 +27599,2286 @@ 
unicode_lookup_property_name (register const char *str, register size_t len) {pool_offset(87), 39}, {-1}, {-1}, - {pool_offset(90), 206}, + {pool_offset(90), 209}, - {pool_offset(91), 150}, + {pool_offset(91), 153}, {-1}, {-1}, - {pool_offset(94), 87}, + {pool_offset(94), 89}, {-1}, - {pool_offset(96), 201}, + {pool_offset(96), 204}, {-1}, {-1}, - {pool_offset(99), 149}, + {pool_offset(99), 152}, {-1}, - {pool_offset(101), 460}, + {pool_offset(101), 464}, - {pool_offset(102), 80}, + {pool_offset(102), 82}, - {pool_offset(103), 87}, + {pool_offset(103), 89}, {-1}, {-1}, {-1}, - {pool_offset(107), 157}, + {pool_offset(107), 160}, - {pool_offset(108), 157}, + {pool_offset(108), 160}, {-1}, - {pool_offset(110), 113}, + {pool_offset(110), 115}, - {pool_offset(111), 152}, + {pool_offset(111), 155}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(120), 137}, + {pool_offset(120), 140}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(133), 144}, + {pool_offset(133), 147}, {pool_offset(134), 45}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(139), 446}, + {pool_offset(139), 450}, {-1}, {-1}, - {pool_offset(142), 463}, + {pool_offset(142), 467}, {-1}, {-1}, {-1}, - {pool_offset(146), 277}, + {pool_offset(146), 281}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(151), 512}, + {pool_offset(151), 520}, - {pool_offset(152), 516}, + {pool_offset(152), 524}, {-1}, {-1}, {-1}, {pool_offset(156), 37}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(163), 478}, + {pool_offset(163), 484}, {-1}, {-1}, - {pool_offset(166), 55}, + {pool_offset(166), 56}, {-1}, {-1}, - {pool_offset(169), 141}, + {pool_offset(169), 144}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(182), 301}, + {pool_offset(182), 305}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(187), 503}, + {pool_offset(187), 510}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(192), 106}, + {pool_offset(192), 108}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - 
{pool_offset(200), 517}, + {pool_offset(200), 525}, {-1}, {-1}, {-1}, - {pool_offset(204), 314}, + {pool_offset(204), 318}, {-1}, {-1}, {pool_offset(207), 38}, {-1}, - {pool_offset(209), 207}, + {pool_offset(209), 210}, {-1}, - {pool_offset(211), 129}, + {pool_offset(211), 132}, - {pool_offset(212), 143}, + {pool_offset(212), 146}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(219), 199}, + {pool_offset(219), 202}, - {pool_offset(220), 309}, + {pool_offset(220), 313}, - {pool_offset(221), 437}, + {pool_offset(221), 441}, {-1}, - {pool_offset(223), 464}, + {pool_offset(223), 468}, {-1}, - {pool_offset(225), 137}, + {pool_offset(225), 140}, - {pool_offset(226), 506}, + {pool_offset(226), 514}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(231), 151}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(231), 154}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(237), 479}, - {pool_offset(238), 177}, + {pool_offset(238), 180}, - {pool_offset(239), 492}, + {pool_offset(239), 498}, - {pool_offset(240), 271}, + {pool_offset(240), 275}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(251), 306}, + {pool_offset(251), 310}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(257), 480}, + {pool_offset(257), 486}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(268), 139}, + {pool_offset(268), 142}, {-1}, {-1}, {-1}, - {pool_offset(272), 344}, + {pool_offset(272), 348}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(277), 120}, + {pool_offset(277), 123}, {-1}, - {pool_offset(279), 144}, + {pool_offset(279), 147}, {-1}, {-1}, {-1}, - {pool_offset(283), 505}, + {pool_offset(283), 513}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(290), 146}, + {pool_offset(290), 149}, {-1}, {-1}, {pool_offset(293), 43}, - {-1}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(295), 496}, - {-1}, {-1}, + {pool_offset(298), 268}, - {pool_offset(298), 264}, - {-1}, {-1}, {-1}, + {pool_offset(299), 503}, + {-1}, {-1}, {pool_offset(302), 22}, {-1}, {-1}, - 
{pool_offset(305), 210}, + {pool_offset(305), 213}, - {pool_offset(306), 105}, + {pool_offset(306), 107}, {-1}, {-1}, - {pool_offset(309), 284}, + {pool_offset(309), 288}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {pool_offset(316), 22}, {-1}, - {pool_offset(318), 500}, + {pool_offset(318), 507}, - {pool_offset(319), 501}, + {pool_offset(319), 508}, {-1}, - {pool_offset(321), 56}, + {pool_offset(321), 57}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(330), 199}, + {pool_offset(330), 202}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(335), 108}, + {pool_offset(335), 110}, - {pool_offset(336), 122}, + {pool_offset(336), 125}, {-1}, {-1}, {-1}, - {pool_offset(340), 122}, + {pool_offset(340), 125}, {-1}, - {pool_offset(342), 212}, + {pool_offset(342), 215}, {-1}, {-1}, - {pool_offset(345), 282}, + {pool_offset(345), 286}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(352), 320}, + {pool_offset(352), 324}, {-1}, {-1}, - {pool_offset(355), 375}, + {pool_offset(355), 379}, - {pool_offset(356), 275}, + {pool_offset(356), 279}, - {pool_offset(357), 298}, + {pool_offset(357), 302}, {-1}, - {pool_offset(359), 405}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(367), 476}, - {-1}, - - {pool_offset(369), 266}, - {-1}, {-1}, + {pool_offset(359), 409}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(372), 149}, + {pool_offset(364), 270}, {-1}, {-1}, - {pool_offset(375), 254}, + {pool_offset(367), 482}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(376), 254}, - {-1}, {-1}, + {pool_offset(372), 152}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(379), 205}, + {pool_offset(379), 208}, {-1}, {-1}, {-1}, - {pool_offset(383), 57}, + {pool_offset(383), 58}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(399), 280}, + {pool_offset(399), 284}, {-1}, - {pool_offset(401), 207}, + {pool_offset(401), 210}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(415), 491}, + 
{pool_offset(415), 497}, {-1}, - {pool_offset(417), 141}, + {pool_offset(417), 144}, {-1}, - {pool_offset(419), 119}, - - {pool_offset(420), 208}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(427), 66}, - {-1}, {-1}, {-1}, + {pool_offset(419), 122}, - {pool_offset(431), 521}, + {pool_offset(420), 211}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(433), 62}, - {-1}, {-1}, + {pool_offset(431), 531}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(436), 150}, + {pool_offset(436), 153}, - {pool_offset(437), 155}, + {pool_offset(437), 158}, {-1}, {-1}, {-1}, - {pool_offset(441), 104}, + {pool_offset(441), 106}, {-1}, {-1}, {-1}, - {pool_offset(445), 113}, + {pool_offset(445), 115}, {-1}, {-1}, {-1}, - {pool_offset(449), 108}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(449), 110}, + {-1}, - {pool_offset(456), 372}, + {pool_offset(451), 264}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(457), 450}, + {pool_offset(456), 376}, - {pool_offset(458), 259}, - {-1}, {-1}, + {pool_offset(457), 454}, + {-1}, {-1}, {-1}, {pool_offset(461), 34}, - {-1}, - {pool_offset(463), 299}, + {pool_offset(462), 262}, + + {pool_offset(463), 303}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {pool_offset(472), 14}, {-1}, - {pool_offset(474), 54}, + {pool_offset(474), 55}, {-1}, {-1}, {pool_offset(477), 21}, - {pool_offset(478), 156}, + {pool_offset(478), 159}, {-1}, - {pool_offset(480), 406}, + {pool_offset(480), 410}, - {pool_offset(481), 471}, + {pool_offset(481), 475}, {-1}, {-1}, {-1}, - {pool_offset(485), 119}, + {pool_offset(485), 122}, {-1}, - {pool_offset(487), 495}, + {pool_offset(487), 502}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(499), 412}, + {pool_offset(499), 416}, - {pool_offset(500), 462}, + {pool_offset(500), 466}, {-1}, {-1}, - {pool_offset(503), 86}, + {pool_offset(503), 88}, - {pool_offset(504), 112}, + {pool_offset(504), 114}, {-1}, {-1}, - {pool_offset(507), 112}, + {pool_offset(507), 114}, 
{-1}, - {pool_offset(509), 425}, + {pool_offset(509), 429}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(514), 419}, + {pool_offset(514), 423}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(528), 489}, + {pool_offset(528), 495}, - {pool_offset(529), 114}, + {pool_offset(529), 116}, {-1}, {-1}, {pool_offset(532), 43}, {-1}, {-1}, {-1}, - {pool_offset(536), 538}, + {pool_offset(536), 548}, {-1}, - {pool_offset(538), 429}, + {pool_offset(538), 433}, {-1}, - {pool_offset(540), 537}, + {pool_offset(540), 547}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(545), 550}, + {pool_offset(545), 560}, {-1}, - {pool_offset(547), 167}, - {-1}, + {pool_offset(547), 170}, + {-1}, {-1}, - {pool_offset(549), 259}, + {pool_offset(550), 523}, + {-1}, {-1}, - {pool_offset(550), 515}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(553), 262}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(558), 555}, + {pool_offset(558), 565}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(564), 548}, + {pool_offset(564), 558}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {pool_offset(573), 33}, {-1}, - {pool_offset(575), 439}, + {pool_offset(575), 443}, {-1}, {-1}, - {pool_offset(578), 195}, + {pool_offset(578), 198}, {-1}, - {pool_offset(580), 436}, + {pool_offset(580), 440}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(588), 355}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(588), 359}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(594), 260}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(598), 263}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(603), 209}, + {pool_offset(603), 212}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(619), 325}, + {pool_offset(619), 329}, - {pool_offset(620), 472}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, + {pool_offset(620), 477}, - {pool_offset(632), 66}, - {-1}, + {pool_offset(621), 118}, + 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {pool_offset(634), 82}, + {pool_offset(634), 84}, - {pool_offset(635), 434}, + {pool_offset(635), 438}, {-1}, {-1}, {-1}, - {pool_offset(639), 151}, + {pool_offset(639), 154}, {-1}, {-1}, {-1}, - {pool_offset(643), 296}, + {pool_offset(643), 300}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(648), 197}, + {pool_offset(648), 200}, - {pool_offset(649), 225}, + {pool_offset(649), 228}, {-1}, {-1}, - {pool_offset(652), 522}, + {pool_offset(652), 532}, {-1}, {-1}, {-1}, - {pool_offset(656), 285}, + {pool_offset(656), 289}, - {pool_offset(657), 133}, + {pool_offset(657), 136}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(665), 227}, + {pool_offset(665), 230}, - {pool_offset(666), 431}, + {pool_offset(666), 435}, {-1}, - {pool_offset(668), 396}, + {pool_offset(668), 400}, - {pool_offset(669), 562}, + {pool_offset(669), 573}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(677), 102}, + {pool_offset(677), 104}, - {pool_offset(678), 461}, + {pool_offset(678), 465}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(699), 395}, + {pool_offset(699), 399}, {-1}, - {pool_offset(701), 100}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(701), 102}, + {-1}, {-1}, + + {pool_offset(704), 185}, + {-1}, {-1}, - {pool_offset(707), 230}, + {pool_offset(707), 233}, {pool_offset(708), 26}, - {pool_offset(709), 520}, + {pool_offset(709), 530}, - {pool_offset(710), 380}, + {pool_offset(710), 384}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(715), 110}, + {pool_offset(715), 112}, {-1}, {-1}, - {pool_offset(718), 348}, - {-1}, {-1}, {-1}, - - {pool_offset(722), 255}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(718), 352}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(728), 543}, + {pool_offset(728), 553}, {-1}, {-1}, {-1}, {pool_offset(732), 28}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - 
{pool_offset(739), 85}, + {pool_offset(739), 87}, {-1}, {pool_offset(741), 28}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(749), 197}, + {pool_offset(749), 200}, {-1}, {-1}, - {pool_offset(752), 393}, + {pool_offset(752), 397}, - {pool_offset(753), 175}, + {pool_offset(753), 178}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {pool_offset(760), 27}, - {-1}, {-1}, - - {pool_offset(763), 163}, - {-1}, + {-1}, {-1}, {-1}, {-1}, {pool_offset(765), 13}, {-1}, - {pool_offset(767), 103}, + {pool_offset(767), 105}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {pool_offset(776), 7}, {-1}, - {pool_offset(778), 269}, + {pool_offset(778), 273}, {-1}, {-1}, {-1}, - {pool_offset(782), 322}, + {pool_offset(782), 326}, - {pool_offset(783), 527}, + {pool_offset(783), 537}, {-1}, - {pool_offset(785), 318}, + {pool_offset(785), 322}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(794), 216}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(794), 219}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(809), 216}, + {pool_offset(803), 257}, + + {pool_offset(804), 257}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(809), 219}, {-1}, {-1}, {-1}, - {pool_offset(813), 215}, + {pool_offset(813), 218}, {-1}, {-1}, - {pool_offset(816), 410}, + {pool_offset(816), 414}, {-1}, {-1}, {-1}, {pool_offset(820), 25}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(826), 127}, - - {pool_offset(827), 198}, + {pool_offset(826), 130}, - {pool_offset(828), 196}, + {pool_offset(827), 201}, + {-1}, - {pool_offset(829), 347}, + {pool_offset(829), 351}, {pool_offset(830), 2}, - {-1}, {-1}, {-1}, + {-1}, - {pool_offset(834), 353}, + {pool_offset(832), 268}, + {-1}, - {pool_offset(835), 128}, + {pool_offset(834), 357}, - {pool_offset(836), 528}, + {pool_offset(835), 131}, - {pool_offset(837), 264}, + {pool_offset(836), 538}, + {-1}, - {pool_offset(838), 457}, + {pool_offset(838), 461}, - {pool_offset(839), 226}, + 
{pool_offset(839), 229}, {-1}, {-1}, {-1}, - {pool_offset(843), 361}, + {pool_offset(843), 365}, - {pool_offset(844), 442}, + {pool_offset(844), 446}, {pool_offset(845), 29}, - {pool_offset(846), 200}, + {pool_offset(846), 203}, {-1}, - {pool_offset(848), 547}, - {-1}, + {pool_offset(848), 557}, - {pool_offset(850), 148}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(849), 491}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(855), 44}, - {-1}, {-1}, + {pool_offset(856), 91}, + {-1}, {pool_offset(858), 25}, {-1}, - {pool_offset(860), 148}, + {pool_offset(860), 186}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(865), 91}, + {pool_offset(865), 159}, {-1}, {-1}, {-1}, - {pool_offset(869), 293}, - - {pool_offset(870), 91}, + {pool_offset(869), 297}, + {-1}, - {pool_offset(871), 483}, + {pool_offset(871), 489}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(879), 94}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(879), 451}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(887), 525}, - {-1}, {-1}, {-1}, + {pool_offset(885), 342}, - {pool_offset(891), 398}, + {pool_offset(886), 82}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(892), 202}, - {-1}, + {pool_offset(891), 402}, + {-1}, {-1}, - {pool_offset(894), 468}, + {pool_offset(894), 472}, {-1}, - {pool_offset(896), 102}, - {-1}, {-1}, {-1}, {-1}, - - {pool_offset(901), 94}, - {-1}, {-1}, {-1}, - - {pool_offset(905), 218}, + {pool_offset(896), 104}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(906), 482}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(905), 221}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(913), 101}, + {pool_offset(913), 103}, {-1}, {pool_offset(915), 38}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(923), 98}, + {pool_offset(922), 306}, {-1}, - {pool_offset(925), 153}, + {pool_offset(924), 187}, + + {pool_offset(925), 156}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, - {pool_offset(939), 449}, + {pool_offset(939), 453}, - {pool_offset(940), 444}, + {pool_offset(940), 448}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(945), 507}, - - {pool_offset(946), 272}, - {-1}, {-1}, {-1}, + {pool_offset(945), 515}, - {pool_offset(950), 486}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(946), 276}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, + {-1}, - {pool_offset(972), 103}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(957), 319}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(977), 106}, - {-1}, {-1}, {-1}, + {pool_offset(964), 74}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(981), 368}, + {pool_offset(972), 105}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(984), 64}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(993), 41}, - {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(999), 81}, - {-1}, {-1}, + {-1}, - {pool_offset(1002), 470}, + {pool_offset(995), 67}, {-1}, {-1}, {-1}, - {pool_offset(1006), 433}, + {pool_offset(999), 83}, {-1}, - {pool_offset(1008), 408}, + {pool_offset(1001), 63}, + {-1}, {-1}, - {pool_offset(1009), 530}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1004), 68}, + {-1}, - {pool_offset(1016), 110}, - {-1}, {-1}, {-1}, + {pool_offset(1006), 437}, + {-1}, - {pool_offset(1020), 214}, + {pool_offset(1008), 412}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1013), 294}, {-1}, {-1}, - {pool_offset(1023), 494}, - {-1}, + {pool_offset(1016), 112}, + {-1}, {-1}, {-1}, - {pool_offset(1025), 64}, + {pool_offset(1020), 217}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1035), 374}, - {-1}, {-1}, {-1}, - - {pool_offset(1039), 118}, + {pool_offset(1039), 121}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1047), 52}, + {pool_offset(1047), 189}, {-1}, 
{-1}, - {pool_offset(1050), 363}, + {pool_offset(1050), 367}, {-1}, {pool_offset(1052), 16}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1053), 204}, - {-1}, {-1}, + {pool_offset(1058), 108}, + {-1}, {-1}, {-1}, - {pool_offset(1056), 52}, - {-1}, {-1}, + {pool_offset(1062), 490}, - {pool_offset(1059), 143}, - {-1}, {-1}, + {pool_offset(1063), 227}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1062), 484}, + {pool_offset(1072), 31}, - {pool_offset(1063), 224}, + {pool_offset(1073), 263}, {-1}, {-1}, - {pool_offset(1066), 6}, + {pool_offset(1076), 123}, {-1}, - {pool_offset(1068), 86}, + {pool_offset(1078), 11}, - {pool_offset(1069), 260}, - {-1}, + {pool_offset(1079), 161}, - {pool_offset(1071), 485}, + {pool_offset(1080), 134}, + {-1}, - {pool_offset(1072), 31}, + {pool_offset(1082), 91}, {-1}, - {pool_offset(1074), 11}, + {pool_offset(1084), 163}, + {-1}, {-1}, {-1}, + + {pool_offset(1088), 474}, {-1}, - {pool_offset(1076), 120}, + {pool_offset(1090), 483}, {-1}, - {pool_offset(1078), 89}, + {pool_offset(1092), 494}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1079), 158}, + {pool_offset(1102), 541}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1111), 165}, {-1}, {-1}, - {pool_offset(1082), 89}, - {-1}, + {pool_offset(1114), 171}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1084), 46}, + {pool_offset(1121), 378}, {-1}, - {pool_offset(1086), 45}, + {pool_offset(1123), 169}, + + {pool_offset(1124), 169}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1087), 156}, + {pool_offset(1131), 181}, - {pool_offset(1088), 111}, + {pool_offset(1132), 26}, - {pool_offset(1089), 479}, + {pool_offset(1133), 52}, + {-1}, {-1}, {-1}, - {pool_offset(1090), 84}, + {pool_offset(1137), 196}, {-1}, - {pool_offset(1092), 488}, + {pool_offset(1139), 207}, {-1}, {-1}, - {pool_offset(1095), 308}, - {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(1101), 447}, + {pool_offset(1142), 52}, - {pool_offset(1102), 
531}, - {-1}, + {pool_offset(1143), 76}, - {pool_offset(1104), 536}, + {pool_offset(1144), 460}, - {pool_offset(1105), 117}, - {-1}, - - {pool_offset(1107), 338}, - - {pool_offset(1108), 80}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1145), 146}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1117), 329}, + {pool_offset(1150), 258}, + {-1}, {-1}, {-1}, - {pool_offset(1118), 400}, - {-1}, {-1}, + {pool_offset(1154), 88}, + {-1}, - {pool_offset(1121), 115}, + {pool_offset(1156), 16}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1124), 292}, + {pool_offset(1168), 528}, {-1}, - {pool_offset(1126), 142}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1170), 46}, + {-1}, - {pool_offset(1132), 26}, + {pool_offset(1172), 45}, + {-1}, - {pool_offset(1133), 263}, - {-1}, {-1}, + {pool_offset(1174), 113}, - {pool_offset(1136), 182}, + {pool_offset(1175), 485}, - {pool_offset(1137), 328}, + {pool_offset(1176), 86}, - {pool_offset(1138), 126}, - {-1}, {-1}, + {pool_offset(1177), 191}, + {-1}, - {pool_offset(1141), 304}, + {pool_offset(1179), 278}, {-1}, - {pool_offset(1143), 257}, + {pool_offset(1181), 312}, + {-1}, - {pool_offset(1144), 302}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1183), 3}, + {-1}, {-1}, {-1}, - {pool_offset(1149), 454}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1187), 222}, + {-1}, {-1}, {-1}, - {pool_offset(1154), 378}, - {-1}, + {pool_offset(1191), 120}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1156), 16}, + {pool_offset(1199), 176}, - {pool_offset(1157), 499}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1200), 67}, + {-1}, - {pool_offset(1162), 297}, + {pool_offset(1202), 340}, - {pool_offset(1163), 448}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1203), 353}, - {pool_offset(1169), 132}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1204), 193}, - {pool_offset(1178), 240}, + {pool_offset(1205), 19}, - {pool_offset(1179), 315}, - {-1}, + {pool_offset(1206), 280}, 
- {pool_offset(1181), 390}, + {pool_offset(1207), 117}, - {pool_offset(1182), 58}, + {pool_offset(1208), 512}, + {-1}, {-1}, {-1}, - {pool_offset(1183), 3}, + {pool_offset(1212), 145}, {-1}, {-1}, - {pool_offset(1186), 72}, + {pool_offset(1215), 350}, - {pool_offset(1187), 219}, + {pool_offset(1216), 176}, {-1}, - {pool_offset(1189), 58}, + {pool_offset(1218), 415}, {-1}, {-1}, - {pool_offset(1192), 48}, - {-1}, + {pool_offset(1221), 462}, - {pool_offset(1194), 95}, - {-1}, {-1}, + {pool_offset(1222), 192}, - {pool_offset(1197), 126}, + {pool_offset(1223), 332}, {-1}, {-1}, {-1}, - {pool_offset(1201), 237}, + {pool_offset(1227), 308}, - {pool_offset(1202), 336}, + {pool_offset(1228), 388}, + {-1}, {-1}, - {pool_offset(1203), 349}, + {pool_offset(1231), 213}, {-1}, - {pool_offset(1205), 19}, + {pool_offset(1233), 375}, + {-1}, {-1}, {-1}, - {pool_offset(1206), 276}, + {pool_offset(1237), 217}, {-1}, - {pool_offset(1208), 504}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1239), 150}, - {pool_offset(1215), 346}, + {pool_offset(1240), 492}, {-1}, - {pool_offset(1217), 17}, + {pool_offset(1242), 126}, + {-1}, {-1}, {-1}, - {pool_offset(1218), 411}, + {pool_offset(1246), 29}, - {pool_offset(1219), 240}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1247), 571}, - {pool_offset(1224), 116}, + {pool_offset(1248), 301}, {-1}, {-1}, {-1}, - {pool_offset(1228), 384}, + {pool_offset(1252), 83}, {-1}, {-1}, - {pool_offset(1231), 210}, - {-1}, - - {pool_offset(1233), 135}, - - {pool_offset(1234), 135}, - - {pool_offset(1235), 290}, - {-1}, - - {pool_offset(1237), 214}, + {pool_offset(1255), 439}, - {pool_offset(1238), 124}, + {pool_offset(1256), 199}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1240), 107}, - {-1}, + {pool_offset(1267), 137}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1242), 95}, - {-1}, + {pool_offset(1273), 53}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1244), 255}, - {-1}, + {pool_offset(1278), 214}, + {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, - {pool_offset(1246), 129}, + {pool_offset(1286), 215}, - {pool_offset(1247), 560}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1287), 341}, + {-1}, {-1}, {-1}, - {pool_offset(1262), 159}, + {pool_offset(1291), 77}, {-1}, {-1}, - {pool_offset(1265), 159}, + {pool_offset(1294), 499}, {-1}, - {pool_offset(1267), 134}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1296), 235}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1275), 115}, + {pool_offset(1303), 17}, - {pool_offset(1276), 279}, + {pool_offset(1304), 53}, {-1}, - {pool_offset(1278), 211}, - {-1}, {-1}, - - {pool_offset(1281), 116}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1306), 34}, + {-1}, {-1}, {-1}, - {pool_offset(1286), 307}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1310), 119}, + {-1}, {-1}, - {pool_offset(1292), 183}, - {-1}, {-1}, {-1}, + {pool_offset(1313), 500}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1296), 232}, + {pool_offset(1322), 220}, {-1}, {-1}, {-1}, - {pool_offset(1300), 319}, - {-1}, + {pool_offset(1326), 109}, + {-1}, {-1}, - {pool_offset(1302), 131}, + {pool_offset(1329), 92}, {-1}, - {pool_offset(1304), 145}, + {pool_offset(1331), 166}, {-1}, - {pool_offset(1306), 34}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1333), 456}, + {-1}, {-1}, {-1}, - {pool_offset(1311), 82}, + {pool_offset(1337), 201}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1312), 477}, + {pool_offset(1343), 76}, {-1}, {-1}, {-1}, - {pool_offset(1316), 497}, + {pool_offset(1347), 8}, - {pool_offset(1317), 21}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1348), 92}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1322), 217}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1354), 356}, + {-1}, - {pool_offset(1329), 90}, - {-1}, {-1}, {-1}, + {pool_offset(1356), 6}, - {pool_offset(1333), 244}, + {pool_offset(1357), 555}, {-1}, {-1}, {-1}, - {pool_offset(1337), 198}, + 
{pool_offset(1361), 117}, + {-1}, {-1}, - {pool_offset(1338), 267}, + {pool_offset(1364), 569}, + {-1}, {-1}, - {pool_offset(1339), 421}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1367), 119}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1347), 8}, + {pool_offset(1373), 31}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1348), 90}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1380), 127}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1354), 83}, + {pool_offset(1388), 556}, + {-1}, - {pool_offset(1355), 445}, + {pool_offset(1390), 148}, + {-1}, - {pool_offset(1356), 184}, + {pool_offset(1392), 161}, + {-1}, - {pool_offset(1357), 545}, + {pool_offset(1394), 546}, {-1}, {-1}, - {pool_offset(1360), 487}, + {pool_offset(1397), 84}, - {pool_offset(1361), 474}, + {pool_offset(1398), 93}, - {pool_offset(1362), 455}, - {-1}, - - {pool_offset(1364), 558}, + {pool_offset(1399), 390}, - {pool_offset(1365), 74}, + {pool_offset(1400), 57}, {-1}, {-1}, - {pool_offset(1368), 251}, - {-1}, {-1}, {-1}, {-1}, - - {pool_offset(1373), 31}, + {pool_offset(1403), 93}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1379), 204}, - - {pool_offset(1380), 124}, - {-1}, {-1}, {-1}, + {pool_offset(1409), 372}, + {-1}, {-1}, - {pool_offset(1384), 160}, - {-1}, {-1}, {-1}, + {pool_offset(1412), 96}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1388), 546}, - {-1}, {-1}, {-1}, + {pool_offset(1418), 151}, + {-1}, {-1}, - {pool_offset(1392), 158}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1421), 231}, + {-1}, {-1}, - {pool_offset(1398), 111}, + {pool_offset(1424), 478}, - {pool_offset(1399), 386}, - {-1}, + {pool_offset(1425), 358}, + {-1}, {-1}, - {pool_offset(1401), 274}, + {pool_offset(1428), 151}, {-1}, - {pool_offset(1403), 469}, - {-1}, {-1}, + {pool_offset(1430), 225}, + {-1}, {-1}, {-1}, - {pool_offset(1406), 383}, + {pool_offset(1434), 96}, {-1}, {-1}, - {pool_offset(1409), 84}, + {pool_offset(1437), 540}, {-1}, - {pool_offset(1411), 162}, + 
{pool_offset(1439), 488}, - {pool_offset(1412), 33}, - {-1}, + {pool_offset(1440), 85}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1414), 168}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1445), 57}, - {pool_offset(1421), 228}, - {-1}, + {pool_offset(1446), 493}, - {pool_offset(1423), 166}, + {pool_offset(1447), 75}, - {pool_offset(1424), 166}, - {-1}, {-1}, + {pool_offset(1448), 459}, + {-1}, - {pool_offset(1427), 178}, + {pool_offset(1450), 106}, {-1}, {-1}, - {pool_offset(1430), 222}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1453), 220}, + {-1}, - {pool_offset(1435), 106}, + {pool_offset(1455), 535}, + + {pool_offset(1456), 100}, + + {pool_offset(1457), 469}, {-1}, - {pool_offset(1437), 193}, - {-1}, {-1}, + {pool_offset(1459), 135}, - {pool_offset(1440), 107}, + {pool_offset(1460), 470}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1443), 458}, + {pool_offset(1472), 559}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1444), 456}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1480), 40}, - {pool_offset(1449), 529}, - {-1}, {-1}, {-1}, + {pool_offset(1481), 165}, - {pool_offset(1453), 217}, + {pool_offset(1482), 48}, {-1}, - {pool_offset(1455), 371}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1484), 113}, - {pool_offset(1461), 147}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1485), 60}, + {-1}, {-1}, {-1}, - {pool_offset(1467), 85}, + {pool_offset(1489), 473}, - {pool_offset(1468), 29}, + {pool_offset(1490), 232}, + {-1}, {-1}, + + {pool_offset(1493), 182}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1473), 326}, + {pool_offset(1498), 33}, + {-1}, - {pool_offset(1474), 81}, + {pool_offset(1500), 526}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1475), 15}, + {pool_offset(1508), 107}, {-1}, - {pool_offset(1477), 196}, + {pool_offset(1510), 32}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1519), 447}, {-1}, - {pool_offset(1479), 186}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
+ {pool_offset(1521), 108}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1486), 67}, - {-1}, {-1}, + {pool_offset(1526), 109}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1489), 300}, - {-1}, + {pool_offset(1531), 575}, - {pool_offset(1491), 136}, + {pool_offset(1532), 335}, - {pool_offset(1492), 136}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1533), 181}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {pool_offset(1499), 351}, + {pool_offset(1545), 333}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1506), 47}, - - {pool_offset(1507), 316}, + {pool_offset(1552), 65}, - {pool_offset(1508), 212}, - - {pool_offset(1509), 337}, + {pool_offset(1553), 87}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1513), 75}, + {pool_offset(1566), 398}, + {-1}, {-1}, - {pool_offset(1514), 510}, + {pool_offset(1569), 175}, {-1}, - {pool_offset(1516), 493}, + {pool_offset(1571), 204}, + {-1}, {-1}, {-1}, - {pool_offset(1517), 239}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1575), 562}, + {-1}, - {pool_offset(1524), 388}, + {pool_offset(1577), 267}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1525), 53}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1587), 129}, + {-1}, {-1}, - {pool_offset(1532), 331}, + {pool_offset(1590), 323}, {-1}, {-1}, - {pool_offset(1535), 343}, + {pool_offset(1593), 65}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1598), 458}, + {-1}, + + {pool_offset(1600), 518}, {-1}, {-1}, - {pool_offset(1538), 128}, + {pool_offset(1603), 382}, {-1}, {-1}, - {pool_offset(1541), 67}, + {pool_offset(1606), 506}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1542), 123}, - {-1}, {-1}, + {pool_offset(1612), 452}, + {-1}, - {pool_offset(1545), 30}, + {pool_offset(1614), 59}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1553), 12}, - - {pool_offset(1554), 88}, + {pool_offset(1622), 570}, + {-1}, - {pool_offset(1555), 435}, + {pool_offset(1624), 
222}, {-1}, - {pool_offset(1557), 30}, - {-1}, {-1}, + {pool_offset(1626), 377}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1560), 61}, + {pool_offset(1631), 30}, {-1}, - {pool_offset(1562), 287}, + {pool_offset(1633), 32}, {-1}, {-1}, - {pool_offset(1565), 88}, + {pool_offset(1636), 1}, + {-1}, {-1}, - {pool_offset(1566), 394}, + {pool_offset(1639), 457}, + {-1}, {-1}, {-1}, - {pool_offset(1567), 567}, + {pool_offset(1643), 30}, {-1}, {-1}, - {pool_offset(1570), 109}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1646), 129}, + {-1}, - {pool_offset(1576), 352}, + {pool_offset(1648), 445}, + {-1}, - {pool_offset(1577), 60}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1650), 331}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1582), 288}, + {pool_offset(1658), 254}, + {-1}, {-1}, - {pool_offset(1583), 324}, + {pool_offset(1661), 196}, {-1}, - {pool_offset(1585), 526}, - {-1}, {-1}, {-1}, - - {pool_offset(1589), 237}, + {pool_offset(1663), 235}, {-1}, - {pool_offset(1591), 65}, - {-1}, {-1}, + {pool_offset(1665), 180}, - {pool_offset(1594), 97}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1666), 186}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1601), 404}, + {pool_offset(1672), 258}, {-1}, - {pool_offset(1603), 224}, + {pool_offset(1674), 404}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1604), 50}, - {-1}, + {pool_offset(1680), 296}, + {-1}, {-1}, {-1}, - {pool_offset(1606), 221}, + {pool_offset(1684), 476}, + {-1}, {-1}, - {pool_offset(1607), 65}, + {pool_offset(1687), 127}, {-1}, - {pool_offset(1609), 188}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1689), 227}, + {-1}, {-1}, - {pool_offset(1615), 109}, + {pool_offset(1692), 224}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1697), 368}, + + {pool_offset(1698), 290}, + + {pool_offset(1699), 260}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1621), 140}, + {pool_offset(1714), 315}, - {pool_offset(1622), 559}, - {-1}, + 
{pool_offset(1715), 487}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1624), 219}, + {pool_offset(1725), 283}, {-1}, - {pool_offset(1626), 140}, + {pool_offset(1727), 97}, {-1}, - {pool_offset(1628), 311}, + {pool_offset(1729), 23}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1737), 233}, + {-1}, {-1}, - {pool_offset(1629), 481}, - {-1}, + {pool_offset(1740), 149}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1631), 173}, - {-1}, + {pool_offset(1748), 184}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1633), 452}, + {pool_offset(1754), 9}, {-1}, {-1}, - {pool_offset(1636), 190}, - {-1}, + {pool_offset(1757), 59}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1638), 195}, - {-1}, {-1}, {-1}, + {pool_offset(1766), 21}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1642), 24}, + {pool_offset(1774), 44}, + + {pool_offset(1775), 97}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1645), 561}, + {pool_offset(1787), 271}, - {pool_offset(1646), 473}, + {pool_offset(1788), 425}, - {pool_offset(1647), 354}, + {pool_offset(1789), 138}, - {pool_offset(1648), 173}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1790), 138}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1654), 189}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1808), 243}, + {-1}, {-1}, - {pool_offset(1661), 193}, - {-1}, + {pool_offset(1811), 394}, + {-1}, {-1}, + + {pool_offset(1814), 132}, - {pool_offset(1663), 232}, + {pool_offset(1815), 54}, {-1}, {-1}, - {pool_offset(1666), 357}, + {pool_offset(1818), 162}, {-1}, {-1}, - {pool_offset(1669), 73}, + {pool_offset(1821), 162}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1828), 131}, {-1}, {-1}, - {pool_offset(1672), 104}, + {pool_offset(1831), 240}, - {pool_offset(1673), 241}, + {pool_offset(1832), 69}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1688), 44}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1849), 243}, - {pool_offset(1694), 549}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1850), 15}, - {pool_offset(1700), 56}, - {-1}, + {pool_offset(1851), 266}, + {-1}, {-1}, - {pool_offset(1702), 40}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1854), 311}, + {-1}, {-1}, {-1}, - {pool_offset(1708), 100}, + {pool_offset(1858), 86}, + {-1}, {-1}, {-1}, - {pool_offset(1709), 359}, - {-1}, {-1}, + {pool_offset(1862), 35}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1712), 229}, - {-1}, {-1}, + {pool_offset(1870), 166}, - {pool_offset(1715), 179}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1871), 232}, - {pool_offset(1720), 244}, + {pool_offset(1872), 504}, + {-1}, {-1}, + + {pool_offset(1875), 393}, {-1}, - {pool_offset(1722), 518}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1877), 533}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1729), 366}, - {-1}, {-1}, {-1}, + {pool_offset(1886), 496}, - {pool_offset(1733), 417}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1887), 69}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1743), 422}, - {-1}, + {pool_offset(1893), 207}, - {pool_offset(1745), 56}, + {pool_offset(1894), 50}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1900), 35}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1750), 231}, - {-1}, {-1}, + {pool_offset(1905), 369}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1912), 170}, - {pool_offset(1753), 563}, + {pool_offset(1913), 534}, {-1}, {-1}, - {pool_offset(1756), 423}, + {pool_offset(1916), 178}, - {pool_offset(1757), 465}, - {-1}, {-1}, + {pool_offset(1917), 480}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1760), 466}, - {-1}, {-1}, + {pool_offset(1922), 330}, + {-1}, - {pool_offset(1763), 152}, + {pool_offset(1924), 385}, - {pool_offset(1764), 15}, + 
{pool_offset(1925), 511}, + + {pool_offset(1926), 401}, {-1}, - {pool_offset(1766), 409}, + {pool_offset(1928), 307}, + {-1}, - {pool_offset(1767), 332}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(1930), 206}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1781), 162}, + {pool_offset(1938), 304}, + {-1}, - {pool_offset(1782), 49}, - {-1}, {-1}, + {pool_offset(1940), 406}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1785), 59}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(1947), 366}, - {pool_offset(1791), 523}, + {pool_offset(1948), 355}, {-1}, {-1}, - {pool_offset(1794), 263}, - {-1}, {-1}, + {pool_offset(1951), 163}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {pool_offset(1797), 552}, + {pool_offset(1962), 387}, - {pool_offset(1798), 24}, + {pool_offset(1963), 247}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + + {pool_offset(1984), 347}, + + {pool_offset(1985), 449}, + {-1}, {-1}, {-1}, - {pool_offset(1808), 105}, + {pool_offset(1989), 15}, {-1}, - {pool_offset(1810), 32}, + {pool_offset(1991), 44}, {-1}, - {pool_offset(1812), 180}, + {pool_offset(1993), 442}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(1998), 314}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1819), 443}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, + {pool_offset(2005), 539}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1830), 407}, - {-1}, {-1}, {-1}, + {pool_offset(2011), 291}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1834), 138}, + {pool_offset(2016), 579}, + {-1}, + + {pool_offset(2018), 543}, {-1}, {-1}, {-1}, - {pool_offset(1838), 381}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2022), 298}, - {pool_offset(1845), 138}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + 
{pool_offset(2023), 421}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1871), 201}, + {pool_offset(2028), 205}, {-1}, {-1}, {-1}, - {pool_offset(1875), 389}, - {-1}, {-1}, + {pool_offset(2032), 328}, - {pool_offset(1878), 228}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2033), 199}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(1897), 252}, + {pool_offset(2038), 536}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1898), 564}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, + {pool_offset(2047), 139}, - {pool_offset(1910), 234}, + {pool_offset(2048), 139}, + {-1}, - {pool_offset(1911), 92}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2050), 408}, + {-1}, {-1}, {-1}, - {pool_offset(1926), 373}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2054), 574}, - {pool_offset(1932), 291}, + {pool_offset(2055), 374}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1933), 32}, + {pool_offset(2072), 49}, {-1}, {-1}, - {pool_offset(1936), 1}, + {pool_offset(2075), 320}, - {pool_offset(1937), 117}, - {-1}, + {pool_offset(2076), 223}, - {pool_offset(1939), 453}, + {pool_offset(2077), 309}, {-1}, {-1}, - {pool_offset(1942), 502}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2080), 392}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1948), 441}, - {-1}, + {pool_offset(2087), 90}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1950), 327}, + {pool_offset(2097), 177}, - {pool_offset(1951), 23}, - {-1}, {-1}, {-1}, + {pool_offset(2098), 90}, + {-1}, {-1}, - {pool_offset(1955), 340}, + {pool_offset(2101), 188}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2106), 253}, {-1}, {-1}, - {pool_offset(1958), 426}, + {pool_offset(2109), 12}, - {pool_offset(1959), 230}, + {pool_offset(2110), 191}, {-1}, {-1}, {-1}, {-1}, - 
{pool_offset(1964), 430}, + {pool_offset(2115), 361}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1965), 178}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2123), 37}, + {-1}, {-1}, - {pool_offset(1972), 42}, + {pool_offset(2126), 103}, + {-1}, {-1}, - {pool_offset(1973), 382}, + {pool_offset(2129), 223}, {-1}, - {pool_offset(1975), 98}, + {pool_offset(2131), 545}, - {pool_offset(1976), 392}, - {-1}, {-1}, + {pool_offset(2132), 171}, - {pool_offset(1979), 401}, + {pool_offset(2133), 61}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1984), 5}, - {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(1990), 220}, - - {pool_offset(1991), 305}, + {pool_offset(2147), 242}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(1996), 399}, + {pool_offset(2152), 126}, + {-1}, {-1}, {-1}, - {pool_offset(1997), 364}, + {pool_offset(2156), 5}, - {pool_offset(1998), 286}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2157), 102}, + + {pool_offset(2158), 363}, + {-1}, {-1}, {-1}, - {pool_offset(2004), 459}, + {pool_offset(2162), 269}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2014), 194}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2177), 143}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2022), 294}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2182), 143}, + {-1}, {-1}, {-1}, - {pool_offset(2029), 273}, + {pool_offset(2186), 225}, {-1}, - {pool_offset(2031), 226}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(2188), 58}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2036), 133}, - {-1}, + {pool_offset(2194), 198}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2038), 250}, + {pool_offset(2200), 111}, - {pool_offset(2039), 341}, + {pool_offset(2201), 157}, - {pool_offset(2040), 146}, - {-1}, {-1}, + {pool_offset(2202), 272}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2043), 220}, - {-1}, {-1}, {-1}, + {pool_offset(2207), 79}, + {-1}, {-1}, {-1}, {-1}, - 
{pool_offset(2047), 403}, + {pool_offset(2212), 292}, - {pool_offset(2048), 234}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2213), 572}, - {pool_offset(2054), 9}, + {pool_offset(2214), 62}, {-1}, - {pool_offset(2056), 96}, - {-1}, {-1}, {-1}, {-1}, - - {pool_offset(2061), 79}, + {pool_offset(2216), 95}, {-1}, {-1}, - {pool_offset(2064), 303}, - {-1}, + {pool_offset(2219), 240}, + {-1}, {-1}, - {pool_offset(2066), 203}, + {pool_offset(2222), 94}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2076), 402}, + {pool_offset(2232), 188}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2082), 256}, + {pool_offset(2238), 267}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2243), 295}, {-1}, - {pool_offset(2084), 35}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(2245), 111}, + {-1}, {-1}, {-1}, - {pool_offset(2089), 241}, + {pool_offset(2249), 542}, {-1}, {-1}, {-1}, - {pool_offset(2093), 229}, + {pool_offset(2253), 509}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2094), 181}, - {-1}, {-1}, + {pool_offset(2261), 183}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2097), 177}, - {-1}, {-1}, + {pool_offset(2267), 81}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2100), 222}, + {pool_offset(2272), 24}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2277), 184}, + + {pool_offset(2278), 337}, {-1}, - {pool_offset(2102), 391}, + {pool_offset(2280), 413}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2286), 100}, + {-1}, {-1}, {-1}, + + {pool_offset(2290), 405}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2300), 182}, {-1}, {-1}, - {pool_offset(2105), 475}, + {pool_offset(2303), 244}, {-1}, {-1}, - {pool_offset(2108), 490}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(2306), 234}, - {pool_offset(2113), 245}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(2307), 270}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {pool_offset(2118), 245}, - {-1}, + {pool_offset(2319), 155}, + {-1}, {-1}, {-1}, - 
{pool_offset(2120), 246}, + {pool_offset(2323), 168}, {-1}, - {pool_offset(2122), 35}, - {-1}, {-1}, {-1}, {-1}, - - {pool_offset(2127), 365}, - {-1}, + {pool_offset(2325), 197}, + {-1}, {-1}, {-1}, - {pool_offset(2129), 4}, - {-1}, {-1}, + {pool_offset(2329), 78}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2132), 60}, + {pool_offset(2335), 185}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2149), 233}, + {pool_offset(2350), 247}, - {pool_offset(2150), 333}, + {pool_offset(2351), 444}, + {-1}, {-1}, - {pool_offset(2151), 262}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2354), 167}, + + {pool_offset(2355), 0}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2159), 367}, + {pool_offset(2364), 47}, {-1}, {-1}, {-1}, - {pool_offset(2163), 532}, + {pool_offset(2368), 206}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2168), 556}, - {-1}, + {pool_offset(2373), 426}, + {-1}, {-1}, {-1}, - {pool_offset(2170), 163}, + {pool_offset(2377), 172}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2176), 249}, + {pool_offset(2383), 370}, + {-1}, {-1}, + + {pool_offset(2386), 427}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2394), 78}, + {-1}, {-1}, + + {pool_offset(2397), 336}, {-1}, - {pool_offset(2178), 249}, + {pool_offset(2399), 81}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2184), 335}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2414), 80}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2193), 79}, + {pool_offset(2421), 42}, {-1}, {-1}, {-1}, - {pool_offset(2197), 256}, - {-1}, + {pool_offset(2425), 396}, - {pool_offset(2199), 289}, + {pool_offset(2426), 293}, {-1}, - {pool_offset(2201), 164}, + {pool_offset(2428), 24}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2206), 242}, + {pool_offset(2433), 338}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + 
{pool_offset(2439), 77}, + {-1}, {-1}, {-1}, - {pool_offset(2207), 257}, + {pool_offset(2443), 325}, {-1}, {-1}, - {pool_offset(2210), 236}, + {pool_offset(2446), 231}, + {-1}, - {pool_offset(2211), 182}, + {pool_offset(2448), 527}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2212), 167}, + {pool_offset(2453), 141}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2213), 524}, + {pool_offset(2460), 411}, + {-1}, {-1}, {-1}, - {pool_offset(2214), 51}, + {pool_offset(2464), 141}, {-1}, - {pool_offset(2216), 321}, - {-1}, {-1}, {-1}, + {pool_offset(2466), 237}, - {pool_offset(2220), 243}, - {-1}, {-1}, + {pool_offset(2467), 36}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2223), 172}, - {-1}, {-1}, + {pool_offset(2474), 269}, - {pool_offset(2226), 238}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2475), 4}, {-1}, - {pool_offset(2237), 213}, - {-1}, {-1}, - - {pool_offset(2240), 533}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2477), 255}, - {pool_offset(2250), 323}, + {pool_offset(2478), 277}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2251), 160}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2484), 195}, {-1}, {-1}, {-1}, - {pool_offset(2264), 385}, + {pool_offset(2488), 345}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, - - {pool_offset(2277), 258}, + {-1}, {-1}, - {pool_offset(2278), 218}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2500), 505}, - {pool_offset(2285), 540}, + {pool_offset(2501), 334}, {-1}, {-1}, - {pool_offset(2288), 317}, - {-1}, {-1}, {-1}, {-1}, - - {pool_offset(2293), 438}, + {pool_offset(2504), 51}, {-1}, - {pool_offset(2295), 330}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2506), 386}, + {-1}, {-1}, {-1}, - {pool_offset(2305), 334}, - {-1}, {-1}, + {pool_offset(2510), 193}, - {pool_offset(2308), 20}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, - - 
{pool_offset(2320), 183}, + {pool_offset(2511), 344}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2325), 62}, + {pool_offset(2516), 417}, + {-1}, {-1}, {-1}, + + {pool_offset(2520), 434}, + {-1}, {-1}, - {pool_offset(2326), 92}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2523), 253}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2333), 165}, + {pool_offset(2528), 576}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {pool_offset(2343), 77}, - {-1}, + {pool_offset(2541), 260}, + {-1}, {-1}, - {pool_offset(2345), 93}, + {pool_offset(2544), 133}, + {-1}, {-1}, {-1}, - {pool_offset(2346), 171}, - {-1}, + {pool_offset(2548), 216}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2348), 175}, + {pool_offset(2556), 120}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2349), 557}, + {pool_offset(2562), 48}, {-1}, {-1}, - {pool_offset(2352), 93}, + {pool_offset(2565), 172}, + {-1}, {-1}, {-1}, - {pool_offset(2353), 535}, + {pool_offset(2569), 187}, {-1}, - {pool_offset(2355), 370}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2571), 205}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2364), 164}, + {pool_offset(2579), 174}, - {pool_offset(2365), 0}, + {pool_offset(2580), 471}, + {-1}, - {pool_offset(2366), 123}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2582), 343}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2375), 63}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2587), 229}, - {pool_offset(2381), 36}, + {pool_offset(2588), 430}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {pool_offset(2391), 362}, - {-1}, {-1}, {-1}, + {pool_offset(2599), 212}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2395), 432}, + {pool_offset(2604), 237}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2400), 225}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2609), 529}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2410), 57}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + 
{pool_offset(2614), 245}, + {-1}, {-1}, - {pool_offset(2418), 342}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2617), 566}, - {pool_offset(2426), 101}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2618), 239}, + {-1}, {-1}, {-1}, - {pool_offset(2442), 310}, + {pool_offset(2622), 436}, + {-1}, {-1}, {-1}, - {pool_offset(2443), 242}, - {-1}, + {pool_offset(2626), 403}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2445), 184}, - {-1}, + {pool_offset(2633), 339}, - {pool_offset(2447), 185}, + {pool_offset(2634), 463}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2448), 397}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2640), 481}, + {-1}, {-1}, - {pool_offset(2455), 250}, + {pool_offset(2643), 64}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2456), 188}, + {pool_offset(2662), 424}, {-1}, {-1}, {-1}, - {pool_offset(2460), 427}, + {pool_offset(2666), 136}, {-1}, - {pool_offset(2462), 513}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2668), 50}, - {pool_offset(2469), 37}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2669), 230}, + {-1}, {-1}, {-1}, - {pool_offset(2478), 168}, + {pool_offset(2673), 371}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2480), 233}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2684), 226}, + {-1}, {-1}, {-1}, - {pool_offset(2487), 440}, + {pool_offset(2688), 61}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2498), 312}, + {pool_offset(2699), 327}, {-1}, {-1}, {-1}, - {pool_offset(2502), 268}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2703), 380}, + {-1}, + + {pool_offset(2705), 236}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2516), 313}, + {pool_offset(2710), 179}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2523), 221}, - {-1}, {-1}, 
{-1}, {-1}, {-1}, + {pool_offset(2717), 175}, + {-1}, - {pool_offset(2529), 174}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2719), 244}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2538), 542}, + {pool_offset(2724), 259}, + {-1}, + + {pool_offset(2726), 195}, + + {pool_offset(2727), 221}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2733), 192}, {-1}, {-1}, {-1}, - {pool_offset(2542), 174}, - {-1}, + {pool_offset(2737), 321}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2544), 248}, + {pool_offset(2743), 317}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {pool_offset(2554), 376}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2755), 360}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2561), 176}, + {pool_offset(2761), 118}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {pool_offset(2571), 387}, - {-1}, + {pool_offset(2774), 567}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2573), 114}, + {pool_offset(2789), 66}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2578), 185}, + {pool_offset(2794), 49}, {-1}, {-1}, {-1}, - {pool_offset(2582), 203}, - {-1}, - - {pool_offset(2584), 418}, - {-1}, {-1}, {-1}, + {pool_offset(2798), 568}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2588), 70}, + {pool_offset(2805), 66}, {-1}, {-1}, - {pool_offset(2591), 169}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2808), 252}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2607), 266}, + {pool_offset(2815), 72}, {-1}, - {pool_offset(2609), 247}, - {-1}, + {pool_offset(2817), 80}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2611), 78}, + {pool_offset(2825), 549}, {-1}, {-1}, {-1}, - {pool_offset(2615), 247}, - - {pool_offset(2616), 223}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2829), 248}, + {-1}, - {pool_offset(2622), 97}, + {pool_offset(2831), 167}, - {pool_offset(2623), 248}, - {-1}, {-1}, 
{-1}, + {pool_offset(2832), 194}, + {-1}, - {pool_offset(2627), 511}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(2834), 248}, + {-1}, - {pool_offset(2632), 239}, + {pool_offset(2836), 249}, + {-1}, {-1}, - {pool_offset(2633), 424}, + {pool_offset(2839), 259}, + {-1}, - {pool_offset(2634), 541}, - {-1}, {-1}, {-1}, + {pool_offset(2841), 550}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2638), 118}, + {pool_offset(2850), 246}, {-1}, {-1}, - {pool_offset(2641), 428}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, + {pool_offset(2853), 177}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2652), 413}, + {pool_offset(2859), 94}, + {-1}, {-1}, - {pool_offset(2653), 71}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2862), 36}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2669), 215}, + {pool_offset(2867), 346}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2692), 27}, - {-1}, {-1}, {-1}, + {pool_offset(2891), 563}, - {pool_offset(2696), 71}, + {pool_offset(2892), 252}, {-1}, - {pool_offset(2698), 48}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, + {pool_offset(2894), 364}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2709), 181}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2899), 224}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(2725), 544}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2904), 349}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2914), 422}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2919), 261}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(2767), 265}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
+ {-1}, {-1}, - {pool_offset(2775), 70}, + {pool_offset(2940), 173}, + {-1}, - {pool_offset(2776), 36}, - {-1}, {-1}, + {pool_offset(2942), 241}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {pool_offset(2779), 169}, + {pool_offset(2962), 190}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2785), 202}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2977), 522}, + {-1}, - {pool_offset(2794), 467}, - {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2979), 395}, - {pool_offset(2800), 498}, + {pool_offset(2980), 164}, - {pool_offset(2801), 154}, - {-1}, {-1}, + {pool_offset(2981), 554}, + {-1}, - {pool_offset(2804), 50}, + {pool_offset(2983), 99}, {-1}, {-1}, {-1}, - {pool_offset(2808), 539}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(2987), 73}, - {pool_offset(2813), 209}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, - - {pool_offset(2824), 258}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(2988), 164}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2831), 161}, + {pool_offset(2997), 521}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2836), 19}, - {-1}, + {pool_offset(3002), 72}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2838), 59}, + {pool_offset(3010), 51}, + {-1}, {-1}, {-1}, - {pool_offset(2839), 161}, - {-1}, + {pool_offset(3014), 121}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2841), 551}, + {pool_offset(3024), 285}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2847), 68}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3030), 73}, + {-1}, {-1}, - {pool_offset(2856), 190}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3033), 316}, + {-1}, {-1}, - {pool_offset(2875), 281}, + {pool_offset(3036), 236}, + {-1}, - {pool_offset(2876), 420}, + 
{pool_offset(3038), 98}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, - - {pool_offset(2890), 130}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2916), 76}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, + {pool_offset(3073), 245}, + {-1}, {-1}, - {pool_offset(2930), 49}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3076), 519}, + {-1}, {-1}, {-1}, + + {pool_offset(3080), 261}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2953), 200}, - - {pool_offset(2954), 179}, + {pool_offset(3094), 60}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {pool_offset(2970), 519}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3105), 179}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2977), 231}, + {pool_offset(3115), 70}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(2996), 265}, + {pool_offset(3143), 75}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {pool_offset(3012), 47}, + {pool_offset(3154), 189}, {-1}, {-1}, - {pool_offset(3015), 227}, + {pool_offset(3157), 552}, + {-1}, + + {pool_offset(3159), 173}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3020), 270}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3164), 209}, + {-1}, {-1}, + + {pool_offset(3167), 64}, + + {pool_offset(3168), 428}, + + {pool_offset(3169), 551}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
- {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3072), 192}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(3079), 189}, + {pool_offset(3210), 407}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3084), 278}, - {-1}, {-1}, {-1}, + {pool_offset(3215), 116}, + {-1}, {-1}, - {pool_offset(3088), 176}, - {-1}, {-1}, {-1}, + {pool_offset(3218), 194}, + {-1}, {-1}, - {pool_offset(3092), 191}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3221), 203}, + {-1}, {-1}, - {pool_offset(3099), 451}, + {pool_offset(3224), 95}, + + {pool_offset(3225), 218}, + {-1}, + + {pool_offset(3227), 544}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3118), 345}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(3146), 51}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3154), 170}, + {pool_offset(3295), 432}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3337), 373}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3182), 339}, + {pool_offset(3347), 18}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3188), 187}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3353), 389}, + {-1}, - {pool_offset(3197), 414}, + {pool_offset(3355), 455}, + {-1}, {-1}, - {pool_offset(3198), 18}, + {pool_offset(3358), 431}, + {-1}, {-1}, - {pool_offset(3199), 63}, + {pool_offset(3361), 99}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3216), 192}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3376), 561}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3258), 55}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3267), 42}, + {pool_offset(3395), 216}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + + {pool_offset(3408), 27}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3285), 172}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3417), 228}, {-1}, {-1}, - {pool_offset(3306), 213}, + {pool_offset(3420), 20}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3324), 236}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3436), 79}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3350), 99}, - {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3355), 356}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3444), 47}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3461), 63}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3477), 420}, + {-1}, + + {pool_offset(3479), 501}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3497), 10}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3413), 514}, + {pool_offset(3510), 418}, 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3459), 170}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3478), 191}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3491), 553}, + {pool_offset(3577), 226}, {-1}, {-1}, - {pool_offset(3494), 360}, + {pool_offset(3580), 42}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3500), 186}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3595), 68}, + {-1}, {-1}, {-1}, - {pool_offset(3527), 416}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, + {pool_offset(3599), 251}, - {pool_offset(3538), 68}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3600), 256}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3663), 180}, - {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(3666), 235}, + 
{pool_offset(3659), 19}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {pool_offset(3685), 235}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3692), 379}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3680), 183}, {-1}, {-1}, - {pool_offset(3731), 223}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3683), 391}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(3750), 508}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, + {pool_offset(3688), 250}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3763), 509}, + {pool_offset(3694), 250}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3807), 10}, - {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3810), 206}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3730), 190}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3872), 77}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3881), 96}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3784), 516}, + {-1}, {-1}, {-1}, + + {pool_offset(3788), 577}, 
{-1}, - {pool_offset(3901), 243}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3790), 211}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3910), 253}, + {pool_offset(3797), 517}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(3937), 369}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {pool_offset(3946), 251}, + {pool_offset(3828), 251}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3961), 194}, + {pool_offset(3852), 238}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3971), 534}, + {pool_offset(3871), 238}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(3992), 350}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(3892), 242}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3909), 282}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3918), 274}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3969), 137}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + + {pool_offset(3982), 234}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, @@ -29358,79 +29895,65 @@ unicode_lookup_property_name (register const char *str, register size_t len) {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {pool_offset(4279), 134}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(4129), 419}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(4298), 187}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(4139), 101}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(4401), 415}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, + {pool_offset(4181), 239}, + {-1}, {-1}, {-1}, {-1}, - {pool_offset(4476), 358}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(4186), 254}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4201), 197}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, - - {pool_offset(4562), 377}, - - {pool_offset(4563), 69}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(4577), 554}, + {pool_offset(4269), 383}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + + {pool_offset(4309), 246}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4325), 362}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(4659), 171}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(4362), 70}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, + {-1}, {-1}, {-1}, - {pool_offset(4688), 565}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {pool_offset(4375), 71}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4425), 354}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, @@ -29439,19 +29962,19 @@ unicode_lookup_property_name (register const char *str, register size_t len) {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - - {pool_offset(4822), 208}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(4851), 69}, + {pool_offset(4533), 564}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4575), 381}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, @@ -29474,6 +29997,9 @@ unicode_lookup_property_name (register const char *str, register size_t len) {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4779), 174}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, @@ -29489,6 +30015,9 @@ unicode_lookup_property_name 
(register const char *str, register size_t len) {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4921), 56}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, @@ -29537,6 +30066,9 @@ unicode_lookup_property_name (register const char *str, register size_t len) {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(5358), 578}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, @@ -29546,6 +30078,9 @@ unicode_lookup_property_name (register const char *str, register size_t len) {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + + {pool_offset(5443), 98}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, @@ -29556,7 +30091,7 @@ unicode_lookup_property_name (register const char *str, register size_t len) {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {pool_offset(5809), 566} + {pool_offset(5519), 71} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) @@ -29580,11 +30115,11 @@ unicode_lookup_property_name (register const char *str, register size_t len) -#define UNICODE_PROPERTY_VERSION 120100 -#define UNICODE_EMOJI_VERSION 1201 +#define UNICODE_PROPERTY_VERSION 130000 +#define UNICODE_EMOJI_VERSION 1300 #define PROPERTY_NAME_MAX_SIZE 59 -#define CODE_RANGES_NUM 568 
+#define CODE_RANGES_NUM 580 #define PROP_INDEX_NEWLINE 0 #define PROP_INDEX_ALPHA 1 @@ -29671,719 +30206,740 @@ unicode_lookup_property_name (register const char *str, register size_t len) #define PROP_INDEX_CHANGESWHENUPPERCASED 51 #define PROP_INDEX_CHER 52 #define PROP_INDEX_CHEROKEE 52 -#define PROP_INDEX_CN 53 -#define PROP_INDEX_UNASSIGNED 53 -#define PROP_INDEX_PRIVATEUSE 54 -#define PROP_INDEX_CO 54 -#define PROP_INDEX_ZYYY 55 -#define PROP_INDEX_COMMON 55 -#define PROP_INDEX_COPT 56 -#define PROP_INDEX_COPTIC 56 -#define PROP_INDEX_QAAC 56 -#define PROP_INDEX_CS 57 -#define PROP_INDEX_SURROGATE 57 -#define PROP_INDEX_XSUX 58 -#define PROP_INDEX_CUNEIFORM 58 -#define PROP_INDEX_CPRT 59 -#define PROP_INDEX_CYPRIOT 59 -#define PROP_INDEX_CYRILLIC 60 -#define PROP_INDEX_CYRL 60 -#define PROP_INDEX_DASH 61 -#define PROP_INDEX_DEFAULTIGNORABLECODEPOINT 62 -#define PROP_INDEX_DI 62 -#define PROP_INDEX_DEP 63 -#define PROP_INDEX_DEPRECATED 63 -#define PROP_INDEX_DSRT 64 -#define PROP_INDEX_DESERET 64 -#define PROP_INDEX_DEVA 65 -#define PROP_INDEX_DEVANAGARI 65 -#define PROP_INDEX_DIACRITIC 66 -#define PROP_INDEX_DIA 66 -#define PROP_INDEX_DOGRA 67 -#define PROP_INDEX_DOGR 67 -#define PROP_INDEX_DUPL 68 -#define PROP_INDEX_DUPLOYAN 68 -#define PROP_INDEX_EGYPTIANHIEROGLYPHS 69 -#define PROP_INDEX_EGYP 69 -#define PROP_INDEX_ELBA 70 -#define PROP_INDEX_ELBASAN 70 -#define PROP_INDEX_ELYM 71 -#define PROP_INDEX_ELYMAIC 71 -#define PROP_INDEX_EMOJI 72 -#define PROP_INDEX_EMOJICOMPONENT 73 -#define PROP_INDEX_EMOJIMODIFIER 74 -#define PROP_INDEX_EMOJIMODIFIERBASE 75 -#define PROP_INDEX_EMOJIPRESENTATION 76 -#define PROP_INDEX_ETHIOPIC 77 -#define PROP_INDEX_ETHI 77 -#define PROP_INDEX_EXTENDEDPICTOGRAPHIC 78 -#define PROP_INDEX_EXT 79 -#define PROP_INDEX_EXTENDER 79 -#define PROP_INDEX_GEORGIAN 80 -#define PROP_INDEX_GEOR 80 -#define PROP_INDEX_GLAG 81 -#define PROP_INDEX_GLAGOLITIC 81 -#define PROP_INDEX_GOTH 82 -#define PROP_INDEX_GOTHIC 82 -#define PROP_INDEX_GRAN 
83 -#define PROP_INDEX_GRANTHA 83 -#define PROP_INDEX_GRBASE 84 -#define PROP_INDEX_GRAPHEMEBASE 84 -#define PROP_INDEX_GREXT 85 -#define PROP_INDEX_GRAPHEMEEXTEND 85 -#define PROP_INDEX_GRAPHEMELINK 86 -#define PROP_INDEX_GRLINK 86 -#define PROP_INDEX_GREEK 87 -#define PROP_INDEX_GREK 87 -#define PROP_INDEX_GUJARATI 88 -#define PROP_INDEX_GUJR 88 -#define PROP_INDEX_GUNJALAGONDI 89 -#define PROP_INDEX_GONG 89 -#define PROP_INDEX_GURU 90 -#define PROP_INDEX_GURMUKHI 90 -#define PROP_INDEX_HANI 91 -#define PROP_INDEX_HAN 91 -#define PROP_INDEX_HANG 92 -#define PROP_INDEX_HANGUL 92 -#define PROP_INDEX_HANIFIROHINGYA 93 -#define PROP_INDEX_ROHG 93 -#define PROP_INDEX_HANUNOO 94 -#define PROP_INDEX_HANO 94 -#define PROP_INDEX_HATR 95 -#define PROP_INDEX_HATRAN 95 -#define PROP_INDEX_HEBREW 96 -#define PROP_INDEX_HEBR 96 -#define PROP_INDEX_HEXDIGIT 97 -#define PROP_INDEX_HEX 97 -#define PROP_INDEX_HIRAGANA 98 -#define PROP_INDEX_HIRA 98 -#define PROP_INDEX_HYPHEN 99 -#define PROP_INDEX_IDSB 100 -#define PROP_INDEX_IDSBINARYOPERATOR 100 -#define PROP_INDEX_IDST 101 -#define PROP_INDEX_IDSTRINARYOPERATOR 101 -#define PROP_INDEX_IDCONTINUE 102 -#define PROP_INDEX_IDC 102 -#define PROP_INDEX_IDS 103 -#define PROP_INDEX_IDSTART 103 -#define PROP_INDEX_IDEO 104 -#define PROP_INDEX_IDEOGRAPHIC 104 -#define PROP_INDEX_ARMI 105 -#define PROP_INDEX_IMPERIALARAMAIC 105 -#define PROP_INDEX_ZINH 106 -#define PROP_INDEX_INHERITED 106 -#define PROP_INDEX_QAAI 106 -#define PROP_INDEX_PHLI 107 -#define PROP_INDEX_INSCRIPTIONALPAHLAVI 107 -#define PROP_INDEX_INSCRIPTIONALPARTHIAN 108 -#define PROP_INDEX_PRTI 108 -#define PROP_INDEX_JAVANESE 109 -#define PROP_INDEX_JAVA 109 -#define PROP_INDEX_JOINC 110 -#define PROP_INDEX_JOINCONTROL 110 -#define PROP_INDEX_KAITHI 111 -#define PROP_INDEX_KTHI 111 -#define PROP_INDEX_KNDA 112 -#define PROP_INDEX_KANNADA 112 -#define PROP_INDEX_KATAKANA 113 -#define PROP_INDEX_KANA 113 -#define PROP_INDEX_KAYAHLI 114 -#define PROP_INDEX_KALI 114 -#define 
PROP_INDEX_KHAROSHTHI 115 -#define PROP_INDEX_KHAR 115 -#define PROP_INDEX_KHMR 116 -#define PROP_INDEX_KHMER 116 -#define PROP_INDEX_KHOJ 117 -#define PROP_INDEX_KHOJKI 117 -#define PROP_INDEX_KHUDAWADI 118 -#define PROP_INDEX_SIND 118 -#define PROP_INDEX_L 119 -#define PROP_INDEX_LETTER 119 -#define PROP_INDEX_LC 120 -#define PROP_INDEX_CASEDLETTER 120 -#define PROP_INDEX_LAO 121 -#define PROP_INDEX_LAOO 121 -#define PROP_INDEX_LATN 122 -#define PROP_INDEX_LATIN 122 -#define PROP_INDEX_LEPC 123 -#define PROP_INDEX_LEPCHA 123 -#define PROP_INDEX_LIMBU 124 -#define PROP_INDEX_LIMB 124 -#define PROP_INDEX_LINA 125 -#define PROP_INDEX_LINEARA 125 -#define PROP_INDEX_LINB 126 -#define PROP_INDEX_LINEARB 126 -#define PROP_INDEX_LISU 127 -#define PROP_INDEX_LOWERCASELETTER 128 -#define PROP_INDEX_LL 128 -#define PROP_INDEX_LM 129 -#define PROP_INDEX_MODIFIERLETTER 129 -#define PROP_INDEX_LO 130 -#define PROP_INDEX_OTHERLETTER 130 -#define PROP_INDEX_LOGICALORDEREXCEPTION 131 -#define PROP_INDEX_LOE 131 -#define PROP_INDEX_LOWERCASE 132 -#define PROP_INDEX_LT 133 -#define PROP_INDEX_TITLECASELETTER 133 -#define PROP_INDEX_UPPERCASELETTER 134 -#define PROP_INDEX_LU 134 -#define PROP_INDEX_LYCI 135 -#define PROP_INDEX_LYCIAN 135 -#define PROP_INDEX_LYDI 136 -#define PROP_INDEX_LYDIAN 136 -#define PROP_INDEX_M 137 -#define PROP_INDEX_COMBININGMARK 137 -#define PROP_INDEX_MARK 137 -#define PROP_INDEX_MAHJ 138 -#define PROP_INDEX_MAHAJANI 138 -#define PROP_INDEX_MAKASAR 139 -#define PROP_INDEX_MAKA 139 -#define PROP_INDEX_MALAYALAM 140 -#define PROP_INDEX_MLYM 140 -#define PROP_INDEX_MAND 141 -#define PROP_INDEX_MANDAIC 141 -#define PROP_INDEX_MANI 142 -#define PROP_INDEX_MANICHAEAN 142 -#define PROP_INDEX_MARC 143 -#define PROP_INDEX_MARCHEN 143 -#define PROP_INDEX_MASARAMGONDI 144 -#define PROP_INDEX_GONM 144 -#define PROP_INDEX_MATH 145 -#define PROP_INDEX_MC 146 -#define PROP_INDEX_SPACINGMARK 146 -#define PROP_INDEX_ENCLOSINGMARK 147 -#define PROP_INDEX_ME 147 -#define 
PROP_INDEX_MEDEFAIDRIN 148 -#define PROP_INDEX_MEDF 148 -#define PROP_INDEX_MEETEIMAYEK 149 -#define PROP_INDEX_MTEI 149 -#define PROP_INDEX_MENDEKIKAKUI 150 -#define PROP_INDEX_MEND 150 -#define PROP_INDEX_MEROITICCURSIVE 151 -#define PROP_INDEX_MERC 151 -#define PROP_INDEX_MEROITICHIEROGLYPHS 152 -#define PROP_INDEX_MERO 152 -#define PROP_INDEX_PLRD 153 -#define PROP_INDEX_MIAO 153 -#define PROP_INDEX_MN 154 -#define PROP_INDEX_NONSPACINGMARK 154 -#define PROP_INDEX_MODI 155 -#define PROP_INDEX_MONGOLIAN 156 -#define PROP_INDEX_MONG 156 -#define PROP_INDEX_MRO 157 -#define PROP_INDEX_MROO 157 -#define PROP_INDEX_MULT 158 -#define PROP_INDEX_MULTANI 158 -#define PROP_INDEX_MYANMAR 159 -#define PROP_INDEX_MYMR 159 -#define PROP_INDEX_NUMBER 160 -#define PROP_INDEX_N 160 -#define PROP_INDEX_NABATAEAN 161 -#define PROP_INDEX_NBAT 161 -#define PROP_INDEX_NAND 162 -#define PROP_INDEX_NANDINAGARI 162 -#define PROP_INDEX_DECIMALNUMBER 163 -#define PROP_INDEX_ND 163 -#define PROP_INDEX_TALU 164 -#define PROP_INDEX_NEWTAILUE 164 -#define PROP_INDEX_NEWA 165 -#define PROP_INDEX_NKO 166 -#define PROP_INDEX_NKOO 166 -#define PROP_INDEX_LETTERNUMBER 167 -#define PROP_INDEX_NL 167 -#define PROP_INDEX_OTHERNUMBER 168 -#define PROP_INDEX_NO 168 -#define PROP_INDEX_NONCHARACTERCODEPOINT 169 -#define PROP_INDEX_NCHAR 169 -#define PROP_INDEX_NSHU 170 -#define PROP_INDEX_NUSHU 170 -#define PROP_INDEX_HMNP 171 -#define PROP_INDEX_NYIAKENGPUACHUEHMONG 171 -#define PROP_INDEX_OGHAM 172 -#define PROP_INDEX_OGAM 172 -#define PROP_INDEX_OLCK 173 -#define PROP_INDEX_OLCHIKI 173 -#define PROP_INDEX_HUNG 174 -#define PROP_INDEX_OLDHUNGARIAN 174 -#define PROP_INDEX_ITAL 175 -#define PROP_INDEX_OLDITALIC 175 -#define PROP_INDEX_NARB 176 -#define PROP_INDEX_OLDNORTHARABIAN 176 -#define PROP_INDEX_OLDPERMIC 177 -#define PROP_INDEX_PERM 177 -#define PROP_INDEX_OLDPERSIAN 178 -#define PROP_INDEX_XPEO 178 -#define PROP_INDEX_SOGO 179 -#define PROP_INDEX_OLDSOGDIAN 179 -#define PROP_INDEX_SARB 180 
-#define PROP_INDEX_OLDSOUTHARABIAN 180 -#define PROP_INDEX_OLDTURKIC 181 -#define PROP_INDEX_ORKH 181 -#define PROP_INDEX_ORYA 182 -#define PROP_INDEX_ORIYA 182 -#define PROP_INDEX_OSAGE 183 -#define PROP_INDEX_OSGE 183 -#define PROP_INDEX_OSMANYA 184 -#define PROP_INDEX_OSMA 184 -#define PROP_INDEX_OTHERALPHABETIC 185 -#define PROP_INDEX_OALPHA 185 -#define PROP_INDEX_ODI 186 -#define PROP_INDEX_OTHERDEFAULTIGNORABLECODEPOINT 186 -#define PROP_INDEX_OGREXT 187 -#define PROP_INDEX_OTHERGRAPHEMEEXTEND 187 -#define PROP_INDEX_OIDC 188 -#define PROP_INDEX_OTHERIDCONTINUE 188 -#define PROP_INDEX_OTHERIDSTART 189 -#define PROP_INDEX_OIDS 189 -#define PROP_INDEX_OTHERLOWERCASE 190 -#define PROP_INDEX_OLOWER 190 -#define PROP_INDEX_OTHERMATH 191 -#define PROP_INDEX_OMATH 191 -#define PROP_INDEX_OTHERUPPERCASE 192 -#define PROP_INDEX_OUPPER 192 -#define PROP_INDEX_P 193 -#define PROP_INDEX_PUNCTUATION 193 -#define PROP_INDEX_PAHAWHHMONG 194 -#define PROP_INDEX_HMNG 194 -#define PROP_INDEX_PALMYRENE 195 -#define PROP_INDEX_PALM 195 -#define PROP_INDEX_PATTERNSYNTAX 196 -#define PROP_INDEX_PATSYN 196 -#define PROP_INDEX_PATWS 197 -#define PROP_INDEX_PATTERNWHITESPACE 197 -#define PROP_INDEX_PAUCINHAU 198 -#define PROP_INDEX_PAUC 198 -#define PROP_INDEX_CONNECTORPUNCTUATION 199 -#define PROP_INDEX_PC 199 -#define PROP_INDEX_DASHPUNCTUATION 200 -#define PROP_INDEX_PD 200 -#define PROP_INDEX_PE 201 -#define PROP_INDEX_CLOSEPUNCTUATION 201 -#define PROP_INDEX_FINALPUNCTUATION 202 -#define PROP_INDEX_PF 202 -#define PROP_INDEX_PHAG 203 -#define PROP_INDEX_PHAGSPA 203 -#define PROP_INDEX_PHOENICIAN 204 -#define PROP_INDEX_PHNX 204 -#define PROP_INDEX_PI 205 -#define PROP_INDEX_INITIALPUNCTUATION 205 -#define PROP_INDEX_OTHERPUNCTUATION 206 -#define PROP_INDEX_PO 206 -#define PROP_INDEX_PREPENDEDCONCATENATIONMARK 207 -#define PROP_INDEX_PCM 207 -#define PROP_INDEX_PS 208 -#define PROP_INDEX_OPENPUNCTUATION 208 -#define PROP_INDEX_PHLP 209 -#define PROP_INDEX_PSALTERPAHLAVI 209 
-#define PROP_INDEX_QUOTATIONMARK 210 -#define PROP_INDEX_QMARK 210 -#define PROP_INDEX_RADICAL 211 -#define PROP_INDEX_RI 212 -#define PROP_INDEX_REGIONALINDICATOR 212 -#define PROP_INDEX_REJANG 213 -#define PROP_INDEX_RJNG 213 -#define PROP_INDEX_RUNIC 214 -#define PROP_INDEX_RUNR 214 -#define PROP_INDEX_S 215 -#define PROP_INDEX_SYMBOL 215 -#define PROP_INDEX_SAMARITAN 216 -#define PROP_INDEX_SAMR 216 -#define PROP_INDEX_SAURASHTRA 217 -#define PROP_INDEX_SAUR 217 -#define PROP_INDEX_SC 218 -#define PROP_INDEX_CURRENCYSYMBOL 218 -#define PROP_INDEX_SENTENCETERMINAL 219 -#define PROP_INDEX_STERM 219 -#define PROP_INDEX_SHARADA 220 -#define PROP_INDEX_SHRD 220 -#define PROP_INDEX_SHAVIAN 221 -#define PROP_INDEX_SHAW 221 -#define PROP_INDEX_SIDDHAM 222 -#define PROP_INDEX_SIDD 222 -#define PROP_INDEX_SIGNWRITING 223 -#define PROP_INDEX_SGNW 223 -#define PROP_INDEX_SINH 224 -#define PROP_INDEX_SINHALA 224 -#define PROP_INDEX_SK 225 -#define PROP_INDEX_MODIFIERSYMBOL 225 -#define PROP_INDEX_MATHSYMBOL 226 -#define PROP_INDEX_SM 226 -#define PROP_INDEX_SO 227 -#define PROP_INDEX_OTHERSYMBOL 227 -#define PROP_INDEX_SD 228 -#define PROP_INDEX_SOFTDOTTED 228 -#define PROP_INDEX_SOGD 229 -#define PROP_INDEX_SOGDIAN 229 -#define PROP_INDEX_SORASOMPENG 230 -#define PROP_INDEX_SORA 230 -#define PROP_INDEX_SOYO 231 -#define PROP_INDEX_SOYOMBO 231 -#define PROP_INDEX_SUND 232 -#define PROP_INDEX_SUNDANESE 232 -#define PROP_INDEX_SYLO 233 -#define PROP_INDEX_SYLOTINAGRI 233 -#define PROP_INDEX_SYRC 234 -#define PROP_INDEX_SYRIAC 234 -#define PROP_INDEX_TGLG 235 -#define PROP_INDEX_TAGALOG 235 -#define PROP_INDEX_TAGBANWA 236 -#define PROP_INDEX_TAGB 236 -#define PROP_INDEX_TAILE 237 -#define PROP_INDEX_TALE 237 -#define PROP_INDEX_TAITHAM 238 -#define PROP_INDEX_LANA 238 -#define PROP_INDEX_TAIVIET 239 -#define PROP_INDEX_TAVT 239 -#define PROP_INDEX_TAKRI 240 -#define PROP_INDEX_TAKR 240 -#define PROP_INDEX_TAMIL 241 -#define PROP_INDEX_TAML 241 -#define PROP_INDEX_TANG 242 
-#define PROP_INDEX_TANGUT 242 -#define PROP_INDEX_TELUGU 243 -#define PROP_INDEX_TELU 243 -#define PROP_INDEX_TERM 244 -#define PROP_INDEX_TERMINALPUNCTUATION 244 -#define PROP_INDEX_THAA 245 -#define PROP_INDEX_THAANA 245 -#define PROP_INDEX_THAI 246 -#define PROP_INDEX_TIBT 247 -#define PROP_INDEX_TIBETAN 247 -#define PROP_INDEX_TFNG 248 -#define PROP_INDEX_TIFINAGH 248 -#define PROP_INDEX_TIRHUTA 249 -#define PROP_INDEX_TIRH 249 -#define PROP_INDEX_UGAR 250 -#define PROP_INDEX_UGARITIC 250 -#define PROP_INDEX_UNIFIEDIDEOGRAPH 251 -#define PROP_INDEX_UIDEO 251 -#define PROP_INDEX_ZZZZ 252 -#define PROP_INDEX_UNKNOWN 252 -#define PROP_INDEX_UPPERCASE 253 -#define PROP_INDEX_VAII 254 -#define PROP_INDEX_VAI 254 -#define PROP_INDEX_VARIATIONSELECTOR 255 -#define PROP_INDEX_VS 255 -#define PROP_INDEX_WCHO 256 -#define PROP_INDEX_WANCHO 256 -#define PROP_INDEX_WARANGCITI 257 -#define PROP_INDEX_WARA 257 -#define PROP_INDEX_WHITESPACE 258 -#define PROP_INDEX_WSPACE 258 -#define PROP_INDEX_XIDC 259 -#define PROP_INDEX_XIDCONTINUE 259 -#define PROP_INDEX_XIDS 260 -#define PROP_INDEX_XIDSTART 260 -#define PROP_INDEX_YI 261 -#define PROP_INDEX_YIII 261 -#define PROP_INDEX_SEPARATOR 262 -#define PROP_INDEX_Z 262 -#define PROP_INDEX_ZANABAZARSQUARE 263 -#define PROP_INDEX_ZANB 263 -#define PROP_INDEX_ZL 264 -#define PROP_INDEX_LINESEPARATOR 264 -#define PROP_INDEX_ZP 265 -#define PROP_INDEX_PARAGRAPHSEPARATOR 265 -#define PROP_INDEX_ZS 266 -#define PROP_INDEX_SPACESEPARATOR 266 -#define PROP_INDEX_INBASICLATIN 267 -#define PROP_INDEX_INLATIN1SUPPLEMENT 268 -#define PROP_INDEX_INLATINEXTENDEDA 269 -#define PROP_INDEX_INLATINEXTENDEDB 270 -#define PROP_INDEX_INIPAEXTENSIONS 271 -#define PROP_INDEX_INSPACINGMODIFIERLETTERS 272 -#define PROP_INDEX_INCOMBININGDIACRITICALMARKS 273 -#define PROP_INDEX_INGREEKANDCOPTIC 274 -#define PROP_INDEX_INCYRILLIC 275 -#define PROP_INDEX_INCYRILLICSUPPLEMENT 276 -#define PROP_INDEX_INARMENIAN 277 -#define PROP_INDEX_INHEBREW 278 -#define 
PROP_INDEX_INARABIC 279 -#define PROP_INDEX_INSYRIAC 280 -#define PROP_INDEX_INARABICSUPPLEMENT 281 -#define PROP_INDEX_INTHAANA 282 -#define PROP_INDEX_INNKO 283 -#define PROP_INDEX_INSAMARITAN 284 -#define PROP_INDEX_INMANDAIC 285 -#define PROP_INDEX_INSYRIACSUPPLEMENT 286 -#define PROP_INDEX_INARABICEXTENDEDA 287 -#define PROP_INDEX_INDEVANAGARI 288 -#define PROP_INDEX_INBENGALI 289 -#define PROP_INDEX_INGURMUKHI 290 -#define PROP_INDEX_INGUJARATI 291 -#define PROP_INDEX_INORIYA 292 -#define PROP_INDEX_INTAMIL 293 -#define PROP_INDEX_INTELUGU 294 -#define PROP_INDEX_INKANNADA 295 -#define PROP_INDEX_INMALAYALAM 296 -#define PROP_INDEX_INSINHALA 297 -#define PROP_INDEX_INTHAI 298 -#define PROP_INDEX_INLAO 299 -#define PROP_INDEX_INTIBETAN 300 -#define PROP_INDEX_INMYANMAR 301 -#define PROP_INDEX_INGEORGIAN 302 -#define PROP_INDEX_INHANGULJAMO 303 -#define PROP_INDEX_INETHIOPIC 304 -#define PROP_INDEX_INETHIOPICSUPPLEMENT 305 -#define PROP_INDEX_INCHEROKEE 306 -#define PROP_INDEX_INUNIFIEDCANADIANABORIGINALSYLLABICS 307 -#define PROP_INDEX_INOGHAM 308 -#define PROP_INDEX_INRUNIC 309 -#define PROP_INDEX_INTAGALOG 310 -#define PROP_INDEX_INHANUNOO 311 -#define PROP_INDEX_INBUHID 312 -#define PROP_INDEX_INTAGBANWA 313 -#define PROP_INDEX_INKHMER 314 -#define PROP_INDEX_INMONGOLIAN 315 -#define PROP_INDEX_INUNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED 316 -#define PROP_INDEX_INLIMBU 317 -#define PROP_INDEX_INTAILE 318 -#define PROP_INDEX_INNEWTAILUE 319 -#define PROP_INDEX_INKHMERSYMBOLS 320 -#define PROP_INDEX_INBUGINESE 321 -#define PROP_INDEX_INTAITHAM 322 -#define PROP_INDEX_INCOMBININGDIACRITICALMARKSEXTENDED 323 -#define PROP_INDEX_INBALINESE 324 -#define PROP_INDEX_INSUNDANESE 325 -#define PROP_INDEX_INBATAK 326 -#define PROP_INDEX_INLEPCHA 327 -#define PROP_INDEX_INOLCHIKI 328 -#define PROP_INDEX_INCYRILLICEXTENDEDC 329 -#define PROP_INDEX_INGEORGIANEXTENDED 330 -#define PROP_INDEX_INSUNDANESESUPPLEMENT 331 -#define PROP_INDEX_INVEDICEXTENSIONS 332 -#define 
PROP_INDEX_INPHONETICEXTENSIONS 333 -#define PROP_INDEX_INPHONETICEXTENSIONSSUPPLEMENT 334 -#define PROP_INDEX_INCOMBININGDIACRITICALMARKSSUPPLEMENT 335 -#define PROP_INDEX_INLATINEXTENDEDADDITIONAL 336 -#define PROP_INDEX_INGREEKEXTENDED 337 -#define PROP_INDEX_INGENERALPUNCTUATION 338 -#define PROP_INDEX_INSUPERSCRIPTSANDSUBSCRIPTS 339 -#define PROP_INDEX_INCURRENCYSYMBOLS 340 -#define PROP_INDEX_INCOMBININGDIACRITICALMARKSFORSYMBOLS 341 -#define PROP_INDEX_INLETTERLIKESYMBOLS 342 -#define PROP_INDEX_INNUMBERFORMS 343 -#define PROP_INDEX_INARROWS 344 -#define PROP_INDEX_INMATHEMATICALOPERATORS 345 -#define PROP_INDEX_INMISCELLANEOUSTECHNICAL 346 -#define PROP_INDEX_INCONTROLPICTURES 347 -#define PROP_INDEX_INOPTICALCHARACTERRECOGNITION 348 -#define PROP_INDEX_INENCLOSEDALPHANUMERICS 349 -#define PROP_INDEX_INBOXDRAWING 350 -#define PROP_INDEX_INBLOCKELEMENTS 351 -#define PROP_INDEX_INGEOMETRICSHAPES 352 -#define PROP_INDEX_INMISCELLANEOUSSYMBOLS 353 -#define PROP_INDEX_INDINGBATS 354 -#define PROP_INDEX_INMISCELLANEOUSMATHEMATICALSYMBOLSA 355 -#define PROP_INDEX_INSUPPLEMENTALARROWSA 356 -#define PROP_INDEX_INBRAILLEPATTERNS 357 -#define PROP_INDEX_INSUPPLEMENTALARROWSB 358 -#define PROP_INDEX_INMISCELLANEOUSMATHEMATICALSYMBOLSB 359 -#define PROP_INDEX_INSUPPLEMENTALMATHEMATICALOPERATORS 360 -#define PROP_INDEX_INMISCELLANEOUSSYMBOLSANDARROWS 361 -#define PROP_INDEX_INGLAGOLITIC 362 -#define PROP_INDEX_INLATINEXTENDEDC 363 -#define PROP_INDEX_INCOPTIC 364 -#define PROP_INDEX_INGEORGIANSUPPLEMENT 365 -#define PROP_INDEX_INTIFINAGH 366 -#define PROP_INDEX_INETHIOPICEXTENDED 367 -#define PROP_INDEX_INCYRILLICEXTENDEDA 368 -#define PROP_INDEX_INSUPPLEMENTALPUNCTUATION 369 -#define PROP_INDEX_INCJKRADICALSSUPPLEMENT 370 -#define PROP_INDEX_INKANGXIRADICALS 371 -#define PROP_INDEX_INIDEOGRAPHICDESCRIPTIONCHARACTERS 372 -#define PROP_INDEX_INCJKSYMBOLSANDPUNCTUATION 373 -#define PROP_INDEX_INHIRAGANA 374 -#define PROP_INDEX_INKATAKANA 375 -#define PROP_INDEX_INBOPOMOFO 
376 -#define PROP_INDEX_INHANGULCOMPATIBILITYJAMO 377 -#define PROP_INDEX_INKANBUN 378 -#define PROP_INDEX_INBOPOMOFOEXTENDED 379 -#define PROP_INDEX_INCJKSTROKES 380 -#define PROP_INDEX_INKATAKANAPHONETICEXTENSIONS 381 -#define PROP_INDEX_INENCLOSEDCJKLETTERSANDMONTHS 382 -#define PROP_INDEX_INCJKCOMPATIBILITY 383 -#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONA 384 -#define PROP_INDEX_INYIJINGHEXAGRAMSYMBOLS 385 -#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHS 386 -#define PROP_INDEX_INYISYLLABLES 387 -#define PROP_INDEX_INYIRADICALS 388 -#define PROP_INDEX_INLISU 389 -#define PROP_INDEX_INVAI 390 -#define PROP_INDEX_INCYRILLICEXTENDEDB 391 -#define PROP_INDEX_INBAMUM 392 -#define PROP_INDEX_INMODIFIERTONELETTERS 393 -#define PROP_INDEX_INLATINEXTENDEDD 394 -#define PROP_INDEX_INSYLOTINAGRI 395 -#define PROP_INDEX_INCOMMONINDICNUMBERFORMS 396 -#define PROP_INDEX_INPHAGSPA 397 -#define PROP_INDEX_INSAURASHTRA 398 -#define PROP_INDEX_INDEVANAGARIEXTENDED 399 -#define PROP_INDEX_INKAYAHLI 400 -#define PROP_INDEX_INREJANG 401 -#define PROP_INDEX_INHANGULJAMOEXTENDEDA 402 -#define PROP_INDEX_INJAVANESE 403 -#define PROP_INDEX_INMYANMAREXTENDEDB 404 -#define PROP_INDEX_INCHAM 405 -#define PROP_INDEX_INMYANMAREXTENDEDA 406 -#define PROP_INDEX_INTAIVIET 407 -#define PROP_INDEX_INMEETEIMAYEKEXTENSIONS 408 -#define PROP_INDEX_INETHIOPICEXTENDEDA 409 -#define PROP_INDEX_INLATINEXTENDEDE 410 -#define PROP_INDEX_INCHEROKEESUPPLEMENT 411 -#define PROP_INDEX_INMEETEIMAYEK 412 -#define PROP_INDEX_INHANGULSYLLABLES 413 -#define PROP_INDEX_INHANGULJAMOEXTENDEDB 414 -#define PROP_INDEX_INHIGHSURROGATES 415 -#define PROP_INDEX_INHIGHPRIVATEUSESURROGATES 416 -#define PROP_INDEX_INLOWSURROGATES 417 -#define PROP_INDEX_INPRIVATEUSEAREA 418 -#define PROP_INDEX_INCJKCOMPATIBILITYIDEOGRAPHS 419 -#define PROP_INDEX_INALPHABETICPRESENTATIONFORMS 420 -#define PROP_INDEX_INARABICPRESENTATIONFORMSA 421 -#define PROP_INDEX_INVARIATIONSELECTORS 422 -#define PROP_INDEX_INVERTICALFORMS 423 -#define 
PROP_INDEX_INCOMBININGHALFMARKS 424 -#define PROP_INDEX_INCJKCOMPATIBILITYFORMS 425 -#define PROP_INDEX_INSMALLFORMVARIANTS 426 -#define PROP_INDEX_INARABICPRESENTATIONFORMSB 427 -#define PROP_INDEX_INHALFWIDTHANDFULLWIDTHFORMS 428 -#define PROP_INDEX_INSPECIALS 429 -#define PROP_INDEX_INLINEARBSYLLABARY 430 -#define PROP_INDEX_INLINEARBIDEOGRAMS 431 -#define PROP_INDEX_INAEGEANNUMBERS 432 -#define PROP_INDEX_INANCIENTGREEKNUMBERS 433 -#define PROP_INDEX_INANCIENTSYMBOLS 434 -#define PROP_INDEX_INPHAISTOSDISC 435 -#define PROP_INDEX_INLYCIAN 436 -#define PROP_INDEX_INCARIAN 437 -#define PROP_INDEX_INCOPTICEPACTNUMBERS 438 -#define PROP_INDEX_INOLDITALIC 439 -#define PROP_INDEX_INGOTHIC 440 -#define PROP_INDEX_INOLDPERMIC 441 -#define PROP_INDEX_INUGARITIC 442 -#define PROP_INDEX_INOLDPERSIAN 443 -#define PROP_INDEX_INDESERET 444 -#define PROP_INDEX_INSHAVIAN 445 -#define PROP_INDEX_INOSMANYA 446 -#define PROP_INDEX_INOSAGE 447 -#define PROP_INDEX_INELBASAN 448 -#define PROP_INDEX_INCAUCASIANALBANIAN 449 -#define PROP_INDEX_INLINEARA 450 -#define PROP_INDEX_INCYPRIOTSYLLABARY 451 -#define PROP_INDEX_INIMPERIALARAMAIC 452 -#define PROP_INDEX_INPALMYRENE 453 -#define PROP_INDEX_INNABATAEAN 454 -#define PROP_INDEX_INHATRAN 455 -#define PROP_INDEX_INPHOENICIAN 456 -#define PROP_INDEX_INLYDIAN 457 -#define PROP_INDEX_INMEROITICHIEROGLYPHS 458 -#define PROP_INDEX_INMEROITICCURSIVE 459 -#define PROP_INDEX_INKHAROSHTHI 460 -#define PROP_INDEX_INOLDSOUTHARABIAN 461 -#define PROP_INDEX_INOLDNORTHARABIAN 462 -#define PROP_INDEX_INMANICHAEAN 463 -#define PROP_INDEX_INAVESTAN 464 -#define PROP_INDEX_ININSCRIPTIONALPARTHIAN 465 -#define PROP_INDEX_ININSCRIPTIONALPAHLAVI 466 -#define PROP_INDEX_INPSALTERPAHLAVI 467 -#define PROP_INDEX_INOLDTURKIC 468 -#define PROP_INDEX_INOLDHUNGARIAN 469 -#define PROP_INDEX_INHANIFIROHINGYA 470 -#define PROP_INDEX_INRUMINUMERALSYMBOLS 471 -#define PROP_INDEX_INOLDSOGDIAN 472 -#define PROP_INDEX_INSOGDIAN 473 -#define PROP_INDEX_INELYMAIC 474 
-#define PROP_INDEX_INBRAHMI 475 -#define PROP_INDEX_INKAITHI 476 -#define PROP_INDEX_INSORASOMPENG 477 -#define PROP_INDEX_INCHAKMA 478 -#define PROP_INDEX_INMAHAJANI 479 -#define PROP_INDEX_INSHARADA 480 -#define PROP_INDEX_INSINHALAARCHAICNUMBERS 481 -#define PROP_INDEX_INKHOJKI 482 -#define PROP_INDEX_INMULTANI 483 -#define PROP_INDEX_INKHUDAWADI 484 -#define PROP_INDEX_INGRANTHA 485 -#define PROP_INDEX_INNEWA 486 -#define PROP_INDEX_INTIRHUTA 487 -#define PROP_INDEX_INSIDDHAM 488 -#define PROP_INDEX_INMODI 489 -#define PROP_INDEX_INMONGOLIANSUPPLEMENT 490 -#define PROP_INDEX_INTAKRI 491 -#define PROP_INDEX_INAHOM 492 -#define PROP_INDEX_INDOGRA 493 -#define PROP_INDEX_INWARANGCITI 494 -#define PROP_INDEX_INNANDINAGARI 495 -#define PROP_INDEX_INZANABAZARSQUARE 496 -#define PROP_INDEX_INSOYOMBO 497 -#define PROP_INDEX_INPAUCINHAU 498 -#define PROP_INDEX_INBHAIKSUKI 499 -#define PROP_INDEX_INMARCHEN 500 -#define PROP_INDEX_INMASARAMGONDI 501 -#define PROP_INDEX_INGUNJALAGONDI 502 -#define PROP_INDEX_INMAKASAR 503 -#define PROP_INDEX_INTAMILSUPPLEMENT 504 -#define PROP_INDEX_INCUNEIFORM 505 -#define PROP_INDEX_INCUNEIFORMNUMBERSANDPUNCTUATION 506 -#define PROP_INDEX_INEARLYDYNASTICCUNEIFORM 507 -#define PROP_INDEX_INEGYPTIANHIEROGLYPHS 508 -#define PROP_INDEX_INEGYPTIANHIEROGLYPHFORMATCONTROLS 509 -#define PROP_INDEX_INANATOLIANHIEROGLYPHS 510 -#define PROP_INDEX_INBAMUMSUPPLEMENT 511 -#define PROP_INDEX_INMRO 512 -#define PROP_INDEX_INBASSAVAH 513 -#define PROP_INDEX_INPAHAWHHMONG 514 -#define PROP_INDEX_INMEDEFAIDRIN 515 -#define PROP_INDEX_INMIAO 516 -#define PROP_INDEX_INIDEOGRAPHICSYMBOLSANDPUNCTUATION 517 -#define PROP_INDEX_INTANGUT 518 -#define PROP_INDEX_INTANGUTCOMPONENTS 519 -#define PROP_INDEX_INKANASUPPLEMENT 520 -#define PROP_INDEX_INKANAEXTENDEDA 521 -#define PROP_INDEX_INSMALLKANAEXTENSION 522 -#define PROP_INDEX_INNUSHU 523 -#define PROP_INDEX_INDUPLOYAN 524 -#define PROP_INDEX_INSHORTHANDFORMATCONTROLS 525 -#define 
PROP_INDEX_INBYZANTINEMUSICALSYMBOLS 526 -#define PROP_INDEX_INMUSICALSYMBOLS 527 -#define PROP_INDEX_INANCIENTGREEKMUSICALNOTATION 528 -#define PROP_INDEX_INMAYANNUMERALS 529 -#define PROP_INDEX_INTAIXUANJINGSYMBOLS 530 -#define PROP_INDEX_INCOUNTINGRODNUMERALS 531 -#define PROP_INDEX_INMATHEMATICALALPHANUMERICSYMBOLS 532 -#define PROP_INDEX_INSUTTONSIGNWRITING 533 -#define PROP_INDEX_INGLAGOLITICSUPPLEMENT 534 -#define PROP_INDEX_INNYIAKENGPUACHUEHMONG 535 -#define PROP_INDEX_INWANCHO 536 -#define PROP_INDEX_INMENDEKIKAKUI 537 -#define PROP_INDEX_INADLAM 538 -#define PROP_INDEX_ININDICSIYAQNUMBERS 539 -#define PROP_INDEX_INOTTOMANSIYAQNUMBERS 540 -#define PROP_INDEX_INARABICMATHEMATICALALPHABETICSYMBOLS 541 -#define PROP_INDEX_INMAHJONGTILES 542 -#define PROP_INDEX_INDOMINOTILES 543 -#define PROP_INDEX_INPLAYINGCARDS 544 -#define PROP_INDEX_INENCLOSEDALPHANUMERICSUPPLEMENT 545 -#define PROP_INDEX_INENCLOSEDIDEOGRAPHICSUPPLEMENT 546 -#define PROP_INDEX_INMISCELLANEOUSSYMBOLSANDPICTOGRAPHS 547 -#define PROP_INDEX_INEMOTICONS 548 -#define PROP_INDEX_INORNAMENTALDINGBATS 549 -#define PROP_INDEX_INTRANSPORTANDMAPSYMBOLS 550 -#define PROP_INDEX_INALCHEMICALSYMBOLS 551 -#define PROP_INDEX_INGEOMETRICSHAPESEXTENDED 552 -#define PROP_INDEX_INSUPPLEMENTALARROWSC 553 -#define PROP_INDEX_INSUPPLEMENTALSYMBOLSANDPICTOGRAPHS 554 -#define PROP_INDEX_INCHESSSYMBOLS 555 -#define PROP_INDEX_INSYMBOLSANDPICTOGRAPHSEXTENDEDA 556 -#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONB 557 -#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONC 558 -#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIOND 559 -#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONE 560 -#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONF 561 -#define PROP_INDEX_INCJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT 562 -#define PROP_INDEX_INTAGS 563 -#define PROP_INDEX_INVARIATIONSELECTORSSUPPLEMENT 564 -#define PROP_INDEX_INSUPPLEMENTARYPRIVATEUSEAREAA 565 -#define PROP_INDEX_INSUPPLEMENTARYPRIVATEUSEAREAB 566 -#define 
PROP_INDEX_INNOBLOCK 567 +#define PROP_INDEX_CHRS 53 +#define PROP_INDEX_CHORASMIAN 53 +#define PROP_INDEX_CN 54 +#define PROP_INDEX_UNASSIGNED 54 +#define PROP_INDEX_PRIVATEUSE 55 +#define PROP_INDEX_CO 55 +#define PROP_INDEX_ZYYY 56 +#define PROP_INDEX_COMMON 56 +#define PROP_INDEX_COPT 57 +#define PROP_INDEX_COPTIC 57 +#define PROP_INDEX_QAAC 57 +#define PROP_INDEX_CS 58 +#define PROP_INDEX_SURROGATE 58 +#define PROP_INDEX_XSUX 59 +#define PROP_INDEX_CUNEIFORM 59 +#define PROP_INDEX_CPRT 60 +#define PROP_INDEX_CYPRIOT 60 +#define PROP_INDEX_CYRILLIC 61 +#define PROP_INDEX_CYRL 61 +#define PROP_INDEX_DASH 62 +#define PROP_INDEX_DEFAULTIGNORABLECODEPOINT 63 +#define PROP_INDEX_DI 63 +#define PROP_INDEX_DEP 64 +#define PROP_INDEX_DEPRECATED 64 +#define PROP_INDEX_DSRT 65 +#define PROP_INDEX_DESERET 65 +#define PROP_INDEX_DEVA 66 +#define PROP_INDEX_DEVANAGARI 66 +#define PROP_INDEX_DIACRITIC 67 +#define PROP_INDEX_DIA 67 +#define PROP_INDEX_DIAK 68 +#define PROP_INDEX_DIVESAKURU 68 +#define PROP_INDEX_DOGR 69 +#define PROP_INDEX_DOGRA 69 +#define PROP_INDEX_DUPL 70 +#define PROP_INDEX_DUPLOYAN 70 +#define PROP_INDEX_EGYPTIANHIEROGLYPHS 71 +#define PROP_INDEX_EGYP 71 +#define PROP_INDEX_ELBA 72 +#define PROP_INDEX_ELBASAN 72 +#define PROP_INDEX_ELYM 73 +#define PROP_INDEX_ELYMAIC 73 +#define PROP_INDEX_EMOJI 74 +#define PROP_INDEX_EMOJICOMPONENT 75 +#define PROP_INDEX_ECOMP 75 +#define PROP_INDEX_EMOD 76 +#define PROP_INDEX_EMOJIMODIFIER 76 +#define PROP_INDEX_EMOJIMODIFIERBASE 77 +#define PROP_INDEX_EBASE 77 +#define PROP_INDEX_EMOJIPRESENTATION 78 +#define PROP_INDEX_EPRES 78 +#define PROP_INDEX_ETHIOPIC 79 +#define PROP_INDEX_ETHI 79 +#define PROP_INDEX_EXTPICT 80 +#define PROP_INDEX_EXTENDEDPICTOGRAPHIC 80 +#define PROP_INDEX_EXT 81 +#define PROP_INDEX_EXTENDER 81 +#define PROP_INDEX_GEORGIAN 82 +#define PROP_INDEX_GEOR 82 +#define PROP_INDEX_GLAG 83 +#define PROP_INDEX_GLAGOLITIC 83 +#define PROP_INDEX_GOTH 84 +#define PROP_INDEX_GOTHIC 84 +#define 
PROP_INDEX_GRAN 85 +#define PROP_INDEX_GRANTHA 85 +#define PROP_INDEX_GRBASE 86 +#define PROP_INDEX_GRAPHEMEBASE 86 +#define PROP_INDEX_GREXT 87 +#define PROP_INDEX_GRAPHEMEEXTEND 87 +#define PROP_INDEX_GRAPHEMELINK 88 +#define PROP_INDEX_GRLINK 88 +#define PROP_INDEX_GREEK 89 +#define PROP_INDEX_GREK 89 +#define PROP_INDEX_GUJARATI 90 +#define PROP_INDEX_GUJR 90 +#define PROP_INDEX_GUNJALAGONDI 91 +#define PROP_INDEX_GONG 91 +#define PROP_INDEX_GURU 92 +#define PROP_INDEX_GURMUKHI 92 +#define PROP_INDEX_HANI 93 +#define PROP_INDEX_HAN 93 +#define PROP_INDEX_HANG 94 +#define PROP_INDEX_HANGUL 94 +#define PROP_INDEX_HANIFIROHINGYA 95 +#define PROP_INDEX_ROHG 95 +#define PROP_INDEX_HANUNOO 96 +#define PROP_INDEX_HANO 96 +#define PROP_INDEX_HATR 97 +#define PROP_INDEX_HATRAN 97 +#define PROP_INDEX_HEBREW 98 +#define PROP_INDEX_HEBR 98 +#define PROP_INDEX_HEX 99 +#define PROP_INDEX_HEXDIGIT 99 +#define PROP_INDEX_HIRAGANA 100 +#define PROP_INDEX_HIRA 100 +#define PROP_INDEX_HYPHEN 101 +#define PROP_INDEX_IDSB 102 +#define PROP_INDEX_IDSBINARYOPERATOR 102 +#define PROP_INDEX_IDST 103 +#define PROP_INDEX_IDSTRINARYOPERATOR 103 +#define PROP_INDEX_IDCONTINUE 104 +#define PROP_INDEX_IDC 104 +#define PROP_INDEX_IDS 105 +#define PROP_INDEX_IDSTART 105 +#define PROP_INDEX_IDEO 106 +#define PROP_INDEX_IDEOGRAPHIC 106 +#define PROP_INDEX_ARMI 107 +#define PROP_INDEX_IMPERIALARAMAIC 107 +#define PROP_INDEX_ZINH 108 +#define PROP_INDEX_INHERITED 108 +#define PROP_INDEX_QAAI 108 +#define PROP_INDEX_PHLI 109 +#define PROP_INDEX_INSCRIPTIONALPAHLAVI 109 +#define PROP_INDEX_INSCRIPTIONALPARTHIAN 110 +#define PROP_INDEX_PRTI 110 +#define PROP_INDEX_JAVANESE 111 +#define PROP_INDEX_JAVA 111 +#define PROP_INDEX_JOINC 112 +#define PROP_INDEX_JOINCONTROL 112 +#define PROP_INDEX_KAITHI 113 +#define PROP_INDEX_KTHI 113 +#define PROP_INDEX_KNDA 114 +#define PROP_INDEX_KANNADA 114 +#define PROP_INDEX_KATAKANA 115 +#define PROP_INDEX_KANA 115 +#define PROP_INDEX_KAYAHLI 116 +#define 
PROP_INDEX_KALI 116 +#define PROP_INDEX_KHAROSHTHI 117 +#define PROP_INDEX_KHAR 117 +#define PROP_INDEX_KITS 118 +#define PROP_INDEX_KHITANSMALLSCRIPT 118 +#define PROP_INDEX_KHMR 119 +#define PROP_INDEX_KHMER 119 +#define PROP_INDEX_KHOJ 120 +#define PROP_INDEX_KHOJKI 120 +#define PROP_INDEX_KHUDAWADI 121 +#define PROP_INDEX_SIND 121 +#define PROP_INDEX_L 122 +#define PROP_INDEX_LETTER 122 +#define PROP_INDEX_LC 123 +#define PROP_INDEX_CASEDLETTER 123 +#define PROP_INDEX_LAO 124 +#define PROP_INDEX_LAOO 124 +#define PROP_INDEX_LATN 125 +#define PROP_INDEX_LATIN 125 +#define PROP_INDEX_LEPC 126 +#define PROP_INDEX_LEPCHA 126 +#define PROP_INDEX_LIMBU 127 +#define PROP_INDEX_LIMB 127 +#define PROP_INDEX_LINA 128 +#define PROP_INDEX_LINEARA 128 +#define PROP_INDEX_LINB 129 +#define PROP_INDEX_LINEARB 129 +#define PROP_INDEX_LISU 130 +#define PROP_INDEX_LOWERCASELETTER 131 +#define PROP_INDEX_LL 131 +#define PROP_INDEX_LM 132 +#define PROP_INDEX_MODIFIERLETTER 132 +#define PROP_INDEX_LO 133 +#define PROP_INDEX_OTHERLETTER 133 +#define PROP_INDEX_LOGICALORDEREXCEPTION 134 +#define PROP_INDEX_LOE 134 +#define PROP_INDEX_LOWERCASE 135 +#define PROP_INDEX_LT 136 +#define PROP_INDEX_TITLECASELETTER 136 +#define PROP_INDEX_UPPERCASELETTER 137 +#define PROP_INDEX_LU 137 +#define PROP_INDEX_LYCI 138 +#define PROP_INDEX_LYCIAN 138 +#define PROP_INDEX_LYDI 139 +#define PROP_INDEX_LYDIAN 139 +#define PROP_INDEX_M 140 +#define PROP_INDEX_COMBININGMARK 140 +#define PROP_INDEX_MARK 140 +#define PROP_INDEX_MAHJ 141 +#define PROP_INDEX_MAHAJANI 141 +#define PROP_INDEX_MAKASAR 142 +#define PROP_INDEX_MAKA 142 +#define PROP_INDEX_MALAYALAM 143 +#define PROP_INDEX_MLYM 143 +#define PROP_INDEX_MAND 144 +#define PROP_INDEX_MANDAIC 144 +#define PROP_INDEX_MANI 145 +#define PROP_INDEX_MANICHAEAN 145 +#define PROP_INDEX_MARC 146 +#define PROP_INDEX_MARCHEN 146 +#define PROP_INDEX_MASARAMGONDI 147 +#define PROP_INDEX_GONM 147 +#define PROP_INDEX_MATH 148 +#define PROP_INDEX_MC 149 +#define 
PROP_INDEX_SPACINGMARK 149 +#define PROP_INDEX_ENCLOSINGMARK 150 +#define PROP_INDEX_ME 150 +#define PROP_INDEX_MEDEFAIDRIN 151 +#define PROP_INDEX_MEDF 151 +#define PROP_INDEX_MEETEIMAYEK 152 +#define PROP_INDEX_MTEI 152 +#define PROP_INDEX_MENDEKIKAKUI 153 +#define PROP_INDEX_MEND 153 +#define PROP_INDEX_MEROITICCURSIVE 154 +#define PROP_INDEX_MERC 154 +#define PROP_INDEX_MEROITICHIEROGLYPHS 155 +#define PROP_INDEX_MERO 155 +#define PROP_INDEX_PLRD 156 +#define PROP_INDEX_MIAO 156 +#define PROP_INDEX_MN 157 +#define PROP_INDEX_NONSPACINGMARK 157 +#define PROP_INDEX_MODI 158 +#define PROP_INDEX_MONGOLIAN 159 +#define PROP_INDEX_MONG 159 +#define PROP_INDEX_MRO 160 +#define PROP_INDEX_MROO 160 +#define PROP_INDEX_MULT 161 +#define PROP_INDEX_MULTANI 161 +#define PROP_INDEX_MYANMAR 162 +#define PROP_INDEX_MYMR 162 +#define PROP_INDEX_NUMBER 163 +#define PROP_INDEX_N 163 +#define PROP_INDEX_NABATAEAN 164 +#define PROP_INDEX_NBAT 164 +#define PROP_INDEX_NAND 165 +#define PROP_INDEX_NANDINAGARI 165 +#define PROP_INDEX_DECIMALNUMBER 166 +#define PROP_INDEX_ND 166 +#define PROP_INDEX_TALU 167 +#define PROP_INDEX_NEWTAILUE 167 +#define PROP_INDEX_NEWA 168 +#define PROP_INDEX_NKO 169 +#define PROP_INDEX_NKOO 169 +#define PROP_INDEX_LETTERNUMBER 170 +#define PROP_INDEX_NL 170 +#define PROP_INDEX_OTHERNUMBER 171 +#define PROP_INDEX_NO 171 +#define PROP_INDEX_NONCHARACTERCODEPOINT 172 +#define PROP_INDEX_NCHAR 172 +#define PROP_INDEX_NSHU 173 +#define PROP_INDEX_NUSHU 173 +#define PROP_INDEX_HMNP 174 +#define PROP_INDEX_NYIAKENGPUACHUEHMONG 174 +#define PROP_INDEX_OGHAM 175 +#define PROP_INDEX_OGAM 175 +#define PROP_INDEX_OLCK 176 +#define PROP_INDEX_OLCHIKI 176 +#define PROP_INDEX_HUNG 177 +#define PROP_INDEX_OLDHUNGARIAN 177 +#define PROP_INDEX_ITAL 178 +#define PROP_INDEX_OLDITALIC 178 +#define PROP_INDEX_NARB 179 +#define PROP_INDEX_OLDNORTHARABIAN 179 +#define PROP_INDEX_OLDPERMIC 180 +#define PROP_INDEX_PERM 180 +#define PROP_INDEX_OLDPERSIAN 181 +#define 
PROP_INDEX_XPEO 181 +#define PROP_INDEX_SOGO 182 +#define PROP_INDEX_OLDSOGDIAN 182 +#define PROP_INDEX_SARB 183 +#define PROP_INDEX_OLDSOUTHARABIAN 183 +#define PROP_INDEX_OLDTURKIC 184 +#define PROP_INDEX_ORKH 184 +#define PROP_INDEX_ORYA 185 +#define PROP_INDEX_ORIYA 185 +#define PROP_INDEX_OSAGE 186 +#define PROP_INDEX_OSGE 186 +#define PROP_INDEX_OSMANYA 187 +#define PROP_INDEX_OSMA 187 +#define PROP_INDEX_OTHERALPHABETIC 188 +#define PROP_INDEX_OALPHA 188 +#define PROP_INDEX_ODI 189 +#define PROP_INDEX_OTHERDEFAULTIGNORABLECODEPOINT 189 +#define PROP_INDEX_OGREXT 190 +#define PROP_INDEX_OTHERGRAPHEMEEXTEND 190 +#define PROP_INDEX_OIDC 191 +#define PROP_INDEX_OTHERIDCONTINUE 191 +#define PROP_INDEX_OTHERIDSTART 192 +#define PROP_INDEX_OIDS 192 +#define PROP_INDEX_OTHERLOWERCASE 193 +#define PROP_INDEX_OLOWER 193 +#define PROP_INDEX_OTHERMATH 194 +#define PROP_INDEX_OMATH 194 +#define PROP_INDEX_OTHERUPPERCASE 195 +#define PROP_INDEX_OUPPER 195 +#define PROP_INDEX_P 196 +#define PROP_INDEX_PUNCTUATION 196 +#define PROP_INDEX_PAHAWHHMONG 197 +#define PROP_INDEX_HMNG 197 +#define PROP_INDEX_PALMYRENE 198 +#define PROP_INDEX_PALM 198 +#define PROP_INDEX_PATTERNSYNTAX 199 +#define PROP_INDEX_PATSYN 199 +#define PROP_INDEX_PATWS 200 +#define PROP_INDEX_PATTERNWHITESPACE 200 +#define PROP_INDEX_PAUCINHAU 201 +#define PROP_INDEX_PAUC 201 +#define PROP_INDEX_CONNECTORPUNCTUATION 202 +#define PROP_INDEX_PC 202 +#define PROP_INDEX_DASHPUNCTUATION 203 +#define PROP_INDEX_PD 203 +#define PROP_INDEX_PE 204 +#define PROP_INDEX_CLOSEPUNCTUATION 204 +#define PROP_INDEX_FINALPUNCTUATION 205 +#define PROP_INDEX_PF 205 +#define PROP_INDEX_PHAG 206 +#define PROP_INDEX_PHAGSPA 206 +#define PROP_INDEX_PHOENICIAN 207 +#define PROP_INDEX_PHNX 207 +#define PROP_INDEX_PI 208 +#define PROP_INDEX_INITIALPUNCTUATION 208 +#define PROP_INDEX_OTHERPUNCTUATION 209 +#define PROP_INDEX_PO 209 +#define PROP_INDEX_PREPENDEDCONCATENATIONMARK 210 +#define PROP_INDEX_PCM 210 +#define PROP_INDEX_PS 
211 +#define PROP_INDEX_OPENPUNCTUATION 211 +#define PROP_INDEX_PHLP 212 +#define PROP_INDEX_PSALTERPAHLAVI 212 +#define PROP_INDEX_QUOTATIONMARK 213 +#define PROP_INDEX_QMARK 213 +#define PROP_INDEX_RADICAL 214 +#define PROP_INDEX_RI 215 +#define PROP_INDEX_REGIONALINDICATOR 215 +#define PROP_INDEX_REJANG 216 +#define PROP_INDEX_RJNG 216 +#define PROP_INDEX_RUNIC 217 +#define PROP_INDEX_RUNR 217 +#define PROP_INDEX_S 218 +#define PROP_INDEX_SYMBOL 218 +#define PROP_INDEX_SAMARITAN 219 +#define PROP_INDEX_SAMR 219 +#define PROP_INDEX_SAURASHTRA 220 +#define PROP_INDEX_SAUR 220 +#define PROP_INDEX_SC 221 +#define PROP_INDEX_CURRENCYSYMBOL 221 +#define PROP_INDEX_SENTENCETERMINAL 222 +#define PROP_INDEX_STERM 222 +#define PROP_INDEX_SHARADA 223 +#define PROP_INDEX_SHRD 223 +#define PROP_INDEX_SHAVIAN 224 +#define PROP_INDEX_SHAW 224 +#define PROP_INDEX_SIDDHAM 225 +#define PROP_INDEX_SIDD 225 +#define PROP_INDEX_SIGNWRITING 226 +#define PROP_INDEX_SGNW 226 +#define PROP_INDEX_SINH 227 +#define PROP_INDEX_SINHALA 227 +#define PROP_INDEX_SK 228 +#define PROP_INDEX_MODIFIERSYMBOL 228 +#define PROP_INDEX_MATHSYMBOL 229 +#define PROP_INDEX_SM 229 +#define PROP_INDEX_SO 230 +#define PROP_INDEX_OTHERSYMBOL 230 +#define PROP_INDEX_SD 231 +#define PROP_INDEX_SOFTDOTTED 231 +#define PROP_INDEX_SOGD 232 +#define PROP_INDEX_SOGDIAN 232 +#define PROP_INDEX_SORASOMPENG 233 +#define PROP_INDEX_SORA 233 +#define PROP_INDEX_SOYO 234 +#define PROP_INDEX_SOYOMBO 234 +#define PROP_INDEX_SUND 235 +#define PROP_INDEX_SUNDANESE 235 +#define PROP_INDEX_SYLO 236 +#define PROP_INDEX_SYLOTINAGRI 236 +#define PROP_INDEX_SYRC 237 +#define PROP_INDEX_SYRIAC 237 +#define PROP_INDEX_TGLG 238 +#define PROP_INDEX_TAGALOG 238 +#define PROP_INDEX_TAGBANWA 239 +#define PROP_INDEX_TAGB 239 +#define PROP_INDEX_TAILE 240 +#define PROP_INDEX_TALE 240 +#define PROP_INDEX_TAITHAM 241 +#define PROP_INDEX_LANA 241 +#define PROP_INDEX_TAIVIET 242 +#define PROP_INDEX_TAVT 242 +#define PROP_INDEX_TAKRI 243 
+#define PROP_INDEX_TAKR 243 +#define PROP_INDEX_TAMIL 244 +#define PROP_INDEX_TAML 244 +#define PROP_INDEX_TANG 245 +#define PROP_INDEX_TANGUT 245 +#define PROP_INDEX_TELUGU 246 +#define PROP_INDEX_TELU 246 +#define PROP_INDEX_TERM 247 +#define PROP_INDEX_TERMINALPUNCTUATION 247 +#define PROP_INDEX_THAA 248 +#define PROP_INDEX_THAANA 248 +#define PROP_INDEX_THAI 249 +#define PROP_INDEX_TIBT 250 +#define PROP_INDEX_TIBETAN 250 +#define PROP_INDEX_TFNG 251 +#define PROP_INDEX_TIFINAGH 251 +#define PROP_INDEX_TIRHUTA 252 +#define PROP_INDEX_TIRH 252 +#define PROP_INDEX_UGAR 253 +#define PROP_INDEX_UGARITIC 253 +#define PROP_INDEX_UNIFIEDIDEOGRAPH 254 +#define PROP_INDEX_UIDEO 254 +#define PROP_INDEX_ZZZZ 255 +#define PROP_INDEX_UNKNOWN 255 +#define PROP_INDEX_UPPERCASE 256 +#define PROP_INDEX_VAII 257 +#define PROP_INDEX_VAI 257 +#define PROP_INDEX_VARIATIONSELECTOR 258 +#define PROP_INDEX_VS 258 +#define PROP_INDEX_WCHO 259 +#define PROP_INDEX_WANCHO 259 +#define PROP_INDEX_WARANGCITI 260 +#define PROP_INDEX_WARA 260 +#define PROP_INDEX_WHITESPACE 261 +#define PROP_INDEX_WSPACE 261 +#define PROP_INDEX_XIDC 262 +#define PROP_INDEX_XIDCONTINUE 262 +#define PROP_INDEX_XIDS 263 +#define PROP_INDEX_XIDSTART 263 +#define PROP_INDEX_YEZIDI 264 +#define PROP_INDEX_YEZI 264 +#define PROP_INDEX_YI 265 +#define PROP_INDEX_YIII 265 +#define PROP_INDEX_SEPARATOR 266 +#define PROP_INDEX_Z 266 +#define PROP_INDEX_ZANABAZARSQUARE 267 +#define PROP_INDEX_ZANB 267 +#define PROP_INDEX_ZL 268 +#define PROP_INDEX_LINESEPARATOR 268 +#define PROP_INDEX_ZP 269 +#define PROP_INDEX_PARAGRAPHSEPARATOR 269 +#define PROP_INDEX_ZS 270 +#define PROP_INDEX_SPACESEPARATOR 270 +#define PROP_INDEX_INBASICLATIN 271 +#define PROP_INDEX_INLATIN1SUPPLEMENT 272 +#define PROP_INDEX_INLATINEXTENDEDA 273 +#define PROP_INDEX_INLATINEXTENDEDB 274 +#define PROP_INDEX_INIPAEXTENSIONS 275 +#define PROP_INDEX_INSPACINGMODIFIERLETTERS 276 +#define PROP_INDEX_INCOMBININGDIACRITICALMARKS 277 +#define 
PROP_INDEX_INGREEKANDCOPTIC 278 +#define PROP_INDEX_INCYRILLIC 279 +#define PROP_INDEX_INCYRILLICSUPPLEMENT 280 +#define PROP_INDEX_INARMENIAN 281 +#define PROP_INDEX_INHEBREW 282 +#define PROP_INDEX_INARABIC 283 +#define PROP_INDEX_INSYRIAC 284 +#define PROP_INDEX_INARABICSUPPLEMENT 285 +#define PROP_INDEX_INTHAANA 286 +#define PROP_INDEX_INNKO 287 +#define PROP_INDEX_INSAMARITAN 288 +#define PROP_INDEX_INMANDAIC 289 +#define PROP_INDEX_INSYRIACSUPPLEMENT 290 +#define PROP_INDEX_INARABICEXTENDEDA 291 +#define PROP_INDEX_INDEVANAGARI 292 +#define PROP_INDEX_INBENGALI 293 +#define PROP_INDEX_INGURMUKHI 294 +#define PROP_INDEX_INGUJARATI 295 +#define PROP_INDEX_INORIYA 296 +#define PROP_INDEX_INTAMIL 297 +#define PROP_INDEX_INTELUGU 298 +#define PROP_INDEX_INKANNADA 299 +#define PROP_INDEX_INMALAYALAM 300 +#define PROP_INDEX_INSINHALA 301 +#define PROP_INDEX_INTHAI 302 +#define PROP_INDEX_INLAO 303 +#define PROP_INDEX_INTIBETAN 304 +#define PROP_INDEX_INMYANMAR 305 +#define PROP_INDEX_INGEORGIAN 306 +#define PROP_INDEX_INHANGULJAMO 307 +#define PROP_INDEX_INETHIOPIC 308 +#define PROP_INDEX_INETHIOPICSUPPLEMENT 309 +#define PROP_INDEX_INCHEROKEE 310 +#define PROP_INDEX_INUNIFIEDCANADIANABORIGINALSYLLABICS 311 +#define PROP_INDEX_INOGHAM 312 +#define PROP_INDEX_INRUNIC 313 +#define PROP_INDEX_INTAGALOG 314 +#define PROP_INDEX_INHANUNOO 315 +#define PROP_INDEX_INBUHID 316 +#define PROP_INDEX_INTAGBANWA 317 +#define PROP_INDEX_INKHMER 318 +#define PROP_INDEX_INMONGOLIAN 319 +#define PROP_INDEX_INUNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED 320 +#define PROP_INDEX_INLIMBU 321 +#define PROP_INDEX_INTAILE 322 +#define PROP_INDEX_INNEWTAILUE 323 +#define PROP_INDEX_INKHMERSYMBOLS 324 +#define PROP_INDEX_INBUGINESE 325 +#define PROP_INDEX_INTAITHAM 326 +#define PROP_INDEX_INCOMBININGDIACRITICALMARKSEXTENDED 327 +#define PROP_INDEX_INBALINESE 328 +#define PROP_INDEX_INSUNDANESE 329 +#define PROP_INDEX_INBATAK 330 +#define PROP_INDEX_INLEPCHA 331 +#define PROP_INDEX_INOLCHIKI 332 
+#define PROP_INDEX_INCYRILLICEXTENDEDC 333 +#define PROP_INDEX_INGEORGIANEXTENDED 334 +#define PROP_INDEX_INSUNDANESESUPPLEMENT 335 +#define PROP_INDEX_INVEDICEXTENSIONS 336 +#define PROP_INDEX_INPHONETICEXTENSIONS 337 +#define PROP_INDEX_INPHONETICEXTENSIONSSUPPLEMENT 338 +#define PROP_INDEX_INCOMBININGDIACRITICALMARKSSUPPLEMENT 339 +#define PROP_INDEX_INLATINEXTENDEDADDITIONAL 340 +#define PROP_INDEX_INGREEKEXTENDED 341 +#define PROP_INDEX_INGENERALPUNCTUATION 342 +#define PROP_INDEX_INSUPERSCRIPTSANDSUBSCRIPTS 343 +#define PROP_INDEX_INCURRENCYSYMBOLS 344 +#define PROP_INDEX_INCOMBININGDIACRITICALMARKSFORSYMBOLS 345 +#define PROP_INDEX_INLETTERLIKESYMBOLS 346 +#define PROP_INDEX_INNUMBERFORMS 347 +#define PROP_INDEX_INARROWS 348 +#define PROP_INDEX_INMATHEMATICALOPERATORS 349 +#define PROP_INDEX_INMISCELLANEOUSTECHNICAL 350 +#define PROP_INDEX_INCONTROLPICTURES 351 +#define PROP_INDEX_INOPTICALCHARACTERRECOGNITION 352 +#define PROP_INDEX_INENCLOSEDALPHANUMERICS 353 +#define PROP_INDEX_INBOXDRAWING 354 +#define PROP_INDEX_INBLOCKELEMENTS 355 +#define PROP_INDEX_INGEOMETRICSHAPES 356 +#define PROP_INDEX_INMISCELLANEOUSSYMBOLS 357 +#define PROP_INDEX_INDINGBATS 358 +#define PROP_INDEX_INMISCELLANEOUSMATHEMATICALSYMBOLSA 359 +#define PROP_INDEX_INSUPPLEMENTALARROWSA 360 +#define PROP_INDEX_INBRAILLEPATTERNS 361 +#define PROP_INDEX_INSUPPLEMENTALARROWSB 362 +#define PROP_INDEX_INMISCELLANEOUSMATHEMATICALSYMBOLSB 363 +#define PROP_INDEX_INSUPPLEMENTALMATHEMATICALOPERATORS 364 +#define PROP_INDEX_INMISCELLANEOUSSYMBOLSANDARROWS 365 +#define PROP_INDEX_INGLAGOLITIC 366 +#define PROP_INDEX_INLATINEXTENDEDC 367 +#define PROP_INDEX_INCOPTIC 368 +#define PROP_INDEX_INGEORGIANSUPPLEMENT 369 +#define PROP_INDEX_INTIFINAGH 370 +#define PROP_INDEX_INETHIOPICEXTENDED 371 +#define PROP_INDEX_INCYRILLICEXTENDEDA 372 +#define PROP_INDEX_INSUPPLEMENTALPUNCTUATION 373 +#define PROP_INDEX_INCJKRADICALSSUPPLEMENT 374 +#define PROP_INDEX_INKANGXIRADICALS 375 +#define 
PROP_INDEX_INIDEOGRAPHICDESCRIPTIONCHARACTERS 376 +#define PROP_INDEX_INCJKSYMBOLSANDPUNCTUATION 377 +#define PROP_INDEX_INHIRAGANA 378 +#define PROP_INDEX_INKATAKANA 379 +#define PROP_INDEX_INBOPOMOFO 380 +#define PROP_INDEX_INHANGULCOMPATIBILITYJAMO 381 +#define PROP_INDEX_INKANBUN 382 +#define PROP_INDEX_INBOPOMOFOEXTENDED 383 +#define PROP_INDEX_INCJKSTROKES 384 +#define PROP_INDEX_INKATAKANAPHONETICEXTENSIONS 385 +#define PROP_INDEX_INENCLOSEDCJKLETTERSANDMONTHS 386 +#define PROP_INDEX_INCJKCOMPATIBILITY 387 +#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONA 388 +#define PROP_INDEX_INYIJINGHEXAGRAMSYMBOLS 389 +#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHS 390 +#define PROP_INDEX_INYISYLLABLES 391 +#define PROP_INDEX_INYIRADICALS 392 +#define PROP_INDEX_INLISU 393 +#define PROP_INDEX_INVAI 394 +#define PROP_INDEX_INCYRILLICEXTENDEDB 395 +#define PROP_INDEX_INBAMUM 396 +#define PROP_INDEX_INMODIFIERTONELETTERS 397 +#define PROP_INDEX_INLATINEXTENDEDD 398 +#define PROP_INDEX_INSYLOTINAGRI 399 +#define PROP_INDEX_INCOMMONINDICNUMBERFORMS 400 +#define PROP_INDEX_INPHAGSPA 401 +#define PROP_INDEX_INSAURASHTRA 402 +#define PROP_INDEX_INDEVANAGARIEXTENDED 403 +#define PROP_INDEX_INKAYAHLI 404 +#define PROP_INDEX_INREJANG 405 +#define PROP_INDEX_INHANGULJAMOEXTENDEDA 406 +#define PROP_INDEX_INJAVANESE 407 +#define PROP_INDEX_INMYANMAREXTENDEDB 408 +#define PROP_INDEX_INCHAM 409 +#define PROP_INDEX_INMYANMAREXTENDEDA 410 +#define PROP_INDEX_INTAIVIET 411 +#define PROP_INDEX_INMEETEIMAYEKEXTENSIONS 412 +#define PROP_INDEX_INETHIOPICEXTENDEDA 413 +#define PROP_INDEX_INLATINEXTENDEDE 414 +#define PROP_INDEX_INCHEROKEESUPPLEMENT 415 +#define PROP_INDEX_INMEETEIMAYEK 416 +#define PROP_INDEX_INHANGULSYLLABLES 417 +#define PROP_INDEX_INHANGULJAMOEXTENDEDB 418 +#define PROP_INDEX_INHIGHSURROGATES 419 +#define PROP_INDEX_INHIGHPRIVATEUSESURROGATES 420 +#define PROP_INDEX_INLOWSURROGATES 421 +#define PROP_INDEX_INPRIVATEUSEAREA 422 +#define PROP_INDEX_INCJKCOMPATIBILITYIDEOGRAPHS 
423 +#define PROP_INDEX_INALPHABETICPRESENTATIONFORMS 424 +#define PROP_INDEX_INARABICPRESENTATIONFORMSA 425 +#define PROP_INDEX_INVARIATIONSELECTORS 426 +#define PROP_INDEX_INVERTICALFORMS 427 +#define PROP_INDEX_INCOMBININGHALFMARKS 428 +#define PROP_INDEX_INCJKCOMPATIBILITYFORMS 429 +#define PROP_INDEX_INSMALLFORMVARIANTS 430 +#define PROP_INDEX_INARABICPRESENTATIONFORMSB 431 +#define PROP_INDEX_INHALFWIDTHANDFULLWIDTHFORMS 432 +#define PROP_INDEX_INSPECIALS 433 +#define PROP_INDEX_INLINEARBSYLLABARY 434 +#define PROP_INDEX_INLINEARBIDEOGRAMS 435 +#define PROP_INDEX_INAEGEANNUMBERS 436 +#define PROP_INDEX_INANCIENTGREEKNUMBERS 437 +#define PROP_INDEX_INANCIENTSYMBOLS 438 +#define PROP_INDEX_INPHAISTOSDISC 439 +#define PROP_INDEX_INLYCIAN 440 +#define PROP_INDEX_INCARIAN 441 +#define PROP_INDEX_INCOPTICEPACTNUMBERS 442 +#define PROP_INDEX_INOLDITALIC 443 +#define PROP_INDEX_INGOTHIC 444 +#define PROP_INDEX_INOLDPERMIC 445 +#define PROP_INDEX_INUGARITIC 446 +#define PROP_INDEX_INOLDPERSIAN 447 +#define PROP_INDEX_INDESERET 448 +#define PROP_INDEX_INSHAVIAN 449 +#define PROP_INDEX_INOSMANYA 450 +#define PROP_INDEX_INOSAGE 451 +#define PROP_INDEX_INELBASAN 452 +#define PROP_INDEX_INCAUCASIANALBANIAN 453 +#define PROP_INDEX_INLINEARA 454 +#define PROP_INDEX_INCYPRIOTSYLLABARY 455 +#define PROP_INDEX_INIMPERIALARAMAIC 456 +#define PROP_INDEX_INPALMYRENE 457 +#define PROP_INDEX_INNABATAEAN 458 +#define PROP_INDEX_INHATRAN 459 +#define PROP_INDEX_INPHOENICIAN 460 +#define PROP_INDEX_INLYDIAN 461 +#define PROP_INDEX_INMEROITICHIEROGLYPHS 462 +#define PROP_INDEX_INMEROITICCURSIVE 463 +#define PROP_INDEX_INKHAROSHTHI 464 +#define PROP_INDEX_INOLDSOUTHARABIAN 465 +#define PROP_INDEX_INOLDNORTHARABIAN 466 +#define PROP_INDEX_INMANICHAEAN 467 +#define PROP_INDEX_INAVESTAN 468 +#define PROP_INDEX_ININSCRIPTIONALPARTHIAN 469 +#define PROP_INDEX_ININSCRIPTIONALPAHLAVI 470 +#define PROP_INDEX_INPSALTERPAHLAVI 471 +#define PROP_INDEX_INOLDTURKIC 472 +#define 
PROP_INDEX_INOLDHUNGARIAN 473 +#define PROP_INDEX_INHANIFIROHINGYA 474 +#define PROP_INDEX_INRUMINUMERALSYMBOLS 475 +#define PROP_INDEX_INYEZIDI 476 +#define PROP_INDEX_INOLDSOGDIAN 477 +#define PROP_INDEX_INSOGDIAN 478 +#define PROP_INDEX_INCHORASMIAN 479 +#define PROP_INDEX_INELYMAIC 480 +#define PROP_INDEX_INBRAHMI 481 +#define PROP_INDEX_INKAITHI 482 +#define PROP_INDEX_INSORASOMPENG 483 +#define PROP_INDEX_INCHAKMA 484 +#define PROP_INDEX_INMAHAJANI 485 +#define PROP_INDEX_INSHARADA 486 +#define PROP_INDEX_INSINHALAARCHAICNUMBERS 487 +#define PROP_INDEX_INKHOJKI 488 +#define PROP_INDEX_INMULTANI 489 +#define PROP_INDEX_INKHUDAWADI 490 +#define PROP_INDEX_INGRANTHA 491 +#define PROP_INDEX_INNEWA 492 +#define PROP_INDEX_INTIRHUTA 493 +#define PROP_INDEX_INSIDDHAM 494 +#define PROP_INDEX_INMODI 495 +#define PROP_INDEX_INMONGOLIANSUPPLEMENT 496 +#define PROP_INDEX_INTAKRI 497 +#define PROP_INDEX_INAHOM 498 +#define PROP_INDEX_INDOGRA 499 +#define PROP_INDEX_INWARANGCITI 500 +#define PROP_INDEX_INDIVESAKURU 501 +#define PROP_INDEX_INNANDINAGARI 502 +#define PROP_INDEX_INZANABAZARSQUARE 503 +#define PROP_INDEX_INSOYOMBO 504 +#define PROP_INDEX_INPAUCINHAU 505 +#define PROP_INDEX_INBHAIKSUKI 506 +#define PROP_INDEX_INMARCHEN 507 +#define PROP_INDEX_INMASARAMGONDI 508 +#define PROP_INDEX_INGUNJALAGONDI 509 +#define PROP_INDEX_INMAKASAR 510 +#define PROP_INDEX_INLISUSUPPLEMENT 511 +#define PROP_INDEX_INTAMILSUPPLEMENT 512 +#define PROP_INDEX_INCUNEIFORM 513 +#define PROP_INDEX_INCUNEIFORMNUMBERSANDPUNCTUATION 514 +#define PROP_INDEX_INEARLYDYNASTICCUNEIFORM 515 +#define PROP_INDEX_INEGYPTIANHIEROGLYPHS 516 +#define PROP_INDEX_INEGYPTIANHIEROGLYPHFORMATCONTROLS 517 +#define PROP_INDEX_INANATOLIANHIEROGLYPHS 518 +#define PROP_INDEX_INBAMUMSUPPLEMENT 519 +#define PROP_INDEX_INMRO 520 +#define PROP_INDEX_INBASSAVAH 521 +#define PROP_INDEX_INPAHAWHHMONG 522 +#define PROP_INDEX_INMEDEFAIDRIN 523 +#define PROP_INDEX_INMIAO 524 +#define 
PROP_INDEX_INIDEOGRAPHICSYMBOLSANDPUNCTUATION 525 +#define PROP_INDEX_INTANGUT 526 +#define PROP_INDEX_INTANGUTCOMPONENTS 527 +#define PROP_INDEX_INKHITANSMALLSCRIPT 528 +#define PROP_INDEX_INTANGUTSUPPLEMENT 529 +#define PROP_INDEX_INKANASUPPLEMENT 530 +#define PROP_INDEX_INKANAEXTENDEDA 531 +#define PROP_INDEX_INSMALLKANAEXTENSION 532 +#define PROP_INDEX_INNUSHU 533 +#define PROP_INDEX_INDUPLOYAN 534 +#define PROP_INDEX_INSHORTHANDFORMATCONTROLS 535 +#define PROP_INDEX_INBYZANTINEMUSICALSYMBOLS 536 +#define PROP_INDEX_INMUSICALSYMBOLS 537 +#define PROP_INDEX_INANCIENTGREEKMUSICALNOTATION 538 +#define PROP_INDEX_INMAYANNUMERALS 539 +#define PROP_INDEX_INTAIXUANJINGSYMBOLS 540 +#define PROP_INDEX_INCOUNTINGRODNUMERALS 541 +#define PROP_INDEX_INMATHEMATICALALPHANUMERICSYMBOLS 542 +#define PROP_INDEX_INSUTTONSIGNWRITING 543 +#define PROP_INDEX_INGLAGOLITICSUPPLEMENT 544 +#define PROP_INDEX_INNYIAKENGPUACHUEHMONG 545 +#define PROP_INDEX_INWANCHO 546 +#define PROP_INDEX_INMENDEKIKAKUI 547 +#define PROP_INDEX_INADLAM 548 +#define PROP_INDEX_ININDICSIYAQNUMBERS 549 +#define PROP_INDEX_INOTTOMANSIYAQNUMBERS 550 +#define PROP_INDEX_INARABICMATHEMATICALALPHABETICSYMBOLS 551 +#define PROP_INDEX_INMAHJONGTILES 552 +#define PROP_INDEX_INDOMINOTILES 553 +#define PROP_INDEX_INPLAYINGCARDS 554 +#define PROP_INDEX_INENCLOSEDALPHANUMERICSUPPLEMENT 555 +#define PROP_INDEX_INENCLOSEDIDEOGRAPHICSUPPLEMENT 556 +#define PROP_INDEX_INMISCELLANEOUSSYMBOLSANDPICTOGRAPHS 557 +#define PROP_INDEX_INEMOTICONS 558 +#define PROP_INDEX_INORNAMENTALDINGBATS 559 +#define PROP_INDEX_INTRANSPORTANDMAPSYMBOLS 560 +#define PROP_INDEX_INALCHEMICALSYMBOLS 561 +#define PROP_INDEX_INGEOMETRICSHAPESEXTENDED 562 +#define PROP_INDEX_INSUPPLEMENTALARROWSC 563 +#define PROP_INDEX_INSUPPLEMENTALSYMBOLSANDPICTOGRAPHS 564 +#define PROP_INDEX_INCHESSSYMBOLS 565 +#define PROP_INDEX_INSYMBOLSANDPICTOGRAPHSEXTENDEDA 566 +#define PROP_INDEX_INSYMBOLSFORLEGACYCOMPUTING 567 +#define 
PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONB 568 +#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONC 569 +#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIOND 570 +#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONE 571 +#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONF 572 +#define PROP_INDEX_INCJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT 573 +#define PROP_INDEX_INCJKUNIFIEDIDEOGRAPHSEXTENSIONG 574 +#define PROP_INDEX_INTAGS 575 +#define PROP_INDEX_INVARIATIONSELECTORSSUPPLEMENT 576 +#define PROP_INDEX_INSUPPLEMENTARYPRIVATEUSEAREAA 577 +#define PROP_INDEX_INSUPPLEMENTARYPRIVATEUSEAREAB 578 +#define PROP_INDEX_INNOBLOCK 579 diff --git a/src/unicode_property_data_posix.c b/src/unicode_property_data_posix.c index e299e85..b050ff2 100644 --- a/src/unicode_property_data_posix.c +++ b/src/unicode_property_data_posix.c @@ -33,6 +33,32 @@ /* Generated by make_unicode_property_data.py. */ +/*- + * Copyright (c) 2016-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + /* PROPERTY: 'NEWLINE': POSIX [[:NEWLINE:]] */ static const OnigCodePoint @@ -42,7 +68,7 @@ CR_NEWLINE[] = { 1, /* PROPERTY: 'Alpha': POSIX [[:Alpha:]] */ static const OnigCodePoint -CR_Alpha[] = { 679, +CR_Alpha[] = { 695, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -96,7 +122,7 @@ CR_Alpha[] = { 679, 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d4, 0x08df, 0x08e3, 0x08e9, 0x08f0, 0x093b, @@ -200,8 +226,7 @@ CR_Alpha[] = { 679, 0x0cde, 0x0cde, 0x0ce0, 0x0ce3, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d44, @@ -211,7 +236,7 @@ CR_Alpha[] = { 679, 0x0d54, 0x0d57, 0x0d5f, 0x0d63, 0x0d7a, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -299,6 +324,7 @@ CR_Alpha[] = { 679, 0x1a20, 0x1a5e, 0x1a61, 0x1a74, 0x1aa7, 0x1aa7, +0x1abf, 0x1ac0, 0x1b00, 0x1b33, 0x1b35, 0x1b43, 0x1b45, 0x1b4b, @@ -386,10 +412,10 @@ CR_Alpha[] = { 679, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -401,8 +427,8 @@ CR_Alpha[] = { 679, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa805, +0xa7c2, 0xa7ca, +0xa7f5, 0xa805, 0xa807, 0xa827, 0xa840, 0xa873, 0xa880, 0xa8c3, @@ -433,7 +459,7 @@ CR_Alpha[] = { 679, 0xab20, 
0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabea, 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, @@ -518,19 +544,24 @@ CR_Alpha[] = { 679, 0x10c80, 0x10cb2, 0x10cc0, 0x10cf2, 0x10d00, 0x10d27, +0x10e80, 0x10ea9, +0x10eab, 0x10eac, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f45, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11000, 0x11045, 0x11082, 0x110b8, 0x110d0, 0x110e8, 0x11100, 0x11132, -0x11144, 0x11146, +0x11144, 0x11147, 0x11150, 0x11172, 0x11176, 0x11176, 0x11180, 0x111bf, 0x111c1, 0x111c4, +0x111ce, 0x111cf, 0x111da, 0x111da, 0x111dc, 0x111dc, 0x11200, 0x11211, @@ -559,7 +590,7 @@ CR_Alpha[] = { 679, 0x11400, 0x11441, 0x11443, 0x11445, 0x11447, 0x1144a, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114c1, 0x114c4, 0x114c5, 0x114c7, 0x114c7, @@ -575,7 +606,14 @@ CR_Alpha[] = { 679, 0x1171d, 0x1172a, 0x11800, 0x11838, 0x118a0, 0x118df, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x1193c, +0x1193f, 0x11942, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119df, @@ -608,6 +646,7 @@ CR_Alpha[] = { 679, 0x11d93, 0x11d96, 0x11d98, 0x11d98, 0x11ee0, 0x11ef6, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -626,8 +665,10 @@ CR_Alpha[] = { 679, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, 0x16fe3, 0x16fe3, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -716,12 +757,13 @@ CR_Alpha[] = { 679, 0x1f130, 0x1f149, 0x1f150, 0x1f169, 0x1f170, 0x1f189, -0x20000, 0x2a6d6, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_Alpha */ /* PROPERTY: 'Blank': POSIX [[:Blank:]] */ @@ -746,7 +788,7 @@ CR_Cntrl[] = { 2, /* PROPERTY: 'Digit': POSIX [[:Digit:]] */ static const OnigCodePoint -CR_Digit[] = { 59, +CR_Digit[] = { 61, 
0x0030, 0x0039, 0x0660, 0x0669, 0x06f0, 0x06f9, @@ -797,6 +839,7 @@ CR_Digit[] = { 59, 0x116c0, 0x116c9, 0x11730, 0x11739, 0x118e0, 0x118e9, +0x11950, 0x11959, 0x11c50, 0x11c59, 0x11d50, 0x11d59, 0x11da0, 0x11da9, @@ -806,11 +849,12 @@ CR_Digit[] = { 59, 0x1e140, 0x1e149, 0x1e2f0, 0x1e2f9, 0x1e950, 0x1e959, +0x1fbf0, 0x1fbf9, }; /* END of CR_Digit */ /* PROPERTY: 'Graph': POSIX [[:Graph:]] */ static const OnigCodePoint -CR_Graph[] = { 671, +CR_Graph[] = { 682, 0x0021, 0x007e, 0x00a1, 0x0377, 0x037a, 0x037f, @@ -835,7 +879,7 @@ CR_Graph[] = { 671, 0x085e, 0x085e, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x0983, 0x0985, 0x098c, 0x098f, 0x0990, @@ -890,7 +934,7 @@ CR_Graph[] = { 671, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b77, @@ -934,15 +978,14 @@ CR_Graph[] = { 671, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, 0x0d4a, 0x0d4f, 0x0d54, 0x0d63, 0x0d66, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -1029,7 +1072,7 @@ CR_Graph[] = { 671, 0x1a7f, 0x1a89, 0x1a90, 0x1a99, 0x1aa0, 0x1aad, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b7c, 0x1b80, 0x1bf3, @@ -1070,7 +1113,7 @@ CR_Graph[] = { 671, 0x2440, 0x244a, 0x2460, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2c2e, +0x2b97, 0x2c2e, 0x2c30, 0x2c5e, 0x2c60, 0x2cf3, 0x2cf9, 0x2d25, @@ -1087,7 +1130,7 @@ CR_Graph[] = { 671, 0x2dc8, 0x2dce, 0x2dd0, 0x2dd6, 0x2dd8, 0x2dde, -0x2de0, 0x2e4f, +0x2de0, 0x2e52, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -1097,18 +1140,16 @@ CR_Graph[] = { 671, 0x3099, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x3190, 0x31ba, -0x31c0, 0x31e3, +0x3190, 0x31e3, 0x31f0, 0x321e, -0x3220, 0x4db5, -0x4dc0, 0x9fef, +0x3220, 0x9ffc, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, 0xa640, 0xa6f7, 0xa700, 0xa7bf, -0xa7c2, 0xa7c6, 
-0xa7f7, 0xa82b, +0xa7c2, 0xa7ca, +0xa7f5, 0xa82c, 0xa830, 0xa839, 0xa840, 0xa877, 0xa880, 0xa8c5, @@ -1128,7 +1169,7 @@ CR_Graph[] = { 671, 0xab11, 0xab16, 0xab20, 0xab26, 0xab28, 0xab2e, -0xab30, 0xab67, +0xab30, 0xab6b, 0xab70, 0xabed, 0xabf0, 0xabf9, 0xac00, 0xd7a3, @@ -1173,7 +1214,7 @@ CR_Graph[] = { 671, 0x10100, 0x10102, 0x10107, 0x10133, 0x10137, 0x1018e, -0x10190, 0x1019b, +0x10190, 0x1019c, 0x101a0, 0x101a0, 0x101d0, 0x101fd, 0x10280, 0x1029c, @@ -1233,8 +1274,12 @@ CR_Graph[] = { 671, 0x10cfa, 0x10d27, 0x10d30, 0x10d39, 0x10e60, 0x10e7e, +0x10e80, 0x10ea9, +0x10eab, 0x10ead, +0x10eb0, 0x10eb1, 0x10f00, 0x10f27, 0x10f30, 0x10f59, +0x10fb0, 0x10fcb, 0x10fe0, 0x10ff6, 0x11000, 0x1104d, 0x11052, 0x1106f, @@ -1243,10 +1288,9 @@ CR_Graph[] = { 671, 0x110d0, 0x110e8, 0x110f0, 0x110f9, 0x11100, 0x11134, -0x11136, 0x11146, +0x11136, 0x11147, 0x11150, 0x11176, -0x11180, 0x111cd, -0x111d0, 0x111df, +0x11180, 0x111df, 0x111e1, 0x111f4, 0x11200, 0x11211, 0x11213, 0x1123e, @@ -1272,9 +1316,8 @@ CR_Graph[] = { 671, 0x1135d, 0x11363, 0x11366, 0x1136c, 0x11370, 0x11374, -0x11400, 0x11459, -0x1145b, 0x1145b, -0x1145d, 0x1145f, +0x11400, 0x1145b, +0x1145d, 0x11461, 0x11480, 0x114c7, 0x114d0, 0x114d9, 0x11580, 0x115b5, @@ -1289,7 +1332,14 @@ CR_Graph[] = { 671, 0x11730, 0x1173f, 0x11800, 0x1183b, 0x118a0, 0x118f2, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11946, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e4, @@ -1317,6 +1367,7 @@ CR_Graph[] = { 671, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef8, +0x11fb0, 0x11fb0, 0x11fc0, 0x11ff1, 0x11fff, 0x12399, 0x12400, 0x1246e, @@ -1340,9 +1391,11 @@ CR_Graph[] = { 671, 0x16f00, 0x16f4a, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, -0x16fe0, 0x16fe3, +0x16fe0, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 
0x1b167, @@ -1440,17 +1493,15 @@ CR_Graph[] = { 671, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f100, 0x1f10c, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f100, 0x1f1ad, 0x1f1e6, 0x1f202, 0x1f210, 0x1f23b, 0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, -0x1f300, 0x1f6d5, +0x1f300, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 0x1f7eb, @@ -1459,24 +1510,28 @@ CR_Graph[] = { 671, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, +0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, -0x20000, 0x2a6d6, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, @@ -1486,7 +1541,7 @@ CR_Graph[] = { 671, /* PROPERTY: 'Lower': POSIX [[:Lower:]] */ static const OnigCodePoint -CR_Lower[] = { 649, +CR_Lower[] = { 652, 0x0061, 0x007a, 0x00aa, 0x00aa, 0x00b5, 0x00b5, @@ -2095,9 +2150,12 @@ CR_Lower[] = { 649, 0xa7bd, 0xa7bd, 0xa7bf, 0xa7bf, 0xa7c3, 0xa7c3, +0xa7c8, 0xa7c8, +0xa7ca, 0xa7ca, +0xa7f6, 0xa7f6, 0xa7f8, 0xa7fa, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab68, 0xab70, 0xabbf, 0xfb00, 0xfb06, 0xfb13, 0xfb17, @@ -2140,7 +2198,7 @@ CR_Lower[] = { 649, /* PROPERTY: 'Print': POSIX [[:Print:]] */ static const OnigCodePoint -CR_Print[] = { 668, +CR_Print[] = { 679, 0x0020, 0x007e, 0x00a0, 0x0377, 0x037a, 0x037f, @@ -2165,7 +2223,7 @@ CR_Print[] = { 668, 0x085e, 0x085e, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x0983, 0x0985, 0x098c, 0x098f, 
0x0990, @@ -2220,7 +2278,7 @@ CR_Print[] = { 668, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b77, @@ -2264,15 +2322,14 @@ CR_Print[] = { 668, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, 0x0d4a, 0x0d4f, 0x0d54, 0x0d63, 0x0d66, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -2358,7 +2415,7 @@ CR_Print[] = { 668, 0x1a7f, 0x1a89, 0x1a90, 0x1a99, 0x1aa0, 0x1aad, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b7c, 0x1b80, 0x1bf3, @@ -2397,7 +2454,7 @@ CR_Print[] = { 668, 0x2440, 0x244a, 0x2460, 0x2b73, 0x2b76, 0x2b95, -0x2b98, 0x2c2e, +0x2b97, 0x2c2e, 0x2c30, 0x2c5e, 0x2c60, 0x2cf3, 0x2cf9, 0x2d25, @@ -2414,7 +2471,7 @@ CR_Print[] = { 668, 0x2dc8, 0x2dce, 0x2dd0, 0x2dd6, 0x2dd8, 0x2dde, -0x2de0, 0x2e4f, +0x2de0, 0x2e52, 0x2e80, 0x2e99, 0x2e9b, 0x2ef3, 0x2f00, 0x2fd5, @@ -2424,18 +2481,16 @@ CR_Print[] = { 668, 0x3099, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x3190, 0x31ba, -0x31c0, 0x31e3, +0x3190, 0x31e3, 0x31f0, 0x321e, -0x3220, 0x4db5, -0x4dc0, 0x9fef, +0x3220, 0x9ffc, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, 0xa640, 0xa6f7, 0xa700, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa82b, +0xa7c2, 0xa7ca, +0xa7f5, 0xa82c, 0xa830, 0xa839, 0xa840, 0xa877, 0xa880, 0xa8c5, @@ -2455,7 +2510,7 @@ CR_Print[] = { 668, 0xab11, 0xab16, 0xab20, 0xab26, 0xab28, 0xab2e, -0xab30, 0xab67, +0xab30, 0xab6b, 0xab70, 0xabed, 0xabf0, 0xabf9, 0xac00, 0xd7a3, @@ -2500,7 +2555,7 @@ CR_Print[] = { 668, 0x10100, 0x10102, 0x10107, 0x10133, 0x10137, 0x1018e, -0x10190, 0x1019b, +0x10190, 0x1019c, 0x101a0, 0x101a0, 0x101d0, 0x101fd, 0x10280, 0x1029c, @@ -2560,8 +2615,12 @@ CR_Print[] = { 668, 0x10cfa, 0x10d27, 0x10d30, 0x10d39, 0x10e60, 0x10e7e, +0x10e80, 0x10ea9, +0x10eab, 0x10ead, +0x10eb0, 0x10eb1, 0x10f00, 0x10f27, 0x10f30, 0x10f59, +0x10fb0, 0x10fcb, 
0x10fe0, 0x10ff6, 0x11000, 0x1104d, 0x11052, 0x1106f, @@ -2570,10 +2629,9 @@ CR_Print[] = { 668, 0x110d0, 0x110e8, 0x110f0, 0x110f9, 0x11100, 0x11134, -0x11136, 0x11146, +0x11136, 0x11147, 0x11150, 0x11176, -0x11180, 0x111cd, -0x111d0, 0x111df, +0x11180, 0x111df, 0x111e1, 0x111f4, 0x11200, 0x11211, 0x11213, 0x1123e, @@ -2599,9 +2657,8 @@ CR_Print[] = { 668, 0x1135d, 0x11363, 0x11366, 0x1136c, 0x11370, 0x11374, -0x11400, 0x11459, -0x1145b, 0x1145b, -0x1145d, 0x1145f, +0x11400, 0x1145b, +0x1145d, 0x11461, 0x11480, 0x114c7, 0x114d0, 0x114d9, 0x11580, 0x115b5, @@ -2616,7 +2673,14 @@ CR_Print[] = { 668, 0x11730, 0x1173f, 0x11800, 0x1183b, 0x118a0, 0x118f2, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11946, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e4, @@ -2644,6 +2708,7 @@ CR_Print[] = { 668, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef8, +0x11fb0, 0x11fb0, 0x11fc0, 0x11ff1, 0x11fff, 0x12399, 0x12400, 0x1246e, @@ -2667,9 +2732,11 @@ CR_Print[] = { 668, 0x16f00, 0x16f4a, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, -0x16fe0, 0x16fe3, +0x16fe0, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -2767,17 +2834,15 @@ CR_Print[] = { 668, 0x1f0b1, 0x1f0bf, 0x1f0c1, 0x1f0cf, 0x1f0d1, 0x1f0f5, -0x1f100, 0x1f10c, -0x1f110, 0x1f16c, -0x1f170, 0x1f1ac, +0x1f100, 0x1f1ad, 0x1f1e6, 0x1f202, 0x1f210, 0x1f23b, 0x1f240, 0x1f248, 0x1f250, 0x1f251, 0x1f260, 0x1f265, -0x1f300, 0x1f6d5, +0x1f300, 0x1f6d7, 0x1f6e0, 0x1f6ec, -0x1f6f0, 0x1f6fa, +0x1f6f0, 0x1f6fc, 0x1f700, 0x1f773, 0x1f780, 0x1f7d8, 0x1f7e0, 0x1f7eb, @@ -2786,24 +2851,28 @@ CR_Print[] = { 668, 0x1f850, 0x1f859, 0x1f860, 0x1f887, 0x1f890, 0x1f8ad, -0x1f900, 0x1f90b, -0x1f90d, 0x1f971, -0x1f973, 0x1f976, -0x1f97a, 0x1f9a2, -0x1f9a5, 0x1f9aa, -0x1f9ae, 0x1f9ca, +0x1f8b0, 0x1f8b1, +0x1f900, 0x1f978, 
+0x1f97a, 0x1f9cb, 0x1f9cd, 0x1fa53, 0x1fa60, 0x1fa6d, -0x1fa70, 0x1fa73, +0x1fa70, 0x1fa74, 0x1fa78, 0x1fa7a, -0x1fa80, 0x1fa82, -0x1fa90, 0x1fa95, -0x20000, 0x2a6d6, +0x1fa80, 0x1fa86, +0x1fa90, 0x1faa8, +0x1fab0, 0x1fab6, +0x1fac0, 0x1fac2, +0x1fad0, 0x1fad6, +0x1fb00, 0x1fb92, +0x1fb94, 0x1fbca, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, @@ -2813,7 +2882,7 @@ CR_Print[] = { 668, /* PROPERTY: 'Punct': POSIX [[:Punct:]] */ static const OnigCodePoint -CR_Punct[] = { 182, +CR_Punct[] = { 185, 0x0021, 0x0023, 0x0025, 0x002a, 0x002c, 0x002f, @@ -2904,6 +2973,7 @@ CR_Punct[] = { 182, 0x2d70, 0x2d70, 0x2e00, 0x2e2e, 0x2e30, 0x2e4f, +0x2e52, 0x2e52, 0x3001, 0x3003, 0x3008, 0x3011, 0x3014, 0x301f, @@ -2957,6 +3027,7 @@ CR_Punct[] = { 182, 0x10af0, 0x10af6, 0x10b39, 0x10b3f, 0x10b99, 0x10b9c, +0x10ead, 0x10ead, 0x10f55, 0x10f59, 0x11047, 0x1104d, 0x110bb, 0x110bc, @@ -2970,7 +3041,7 @@ CR_Punct[] = { 182, 0x11238, 0x1123d, 0x112a9, 0x112a9, 0x1144b, 0x1144f, -0x1145b, 0x1145b, +0x1145a, 0x1145b, 0x1145d, 0x1145d, 0x114c6, 0x114c6, 0x115c1, 0x115d7, @@ -2978,6 +3049,7 @@ CR_Punct[] = { 182, 0x11660, 0x1166c, 0x1173c, 0x1173e, 0x1183b, 0x1183b, +0x11944, 0x11946, 0x119e2, 0x119e2, 0x11a3f, 0x11a46, 0x11a9a, 0x11a9c, @@ -3015,7 +3087,7 @@ CR_Space[] = { 10, /* PROPERTY: 'Upper': POSIX [[:Upper:]] */ static const OnigCodePoint -CR_Upper[] = { 641, +CR_Upper[] = { 643, 0x0041, 0x005a, 0x00c0, 0x00d6, 0x00d8, 0x00de, @@ -3615,7 +3687,9 @@ CR_Upper[] = { 641, 0xa7bc, 0xa7bc, 0xa7be, 0xa7be, 0xa7c2, 0xa7c2, -0xa7c4, 0xa7c6, +0xa7c4, 0xa7c7, +0xa7c9, 0xa7c9, +0xa7f5, 0xa7f5, 0xff21, 0xff3a, 0x10400, 0x10427, 0x104b0, 0x104d3, @@ -3669,7 +3743,7 @@ CR_XDigit[] = { 3, /* PROPERTY: 'Word': POSIX [[:Word:]] */ static const OnigCodePoint -CR_Word[] = { 716, +CR_Word[] = { 732, 0x0030, 0x0039, 0x0041, 0x005a, 0x005f, 0x005f, @@ 
-3721,7 +3795,7 @@ CR_Word[] = { 716, 0x0840, 0x085b, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d3, 0x08e1, 0x08e3, 0x0963, 0x0966, 0x096f, @@ -3781,7 +3855,7 @@ CR_Word[] = { 716, 0x0b3c, 0x0b44, 0x0b47, 0x0b48, 0x0b4b, 0x0b4d, -0x0b56, 0x0b57, +0x0b55, 0x0b57, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b63, 0x0b66, 0x0b6f, @@ -3827,8 +3901,7 @@ CR_Word[] = { 716, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d44, 0x0d46, 0x0d48, @@ -3837,7 +3910,7 @@ CR_Word[] = { 716, 0x0d5f, 0x0d63, 0x0d66, 0x0d6f, 0x0d7a, 0x0d7f, -0x0d82, 0x0d83, +0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -3936,7 +4009,7 @@ CR_Word[] = { 716, 0x1a7f, 0x1a89, 0x1a90, 0x1a99, 0x1aa7, 0x1aa7, -0x1ab0, 0x1abe, +0x1ab0, 0x1ac0, 0x1b00, 0x1b4b, 0x1b50, 0x1b59, 0x1b6b, 0x1b73, @@ -4021,10 +4094,10 @@ CR_Word[] = { 716, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -4035,8 +4108,9 @@ CR_Word[] = { 716, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa827, +0xa7c2, 0xa7ca, +0xa7f5, 0xa827, +0xa82c, 0xa82c, 0xa840, 0xa873, 0xa880, 0xa8c5, 0xa8d0, 0xa8d9, @@ -4062,7 +4136,7 @@ CR_Word[] = { 716, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, @@ -4161,9 +4235,13 @@ CR_Word[] = { 716, 0x10cc0, 0x10cf2, 0x10d00, 0x10d27, 0x10d30, 0x10d39, +0x10e80, 0x10ea9, +0x10eab, 0x10eac, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f50, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11000, 0x11046, 0x11066, 0x1106f, @@ -4172,12 +4250,12 @@ CR_Word[] = { 716, 0x110f0, 0x110f9, 0x11100, 0x11134, 0x11136, 0x1113f, -0x11144, 0x11146, +0x11144, 0x11147, 0x11150, 0x11173, 0x11176, 0x11176, 0x11180, 0x111c4, 0x111c9, 
0x111cc, -0x111d0, 0x111da, +0x111ce, 0x111da, 0x111dc, 0x111dc, 0x11200, 0x11211, 0x11213, 0x11237, @@ -4206,7 +4284,7 @@ CR_Word[] = { 716, 0x11370, 0x11374, 0x11400, 0x1144a, 0x11450, 0x11459, -0x1145e, 0x1145f, +0x1145e, 0x11461, 0x11480, 0x114c5, 0x114c7, 0x114c7, 0x114d0, 0x114d9, @@ -4223,7 +4301,14 @@ CR_Word[] = { 716, 0x11730, 0x11739, 0x11800, 0x1183a, 0x118a0, 0x118e9, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x11943, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119e1, @@ -4254,6 +4339,7 @@ CR_Word[] = { 716, 0x11d93, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef6, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -4274,9 +4360,11 @@ CR_Word[] = { 716, 0x16f4f, 0x16f87, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, -0x16fe3, 0x16fe3, +0x16fe3, 0x16fe4, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -4379,18 +4467,20 @@ CR_Word[] = { 716, 0x1f130, 0x1f149, 0x1f150, 0x1f169, 0x1f170, 0x1f189, -0x20000, 0x2a6d6, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, 0xe0100, 0xe01ef, }; /* END of CR_Word */ /* PROPERTY: 'Alnum': POSIX [[:Alnum:]] */ static const OnigCodePoint -CR_Alnum[] = { 715, +CR_Alnum[] = { 732, 0x0030, 0x0039, 0x0041, 0x005a, 0x0061, 0x007a, @@ -4444,7 +4534,7 @@ CR_Alnum[] = { 715, 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, -0x08b6, 0x08bd, +0x08b6, 0x08c7, 0x08d4, 0x08df, 0x08e3, 0x08e9, 0x08f0, 0x093b, @@ -4554,8 +4644,7 @@ CR_Alnum[] = { 715, 0x0ce0, 0x0ce3, 0x0ce6, 0x0cef, 0x0cf1, 0x0cf2, -0x0d00, 0x0d03, -0x0d05, 0x0d0c, +0x0d00, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d3d, 0x0d44, @@ -4566,7 +4655,7 @@ CR_Alnum[] = { 715, 0x0d5f, 0x0d63, 0x0d66, 0x0d6f, 0x0d7a, 0x0d7f, -0x0d82, 0x0d83, 
+0x0d81, 0x0d83, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, @@ -4662,6 +4751,7 @@ CR_Alnum[] = { 715, 0x1a80, 0x1a89, 0x1a90, 0x1a99, 0x1aa7, 0x1aa7, +0x1abf, 0x1ac0, 0x1b00, 0x1b33, 0x1b35, 0x1b43, 0x1b45, 0x1b4b, @@ -4749,10 +4839,10 @@ CR_Alnum[] = { 715, 0x30fc, 0x30ff, 0x3105, 0x312f, 0x3131, 0x318e, -0x31a0, 0x31ba, +0x31a0, 0x31bf, 0x31f0, 0x31ff, -0x3400, 0x4db5, -0x4e00, 0x9fef, +0x3400, 0x4dbf, +0x4e00, 0x9ffc, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -4763,8 +4853,8 @@ CR_Alnum[] = { 715, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7bf, -0xa7c2, 0xa7c6, -0xa7f7, 0xa805, +0xa7c2, 0xa7ca, +0xa7f5, 0xa805, 0xa807, 0xa827, 0xa840, 0xa873, 0xa880, 0xa8c3, @@ -4795,7 +4885,7 @@ CR_Alnum[] = { 715, 0xab20, 0xab26, 0xab28, 0xab2e, 0xab30, 0xab5a, -0xab5c, 0xab67, +0xab5c, 0xab69, 0xab70, 0xabea, 0xabf0, 0xabf9, 0xac00, 0xd7a3, @@ -4884,9 +4974,13 @@ CR_Alnum[] = { 715, 0x10cc0, 0x10cf2, 0x10d00, 0x10d27, 0x10d30, 0x10d39, +0x10e80, 0x10ea9, +0x10eab, 0x10eac, +0x10eb0, 0x10eb1, 0x10f00, 0x10f1c, 0x10f27, 0x10f27, 0x10f30, 0x10f45, +0x10fb0, 0x10fc4, 0x10fe0, 0x10ff6, 0x11000, 0x11045, 0x11066, 0x1106f, @@ -4895,12 +4989,12 @@ CR_Alnum[] = { 715, 0x110f0, 0x110f9, 0x11100, 0x11132, 0x11136, 0x1113f, -0x11144, 0x11146, +0x11144, 0x11147, 0x11150, 0x11172, 0x11176, 0x11176, 0x11180, 0x111bf, 0x111c1, 0x111c4, -0x111d0, 0x111da, +0x111ce, 0x111da, 0x111dc, 0x111dc, 0x11200, 0x11211, 0x11213, 0x11234, @@ -4930,7 +5024,7 @@ CR_Alnum[] = { 715, 0x11443, 0x11445, 0x11447, 0x1144a, 0x11450, 0x11459, -0x1145f, 0x1145f, +0x1145f, 0x11461, 0x11480, 0x114c1, 0x114c4, 0x114c5, 0x114c7, 0x114c7, @@ -4950,7 +5044,15 @@ CR_Alnum[] = { 715, 0x11730, 0x11739, 0x11800, 0x11838, 0x118a0, 0x118e9, -0x118ff, 0x118ff, +0x118ff, 0x11906, +0x11909, 0x11909, +0x1190c, 0x11913, +0x11915, 0x11916, +0x11918, 0x11935, +0x11937, 0x11938, +0x1193b, 0x1193c, +0x1193f, 0x11942, +0x11950, 0x11959, 0x119a0, 0x119a7, 0x119aa, 0x119d7, 0x119da, 0x119df, @@ -4986,6 +5088,7 @@ CR_Alnum[] 
= { 715, 0x11d98, 0x11d98, 0x11da0, 0x11da9, 0x11ee0, 0x11ef6, +0x11fb0, 0x11fb0, 0x12000, 0x12399, 0x12400, 0x1246e, 0x12480, 0x12543, @@ -5006,8 +5109,10 @@ CR_Alnum[] = { 715, 0x16f8f, 0x16f9f, 0x16fe0, 0x16fe1, 0x16fe3, 0x16fe3, +0x16ff0, 0x16ff1, 0x17000, 0x187f7, -0x18800, 0x18af2, +0x18800, 0x18cd5, +0x18d00, 0x18d08, 0x1b000, 0x1b11e, 0x1b150, 0x1b152, 0x1b164, 0x1b167, @@ -5100,12 +5205,14 @@ CR_Alnum[] = { 715, 0x1f130, 0x1f149, 0x1f150, 0x1f169, 0x1f170, 0x1f189, -0x20000, 0x2a6d6, +0x1fbf0, 0x1fbf9, +0x20000, 0x2a6dd, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, 0x2ceb0, 0x2ebe0, 0x2f800, 0x2fa1d, +0x30000, 0x3134a, }; /* END of CR_Alnum */ /* PROPERTY: 'ASCII': POSIX [[:ASCII:]] */ diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c index 51a037b..bc89081 100644 --- a/src/unicode_unfold_key.c +++ b/src/unicode_unfold_key.c @@ -9,7 +9,7 @@ /* This gperf source file was generated by make_unicode_fold_data.py */ /*- - * Copyright (c) 2017-2019 K.Kosako + * Copyright (c) 2017-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,15 +33,14 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ -#include <string.h> -#include "regenc.h" +#include "regint.h" -#define TOTAL_KEYWORDS 1487 +#define TOTAL_KEYWORDS 1490 #define MIN_WORD_LENGTH 3 #define MAX_WORD_LENGTH 3 #define MIN_HASH_VALUE 10 -#define MAX_HASH_VALUE 1958 -/* maximum key range = 1949, duplicates = 0 */ +#define MAX_HASH_VALUE 1946 +/* maximum key range = 1937, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -56,36 +55,36 @@ hash(OnigCodePoint codes[]) { static const unsigned short asso_values[] = { - 9, 5, 2, 111, 176, 1, 110, 1959, 1959, 1959, - 1959, 1959, 1959, 13, 1959, 1959, 1959, 77, 1959, 1959, - 63, 1959, 1959, 1959, 1959, 86, 1959, 1959, 1959, 7, - 1959, 0, 3, 1959, 191, 624, 1536, 132, 1519, 613, - 1513, 607, 1602, 586, 1505, 4, 1497, 570, 1492, 554, - 1482, 907, 1601, 898, 1469, 595, 1468, 405, 1463, 546, - 1458, 538, 1453, 474, 1444, 890, 1596, 877, 1419, 725, - 1590, 698, 1588, 635, 1160, 682, 1021, 338, 1122, 671, - 812, 559, 819, 1580, 756, 1082, 744, 1438, 637, 1428, - 266, 1429, 1502, 1285, 1440, 1578, 467, 1419, 873, 490, - 1380, 779, 731, 1057, 1261, 1034, 1567, 762, 1389, 495, - 1510, 156, 648, 805, 1537, 862, 1523, 853, 1469, 845, - 1252, 796, 342, 1297, 801, 1287, 542, 1408, 477, 1400, - 237, 1109, 20, 1099, 548, 1067, 1, 1280, 326, 1043, - 767, 1030, 756, 938, 598, 254, 26, 1275, 165, 1270, - 149, 1575, 15, 1368, 133, 1202, 625, 403, 408, 373, - 296, 1397, 283, 519, 1381, 503, 1372, 18, 6, 415, - 1253, 769, 8, 484, 1358, 658, 1565, 464, 427, 244, - 1241, 360, 1233, 390, 1350, 219, 377, 206, 1223, 328, - 1211, 145, 1198, 454, 135, 73, 1020, 103, 1012, 63, - 995, 53, 748, 196, 573, 116, 684, 93, 983, 83, - 1004, 182, 735, 31, 713, 319, 977, 306, 706, 44, - 836, 293, 1187, 280, 1135, 268, 671, 976, 1350, 447, - 1182, 437, 964, 946, 351, 932, 1349, 923, 1090, 1194, - 1175, 827, 1299, 1165, 1410, 1155, 1522, 1557, 313, 1146, - 424, 1136, 233, 1130, 161, 1316, 93, 167, 134, 618, - 1959, 1118, 128, 1342, 1959, 1335, 124, 1331, 75, 1327, - 191, 1308, 45, 1545, 1959, 1534, 1, 
230, 9, 643, - 5, 1564, 40, 1553, 65, 531, 55, 993, 25, 1476, - 2 + 9, 5, 2, 124, 176, 1, 123, 1947, 1947, 1947, + 1947, 1947, 1947, 13, 1947, 1947, 1947, 77, 1947, 1947, + 30, 1947, 1947, 1947, 1947, 86, 1947, 1947, 1947, 7, + 1947, 0, 3, 1947, 53, 616, 1530, 132, 1528, 610, + 1520, 604, 1598, 578, 1519, 4, 1504, 565, 1499, 556, + 1494, 916, 1597, 907, 1488, 595, 1483, 587, 1459, 548, + 1478, 540, 1473, 440, 314, 899, 1592, 890, 1316, 681, + 1587, 656, 1578, 622, 1576, 636, 1566, 232, 1465, 570, + 257, 528, 1341, 866, 674, 1130, 561, 1455, 504, 1448, + 393, 1441, 479, 954, 1462, 1562, 1432, 1437, 463, 1249, + 1391, 1305, 1389, 1055, 1224, 1046, 1434, 879, 733, 521, + 1192, 507, 909, 840, 1201, 870, 1148, 847, 856, 833, + 695, 826, 300, 1335, 491, 1301, 334, 1427, 260, 1420, + 95, 1123, 204, 1113, 226, 1068, 201, 1036, 25, 1104, + 1605, 1091, 1305, 1081, 1217, 748, 214, 1296, 223, 1286, + 184, 1544, 15, 1291, 158, 1539, 1598, 819, 775, 546, + 653, 1417, 300, 497, 1407, 484, 1399, 18, 6, 427, + 1274, 806, 8, 471, 937, 705, 759, 459, 1343, 244, + 1266, 360, 1243, 414, 1396, 219, 1041, 206, 1256, 335, + 1235, 155, 1225, 449, 1216, 73, 1026, 103, 1018, 63, + 1008, 53, 785, 196, 782, 116, 766, 93, 998, 83, + 989, 182, 735, 31, 718, 326, 981, 306, 689, 44, + 134, 293, 1207, 280, 1184, 268, 1180, 980, 1321, 401, + 517, 387, 970, 947, 319, 933, 1206, 924, 1170, 1196, + 1163, 969, 693, 1185, 702, 376, 141, 347, 113, 1158, + 146, 1151, 166, 1145, 671, 173, 142, 1138, 134, 409, + 1947, 665, 111, 1387, 1947, 1383, 85, 1378, 65, 1374, + 39, 1367, 54, 756, 1947, 729, 170, 1362, 9, 1354, + 195, 1535, 467, 240, 164, 1330, 0, 151, 19, 632, + 11 }; return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+35] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)+1] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)]; } @@ -98,10 +97,10 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, 
{0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1040a, 3441, 1}, + {0x1040a, 3450, 1}, {0xffffffff, -1, 0}, - {0x01f1, 483, 1}, + {0x1ffb, 2420, 1}, {0x1e0a, 1882, 1}, @@ -125,11 +124,11 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab85, 1663, 1}, - {0x10c85, 3654, 1}, + {0x10c85, 3663, 1}, {0xab89, 1675, 1}, - {0x10c89, 3666, 1}, + {0x10c89, 3675, 1}, {0x1e84, 2066, 1}, @@ -149,13 +148,13 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab84, 1660, 1}, - {0x10c84, 3651, 1}, + {0x10c84, 3660, 1}, - {0x104b2, 3537, 1}, + {0x104b2, 3546, 1}, - {0x1f6f, 2378, 1}, + {0x2c67, 2726, 1}, - {0x2c6f, 604, 1}, + {0x13fb, 1849, 1}, {0x1eb2, 2120, 1}, @@ -175,13 +174,13 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xabb2, 1798, 1}, - {0x10cb2, 3789, 1}, + {0x10cb2, 3798, 1}, - {0x104b8, 3555, 1}, + {0x104b8, 3564, 1}, - {0x01f7, 414, 1}, + {0x1feb, 2414, 1}, - {0x1ff7, 67, 3}, + {0x2ceb, 2891, 1}, {0x1eb8, 2129, 1}, @@ -193,7 +192,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2cb8, 2825, 1}, - {0x2ced, 2894, 1}, + {0x13fd, 1855, 1}, {0xa7b8, 3315, 1}, @@ -211,7 +210,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2ca6, 2798, 1}, - {0x1ffb, 2420, 1}, + {0x2ced, 2894, 1}, {0xa7a6, 3303, 1}, @@ -219,7 +218,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xaba6, 1762, 1}, - {0x10ca6, 3753, 1}, + {0x10ca6, 3762, 1}, {0x1ea4, 2099, 1}, @@ -231,7 +230,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2ca4, 2795, 1}, - {0x1ff9, 2408, 1}, + {0x1fe9, 2438, 1}, {0xa7a4, 3300, 1}, @@ -239,7 +238,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xaba4, 1756, 1}, - {0x10ca4, 3747, 1}, + {0x10ca4, 3756, 1}, {0x1ea0, 2093, 1}, @@ -251,7 +250,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2ca0, 2789, 1}, - {0x1fe9, 2438, 1}, + {0x2167, 2468, 1}, {0xa7a0, 3294, 1}, @@ -259,7 +258,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xaba0, 1744, 1}, - 
{0x10ca0, 3735, 1}, + {0x10ca0, 3744, 1}, {0x1eae, 2114, 1}, @@ -271,7 +270,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2cae, 2810, 1}, - {0x13fd, 1855, 1}, + {0x1fe7, 47, 3}, {0xa7ae, 655, 1}, @@ -279,7 +278,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xabae, 1786, 1}, - {0x10cae, 3777, 1}, + {0x10cae, 3786, 1}, {0x1eac, 2111, 1}, @@ -291,7 +290,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2cac, 2807, 1}, - {0x00dd, 162, 1}, + {0x1f5f, 2354, 1}, {0xa7ac, 637, 1}, @@ -299,7 +298,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xabac, 1780, 1}, - {0x10cac, 3771, 1}, + {0x10cac, 3780, 1}, {0x1ea2, 2096, 1}, @@ -319,19 +318,19 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xaba2, 1750, 1}, - {0x10ca2, 3741, 1}, + {0x10ca2, 3750, 1}, - {0x118b2, 3846, 1}, + {0x118b2, 3855, 1}, - {0x050a, 1291, 1}, + {0x1fe3, 41, 3}, - {0x020a, 517, 1}, + {0x01d5, 441, 1}, {0x1eaa, 2108, 1}, {0x04aa, 1147, 1}, - {0x13fb, 1849, 1}, + {0x00d5, 141, 1}, {0x1faa, 219, 2}, @@ -345,19 +344,19 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xabaa, 1774, 1}, - {0x10caa, 3765, 1}, + {0x10caa, 3774, 1}, - {0x118b8, 3864, 1}, + {0x118b8, 3873, 1}, - {0x1fe7, 47, 3}, + {0x050a, 1291, 1}, - {0x13f9, 1843, 1}, + {0x020a, 517, 1}, - {0x10402, 3417, 1}, + {0x10402, 3426, 1}, {0x10a6, 2918, 1}, - {0x1fe3, 41, 3}, + {0x104b9, 3567, 1}, {0x1e02, 1870, 1}, @@ -365,99 +364,99 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0102, 174, 1}, - {0x118a6, 3810, 1}, + {0x118a6, 3819, 1}, {0x2c02, 2582, 1}, - {0x019f, 673, 1}, + {0x1fb9, 2426, 1}, - {0x1f9f, 204, 2}, + {0x104d3, 3645, 1}, {0x00df, 24, 2}, {0x10a4, 2912, 1}, - {0xab77, 1621, 1}, + {0x1cb9, 1585, 1}, - {0x1c9f, 1507, 1}, + {0xabb9, 1819, 1}, - {0xab9f, 1741, 1}, + {0x01d3, 438, 1}, - {0x10c9f, 3732, 1}, + {0x1fd3, 25, 3}, - {0x118a4, 3804, 1}, + {0x118a4, 3813, 1}, - {0x049c, 1126, 1}, + {0x00d3, 135, 1}, - {0x019c, 664, 1}, + {0x00dd, 162, 1}, - {0x1f9c, 189, 2}, + {0x01d7, 444, 1}, - {0x2c9c, 2783, 
1}, + {0x1fd7, 31, 3}, {0x10a0, 2900, 1}, - {0xa79c, 3288, 1}, + {0x1efc, 2231, 1}, - {0x1c9c, 1498, 1}, + {0x04fc, 1270, 1}, - {0xab9c, 1732, 1}, + {0x01fc, 496, 1}, - {0x10c9c, 3723, 1}, + {0x1ffc, 96, 2}, - {0x118a0, 3792, 1}, + {0x118a0, 3801, 1}, - {0x1e4c, 1981, 1}, + {0x049c, 1126, 1}, - {0xab73, 1609, 1}, + {0x019c, 664, 1}, - {0x014c, 279, 1}, + {0x1f9c, 189, 2}, - {0x1f4c, 2339, 1}, + {0x2c9c, 2783, 1}, {0x10ae, 2942, 1}, - {0x004c, 31, 1}, + {0xa79c, 3288, 1}, - {0xa74c, 3192, 1}, + {0x1c9c, 1498, 1}, - {0x01db, 450, 1}, + {0xab9c, 1732, 1}, - {0x1fdb, 2402, 1}, + {0x10c9c, 3732, 1}, - {0x118ae, 3834, 1}, + {0x118ae, 3843, 1}, - {0x00db, 156, 1}, + {0xab77, 1621, 1}, - {0x1ede, 2186, 1}, + {0x1ff9, 2408, 1}, - {0x04de, 1225, 1}, + {0x01d9, 447, 1}, - {0x01de, 456, 1}, + {0x1fd9, 2432, 1}, {0x10ac, 2936, 1}, - {0x2cde, 2882, 1}, + {0x00d9, 150, 1}, - {0x00de, 165, 1}, + {0x01f1, 483, 1}, - {0xab71, 1603, 1}, + {0x1edc, 2183, 1}, - {0xa64c, 3039, 1}, + {0x04dc, 1222, 1}, - {0x118ac, 3828, 1}, + {0x118ac, 3837, 1}, {0x24b8, 2504, 1}, - {0x03f1, 802, 1}, + {0x2cdc, 2879, 1}, - {0x03ff, 730, 1}, + {0x00dc, 159, 1}, - {0x104b0, 3531, 1}, + {0x104b0, 3540, 1}, {0x10a2, 2906, 1}, - {0x03f5, 758, 1}, + {0x13fc, 1852, 1}, {0x1eb0, 2117, 1}, @@ -465,10 +464,11 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0389, 739, 1}, - {0x118a2, 3798, 1}, + {0x118a2, 3807, 1}, {0x2cb0, 2813, 1}, - {0xffffffff, -1, 0}, + + {0x03ff, 730, 1}, {0xa7b0, 712, 1}, @@ -476,27 +476,27 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xabb0, 1792, 1}, - {0x10cb0, 3783, 1}, + {0x10cb0, 3792, 1}, - {0x2165, 2462, 1}, + {0xab73, 1609, 1}, {0x10aa, 2930, 1}, - {0x1feb, 2414, 1}, + {0x13f9, 1843, 1}, - {0x2ceb, 2891, 1}, + {0x03fd, 724, 1}, {0x1ea8, 2105, 1}, {0x04a8, 1144, 1}, - {0x118aa, 3822, 1}, + {0x118aa, 3831, 1}, {0x1fa8, 209, 2}, {0x2ca8, 2801, 1}, - {0x03fd, 724, 1}, + {0xa7f5, 3336, 1}, {0xa7a8, 3306, 1}, @@ -504,7 +504,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) 
{0xaba8, 1768, 1}, - {0x10ca8, 3759, 1}, + {0x10ca8, 3768, 1}, {0x1e98, 38, 2}, @@ -516,7 +516,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2c98, 2777, 1}, - {0x2161, 2450, 1}, + {0x10b9, 2975, 1}, {0xa798, 3282, 1}, @@ -524,12 +524,13 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab98, 1720, 1}, - {0x10c98, 3711, 1}, + {0x10c98, 3720, 1}, - {0x03f7, 890, 1}, + {0x118b9, 3876, 1}, - {0x216f, 2492, 1}, - {0xffffffff, -1, 0}, + {0x1f6f, 2378, 1}, + + {0x2c6f, 604, 1}, {0x1e96, 16, 2}, @@ -549,37 +550,37 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab96, 1714, 1}, - {0x10c96, 3705, 1}, + {0x10c96, 3714, 1}, {0x03a6, 816, 1}, - {0x1ef2, 2216, 1}, + {0x2c63, 1861, 1}, - {0x04f2, 1255, 1}, + {0xab71, 1603, 1}, - {0x01f2, 483, 1}, + {0x1e2a, 1930, 1}, - {0x1ff2, 257, 2}, + {0x042a, 980, 1}, - {0x2cf2, 2897, 1}, + {0x012a, 234, 1}, - {0x01d9, 447, 1}, + {0x1f2a, 2285, 1}, - {0x1fd9, 2432, 1}, + {0x2c2a, 2702, 1}, {0xa696, 3123, 1}, - {0x00d9, 150, 1}, + {0xa72a, 3144, 1}, {0x03a4, 810, 1}, - {0x1f5f, 2354, 1}, + {0x1ef8, 2225, 1}, - {0x03f9, 884, 1}, + {0x04f8, 1264, 1}, - {0x0502, 1279, 1}, + {0x01f8, 490, 1}, - {0x0202, 505, 1}, + {0x1ff8, 2405, 1}, {0x1e90, 2084, 1}, @@ -599,38 +600,41 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab90, 1696, 1}, - {0x10c90, 3687, 1}, + {0x10c90, 3696, 1}, - {0x1e6e, 2033, 1}, + {0x2165, 2462, 1}, - {0x046e, 1069, 1}, + {0x0502, 1279, 1}, - {0x016e, 330, 1}, + {0x0202, 505, 1}, - {0x1f6e, 2375, 1}, + {0x2161, 2450, 1}, - {0x2c6e, 667, 1}, + {0x042d, 990, 1}, {0x10b0, 2948, 1}, - {0xa76e, 3243, 1}, - {0xffffffff, -1, 0}, + {0x1f2d, 2294, 1}, + + {0x2c2d, 2711, 1}, {0xa690, 3114, 1}, - {0xffffffff, -1, 0}, - {0x118b0, 3840, 1}, + {0x1f5d, 2351, 1}, + + {0x118b0, 3849, 1}, - {0x104be, 3573, 1}, + {0x104be, 3582, 1}, - {0x054c, 1429, 1}, + {0x24b9, 2507, 1}, - {0x024c, 598, 1}, + {0x216f, 2492, 1}, {0x1ebe, 2138, 1}, {0x04be, 1177, 1}, - {0xffffffff, -1, 0}, + + {0x13f8, 1840, 1}, {0x1fbe, 773, 1}, @@ -644,10 
+648,11 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xabbe, 1834, 1}, - {0x104bc, 3567, 1}, + {0x104bc, 3576, 1}, - {0x118a8, 3816, 1}, - {0xffffffff, -1, 0}, + {0x118a8, 3825, 1}, + + {0x2163, 2456, 1}, {0x1ebc, 2135, 1}, @@ -659,18 +664,17 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2cbc, 2831, 1}, - {0x017f, 52, 1}, + {0x212a, 27, 1}, {0xa7bc, 3321, 1}, - - {0x2c7f, 583, 1}, + {0xffffffff, -1, 0}, {0xabbc, 1828, 1}, - {0xffffffff, -1, 0}, - {0x104ba, 3561, 1}, + {0x03d5, 816, 1}, - {0xab7f, 1645, 1}, + {0x104ba, 3570, 1}, + {0xffffffff, -1, 0}, {0x03aa, 830, 1}, @@ -683,7 +687,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2cba, 2828, 1}, - {0x017d, 351, 1}, + {0xfb02, 12, 2}, {0xa7ba, 3318, 1}, @@ -691,24 +695,25 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xabba, 1822, 1}, - {0xa77d, 1858, 1}, + {0x017f, 52, 1}, - {0x104b6, 3549, 1}, + {0x104b6, 3558, 1}, - {0xab7d, 1639, 1}, - {0xffffffff, -1, 0}, + {0x2c7f, 583, 1}, + + {0x0057, 65, 1}, {0x1eb6, 2126, 1}, {0x04b6, 1165, 1}, - {0x16e4c, 3924, 1}, + {0xab7f, 1645, 1}, {0x1fb6, 58, 2}, {0x2cb6, 2822, 1}, - {0x039f, 795, 1}, + {0x1041d, 3507, 1}, {0xa7b6, 3312, 1}, @@ -716,23 +721,38 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xabb6, 1810, 1}, - {0x01d5, 441, 1}, + {0x041d, 937, 1}, - {0x104b4, 3543, 1}, - {0xffffffff, -1, 0}, + {0x104c7, 3609, 1}, - {0x00d5, 141, 1}, + {0x1f1d, 2276, 1}, + + {0x2c1d, 2663, 1}, + + {0x1e90a, 4023, 1}, + + {0x04c7, 1189, 1}, + + {0x01c7, 421, 1}, + + {0x1fc7, 15, 3}, + + {0x104b4, 3552, 1}, + + {0x00c7, 99, 1}, + + {0xa7c7, 3330, 1}, {0x1eb4, 2123, 1}, {0x04b4, 1162, 1}, - - {0x039c, 785, 1}, + {0xffffffff, -1, 0}, {0x1fb4, 50, 2}, {0x2cb4, 2819, 1}, - {0xffffffff, -1, 0}, + + {0x039c, 785, 1}, {0xa7b4, 3309, 1}, @@ -744,12 +764,13 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x049a, 1123, 1}, - {0x2c67, 2726, 1}, + {0x1f5b, 2348, 1}, {0x1f9a, 179, 2}, {0x2c9a, 2780, 1}, - {0xffffffff, -1, 0}, + + {0x03f9, 884, 1}, {0xa79a, 3285, 1}, 
@@ -757,61 +778,46 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab9a, 1726, 1}, - {0x10c9a, 3717, 1}, + {0x10c9a, 3726, 1}, - {0x1e2a, 1930, 1}, + {0x10be, 2990, 1}, - {0x042a, 980, 1}, + {0x03f1, 802, 1}, - {0x012a, 234, 1}, + {0x1ed4, 2171, 1}, - {0x1f2a, 2285, 1}, + {0x04d4, 1210, 1}, - {0x2c2a, 2702, 1}, + {0x03dc, 854, 1}, - {0x03de, 857, 1}, + {0x118be, 3891, 1}, - {0xa72a, 3144, 1}, + {0x2cd4, 2867, 1}, - {0x10be, 2990, 1}, + {0x00d4, 138, 1}, {0xa69a, 3129, 1}, - - {0x0057, 65, 1}, - - {0x104c7, 3600, 1}, {0xffffffff, -1, 0}, - {0x118be, 3882, 1}, - - {0x1e90a, 4014, 1}, - - {0x04c7, 1189, 1}, - - {0x01c7, 421, 1}, - - {0x1fc7, 15, 3}, - {0xffffffff, -1, 0}, + {0x052a, 1339, 1}, - {0x00c7, 99, 1}, + {0x022a, 562, 1}, {0x10bc, 2984, 1}, {0x03b0, 41, 3}, - - {0xff37, 3399, 1}, + {0xffffffff, -1, 0}, {0x1e92, 2087, 1}, {0x0492, 1111, 1}, - {0x118bc, 3876, 1}, + {0x118bc, 3885, 1}, {0x1f92, 179, 2}, {0x2c92, 2768, 1}, - - {0xfb02, 12, 2}, + {0xffffffff, -1, 0}, {0xa792, 3273, 1}, @@ -819,181 +825,163 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab92, 1702, 1}, - {0x10c92, 3693, 1}, + {0x10c92, 3702, 1}, {0x10ba, 2978, 1}, - {0xffffffff, -1, 0}, + + {0x03f5, 758, 1}, {0x03a8, 823, 1}, - {0x1e7c, 2054, 1}, + {0x104d2, 3642, 1}, - {0x047c, 1090, 1}, + {0xff2a, 3369, 1}, - {0x118ba, 3870, 1}, + {0x118ba, 3879, 1}, - {0x0537, 1366, 1}, + {0x1ed2, 2168, 1}, - {0x1e97, 34, 2}, + {0x04d2, 1207, 1}, {0xa692, 3117, 1}, - {0x0197, 649, 1}, + {0x1fd2, 20, 3}, - {0x1f97, 204, 2}, + {0x2cd2, 2864, 1}, - {0xab7c, 1636, 1}, + {0x00d2, 132, 1}, {0x0398, 768, 1}, {0x10b6, 2966, 1}, - {0x1c97, 1483, 1}, - - {0xab97, 1717, 1}, - - {0x10c97, 3708, 1}, - {0xffffffff, -1, 0}, + {0x104c4, 3600, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x118b6, 3858, 1}, + {0x1ec4, 2147, 1}, - {0x16e5f, 3981, 1}, + {0x118b6, 3867, 1}, - {0x1e94, 2090, 1}, + {0x01c4, 417, 1}, - {0x0494, 1114, 1}, + {0x1fc4, 54, 2}, - {0x0194, 640, 1}, + {0x2cc4, 2843, 1}, - {0x1f94, 189, 2}, + 
{0x00c4, 89, 1}, - {0x2c94, 2771, 1}, + {0xa7c4, 3276, 1}, + {0xffffffff, -1, 0}, {0x0396, 762, 1}, - {0x10b4, 2960, 1}, - - {0x1c94, 1474, 1}, + {0x10c7, 3014, 1}, + {0xffffffff, -1, 0}, - {0xab94, 1708, 1}, + {0x104c2, 3594, 1}, - {0x10c94, 3699, 1}, + {0xff2d, 3378, 1}, {0x24be, 2522, 1}, - {0x118b4, 3852, 1}, - - {0x10416, 3477, 1}, + {0x1ec2, 2144, 1}, + {0xffffffff, -1, 0}, - {0x1e7a, 2051, 1}, + {0x10b4, 2960, 1}, - {0x047a, 1087, 1}, + {0x1fc2, 253, 2}, - {0x1e16, 1900, 1}, + {0x2cc2, 2840, 1}, - {0x0416, 916, 1}, + {0x00c2, 83, 1}, - {0x0116, 204, 1}, + {0xa7c2, 3327, 1}, - {0xa694, 3120, 1}, + {0x118b4, 3861, 1}, - {0x2c16, 2642, 1}, + {0x1ee0, 2189, 1}, - {0x017b, 348, 1}, + {0x04e0, 1228, 1}, - {0xab7a, 1630, 1}, + {0x01e0, 459, 1}, {0x24bc, 2516, 1}, - {0xffffffff, -1, 0}, - {0xa77b, 3249, 1}, + {0x2ce0, 2885, 1}, - {0x1e86, 2069, 1}, + {0x1e94, 2090, 1}, - {0xab7b, 1633, 1}, + {0x0494, 1114, 1}, - {0x0186, 616, 1}, + {0x0194, 640, 1}, - {0x1f86, 159, 2}, + {0x1f94, 189, 2}, - {0x2c86, 2750, 1}, + {0x2c94, 2771, 1}, + {0xffffffff, -1, 0}, {0x0390, 25, 3}, - {0xa786, 3264, 1}, - - {0x1c86, 980, 1}, + {0x1c94, 1474, 1}, - {0xab86, 1666, 1}, + {0xab94, 1708, 1}, - {0x10c86, 3657, 1}, + {0x10c94, 3708, 1}, + {0xffffffff, -1, 0}, {0x24ba, 2510, 1}, - - {0x01d7, 444, 1}, - - {0x1fd7, 31, 3}, - - {0x10c7, 3014, 1}, - - {0x018f, 625, 1}, - - {0x1f8f, 164, 2}, - - {0xff2a, 3360, 1}, {0xffffffff, -1, 0}, - {0xa686, 3099, 1}, - - {0x104c4, 3591, 1}, - - {0xab8f, 1693, 1}, + {0x1e86, 2069, 1}, + {0xffffffff, -1, 0}, - {0x10c8f, 3684, 1}, + {0x0186, 616, 1}, - {0x1ec4, 2147, 1}, + {0x1f86, 159, 2}, - {0x24b6, 2498, 1}, + {0x2c86, 2750, 1}, - {0x01c4, 417, 1}, + {0xa694, 3120, 1}, - {0x1fc4, 54, 2}, + {0xa786, 3264, 1}, - {0x2cc4, 2843, 1}, + {0x1c86, 980, 1}, - {0x00c4, 89, 1}, + {0xab86, 1666, 1}, - {0xa7c4, 3276, 1}, + {0x10c86, 3666, 1}, - {0x104c2, 3585, 1}, + {0x1041c, 3504, 1}, - {0x216e, 2489, 1}, + {0x24b6, 2498, 1}, {0xffffffff, -1, 0}, - {0x1ec2, 2144, 
1}, + {0x1e1c, 1909, 1}, - {0x052a, 1339, 1}, + {0x041c, 934, 1}, - {0x022a, 562, 1}, + {0x011c, 213, 1}, - {0x1fc2, 253, 2}, + {0x1f1c, 2273, 1}, - {0x2cc2, 2840, 1}, + {0x2c1c, 2660, 1}, - {0x00c2, 83, 1}, + {0xa686, 3099, 1}, + {0xffffffff, -1, 0}, - {0xa7c2, 3327, 1}, + {0x1e902, 3999, 1}, + {0xffffffff, -1, 0}, {0x1e9e, 24, 2}, {0x049e, 1129, 1}, - {0xffffffff, -1, 0}, + + {0x24c7, 2549, 1}, {0x1f9e, 199, 2}, {0x2c9e, 2786, 1}, - - {0x037f, 887, 1}, + {0xffffffff, -1, 0}, {0xa79e, 3291, 1}, @@ -1001,7 +989,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab9e, 1738, 1}, - {0x10c9e, 3729, 1}, + {0x10c9e, 3738, 1}, {0x1e8e, 2081, 1}, @@ -1013,34 +1001,19 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2c8e, 2762, 1}, - {0x013d, 258, 1}, + {0x10c4, 3008, 1}, - {0x1f3d, 2318, 1}, + {0x013f, 261, 1}, - {0x1041c, 3495, 1}, + {0x1f3f, 2324, 1}, {0xab8e, 1690, 1}, - {0x10c8e, 3681, 1}, + {0x10c8e, 3690, 1}, - {0x1e1c, 1909, 1}, - - {0x041c, 934, 1}, - - {0x011c, 213, 1}, - - {0x1f1c, 2273, 1}, - - {0x2c1c, 2660, 1}, - - {0x1e902, 3990, 1}, - - {0x1f5d, 2351, 1}, - {0xffffffff, -1, 0}, - - {0xa68e, 3111, 1}, + {0x01f7, 414, 1}, - {0x24c7, 2549, 1}, + {0x1ff7, 67, 3}, {0x1e8a, 2075, 1}, @@ -1052,35 +1025,23 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2c8a, 2756, 1}, - {0x03d5, 816, 1}, - - {0x1e40, 1963, 1}, - {0xffffffff, -1, 0}, - - {0xab8a, 1678, 1}, + {0x037f, 887, 1}, - {0x10c8a, 3669, 1}, + {0xa68e, 3111, 1}, - {0x16e57, 3957, 1}, + {0x10c2, 3002, 1}, - {0x1e4a, 1978, 1}, + {0xab8a, 1678, 1}, - {0xa740, 3174, 1}, + {0x10c8a, 3678, 1}, - {0x014a, 276, 1}, + {0x0139, 252, 1}, - {0x1f4a, 2333, 1}, + {0x1f39, 2306, 1}, {0xffffffff, -1, 0}, - {0x004a, 24, 1}, - - {0xa74a, 3189, 1}, - - {0xa68a, 3105, 1}, - {0x1e82, 2063, 1}, - - {0x039a, 778, 1}, + {0xffffffff, -1, 0}, {0x0182, 357, 1}, @@ -1088,7 +1049,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2c82, 2744, 1}, - {0xa640, 3020, 1}, + {0xa68a, 3105, 1}, {0xa782, 3258, 1}, @@ -1096,30 
+1057,23 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab82, 1654, 1}, - {0x10c82, 3645, 1}, - - {0xa64a, 3035, 1}, - - {0x10c4, 3008, 1}, - - {0x0516, 1309, 1}, - - {0x0216, 535, 1}, + {0x10c82, 3654, 1}, - {0x2167, 2468, 1}, + {0x1f59, 2345, 1}, {0xffffffff, -1, 0}, + {0x0059, 71, 1}, + {0x1e80, 2060, 1}, {0x0480, 1096, 1}, - - {0xa682, 3093, 1}, + {0xffffffff, -1, 0}, {0x1f80, 129, 2}, {0x2c80, 2741, 1}, - {0x10c2, 3002, 1}, + {0xa682, 3093, 1}, {0xa780, 3255, 1}, @@ -1127,25 +1081,71 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xab80, 1648, 1}, - {0x10c80, 3639, 1}, + {0x10c80, 3648, 1}, - {0x212a, 27, 1}, + {0x1e4c, 1981, 1}, {0xffffffff, -1, 0}, - {0x1efa, 2228, 1}, + {0x014c, 279, 1}, - {0x04fa, 1267, 1}, + {0x1f4c, 2339, 1}, - {0x01fa, 493, 1}, + {0x039a, 778, 1}, - {0x1ffa, 2417, 1}, + {0x004c, 31, 1}, - {0x1041a, 3489, 1}, + {0xa74c, 3192, 1}, - {0x0392, 748, 1}, + {0x104c3, 3597, 1}, {0xa680, 3090, 1}, + {0x0537, 1366, 1}, + {0xffffffff, -1, 0}, + + {0x04c3, 1183, 1}, + + {0x24c4, 2540, 1}, + + {0x1fc3, 71, 2}, + + {0x1e4a, 1978, 1}, + + {0x00c3, 86, 1}, + + {0x014a, 276, 1}, + + {0x1f4a, 2333, 1}, + + {0xa64c, 3039, 1}, + + {0x004a, 24, 1}, + + {0xa74a, 3189, 1}, + + {0x1e2e, 1936, 1}, + + {0x042e, 993, 1}, + + {0x012e, 240, 1}, + + {0x1f2e, 2297, 1}, + + {0x2c2e, 2714, 1}, + + {0x24c2, 2534, 1}, + + {0xa72e, 3150, 1}, + + {0xff37, 3408, 1}, + + {0x0392, 748, 1}, + + {0x1041a, 3498, 1}, + {0xffffffff, -1, 0}, + + {0xa64a, 3035, 1}, + {0x1e1a, 1906, 1}, {0x041a, 928, 1}, @@ -1156,10 +1156,11 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2c1a, 2654, 1}, - {0x10418, 3483, 1}, - {0xffffffff, -1, 0}, + {0x10418, 3492, 1}, - {0x1f5b, 2348, 1}, + {0x1e7c, 2054, 1}, + + {0x047c, 1090, 1}, {0x1e18, 1903, 1}, @@ -1171,11 +1172,10 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2c18, 2648, 1}, - {0x1040e, 3453, 1}, - - {0x2c63, 1861, 1}, + {0x1040e, 3462, 1}, - {0x0397, 765, 1}, + {0xab7c, 1636, 1}, + {0xffffffff, -1, 0}, {0x1e0e, 1888, 
1}, @@ -1186,26 +1186,13 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x1f0e, 2255, 1}, {0x2c0e, 2618, 1}, - - {0x1e2e, 1936, 1}, - - {0x042e, 993, 1}, - - {0x012e, 240, 1}, - - {0x1f2e, 2297, 1}, - - {0x2c2e, 2714, 1}, {0xffffffff, -1, 0}, - {0xa72e, 3150, 1}, - - {0x0394, 755, 1}, + {0x1040c, 3456, 1}, - {0x1040c, 3447, 1}, - {0xffffffff, -1, 0}, + {0x051c, 1318, 1}, - {0x24c4, 2540, 1}, + {0x021c, 544, 1}, {0x1e0c, 1885, 1}, @@ -1217,27 +1204,21 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x2c0c, 2612, 1}, - {0x01a9, 685, 1}, - - {0x1fa9, 214, 2}, - - {0x053d, 1384, 1}, - - {0x023d, 378, 1}, + {0x1e2c, 1933, 1}, - {0x24c2, 2534, 1}, + {0x042c, 987, 1}, - {0x1ca9, 1537, 1}, + {0x012c, 237, 1}, - {0xaba9, 1771, 1}, + {0x1f2c, 2291, 1}, - {0x10ca9, 3762, 1}, + {0x2c2c, 2708, 1}, - {0x10408, 3435, 1}, + {0x10408, 3444, 1}, - {0x051c, 1318, 1}, + {0xa72c, 3147, 1}, - {0x021c, 544, 1}, + {0x03c2, 806, 1}, {0x1e08, 1879, 1}, @@ -1248,36 +1229,45 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x1f08, 2237, 1}, {0x2c08, 2600, 1}, + {0xffffffff, -1, 0}, - {0x0386, 733, 1}, + {0x10416, 3486, 1}, - {0x10414, 3471, 1}, - {0xffffffff, -1, 0}, + {0x03e0, 860, 1}, - {0x13fa, 1846, 1}, + {0x053f, 1390, 1}, - {0x1e14, 1897, 1}, + {0x1e16, 1900, 1}, - {0x0414, 909, 1}, + {0x0416, 916, 1}, - {0x0114, 201, 1}, + {0x0116, 204, 1}, + + {0x0394, 755, 1}, + + {0x2c16, 2642, 1}, + + {0x10414, 3480, 1}, {0xffffffff, -1, 0}, - {0x2c14, 2636, 1}, + {0x10c3, 3005, 1}, - {0x0540, 1393, 1}, + {0x1e14, 1897, 1}, - {0x1f6d, 2372, 1}, + {0x0414, 909, 1}, - {0x2c6d, 607, 1}, + {0x0114, 201, 1}, - {0x038f, 842, 1}, + {0x16e5f, 3990, 1}, - {0x10406, 3429, 1}, + {0x2c14, 2636, 1}, + {0xffffffff, -1, 0}, - {0x054a, 1423, 1}, + {0x10406, 3438, 1}, - {0x024a, 595, 1}, + {0x0539, 1372, 1}, + + {0x0386, 733, 1}, {0x1e06, 1876, 1}, @@ -1285,7 +1275,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0106, 180, 1}, - {0x10404, 3423, 1}, + {0x10404, 3432, 1}, {0x2c06, 2594, 1}, 
{0xffffffff, -1, 0}, @@ -1295,21 +1285,11 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0404, 1011, 1}, {0x0104, 177, 1}, - {0xffffffff, -1, 0}, - - {0x2c04, 2588, 1}, - - {0x1ee0, 2189, 1}, - - {0x04e0, 1228, 1}, - - {0x01e0, 459, 1}, - - {0x10400, 3411, 1}, - {0x2ce0, 2885, 1}, + {0x10400, 3420, 1}, - {0x03c2, 806, 1}, + {0x2c04, 2588, 1}, + {0xffffffff, -1, 0}, {0x1e00, 1867, 1}, @@ -1317,20 +1297,11 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0100, 171, 1}, - {0x0179, 345, 1}, + {0x10426, 3534, 1}, {0x2c00, 2576, 1}, - {0x16e5d, 3975, 1}, - - {0x039e, 792, 1}, - - {0xa779, 3246, 1}, - - {0x10426, 3525, 1}, - - {0xab79, 1627, 1}, - {0xffffffff, -1, 0}, + {0xff39, 3414, 1}, {0x1e26, 1924, 1}, @@ -1338,464 +1309,508 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0126, 228, 1}, - {0xfb16, 125, 2}, + {0x039e, 792, 1}, {0x2c26, 2690, 1}, - {0x038e, 839, 1}, + {0x0535, 1360, 1}, {0xa726, 3138, 1}, - {0x16e40, 3888, 1}, + {0x1e91d, 4080, 1}, - {0x1ef4, 2219, 1}, + {0x054c, 1429, 1}, - {0x04f4, 1258, 1}, + {0x024c, 598, 1}, - {0x01f4, 487, 1}, + {0x1efe, 2234, 1}, - {0x1ff4, 101, 2}, + {0x04fe, 1273, 1}, - {0x16e4a, 3918, 1}, + {0x01fe, 499, 1}, - {0x051a, 1315, 1}, + {0x038e, 839, 1}, - {0x021a, 541, 1}, + {0x1e28, 1927, 1}, - {0x10a9, 2927, 1}, + {0x0428, 974, 1}, - {0x1f4d, 2342, 1}, + {0x0128, 231, 1}, - {0xff2e, 3372, 1}, + {0x1f28, 2279, 1}, - {0x004d, 34, 1}, + {0x2c28, 2696, 1}, {0xffffffff, -1, 0}, - {0x118a9, 3819, 1}, + {0xa728, 3141, 1}, - {0x0518, 1312, 1}, + {0x03f7, 890, 1}, - {0x0218, 538, 1}, + {0x054a, 1423, 1}, - {0x1e8c, 2078, 1}, + {0x024a, 595, 1}, - {0x048c, 1102, 1}, + {0xff35, 3402, 1}, {0x038a, 742, 1}, + {0xffffffff, -1, 0}, - {0x1f8c, 149, 2}, - - {0x2c8c, 2759, 1}, + {0x24c3, 2537, 1}, {0xffffffff, -1, 0}, - {0x050e, 1297, 1}, + {0x052e, 1345, 1}, - {0x020e, 523, 1}, + {0x022e, 568, 1}, - {0xab8c, 1684, 1}, + {0x10424, 3528, 1}, + {0xffffffff, -1, 0}, - {0x10c8c, 3675, 1}, + {0x017d, 351, 1}, - {0x104bf, 3576, 1}, + 
{0x1e24, 1921, 1}, - {0x052e, 1345, 1}, + {0x0424, 962, 1}, - {0x022e, 568, 1}, + {0x0124, 225, 1}, - {0x1e2c, 1933, 1}, + {0xa77d, 1858, 1}, - {0x042c, 987, 1}, + {0x2c24, 2684, 1}, - {0x012c, 237, 1}, + {0xab7d, 1639, 1}, - {0x1f2c, 2291, 1}, + {0xa724, 3135, 1}, - {0x2c2c, 2708, 1}, + {0x051a, 1315, 1}, - {0xa68c, 3108, 1}, + {0x021a, 541, 1}, - {0xa72c, 3147, 1}, + {0x1ee2, 2192, 1}, - {0x1cbf, 1597, 1}, + {0x04e2, 1231, 1}, - {0xabbf, 1837, 1}, + {0x01e2, 462, 1}, - {0x050c, 1294, 1}, + {0x1fe2, 36, 3}, - {0x020c, 520, 1}, + {0x2ce2, 2888, 1}, - {0x1e28, 1927, 1}, + {0xff2e, 3381, 1}, - {0x0428, 974, 1}, + {0x0518, 1312, 1}, - {0x0128, 231, 1}, + {0x0218, 538, 1}, - {0x1f28, 2279, 1}, + {0x01db, 450, 1}, - {0x2c28, 2696, 1}, + {0x1fdb, 2402, 1}, - {0x1fab, 224, 2}, + {0x2126, 826, 1}, - {0xa728, 3141, 1}, + {0x00db, 156, 1}, {0xffffffff, -1, 0}, - {0xa7ab, 631, 1}, + {0x10422, 3522, 1}, - {0x1cab, 1543, 1}, + {0x050e, 1297, 1}, - {0xabab, 1777, 1}, + {0x020e, 523, 1}, + + {0x1e22, 1918, 1}, + + {0x0422, 954, 1}, + + {0x0122, 222, 1}, + + {0x0533, 1354, 1}, + + {0x2c22, 2678, 1}, - {0x10cab, 3768, 1}, + {0x104b7, 3561, 1}, - {0x16e5b, 3969, 1}, + {0xa722, 3132, 1}, - {0x10424, 3519, 1}, + {0x050c, 1294, 1}, + + {0x020c, 520, 1}, + + {0x104cf, 3633, 1}, + + {0x01b7, 706, 1}, + + {0x1fb7, 10, 3}, + + {0x052c, 1342, 1}, + + {0x022c, 565, 1}, + + {0x01cf, 432, 1}, + + {0x1cb7, 1579, 1}, + + {0xabb7, 1813, 1}, + + {0x00cf, 123, 1}, + + {0x104d1, 3639, 1}, + + {0x0055, 59, 1}, {0x0508, 1288, 1}, {0x0208, 514, 1}, - {0x1e24, 1921, 1}, + {0xff33, 3396, 1}, - {0x0424, 962, 1}, + {0x01d1, 435, 1}, - {0x0124, 225, 1}, - {0xffffffff, -1, 0}, + {0x1e8c, 2078, 1}, - {0x2c24, 2684, 1}, + {0x048c, 1102, 1}, - {0x104b7, 3552, 1}, + {0x00d1, 129, 1}, - {0xa724, 3135, 1}, + {0x1f8c, 149, 2}, - {0x0514, 1306, 1}, + {0x2c8c, 2759, 1}, - {0x0214, 532, 1}, + {0x0516, 1309, 1}, - {0x03fa, 893, 1}, + {0x0216, 535, 1}, - {0x01b7, 706, 1}, + {0xff2c, 3375, 1}, - {0x1fb7, 10, 3}, + 
{0xab8c, 1684, 1}, + + {0x10c8c, 3684, 1}, - {0x104b3, 3540, 1}, + {0x104b3, 3549, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1cb7, 1579, 1}, + {0x0514, 1306, 1}, - {0xabb7, 1813, 1}, + {0x0214, 532, 1}, {0x01b3, 402, 1}, {0x1fb3, 62, 2}, + {0xffffffff, -1, 0}, - {0x0506, 1285, 1}, - - {0x0206, 511, 1}, + {0xa68c, 3108, 1}, - {0xa7b3, 3330, 1}, + {0xa7b3, 3339, 1}, {0x1cb3, 1567, 1}, {0xabb3, 1801, 1}, - {0x10422, 3513, 1}, + {0x0506, 1285, 1}, - {0x0504, 1282, 1}, + {0x0206, 511, 1}, - {0x0204, 508, 1}, + {0x1ef0, 2213, 1}, - {0x1e22, 1918, 1}, + {0x04f0, 1252, 1}, - {0x0422, 954, 1}, + {0x01f0, 20, 2}, - {0x0122, 222, 1}, + {0x104b1, 3543, 1}, - {0xff26, 3348, 1}, + {0x0504, 1282, 1}, - {0x2c22, 2678, 1}, + {0x0204, 508, 1}, - {0xff35, 3393, 1}, + {0x0149, 46, 2}, - {0xa722, 3132, 1}, + {0x1f49, 2330, 1}, - {0x104b1, 3534, 1}, + {0x01b1, 697, 1}, - {0x0143, 267, 1}, + {0x0049, 4095, 1}, {0x0500, 1276, 1}, {0x0200, 502, 1}, - {0x0043, 6, 1}, - - {0x01b1, 697, 1}, - {0xffffffff, -1, 0}, - - {0x2163, 2456, 1}, - {0xffffffff, -1, 0}, - {0xa7b1, 688, 1}, {0x1cb1, 1561, 1}, {0xabb1, 1795, 1}, - {0x10cb1, 3786, 1}, + {0x10cb1, 3795, 1}, {0x0526, 1333, 1}, {0x0226, 556, 1}, + {0xffffffff, -1, 0}, - {0x0535, 1360, 1}, + {0x1e6e, 2033, 1}, - {0x10bf, 2993, 1}, + {0x046e, 1069, 1}, - {0x03a9, 826, 1}, + {0x016e, 330, 1}, - {0x01a7, 393, 1}, + {0x1f6e, 2375, 1}, - {0x1fa7, 244, 2}, + {0x2c6e, 667, 1}, - {0x1e916, 4050, 1}, + {0xfb16, 125, 2}, - {0x118bf, 3885, 1}, - {0xffffffff, -1, 0}, + {0xa76e, 3243, 1}, - {0x1ca7, 1531, 1}, + {0x1e91c, 4077, 1}, - {0xaba7, 1765, 1}, + {0x1eee, 2210, 1}, - {0x10ca7, 3756, 1}, + {0x04ee, 1249, 1}, - {0x054d, 1432, 1}, + {0x01ee, 480, 1}, - {0x1f6b, 2366, 1}, + {0x0528, 1336, 1}, - {0x2c6b, 2732, 1}, + {0x0228, 559, 1}, - {0x10ab, 2933, 1}, + {0xfb14, 109, 2}, - {0x1e48, 1975, 1}, - {0xffffffff, -1, 0}, + {0x1f8d, 154, 2}, - {0xff2c, 3366, 1}, + {0x16e5d, 3984, 1}, - {0x1f48, 2327, 1}, + {0xff26, 3357, 1}, - {0x118ab, 3825, 1}, + 
{0xa78d, 643, 1}, - {0x0048, 21, 1}, + {0x10b7, 2969, 1}, - {0xa748, 3186, 1}, + {0xab8d, 1687, 1}, - {0x1e88, 2072, 1}, + {0x10c8d, 3687, 1}, - {0x1f69, 2360, 1}, + {0x1fab, 224, 2}, - {0x2c69, 2729, 1}, + {0xfb06, 29, 2}, - {0x1f88, 129, 2}, + {0x118b7, 3870, 1}, - {0x2c88, 2753, 1}, - {0xffffffff, -1, 0}, + {0xa7ab, 631, 1}, - {0xff28, 3354, 1}, + {0x1cab, 1543, 1}, - {0x1c88, 3035, 1}, + {0xabab, 1777, 1}, - {0xab88, 1672, 1}, + {0x10cab, 3777, 1}, - {0x10c88, 3663, 1}, + {0xfb04, 5, 3}, - {0x1e42, 1966, 1}, + {0x017b, 348, 1}, - {0xa648, 3032, 1}, + {0xff28, 3363, 1}, - {0x052c, 1342, 1}, + {0x0524, 1330, 1}, - {0x022c, 565, 1}, + {0x0224, 553, 1}, - {0x10b7, 2969, 1}, + {0xa77b, 3249, 1}, - {0x0042, 3, 1}, + {0xfb00, 4, 2}, - {0xa742, 3177, 1}, - {0xffffffff, -1, 0}, + {0xab7b, 1633, 1}, - {0xa688, 3102, 1}, + {0x01a9, 685, 1}, - {0x118b7, 3861, 1}, + {0x1fa9, 214, 2}, + {0xffffffff, -1, 0}, - {0x216d, 2486, 1}, + {0x01a7, 393, 1}, - {0x10b3, 2957, 1}, + {0x1fa7, 244, 2}, - {0xff24, 3342, 1}, + {0x1ca9, 1537, 1}, - {0x0528, 1336, 1}, + {0xaba9, 1771, 1}, - {0x0228, 559, 1}, + {0x10ca9, 3771, 1}, - {0x03e0, 860, 1}, + {0x1ca7, 1531, 1}, - {0x118b3, 3849, 1}, + {0xaba7, 1765, 1}, - {0x1e56, 1996, 1}, + {0x10ca7, 3765, 1}, - {0xa642, 3023, 1}, + {0x10b3, 2957, 1}, + {0xffffffff, -1, 0}, - {0x0156, 294, 1}, + {0x0531, 1348, 1}, - {0x1f56, 62, 3}, + {0xff24, 3351, 1}, + {0xffffffff, -1, 0}, - {0x16e4d, 3927, 1}, + {0x118b3, 3858, 1}, - {0x0056, 62, 1}, + {0x216e, 2489, 1}, + {0xffffffff, -1, 0}, - {0xa756, 3207, 1}, + {0x16e57, 3966, 1}, - {0x24bf, 2525, 1}, + {0x0522, 1327, 1}, - {0x1f59, 2345, 1}, + {0x0222, 550, 1}, - {0x1e4e, 1984, 1}, + {0x1e88, 2072, 1}, + {0xffffffff, -1, 0}, - {0x0059, 71, 1}, + {0x03fe, 727, 1}, - {0x014e, 282, 1}, + {0x1f88, 129, 2}, - {0x0524, 1330, 1}, + {0x2c88, 2753, 1}, + {0xffffffff, -1, 0}, - {0x0224, 553, 1}, + {0x10b1, 2951, 1}, - {0x004e, 37, 1}, + {0x1c88, 3035, 1}, - {0xa74e, 3195, 1}, + {0xab88, 1672, 1}, - {0x10b1, 2951, 
1}, + {0x10c88, 3672, 1}, - {0x042d, 990, 1}, + {0xff31, 3390, 1}, - {0xa656, 3054, 1}, + {0x118b1, 3852, 1}, - {0x1f2d, 2294, 1}, + {0x0555, 1456, 1}, - {0x2c2d, 2711, 1}, + {0x1e7a, 2051, 1}, - {0x118b1, 3843, 1}, + {0x047a, 1087, 1}, - {0xff22, 3336, 1}, + {0x24b7, 2501, 1}, + {0xffffffff, -1, 0}, - {0x03f4, 768, 1}, + {0xff22, 3345, 1}, - {0x042f, 996, 1}, + {0xa688, 3102, 1}, - {0x1e91c, 4068, 1}, + {0x24cf, 2573, 1}, - {0x1f2f, 2300, 1}, + {0x1e56, 1996, 1}, - {0xa64e, 3042, 1}, + {0xab7a, 1630, 1}, - {0x104ce, 3621, 1}, + {0x0156, 294, 1}, - {0x10a7, 2921, 1}, + {0x1f56, 62, 3}, + {0xffffffff, -1, 0}, - {0x2126, 826, 1}, + {0x0056, 62, 1}, - {0x1ece, 2162, 1}, + {0xa756, 3207, 1}, - {0xfb14, 109, 2}, + {0x1e54, 1993, 1}, {0xffffffff, -1, 0}, - {0x118a7, 3813, 1}, + {0x0154, 291, 1}, - {0x2cce, 2858, 1}, + {0x1f54, 57, 3}, - {0x00ce, 120, 1}, + {0x16e5b, 3978, 1}, - {0x104b9, 3558, 1}, + {0x0054, 56, 1}, - {0x038c, 836, 1}, + {0xa754, 3204, 1}, - {0x0522, 1327, 1}, + {0x1e4e, 1984, 1}, - {0x0222, 550, 1}, + {0x03e2, 863, 1}, - {0xff33, 3387, 1}, + {0x014e, 282, 1}, - {0x24b7, 2501, 1}, + {0x10ab, 2933, 1}, - {0x1fb9, 2426, 1}, + {0xa656, 3054, 1}, - {0xfb06, 29, 2}, + {0x004e, 37, 1}, - {0x0543, 1402, 1}, + {0xa74e, 3195, 1}, - {0x0243, 354, 1}, + {0x1e52, 1990, 1}, - {0x1cb9, 1585, 1}, + {0x118ab, 3834, 1}, - {0xabb9, 1819, 1}, + {0x0152, 288, 1}, - {0x1e54, 1993, 1}, + {0x1f52, 52, 3}, - {0xfb04, 5, 3}, + {0xa654, 3051, 1}, - {0x0154, 291, 1}, + {0x0052, 49, 1}, - {0x1f54, 57, 3}, + {0xa752, 3201, 1}, + {0xffffffff, -1, 0}, - {0xff31, 3381, 1}, + {0x1e91a, 4071, 1}, - {0x0054, 56, 1}, + {0x0549, 1420, 1}, + {0xffffffff, -1, 0}, - {0xa754, 3204, 1}, + {0xa64e, 3042, 1}, + + {0x10a9, 2927, 1}, {0xffffffff, -1, 0}, - {0x1e52, 1990, 1}, + {0x0053, 52, 1}, - {0x0533, 1354, 1}, + {0x10a7, 2921, 1}, - {0x0152, 288, 1}, + {0x1e918, 4065, 1}, - {0x1f52, 52, 3}, + {0x118a9, 3828, 1}, - {0xfb00, 4, 2}, + {0xa652, 3048, 1}, - {0x0052, 49, 1}, + {0x1e30, 1939, 1}, - 
{0xa752, 3201, 1}, + {0x118a7, 3822, 1}, - {0x03ab, 833, 1}, - {0xffffffff, -1, 0}, + {0x0130, 261, 2}, + + {0x03cf, 845, 1}, {0x1e50, 1987, 1}, - {0xa654, 3051, 1}, + {0x1e90e, 4035, 1}, {0x0150, 285, 1}, {0x1f50, 84, 2}, - - {0x0531, 1348, 1}, + {0xffffffff, -1, 0}, {0x0050, 43, 1}, {0xa750, 3198, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, - {0xa652, 3048, 1}, + {0x03d1, 768, 1}, - {0x0548, 1417, 1}, + {0x1e48, 1975, 1}, - {0x0248, 592, 1}, + {0x1e90c, 4029, 1}, - {0x10420, 3507, 1}, + {0x038c, 836, 1}, - {0x013f, 261, 1}, + {0x1f48, 2327, 1}, + {0xffffffff, -1, 0}, - {0x1f3f, 2324, 1}, + {0x0048, 21, 1}, + + {0xa748, 3186, 1}, + {0xffffffff, -1, 0}, + + {0x10420, 3516, 1}, + + {0xa650, 3045, 1}, + {0xffffffff, -1, 0}, {0x1e20, 1915, 1}, @@ -1803,34 +1818,30 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0120, 219, 1}, - {0xa650, 3045, 1}, + {0x1e908, 4017, 1}, {0x2c20, 2672, 1}, {0xffffffff, -1, 0}, - {0x16e43, 3897, 1}, - {0xffffffff, -1, 0}, - - {0x1e91a, 4062, 1}, - {0xffffffff, -1, 0}, + {0x1041e, 3510, 1}, - {0x1041e, 3501, 1}, + {0xa648, 3032, 1}, {0xffffffff, -1, 0}, - {0x0542, 1399, 1}, - {0x1e1e, 1912, 1}, {0x041e, 940, 1}, {0x011e, 216, 1}, - {0x1e918, 4056, 1}, + {0x1e916, 4059, 1}, {0x2c1e, 2666, 1}, - {0x10412, 3465, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x10412, 3474, 1}, + + {0x03f0, 778, 1}, + {0xffffffff, -1, 0}, {0x1e12, 1894, 1}, @@ -1838,47 +1849,33 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0112, 198, 1}, - {0x1e90e, 4026, 1}, + {0x1e914, 4053, 1}, {0x2c12, 2630, 1}, - {0xffffffff, -1, 0}, - {0x10410, 3459, 1}, + {0x1f4d, 2342, 1}, - {0xff2d, 3369, 1}, + {0x10410, 3468, 1}, - {0x0556, 1459, 1}, + {0x004d, 34, 1}, + {0xffffffff, -1, 0}, {0x1e10, 1891, 1}, {0x0410, 896, 1}, {0x0110, 195, 1}, - {0xffffffff, -1, 0}, - {0x2c10, 2624, 1}, - - {0xff2f, 3375, 1}, - - {0x10b9, 2975, 1}, + {0x1e906, 4011, 1}, - {0x16e48, 3912, 1}, - - {0x054e, 1435, 1}, - - {0x024e, 601, 1}, - - {0x1e90c, 4020, 
1}, - - {0x118b9, 3867, 1}, - {0xffffffff, -1, 0}, + {0x2c10, 2624, 1}, - {0x104ca, 3609, 1}, + {0x104ca, 3618, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0x1eca, 2156, 1}, - {0x03a7, 820, 1}, + {0x1e904, 4005, 1}, {0x01ca, 425, 1}, @@ -1888,14 +1885,14 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x00ca, 108, 1}, - {0x104c8, 3603, 1}, - {0xffffffff, -1, 0}, + {0x104c8, 3612, 1}, - {0x16e42, 3894, 1}, + {0x1e900, 3993, 1}, - {0x1ec8, 2153, 1}, + {0x03ee, 881, 1}, - {0x1e908, 4008, 1}, + {0x1ec8, 2153, 1}, + {0xffffffff, -1, 0}, {0x01c8, 421, 1}, @@ -1905,27 +1902,25 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x00c8, 102, 1}, - {0x1e6c, 2030, 1}, - - {0x046c, 1066, 1}, + {0x018b, 366, 1}, - {0x016c, 327, 1}, + {0x1f8b, 144, 2}, + {0xffffffff, -1, 0}, - {0x1f6c, 2369, 1}, + {0x03ab, 833, 1}, - {0x1e914, 4044, 1}, + {0xa78b, 3267, 1}, - {0x104c6, 3597, 1}, + {0x104c6, 3606, 1}, - {0xa76c, 3240, 1}, + {0xab8b, 1681, 1}, - {0x0388, 736, 1}, + {0x10c8b, 3681, 1}, {0x1ec6, 2150, 1}, + {0xffffffff, -1, 0}, - {0x216b, 2480, 1}, - - {0x16e56, 3954, 1}, + {0x0556, 1459, 1}, {0x1fc6, 67, 2}, @@ -1935,52 +1930,58 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xa7c6, 1864, 1}, - {0x16e59, 3963, 1}, - - {0x1e906, 4002, 1}, + {0x1e3a, 1954, 1}, + {0xffffffff, -1, 0}, {0x0554, 1453, 1}, - {0xa66c, 3087, 1}, + {0x1f3a, 2309, 1}, - {0x16e4e, 3930, 1}, + {0x03a9, 826, 1}, + {0xffffffff, -1, 0}, - {0x2169, 2474, 1}, + {0xa73a, 3165, 1}, - {0x24ce, 2570, 1}, + {0x03a7, 820, 1}, + {0xffffffff, -1, 0}, + + {0x054e, 1435, 1}, - {0x1e904, 3996, 1}, + {0x024e, 601, 1}, + {0xffffffff, -1, 0}, - {0x104c5, 3594, 1}, + {0x104ce, 3630, 1}, + + {0x104c5, 3603, 1}, {0xffffffff, -1, 0}, + {0x1ece, 2162, 1}, + {0x0552, 1447, 1}, - {0xffffffff, -1, 0}, {0x04c5, 1186, 1}, {0x01c5, 417, 1}, - {0xffffffff, -1, 0}, - {0x24b9, 2507, 1}, + {0x2cce, 2858, 1}, + + {0x00ce, 120, 1}, {0x00c5, 92, 1}, {0xa7c5, 682, 1}, - {0x1e900, 3984, 1}, - - {0x0550, 1441, 1}, + {0x104c0, 3588, 1}, - 
{0x104c0, 3579, 1}, + {0x104b5, 3555, 1}, - {0x104b5, 3546, 1}, - {0xffffffff, -1, 0}, + {0x0553, 1450, 1}, {0x1ec0, 2141, 1}, {0x04c0, 1201, 1}, - {0xffffffff, -1, 0}, + + {0x0388, 736, 1}, {0x01b5, 405, 1}, @@ -1989,158 +1990,149 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x00c0, 77, 1}, {0x00b5, 785, 1}, - - {0x053f, 1390, 1}, + {0xffffffff, -1, 0}, {0x1cb5, 1573, 1}, {0xabb5, 1807, 1}, - {0x1fad, 234, 2}, - - {0x0520, 1324, 1}, - - {0x0220, 381, 1}, - - {0xa7ad, 661, 1}, - - {0x1cad, 1549, 1}, - - {0xabad, 1783, 1}, + {0x0230, 571, 1}, - {0x10cad, 3774, 1}, + {0x01af, 399, 1}, - {0x1efc, 2231, 1}, + {0x1faf, 244, 2}, - {0x04fc, 1270, 1}, + {0x0550, 1441, 1}, - {0x01fc, 496, 1}, + {0x16e59, 3972, 1}, + {0xffffffff, -1, 0}, - {0x1ffc, 96, 2}, + {0x1caf, 1555, 1}, - {0x16e54, 3948, 1}, + {0xabaf, 1789, 1}, - {0x1fa5, 234, 2}, - {0xffffffff, -1, 0}, + {0x10caf, 3789, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x051e, 1321, 1}, + {0x1fad, 234, 2}, - {0x021e, 547, 1}, + {0x0548, 1417, 1}, - {0x1ca5, 1525, 1}, + {0x0248, 592, 1}, - {0xaba5, 1759, 1}, + {0xa7ad, 661, 1}, - {0x10ca5, 3750, 1}, + {0x1cad, 1549, 1}, - {0x16e52, 3942, 1}, + {0xabad, 1783, 1}, - {0x01af, 399, 1}, + {0x10cad, 3783, 1}, - {0x1faf, 244, 2}, + {0xff30, 3387, 1}, + {0xffffffff, -1, 0}, - {0x0512, 1303, 1}, + {0x16e4c, 3933, 1}, - {0x0212, 529, 1}, + {0x1fa5, 234, 2}, {0xffffffff, -1, 0}, - {0x1caf, 1555, 1}, + {0x0520, 1324, 1}, - {0xabaf, 1789, 1}, + {0x0220, 381, 1}, + + {0x1ca5, 1525, 1}, - {0x10caf, 3780, 1}, + {0xaba5, 1759, 1}, - {0x16e50, 3936, 1}, + {0x10ca5, 3759, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0x1fa3, 224, 2}, - {0xffffffff, -1, 0}, - {0x0510, 1300, 1}, + {0x051e, 1321, 1}, - {0x0210, 526, 1}, + {0x021e, 547, 1}, + + {0x16e4a, 3927, 1}, {0x1ca3, 1519, 1}, {0xaba3, 1753, 1}, - {0x10ca3, 3744, 1}, - - {0x0429, 977, 1}, + {0x10ca3, 3753, 1}, + {0xffffffff, -1, 0}, {0x1fa1, 214, 2}, - {0x1f29, 2282, 1}, + {0x0512, 1303, 1}, - {0x2c29, 
2699, 1}, - {0xffffffff, -1, 0}, + {0x0212, 529, 1}, + + {0x054d, 1432, 1}, {0x1ca1, 1513, 1}, {0xaba1, 1747, 1}, - {0x10ca1, 3738, 1}, + {0x10ca1, 3747, 1}, - {0x1e6a, 2027, 1}, + {0x1e66, 2021, 1}, - {0x046a, 1063, 1}, + {0x0466, 1057, 1}, - {0x016a, 324, 1}, + {0x0166, 318, 1}, - {0x1f6a, 2363, 1}, + {0x0510, 1300, 1}, - {0x1e46, 1972, 1}, - {0xffffffff, -1, 0}, + {0x0210, 526, 1}, - {0xa76a, 3237, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x1e97, 34, 2}, - {0x0046, 15, 1}, + {0xa766, 3231, 1}, - {0xa746, 3183, 1}, + {0x0197, 649, 1}, - {0x10c5, 3011, 1}, + {0x1f97, 204, 2}, {0xffffffff, -1, 0}, - {0x1e68, 2024, 1}, + {0x1e46, 1972, 1}, - {0x0468, 1060, 1}, + {0x10c5, 3011, 1}, - {0x0168, 321, 1}, + {0x1c97, 1483, 1}, - {0x1f68, 2357, 1}, - {0xffffffff, -1, 0}, + {0xab97, 1717, 1}, - {0xa66a, 3084, 1}, + {0x10c97, 3717, 1}, - {0xa768, 3234, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x0046, 15, 1}, - {0xa646, 3029, 1}, + {0xa746, 3183, 1}, + {0xffffffff, -1, 0}, - {0x10c0, 2996, 1}, + {0xa666, 3078, 1}, - {0x10b5, 2963, 1}, + {0x1e44, 1969, 1}, {0x24ca, 2558, 1}, - {0x13fc, 1852, 1}, + {0x10c0, 2996, 1}, - {0x1e44, 1969, 1}, + {0x10b5, 2963, 1}, {0xffffffff, -1, 0}, - {0x118b5, 3855, 1}, - - {0x10ad, 2939, 1}, - - {0xa668, 3081, 1}, - {0x0044, 9, 1}, {0xa744, 3180, 1}, + {0xffffffff, -1, 0}, + + {0x118b5, 3864, 1}, + + {0xa646, 3029, 1}, {0x24c8, 2552, 1}, - {0x118ad, 3831, 1}, + {0x10af, 2945, 1}, {0xffffffff, -1, 0}, {0x1e64, 2018, 1}, @@ -2148,71 +2140,103 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x0464, 1054, 1}, {0x0164, 315, 1}, - {0xffffffff, -1, 0}, + + {0x118af, 3846, 1}, {0x2c64, 676, 1}, - {0x10a5, 2915, 1}, + {0xa644, 3026, 1}, {0xa764, 3228, 1}, - {0xffffffff, -1, 0}, - {0xa644, 3026, 1}, + {0x10ad, 2939, 1}, {0xffffffff, -1, 0}, - {0x118a5, 3807, 1}, + {0x053a, 1375, 1}, + + {0x023a, 2720, 1}, {0x24c6, 2546, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x10af, 2945, 1}, + {0x118ad, 3840, 1}, - {0x1e32, 1942, 1}, - 
{0xffffffff, -1, 0}, + {0x1e6c, 2030, 1}, - {0x0132, 243, 1}, + {0x046c, 1066, 1}, + + {0x016c, 327, 1}, + + {0x1f6c, 2369, 1}, + + {0x10a5, 2915, 1}, {0xa664, 3075, 1}, - {0x118af, 3837, 1}, + {0xa76c, 3240, 1}, + {0xffffffff, -1, 0}, + + {0x2166, 2465, 1}, - {0x104cb, 3612, 1}, + {0x118a5, 3816, 1}, - {0xa732, 3153, 1}, + {0x1e6a, 2027, 1}, + + {0x046a, 1063, 1}, + + {0x016a, 324, 1}, + + {0x1f6a, 2363, 1}, {0x10a3, 2909, 1}, - {0xffffffff, -1, 0}, - {0x04cb, 1195, 1}, + {0xff3a, 3417, 1}, - {0x01cb, 425, 1}, + {0xa76a, 3237, 1}, + {0xffffffff, -1, 0}, - {0x1fcb, 2396, 1}, + {0xa66c, 3087, 1}, - {0x118a3, 3801, 1}, + {0x118a3, 3810, 1}, - {0x00cb, 111, 1}, + {0x24ce, 2570, 1}, {0x24c5, 2543, 1}, {0x10a1, 2903, 1}, + + {0x1e68, 2024, 1}, + + {0x0468, 1060, 1}, + + {0x0168, 321, 1}, + + {0x1f68, 2357, 1}, + + {0x118a1, 3804, 1}, + + {0xa66a, 3084, 1}, + + {0xa768, 3234, 1}, {0xffffffff, -1, 0}, + {0x24c0, 2528, 1}, + {0x1e62, 2015, 1}, {0x0462, 1050, 1}, {0x0162, 312, 1}, - - {0x118a1, 3795, 1}, + {0xffffffff, -1, 0}, {0x2c62, 658, 1}, {0xffffffff, -1, 0}, {0xa762, 3225, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, - {0x24c0, 2528, 1}, + {0x2164, 2459, 1}, + + {0xa668, 3081, 1}, {0x1e60, 2011, 1}, @@ -2222,41 +2246,41 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xffffffff, -1, 0}, {0x2c60, 2717, 1}, - - {0xff29, 3357, 1}, + {0xffffffff, -1, 0}, {0xa760, 3222, 1}, - {0xffffffff, -1, 0}, + + {0x1e32, 1942, 1}, {0xa662, 3072, 1}, - {0x1ee2, 2192, 1}, + {0x0132, 243, 1}, + {0xffffffff, -1, 0}, - {0x04e2, 1231, 1}, + {0x216c, 2483, 1}, + {0xffffffff, -1, 0}, - {0x01e2, 462, 1}, + {0xa732, 3153, 1}, + {0xffffffff, -1, 0}, - {0x1fe2, 36, 3}, + {0x1ede, 2186, 1}, - {0x2ce2, 2888, 1}, + {0x04de, 1225, 1}, - {0x042b, 984, 1}, - {0xffffffff, -1, 0}, + {0x01de, 456, 1}, - {0x1f2b, 2288, 1}, + {0xa660, 3069, 1}, - {0x2c2b, 2705, 1}, + {0x2cde, 2882, 1}, - {0xa660, 3069, 1}, - {0xffffffff, -1, 0}, + {0x00de, 165, 1}, - {0x216c, 2483, 1}, + 
{0x216a, 2477, 1}, {0x1eda, 2180, 1}, {0x04da, 1219, 1}, - - {0x104bd, 3570, 1}, + {0xffffffff, -1, 0}, {0x1fda, 2399, 1}, @@ -2267,7 +2291,8 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x1ed8, 2177, 1}, {0x04d8, 1216, 1}, - {0xffffffff, -1, 0}, + + {0x0051, 46, 1}, {0x1fd8, 2429, 1}, @@ -2275,18 +2300,13 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x00d8, 147, 1}, - {0x1cbd, 1591, 1}, - - {0xabbd, 1831, 1}, - - {0x0546, 1411, 1}, - - {0x0246, 589, 1}, + {0x2168, 2471, 1}, {0x1ed6, 2174, 1}, {0x04d6, 1213, 1}, - {0xffffffff, -1, 0}, + + {0x104cd, 3627, 1}, {0x1fd6, 76, 2}, @@ -2294,316 +2314,402 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x00d6, 144, 1}, - {0x104d2, 3633, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x04cd, 1198, 1}, - {0x1ed2, 2168, 1}, + {0x01cd, 429, 1}, - {0x04d2, 1207, 1}, + {0x2162, 2453, 1}, - {0x10427, 3528, 1}, + {0x104cb, 3621, 1}, - {0x1fd2, 20, 3}, + {0x00cd, 117, 1}, - {0x2cd2, 2864, 1}, + {0x0546, 1411, 1}, - {0x00d2, 132, 1}, + {0x0246, 589, 1}, - {0x0427, 971, 1}, + {0x04cb, 1195, 1}, - {0x104d0, 3627, 1}, + {0x01cb, 425, 1}, + + {0x1fcb, 2396, 1}, {0xffffffff, -1, 0}, - {0x2c27, 2693, 1}, + {0x00cb, 111, 1}, - {0x1ed0, 2165, 1}, + {0x2160, 2447, 1}, - {0x04d0, 1204, 1}, + {0x104bf, 3585, 1}, {0x0544, 1405, 1}, {0x0244, 694, 1}, + {0xffffffff, -1, 0}, + + {0x104bd, 3579, 1}, + + {0x104d0, 3636, 1}, + + {0x2132, 2444, 1}, + + {0x03a5, 813, 1}, + + {0x1ed0, 2165, 1}, + + {0x04d0, 1204, 1}, + + {0x1cbf, 1597, 1}, + + {0xabbf, 1837, 1}, {0x2cd0, 2861, 1}, {0x00d0, 126, 1}, - {0x03a5, 813, 1}, + {0x1cbd, 1591, 1}, - {0x104cd, 3618, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xabbd, 1831, 1}, - {0xff32, 3384, 1}, + {0x104cc, 3624, 1}, - {0x04cd, 1198, 1}, + {0x03a3, 806, 1}, - {0x01cd, 429, 1}, - {0xffffffff, -1, 0}, + {0x1f4b, 2336, 1}, - {0x104c3, 3588, 1}, + {0x1ecc, 2159, 1}, - {0x00cd, 117, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x004b, 27, 1}, - {0x04c3, 1183, 1}, + {0x16e55, 3960, 1}, 
- {0x104bb, 3564, 1}, + {0x1fcc, 71, 2}, - {0x1fc3, 71, 2}, + {0x2ccc, 2855, 1}, - {0x16e46, 3906, 1}, + {0x00cc, 114, 1}, - {0x00c3, 86, 1}, + {0x03a1, 802, 1}, - {0x03a3, 806, 1}, + {0x104c9, 3615, 1}, + + {0x104bb, 3573, 1}, + + {0x1e920, 4089, 1}, + + {0x004f, 40, 1}, + + {0x04c9, 1192, 1}, {0xffffffff, -1, 0}, - {0x1fbb, 2384, 1}, + {0x1fc9, 2390, 1}, - {0x104cc, 3615, 1}, + {0x1fbb, 2384, 1}, - {0x0532, 1351, 1}, + {0x00c9, 105, 1}, - {0x0232, 574, 1}, + {0xa7c9, 3333, 1}, + {0xffffffff, -1, 0}, - {0x1ecc, 2159, 1}, + {0x1e91e, 4083, 1}, {0xabbb, 1825, 1}, + {0xffffffff, -1, 0}, - {0x03a1, 802, 1}, + {0x0397, 765, 1}, - {0x1fcc, 71, 2}, + {0x019f, 673, 1}, - {0x2ccc, 2855, 1}, + {0x1f9f, 204, 2}, - {0x00cc, 114, 1}, + {0x1f6d, 2372, 1}, - {0x019d, 670, 1}, + {0x2c6d, 607, 1}, - {0x1f9d, 194, 2}, + {0x1e912, 4047, 1}, - {0x1e78, 2048, 1}, + {0x1c9f, 1507, 1}, - {0x0478, 1084, 1}, + {0xab9f, 1741, 1}, - {0x0178, 168, 1}, + {0x10c9f, 3741, 1}, - {0x1c9d, 1501, 1}, + {0x0145, 270, 1}, - {0xab9d, 1735, 1}, + {0x019d, 670, 1}, - {0x10c9d, 3726, 1}, + {0x1f9d, 194, 2}, + + {0x0045, 12, 1}, {0xffffffff, -1, 0}, - {0x16e44, 3900, 1}, + {0x1e910, 4041, 1}, - {0xab78, 1624, 1}, + {0x1c9d, 1501, 1}, + + {0xab9d, 1735, 1}, + + {0x10c9d, 3735, 1}, {0x1e9b, 2011, 1}, - {0x10bd, 2987, 1}, + {0x16e49, 3924, 1}, {0xffffffff, -1, 0}, {0x1f9b, 184, 2}, - - {0xff2b, 3363, 1}, {0xffffffff, -1, 0}, - {0x118bd, 3879, 1}, + {0x10cd, 3017, 1}, + {0xffffffff, -1, 0}, {0x1c9b, 1495, 1}, {0xab9b, 1729, 1}, - {0x10c9b, 3720, 1}, + {0x10c9b, 3729, 1}, - {0x216a, 2477, 1}, + {0x0193, 634, 1}, - {0x24cb, 2561, 1}, + {0x1f93, 184, 2}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e99, 42, 2}, + {0x1e40, 1963, 1}, - {0x1e920, 4080, 1}, - {0xffffffff, -1, 0}, + {0x1c93, 1471, 1}, - {0x1f99, 174, 2}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xab93, 1705, 1}, - {0x1c99, 1489, 1}, + {0x10c93, 3705, 1}, - {0xab99, 1723, 1}, + {0x0532, 1351, 1}, + + {0x0232, 574, 1}, + 
+ {0xa740, 3174, 1}, + + {0x1e99, 42, 2}, - {0x10c99, 3714, 1}, + {0x10bf, 2993, 1}, {0xffffffff, -1, 0}, - {0x2168, 2471, 1}, + {0x1f99, 174, 2}, + {0xffffffff, -1, 0}, - {0x0193, 634, 1}, + {0x10bd, 2987, 1}, - {0x1f93, 184, 2}, + {0x118bf, 3894, 1}, - {0x1e91e, 4074, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x1c99, 1489, 1}, - {0x1c93, 1471, 1}, + {0xab99, 1723, 1}, - {0xab93, 1705, 1}, + {0x10c99, 3723, 1}, - {0x10c93, 3696, 1}, + {0x118bd, 3888, 1}, + + {0xa640, 3020, 1}, {0x0191, 369, 1}, {0x1f91, 174, 2}, - {0x1e912, 4038, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - - {0x1c91, 1465, 1}, + {0x216d, 2486, 1}, - {0xab91, 1699, 1}, + {0x0551, 1444, 1}, - {0x10c91, 3690, 1}, - {0xffffffff, -1, 0}, + {0xff32, 3393, 1}, - {0x10cd, 3017, 1}, + {0x1c91, 1465, 1}, - {0xff27, 3351, 1}, + {0xab91, 1699, 1}, - {0x1e910, 4032, 1}, + {0x10c91, 3699, 1}, {0x0187, 363, 1}, {0x1f87, 164, 2}, - - {0x0055, 59, 1}, - - {0x2164, 2459, 1}, - - {0x10c3, 3005, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0x1c87, 1050, 1}, {0xab87, 1669, 1}, - {0x10c87, 3660, 1}, - - {0x0145, 270, 1}, + {0x10c87, 3669, 1}, {0x10bb, 2981, 1}, {0xffffffff, -1, 0}, - {0x0045, 12, 1}, - - {0x24bd, 2519, 1}, - {0xffffffff, -1, 0}, - - {0x118bb, 3873, 1}, - {0x1e72, 2039, 1}, {0x0472, 1075, 1}, {0x0172, 336, 1}, - {0x2132, 2444, 1}, + {0x118bb, 3882, 1}, {0x2c72, 2735, 1}, + {0x1e76, 2045, 1}, + + {0x0476, 1081, 1}, + + {0x0176, 342, 1}, + + {0xab72, 1606, 1}, + + {0x24cd, 2567, 1}, + {0x1e70, 2036, 1}, {0x0470, 1072, 1}, {0x0170, 333, 1}, - {0xab72, 1606, 1}, + {0xab76, 1618, 1}, {0x2c70, 610, 1}, - {0x1e66, 2021, 1}, + {0x1e5a, 2002, 1}, - {0x0466, 1057, 1}, + {0x24cb, 2561, 1}, - {0x0166, 318, 1}, + {0x015a, 300, 1}, {0xab70, 1600, 1}, - {0xffffffff, -1, 0}, - {0x1e3a, 1954, 1}, + {0x1e42, 1966, 1}, - {0xa766, 3231, 1}, + {0x005a, 74, 1}, - {0x1e5a, 2002, 1}, + {0xa75a, 3213, 1}, - {0x1f3a, 2309, 1}, + {0x1f6b, 2366, 1}, - {0x015a, 300, 1}, + {0x2c6b, 2732, 1}, - 
{0x2162, 2453, 1}, + {0x0042, 3, 1}, - {0xa73a, 3165, 1}, + {0xa742, 3177, 1}, - {0x005a, 74, 1}, + {0x24bf, 2525, 1}, - {0xa75a, 3213, 1}, + {0x10421, 3519, 1}, - {0x03e2, 863, 1}, + {0x03de, 857, 1}, + + {0x054b, 1426, 1}, + + {0x24bd, 2519, 1}, + + {0x0421, 950, 1}, + + {0x104c1, 3591, 1}, + + {0xa65a, 3060, 1}, + + {0x2c21, 2675, 1}, + + {0x03da, 851, 1}, + + {0x04c1, 1180, 1}, + + {0xa642, 3023, 1}, + + {0x054f, 1438, 1}, {0xffffffff, -1, 0}, - {0x104cf, 3624, 1}, + {0x00c1, 80, 1}, + + {0x03d8, 848, 1}, + + {0x24cc, 2564, 1}, + + {0x16e56, 3963, 1}, + + {0x1efa, 2228, 1}, + + {0x04fa, 1267, 1}, + + {0x01fa, 493, 1}, + + {0x1ffa, 2417, 1}, + + {0x03d6, 798, 1}, {0x1e58, 1999, 1}, - {0xa666, 3078, 1}, + {0x16e54, 3957, 1}, {0x0158, 297, 1}, - {0x2160, 2447, 1}, + {0x24c9, 2555, 1}, - {0x01cf, 432, 1}, + {0x24bb, 2513, 1}, {0x0058, 68, 1}, {0xa758, 3210, 1}, - {0x00cf, 123, 1}, - - {0xa65a, 3060, 1}, + {0x042f, 996, 1}, - {0x03da, 851, 1}, + {0x16e4e, 3939, 1}, - {0x24cd, 2567, 1}, + {0x1f2f, 2300, 1}, - {0x1eec, 2207, 1}, + {0x018f, 625, 1}, - {0x04ec, 1246, 1}, + {0x1f8f, 164, 2}, - {0x01ec, 477, 1}, + {0x0545, 1408, 1}, - {0x1fec, 2441, 1}, + {0x0245, 703, 1}, + {0xffffffff, -1, 0}, - {0x03d8, 848, 1}, + {0x16e52, 3951, 1}, - {0x212b, 92, 1}, + {0xab8f, 1693, 1}, - {0x24c3, 2537, 1}, + {0x10c8f, 3693, 1}, {0xa658, 3057, 1}, - {0x1edc, 2183, 1}, + {0x1ef4, 2219, 1}, - {0x04dc, 1222, 1}, - {0xffffffff, -1, 0}, + {0x04f4, 1258, 1}, - {0x24bb, 2513, 1}, + {0x01f4, 487, 1}, - {0x2cdc, 2879, 1}, + {0x1ff4, 101, 2}, - {0x00dc, 159, 1}, + {0x216b, 2480, 1}, - {0x03d6, 798, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x16e53, 3954, 1}, - {0x24cc, 2564, 1}, + {0x13fa, 1846, 1}, + + {0x03d0, 748, 1}, + + {0x1ef2, 2216, 1}, + + {0x04f2, 1255, 1}, + + {0x01f2, 483, 1}, + + {0x1ff2, 257, 2}, + + {0x2cf2, 2897, 1}, + + {0x1eec, 2207, 1}, + + {0x04ec, 1246, 1}, + + {0x01ec, 477, 1}, + + {0x1fec, 2441, 1}, + {0xffffffff, -1, 0}, + + {0x0540, 1393, 
1}, + + {0x16e50, 3945, 1}, {0x1eea, 2204, 1}, @@ -2621,6 +2727,8 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x1fe8, 2435, 1}, + {0x16e48, 3921, 1}, + {0x1ee6, 2198, 1}, {0x04e6, 1237, 1}, @@ -2628,9 +2736,6 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x01e6, 468, 1}, {0x1fe6, 88, 2}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - - {0x03d0, 748, 1}, {0x1ee4, 2195, 1}, @@ -2640,104 +2745,61 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x1fe4, 80, 2}, - {0x104c9, 3606, 1}, - - {0x104c1, 3582, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - - {0x04c9, 1192, 1}, - - {0x04c1, 1180, 1}, - - {0x1fc9, 2390, 1}, + {0x0143, 267, 1}, - {0x1f95, 194, 2}, + {0x039f, 795, 1}, - {0x00c9, 105, 1}, + {0x0141, 264, 1}, - {0x00c1, 80, 1}, + {0x0043, 6, 1}, {0xffffffff, -1, 0}, - {0x1c95, 1477, 1}, - - {0xab95, 1711, 1}, - - {0x10c95, 3702, 1}, - - {0x018b, 366, 1}, - - {0x1f8b, 144, 2}, - - {0x0555, 1456, 1}, + {0x0041, 0, 1}, {0x0587, 105, 2}, - {0xa78b, 3267, 1}, - {0xffffffff, -1, 0}, - - {0xab8b, 1681, 1}, - - {0x10c8b, 3672, 1}, - - {0x1e76, 2045, 1}, - - {0x0476, 1081, 1}, - - {0x0176, 342, 1}, + {0x10c1, 2999, 1}, - {0x0545, 1408, 1}, + {0x1f95, 194, 2}, - {0x0245, 703, 1}, - {0xffffffff, -1, 0}, + {0x0345, 773, 1}, {0x039d, 789, 1}, {0x1f83, 144, 2}, - {0xab76, 1618, 1}, - {0xffffffff, -1, 0}, + {0x1c95, 1477, 1}, + + {0xab95, 1711, 1}, - {0xff3a, 3408, 1}, + {0x10c95, 3711, 1}, {0x1c83, 950, 1}, {0xab83, 1657, 1}, - {0x10c83, 3648, 1}, - - {0x0141, 264, 1}, + {0x10c83, 3657, 1}, {0x0181, 613, 1}, {0x1f81, 134, 2}, - {0x0041, 0, 1}, - {0xffffffff, -1, 0}, - {0x039b, 782, 1}, - {0x1c81, 909, 1}, - - {0xab81, 1651, 1}, - - {0x10c81, 3642, 1}, - - {0x0149, 46, 2}, - - {0x1f49, 2330, 1}, + {0x16e4d, 3936, 1}, {0xffffffff, -1, 0}, - {0x0049, 4086, 1}, + {0x1c81, 909, 1}, - {0x053a, 1375, 1}, + {0xab81, 1651, 1}, - {0x023a, 2720, 1}, + {0x10c81, 3651, 1}, {0x1e7e, 2057, 1}, {0x047e, 1093, 1}, - {0x0399, 773, 1}, + {0x0393, 752, 1}, {0x1e5e, 2008, 1}, @@ 
-2746,102 +2808,100 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x015e, 306, 1}, {0xa77e, 3252, 1}, - - {0x16e55, 3951, 1}, + {0xffffffff, -1, 0}, {0xab7e, 1642, 1}, {0xa75e, 3219, 1}, - {0x104d1, 3630, 1}, - {0x1e5c, 2005, 1}, - {0x0393, 752, 1}, + {0x0542, 1399, 1}, {0x015c, 303, 1}, - {0xffffffff, -1, 0}, - - {0x01d1, 435, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x16e45, 3903, 1}, + {0x0399, 773, 1}, {0xa75c, 3216, 1}, - {0x00d1, 129, 1}, - - {0x10421, 3510, 1}, + {0x013d, 258, 1}, - {0x0391, 745, 1}, + {0x1f3d, 2318, 1}, - {0xa65e, 3066, 1}, + {0x0147, 273, 1}, {0x1e3e, 1960, 1}, - {0x0421, 950, 1}, - {0xffffffff, -1, 0}, + {0xa65e, 3066, 1}, + + {0x0047, 18, 1}, {0x1f3e, 2321, 1}, - {0x2c21, 2675, 1}, - {0xffffffff, -1, 0}, + {0x1e38, 1951, 1}, + + {0x0391, 745, 1}, {0xa73e, 3171, 1}, + {0x1f38, 2303, 1}, + {0xa65c, 3063, 1}, + {0xffffffff, -1, 0}, - {0x10c1, 2999, 1}, + {0xa738, 3162, 1}, {0x1e36, 1948, 1}, - - {0x1e38, 1951, 1}, + {0xffffffff, -1, 0}, {0x0136, 249, 1}, - - {0x24cf, 2573, 1}, - - {0x1f38, 2303, 1}, {0xffffffff, -1, 0}, - {0xa736, 3159, 1}, + {0x2183, 2495, 1}, - {0xa738, 3162, 1}, - {0xffffffff, -1, 0}, + {0x24c1, 2531, 1}, - {0x0345, 773, 1}, + {0xa736, 3159, 1}, {0x1e34, 1945, 1}, - {0x16e5a, 3966, 1}, + {0x10417, 3489, 1}, {0x0134, 246, 1}, - {0x1041d, 3498, 1}, - - {0x013b, 255, 1}, + {0xff21, 3342, 1}, + {0xffffffff, -1, 0}, - {0x1f3b, 2312, 1}, + {0x0417, 919, 1}, {0xa734, 3156, 1}, - {0x041d, 937, 1}, - {0x0372, 718, 1}, - {0x1f1d, 2276, 1}, + {0x2c17, 2645, 1}, - {0x2c1d, 2663, 1}, + {0x013b, 255, 1}, - {0x16e58, 3960, 1}, + {0x1f3b, 2312, 1}, + + {0x042b, 984, 1}, + + {0x0376, 721, 1}, + + {0x1f2b, 2288, 1}, + + {0x2c2b, 2705, 1}, - {0x1041b, 3492, 1}, + {0x1041b, 3501, 1}, + {0xffffffff, -1, 0}, {0x0370, 715, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0x041b, 931, 1}, - {0x10419, 3486, 1}, + {0x10419, 3495, 1}, {0x1f1b, 2270, 1}, @@ -2850,69 +2910,42 @@ 
onigenc_unicode_unfold_key(OnigCodePoint code) {0x0419, 925, 1}, - {0x10417, 3480, 1}, + {0x10415, 3483, 1}, {0x1f19, 2264, 1}, {0x2c19, 2651, 1}, - {0xffffffff, -1, 0}, - - {0x0417, 919, 1}, - - {0x10415, 3474, 1}, - {0x10413, 3468, 1}, - - {0x2c17, 2645, 1}, - {0xffffffff, -1, 0}, + {0xff2f, 3384, 1}, {0x0415, 913, 1}, - {0x0413, 906, 1}, - - {0x2166, 2465, 1}, + {0x10413, 3477, 1}, + {0xffffffff, -1, 0}, {0x2c15, 2639, 1}, + {0xffffffff, -1, 0}, - {0x2c13, 2633, 1}, - - {0x0053, 52, 1}, - - {0x03cf, 845, 1}, - - {0x1efe, 2234, 1}, - - {0x04fe, 1273, 1}, - - {0x01fe, 499, 1}, - - {0x1040f, 3456, 1}, + {0x0413, 906, 1}, {0xffffffff, -1, 0}, - {0x24c9, 2555, 1}, + {0x1040f, 3465, 1}, - {0x24c1, 2531, 1}, + {0x2c13, 2633, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0x040f, 1044, 1}, - {0x03ec, 878, 1}, + {0x1040d, 3459, 1}, {0x1f0f, 2258, 1}, {0x2c0f, 2621, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - - {0x1040d, 3450, 1}, - - {0x0541, 1396, 1}, - - {0x0241, 586, 1}, - - {0x03dc, 854, 1}, + {0xffffffff, -1, 0}, {0x040d, 1038, 1}, - {0x1040b, 3444, 1}, + {0x1040b, 3453, 1}, {0x1f0d, 2252, 1}, @@ -2921,352 +2954,312 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0x040b, 1032, 1}, - {0x0549, 1420, 1}, + {0x03fa, 893, 1}, {0x1f0b, 2246, 1}, {0x2c0b, 2609, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x10409, 3438, 1}, - - {0x03ea, 875, 1}, - - {0x0139, 252, 1}, - - {0x1f39, 2306, 1}, + {0x0543, 1402, 1}, - {0x0409, 1026, 1}, + {0x0243, 354, 1}, - {0x03e8, 872, 1}, + {0x0541, 1396, 1}, - {0x1f09, 2240, 1}, + {0x0241, 586, 1}, - {0x2c09, 2603, 1}, + {0x10409, 3447, 1}, - {0x10405, 3426, 1}, + {0x10405, 3435, 1}, - {0x03e6, 869, 1}, + {0x212b, 92, 1}, - {0xff21, 3333, 1}, + {0x038f, 842, 1}, - {0x1f4b, 2336, 1}, + {0x0409, 1026, 1}, {0x0405, 1014, 1}, - {0x004b, 27, 1}, + {0x1f09, 2240, 1}, - {0x10403, 3420, 1}, + {0x2c09, 2603, 1}, {0x2c05, 2591, 1}, - {0x03e4, 866, 1}, + {0x10403, 3429, 1}, + {0xffffffff, -1, 0}, - {0x104d3, 3636, 
1}, + {0x10401, 3423, 1}, + {0xffffffff, -1, 0}, {0x0403, 1008, 1}, - {0xff36, 3396, 1}, + {0x03f4, 768, 1}, - {0xff38, 3402, 1}, + {0x0401, 1002, 1}, {0x2c03, 2585, 1}, + {0xffffffff, -1, 0}, - {0x01d3, 438, 1}, - - {0x1fd3, 25, 3}, - - {0x0395, 758, 1}, - - {0x00d3, 135, 1}, - - {0x0051, 46, 1}, - - {0x053e, 1387, 1}, - - {0x023e, 2723, 1}, - - {0xff34, 3390, 1}, - - {0x16e41, 3891, 1}, - - {0x10401, 3414, 1}, - - {0x1ef0, 2213, 1}, - - {0x04f0, 1252, 1}, + {0x2c01, 2579, 1}, - {0x01f0, 20, 2}, + {0x1ef6, 2222, 1}, - {0x0401, 1002, 1}, + {0x04f6, 1261, 1}, - {0x0536, 1363, 1}, + {0x01f6, 372, 1}, - {0x0538, 1369, 1}, + {0x1ff6, 92, 2}, - {0x2c01, 2579, 1}, + {0x1e78, 2048, 1}, - {0x16e49, 3915, 1}, + {0x0478, 1084, 1}, - {0x004f, 40, 1}, + {0x0178, 168, 1}, {0xffffffff, -1, 0}, - {0x0376, 721, 1}, + {0x03ec, 878, 1}, - {0x1eee, 2210, 1}, + {0x1e74, 2042, 1}, - {0x04ee, 1249, 1}, + {0x0474, 1078, 1}, - {0x01ee, 480, 1}, + {0x0174, 339, 1}, - {0x0534, 1357, 1}, + {0xab78, 1624, 1}, {0xffffffff, -1, 0}, - {0x053b, 1378, 1}, - - {0x023b, 577, 1}, - - {0x16e5e, 3978, 1}, - - {0x1ef8, 2225, 1}, - - {0x04f8, 1264, 1}, + {0x16e46, 3915, 1}, - {0x01f8, 490, 1}, + {0x03ea, 875, 1}, + {0xffffffff, -1, 0}, - {0x1ff8, 2405, 1}, + {0xab74, 1612, 1}, + {0xffffffff, -1, 0}, - {0x1ed4, 2171, 1}, + {0x03e8, 872, 1}, - {0x04d4, 1210, 1}, - {0xffffffff, -1, 0}, + {0x053d, 1384, 1}, - {0x16e5c, 3972, 1}, + {0x023d, 378, 1}, - {0x2cd4, 2867, 1}, + {0x0547, 1414, 1}, - {0x00d4, 138, 1}, + {0x16e44, 3909, 1}, - {0x2183, 2495, 1}, + {0x03e6, 869, 1}, - {0x1ef6, 2222, 1}, + {0x053e, 1387, 1}, - {0x04f6, 1261, 1}, + {0x023e, 2723, 1}, - {0x01f6, 372, 1}, + {0x1e3c, 1957, 1}, - {0x1ff6, 92, 2}, + {0x03e4, 866, 1}, - {0x1f8d, 154, 2}, + {0x0538, 1369, 1}, - {0x0147, 273, 1}, + {0x1f3c, 2315, 1}, {0xffffffff, -1, 0}, - {0xa78d, 643, 1}, - - {0x0047, 18, 1}, + {0x0429, 977, 1}, - {0xab8d, 1687, 1}, + {0xa73c, 3168, 1}, - {0x10c8d, 3678, 1}, + {0x1f29, 2282, 1}, - {0x1e74, 2042, 1}, + {0x2c29, 
2699, 1}, - {0x0474, 1078, 1}, + {0x0536, 1363, 1}, - {0x0174, 339, 1}, + {0x0395, 758, 1}, - {0x1e3c, 1957, 1}, + {0x10427, 3537, 1}, + {0xffffffff, -1, 0}, - {0x0553, 1450, 1}, + {0x10425, 3531, 1}, + {0xffffffff, -1, 0}, - {0x1e30, 1939, 1}, + {0x0427, 971, 1}, - {0x1f3c, 2315, 1}, + {0x0534, 1357, 1}, - {0x0130, 261, 2}, + {0x0425, 965, 1}, - {0xab74, 1612, 1}, + {0x2c27, 2693, 1}, + {0xffffffff, -1, 0}, - {0xa73c, 3168, 1}, + {0x2c25, 2687, 1}, - {0x10425, 3522, 1}, + {0xff38, 3411, 1}, - {0x03d1, 768, 1}, + {0x10423, 3525, 1}, - {0x10423, 3516, 1}, - {0xffffffff, -1, 0}, + {0x053b, 1378, 1}, - {0x0425, 965, 1}, + {0x023b, 577, 1}, {0xffffffff, -1, 0}, {0x0423, 959, 1}, - {0x2c25, 2687, 1}, + {0x1041f, 3513, 1}, - {0x1041f, 3504, 1}, + {0xff36, 3405, 1}, {0x2c23, 2681, 1}, - - {0xff39, 3405, 1}, {0xffffffff, -1, 0}, {0x041f, 944, 1}, - {0x10411, 3462, 1}, + {0x10411, 3471, 1}, - {0x10407, 3432, 1}, + {0x10407, 3441, 1}, {0x2c1f, 2669, 1}, - {0xffffffff, -1, 0}, + + {0xff34, 3399, 1}, {0x0411, 899, 1}, {0x0407, 1020, 1}, - {0xffffffff, -1, 0}, + + {0x0179, 345, 1}, {0x2c11, 2627, 1}, {0x2c07, 2597, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, - {0x0539, 1372, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xa779, 3246, 1}, + {0xffffffff, -1, 0}, - {0x13f8, 1840, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xab79, 1627, 1}, - {0x054b, 1426, 1}, + {0xff2b, 3372, 1}, - {0x16e53, 3945, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, + {0x1f69, 2360, 1}, - {0x0551, 1444, 1}, + {0x2c69, 2729, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 
0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - - {0x054f, 1438, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x03fe, 727, 1}, + {0xfb17, 117, 2}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x16e4b, 3921, 1}, + {0x1e921, 4092, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xff30, 3378, 1}, + {0xfb15, 113, 2}, {0xffffffff, -1, 0}, - {0x16e51, 3939, 1}, - {0xffffffff, -1, 0}, + {0x16e51, 3948, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x0547, 1414, 1}, + {0xfb13, 121, 2}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xff25, 3345, 1}, + {0x2169, 2474, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xff23, 3339, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x053c, 1381, 1}, - {0x16e4f, 
3933, 1}, - {0xffffffff, -1, 0}, - - {0x0230, 571, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xfb05, 29, 2}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xfb17, 117, 2}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xfb03, 0, 3}, - {0xfb15, 113, 2}, + {0x16e4b, 3930, 1}, - {0xfb13, 121, 2}, + {0xfb01, 8, 2}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x03f0, 778, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x16e4f, 3942, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x16e47, 3909, 1}, - {0xffffffff, -1, 0}, - - {0x03ee, 881, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xff29, 3366, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xfb05, 29, 2}, + {0xff27, 3360, 1}, + {0xffffffff, -1, 0}, + + {0xff25, 3354, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, - {0xfb03, 0, 3}, + {0x16e45, 3912, 1}, + {0xffffffff, -1, 0}, + + {0xff23, 3348, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, 
-1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - - {0x1e921, 4083, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xfb01, 8, 2}, + {0x16e40, 3897, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e91d, 4071, 1}, + {0x1e917, 4062, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e91b, 4065, 1}, + {0x1e91b, 4074, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e919, 4059, 1}, + {0x1e919, 4068, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e917, 4053, 1}, + {0x1e915, 4056, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e915, 4047, 1}, + {0x1e913, 4050, 1}, - {0x1e913, 4041, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x16e5a, 3975, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e90f, 4029, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, 
{0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x16e42, 3903, 1}, + + {0x1e90f, 4038, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e90d, 4023, 1}, + {0x1e90d, 4032, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e90b, 4017, 1}, + {0x1e90b, 4026, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - - {0x1e909, 4011, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + + {0x1e909, 4020, 1}, + + {0x1e905, 4008, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e905, 3999, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x16e58, 3969, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e903, 3993, 1}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0x1e903, 4002, 1}, {0xffffffff, -1, 0}, - {0x1e901, 3987, 1}, + {0x1e901, 3996, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, @@ -3276,16 +3269,34 @@ onigenc_unicode_unfold_key(OnigCodePoint code) {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, 
{0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, + + {0x16e43, 3906, 1}, + {0xffffffff, -1, 0}, + + {0x16e41, 3900, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + + {0x1e91f, 4086, 1}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e91f, 4077, 1}, + {0x1e911, 4044, 1}, + + {0x1e907, 4014, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, - {0x1e911, 4035, 1}, + {0x16e5e, 3987, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, + + {0x16e5c, 3981, 1}, + {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, {0xffffffff, -1, 0}, + {0xffffffff, -1, 0}, - {0x1e907, 4005, 1} + {0x16e47, 3918, 1} }; diff --git a/src/unicode_wb_data.c b/src/unicode_wb_data.c index 8e1a267..ce082a2 100644 --- a/src/unicode_wb_data.c +++ b/src/unicode_wb_data.c @@ -1,6 +1,6 @@ /* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */ /*- - * Copyright (c) 2019 K.Kosako + * Copyright (c) 2019-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,7 +25,7 @@ * SUCH DAMAGE. 
*/ -#define WORD_BREAK_PROPERTY_VERSION 120100 +#define WORD_BREAK_PROPERTY_VERSION 130000 /* ALetter @@ -48,7 +48,7 @@ WSegSpace ZWJ */ -static int WB_RANGE_NUM = 970; +static int WB_RANGE_NUM = 993; static WB_RANGE_TYPE WB_RANGES[] = { {0x00000a, 0x00000a, WB_LF }, {0x00000b, 0x00000c, WB_Newline }, @@ -73,8 +73,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x0000c0, 0x0000d6, WB_ALetter }, {0x0000d8, 0x0000f6, WB_ALetter }, {0x0000f8, 0x0002d7, WB_ALetter }, - {0x0002de, 0x0002e4, WB_ALetter }, - {0x0002ec, 0x0002ff, WB_ALetter }, + {0x0002de, 0x0002ff, WB_ALetter }, {0x000300, 0x00036f, WB_Extend }, {0x000370, 0x000374, WB_ALetter }, {0x000376, 0x000377, WB_ALetter }, @@ -91,11 +90,12 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x000483, 0x000489, WB_Extend }, {0x00048a, 0x00052f, WB_ALetter }, {0x000531, 0x000556, WB_ALetter }, - {0x000559, 0x000559, WB_ALetter }, - {0x00055b, 0x00055c, WB_ALetter }, + {0x000559, 0x00055c, WB_ALetter }, {0x00055e, 0x00055e, WB_ALetter }, + {0x00055f, 0x00055f, WB_MidLetter }, {0x000560, 0x000588, WB_ALetter }, {0x000589, 0x000589, WB_MidNum }, + {0x00058a, 0x00058a, WB_ALetter }, {0x000591, 0x0005bd, WB_Extend }, {0x0005bf, 0x0005bf, WB_Extend }, {0x0005c1, 0x0005c2, WB_Extend }, @@ -155,7 +155,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x000859, 0x00085b, WB_Extend }, {0x000860, 0x00086a, WB_ALetter }, {0x0008a0, 0x0008b4, WB_ALetter }, - {0x0008b6, 0x0008bd, WB_ALetter }, + {0x0008b6, 0x0008c7, WB_ALetter }, {0x0008d3, 0x0008e1, WB_Extend }, {0x0008e2, 0x0008e2, WB_Format }, {0x0008e3, 0x000903, WB_Extend }, @@ -239,7 +239,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x000b3e, 0x000b44, WB_Extend }, {0x000b47, 0x000b48, WB_Extend }, {0x000b4b, 0x000b4d, WB_Extend }, - {0x000b56, 0x000b57, WB_Extend }, + {0x000b55, 0x000b57, WB_Extend }, {0x000b5c, 0x000b5d, WB_ALetter }, {0x000b5f, 0x000b61, WB_ALetter }, {0x000b62, 0x000b63, WB_Extend }, @@ -295,7 +295,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x000ce6, 0x000cef, WB_Numeric }, 
{0x000cf1, 0x000cf2, WB_ALetter }, {0x000d00, 0x000d03, WB_Extend }, - {0x000d05, 0x000d0c, WB_ALetter }, + {0x000d04, 0x000d0c, WB_ALetter }, {0x000d0e, 0x000d10, WB_ALetter }, {0x000d12, 0x000d3a, WB_ALetter }, {0x000d3b, 0x000d3c, WB_Extend }, @@ -310,7 +310,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x000d62, 0x000d63, WB_Extend }, {0x000d66, 0x000d6f, WB_Numeric }, {0x000d7a, 0x000d7f, WB_ALetter }, - {0x000d82, 0x000d83, WB_Extend }, + {0x000d81, 0x000d83, WB_Extend }, {0x000d85, 0x000d96, WB_ALetter }, {0x000d9a, 0x000db1, WB_ALetter }, {0x000db3, 0x000dbb, WB_ALetter }, @@ -421,7 +421,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x001a7f, 0x001a7f, WB_Extend }, {0x001a80, 0x001a89, WB_Numeric }, {0x001a90, 0x001a99, WB_Numeric }, - {0x001ab0, 0x001abe, WB_Extend }, + {0x001ab0, 0x001ac0, WB_Extend }, {0x001b00, 0x001b04, WB_Extend }, {0x001b05, 0x001b33, WB_ALetter }, {0x001b34, 0x001b44, WB_Extend }, @@ -545,7 +545,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x0030fc, 0x0030ff, WB_Katakana }, {0x003105, 0x00312f, WB_ALetter }, {0x003131, 0x00318e, WB_ALetter }, - {0x0031a0, 0x0031ba, WB_ALetter }, + {0x0031a0, 0x0031bf, WB_ALetter }, {0x0031f0, 0x0031ff, WB_Katakana }, {0x0032d0, 0x0032fe, WB_Katakana }, {0x003300, 0x003357, WB_Katakana }, @@ -562,9 +562,9 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x00a69e, 0x00a69f, WB_Extend }, {0x00a6a0, 0x00a6ef, WB_ALetter }, {0x00a6f0, 0x00a6f1, WB_Extend }, - {0x00a717, 0x00a7bf, WB_ALetter }, - {0x00a7c2, 0x00a7c6, WB_ALetter }, - {0x00a7f7, 0x00a801, WB_ALetter }, + {0x00a708, 0x00a7bf, WB_ALetter }, + {0x00a7c2, 0x00a7ca, WB_ALetter }, + {0x00a7f5, 0x00a801, WB_ALetter }, {0x00a802, 0x00a802, WB_Extend }, {0x00a803, 0x00a805, WB_ALetter }, {0x00a806, 0x00a806, WB_Extend }, @@ -572,6 +572,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x00a80b, 0x00a80b, WB_Extend }, {0x00a80c, 0x00a822, WB_ALetter }, {0x00a823, 0x00a827, WB_Extend }, + {0x00a82c, 0x00a82c, WB_Extend }, {0x00a840, 0x00a873, WB_ALetter }, {0x00a880, 
0x00a881, WB_Extend }, {0x00a882, 0x00a8b3, WB_ALetter }, @@ -617,7 +618,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x00ab11, 0x00ab16, WB_ALetter }, {0x00ab20, 0x00ab26, WB_ALetter }, {0x00ab28, 0x00ab2e, WB_ALetter }, - {0x00ab30, 0x00ab67, WB_ALetter }, + {0x00ab30, 0x00ab69, WB_ALetter }, {0x00ab70, 0x00abe2, WB_ALetter }, {0x00abe3, 0x00abea, WB_Extend }, {0x00abec, 0x00abed, WB_Extend }, @@ -739,10 +740,14 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x010d00, 0x010d23, WB_ALetter }, {0x010d24, 0x010d27, WB_Extend }, {0x010d30, 0x010d39, WB_Numeric }, + {0x010e80, 0x010ea9, WB_ALetter }, + {0x010eab, 0x010eac, WB_Extend }, + {0x010eb0, 0x010eb1, WB_ALetter }, {0x010f00, 0x010f1c, WB_ALetter }, {0x010f27, 0x010f27, WB_ALetter }, {0x010f30, 0x010f45, WB_ALetter }, {0x010f46, 0x010f50, WB_Extend }, + {0x010fb0, 0x010fc4, WB_ALetter }, {0x010fe0, 0x010ff6, WB_ALetter }, {0x011000, 0x011002, WB_Extend }, {0x011003, 0x011037, WB_ALetter }, @@ -761,6 +766,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x011136, 0x01113f, WB_Numeric }, {0x011144, 0x011144, WB_ALetter }, {0x011145, 0x011146, WB_Extend }, + {0x011147, 0x011147, WB_ALetter }, {0x011150, 0x011172, WB_ALetter }, {0x011173, 0x011173, WB_Extend }, {0x011176, 0x011176, WB_ALetter }, @@ -769,6 +775,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x0111b3, 0x0111c0, WB_Extend }, {0x0111c1, 0x0111c4, WB_ALetter }, {0x0111c9, 0x0111cc, WB_Extend }, + {0x0111ce, 0x0111cf, WB_Extend }, {0x0111d0, 0x0111d9, WB_Numeric }, {0x0111da, 0x0111da, WB_ALetter }, {0x0111dc, 0x0111dc, WB_ALetter }, @@ -807,7 +814,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x011447, 0x01144a, WB_ALetter }, {0x011450, 0x011459, WB_Numeric }, {0x01145e, 0x01145e, WB_Extend }, - {0x01145f, 0x01145f, WB_ALetter }, + {0x01145f, 0x011461, WB_ALetter }, {0x011480, 0x0114af, WB_ALetter }, {0x0114b0, 0x0114c3, WB_Extend }, {0x0114c4, 0x0114c5, WB_ALetter }, @@ -832,7 +839,19 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x01182c, 0x01183a, WB_Extend }, {0x0118a0, 
0x0118df, WB_ALetter }, {0x0118e0, 0x0118e9, WB_Numeric }, - {0x0118ff, 0x0118ff, WB_ALetter }, + {0x0118ff, 0x011906, WB_ALetter }, + {0x011909, 0x011909, WB_ALetter }, + {0x01190c, 0x011913, WB_ALetter }, + {0x011915, 0x011916, WB_ALetter }, + {0x011918, 0x01192f, WB_ALetter }, + {0x011930, 0x011935, WB_Extend }, + {0x011937, 0x011938, WB_Extend }, + {0x01193b, 0x01193e, WB_Extend }, + {0x01193f, 0x01193f, WB_ALetter }, + {0x011940, 0x011940, WB_Extend }, + {0x011941, 0x011941, WB_ALetter }, + {0x011942, 0x011943, WB_Extend }, + {0x011950, 0x011959, WB_Numeric }, {0x0119a0, 0x0119a7, WB_ALetter }, {0x0119aa, 0x0119d0, WB_ALetter }, {0x0119d1, 0x0119d7, WB_Extend }, @@ -882,6 +901,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x011da0, 0x011da9, WB_Numeric }, {0x011ee0, 0x011ef2, WB_ALetter }, {0x011ef3, 0x011ef6, WB_Extend }, + {0x011fb0, 0x011fb0, WB_ALetter }, {0x012000, 0x012399, WB_ALetter }, {0x012400, 0x01246e, WB_ALetter }, {0x012480, 0x012543, WB_ALetter }, @@ -908,6 +928,8 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x016f93, 0x016f9f, WB_ALetter }, {0x016fe0, 0x016fe1, WB_ALetter }, {0x016fe3, 0x016fe3, WB_ALetter }, + {0x016fe4, 0x016fe4, WB_Extend }, + {0x016ff0, 0x016ff1, WB_Extend }, {0x01b000, 0x01b000, WB_Katakana }, {0x01b164, 0x01b167, WB_Katakana }, {0x01bc00, 0x01bc6a, WB_ALetter }, @@ -1017,6 +1039,7 @@ static WB_RANGE_TYPE WB_RANGES[] = { {0x01f170, 0x01f189, WB_ALetter }, {0x01f1e6, 0x01f1ff, WB_Regional_Indicator }, {0x01f3fb, 0x01f3ff, WB_Extend }, + {0x01fbf0, 0x01fbf9, WB_Numeric }, {0x0e0001, 0x0e0001, WB_Format }, {0x0e0020, 0x0e007f, WB_Extend }, {0x0e0100, 0x0e01ef, WB_Extend } diff --git a/src/utf16_be.c b/src/utf16_be.c index d99af71..d237b93 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -128,7 +128,7 @@ static int utf16be_is_mbc_newline(const UChar* p, const UChar* end) { if (p + 1 < end) { - if (*(p+1) == 0x0a && *p == 0x00) + if (*(p+1) == NEWLINE_CODE && *p == 0x00) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS if (( @@ 
-193,7 +193,7 @@ utf16be_code_to_mbc(OnigCodePoint code, UChar *buf) } else { *p++ = (UChar )((code & 0xff00) >> 8); - *p++ = (UChar )(code & 0xff); + *p = (UChar )(code & 0xff); return 2; } } diff --git a/src/utf16_le.c b/src/utf16_le.c index c6edd94..f14d263 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -140,7 +140,7 @@ static int utf16le_is_mbc_newline(const UChar* p, const UChar* end) { if (p + 1 < end) { - if (*p == 0x0a && *(p+1) == 0x00) + if (*p == NEWLINE_CODE && *(p+1) == 0x00) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS if (( @@ -194,7 +194,7 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) } else { *p++ = (UChar )(code & 0xff); - *p++ = (UChar )((code & 0xff00) >> 8); + *p = (UChar )((code & 0xff00) >> 8); return 2; } } diff --git a/src/utf32_be.c b/src/utf32_be.c index 67e50a2..bdd3db7 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -45,7 +45,7 @@ static int utf32be_is_mbc_newline(const UChar* p, const UChar* end) { if (p + 3 < end) { - if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) + if (*(p+3) == NEWLINE_CODE && *(p+2) == 0 && *(p+1) == 0 && *p == 0) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS if (( diff --git a/src/utf32_le.c b/src/utf32_le.c index 2ae2275..473ab74 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -45,7 +45,7 @@ static int utf32le_is_mbc_newline(const UChar* p, const UChar* end) { if (p + 3 < end) { - if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) + if (*p == NEWLINE_CODE && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) return 1; #ifdef USE_UNICODE_ALL_LINE_TERMINATORS if (( diff --git a/test/Makefile.am b/test/Makefile.am index 4d62568..f12eebe 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -6,9 +6,9 @@ AM_CFLAGS = -Wall -Wno-invalid-source-encoding AM_CPPFLAGS = -I$(top_srcdir)/src if ENABLE_POSIX_API -TESTS = test_utf8 testc testp testcu test_regset +TESTS = test_utf8 test_syntax testc testp testcu test_regset test_back else -TESTS = test_utf8 testc testcu 
test_regset +TESTS = test_utf8 test_syntax testc testcu test_regset test_back endif check_PROGRAMS = $(TESTS) @@ -16,10 +16,12 @@ check_PROGRAMS = $(TESTS) test: test_uchar $(TESTS) @echo "[Oniguruma API, UTF-8 check]" @./test_utf8 | grep RESULT - @echo "[Oniguruma API, ASCII/EUC-JP check]" + @echo "[Oniguruma API, SYNTAX check]" + @./test_syntax | grep RESULT + @echo "[Oniguruma API, EUC-JP check]" @./testc | grep RESULT if ENABLE_POSIX_API - @echo "[POSIX API, ASCII/EUC-JP check]" + @echo "[POSIX API, UTF-8 check]" @./testp | grep RESULT endif @echo "[Oniguruma API, UTF-16 check]" @@ -27,6 +29,8 @@ endif @echo "" @echo "[Oniguruma API, regset check]" @./test_regset + @echo "[Oniguruma API, backward search check]" + @./test_back | grep RESULT test_uchar: @echo "[UChar in oniguruma.h check]" @@ -36,13 +40,14 @@ test_uchar: test_utf8_SOURCES = test_utf8.c test_utf8_LDADD = $(lib_onig) +test_syntax_SOURCES = test_syntax.c +test_syntax_LDADD = $(lib_onig) + testc_SOURCES = testc.c testc_LDADD = $(lib_onig) -testp_SOURCES = testc.c +testp_SOURCES = testp.c testp_LDADD = $(lib_onig) -testp_CFLAGS = -DPOSIX_TEST -Wall -Wno-invalid-source-encoding - testcu_SOURCES = testu.c testcu_LDADD = $(lib_onig) @@ -50,10 +55,17 @@ testcu_LDADD = $(lib_onig) test_regset_SOURCES = test_regset.c test_regset_LDADD = $(lib_onig) +test_back_SOURCES = test_back.c +test_back_LDADD = $(lib_onig) + gcov: make CFLAGS="--coverage" test_utf8 + make CFLAGS="--coverage" test_syntax make CFLAGS="--coverage" testc +if ENABLE_POSIX_API make CFLAGS="--coverage" testp +endif make CFLAGS="--coverage" testcu make CFLAGS="--coverage" test_regset + make CFLAGS="--coverage" test_back diff --git a/test/test_back.c b/test/test_back.c new file mode 100644 index 0000000..9a337b9 --- /dev/null +++ b/test/test_back.c @@ -0,0 +1,1443 @@ +/* + * test_back.c + * Copyright (c) 2020 K.Kosako + */ +#include "config.h" +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#include <stdio.h> + 
+#include "oniguruma.h" + +#include <string.h> + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +static OnigRegion* region; + +static void xx(char* pattern, char* str, int from, int to, int mem, int not, + int error_no, int line_no) +{ + int r; + regex_t* reg; + OnigErrorInfo einfo; + + r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo); + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + if (error_no == 0) { + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s /%s/ #%d\n", s, pattern, line_no); + nerror++; + } + else { + if (r == error_no) { + fprintf(stdout, "OK(ERROR): /%s/ %d #%d\n", pattern, r, line_no); + nsucc++; + } + else { + fprintf(stdout, "FAIL(ERROR): /%s/ '%s', %d, %d #%d\n", pattern, str, + error_no, r, line_no); + nfail++; + } + } + + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), + (UChar* )(str + SLEN(str)), (UChar* )str, + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + if (error_no == 0) { + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s /%s/ #%d\n", s, pattern, line_no); + nerror++; + } + else { + if (r == error_no) { + fprintf(stdout, "OK(ERROR): /%s/ '%s', %d #%d\n", pattern, str, r, line_no); + nsucc++; + } + else { + fprintf(stdout, "FAIL ERROR NO: /%s/ '%s', %d, %d #%d\n", pattern, + str, error_no, r, line_no); + nfail++; + } + } + + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s' #%d\n", pattern, str, line_no); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' #%d\n", pattern, str, line_no); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s' #%d\n", pattern, str, line_no); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { +
fprintf(stdout, "OK: /%s/ '%s' #%d\n", pattern, str, line_no); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d #%d\n", pattern, str, + from, to, region->beg[mem], region->end[mem], line_no); + nfail++; + } + } + } + onig_free(reg); +} + +static void xx2(char* pattern, char* str, int from, int to, int line_no) +{ + xx(pattern, str, from, to, 0, 0, 0, line_no); +} + +static void xx3(char* pattern, char* str, int from, int to, int mem, int line_no) +{ + xx(pattern, str, from, to, mem, 0, 0, line_no); +} + +static void xn(char* pattern, char* str, int line_no) +{ + xx(pattern, str, 0, 0, 0, 1, 0, line_no); +} + +static void xe(char* pattern, char* str, int error_no, int line_no) +{ + xx(pattern, str, 0, 0, 0, 0, error_no, line_no); +} + +#define x2(p,s,f,t) xx2(p,s,f,t, __LINE__) +#define x3(p,s,f,t,m) xx3(p,s,f,t,m, __LINE__) +#define n(p,s) xn(p,s, __LINE__) +#define e(p,s,e) xe(p,s,e, __LINE__) + +extern int main(int argc, char* argv[]) +{ + OnigEncoding use_encs[1]; + + use_encs[0] = ONIG_ENCODING_UTF8; + onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); + + err_file = stdout; + + region = onig_region_new(); + + x2("", "", 0, 0); + x2("^", "", 0, 0); + x2("^a", "\na", 1, 2); + x2("$", "", 0, 0); + x2("$\\O", "bb\n", 2, 3); + x2("\\G", "", 0, 0); + x2("\\A", "", 0, 0); + x2("\\Z", "", 0, 0); + x2("\\z", "", 0, 0); + x2("^$", "", 0, 0); + x2("\\ca", "\001", 0, 1); + x2("\\C-b", "\002", 0, 1); + x2("\\c\\\\", "\034", 0, 1); + x2("q[\\c\\\\]", "q\034", 0, 2); + x2("", "a", 1, 1); + x2("a", "a", 0, 1); + x2("\\x61", "a", 0, 1); + x2("aa", "aa", 0, 2); + x2("aaa", "aaa", 0, 3); + x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); + x2("ab", "ab", 0, 2); + x2("b", "ab", 1, 2); + x2("bc", "abc", 1, 3); + x2("(?i:#RET#)", "#INS##RET#", 5, 10); + x2("\\17", "\017", 0, 1); + x2("\\x1f", "\x1f", 0, 1); + x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); + x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); + 
x2(".", "a", 0, 1); + n(".", ""); + x2("..", "ab", 0, 2); + x2("\\w", "e", 0, 1); + n("\\W", "e"); + x2("\\s", " ", 0, 1); + x2("\\S", "b", 0, 1); + x2("\\d", "4", 0, 1); + n("\\D", "4"); + x2("\\b", "z ", 1, 1); + x2("\\b", " z", 2, 2); + x2("\\b", " z ", 3, 3); + x2("\\B", "zz ", 3, 3); + x2("\\B", "z ", 2, 2); + x2("\\B", " z", 0, 0); + x2("[ab]", "b", 0, 1); + n("[ab]", "c"); + x2("[a-z]", "t", 0, 1); + n("[^a]", "a"); + x2("[^a]", "\n", 0, 1); + x2("[]]", "]", 0, 1); + n("[^]]", "]"); + x2("[\\^]+", "0^^1", 2, 3); + x2("[b-]", "b", 0, 1); + x2("[b-]", "-", 0, 1); + x2("[\\w]", "z", 0, 1); + n("[\\w]", " "); + x2("[\\W]", "b$", 1, 2); + x2("[\\d]", "5", 0, 1); + n("[\\d]", "e"); + x2("[\\D]", "t", 0, 1); + n("[\\D]", "3"); + x2("[\\s]", " ", 0, 1); + n("[\\s]", "a"); + x2("[\\S]", "b", 0, 1); + n("[\\S]", " "); + x2("[\\w\\d]", "2", 0, 1); + n("[\\w\\d]", " "); + x2("[[:upper:]]", "B", 0, 1); + x2("[*[:xdigit:]+]", "+", 0, 1); + x2("[*[:xdigit:]+]", "GHIKK-9+*", 8, 9); + x2("[*[:xdigit:]+]", "-@^+", 3, 4); + n("[[:upper]]", "A"); + x2("[[:upper]]", ":", 0, 1); + x2("[\\044-\\047]", "\046", 0, 1); + x2("[\\x5a-\\x5c]", "\x5b", 0, 1); + x2("[\\x6A-\\x6D]", "\x6c", 0, 1); + n("[\\x6A-\\x6D]", "\x6E"); + n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); + x2("[\\[]", "[", 0, 1); + x2("[\\]]", "]", 0, 1); + x2("[&]", "&", 0, 1); + x2("[[ab]]", "b", 0, 1); + x2("[[ab]c]", "c", 0, 1); + n("[[^a]]", "a"); + n("[^[a]]", "a"); + x2("[[ab]&&bc]", "b", 0, 1); + n("[[ab]&&bc]", "a"); + n("[[ab]&&bc]", "c"); + x2("[a-z&&b-y&&c-x]", "w", 0, 1); + n("[^a-z&&b-y&&c-x]", "w"); + x2("[[^a&&a]&&a-z]", "b", 0, 1); + n("[[^a&&a]&&a-z]", "a"); + x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); + n("[[^a-z&&bcdef]&&[^c-g]]", "c"); + x2("[^[^abc]&&[^cde]]", "c", 0, 1); + x2("[^[^abc]&&[^cde]]", "e", 0, 1); + n("[^[^abc]&&[^cde]]", "f"); + x2("[a-&&-a]", "-", 0, 1); + n("[a\\-&&\\-a]", "&"); + n("\\wabc", " abc"); + x2("a\\Wbc", "a bc", 0, 4); + x2("a.b.c", 
"aabbc", 0, 5); + x2(".\\wb\\W..c", "abb bcc", 0, 7); + x2("\\s\\wzzz", " zzzz", 0, 5); + x2("aa.b", "aabb", 0, 4); + n(".a", "ab"); + x2(".a", "aa", 0, 2); + x2("^a", "a", 0, 1); + x2("^a$", "a", 0, 1); + x2("^\\w$", "a", 0, 1); + n("^\\w$", " "); + x2("^\\wab$", "zab", 0, 3); + x2("^\\wabcdef$", "zabcdef", 0, 7); + x2("^\\w...def$", "zabcdef", 0, 7); + x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); + x2("\\A\\Z", "", 0, 0); + x2("\\Axyz", "xyz", 0, 3); + x2("xyz\\Z", "xyz", 0, 3); + x2("xyz\\z", "xyz", 0, 3); + x2("a\\Z", "a", 0, 1); + n("\\Gaz", "az"); + n("\\Gz", "bza"); + x2("az\\G", "az", 0, 2); + n("az\\A", "az"); + n("a\\Az", "az"); + x2("\\^\\$", "^$", 0, 2); + x2("^x?y", "xy", 0, 2); + x2("^(x?y)", "xy", 0, 2); + x2("\\w", "_", 0, 1); + n("\\W", "_"); + x2("(?=z)z", "z", 0, 1); + n("(?=z).", "a"); + x2("(?!z)a", "a", 0, 1); + n("(?!z)a", "z"); + x2("(?i:a)", "a", 0, 1); + x2("(?i:a)", "A", 0, 1); + x2("(?i:A)", "a", 0, 1); + x2("(?i:i)", "I", 0, 1); + x2("(?i:I)", "i", 0, 1); + x2("(?i:[A-Z])", "i", 0, 1); + x2("(?i:[a-z])", "I", 0, 1); + n("(?i:A)", "b"); + x2("(?i:ss)", "ss", 0, 2); + x2("(?i:ss)", "Ss", 0, 2); + x2("(?i:ss)", "SS", 0, 2); + /* 0xc5,0xbf == 017F: # LATIN SMALL LETTER LONG S */ + x2("(?i:ss)", "\xc5\xbfS", 0, 3); + x2("(?i:ss)", "s\xc5\xbf", 0, 3); + /* 0xc3,0x9f == 00DF: # LATIN SMALL LETTER SHARP S */ + x2("(?i:ss)", "\xc3\x9f", 0, 2); + /* 0xe1,0xba,0x9e == 1E9E # LATIN CAPITAL LETTER SHARP S */ + x2("(?i:ss)", "\xe1\xba\x9e", 0, 3); + x2("(?i:xssy)", "xssy", 0, 4); + x2("(?i:xssy)", "xSsy", 0, 4); + x2("(?i:xssy)", "xSSy", 0, 4); + x2("(?i:xssy)", "x\xc5\xbfSy", 0, 5); + x2("(?i:xssy)", "xs\xc5\xbfy", 0, 5); + x2("(?i:xssy)", "x\xc3\x9fy", 0, 4); + x2("(?i:xssy)", "x\xe1\xba\x9ey", 0, 5); + x2("(?i:x\xc3\x9fy)", "xssy", 0, 4); + x2("(?i:x\xc3\x9fy)", "xSSy", 0, 4); + x2("(?i:\xc3\x9f)", "ss", 0, 2); + x2("(?i:\xc3\x9f)", "SS", 0, 2); + x2("(?i:[\xc3\x9f])", "ss", 0, 2); + x2("(?i:[\xc3\x9f])", "SS", 0, 2); + x2("(?i)(?<!ss)z", "qqz", 2, 
3); + x2("(?i:[A-Z])", "a", 0, 1); + x2("(?i:[f-m])", "H", 0, 1); + x2("(?i:[f-m])", "h", 0, 1); + n("(?i:[f-m])", "e"); + x2("(?i:[A-c])", "D", 0, 1); + n("(?i:[^a-z])", "A"); + n("(?i:[^a-z])", "a"); + x2("(?i:[!-k])", "Z", 0, 1); + x2("(?i:[!-k])", "7", 0, 1); + x2("(?i:[T-}])", "b", 0, 1); + x2("(?i:[T-}])", "{", 0, 1); + x2("(?i:\\?a)", "?A", 0, 2); + x2("(?i:\\*A)", "*a", 0, 2); + n(".", "\n"); + x2("(?m:.)", "\n", 0, 1); + x2("(?m:a.)", "a\n", 0, 2); + x2("(?m:.b)", "a\nb", 1, 3); + x2(".*abc", "dddabdd\nddabc", 10, 13); + x2(".+abc", "dddabdd\nddabcaa\naaaabc", 18, 22); + x2("(?m:.*abc)", "dddabddabc", 7, 10); + n("(?i)(?-i)a", "A"); + n("(?i)(?-i:a)", "A"); + x2("a?", "", 0, 0); + x2("a?", "b", 1, 1); + x2("a?", "a", 1, 1); + x2("a*", "", 0, 0); + x2("a*", "a", 1, 1); + x2("a*", "aaa", 3, 3); + x2("a*", "baaaa", 5, 5); + n("a+", ""); + x2("a+", "a", 0, 1); + x2("a+", "aaaa", 3, 4); + x2("a+", "aabbb", 1, 2); + x2("a+", "baaaa", 4, 5); + x2(".?", "", 0, 0); + x2(".?", "f", 1, 1); + x2(".?", "\n", 1, 1); + x2(".*", "", 0, 0); + x2(".*", "abcde", 5, 5); + x2(".+", "z", 0, 1); + x2(".+", "zdswer\n", 5, 6); + x2("(.*)a\\1f", "babfbac", 0, 4); + x2("(.*)a\\1f", "bacbabf", 3, 7); + x2("((.*)a\\2f)", "bacbabf", 3, 7); + x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); + x2("a|b", "a", 0, 1); + x2("a|b", "b", 0, 1); + x2("|a", "a", 1, 1); + x2("(|a)", "a", 1, 1); + x2("ab|bc", "ab", 0, 2); + x2("ab|bc", "bc", 0, 2); + x2("z(?:ab|bc)", "zbc", 0, 3); + x2("a(?:ab|bc)c", "aabc", 0, 4); + x2("ab|(?:ac|az)", "az", 0, 2); + x2("a|b|c", "dc", 1, 2); + x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); + n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); + x2("a|^z", "ba", 1, 2); + x2("a|^z", "za", 1, 2); + x2("a|\\Gz", "bza", 2, 3); + x2("a|\\Gz", "za", 1, 2); + x2("a|\\Az", "bza", 2, 3); + x2("a|\\Az", "za", 1, 2); + x2("a|b\\Z", "ba", 1, 2); + x2("a|b\\Z", "b", 0, 1); + x2("a|b\\z", "ba", 1, 2); + x2("a|b\\z", "b", 0, 1); + x2("\\w|\\s", " ", 0, 1); + 
n("\\w|\\w", " "); + x2("\\w|%", "%", 0, 1); + x2("\\w|[&$]", "&", 0, 1); + x2("[b-d]|[^e-z]", "a", 0, 1); + x2("(?:a|[c-f])|bz", "dz", 0, 1); + x2("(?:a|[c-f])|bz", "bz", 0, 2); + x2("abc|(?=zz)..f", "zzf", 0, 3); + x2("abc|(?!zz)..f", "abf", 0, 3); + x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); + n("(?>a|abd)c", "abdc"); + x2("(?>abd|a)c", "abdc", 0, 4); + x2("a?|b", "a", 1, 1); + x2("a?|b", "b", 1, 1); + x2("a?|b", "", 0, 0); + x2("a*|b", "aa", 2, 2); + x2("a*|b*", "ba", 2, 2); + x2("a*|b*", "ab", 2, 2); + x2("a+|b*", "", 0, 0); + x2("a+|b*", "bbb", 3, 3); + x2("a+|b*", "abbb", 4, 4); + n("a+|b+", ""); + x2("(a|b)?", "b", 1, 1); + x2("(a|b)*", "ba", 2, 2); + x2("(a|b)+", "bab", 2, 3); + x2("(ab|ca)+", "caabbc", 2, 4); + x2("(ab|ca)+", "aabca", 3, 5); + x2("(ab|ca)+", "abzca", 3, 5); + x2("(a|bab)+", "ababa", 4, 5); + x2("(a|bab)+", "ba", 1, 2); + x2("(a|bab)+", "baaaba", 5, 6); + x2("(?:a|b)(?:a|b)", "ab", 0, 2); + x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 6, 6); + x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 5, 6); + x2("(?:a+|b+){2}", "aaabbb", 4, 6); + x2("h{0,}", "hhhh", 4, 4); + x2("(?:a+|b+){1,2}", "aaabbb", 5, 6); + n("ax{2}*a", "0axxxa1"); + n("a.{0,2}a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXXa0"); + x2("^a{2,}?a$", "aaa", 0, 3); + x2("^[a-z]{2,}?$", "aaa", 0, 3); + x2("(?:a+|\\Ab*)cc", "cc", 0, 2); + n("(?:a+|\\Ab*)cc", "abcc"); + x2("(?:^a+|b+)*c", "aabbbabc", 7, 8); + x2("(?:^a+|b+)*c", "aabbbbc", 6, 7); + x2("a|(?i)c", "C", 0, 1); + x2("(?i)c|a", "C", 0, 1); + x2("(?i)c|a", "A", 0, 1); + x2("a(?i)b|c", "aB", 0, 2); + x2("a(?i)b|c", "aC", 0, 2); + n("a(?i)b|c", "AC"); + n("a(?:(?i)b)|c", "aC"); + x2("(?i:c)|a", "C", 0, 1); + n("(?i:c)|a", "A"); + x2("[abc]?", "abc", 3, 3); + x2("[abc]*", "abc", 3, 3); + x2("[^abc]*", "abc", 3, 3); + n("[^abc]+", "abc"); + x2("a?\?", "aaa", 3, 3); + x2("ba?\?b", "bab", 0, 3); + x2("a*?", "aaa", 3, 3); + x2("ba*?", "baa", 0, 1); + x2("ba*?b", "baab", 0, 4); + x2("a+?", "aaa", 2, 3); + x2("ba+?", "baa", 0, 2); + 
x2("ba+?b", "baab", 0, 4); + x2("(?:a?)?\?", "a", 1, 1); + x2("(?:a?\?)?", "a", 1, 1); + x2("(?:a?)+?", "aaa", 3, 3); + x2("(?:a+)?\?", "aaa", 3, 3); + x2("(?:a+)?\?b", "aaab", 3, 4); + x2("(?:ab)?{2}", "", 0, 0); + x2("(?:ab)?{2}", "ababa", 5, 5); + x2("(?:ab)*{0}", "ababa", 5, 5); + x2("(?:ab){3,}", "abababab", 2, 8); + n("(?:ab){3,}", "abab"); + x2("(?:ab){2,4}", "ababab", 2, 6); + x2("(?:ab){2,4}", "ababababab", 6, 10); + x2("(?:ab){2,4}?", "ababababab", 6, 10); + x2("(?:ab){,}", "ab{,}", 0, 5); + x2("(?:abc)+?{2}", "abcabcabc", 3, 9); + x2("(?:X*)(?i:xa)", "XXXa", 2, 4); + x2("(d+)([^abc]z)", "dddz", 1, 4); + x2("([^abc]*)([^abc]z)", "dddz", 2, 4); + x2("(\\w+)(\\wz)", "dddz", 1, 4); + x3("(a)", "a", 0, 1, 1); + x3("(ab)", "ab", 0, 2, 1); + x2("((ab))", "ab", 0, 2); + x3("((ab))", "ab", 0, 2, 1); + x3("((ab))", "ab", 0, 2, 2); + x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); + x3("(ab)(cd)", "abcd", 0, 2, 1); + x3("(ab)(cd)", "abcd", 2, 4, 2); + x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); + x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); + x2("(^a)", "a", 0, 1); + x3("(a)|(a)", "ba", 1, 2, 1); + x3("(^a)|(a)", "ba", 1, 2, 2); + x3("(a?)", "aaa", 3, 3, 1); + x3("(a*)", "aaa", 3, 3, 1); + x3("(a*)", "", 0, 0, 1); + x3("(a+)", "aaaaaaa", 6, 7, 1); + x3("(a+|b*)", "bbbaa", 5, 5, 1); + x3("(a+|b?)", "bbbaa", 5, 5, 1); + x3("(abc)?", "abc", -1, -1, 1); + x3("(abc)*", "abc", -1, -1, 1); + x3("(abc)+", "abc", 0, 3, 1); + x3("(xyz|abc)+", "abc", 0, 3, 1); + x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); + x3("((?i:abc))", "AbC", 0, 3, 1); + x2("(abc)(?i:\\1)", "abcABC", 0, 6); + x3("((?m:a.c))", "a\nc", 0, 3, 1); + x3("((?=az)a)", "azb", 0, 1, 1); + x3("abc|(.abd)", "zabd", 0, 4, 1); + x2("(?:abc)|(ABC)", "abc", 0, 3); + x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); + x3("a*(.)", "aaaaz", 4, 5, 1); + x3("a*?(.)", "aaaaz", 4, 5, 1); + x3("a*?(c)", "aaaac", 4, 5, 1); + x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); + x3("(\\Abb)cc", "bbcc", 0, 2, 1); + 
n("(\\Abb)cc", "zbbcc"); + x3("(^bb)cc", "bbcc", 0, 2, 1); + n("(^bb)cc", "zbbcc"); + x3("cc(bb$)", "ccbb", 2, 4, 1); + n("cc(bb$)", "ccbbb"); + n("(\\1)", ""); + n("\\1(a)", "aa"); + n("(a(b)\\1)\\2+", "ababb"); + n("(?:(?:\\1|z)(a))+$", "zaa"); + x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); + x2("(a)(?=\\1)", "aa", 0, 1); + n("(a)$|\\1", "az"); + x2("(a)\\1", "aa", 0, 2); + n("(a)\\1", "ab"); + x2("(a?)\\1", "aa", 2, 2); + x2("(a?\?)\\1", "aa", 2, 2); + x2("(a*)\\1", "aaaaa", 5, 5); + x3("(a*)\\1", "aaaaa", 5, 5, 1); + x2("a(b*)\\1", "abbbb", 0, 5); + x2("a(b*)\\1", "ab", 0, 1); + x2("(a*)(b*)\\1\\2", "aaabbaaabb", 10, 10); + x2("(a*)(b*)\\2", "aaabbbb", 7, 7); + x2("(((((((a*)b))))))c\\7", "aaabcaaa", 3, 5); + x3("(((((((a*)b))))))c\\7", "aaabcaaa", 3, 3, 7); + x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); + x2("([a-d])\\1", "cc", 0, 2); + x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); + n("(\\w\\d\\s)\\1", "f5 f5"); + x2("(who|[a-c]{3})\\1", "whowho", 0, 6); + x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); + x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); + x2("(^a)\\1", "aa", 0, 2); + n("(^a)\\1", "baa"); + n("(a$)\\1", "aa"); + n("(ab\\Z)\\1", "ab"); + x2("(a*\\Z)\\1", "a", 1, 1); + x2(".(a*\\Z)\\1", "ba", 1, 2); + x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); + x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); + x2("((?i:az))\\1", "AzAz", 0, 4); + n("((?i:az))\\1", "Azaz"); + x2("(?<=a)b", "ab", 1, 2); + n("(?<=a)b", "bb"); + x2("(?<=a|b)b", "bb", 1, 2); + x2("(?<=a|bc)b", "bcb", 2, 3); + x2("(?<=a|bc)b", "ab", 1, 2); + x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); + x3("(?<=(abc))d", "abcd", 0, 3, 1); + x2("(?<=(?i:abc))d", "ABCd", 3, 4); + x2("(a)\\g<1>", "aa", 0, 2); + x2("(?<!a)b", "cb", 1, 2); + n("(?<!a)b", "ab"); + x2("(?<!a|bc)b", "bbb", 2, 3); + n("(?<!a|bc)z", "bcz"); + x2("(?<name1>a)", "a", 0, 1); + x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4); + x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8); + x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3); + x2("(?<n>|a\\g<n>)+", "", 0, 
0); + x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 6, 6); + x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1); + x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3); + x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4); + x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 8, 8); + x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); + x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2); + x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0); + x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9); + n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg"); + x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10); + x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); + x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16); + x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1); + x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 6, 7); + x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 8, 9, 1); + x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 6, 9); + x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); + x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7); + x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4); + x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 2, 5); + x2("(?<n>(a|b\\g<n>c){3,5})", 
"baaaacaaaaa", 8, 11); + x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 1, 4); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x2("(?:a*|b*)*c", "abadc", 4, 5); + x3("(?:\\1a|())*", "a", 1, 1, 1); + x2("x((.)*)*x", "0x1x2x3", 3, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("[0-9-a]", "-", 0, 1); // PR#44 + n("[0-9-a]", ":"); // PR#44 + x3("(\\(((?:[^(]|\\g<1>)*)\\))", "(abc)(abc)", 6, 9, 2); // PR#43 + x2("\\o{101}", "A", 0, 1); + x2("\\A(a|b\\g<1>c)\\k<1+3>\\z", "bbacca", 0, 6); + n("\\A(a|b\\g<1>c)\\k<1+3>\\z", "bbaccb"); + x2("(?i)\\A(a|b\\g<1>c)\\k<1+2>\\z", "bBACcbac", 0, 8); + x2("(?i)(?<X>aa)|(?<X>bb)\\k<X>", "BBbb", 0, 4); + x2("(?:\\k'+1'B|(A)C)*", "ACAB", 4, 4); // relative backref by postitive number + x2("\\g<+2>(abc)(ABC){0}", "ABCabc", 0, 6); // relative call by positive number + x2("A\\g'0'|B()", "AAAAB", 4, 5); + x3("(A\\g'0')|B", "AAAAB", -1, -1, 1); + x2("(a*)(?(1))aa", "aaaaa", 3, 5); + x2("(a*)(?(-1))aa", "aaaaa", 3, 5); + x2("(?<name>aaa)(?('name'))aa", "aaaaa", 0, 5); + x2("(a)(?(1)aa|bb)a", "aaaaa", 1, 5); + x2("(?:aa|())(?(<1>)aa|bb)a", "aabba", 0, 5); + x2("(?:aa|())(?('1')aa|bb|cc)a", "aacca", 0, 5); + x3("(a*)(?(1)aa|a)b", "aaab", 1, 1, 1); + n("(a)(?(1)a|b)c", "abc"); + x2("(a)(?(1)|)c", "ac", 0, 2); + n("(?()aaa|bbb)", "bbb"); + x2("(a)(?(1+0)b|c)d", "abd", 0, 3); + x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "ace", 0, 3); + x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "bce", 0, 3); + x2("\\R", "\r\n", 1, 2); + x2("\\R", "\r", 0, 1); + x2("\\R", "\n", 0, 1); + x2("\\R", "\x0b", 0, 1); + n("\\R\\n", "\r\n"); + x2("\\R", "\xc2\x85", 0, 2); + x2("\\N", "a", 0, 1); + n("\\N", "\n"); + n("(?m:\\N)", "\n"); + n("(?-m:\\N)", "\n"); + x2("\\O", "a", 0, 1); + x2("\\O", "\n", 0, 1); + x2("(?m:\\O)", "\n", 0, 1); + x2("(?-m:\\O)", "\n", 0, 1); + x2("\\K", "a", 1, 1); + x2("a\\K", "a", 1, 1); + x2("a\\Kb", 
"ab", 1, 2); + x2("(a\\Kb|ac\\Kd)", "acd", 2, 3); + x2("(a\\Kb|\\Kac\\K)*", "acababacab", 10, 10); + x2("(?:()|())*\\1", "abc", 3, 3); + x2("(?:()|())*\\2", "abc", 3, 3); + x2("(?:()|()|())*\\3\\1", "abc", 3, 3); + x2("(|(?:a(?:\\g'1')*))b|", "abc", 3, 3); + x2("^(\"|)(.*)\\1$", "XX", 0, 2); + x2("(abc|def|ghi|jkl|mno|pqr|stu){0,10}?\\z", "admno", 5, 5); + x2("(abc|(def|ghi|jkl|mno|pqr){0,7}?){5}\\z", "adpqrpqrpqr", 11, 11); // cover OP_REPEAT_INC_NG_SG + x2("(?!abc).*\\z", "abcde", 5, 5); // cover OP_PREC_READ_NOT_END + x2("(.{2,})?", "abcde", 5, 5); // up coverage + x2("((a|b|c|d|e|f|g|h|i|j|k|l|m|n)+)?", "abcde", 5, 5); // up coverage + x2("((a|b|c|d|e|f|g|h|i|j|k|l|m|n){3,})?", "abcde", 5, 5); // up coverage + x2("((?:a(?:b|c|d|e|f|g|h|i|j|k|l|m|n))+)?", "abacadae", 8, 8); // up coverage + x2("((?:a(?:b|c|d|e|f|g|h|i|j|k|l|m|n))+?)?z", "abacadaez", 8, 9); // up coverage + x2("\\A((a|b)\?\?)?z", "bz", 0, 2); // up coverage + x2("((?<x>abc){0}a\\g<x>d)+", "aabcd", 0, 5); // up coverage + x2("((?(abc)true|false))+", "false", 0, 5); // up coverage + x2("((?i:abc)d)+", "abcdABCd", 4, 8); // up coverage + x2("((?<!abc)def)+", "bcdef", 2, 5); // up coverage + x2("(\\ba)+", "aaa", 0, 1); // up coverage + x2("()(?<x>ab)(?(<x>)a|b)", "aba", 0, 3); // up coverage + x2("(?<=a.b)c", "azbc", 3, 4); // up coverage + n("(?<=(?:abcde){30})z", "abc"); // up coverage + x2("(?<=(?(a)a|bb))z", "aaz", 2, 3); // up coverage + x2("[a]*\\W", "aa@", 2, 3); // up coverage + x2("[a]*[b]", "aab", 2, 3); // up coverage + n("a*\\W", "aaa"); // up coverage + n("(?W)a*\\W", "aaa"); // up coverage + x2("(?<=ab(?<=ab))", "ab", 2, 2); // up coverage + x2("(?<x>a)(?<x>b)(\\k<x>)+", "abbaab", 0, 6); // up coverage + x2("()(\\1)(\\2)", "abc", 3, 3); // up coverage + x2("((?(a)b|c))(\\1)", "abab", 0, 4); // up coverage + x2("(?<x>$|b\\g<x>)", "bbb", 3, 3); // up coverage + x2("(?<x>(?(a)a|b)|c\\g<x>)", "cccb", 3, 4); // up coverage + x2("(a)(?(1)a*|b*)+", "aaaa", 3, 4); // up coverage + 
x2("[[^abc]&&cde]*", "de", 2, 2); // up coverage + n("(a){10}{10}", "aa"); // up coverage + x2("(?:a?)+", "aa", 2, 2); // up coverage + x2("(?:a?)*?", "a", 1, 1); // up coverage + x2("(?:a*)*?", "a", 1, 1); // up coverage + x2("(?:a+?)*", "a", 1, 1); // up coverage + x2("\\h", "5", 0, 1); // up coverage + x2("\\H", "z", 0, 1); // up coverage + x2("[\\h]", "5", 0, 1); // up coverage + x2("[\\H]", "z", 0, 1); // up coverage + x2("[\\o{101}]", "A", 0, 1); // up coverage + x2("[\\u0041]", "A", 0, 1); // up coverage + + x2("(?~)", "", 0, 0); + x2("(?~)", "A", 1, 1); + x2("aaaaa(?~)", "aaaaaaaaaa", 5, 10); + x2("(?~(?:|aaa))", "aaa", 3, 3); + x2("(?~aaa|)", "aaa", 3, 3); + x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // !!! + x2("/\\*(?~\\*/)\\*/", "/* */ */", 0, 5); + x2("(?~\\w+)zzzzz", "zzzzz", 0, 5); + x2("(?~\\w*)zzzzz", "zzzzz", 0, 5); + x2("(?~A.C|B)", "ABC", 3, 3); + x2("(?~XYZ|ABC)a", "ABCa", 3, 4); + x2("(?~XYZ|ABC)a", "aABCa", 4, 5); + x2("<[^>]*>(?~[<>])</[^>]*>", "<a>vvv</a> <b> </b>", 13, 22); + x2("(?~ab)", "ccc\ndab", 7, 7); + x2("(?m:(?~ab))", "ccc\ndab", 7, 7); + x2("(?-m:(?~ab))", "ccc\ndab", 7, 7); + x2("(?~abc)xyz", "xyz012345678901234567890123456789abc", 0, 3); + + // absent with expr + x2("(?~|78|\\d*)", "123456789", 9, 9); + x2("(?~|def|(?:abc|de|f){0,100})", "abcdedeabcfdefabc", 17, 17); + x2("(?~|ab|.*)", "ccc\nddd", 7, 7); + x2("(?~|ab|\\O*)", "ccc\ndab", 7, 7); + x2("(?~|ab|\\O{2,10})", "ccc\ndab", 3, 5); + x2("(?~|ab|\\O{1,10})", "ab", 1, 2); + n("(?~|ab|\\O{2,10})", "ab"); + x2("(?~|abc|\\O{1,10})", "abc", 2, 3); + x2("(?~|ab|\\O{5,10})|abc", "abc", 0, 3); + x2("(?~|ab|\\O{1,10})", "cccccccccccab", 12, 13); + x2("(?~|aaa|)", "aaa", 3, 3); + x2("(?~||a*)", "aaaaaa", 6, 6); + x2("(?~||a*?)", "aaaaaa", 6, 6); + x2("(a)(?~|b|\\1)", "aaaaaa", 4, 6); + x2("(a)(?~|bb|(?:a\\1)*)", "aaaaaa", 5, 6); + x2("(b|c)(?~|abac|(?:a\\1)*)", "abababacabab", 11, 12); + n("(?~|c|a*+)a", "aaaaa"); + x2("(?~|aaaaa|a*+)", "aaaaa", 5, 5); + 
x2("(?~|aaaaaa|a*+)b", "aaaaaab", 6, 7); + x2("(?~|abcd|(?>))", "zzzabcd", 7, 7); + x2("(?~|abc|a*?)", "aaaabc", 6, 6); + + // absent range cutter + x2("(?~|abc)a*", "aaaaaabc", 8, 8); + x2("(?~|abc)a*z|aaaaaabc", "aaaaaabc", 0, 8); + x2("(?~|aaaaaa)a*", "aaaaaa", 6, 6); + x2("(?~|abc)aaaa|aaaabc", "aaaabc", 0, 6); + x2("(?>(?~|abc))aaaa|aaaabc", "aaaabc", 0, 6); + x2("(?~|)a", "a", 0, 1); + n("(?~|a)a", "a"); + x2("(?~|a)(?~|)a", "a", 0, 1); + x2("(?~|a).*(?~|)a", "bbbbbbbbbbbbbbbbbbbba", 20, 21); + x2("(?~|abc).*(xyz|pqr)(?~|)abc", "aaaaxyzaaapqrabc", 10, 16); + x2("(?~|abc).*(xyz|pqr)(?~|)abc", "aaaaxyzaaaabcpqrabc", 13, 19); + n("\\A(?~|abc).*(xyz|pqrabc)(?~|)abc", "aaaaxyzaaaabcpqrabcabc"); + + x2("", "あ", 3, 3); + x2("あ", "あ", 0, 3); + n("い", "あ"); + x2("うう", "うう", 0, 6); + x2("あいう", "あいう", 0, 9); + x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 105); + x2("あ", "いあ", 3, 6); + x2("いう", "あいう", 3, 9); + x2("\\xca\\xb8", "\xca\xb8", 0, 2); + x2(".", "あ", 0, 3); + x2("..", "かき", 0, 6); + x2("\\w", "お", 0, 3); + n("\\W", "あ"); + x2("[\\W]", "う$", 3, 4); + x2("\\S", "そ", 0, 3); + x2("\\S", "漢", 0, 3); + x2("\\b", "気 ", 3, 3); + x2("\\b", " ほ", 4, 4); + x2("\\B", "せそ ", 7, 7); + x2("\\B", "う ", 4, 4); + x2("\\B", " い", 0, 0); + x2("[たち]", "ち", 0, 3); + n("[なに]", "ぬ"); + x2("[う-お]", "え", 0, 3); + n("[^け]", "け"); + x2("[\\w]", "ね", 0, 3); + n("[\\d]", "ふ"); + x2("[\\D]", "は", 0, 3); + n("[\\s]", "く"); + x2("[\\S]", "へ", 0, 3); + x2("[\\w\\d]", "よ", 0, 3); + x2("[\\w\\d]", " よ", 3, 6); + n("\\w鬼車", " 鬼車"); + x2("鬼\\W車", "鬼 車", 0, 7); + x2("あ.い.う", "ああいいう", 0, 15); + x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 19); + x2("\\s\\wこここ", " ここここ", 0, 13); + x2("ああ.け", "ああけけ", 0, 12); + n(".い", "いえ"); + x2(".お", "おお", 0, 6); + x2("^あ", "あ", 0, 3); + x2("^む$", "む", 0, 3); + x2("^\\w$", "に", 0, 3); + x2("^\\wかきくけこ$", "zかきくけこ", 0, 16); + x2("^\\w...うえお$", "zあいううえお", 0, 19); + x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 16); + x2("\\Aたちつ", "たちつ", 0, 9); + 
x2("むめも\\Z", "むめも", 0, 9); + x2("かきく\\z", "かきく", 0, 9); + x2("かきく\\Z", "かきく\n", 0, 9); + n("\\Gぽぴ", "ぽぴ"); + n("\\Gえ", "うえお"); + x2("とて\\G", "とて", 0, 6); + n("まみ\\A", "まみ"); + n("ま\\Aみ", "まみ"); + x2("(?=せ)せ", "せ", 0, 3); + n("(?=う).", "い"); + x2("(?!う)か", "か", 0, 3); + n("(?!と)あ", "と"); + x2("(?i:あ)", "あ", 0, 3); + x2("(?i:ぶべ)", "ぶべ", 0, 6); + n("(?i:い)", "う"); + x2("(?m:よ.)", "よ\n", 0, 4); + x2("(?m:.め)", "ま\nめ", 3, 7); + x2("あ?", "", 0, 0); + x2("変?", "化", 3, 3); + x2("変?", "変", 3, 3); + x2("量*", "", 0, 0); + x2("量*", "量", 3, 3); + x2("子*", "子子子", 9, 9); + x2("馬*", "鹿馬馬馬馬", 15, 15); + n("山+", ""); + x2("河+", "河", 0, 3); + x2("時+", "時時時時", 9, 12); + x2("え+", "ええううう", 3, 6); + x2("う+", "おうううう", 12, 15); + x2(".?", "た", 3, 3); + x2(".*", "ぱぴぷぺ", 12, 12); + x2(".+", "ろ", 0, 3); + x2(".+", "いうえか\n", 9, 12); + x2("あ|い", "あ", 0, 3); + x2("あ|い", "い", 0, 3); + x2("あい|いう", "あい", 0, 6); + x2("あい|いう", "いう", 0, 6); + x2("を(?:かき|きく)", "をかき", 0, 9); + x2("を(?:かき|きく)け", "をきくけ", 0, 12); + x2("あい|(?:あう|あを)", "あを", 0, 6); + x2("あ|い|う", "えう", 3, 6); + x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 9); + n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ"); + x2("あ|^わ", "ぶあ", 3, 6); + x2("あ|^を", "をあ", 3, 6); + x2("鬼|\\G車", "け車鬼", 6, 9); + x2("鬼|\\G車", "車鬼", 3, 6); + x2("鬼|\\A車", "b車鬼", 4, 7); + x2("鬼|\\A車", "車", 0, 3); + x2("鬼|車\\Z", "車鬼", 3, 6); + x2("鬼|車\\Z", "車", 0, 3); + x2("鬼|車\\Z", "車\n", 0, 3); + x2("鬼|車\\z", "車鬼", 3, 6); + x2("鬼|車\\z", "車", 0, 3); + x2("\\w|\\s", "お", 0, 3); + x2("\\w|%", "%お", 1, 4); + x2("\\w|[&$]", "う&", 3, 4); + x2("[い-け]", "う", 0, 3); + x2("[い-け]|[^か-こ]", "あ", 0, 3); + x2("[い-け]|[^か-こ]", "か", 0, 3); + x2("[^あ]", "\n", 0, 1); + x2("(?:あ|[う-き])|いを", "うを", 0, 3); + x2("(?:あ|[う-き])|いを", "いを", 0, 6); + x2("あいう|(?=けけ)..ほ", "けけほ", 0, 9); + x2("あいう|(?!けけ)..ほ", "あいほ", 0, 9); + x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 9); + x2("(?<=あ|いう)い", "いうい", 6, 9); + n("(?>あ|あいえ)う", "あいえう"); + x2("(?>あいえ|あ)う", "あいえう", 0, 12); + x2("あ?|い", "あ", 3, 3); + x2("あ?|い", "い", 3, 3); + 
x2("あ?|い", "", 0, 0); + x2("あ*|い", "ああ", 6, 6); + x2("あ*|い*", "いあ", 6, 6); + x2("あ*|い*", "あい", 6, 6); + x2("[aあ]*|い*", "aあいいい", 13, 13); + x2("あ+|い*", "", 0, 0); + x2("あ+|い*", "いいい", 9, 9); + x2("あ+|い*", "あいいい", 12, 12); + x2("あ+|い*", "aあいいい", 13, 13); + n("あ+|い+", ""); + x2("(あ|い)?", "い", 3, 3); + x2("(あ|い)*", "いあ", 6, 6); + x2("(あ|い)+", "いあい", 6, 9); + x2("(あい|うあ)+", "うああいうえ", 6, 12); + x2("(あい|うえ)+", "うああいうえ", 12, 18); + x2("(あい|うあ)+", "ああいうあ", 9, 15); + x2("(あい|うあ)+", "あいをうあ", 9, 15); + x2("(あい|うあ)+", "$$zzzzあいをうあ", 15, 21); + x2("(あ|いあい)+", "あいあいあ", 12, 15); + x2("(あ|いあい)+", "いあ", 3, 6); + x2("(あ|いあい)+", "いあああいあ", 15, 18); + x2("(?:あ|い)(?:あ|い)", "あい", 0, 6); + x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 18, 18); + x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 15, 18); + x2("(?:あ+|い+){2}", "あああいいい", 12, 18); + x2("(?:あ+|い+){1,2}", "あああいいい", 15, 18); + x2("(?:あ+|\\Aい*)うう", "うう", 0, 6); + n("(?:あ+|\\Aい*)うう", "あいうう"); + x2("(?:^あ+|い+)*う", "ああいいいあいう", 21, 24); + x2("(?:^あ+|い+)*う", "ああいいいいう", 18, 21); + x2("う{0,}", "うううう", 12, 12); + x2("あ|(?i)c", "C", 0, 1); + x2("(?i)c|あ", "C", 0, 1); + x2("(?i:あ)|a", "a", 0, 1); + n("(?i:あ)|a", "A"); + x2("[あいう]?", "あいう", 9, 9); + x2("[あいう]*", "あいう", 9, 9); + x2("[^あいう]*", "あいう", 9, 9); + n("[^あいう]+", "あいう"); + x2("あ?\?", "あああ", 9, 9); + x2("いあ?\?い", "いあい", 0, 9); + x2("あ*?", "あああ", 9, 9); + x2("いあ*?", "いああ", 0, 3); + x2("いあ*?い", "いああい", 0, 12); + x2("あ+?", "あああ", 6, 9); + x2("いあ+?", "いああ", 0, 6); + x2("いあ+?い", "いああい", 0, 12); + x2("(?:天?)?\?", "天", 3, 3); + x2("(?:天?\?)?", "天", 3, 3); + x2("(?:夢?)+?", "夢夢夢", 9, 9); + x2("(?:風+)?\?", "風風風", 9, 9); + x2("(?:雪+)?\?霜", "雪雪雪霜", 9, 12); + x2("(?:あい)?{2}", "", 0, 0); + x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 15, 15); + x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 15, 15); + x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 6, 24); + n("(?:鬼車){3,}", "鬼車鬼車"); + x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 6, 18); + x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 18, 30); + x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 18, 30); + x2("(?:鬼車){,}", "鬼車{,}", 0, 9); + x2("(?:かきく)+?{2}", "かきくかきくかきく", 9, 27); + 
x3("(火)", "火", 0, 3, 1); + x3("(火水)", "火水", 0, 6, 1); + x2("((時間))", "時間", 0, 6); + x3("((風水))", "風水", 0, 6, 1); + x3("((昨日))", "昨日", 0, 6, 2); + x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 6, 20); + x3("(あい)(うえ)", "あいうえ", 0, 6, 1); + x3("(あい)(うえ)", "あいうえ", 6, 12, 2); + x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 9, 18, 3); + x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 9, 18, 4); + x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 15, 27, 2); + x2("(^あ)", "あ", 0, 3); + x3("(あ)|(あ)", "いあ", 3, 6, 1); + x3("(^あ)|(あ)", "いあ", 3, 6, 2); + x3("(あ?)", "あああ", 9, 9, 1); + x3("(ま*)", "ままま", 9, 9, 1); + x3("(と*)", "", 0, 0, 1); + x3("(る+)", "るるるるるるる", 18, 21, 1); + x3("(ふ+|へ*)", "ふふふへへ", 15, 15, 1); + x3("(あ+|い?)", "いいいああ", 15, 15, 1); + x3("(あいう)?", "あいう", -1, -1, 1); + x3("(あいう)*", "あいう", -1, -1, 1); + x3("(あいう)+", "あいう", 0, 9, 1); + x3("(さしす|あいう)+", "あいう", 0, 9, 1); + x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 9, 1); + x3("((?i:あいう))", "あいう", 0, 9, 1); + x3("((?m:あ.う))", "あ\nう", 0, 7, 1); + x3("((?=あん)あ)", "あんい", 0, 3, 1); + x3("あいう|(.あいえ)", "んあいえ", 0, 12, 1); + x3("あ*(.)", "ああああん", 12, 15, 1); + x3("あ*?(.)", "ああああん", 12, 15, 1); + x3("あ*?(ん)", "ああああん", 12, 15, 1); + x3("[いうえ]あ*(.)", "えああああん", 15, 18, 1); + x3("(\\Aいい)うう", "いいうう", 0, 6, 1); + n("(\\Aいい)うう", "んいいうう"); + x3("(^いい)うう", "いいうう", 0, 6, 1); + n("(^いい)うう", "んいいうう"); + x3("ろろ(るる$)", "ろろるる", 6, 12, 1); + n("ろろ(るる$)", "ろろるるる"); + x2("(無)\\1", "無無", 0, 6); + n("(無)\\1", "無武"); + x2("(空?)\\1", "空空", 6, 6); + x2("(空?\?)\\1", "空空", 6, 6); + x2("(空*)\\1", "空空空空空", 15, 15); + x3("(空*)\\1", "空空空空空", 15, 15, 1); + x2("あ(い*)\\1", "あいいいい", 0, 15); + x2("あ(い*)\\1", "あい", 0, 3); + x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 30, 30); + x2("(あ*)(い*)\\2", "あああいいいい", 21, 21); + x3("(あ*)(い*)\\2", "あああいいいい", 21, 21, 2); + x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 9, 15); + x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 9, 9, 7); + x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 18); + x2("([き-け])\\1", "くく", 0, 6); + x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 10); + n("(\\w\\d\\s)\\1", 
"あ5 あ5"); + x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 12); + x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 19); + x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 18); + x2("(^こ)\\1", "ここ", 0, 6); + n("(^む)\\1", "めむむ"); + n("(あ$)\\1", "ああ"); + n("(あい\\Z)\\1", "あい"); + x2("(あ*\\Z)\\1", "あ", 3, 3); + x2(".(あ*\\Z)\\1", "いあ", 3, 6); + x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 19, 1); + x3("(.(..\\d.)\\2)", "あ12341234", 0, 11, 1); + x2("((?i:あvず))\\1", "あvずあvず", 0, 14); + x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 6, 9); + x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 39); + x2("[[ひふ]]", "ふ", 0, 3); + x2("[[いおう]か]", "か", 0, 3); + n("[[^あ]]", "あ"); + n("[^[あ]]", "あ"); + x2("[^[^あ]]", "あ", 0, 3); + x2("[[かきく]&&きく]", "く", 0, 3); + n("[[かきく]&&きく]", "か"); + n("[[かきく]&&きく]", "け"); + x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 3); + n("[^あ-ん&&い-を&&う-ゑ]", "ゑ"); + x2("[[^あ&&あ]&&あ-ん]", "い", 0, 3); + n("[[^あ&&あ]&&あ-ん]", "あ"); + x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 3); + n("[[^あ-ん&&いうえお]&&[^う-か]]", "い"); + x2("[^[^あいう]&&[^うえお]]", "う", 0, 3); + x2("[^[^あいう]&&[^うえお]]", "え", 0, 3); + n("[^[^あいう]&&[^うえお]]", "か"); + x2("[あ-&&-あ]", "-", 0, 1); + x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 3); + x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1); + x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1); + n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2"); + x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 44); + x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 44); + x2("\\n?\\z", "こんにちは", 15, 15); + x2("(?m).*", "青赤黄", 9, 9); + x2("(?m).*a", "青赤黄a", 9, 10); + + x2("\\p{Hiragana}", "ぴ", 0, 3); + n("\\P{Hiragana}", "ぴ"); + x2("\\p{Emoji}", "\xE2\xAD\x90", 0, 3); + x2("\\p{^Emoji}", "\xEF\xBC\x93", 0, 3); + x2("\\p{Extended_Pictographic}", "\xE2\x9A\xA1", 0, 3); + n("\\p{Extended_Pictographic}", "\xE3\x81\x82"); + + x2("\\p{Word}", "こ", 0, 3); + n("\\p{^Word}", "こ"); + x2("[\\p{Word}]", "こ", 0, 3); + n("[\\p{^Word}]", "こ"); + n("[^\\p{Word}]", "こ"); + x2("[^\\p{^Word}]", "こ", 0, 3); + 
x2("[^\\p{^Word}&&\\p{ASCII}]", "こ", 0, 3); + x2("[^\\p{^Word}&&\\p{ASCII}]", "a", 0, 1); + n("[^\\p{^Word}&&\\p{ASCII}]", "#"); + x2("[^[\\p{^Word}]&&[\\p{ASCII}]]", "こ", 0, 3); + x2("[^[\\p{ASCII}]&&[^\\p{Word}]]", "こ", 0, 3); + n("[[\\p{ASCII}]&&[^\\p{Word}]]", "こ"); + x2("[^[\\p{^Word}]&&[^\\p{ASCII}]]", "こ", 0, 3); + x2("[^\\x{104a}]", "こ", 0, 3); + x2("[^\\p{^Word}&&[^\\x{104a}]]", "こ", 0, 3); + x2("[^[\\p{^Word}]&&[^\\x{104a}]]", "こ", 0, 3); + n("[^\\p{Word}||[^\\x{104a}]]", "こ"); + + x2("\\p{^Cntrl}", "こ", 0, 3); + n("\\p{Cntrl}", "こ"); + x2("[\\p{^Cntrl}]", "こ", 0, 3); + n("[\\p{Cntrl}]", "こ"); + n("[^\\p{^Cntrl}]", "こ"); + x2("[^\\p{Cntrl}]", "こ", 0, 3); + x2("[^\\p{Cntrl}&&\\p{ASCII}]", "こ", 0, 3); + x2("[^\\p{Cntrl}&&\\p{ASCII}]", "a", 0, 1); + n("[^\\p{^Cntrl}&&\\p{ASCII}]", "#"); + x2("[^[\\p{^Cntrl}]&&[\\p{ASCII}]]", "こ", 0, 3); + x2("[^[\\p{ASCII}]&&[^\\p{Cntrl}]]", "こ", 0, 3); + n("[[\\p{ASCII}]&&[^\\p{Cntrl}]]", "こ"); + n("[^[\\p{^Cntrl}]&&[^\\p{ASCII}]]", "こ"); + n("[^\\p{^Cntrl}&&[^\\x{104a}]]", "こ"); + n("[^[\\p{^Cntrl}]&&[^\\x{104a}]]", "こ"); + n("[^\\p{Cntrl}||[^\\x{104a}]]", "こ"); + + x2("(?-W:\\p{Word})", "こ", 0, 3); + n("(?W:\\p{Word})", "こ"); + x2("(?W:\\p{Word})", "k", 0, 1); + x2("(?-W:[[:word:]])", "こ", 0, 3); + n("(?W:[[:word:]])", "こ"); + x2("(?-D:\\p{Digit})", "3", 0, 3); + n("(?D:\\p{Digit})", "3"); + x2("(?-S:\\p{Space})", "\xc2\x85", 0, 2); + n("(?S:\\p{Space})", "\xc2\x85"); + x2("(?-P:\\p{Word})", "こ", 0, 3); + n("(?P:\\p{Word})", "こ"); + x2("(?-W:\\w)", "こ", 0, 3); + n("(?W:\\w)", "こ"); + x2("(?-W:\\w)", "k", 0, 1); + x2("(?W:\\w)", "k", 0, 1); + n("(?-W:\\W)", "こ"); + x2("(?W:\\W)", "こ", 0, 3); + n("(?-W:\\W)", "k"); + n("(?W:\\W)", "k"); + + x2("(?-W:\\b)", "こ", 3, 3); + n("(?W:\\b)", "こ"); + x2("(?-W:\\b)", "h", 1, 1); + x2("(?W:\\b)", "h", 1, 1); + n("(?-W:\\B)", "こ"); + x2("(?W:\\B)", "こ", 3, 3); + n("(?-W:\\B)", "h"); + n("(?W:\\B)", "h"); + x2("(?-P:\\b)", "こ", 3, 3); + n("(?P:\\b)", "こ"); + x2("(?-P:\\b)", "h", 1, 1); 
+ x2("(?P:\\b)", "h", 1, 1); + n("(?-P:\\B)", "こ"); + x2("(?P:\\B)", "こ", 3, 3); + n("(?-P:\\B)", "h"); + n("(?P:\\B)", "h"); + + x2("\\p{InBasicLatin}", "\x41", 0, 1); + //x2("\\p{Grapheme_Cluster_Break_Regional_Indicator}", "\xF0\x9F\x87\xA9", 0, 4); + //n("\\p{Grapheme_Cluster_Break_Regional_Indicator}", "\xF0\x9F\x87\xA5"); + + // extended grapheme cluster + + // CR + LF + n(".\\y\\O", "\x0d\x0a"); + x2(".\\Y\\O", "\x0d\x0a", 0, 2); + + // LATIN SMALL LETTER G, COMBINING DIAERESIS + n("^.\\y.$", "\x67\xCC\x88"); + x2(".\\Y.", "\x67\xCC\x88", 0, 3); + x2("\\y.\\Y.\\y", "\x67\xCC\x88", 0, 3); + // HANGUL SYLLABLE GAG + x2("\\y.\\y", "\xEA\xB0\x81", 0, 3); + // HANGUL CHOSEONG KIYEOK, HANGUL JUNGSEONG A, HANGUL JONGSEONG KIYEOK + x2("^.\\Y.\\Y.$", "\xE1\x84\x80\xE1\x85\xA1\xE1\x86\xA8", 0, 9); + n("^.\\y.\\Y.$", "\xE1\x84\x80\xE1\x85\xA1\xE1\x86\xA8"); + // TAMIL LETTER NA, TAMIL VOWEL SIGN I, + x2(".\\Y.", "\xE0\xAE\xA8\xE0\xAE\xBF", 0, 6); + n(".\\y.", "\xE0\xAE\xA8\xE0\xAE\xBF"); + // THAI CHARACTER KO KAI, THAI CHARACTER SARA AM + x2(".\\Y.", "\xE0\xB8\x81\xE0\xB8\xB3", 0, 6); + n(".\\y.", "\xE0\xB8\x81\xE0\xB8\xB3"); + // DEVANAGARI LETTER SSA, DEVANAGARI VOWEL SIGN I + x2(".\\Y.", "\xE0\xA4\xB7\xE0\xA4\xBF", 0, 6); + n(".\\y.", "\xE0\xA4\xB7\xE0\xA4\xBF"); + + // {Extended_Pictographic} Extend* ZWJ x {Extended_Pictographic} + x2("..\\Y.", "\xE3\x80\xB0\xE2\x80\x8D\xE2\xAD\x95", 0, 9); + x2("...\\Y.", "\xE3\x80\xB0\xCC\x82\xE2\x80\x8D\xE2\xAD\x95", 0, 11); + n("...\\Y.", "\xE3\x80\xB0\xCD\xB0\xE2\x80\x8D\xE2\xAD\x95"); + + // CR + LF + n("^\\X\\X$", "\x0d\x0a"); + x2("^\\X$", "\x0d\x0a", 0, 2); + // LATIN SMALL LETTER G, COMBINING DIAERESIS + n("^\\X\\X.$", "\x67\xCC\x88"); + x2("^\\X$", "\x67\xCC\x88", 0, 3); + // HANGUL CHOSEONG KIYEOK, HANGUL JUNGSEONG A, HANGUL JONGSEONG KIYEOK + x2("^\\X$", "\xE1\x84\x80\xE1\x85\xA1\xE1\x86\xA8", 0, 9); + n("^\\X\\X\\X$", "\xE1\x84\x80\xE1\x85\xA1\xE1\x86\xA8"); + // TAMIL LETTER NA, TAMIL VOWEL SIGN I, + x2("^\\X$", 
"\xE0\xAE\xA8\xE0\xAE\xBF", 0, 6); + n("\\X\\X", "\xE0\xAE\xA8\xE0\xAE\xBF"); + // THAI CHARACTER KO KAI, THAI CHARACTER SARA AM + x2("^\\X$", "\xE0\xB8\x81\xE0\xB8\xB3", 0, 6); + n("\\X\\X", "\xE0\xB8\x81\xE0\xB8\xB3"); + // DEVANAGARI LETTER SSA, DEVANAGARI VOWEL SIGN I + x2("^\\X$", "\xE0\xA4\xB7\xE0\xA4\xBF", 0, 6); + n("\\X\\X", "\xE0\xA4\xB7\xE0\xA4\xBF"); + + n("^\\X.$", "\xE0\xAE\xA8\xE0\xAE\xBF"); + + // a + COMBINING GRAVE ACCENT (U+0300) + x2("h\\Xllo", "ha\xCC\x80llo", 0, 7); + + // Text Segment: Extended Grapheme Cluster <-> Word Boundary + x2("(?y{g})\\yabc\\y", "abc", 0, 3); + x2("(?y{g})\\y\\X\\y", "abc", 2, 3); + x2("(?y{w})\\yabc\\y", "abc", 0, 3); // WB1, WB2 + x2("(?y{w})\\y\\X", "\r\n", 0, 2); // WB3 + x2("(?y{w})\\X", "\x0cz", 1, 2); // WB3a + x2("(?y{w})\\X", "q\x0c", 1, 2); // WB3b + x2("(?y{w})\\y\\X", "\xE2\x80\x8D\xE2\x9D\x87", 0, 6); // WB3c + x2("(?y{w})\\y\\X", "\x20\x20", 0, 2); // WB3d + x2("(?y{w})\\y\\X", "a\xE2\x80\x8D", 0, 4); // WB4 + x2("(?y{w})\\y\\X\\y", "abc", 0, 3); // WB5 + x2("(?y{w})\\y\\X\\y", "v\xCE\x87w", 0, 4); // WB6, WB7 + x2("(?y{w})\\y\\X\\y", "\xD7\x93\x27", 0, 3); // WB7a + x2("(?y{w})\\y\\X\\y", "\xD7\x93\x22\xD7\x93", 0, 5); // WB7b, WB7c + x2("(?y{w})\\y\\X", "14 45", 3, 5); // WB8 + x2("(?y{w})\\y\\X", "a14", 0, 3); // WB9 + x2("(?y{w})\\y\\X", "832e", 0, 4); // WB10 + x2("(?y{w})\\y\\X", "8\xEF\xBC\x8C\xDB\xB0", 0, 6); // WB11, WB12 + x2("(?y{w})\\y\\X\\y", "ケン", 0, 6); // WB13 + x2("(?y{w})\\y\\X\\y", "ケン\xE2\x80\xAFタ", 0, 12); // WB13a, WB13b + x2("(?y{w})\\y\\X\\y", "\x21\x23", 1, 2); // WB999 + x2("(?y{w})\\y\\X\\y", "山ア", 3, 6); + x2("(?y{w})\\y\\X", "3.14", 0, 4); + x2("(?y{w})\\y\\X", "3 14", 2, 4); + + x2("\\x40", "@", 0, 1); + x2("\\x1", "\x01", 0, 1); + x2("\\x{1}", "\x01", 0, 1); + x2("\\x{4E38}", "\xE4\xB8\xB8", 0, 3); + x2("\\u4E38", "\xE4\xB8\xB8", 0, 3); + x2("\\u0040", "@", 0, 1); + + x2("c.*\\b", "abc", 2, 3); + x2("\\b.*abc.*\\b", "abc", 0, 3); + 
x2("((?()0+)+++(((0\\g<0>)0)|())++++((?(1)(0\\g<0>))++++++0*())++++((?(1)(0\\g<1>)+)++++++++++*())++++((?(1)((0)\\g<0>)+)++())+0++*+++(((0\\g<0>))*())++++((?(1)(0\\g<0>)+)++++++++++*|)++++*+++((?(1)((0)\\g<0>)+)+++++++++())++*|)++++((?()0))|", "abcde", 5, 5); // #139 + + n("(*FAIL)", "abcdefg"); + n("abcd(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)", "abcdefg"); + x2("(?:[ab]|(*MAX{2}).)*", "abcbaaccaaa", 11, 11); + x2("(?:(*COUNT[AB]{X})[ab]|(*COUNT[CD]{X})[cd])*(*CMP{AB,<,CD})", + "abababcdab", 7, 8); + x2("(?(?{....})123|456)", "123", 0, 3); + x2("(?(*FAIL)123|456)", "456", 0, 3); + + x2("\\g'0'++{,0}", "abcdefgh", 8, 8); + x2("\\g'0'++{,0}?", "abcdefgh", 8, 8); + x2("\\g'0'++{,0}b", "abcdefgh", 1, 2); + x2("\\g'0'++{,0}?def", "abcdefgh", 3, 6); + x2("a{1,3}?", "aaa", 2, 3); + x2("a{3}", "aaa", 0, 3); + x2("a{3}?", "aaa", 3, 3); + x2("a{3}?", "aa", 2, 2); + x2("a{3,3}?", "aaa", 0, 3); + n("a{3,3}?", "aa"); + x2("a{1,3}+", "aaaaaa", 5, 6); + x2("a{3}+", "aaaaaa", 3, 6); + x2("a{3,3}+", "aaaaaa", 3, 6); + n("a{2,3}?", "a"); + n("a{3,2}a", "aaa"); + x2("a{3,2}b", "aaab", 1, 4); + x2("a{3,2}b", "aaaab", 2, 5); + x2("a{3,2}b", "aab", 0, 3); + x2("a{3,2}?", "", 0, 0); /* == (?:a{3,2})?*/ + x2("a{2,3}+a", "aaa", 0, 3); /* == (?:a{2,3})+*/ + x2("[\\x{0}-\\x{7fffffff}]", "a", 0, 1); + x2("[\\x{7f}-\\x{7fffffff}]", "\xe5\xae\xb6", 0, 3); + x2("[a[cdef]]", "a", 0, 1); + n("[a[xyz]-c]", "b"); + x2("[a[xyz]-c]", "a", 0, 1); + x2("[a[xyz]-c]", "-", 0, 1); + x2("[a[xyz]-c]", "c", 0, 1); + x2("(a.c|def)(.{4})(?<=\\1)", "abcdabc", 0, 7); + x2("(a.c|de)(.{4})(?<=\\1)", "abcdabc", 0, 7); + x2("(a.c|def)(.{5})(?<=d\\1e)", "abcdabce", 0, 8); 
+ x2("(a.c|.)d(?<=\\k<1>d)", "zzzzzabcdabc", 7, 9); + x2("(?<=az*)abc", "azzzzzzzzzzabcdabcabc", 11, 14); + x2("(?<=ab|abc|abcd)ef", "abcdef", 4, 6); + x2("(?<=ta+|tb+|tc+|td+)zz", "tcccccccccczz", 11, 13); + x2("(?<=t.{7}|t.{5}|t.{2}|t.)zz", "tczz", 2, 4); + x2("(?<=t.{7}|t.{5}|t.{2})zz", "tczzzz", 3, 5); + x2("(?<=t.{7}|t.{5}|t.{3})zz", "tczzazzbzz", 8, 10); + n("(?<=t.{7}|t.{5}|t.{3})zz", "tczzazzbczz"); + x2("(?<=(ab|abc|abcd))ef", "abcdef", 4, 6); + x2("(?<=(ta+|tb+|tc+|td+))zz", "tcccccccccczz", 11, 13); + x2("(?<=(t.{7}|t.{5}|t.{2}|t.))zz", "tczz", 2, 4); + x2("(?<=(t.{7}|t.{5}|t.{2}))zz", "tczzzz", 3, 5); + x2("(?<=(t.{7}|t.{5}|t.{3}))zz", "tczzazzbzz", 8, 10); + n("(?<=(t.{7}|t.{5}|t.{3}))zz", "tczzazzbczz"); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "abaaba", 2, 4); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "ababab", 2, 6); + n("(.{1,4})(.{1,4})(?<=\\2\\1)", "abcdabce"); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "abcdabceabce", 4, 12); + x2("(?<=a)", "a", 1, 1); + x2("(?<=a.*\\w)z", "abbbz", 4, 5); + n("(?<=a.*\\w)z", "abb z"); + x2("(?<=a.*\\W)z", "abb z", 4, 5); + x2("(?<=a.*\\b)z", "abb z", 4, 5); + x2("(?<=(?>abc))", "abc", 3, 3); + x2("(?<=a\\Xz)", "abz", 3, 3); + n("(?<=^a*)bc", "zabc"); + n("(?<=a*\\b)b", "abc"); + x2("(?<=a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=a+.*[efg])z", "abcdfgz", 6, 7); + n("(?<=a+.*[efg])z", "bcdfz"); + x2("(?<=a*.*[efg])z", "bcdfz", 4, 5); + n("(?<=a+.*[efg])z", "abcdz"); + x2("(?<=v|t|a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=v|t|^a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=^(?:v|t|a+.*[efg]))z", "abcdfz", 5, 6); + x2("(?<=v|^t|a+.*[efg])z", "uabcdfz", 6, 7); + n("^..(?<=(a{,2}))\\1z", "aaaaz"); // !!! 
look-behind is shortest priority + x2("^..(?<=(a{,2}))\\1z", "aaz", 0, 3); // shortest priority + e("(?<=(?~|zoo)a.*z)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + e("(?<=(?~|)a.*z)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + e("(a(?~|boo)z){0}(?<=\\g<1>)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + x2("(?<=(?<= )| )", "abcde fg", 6, 6); // #173 + x2("(?<=D|)(?<=@!nnnnnnnnnIIIIn;{1}D?()|<x@x*xxxD|)(?<=@xxx|xxxxx\\g<1>;{1}x)", "(?<=D|)(?<=@!nnnnnnnnnIIIIn;{1}D?()|<x@x*xxxD|)(?<=@xxx|xxxxx\\g<1>;{1}x)", 55, 55); // #173 + x2("(?<=;()|)\\g<1>", "", 0, 0); // reduced #173 + x2("(?<=;()|)\\k<1>", ";", 1, 1); + x2("(())\\g<3>{0}(?<=|())", "abc", 3, 3); // #175 + x2("(?<=()|)\\1{0}", "abc", 3, 3); + e("(?<!xxxxxxxxxxxxxxxxxxxxxxx{32774}{65521}xxxxxxxx{65521}xxxxxxxxxxxxxx{32774}xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx)", "", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); // #177 + x2("(?<=(?<=abc))def", "abcdef", 3, 6); + x2("(?<=ab(?<=.+b)c)def", "abcdef", 3, 6); + n("(?<=ab(?<=a+)c)def", "abcdef"); + n("(?<=abc)(?<!abc)def", "abcdef"); + n("(?<!ab.)(?<=.bc)def", "abcdef"); + x2("(?<!ab.)(?<=.bc)def", "abcdefcbcdef", 9, 12); + n("(?<!abc)def", "abcdef"); + n("(?<!xxx|abc)def", "abcdef"); + n("(?<!xxxxx|abc)def", "abcdef"); + n("(?<!xxxxx|abc)def", "xxxxxxdef"); + n("(?<!x+|abc)def", "abcdef"); + n("(?<!x+|abc)def", "xxxxxxxxxdef"); + x2("(?<!x+|abc)def", "xxxxxxxxzdef", 9, 12); + n("(?<!a.*z|a)def", "axxxxxxxzdef"); + n("(?<!a.*z|a)def", "bxxxxxxxadef"); + x2("(?<!a.*z|a)def", "axxxxxxxzdefxxdef", 14, 17); + x2("(?<!a.*z|a)def", "bxxxxxxxadefxxdef", 14, 17); + x2("(?<!a.*z|a)def", "bxxxxxxxzdef", 9, 12); + x2("(?<!x+|y+)\\d+", "xxx572", 5, 6); + x2("(?<!3+|4+)\\d+", "33334444", 0, 8); + n(".(?<!3+|4+)\\d+", "33334444"); + n("(.{,3})..(?<!\\1)", "aaaaa"); + x2("(.{,3})..(?<!\\1)", "abcde", 2, 5); + x2("(.{,3})...(?<!\\1)", "abcde", 1, 5); + x2("(a.c)(.{3,}?)(?<!\\1)", "abcabcd", 0, 7); + x2("(a*)(.{3,}?)(?<!\\1)", "abcabcd", 3, 7); + 
x2("(?:(a.*b)|c.*d)(?<!(?(1))azzzb)", "azzzzb", 0, 6); + n("(?:(a.*b)|c.*d)(?<!(?(1))azzzb)", "azzzb"); + x2("<(?<!NT{+}abcd)", "<(?<!NT{+}abcd)", 3, 4); + x2("(?<!a.*c)def", "abbbbdef", 5, 8); + n("(?<!a.*c)def", "abbbcdef"); + x2("(?<!a.*X\\b)def", "abbbbbXdef", 7, 10); + n("(?<!a.*X\\B)def", "abbbbbXdef"); + x2("(?<!a.*[uvw])def", "abbbbbXdef", 7, 10); + n("(?<!a.*[uvw])def", "abbbbbwdef"); + x2("(?<!ab*\\S+)def", "abbbbb def", 9, 12); + x2("(?<!a.*\\S)def", "abbbbb def", 7, 10); + n("(?<!ab*\\s+)def", "abbbbb def"); + x2("(?<!ab*\\s+\\B)def", "abbbbb def", 9, 12); + n("(?<!v|t|a+.*[efg])z", "abcdfz"); + x2("(?<!v|t|a+.*[efg])z", "abcdfzavzuz", 10, 11); + n("(?<!v|t|^a+.*[efg])z", "abcdfz"); + n("(?<!^(?:v|t|a+.*[efg]))z", "abcdfz"); + x2("(?<!v|^t|^a+.*[efg])z", "uabcdfz", 6, 7); + + x2("((?(a)\\g<1>|b))", "aab", 2, 3); + x2("((?(a)\\g<1>))", "aab", 1, 2); + x2("(b(?(a)|\\g<1>))", "bba", 1, 3); + e("(()(?(2)\\g<1>))", "", ONIGERR_NEVER_ENDING_RECURSION); + x2("(?(a)(?:b|c))", "ac", 0, 2); + n("^(?(a)b|c)", "ac"); + x2("(?i)a|b", "B", 0, 1); + n("((?i)a|b.)|c", "C"); + n("c(?i)a.|b.", "Caz"); + x2("c(?i)a|b", "cB", 0, 2); /* == c(?i:a|b) */ + x2("c(?i)a.|b.", "cBb", 0, 3); + + x2("(?i)st", "st", 0, 2); + x2("(?i)st", "St", 0, 2); + x2("(?i)st", "sT", 0, 2); + x2("(?i)st", "\xC5\xBFt", 0, 3); // U+017F + x2("(?i)st", "\xEF\xAC\x85", 0, 3); // U+FB05 + x2("(?i)st", "\xEF\xAC\x86", 0, 3); // U+FB06 + x2("(?i)ast", "Ast", 0, 3); + x2("(?i)ast", "ASt", 0, 3); + x2("(?i)ast", "AsT", 0, 3); + x2("(?i)ast", "A\xC5\xBFt", 0, 4); // U+017F + x2("(?i)ast", "A\xEF\xAC\x85", 0, 4); // U+FB05 + x2("(?i)ast", "A\xEF\xAC\x86", 0, 4); // U+FB06 + x2("(?i)stZ", "stz", 0, 3); + x2("(?i)stZ", "Stz", 0, 3); + x2("(?i)stZ", "sTz", 0, 3); + x2("(?i)stZ", "\xC5\xBFtz", 0, 4); // U+017F + x2("(?i)stZ", "\xEF\xAC\x85z", 0, 4); // U+FB05 + x2("(?i)stZ", "\xEF\xAC\x86z", 0, 4); // U+FB06 + x2("(?i)BstZ", "bstz", 0, 4); + x2("(?i)BstZ", "bStz", 0, 4); + x2("(?i)BstZ", "bsTz", 0, 4); + 
x2("(?i)BstZ", "b\xC5\xBFtz", 0, 5); // U+017F + x2("(?i)BstZ", "b\xEF\xAC\x85z", 0, 5); // U+FB05 + x2("(?i)BstZ", "b\xEF\xAC\x86z", 0, 5); // U+FB06 + x2("(?i).*st\\z", "tttssss\xC5\xBFt", 7, 10); // U+017F + x2("(?i).*st\\z", "tttssss\xEF\xAC\x85", 7, 10); // U+FB05 + x2("(?i).*st\\z", "tttssss\xEF\xAC\x86", 7, 10); // U+FB06 + x2("(?i).*あstい\\z", "tttssssあ\xC5\xBFtい", 7, 16); // U+017F + x2("(?i).*あstい\\z", "tttssssあ\xEF\xAC\x85い", 7, 16); // U+FB05 + x2("(?i).*あstい\\z", "tttssssあ\xEF\xAC\x86い", 7, 16); // U+FB06 + x2("(?i).*\xC5\xBFt\\z", "tttssssst", 7, 9); // U+017F + x2("(?i).*\xEF\xAC\x85\\z", "tttssssあst", 10, 12); // U+FB05 + x2("(?i).*\xEF\xAC\x86い\\z", "tttssssstい", 7, 12); // U+FB06 + x2("(?i).*\xEF\xAC\x85\\z", "tttssssあ\xEF\xAC\x85", 10, 13); + + x2("(?i).*ss", "abcdefghijklmnopqrstuvwxyz\xc3\x9f", 26, 28); // U+00DF + x2("(?i).*ss.*", "abcdefghijklmnopqrstuvwxyz\xc3\x9fxyz", 26, 31); // U+00DF + x2("(?i).*\xc3\x9f", "abcdefghijklmnopqrstuvwxyzss", 26, 28); // U+00DF + x2("(?i).*ss.*", "abcdefghijklmnopqrstuvwxyzSSxyz", 26, 31); + + x2("(?i)ssv", "\xc3\x9fv", 0, 3); // U+00DF + x2("(?i)(?<=ss)v", "SSv", 2, 3); + x2("(?i)(?<=\xc3\x9f)v", "\xc3\x9fv", 2, 3); + //x2("(?i)(?<=\xc3\x9f)v", "ssv", 2, 3); + //x2("(?i)(?<=ss)v", "\xc3\x9fv", 2, 3); + + /* #156 U+01F0 (UTF-8: C7 B0) */ + x2("(?i).+Isssǰ", ".+Isssǰ", 1, 8); + x2(".+Isssǰ", ".+Isssǰ", 1, 8); + x2("(?i)ǰ", "ǰ", 0, 2); + x2("(?i)ǰ", "j\xcc\x8c", 0, 3); + x2("(?i)j\xcc\x8c", "ǰ", 0, 2); + x2("(?i)5ǰ", "5ǰ", 0, 3); + x2("(?i)5ǰ", "5j\xcc\x8c", 0, 4); + x2("(?i)5j\xcc\x8c", "5ǰ", 0, 3); + x2("(?i)ǰv", "ǰV", 0, 3); + x2("(?i)ǰv", "j\xcc\x8cV", 0, 4); + x2("(?i)j\xcc\x8cv", "ǰV", 0, 3); + x2("(?i)[ǰ]", "ǰ", 0, 2); + x2("(?i)[ǰ]", "j\xcc\x8c", 0, 3); + //x2("(?i)[j]\xcc\x8c", "ǰ", 0, 2); + x2("(?i)\ufb00a", "ffa", 0, 3); + x2("(?i)ffz", "\xef\xac\x80z", 0, 4); + x2("(?i)\u2126", "\xcf\x89", 0, 2); + x2("a(?i)\u2126", "a\xcf\x89", 0, 3); + x2("(?i)A\u2126", "a\xcf\x89", 0, 3); + x2("(?i)A\u2126=", 
"a\xcf\x89=", 0, 4); + x2("(?i:ss)=1234567890", "\xc5\xbf\xc5\xbf=1234567890", 0, 15); + + n("a(b|)+d", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcd"); /* https://www.haijin-boys.com/discussions/5079 */ + n(" \xfd", ""); /* https://bugs.php.net/bug.php?id=77370 */ + /* can't use \xfc00.. because compiler error: hex escape sequence out of range */ + n("()0\\xfc00000\\xfc00000\\xfc00000\xfc", ""); /* https://bugs.php.net/bug.php?id=77371 */ + x2("000||0\xfa", "0", 1, 1); + e("(?i)000000000000000000000\xf0", "", ONIGERR_INVALID_CODE_POINT_VALUE); /* https://bugs.php.net/bug.php?id=77382 */ + n("0000\\\xf5", "0"); /* https://bugs.php.net/bug.php?id=77385 */ + n("(?i)FFF00000000000000000\xfd", ""); /* https://bugs.php.net/bug.php?id=77394 */ + e("x{55380}{77590}", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + e("(xyz){40000}{99999}(?<name>vv)", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + e("f{90000,90000}{80000,80000}", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + n("f{90000,90000}{80000,80001}", ""); + + x2("\\p{Common}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ + x2("\\p{In_Enclosed_CJK_Letters_and_Months}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ + + e("\\x{7fffffff}", "", ONIGERR_TOO_BIG_WIDE_CHAR_VALUE); + e("[\\x{7fffffff}]", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); + e("(?<abc>\\g<abc>)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); + e("(*FOO)", "abcdefg", ONIGERR_UNDEFINED_CALLOUT_NAME); + e("*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("|*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?i)*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?:*)", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?m:*)", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + x2("(?:)*", "abc", 3, 3); + e("^*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID); + + fprintf(stdout, + "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, 
onig_version()); + + onig_region_free(region, 1); + onig_end(); + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} diff --git a/test/test_regset.c b/test/test_regset.c index 497fbd6..c8442a1 100644 --- a/test/test_regset.c +++ b/test/test_regset.c @@ -66,19 +66,16 @@ make_regset(int line_no, int n, char* pat[], OnigRegSet** rset, int error_no) return 0; } -#ifndef _WIN32 - static double -get_sec(struct timespec* ts, struct timespec* te) +get_sec(clock_t start, clock_t end) { double t; - t = (te->tv_sec - ts->tv_sec) + - (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0; + t = (double )(end - start) / CLOCKS_PER_SEC; return t; } -/* clock_gettime() doesn't exist in Windows */ +/* use clock(), because clock_gettime() doesn't exist in Windows and old Unix. */ static int time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, double* rt_reg) @@ -87,14 +84,13 @@ time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, dou int i; int match_pos; OnigRegSet* set; - struct timespec ts1, ts2; + clock_t ts1, ts2; double t_set, t_reg; r = make_regset(0, n, ps, &set, 0); if (r != 0) return r; - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); - + ts1 = clock(); for (i = 0; i < repeat; i++) { r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end, ONIG_REGSET_POSITION_LEAD, ONIG_OPTION_NONE, &match_pos); @@ -104,12 +100,10 @@ time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, dou } } - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); - t_set = get_sec(&ts1, &ts2); - - - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); + ts2 = clock(); + t_set = get_sec(ts1, ts2); + ts1 = clock(); for (i = 0; i < repeat; i++) { r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end, ONIG_REGSET_REGEX_LEAD, ONIG_OPTION_NONE, &match_pos); @@ -119,16 +113,15 @@ time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, dou } } - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, 
&ts2); - onig_regset_free(set); + ts2 = clock(); + t_reg = get_sec(ts1, ts2); - t_reg = get_sec(&ts1, &ts2); + onig_regset_free(set); *rt_set = t_set; *rt_reg = t_reg; return 0; } -#endif static void fisher_yates_shuffle(int n, char* ps[], char* cps[]) @@ -147,7 +140,6 @@ fisher_yates_shuffle(int n, char* ps[], char* cps[]) } } -#ifndef _WIN32 static void time_compare(int n, char* ps[], char* s, char* end) { @@ -176,7 +168,6 @@ time_compare(int n, char* ps[], char* s, char* end) fprintf(stdout, "POS lead: %6.2lfmsec. REG lead: %6.2lfmsec.\n", total_set * 1000.0, total_reg * 1000.0); } -#endif static OnigRegSetLead XX_LEAD = ONIG_REGSET_POSITION_LEAD; @@ -391,11 +382,15 @@ static char* p7[] = { "0+", "1+", "2+", "3+", "4+", "5+", "6+", "7+", "8+", "9+", }; +static char* p8[] = {"a", ".*"}; + extern int main(int argc, char* argv[]) { - int r; +#ifndef _WIN32 int file_exist; +#endif + int r; char *s, *end; OnigEncoding use_encs[1]; @@ -412,6 +407,7 @@ main(int argc, char* argv[]) N(p2, " XXXX AAA 1223 012345678bbb"); X2(p2, "0123456789", 9, 10); X2(p7, "abcde 555 qwert", 6, 9); + X2(p8, "", 0, 0); XX_LEAD = ONIG_REGSET_REGEX_LEAD; @@ -432,22 +428,20 @@ main(int argc, char* argv[]) fprintf(stdout, "Ignore %s\n", TEXT_PATH); file_exist = 0; } -#else - file_exist = 0; -#endif if (file_exist != 0) { X2(p2, s, 10, 22); X2(p3, s, 496079, 496088); X2(p4, s, 1294, 1315); } +#endif fprintf(stdout, "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", nsucc, nfail, nerror, onig_version()); - if (file_exist != 0) { #ifndef _WIN32 + if (file_exist != 0) { fprintf(stdout, "\n"); time_compare(ASIZE(p2), p2, s, end); time_compare(ASIZE(p3), p3, s, end); @@ -455,9 +449,9 @@ main(int argc, char* argv[]) time_compare(ASIZE(p5), p5, s, end); time_compare(ASIZE(p6), p6, s, end); fprintf(stdout, "\n"); -#endif free(s); } +#endif onig_end(); diff --git a/test/test_syntax.c b/test/test_syntax.c new file mode 100644 index 0000000..df80e59 --- /dev/null +++ b/test/test_syntax.c @@ 
-0,0 +1,246 @@ +/* + * test_syntax.c + * Copyright (c) 2019-2020 K.Kosako + */ +#include "config.h" +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif + +#include <stdio.h> +#include <string.h> +#include "oniguruma.h" + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +static OnigRegion* region; + +static OnigSyntaxType* Syntax; + +static void xx(char* pattern, char* str, int from, int to, int mem, int not, + int error_no) +{ + int r; + regex_t* reg; + OnigErrorInfo einfo; + + r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, Syntax, &einfo); + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + if (error_no == 0) { + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s /%s/\n", s, pattern); + nerror++; + } + else { + if (r == error_no) { + fprintf(stdout, "OK(ERROR): /%s/ %d\n", pattern, r); + nsucc++; + } + else { + fprintf(stdout, "FAIL(ERROR): /%s/ '%s', %d, %d\n", pattern, str, + error_no, r); + nfail++; + } + } + + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), + (UChar* )str, (UChar* )(str + SLEN(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + if (error_no == 0) { + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s /%s/\n", s, pattern); + nerror++; + } + else { + if (r == error_no) { + fprintf(stdout, "OK(ERROR): /%s/ '%s', %d\n", pattern, str, r); + nsucc++; + } + else { + fprintf(stdout, "FAIL ERROR NO: /%s/ '%s', %d, %d\n", pattern, str, + error_no, r); + nfail++; + } + } + + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); 
+ nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1, 0); +} + +static void e(char* pattern, char* str, int error_no) +{ + xx(pattern, str, 0, 0, 0, 0, error_no); +} + +static int test_fixed_interval() +{ + x2("a{1,3}?", "aaa", 0, 1); + x2("a{3}", "aaa", 0, 3); + x2("a{3}?", "aaa", 0, 3); + n("a{3}?", "aa"); + x2("a{3,3}?", "aaa", 0, 3); + n("a{3,3}?", "aa"); + + x2("a{1,3}+", "aaaaaa", 0, 3); + x2("a{3}+", "aaaaaa", 0, 3); + x2("a{3,3}+", "aaaaaa", 0, 3); + + return 0; +} + +static int test_isolated_option() +{ + x2("", "", 0, 0); + x2("^", "", 0, 0); + n("^a", "\na"); + n(".", "\n"); + x2("(?s:.)", "\n", 0, 1); + x2("(?s).", "\n", 0, 1); + x2("(?s)a|.", "\n", 0, 1); + n("(?s:a)|.", "\n"); + x2("b(?s)a|.", "\n", 0, 1); + n("((?s)a)|.", "\n"); + n("b(?:(?s)a)|z|.", "\n"); + n(".|b(?s)a", "\n"); + n(".(?s)", "\n"); + n("(?s)(?-s)a|.", "\n"); + x2("(?s)a|.(?-s)", "\n", 0, 1); + x2("(?s)a|((?-s)).", "\n", 0, 1); + x2("(?s)a|(?:(?-s)).", "\n", 0, 1); // !!! Perl 5.26.1 returns empty match + x2("(?s)a|(?:).", "\n", 0, 1); // !!! Perl 5.26.1 returns empty match + x2("(?s)a|(?:.)", "\n", 0, 1); + x2("(?s)a|(?:a*).", "\n", 0, 1); + n("a|(?:).", "\n"); // !!! 
Perl 5.26.1 returns empty match + n("a|(?:)(.)", "\n"); + x2("(?s)a|(?:)(.)", "\n", 0, 1); + x2("b(?s)a|(?:)(.)", "\n", 0, 1); + n("b((?s)a)|(?:)(.)", "\n"); + + return 0; +} + +static int test_prec_read() +{ + x2("(?=a).b", "ab", 0, 2); + x2("(?=ab|(.))\\1", "ab", 1, 2); // doesn't backtrack if success once in prec-read + n("(?!(.)z)a\\1", "aa"); // ! Perl 5.26.1 match with "aa" + + return 0; +} + +static int test_look_behind() +{ + x2("(?<=a)b", "ab", 1, 2); + x2("(?<=a|b)c", "abc", 2, 3); + x2("(?<=a|(.))\\1", "abcc", 3, 4); + + // following is not match in Perl and Java + //x2("(?<=a|(.))\\1", "aa", 1, 2); + + n("(?<!c|c)a", "ca"); + + return 0; +} + +extern int main(int argc, char* argv[]) +{ + OnigEncoding use_encs[1]; + + use_encs[0] = ONIG_ENCODING_UTF8; + onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); + + err_file = stdout; + + region = onig_region_new(); + + Syntax = ONIG_SYNTAX_PERL; + + test_fixed_interval(); + test_isolated_option(); + test_prec_read(); + test_look_behind(); + e("(?<=ab|(.))\\1", "abb", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); // Variable length lookbehind not implemented in Perl 5.26.1 + + x3("()", "abc", 0, 0, 1); + e("(", "", ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); + // different spec. + // e("\\x{7fffffff}", "", ONIGERR_TOO_BIG_WIDE_CHAR_VALUE); + + Syntax = ONIG_SYNTAX_JAVA; + + test_fixed_interval(); + test_isolated_option(); + test_prec_read(); + test_look_behind(); + x2("(?<=ab|(.))\\1", "abb", 2, 3); + n("(?<!ab|b)c", "bbc"); + n("(?<!b|ab)c", "bbc"); + + fprintf(stdout, + "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + + onig_region_free(region, 1); + onig_end(); + + return ((nfail == 0 && nerror == 0) ? 
0 : -1); +} diff --git a/test/test_utf8.c b/test/test_utf8.c index d6fc761..1bbc071 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -1,6 +1,6 @@ /* * test_utf8.c - * Copyright (c) 2019 K.Kosako + * Copyright (c) 2019-2020 K.Kosako */ #include "config.h" #ifdef ONIG_ESCAPE_UCHAR_COLLISION @@ -685,7 +685,7 @@ extern int main(int argc, char* argv[]) x2("aaaaa(?~)", "aaaaaaaaaa", 0, 5); x2("(?~(?:|aaa))", "aaa", 0, 0); x2("(?~aaa|)", "aaa", 0, 0); - x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // !!! + x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // nested absent functions cause strange result x2("/\\*(?~\\*/)\\*/", "/* */ */", 0, 5); x2("(?~\\w+)zzzzz", "zzzzz", 0, 5); x2("(?~\\w*)zzzzz", "zzzzz", 0, 5); @@ -1198,6 +1198,15 @@ extern int main(int argc, char* argv[]) x2("\\g'0'++{,0}?", "abcdefgh", 0, 0); x2("\\g'0'++{,0}b", "abcdefgh", 1, 2); x2("\\g'0'++{,0}?def", "abcdefgh", 3, 6); + x2("a{1,3}?", "aaa", 0, 1); + x2("a{3}", "aaa", 0, 3); + x2("a{3}?", "aaa", 0, 3); + x2("a{3}?", "aa", 0, 0); + x2("a{3,3}?", "aaa", 0, 3); + n("a{3,3}?", "aa"); + x2("a{1,3}+", "aaaaaa", 0, 6); + x2("a{3}+", "aaaaaa", 0, 6); + x2("a{3,3}+", "aaaaaa", 0, 6); n("a{2,3}?", "a"); n("a{3,2}a", "aaa"); x2("a{3,2}b", "aaab", 0, 4); @@ -1212,11 +1221,113 @@ extern int main(int argc, char* argv[]) x2("[a[xyz]-c]", "a", 0, 1); x2("[a[xyz]-c]", "-", 0, 1); x2("[a[xyz]-c]", "c", 0, 1); + x2("(a.c|def)(.{4})(?<=\\1)", "abcdabc", 0, 7); + x2("(a.c|de)(.{4})(?<=\\1)", "abcdabc", 0, 7); + x2("(a.c|def)(.{5})(?<=d\\1e)", "abcdabce", 0, 8); + x2("(a.c|.)d(?<=\\k<1>d)", "zzzzzabcdabc", 5, 9); + x2("(?<=az*)abc", "azzzzzzzzzzabcdabcabc", 11, 14); + x2("(?<=ab|abc|abcd)ef", "abcdef", 4, 6); + x2("(?<=ta+|tb+|tc+|td+)zz", "tcccccccccczz", 11, 13); + x2("(?<=t.{7}|t.{5}|t.{2}|t.)zz", "tczz", 2, 4); + x2("(?<=t.{7}|t.{5}|t.{2})zz", "tczzzz", 3, 5); + x2("(?<=t.{7}|t.{5}|t.{3})zz", "tczzazzbzz", 8, 10); + n("(?<=t.{7}|t.{5}|t.{3})zz", "tczzazzbczz"); + x2("(?<=(ab|abc|abcd))ef", 
"abcdef", 4, 6); + x2("(?<=(ta+|tb+|tc+|td+))zz", "tcccccccccczz", 11, 13); + x2("(?<=(t.{7}|t.{5}|t.{2}|t.))zz", "tczz", 2, 4); + x2("(?<=(t.{7}|t.{5}|t.{2}))zz", "tczzzz", 3, 5); + x2("(?<=(t.{7}|t.{5}|t.{3}))zz", "tczzazzbzz", 8, 10); + n("(?<=(t.{7}|t.{5}|t.{3}))zz", "tczzazzbczz"); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "abaaba", 0, 6); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "ababab", 0, 6); + n("(.{1,4})(.{1,4})(?<=\\2\\1)", "abcdabce"); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "abcdabceabce", 4, 12); + x2("(?<=a)", "a", 1, 1); + x2("(?<=a.*\\w)z", "abbbz", 4, 5); + n("(?<=a.*\\w)z", "abb z"); + x2("(?<=a.*\\W)z", "abb z", 4, 5); + x2("(?<=a.*\\b)z", "abb z", 4, 5); + x2("(?<=(?>abc))", "abc", 3, 3); + x2("(?<=a\\Xz)", "abz", 3, 3); + n("(?<=^a*)bc", "zabc"); + n("(?<=a*\\b)b", "abc"); + x2("(?<=a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=a+.*[efg])z", "abcdfgz", 6, 7); + n("(?<=a+.*[efg])z", "bcdfz"); + x2("(?<=a*.*[efg])z", "bcdfz", 4, 5); + n("(?<=a+.*[efg])z", "abcdz"); + x2("(?<=v|t|a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=v|t|^a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=^(?:v|t|a+.*[efg]))z", "abcdfz", 5, 6); + x2("(?<=v|^t|a+.*[efg])z", "uabcdfz", 6, 7); + n("^..(?<=(a{,2}))\\1z", "aaaaz"); // !!! 
look-behind is shortest priority + x2("^..(?<=(a{,2}))\\1z", "aaz", 0, 3); // shortest priority + e("(?<=(?~|zoo)a.*z)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + e("(?<=(?~|)a.*z)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + e("(a(?~|boo)z){0}(?<=\\g<1>)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + x2("(?<=(?<= )| )", "abcde fg", 6, 6); // #173 + x2("(?<=D|)(?<=@!nnnnnnnnnIIIIn;{1}D?()|<x@x*xxxD|)(?<=@xxx|xxxxx\\g<1>;{1}x)", "(?<=D|)(?<=@!nnnnnnnnnIIIIn;{1}D?()|<x@x*xxxD|)(?<=@xxx|xxxxx\\g<1>;{1}x)", 55, 55); // #173 + x2("(?<=;()|)\\g<1>", "", 0, 0); // reduced #173 + x2("(?<=;()|)\\k<1>", ";", 1, 1); + x2("(())\\g<3>{0}(?<=|())", "abc", 0, 0); // #175 + x2("(?<=()|)\\1{0}", "abc", 0, 0); + e("(?<!xxxxxxxxxxxxxxxxxxxxxxx{32774}{65521}xxxxxxxx{65521}xxxxxxxxxxxxxx{32774}xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx)", "", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); // #177 + x2("(?<=(?<=abc))def", "abcdef", 3, 6); + x2("(?<=ab(?<=.+b)c)def", "abcdef", 3, 6); + n("(?<=ab(?<=a+)c)def", "abcdef"); + n("(?<=abc)(?<!abc)def", "abcdef"); + n("(?<!ab.)(?<=.bc)def", "abcdef"); + x2("(?<!ab.)(?<=.bc)def", "abcdefcbcdef", 9, 12); + n("(?<!abc)def", "abcdef"); + n("(?<!xxx|abc)def", "abcdef"); + n("(?<!xxxxx|abc)def", "abcdef"); + n("(?<!xxxxx|abc)def", "xxxxxxdef"); + n("(?<!x+|abc)def", "abcdef"); + n("(?<!x+|abc)def", "xxxxxxxxxdef"); + x2("(?<!x+|abc)def", "xxxxxxxxzdef", 9, 12); + n("(?<!a.*z|a)def", "axxxxxxxzdef"); + n("(?<!a.*z|a)def", "bxxxxxxxadef"); + x2("(?<!a.*z|a)def", "axxxxxxxzdefxxdef", 14, 17); + x2("(?<!a.*z|a)def", "bxxxxxxxadefxxdef", 14, 17); + x2("(?<!a.*z|a)def", "bxxxxxxxzdef", 9, 12); + x2("(?<!x+|y+)\\d+", "xxx572", 4, 6); + x2("(?<!3+|4+)\\d+", "33334444", 0, 8); + n(".(?<!3+|4+)\\d+", "33334444"); + n("(.{,3})..(?<!\\1)", "aaaaa"); + x2("(.{,3})..(?<!\\1)", "abcde", 0, 5); + x2("(.{,3})...(?<!\\1)", "abcde", 0, 5); + x2("(a.c)(.{3,}?)(?<!\\1)", "abcabcd", 0, 7); + x2("(a*)(.{3,}?)(?<!\\1)", "abcabcd", 0, 5); + 
x2("(?:(a.*b)|c.*d)(?<!(?(1))azzzb)", "azzzzb", 0, 6); + n("(?:(a.*b)|c.*d)(?<!(?(1))azzzb)", "azzzb"); + x2("<(?<!NT{+}abcd)", "<(?<!NT{+}abcd)", 0, 1); + x2("(?<!a.*c)def", "abbbbdef", 5, 8); + n("(?<!a.*c)def", "abbbcdef"); + x2("(?<!a.*X\\b)def", "abbbbbXdef", 7, 10); + n("(?<!a.*X\\B)def", "abbbbbXdef"); + x2("(?<!a.*[uvw])def", "abbbbbXdef", 7, 10); + n("(?<!a.*[uvw])def", "abbbbbwdef"); + x2("(?<!ab*\\S+)def", "abbbbb def", 9, 12); + x2("(?<!a.*\\S)def", "abbbbb def", 7, 10); + n("(?<!ab*\\s+)def", "abbbbb def"); + x2("(?<!ab*\\s+\\B)def", "abbbbb def", 9, 12); + n("(?<!v|t|a+.*[efg])z", "abcdfz"); + x2("(?<!v|t|a+.*[efg])z", "abcdfzavzuz", 10, 11); + n("(?<!v|t|^a+.*[efg])z", "abcdfz"); + n("(?<!^(?:v|t|a+.*[efg]))z", "abcdfz"); + x2("(?<!v|^t|^a+.*[efg])z", "uabcdfz", 6, 7); x2("((?(a)\\g<1>|b))", "aab", 0, 3); x2("((?(a)\\g<1>))", "aab", 0, 2); x2("(b(?(a)|\\g<1>))", "bba", 0, 3); e("(()(?(2)\\g<1>))", "", ONIGERR_NEVER_ENDING_RECURSION); + x2("(?(a)(?:b|c))", "ac", 0, 2); + n("^(?(a)b|c)", "ac"); + x2("(?i)a|b", "B", 0, 1); + n("((?i)a|b.)|c", "C"); + n("c(?i)a.|b.", "Caz"); + x2("c(?i)a|b", "cB", 0, 2); /* == c(?i:a|b) */ + x2("c(?i)a.|b.", "cBb", 0, 3); x2("(?i)st", "st", 0, 2); x2("(?i)st", "St", 0, 2); @@ -1279,7 +1390,68 @@ extern int main(int argc, char* argv[]) x2("(?i)[ǰ]", "ǰ", 0, 2); x2("(?i)[ǰ]", "j\xcc\x8c", 0, 3); //x2("(?i)[j]\xcc\x8c", "ǰ", 0, 2); + x2("(?i)\ufb00a", "ffa", 0, 3); + x2("(?i)ffz", "\xef\xac\x80z", 0, 4); + x2("(?i)\u2126", "\xcf\x89", 0, 2); + x2("a(?i)\u2126", "a\xcf\x89", 0, 3); + x2("(?i)A\u2126", "a\xcf\x89", 0, 3); + x2("(?i)A\u2126=", "a\xcf\x89=", 0, 4); + x2("(?i:ss)=1234567890", "\xc5\xbf\xc5\xbf=1234567890", 0, 15); + x2("\\x{000A}", "\x0a", 0, 1); + x2("\\x{000A 002f}", "\x0a\x2f", 0, 2); + x2("\\x{000A 002f }", "\x0a\x2f", 0, 2); + x2("\\x{007C 001b}", "\x7c\x1b", 0, 2); + x2("\\x{1 2 3 4 5 6 7 8 9 a b c d e f}", "\x01\x02\x3\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", 0, 15); + x2("a\\x{000A 002f}@", 
"a\x0a\x2f@", 0, 4); + x2("a\\x{0060\n0063}@", "a\x60\x63@", 0, 4); + e("\\x{00000001 000000012}", "", ONIGERR_TOO_LONG_WIDE_CHAR_VALUE); + e("\\x{000A 00000002f}", "", ONIGERR_TOO_LONG_WIDE_CHAR_VALUE); + e("\\x{000A 002f/", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\x{000A 002f /", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\x{000A", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\x{000A ", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\x{000A 002f ", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("\\o{102}", "B", 0, 1); + x2("\\o{102 103}", "BC", 0, 2); + x2("\\o{0160 0000161}", "pq", 0, 2); + x2("\\o{1 2 3 4 5 6 7 10 11 12 13 14 15 16 17}", "\x01\x02\x3\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", 0, 15); + x2("\\o{0007 0010 }", "\x07\x08", 0, 2); + e("\\o{0000 0015/", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\o{0000 0015 /", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\o{0015", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\o{0015 ", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\o{0007 002f}", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[\\x{000A}]", "\x0a", 0, 1); + x2("[\\x{000A 002f}]+", "\x0a\x2f\x2e", 0, 2); + x2("[\\x{01 0F 1A 2c 4B}]+", "\x20\x01\x0f\x1a\x2c\x4b\x1b", 1, 6); + x2("[\\x{0020 0024}-\\x{0026}]+", "\x25\x24\x26\x23", 0, 3); + x2("[\\x{0030}-\\x{0033 005a}]+", "\x30\x31\x32\x33\x5a\34", 0, 5); + e("[\\x{000A]", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{000A ]", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{000A }]", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[\\o{102}]", "B", 0, 1); + x2("[\\o{102 103}]*", "BC", 0, 2); + e("[a\\o{002 003]bcde|zzz", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[\\x{0030-0039}]+", "abc0123456789def", 3, 13); + x2("[\\x{0030 - 0039 }]+", "abc0123456789def", 3, 13); + x2("[\\x{0030 - 0039 0063 0064}]+", "abc0123456789def", 2, 14); + x2("[\\x{0030 - 0039 0063-0065}]+", "acde019b", 1, 7); + e("[\\x{0030 - 0039-0063 0064}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030 - }]+", "", 
ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030 -- 0040}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030--0040}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030 - - 0040}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030 0044 - }]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[a-\\x{0070 - 0039}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[a-\\x{0063 0071}]+", "dabcqz", 1, 5); + x2("[-\\x{0063-0065}]+", "ace-df", 1, 5); + x2("[\\x61-\\x{0063 0065}]+", "abced", 0, 4); + e("[\\x61-\\x{0063-0065}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[t\\x{0063 0071}]+", "tcqb", 0, 3); + x2("[\\W\\x{0063 0071}]+", "*cqa", 0, 3); + + n("a(b|)+d", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcd"); /* https://www.haijin-boys.com/discussions/5079 */ n(" \xfd", ""); /* https://bugs.php.net/bug.php?id=77370 */ /* can't use \xfc00.. because compiler error: hex escape sequence out of range */ n("()0\\xfc00000\\xfc00000\\xfc00000\xfc", ""); /* https://bugs.php.net/bug.php?id=77371 */ @@ -1299,8 +1471,14 @@ extern int main(int argc, char* argv[]) e("[\\x{7fffffff}]", "", ONIGERR_INVALID_CODE_POINT_VALUE); e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); e("(?<abc>\\g<abc>)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); - e("(?<=(?>abc))", "abc", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); e("(*FOO)", "abcdefg", ONIGERR_UNDEFINED_CALLOUT_NAME); + e("*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("|*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?i)*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?:*)", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?m:*)", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + x2("(?:)*", "abc", 0, 0); + e("^*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID); fprintf(stdout, "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", diff --git a/test/testc.c b/test/testc.c index 5c60764..fbede67 100644 --- a/test/testc.c +++ b/test/testc.c @@ -1,17 +1,12 
@@ /* * testc.c - * Copyright (c) 2019 K.Kosako + * Copyright (c) 2019-2020 K.Kosako */ #include "config.h" #include <stdio.h> +#include <string.h> -#ifdef POSIX_TEST -#include "onigposix.h" -#else #include "oniguruma.h" -#endif - -#include <string.h> #define SLEN(s) strlen(s) @@ -20,66 +15,11 @@ static int nfail = 0; static int nerror = 0; static FILE* err_file; - -#ifndef POSIX_TEST static OnigRegion* region; -#endif static void xx(char* pattern, char* str, int from, int to, int mem, int not) { int r; - -#ifdef POSIX_TEST - regex_t reg; - char buf[200]; - regmatch_t pmatch[25]; - - r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); - if (r) { - regerror(r, ®, buf, sizeof(buf)); - fprintf(err_file, "ERROR: %s\n", buf); - nerror++; - return ; - } - - r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); - if (r != 0 && r != REG_NOMATCH) { - regerror(r, ®, buf, sizeof(buf)); - fprintf(err_file, "ERROR: %s\n", buf); - nerror++; - return ; - } - - if (r == REG_NOMATCH) { - if (not) { - fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); - nsucc++; - } - else { - fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); - nfail++; - } - } - else { - if (not) { - fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); - nfail++; - } - else { - if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { - fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); - nsucc++; - } - else { - fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, - from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); - nfail++; - } - } - } - regfree(®); - -#else regex_t* reg; OnigErrorInfo einfo; @@ -132,7 +72,6 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not) } } onig_free(reg); -#endif } static void x2(char* pattern, char* str, int from, int to) @@ -152,20 +91,13 @@ static void n(char* pattern, char* str) extern int main(int argc, char* argv[]) { -#ifndef POSIX_TEST OnigEncoding use_encs[1]; use_encs[0] = ONIG_ENCODING_EUC_JP; onig_initialize(use_encs, 
sizeof(use_encs)/sizeof(use_encs[0])); -#endif err_file = stdout; - -#ifdef POSIX_TEST - reg_set_encoding(REG_POSIX_ENCODING_EUC_JP); -#else region = onig_region_new(); -#endif x2("", "", 0, 0); x2("^", "", 0, 0); @@ -961,19 +893,15 @@ extern int main(int argc, char* argv[]) x2(".<b>�С������Υ����������<\\/b>", "a<b>�С������Υ����������</b>", 0, 32); x2("\\n?\\z", "����ˤ���", 10, 10); -#ifndef POSIX_TEST x2("\\p{Hiragana}", "��", 0, 2); n("\\P{Hiragana}", "��"); -#endif fprintf(stdout, "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", nsucc, nfail, nerror, onig_version()); -#ifndef POSIX_TEST onig_region_free(region, 1); onig_end(); -#endif return ((nfail == 0 && nerror == 0) ? 0 : -1); } diff --git a/test/testp.c b/test/testp.c new file mode 100644 index 0000000..b88d0e3 --- /dev/null +++ b/test/testp.c @@ -0,0 +1,614 @@ +/* + * testp.c + * Copyright (c) 2020 K.Kosako + */ +#include "config.h" +#include <stdio.h> +#include <string.h> + +#include "onigposix.h" + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +static void +xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + regex_t reg; + char buf[200]; + regmatch_t pmatch[25]; + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + 
fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(®); +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ + err_file = stdout; + + reg_set_encoding(REG_POSIX_ENCODING_UTF8); + + x2("", "", 0, 0); + x2("^", "", 0, 0); + x2("$", "", 0, 0); + x2("\\G", "", 0, 0); + x2("\\A", "", 0, 0); + x2("\\Z", "", 0, 0); + x2("\\z", "", 0, 0); + x2("^$", "", 0, 0); + x2("\\ca", "\001", 0, 1); + x2("\\C-b", "\002", 0, 1); + x2("\\c\\\\", "\034", 0, 1); + x2("q[\\c\\\\]", "q\034", 0, 2); + x2("", "a", 0, 0); + x2("a", "a", 0, 1); + x2("\\x61", "a", 0, 1); + x2("aa", "aa", 0, 2); + x2("aaa", "aaa", 0, 3); + x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); + x2("ab", "ab", 0, 2); + x2("b", "ab", 1, 2); + x2("bc", "abc", 1, 3); + x2("(?i:#RET#)", "#INS##RET#", 5, 10); + x2("\\17", "\017", 0, 1); + x2("\\x1f", "\x1f", 0, 1); + x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); + x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); + x2(".", "a", 0, 1); + n(".", ""); + x2("..", "ab", 0, 2); + x2("\\w", "e", 0, 1); + n("\\W", "e"); + x2("\\s", " ", 0, 1); + x2("\\S", "b", 0, 1); + x2("\\d", "4", 0, 1); + n("\\D", "4"); + x2("\\b", "z ", 0, 0); + x2("\\b", " z", 1, 1); + x2("\\B", "zz ", 1, 1); + x2("\\B", "z ", 2, 2); + x2("\\B", " z", 0, 0); + x2("[ab]", "b", 0, 1); + n("[ab]", "c"); + x2("[a-z]", "t", 0, 1); + n("[^a]", "a"); + x2("[^a]", "\n", 0, 1); + x2("[]]", "]", 0, 1); + n("[^]]", "]"); + x2("[\\^]+", "0^^1", 1, 3); + x2("[b-]", "b", 0, 1); + x2("[b-]", "-", 0, 1); + x2("[\\w]", "z", 0, 1); + 
n("[\\w]", " "); + x2("[\\W]", "b$", 1, 2); + x2("[\\d]", "5", 0, 1); + n("[\\d]", "e"); + x2("[\\D]", "t", 0, 1); + n("[\\D]", "3"); + x2("[\\s]", " ", 0, 1); + n("[\\s]", "a"); + x2("[\\S]", "b", 0, 1); + n("[\\S]", " "); + x2("[\\w\\d]", "2", 0, 1); + n("[\\w\\d]", " "); + x2("[[:upper:]]", "B", 0, 1); + x2("[*[:xdigit:]+]", "+", 0, 1); + x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); + x2("[*[:xdigit:]+]", "-@^+", 3, 4); + n("[[:upper]]", "A"); + x2("[[:upper]]", ":", 0, 1); + x2("[\\044-\\047]", "\046", 0, 1); + x2("[\\x5a-\\x5c]", "\x5b", 0, 1); + x2("[\\x6A-\\x6D]", "\x6c", 0, 1); + n("[\\x6A-\\x6D]", "\x6E"); + n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); + x2("[\\[]", "[", 0, 1); + x2("[\\]]", "]", 0, 1); + x2("[&]", "&", 0, 1); + x2("[[ab]]", "b", 0, 1); + x2("[[ab]c]", "c", 0, 1); + n("[[^a]]", "a"); + n("[^[a]]", "a"); + x2("[[ab]&&bc]", "b", 0, 1); + n("[[ab]&&bc]", "a"); + n("[[ab]&&bc]", "c"); + x2("[a-z&&b-y&&c-x]", "w", 0, 1); + n("[^a-z&&b-y&&c-x]", "w"); + x2("[[^a&&a]&&a-z]", "b", 0, 1); + n("[[^a&&a]&&a-z]", "a"); + x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); + n("[[^a-z&&bcdef]&&[^c-g]]", "c"); + x2("[^[^abc]&&[^cde]]", "c", 0, 1); + x2("[^[^abc]&&[^cde]]", "e", 0, 1); + n("[^[^abc]&&[^cde]]", "f"); + x2("[a-&&-a]", "-", 0, 1); + n("[a\\-&&\\-a]", "&"); + n("\\wabc", " abc"); + x2("a\\Wbc", "a bc", 0, 4); + x2("a.b.c", "aabbc", 0, 5); + x2(".\\wb\\W..c", "abb bcc", 0, 7); + x2("\\s\\wzzz", " zzzz", 0, 5); + x2("aa.b", "aabb", 0, 4); + n(".a", "ab"); + x2(".a", "aa", 0, 2); + x2("^a", "a", 0, 1); + x2("^a$", "a", 0, 1); + x2("^\\w$", "a", 0, 1); + n("^\\w$", " "); + x2("^\\wab$", "zab", 0, 3); + x2("^\\wabcdef$", "zabcdef", 0, 7); + x2("^\\w...def$", "zabcdef", 0, 7); + x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); + x2("\\A\\Z", "", 0, 0); + x2("\\Axyz", "xyz", 0, 3); + x2("xyz\\Z", "xyz", 0, 3); + x2("xyz\\z", "xyz", 0, 3); + x2("a\\Z", "a", 0, 1); + x2("\\Gaz", "az", 0, 2); + n("\\Gz", "bza"); + n("az\\G", "az"); + 
n("az\\A", "az"); + n("a\\Az", "az"); + x2("\\^\\$", "^$", 0, 2); + x2("^x?y", "xy", 0, 2); + x2("^(x?y)", "xy", 0, 2); + x2("\\w", "_", 0, 1); + n("\\W", "_"); + x2("(?=z)z", "z", 0, 1); + n("(?=z).", "a"); + x2("(?!z)a", "a", 0, 1); + n("(?!z)a", "z"); + x2("(?i:a)", "a", 0, 1); + x2("(?i:a)", "A", 0, 1); + x2("(?i:A)", "a", 0, 1); + n("(?i:A)", "b"); + x2("(?i:[A-Z])", "a", 0, 1); + x2("(?i:[f-m])", "H", 0, 1); + x2("(?i:[f-m])", "h", 0, 1); + n("(?i:[f-m])", "e"); + x2("(?i:[A-c])", "D", 0, 1); + n("(?i:[^a-z])", "A"); + n("(?i:[^a-z])", "a"); + x2("(?i:[!-k])", "Z", 0, 1); + x2("(?i:[!-k])", "7", 0, 1); + x2("(?i:[T-}])", "b", 0, 1); + x2("(?i:[T-}])", "{", 0, 1); + x2("(?i:\\?a)", "?A", 0, 2); + x2("(?i:\\*A)", "*a", 0, 2); + n(".", "\n"); + x2("(?m:.)", "\n", 0, 1); + x2("(?m:a.)", "a\n", 0, 2); + x2("(?m:.b)", "a\nb", 1, 3); + x2(".*abc", "dddabdd\nddabc", 8, 13); + x2("(?m:.*abc)", "dddabddabc", 0, 10); + n("(?i)(?-i)a", "A"); + n("(?i)(?-i:a)", "A"); + x2("a?", "", 0, 0); + x2("a?", "b", 0, 0); + x2("a?", "a", 0, 1); + x2("a*", "", 0, 0); + x2("a*", "a", 0, 1); + x2("a*", "aaa", 0, 3); + x2("a*", "baaaa", 0, 0); + n("a+", ""); + x2("a+", "a", 0, 1); + x2("a+", "aaaa", 0, 4); + x2("a+", "aabbb", 0, 2); + x2("a+", "baaaa", 1, 5); + x2(".?", "", 0, 0); + x2(".?", "f", 0, 1); + x2(".?", "\n", 0, 0); + x2(".*", "", 0, 0); + x2(".*", "abcde", 0, 5); + x2(".+", "z", 0, 1); + x2(".+", "zdswer\n", 0, 6); + x2("(.*)a\\1f", "babfbac", 0, 4); + x2("(.*)a\\1f", "bacbabf", 3, 7); + x2("((.*)a\\2f)", "bacbabf", 3, 7); + x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); + x2("a|b", "a", 0, 1); + x2("a|b", "b", 0, 1); + x2("|a", "a", 0, 0); + x2("(|a)", "a", 0, 0); + x2("ab|bc", "ab", 0, 2); + x2("ab|bc", "bc", 0, 2); + x2("z(?:ab|bc)", "zbc", 0, 3); + x2("a(?:ab|bc)c", "aabc", 0, 4); + x2("ab|(?:ac|az)", "az", 0, 2); + x2("a|b|c", "dc", 1, 2); + x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); + n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); + x2("a|^z", 
"ba", 1, 2); + x2("a|^z", "za", 0, 1); + x2("a|\\Gz", "bza", 2, 3); + x2("a|\\Gz", "za", 0, 1); + x2("a|\\Az", "bza", 2, 3); + x2("a|\\Az", "za", 0, 1); + x2("a|b\\Z", "ba", 1, 2); + x2("a|b\\Z", "b", 0, 1); + x2("a|b\\z", "ba", 1, 2); + x2("a|b\\z", "b", 0, 1); + x2("\\w|\\s", " ", 0, 1); + n("\\w|\\w", " "); + x2("\\w|%", "%", 0, 1); + x2("\\w|[&$]", "&", 0, 1); + x2("[b-d]|[^e-z]", "a", 0, 1); + x2("(?:a|[c-f])|bz", "dz", 0, 1); + x2("(?:a|[c-f])|bz", "bz", 0, 2); + x2("abc|(?=zz)..f", "zzf", 0, 3); + x2("abc|(?!zz)..f", "abf", 0, 3); + x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); + n("(?>a|abd)c", "abdc"); + x2("(?>abd|a)c", "abdc", 0, 4); + x2("a?|b", "a", 0, 1); + x2("a?|b", "b", 0, 0); + x2("a?|b", "", 0, 0); + x2("a*|b", "aa", 0, 2); + x2("a*|b*", "ba", 0, 0); + x2("a*|b*", "ab", 0, 1); + x2("a+|b*", "", 0, 0); + x2("a+|b*", "bbb", 0, 3); + x2("a+|b*", "abbb", 0, 1); + n("a+|b+", ""); + x2("(a|b)?", "b", 0, 1); + x2("(a|b)*", "ba", 0, 2); + x2("(a|b)+", "bab", 0, 3); + x2("(ab|ca)+", "caabbc", 0, 4); + x2("(ab|ca)+", "aabca", 1, 5); + x2("(ab|ca)+", "abzca", 0, 2); + x2("(a|bab)+", "ababa", 0, 5); + x2("(a|bab)+", "ba", 1, 2); + x2("(a|bab)+", "baaaba", 1, 4); + x2("(?:a|b)(?:a|b)", "ab", 0, 2); + x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); + x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); + x2("(?:a+|b+){2}", "aaabbb", 0, 6); + x2("h{0,}", "hhhh", 0, 4); + x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); + n("ax{2}*a", "0axxxa1"); + n("a.{0,2}a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXXa0"); + x2("^a{2,}?a$", "aaa", 0, 3); + x2("^[a-z]{2,}?$", "aaa", 0, 3); + x2("(?:a+|\\Ab*)cc", "cc", 0, 2); + n("(?:a+|\\Ab*)cc", "abcc"); + x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); + x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); + x2("a|(?i)c", "C", 0, 1); + x2("(?i)c|a", "C", 0, 1); + x2("(?i)c|a", "A", 0, 1); + x2("(?i:c)|a", "C", 0, 1); + n("(?i:c)|a", "A"); + x2("[abc]?", "abc", 0, 1); + x2("[abc]*", "abc", 0, 3); + x2("[^abc]*", "abc", 0, 0); + n("[^abc]+", "abc"); + x2("a?\?", 
"aaa", 0, 0); + x2("ba?\?b", "bab", 0, 3); + x2("a*?", "aaa", 0, 0); + x2("ba*?", "baa", 0, 1); + x2("ba*?b", "baab", 0, 4); + x2("a+?", "aaa", 0, 1); + x2("ba+?", "baa", 0, 2); + x2("ba+?b", "baab", 0, 4); + x2("(?:a?)?\?", "a", 0, 0); + x2("(?:a?\?)?", "a", 0, 0); + x2("(?:a?)+?", "aaa", 0, 1); + x2("(?:a+)?\?", "aaa", 0, 0); + x2("(?:a+)?\?b", "aaab", 0, 4); + x2("(?:ab)?{2}", "", 0, 0); + x2("(?:ab)?{2}", "ababa", 0, 4); + x2("(?:ab)*{0}", "ababa", 0, 0); + x2("(?:ab){3,}", "abababab", 0, 8); + n("(?:ab){3,}", "abab"); + x2("(?:ab){2,4}", "ababab", 0, 6); + x2("(?:ab){2,4}", "ababababab", 0, 8); + x2("(?:ab){2,4}?", "ababababab", 0, 4); + x2("(?:ab){,}", "ab{,}", 0, 5); + x2("(?:abc)+?{2}", "abcabcabc", 0, 6); + x2("(?:X*)(?i:xa)", "XXXa", 0, 4); + x2("(d+)([^abc]z)", "dddz", 0, 4); + x2("([^abc]*)([^abc]z)", "dddz", 0, 4); + x2("(\\w+)(\\wz)", "dddz", 0, 4); + x3("(a)", "a", 0, 1, 1); + x3("(ab)", "ab", 0, 2, 1); + x2("((ab))", "ab", 0, 2); + x3("((ab))", "ab", 0, 2, 1); + x3("((ab))", "ab", 0, 2, 2); + x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); + x3("(ab)(cd)", "abcd", 0, 2, 1); + x3("(ab)(cd)", "abcd", 2, 4, 2); + x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); + x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); + x2("(^a)", "a", 0, 1); + x3("(a)|(a)", "ba", 1, 2, 1); + x3("(^a)|(a)", "ba", 1, 2, 2); + x3("(a?)", "aaa", 0, 1, 1); + x3("(a*)", "aaa", 0, 3, 1); + x3("(a*)", "", 0, 0, 1); + x3("(a+)", "aaaaaaa", 0, 7, 1); + x3("(a+|b*)", "bbbaa", 0, 3, 1); + x3("(a+|b?)", "bbbaa", 0, 1, 1); + x3("(abc)?", "abc", 0, 3, 1); + x3("(abc)*", "abc", 0, 3, 1); + x3("(abc)+", "abc", 0, 3, 1); + x3("(xyz|abc)+", "abc", 0, 3, 1); + x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); + x3("((?i:abc))", "AbC", 0, 3, 1); + x2("(abc)(?i:\\1)", "abcABC", 0, 6); + x3("((?m:a.c))", "a\nc", 0, 3, 1); + x3("((?=az)a)", "azb", 0, 1, 1); + x3("abc|(.abd)", "zabd", 0, 4, 1); + x2("(?:abc)|(ABC)", "abc", 0, 3); + x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); + x3("a*(.)", 
"aaaaz", 4, 5, 1); + x3("a*?(.)", "aaaaz", 0, 1, 1); + x3("a*?(c)", "aaaac", 4, 5, 1); + x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); + x3("(\\Abb)cc", "bbcc", 0, 2, 1); + n("(\\Abb)cc", "zbbcc"); + x3("(^bb)cc", "bbcc", 0, 2, 1); + n("(^bb)cc", "zbbcc"); + x3("cc(bb$)", "ccbb", 2, 4, 1); + n("cc(bb$)", "ccbbb"); + n("(\\1)", ""); + n("\\1(a)", "aa"); + n("(a(b)\\1)\\2+", "ababb"); + n("(?:(?:\\1|z)(a))+$", "zaa"); + x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); + x2("(a)(?=\\1)", "aa", 0, 1); + n("(a)$|\\1", "az"); + x2("(a)\\1", "aa", 0, 2); + n("(a)\\1", "ab"); + x2("(a?)\\1", "aa", 0, 2); + x2("(a?\?)\\1", "aa", 0, 0); + x2("(a*)\\1", "aaaaa", 0, 4); + x3("(a*)\\1", "aaaaa", 0, 2, 1); + x2("a(b*)\\1", "abbbb", 0, 5); + x2("a(b*)\\1", "ab", 0, 1); + x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); + x2("(a*)(b*)\\2", "aaabbbb", 0, 7); + x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); + x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); + x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); + x2("([a-d])\\1", "cc", 0, 2); + x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); + n("(\\w\\d\\s)\\1", "f5 f5"); + x2("(who|[a-c]{3})\\1", "whowho", 0, 6); + x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); + x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); + x2("(^a)\\1", "aa", 0, 2); + n("(^a)\\1", "baa"); + n("(a$)\\1", "aa"); + n("(ab\\Z)\\1", "ab"); + x2("(a*\\Z)\\1", "a", 1, 1); + x2(".(a*\\Z)\\1", "ba", 1, 2); + x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); + x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); + x2("((?i:az))\\1", "AzAz", 0, 4); + n("((?i:az))\\1", "Azaz"); + x2("(?<=a)b", "ab", 1, 2); + n("(?<=a)b", "bb"); + x2("(?<=a|b)b", "bb", 1, 2); + x2("(?<=a|bc)b", "bcb", 2, 3); + x2("(?<=a|bc)b", "ab", 1, 2); + x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); + x2("(a)\\g<1>", "aa", 0, 2); + x2("(?<!a)b", "cb", 1, 2); + n("(?<!a)b", "ab"); + x2("(?<!a|bc)b", "bbb", 0, 1); + n("(?<!a|bc)z", "bcz"); + x2("(?<name1>a)", "a", 0, 1); + x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4); + x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 
8); + x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3); + x2("(?<n>|a\\g<n>)+", "", 0, 0); + x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6); + x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1); + x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3); + x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4); + x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8); + x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); + x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2); + x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0); + x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9); + n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg"); + x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10); + x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); + x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16); + x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1); + x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13); + x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1); + x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9); + x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); + x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7); + x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4); + 
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5); + x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10); + x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x3("(?:\\1a|())*", "a", 0, 0, 1); + x2("x((.)*)*x", "0x1x2x3", 1, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("[0-9-a]", "-", 0, 1); // PR#44 + n("[0-9-a]", ":"); // PR#44 + x3("(\\(((?:[^(]|\\g<1>)*)\\))", "(abc)(abc)", 1, 4, 2); // PR#43 + x2("\\o{101}", "A", 0, 1); + x2("(?:\\k'+1'B|(A)C)*", "ACAB", 0, 4); // relative backref by postitive number + x2("\\g<+2>(abc)(ABC){0}", "ABCabc", 0, 6); // relative call by positive number + x2("A\\g'0'|B()", "AAAAB", 0, 5); + x3("(A\\g'0')|B", "AAAAB", 0, 5, 1); + x2("(a*)(?(1))aa", "aaaaa", 0, 5); + x2("(a*)(?(-1))aa", "aaaaa", 0, 5); + x2("(?<name>aaa)(?('name'))aa", "aaaaa", 0, 5); + x2("(a)(?(1)aa|bb)a", "aaaaa", 0, 4); + x2("(?:aa|())(?(<1>)aa|bb)a", "aabba", 0, 5); + x2("(?:aa|())(?('1')aa|bb|cc)a", "aacca", 0, 5); + x3("(a*)(?(1)aa|a)b", "aaab", 0, 1, 1); + n("(a)(?(1)a|b)c", "abc"); + x2("(a)(?(1)|)c", "ac", 0, 2); + n("(?()aaa|bbb)", "bbb"); + x2("(a)(?(1+0)b|c)d", "abd", 0, 3); + x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "ace", 0, 3); + x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "bce", 0, 3); + x2("\\R", "\r\n", 0, 2); + x2("\\R", "\r", 0, 1); + x2("\\R", "\n", 0, 1); + x2("\\R", "\x0b", 0, 1); + n("\\R\\n", "\r\n"); + x2("\\N", "a", 0, 1); + n("\\N", "\n"); + n("(?m:\\N)", "\n"); + n("(?-m:\\N)", "\n"); + x2("\\O", "a", 0, 1); + x2("\\O", "\n", 0, 1); + x2("(?m:\\O)", "\n", 0, 1); + x2("(?-m:\\O)", "\n", 0, 1); + x2("\\K", "a", 0, 0); + x2("a\\K", "a", 1, 1); + x2("a\\Kb", "ab", 1, 2); + x2("(a\\Kb|ac\\Kd)", "acd", 2, 3); + x2("(a\\Kb|\\Kac\\K)*", "acababacab", 9, 10); + + x2("(?~)", "", 0, 0); + x2("(?~)", "A", 0, 0); + x2("aaaaa(?~)", "aaaaaaaaaa", 0, 5); + 
x2("(?~(?:|aaa))", "aaa", 0, 0); + x2("(?~aaa|)", "aaa", 0, 0); + x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // !!! + x2("/\\*(?~\\*/)\\*/", "/* */ */", 0, 5); + x2("(?~\\w+)zzzzz", "zzzzz", 0, 5); + x2("(?~\\w*)zzzzz", "zzzzz", 0, 5); + x2("(?~A.C|B)", "ABC", 0, 0); + x2("(?~XYZ|ABC)a", "ABCa", 1, 4); + x2("(?~XYZ|ABC)a", "aABCa", 0, 1); + x2("<[^>]*>(?~[<>])</[^>]*>", "<a>vvv</a> <b> </b>", 0, 10); + x2("(?~ab)", "ccc\ndab", 0, 5); + x2("(?m:(?~ab))", "ccc\ndab", 0, 5); + x2("(?-m:(?~ab))", "ccc\ndab", 0, 5); + x2("(?~abc)xyz", "xyz012345678901234567890123456789abc", 0, 3); + + // absent with expr + x2("(?~|78|\\d*)", "123456789", 0, 6); + x2("(?~|def|(?:abc|de|f){0,100})", "abcdedeabcfdefabc", 0, 11); + x2("(?~|ab|.*)", "ccc\nddd", 0, 3); + x2("(?~|ab|\\O*)", "ccc\ndab", 0, 5); + x2("(?~|ab|\\O{2,10})", "ccc\ndab", 0, 5); + x2("(?~|ab|\\O{1,10})", "ab", 1, 2); + n("(?~|ab|\\O{2,10})", "ab"); + x2("(?~|abc|\\O{1,10})", "abc", 1, 3); + x2("(?~|ab|\\O{5,10})|abc", "abc", 0, 3); + x2("(?~|ab|\\O{1,10})", "cccccccccccab", 0, 10); + x2("(?~|aaa|)", "aaa", 0, 0); + x2("(?~||a*)", "aaaaaa", 0, 0); + x2("(?~||a*?)", "aaaaaa", 0, 0); + x2("(a)(?~|b|\\1)", "aaaaaa", 0, 2); + x2("(a)(?~|bb|(?:a\\1)*)", "aaaaaa", 0, 5); + x2("(b|c)(?~|abac|(?:a\\1)*)", "abababacabab", 1, 4); + n("(?~|c|a*+)a", "aaaaa"); + x2("(?~|aaaaa|a*+)", "aaaaa", 0, 0); + x2("(?~|aaaaaa|a*+)b", "aaaaaab", 1, 7); + x2("(?~|abcd|(?>))", "zzzabcd", 0, 0); + x2("(?~|abc|a*?)", "aaaabc", 0, 0); + + // absent stopper + x2("(?~|abc)a*", "aaaaaabc", 0, 5); + x2("(?~|abc)a*z|aaaaaabc", "aaaaaabc", 0, 8); + x2("(?~|aaaaaa)a*", "aaaaaa", 0, 0); + x2("(?~|abc)aaaa|aaaabc", "aaaabc", 0, 6); + x2("(?>(?~|abc))aaaa|aaaabc", "aaaabc", 0, 6); + x2("(?~|)a", "a", 0, 1); + n("(?~|a)a", "a"); + x2("(?~|a)(?~|)a", "a", 0, 1); + x2("(?~|a).*(?~|)a", "bbbbbbbbbbbbbbbbbbbba", 0, 21); + x2("(?~|abc).*(xyz|pqr)(?~|)abc", "aaaaxyzaaapqrabc", 0, 16); + x2("(?~|abc).*(xyz|pqr)(?~|)abc", "aaaaxyzaaaabcpqrabc", 11, 19); + 
n("\\A(?~|abc).*(xyz|pqrabc)(?~|)abc", "aaaaxyzaaaabcpqrabcabc"); + x2("(?~|a)(?~|)c|ab|a|", "ab", 0, 2); + x2("(?~|a)((?~|)c|ab|a|)", "ab", 0, 0); + x2("(?~|a)((?>(?~|))c|ab|a|)", "ab", 0, 0); + + // extended grapheme cluster + // CR + LF + n(".\\y\\O", "\x0d\x0a"); + x2(".\\Y\\O", "\x0d\x0a", 0, 2); + n("\\X\\X", "\x0d\x0a"); + x2("^\\X$", "\x0d\x0a", 0, 2); + x2("^\\X\\X\\X$", "ab\x0d\x0a", 0, 4); + + fprintf(stdout, + "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} |