From 98ab313fe496ae7c792db29c80bf6b23347484ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Thu, 14 Dec 2017 16:55:10 +0100 Subject: New upstream version 6.7.0 --- .gitignore | 6 +- HISTORY | 12 + README.md | 9 + configure.ac | 2 +- dist.info | 2 +- doc/API | 5 +- doc/API.ja | 5 +- doc/RE | 7 +- doc/RE.ja | 7 +- index.html | 3 +- index_ja.html | 3 +- sample/Makefile.am | 2 +- sample/posix.c | 2 +- src/Makefile.windows | 20 +- src/euc_jp.c | 4 +- src/gperf_fold_key_conv.py | 2 +- src/make_unicode_egcb_data.py | 32 +- src/make_unicode_fold_data.py | 6 +- src/make_unicode_property.sh | 7 +- src/make_unicode_property_data.py | 10 +- src/onigposix.h | 5 +- src/oniguruma.h | 7 +- src/regcomp.c | 535 +++-- src/regenc.c | 6 +- src/regerror.c | 6 +- src/regexec.c | 323 ++- src/regext.c | 2 +- src/reggnu.c | 4 +- src/regint.h | 69 +- src/regparse.c | 254 +- src/regparse.h | 4 +- src/regposix.c | 2 +- src/regsyntax.c | 5 +- src/sjis.c | 6 +- src/unicode.c | 371 +-- src/unicode_egcb_data.c | 28 +- src/unicode_fold1_key.c | 2 +- src/unicode_fold2_key.c | 2 +- src/unicode_fold3_key.c | 2 +- src/unicode_property_data.c | 4624 ++++++++++++++++++++++++------------- src/unicode_property_data_posix.c | 88 +- src/utf8.c | 6 +- test/Makefile.am | 2 +- test/test_utf8.c | 13 + 44 files changed, 4118 insertions(+), 2394 deletions(-) diff --git a/.gitignore b/.gitignore index fe616c2..e42910f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,6 @@ ltmain.sh stamp-h1 configure config.status -config.log config.h config.h.in onig-config @@ -17,6 +16,11 @@ Makefile.in *.lo *.la *.pc +*.log +*.dll +*.lib +*.exe +*.exp *~ .libs/ .deps/ diff --git a/HISTORY b/HISTORY index 91675e9..0888a9e 100644 --- a/HISTORY +++ b/HISTORY @@ -1,5 +1,17 @@ History +2017/12/11: Version 6.7.0 + +2017/12/08: add ONIG_SYNTAX_ONIGURUMA (default syntax) +2017/12/05: restructure StackType +2017/11/13: implement subexp calls (?R), (?&name), (?-n), (?+n) for Perl syntax +2017/09/25: use 
string pool of gperf for Unicode Property lookup function +2017/09/16: fix #70: an empty greedy regex and a word boundary (.*\b) fails +2017/09/13: remove a stack type STK_POS +2017/09/08: fix #69: add a declaration of onig_end() +2017/09/07: fix #68: Compilation failure in out-of-source build +2017/09/03: [new] hexadecimal codepoint \uHHHH + 2017/08/30: Version 6.6.1 2017/08/29: fix definition of \X to (?>\O(?:\Y\O)*) diff --git a/README.md b/README.md index da0ce1f..6ddcdf1 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,15 @@ Supported character encodings: * CP1251: contributed by Byte +New feature of version 6.7.0 +-------------------------- + +* NEW: hexadecimal codepoint \uHHHH +* NEW: add ONIG_SYNTAX_ONIGURUMA (== ONIG_SYNTAX_DEFAULT) +* Disabled \N and \O on ONIG_SYNTAX_RUBY +* Reduced object size + + New feature of version 6.6.1 -------------------------- diff --git a/configure.ac b/configure.ac index d585690..ebb0a66 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT(onig, 6.6.1) +AC_INIT(onig, 6.7.0) AC_CONFIG_MACRO_DIR([m4]) diff --git a/dist.info b/dist.info index 52df1f7..fe7e16d 100644 --- a/dist.info +++ b/dist.info @@ -1,7 +1,7 @@ --- This file is part of LuaDist project name = "onig" -version = "6.6.1" +version = "6.7.0" desc = "Oniguruma is a regular expressions library." author = "K.Kosako" diff --git a/doc/API b/doc/API index f0d0f09..801e2ac 100644 --- a/doc/API +++ b/doc/API @@ -1,4 +1,4 @@ -Oniguruma API Version 6.6.0 2017/08/15 +Oniguruma API Version 6.7.0 2017/12/08 #include @@ -140,7 +140,8 @@ Oniguruma API Version 6.6.0 2017/08/15 ONIG_SYNTAX_PERL Perl ONIG_SYNTAX_PERL_NG Perl + named group ONIG_SYNTAX_RUBY Ruby - ONIG_SYNTAX_DEFAULT default (== Ruby) + ONIG_SYNTAX_ONIGURUMA Oniguruma + ONIG_SYNTAX_DEFAULT default (== ONIG_SYNTAX_ONIGURUMA) onig_set_default_syntax() or any OnigSyntaxType data address defined by user. 
diff --git a/doc/API.ja b/doc/API.ja index 213a783..d73186c 100644 --- a/doc/API.ja +++ b/doc/API.ja @@ -1,4 +1,4 @@ -鬼車インターフェース Version 6.6.0 2017/08/15 +鬼車インターフェース Version 6.7.0 2017/12/08 #include @@ -139,7 +139,8 @@ ONIG_SYNTAX_PERL Perl ONIG_SYNTAX_PERL_NG Perl + 名前付き捕獲式集合 ONIG_SYNTAX_RUBY Ruby - ONIG_SYNTAX_DEFAULT default (== Ruby) + ONIG_SYNTAX_ONIGURUMA Oniguruma + ONIG_SYNTAX_DEFAULT default (== ONIG_SYNTAX_ONIGURUMA) onig_set_default_syntax() または、ユーザが定義したOnigSyntaxTypeデータのアドレス diff --git a/doc/RE b/doc/RE index 8781b38..64f9bb2 100644 --- a/doc/RE +++ b/doc/RE @@ -1,6 +1,6 @@ -Oniguruma Regular Expressions Version 6.6.0 2017/08/29 +Oniguruma Regular Expressions Version 6.7.0 2017/12/08 -syntax: ONIG_SYNTAX_RUBY (default) +syntax: ONIG_SYNTAX_ONIGURUMA (default) 1. Syntax elements @@ -23,6 +23,7 @@ syntax: ONIG_SYNTAX_RUBY (default) \e escape (0x1B) \nnn octal char (encoded byte value) \o{17777777777} wide octal char (character code point value) + \uHHHH wide hexadecimal char (character code point value) \xHH hexadecimal char (encoded byte value) \x{7HHHHHHH} wide hexadecimal char (character code point value) \cx control char (character code point value) @@ -421,7 +422,7 @@ syntax: ONIG_SYNTAX_RUBY (default) ----------------------------- A-1. Syntax-dependent options - + ONIG_SYNTAX_RUBY + + ONIG_SYNTAX_ONIGURUMA (?m): dot (.) also matches newline + ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA diff --git a/doc/RE.ja b/doc/RE.ja index 97fde3f..4679818 100644 --- a/doc/RE.ja +++ b/doc/RE.ja @@ -1,6 +1,6 @@ -鬼車 正規表現 Version 6.6.0 2017/08/29 +鬼車 正規表現 Version 6.7.0 2017/12/08 -使用文法: ONIG_SYNTAX_RUBY (既定値) +使用文法: ONIG_SYNTAX_ONIGURUMA (既定値) 1. 基本要素 @@ -23,6 +23,7 @@ \e 退避修飾 (0x1B) \nnn 八進数表現 符号化バイト値(の一部) \o{17777777777} 拡張八進数表現 コードポイント値 + \uHHHH 拡張十六進数表現 コードポイント値 \xHH 十六進数表現 符号化バイト値(の一部) \x{7HHHHHHH} 拡張十六進数表現 コードポイント値 \cx 制御文字表現 コードポイント値 @@ -429,7 +430,7 @@ ----------------------------- 補記 1. 
文法依存オプション - + ONIG_SYNTAX_RUBY + + ONIG_SYNTAX_ONIGURUMA (?m): 終止符記号(.)は改行と照合成功 + ONIG_SYNTAX_PERL と ONIG_SYNTAX_JAVA diff --git a/index.html b/index.html index 2bcdf07..e15cf8a 100644 --- a/index.html +++ b/index.html @@ -8,7 +8,7 @@

Oniguruma

(Japanese)

-(c) K.Kosako, updated at: 2017/08/30 +(c) K.Kosako, updated at: 2017/12/08

@@ -16,6 +16,7 @@
What's new
    +
  • 2017/12/11: Version 6.7.0 released.
  • 2017/08/30: Version 6.6.1 released.
  • 2017/08/28: Version 6.6.0 released.
  • 2017/08/03: Version 6.5.0 released.
  • diff --git a/index_ja.html b/index_ja.html index 4e5265c..929ae16 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@

    鬼車

    -(c) K.Kosako, 最終更新: 2017/08/30 +(c) K.Kosako, 最終更新: 2017/12/08

    @@ -16,6 +16,7 @@
    更新情報
      +
    • 2017/12/11: Version 6.7.0 リリース
    • 2017/08/30: Version 6.6.1 リリース
    • 2017/08/28: Version 6.6.0 リリース
    • 2017/08/03: Version 6.5.0 リリース
    • diff --git a/sample/Makefile.am b/sample/Makefile.am index 6799ecd..7403384 100644 --- a/sample/Makefile.am +++ b/sample/Makefile.am @@ -4,7 +4,7 @@ lib_onig = ../src/libonig.la LDADD = $(lib_onig) AM_LDFLAGS = -L$(prefix)/lib -AM_CPPFLAGS = -I../src -I$(includedir) +AM_CPPFLAGS = -I$(top_srcdir)/src -I$(includedir) TESTS = encode listcap names posix simple sql syntax user_property bug_fix diff --git a/sample/posix.c b/sample/posix.c index f4cbe37..65bcf51 100644 --- a/sample/posix.c +++ b/sample/posix.c @@ -40,7 +40,7 @@ extern int main(int argc, char* argv[]) reg_set_encoding(REG_POSIX_ENCODING_ASCII); - /* default syntax (ONIG_SYNTAX_RUBY) */ + /* default syntax (ONIG_SYNTAX_ONIGURUMA) */ pattern = (UChar* )"^a+b{2,7}[c-f]?$|uuu"; r = regcomp(&reg, (char* )pattern, REG_EXTENDED); if (r) { diff --git a/src/Makefile.windows b/src/Makefile.windows index 8c22aa9..046345a 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -158,26 +158,26 @@ ptest: $(testp) .\$(testp) $(testc): $(testc).c $(libname) - $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname) + $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname) $(testp): $(testc).c $(dlllib) - $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib) + $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib) #$(testc)u.c: test.rb testconvu.rb # ruby -Ke testconvu.rb test.rb > $@ $(testc)u: $(testc)u.c $(libname) - $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) + $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) clean: del *.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj samples: all - $(CC) $(CFLAGS) -I. -o simple sample\simple.c $(dlllib) - $(CC) $(CFLAGS) -I. -o posix sample\posix.c $(dlllib) - $(CC) $(CFLAGS) -I. -o names sample\names.c $(dlllib) - $(CC) $(CFLAGS) -I. -o listcap sample\listcap.c $(dlllib) - $(CC) $(CFLAGS) -I. -o sql sample\sql.c $(dlllib) - $(CC) $(CFLAGS) -I. 
-o encode sample\encode.c $(dlllib) - $(CC) $(CFLAGS) -I. -o syntax sample\syntax.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:simple sample\simple.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:posix sample\posix.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:names sample\names.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:listcap sample\listcap.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:sql sample\sql.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:encode sample\encode.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:syntax sample\syntax.c $(dlllib) diff --git a/src/euc_jp.c b/src/euc_jp.c index 756bdc7..42c3bce 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -152,7 +152,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf) if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) return ONIGERR_INVALID_CODE_POINT_VALUE; #endif - return p - buf; + return (int )(p - buf); } static int @@ -230,7 +230,7 @@ static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { struct PropertyNameCtype* pc; - int len = end - p; + int len = (int )(end - p); char q[32]; if (len < sizeof(q) - 1) { diff --git a/src/gperf_fold_key_conv.py b/src/gperf_fold_key_conv.py index 5267aa4..59c5de9 100755 --- a/src/gperf_fold_key_conv.py +++ b/src/gperf_fold_key_conv.py @@ -9,7 +9,7 @@ import re REG_LINE_GPERF = re.compile('#line .+gperf"') REG_HASH_FUNC = re.compile('hash\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+unsigned\s+int\s+len\s*\)') REG_STR_AT = re.compile('str\[(\d+)\]') -REG_RETURN_TYPE = re.compile('^const\s+int\s*\*') +REG_RETURN_TYPE = re.compile('^const\s+short\s+int\s*\*') REG_FOLD_KEY = re.compile('unicode_fold(\d)_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+unsigned\s+int\s+len\)') REG_ENTRY = re.compile('\{".*?",\s*(-?\d+)\s*\}') REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+') diff --git a/src/make_unicode_egcb_data.py b/src/make_unicode_egcb_data.py index 3f958eb..3d20508 100755 --- a/src/make_unicode_egcb_data.py +++ b/src/make_unicode_egcb_data.py @@ -191,8 +191,36 @@ 
merge_props(PROPS, props) PROPS = sorted(PROPS) -print '/* Copyright (c) 2017 K.Kosako */' -print '/* Generated by make_gcb_data.py. */' +print '/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */' +COPYRIGHT = ''' +/*- + * Copyright (c) 2017 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +'''.strip() + +print COPYRIGHT print '' if VERSION_INFO is not None: print "#define GRAPHEME_BREAK_PROPERTY_VERSION %s" % re.sub(r'[\.-]', '_', VERSION_INFO) diff --git a/src/make_unicode_fold_data.py b/src/make_unicode_fold_data.py index f4ad321..64aa3a2 100755 --- a/src/make_unicode_fold_data.py +++ b/src/make_unicode_fold_data.py @@ -250,8 +250,8 @@ def output_gperf_unfold_key(f): %} struct ByUnfoldKey { OnigCodePoint code; - int index; - int fold_len; + short int index; + short int fold_len; }; %% """ @@ -272,7 +272,7 @@ def output_gperf_fold_key(f, key_len): #include #include "regenc.h" %} -int +short int %% """ f.write(head) diff --git a/src/make_unicode_property.sh b/src/make_unicode_property.sh index 5ee2990..1e57674 100755 --- a/src/make_unicode_property.sh +++ b/src/make_unicode_property.sh @@ -3,15 +3,16 @@ NAME=unicode_property_data TMP=gperf.tmp #GPERF_OPT='-P -Q prop_name_pool -C -c -t -j1 -L ANSI-C --ignore-case' -GPERF_OPT='-T -C -c -t -j1 -L ANSI-C --ignore-case' +GPERF_OPT='-T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool' +POOL_CAST='s/\(int *\)\(long *\)&\(\(struct +unicode_prop_name_pool_t *\* *\) *0\)->unicode_prop_name_pool_str([^,]+)/pool_offset(\1)/g' ./make_unicode_property_data.py > ${NAME}.gperf ./make_unicode_property_data.py -posix > ${NAME}_posix.gperf gperf ${GPERF_OPT} -N unicode_lookup_property_name --output-file ${TMP} ${NAME}.gperf -sed -e 's/^#line.*$//g' ${TMP} > ${NAME}.c +sed -e 's/^#line.*$//g' ${TMP} | sed -r "${POOL_CAST}" > ${NAME}.c gperf ${GPERF_OPT} -N unicode_lookup_property_name --output-file ${TMP} ${NAME}_posix.gperf -sed -e 's/^#line.*$//g' ${TMP} > ${NAME}_posix.c +sed -e 's/^#line.*$//g' ${TMP} | sed -r "${POOL_CAST}" > ${NAME}_posix.c rm -f ${NAME}.gperf ${NAME}_posix.gperf ${TMP} diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py index 51986b6..8dd11b8 100755 --- a/src/make_unicode_property_data.py +++ b/src/make_unicode_property_data.py @@ -501,11 
+501,15 @@ if not(POSIX_ONLY): print " CR_%s," % prop s = '''}; + +#define pool_offset(s) offsetof(struct unicode_prop_name_pool_t, unicode_prop_name_pool_str##s) + %} -struct PropertyNameCtype { - char* name: - int ctype; +struct PoolPropertyNameCtype { + short int name; + short int ctype; }; + %% ''' sys.stdout.write(s) diff --git a/src/onigposix.h b/src/onigposix.h index 2af3717..22211e4 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -4,7 +4,7 @@ onigposix.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2017 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -128,6 +128,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; +ONIG_EXTERN OnigSyntaxType OnigSyntaxOniguruma; /* predefined syntaxes (see regsyntax.c) */ #define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) @@ -138,6 +139,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; #define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) +#define ONIG_SYNTAX_ONIGURUMA (&OnigSyntaxOniguruma) /* default syntax */ #define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax @@ -147,6 +149,7 @@ ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); ONIG_EXTERN const char* onig_version P_((void)); ONIG_EXTERN const char* onig_copyright P_((void)); +ONIG_EXTERN int onig_end P_((void)); #endif /* ONIGURUMA_H */ diff --git a/src/oniguruma.h b/src/oniguruma.h index dbcbbdb..5ad4469 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -35,8 +35,8 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 -#define ONIGURUMA_VERSION_MINOR 6 -#define 
ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_MINOR 7 +#define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -432,6 +432,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG; ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; +ONIG_EXTERN OnigSyntaxType OnigSyntaxOniguruma; /* predefined syntaxes (see regsyntax.c) */ #define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) @@ -444,6 +445,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) #define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) +#define ONIG_SYNTAX_ONIGURUMA (&OnigSyntaxOniguruma) /* default syntax */ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; @@ -510,6 +512,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (1U<<24) /* \N (?-m:.), \O (?m:.) */ #define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) */ #define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* \X \y \Y */ +#define ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL (1U<<27) /* (?R), (?&name)... 
*/ /* syntax (behavior) */ #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ diff --git a/src/regcomp.c b/src/regcomp.c index ab5701c..63df18b 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -29,6 +29,8 @@ #include "regparse.h" +OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; + #if 0 typedef struct { int n; @@ -106,8 +108,6 @@ int_stack_pop(int_stack* s) } #endif -OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; - extern OnigCaseFoldType onig_get_default_case_fold_flag(void) { @@ -129,7 +129,7 @@ static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; static UChar* str_dup(UChar* s, UChar* end) { - int len = end - s; + int len = (int )(end - s); if (len > 0) { UChar* r = (UChar* )xmalloc(len + 1); @@ -145,12 +145,13 @@ static void swap_node(Node* a, Node* b) { Node c; + c = *a; *a = *b; *b = c; if (NODE_TYPE(a) == NODE_STRING) { StrNode* sn = STR_(a); if (sn->capa == 0) { - int len = sn->end - sn->s; + int len = (int )(sn->end - sn->s); sn->s = sn->buf; sn->end = sn->s + len; } @@ -159,7 +160,7 @@ swap_node(Node* a, Node* b) if (NODE_TYPE(b) == NODE_STRING) { StrNode* sn = STR_(b); if (sn->capa == 0) { - int len = sn->end - sn->s; + int len = (int )(sn->end - sn->s); sn->s = sn->buf; sn->end = sn->s + len; } @@ -192,6 +193,7 @@ static int bitset_is_empty(BitSetRef bs) { int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { if (bs[i] != 0) return 0; } @@ -235,10 +237,9 @@ onig_bbuf_init(BBuf* buf, int size) static int unset_addr_list_init(UnsetAddrList* list, int size) { - UnsetAddr* p; - - p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); + UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); CHECK_NULL_RETURN_MEMERR(p); + list->num = 0; list->alloc = size; list->us = p; @@ -562,9 +563,9 @@ static int compile_length_string_node(Node* node, regex_t* reg) { int rlen, r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; UChar *p, *prev; StrNode* sn; + OnigEncoding enc = reg->enc; sn = STR_(node); if 
(sn->end <= sn->s) @@ -592,6 +593,7 @@ compile_length_string_node(Node* node, regex_t* reg) } p += len; } + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); rlen += r; return rlen; @@ -603,16 +605,17 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg) if (sn->end <= sn->s) return 0; - return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); + return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s), + reg, 0); } static int compile_string_node(Node* node, regex_t* reg) { int r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; UChar *p, *prev, *end; StrNode* sn; + OnigEncoding enc = reg->enc; sn = STR_(node); if (sn->end <= sn->s) @@ -642,6 +645,7 @@ compile_string_node(Node* node, regex_t* reg) p += len; } + return add_compile_string(prev, prev_len, slen, reg, ambig); } @@ -651,7 +655,7 @@ compile_string_raw_node(StrNode* sn, regex_t* reg) if (sn->end <= sn->s) return 0; - return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); + return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0); } static int @@ -892,8 +896,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; } else { - len = SIZE_OP_REPEAT_INC - + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; if (CKN_ON) len += SIZE_OP_STATE_CHECK; } @@ -1066,8 +1069,8 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) if (tlen < 0) return tlen; /* anychar repeat */ - if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) { - if (qn->greedy && infinite) { + if (is_anychar_star_quantifier(qn)) { + if (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; else @@ -1130,7 +1133,8 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (tlen < 0) 
return tlen; - if (is_anychar_star_quantifier(qn)) { + if (is_anychar_star_quantifier(qn) && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact)) { @@ -1184,7 +1188,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); } else if (IS_NOT_NULL(qn->next_head_exact)) { r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, @@ -1194,7 +1198,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); } else { r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); @@ -1218,7 +1222,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (r != 0) return r; r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } - else if (!infinite && qn->greedy && + else if (! infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { int n = qn->upper - qn->lower; @@ -1228,13 +1232,13 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) for (i = 0; i < n; i++) { r = add_opcode_rel_addr(reg, OP_PUSH, - (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); if (r != 0) return r; r = compile_tree(NODE_QUANT_BODY(qn), reg, env); if (r != 0) return r; } } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + else if (! 
qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, tlen); @@ -1343,7 +1347,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) len = SIZE_OP_MEMORY_START; len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum) - ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); + ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); } break; @@ -1357,7 +1361,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; } else { - len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; + len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END; } break; @@ -1370,7 +1374,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) len = compile_length_tree(cond, reg); if (len < 0) return len; len += SIZE_OP_PUSH; - len += SIZE_OP_PUSH_STOP_BT + SIZE_OP_POP_STOP_BT; + len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END; if (IS_NOT_NULL(Then)) { tlen = compile_length_tree(Then, reg); @@ -1511,11 +1515,11 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); } else { - r = add_opcode(reg, OP_PUSH_STOP_BT); + r = add_opcode(reg, OP_ATOMIC_START); if (r != 0) return r; r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_POP_STOP_BT); + r = add_opcode(reg, OP_ATOMIC_END); } break; @@ -1526,7 +1530,7 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) Node* Then = node->te.Then; Node* Else = node->te.Else; - r = add_opcode(reg, OP_PUSH_STOP_BT); + r = add_opcode(reg, OP_ATOMIC_START); if (r != 0) return r; cond_len = compile_length_tree(cond, reg); @@ -1538,14 +1542,14 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) else then_len = 0; - jump_len = cond_len + then_len + SIZE_OP_POP_STOP_BT; + 
jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END; if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP; r = add_opcode_rel_addr(reg, OP_PUSH, jump_len); if (r != 0) return r; r = compile_tree(cond, reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_POP_STOP_BT); + r = add_opcode(reg, OP_ATOMIC_END); if (r != 0) return r; if (IS_NOT_NULL(Then)) { @@ -1586,13 +1590,13 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END; break; case ANCHOR_PREC_READ_NOT: - len = SIZE_OP_PUSH_PREC_READ_NOT + tlen + SIZE_OP_FAIL_PREC_READ_NOT; + len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END; break; case ANCHOR_LOOK_BEHIND: len = SIZE_OP_LOOK_BEHIND + tlen; break; case ANCHOR_LOOK_BEHIND_NOT: - len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; + len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END; break; case ANCHOR_WORD_BOUNDARY: @@ -1670,11 +1674,11 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) case ANCHOR_PREC_READ_NOT: len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH_PREC_READ_NOT, len + SIZE_OP_FAIL_PREC_READ_NOT); + r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END); if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_FAIL_PREC_READ_NOT); + r = add_opcode(reg, OP_PREC_READ_NOT_END); break; case ANCHOR_LOOK_BEHIND: @@ -1698,9 +1702,10 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) case ANCHOR_LOOK_BEHIND_NOT: { int n; + len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); - r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, - len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); + r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START, + len + SIZE_OP_LOOK_BEHIND_NOT_END); if (r != 0) return r; if (node->char_len < 0) { r = 
get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n); @@ -1712,7 +1717,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); + r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END); } break; @@ -2203,6 +2208,7 @@ renumber_by_map(Node* node, GroupNumRemap* map) case NODE_ENCLOSURE: { EnclosureNode* en = ENCLOSURE_(node); + r = renumber_by_map(NODE_BODY(node), map); if (r != 0) return r; @@ -2259,6 +2265,7 @@ numbered_ref_check(Node* node) case NODE_ENCLOSURE: { EnclosureNode* en = ENCLOSURE_(node); + r = numbered_ref_check(NODE_BODY(node)); if (r != 0) return r; @@ -2402,6 +2409,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) { StrNode* sn = STR_(node); UChar *s = sn->s; + while (s < sn->end) { s += enclen(reg->enc, s); (*len)++; @@ -2412,6 +2420,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) case NODE_QUANT: { QuantNode* qn = QUANT_(node); + if (qn->lower == qn->upper) { if (qn->upper == 0) { *len = 0; @@ -2444,6 +2453,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) case NODE_ENCLOSURE: { EnclosureNode* en = ENCLOSURE_(node); + switch (en->type) { case ENCLOSURE_MEMORY: #ifdef USE_CALL @@ -2465,6 +2475,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) case ENCLOSURE_IF_ELSE: { int clen, elen; + r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level); if (r == 0) { if (IS_NOT_NULL(en->te.Then)) { @@ -2487,6 +2498,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } } break; + default: break; } @@ -2566,6 +2578,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) { int range; CClassNode* xc = CCLASS_(x); + switch (ytype) { case NODE_CTYPE: switch (CTYPE_(y)->ctype) { @@ -2616,8 +2629,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) for (i = 0; i < SINGLE_BYTE_SIZE; i++) { v = 
BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(xc)) || - (v == 0 && IS_NCCLASS_NOT(xc))) { + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) { v = BITSET_AT(yc->bs, i); if ((v != 0 && !IS_NCCLASS_NOT(yc)) || (v == 0 && IS_NCCLASS_NOT(yc))) @@ -2644,6 +2656,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) case NODE_STRING: { StrNode* xs = STR_(x); + if (NODE_STRING_LEN(x) == 0) break; @@ -2687,6 +2700,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) { UChar *q; StrNode* ys = STR_(y); + len = NODE_STRING_LEN(x); if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y); if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) { @@ -2700,7 +2714,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } } break; - + default: break; } @@ -2925,7 +2939,7 @@ tree_min_len(Node* node, ScanEnv* env) case NODE_STRING: { StrNode* sn = STR_(node); - len = sn->end - sn->s; + len = (int )(sn->end - sn->s); } break; @@ -2972,7 +2986,8 @@ tree_min_len(Node* node, ScanEnv* env) break; case ENCLOSURE_IF_ELSE: { - int elen; + OnigLen elen; + len = tree_min_len(NODE_BODY(node), env); if (IS_NOT_NULL(en->te.Then)) len += tree_min_len(en->te.Then, env); @@ -3029,7 +3044,7 @@ tree_max_len(Node* node, ScanEnv* env) case NODE_STRING: { StrNode* sn = STR_(node); - len = sn->end - sn->s; + len = (OnigLen )(sn->end - sn->s); } break; @@ -3108,7 +3123,8 @@ tree_max_len(Node* node, ScanEnv* env) break; case ENCLOSURE_IF_ELSE: { - int tlen, elen; + OnigLen tlen, elen; + len = tree_max_len(NODE_BODY(node), env); if (IS_NOT_NULL(en->te.Then)) { tlen = tree_max_len(en->te.Then, env); @@ -3655,7 +3671,7 @@ update_string_node_case_fold(regex_t* reg, Node *node) StrNode* sn = STR_(node); end = sn->end; - sbuf_size = (end - sn->s) * 2; + sbuf_size = (int )(end - sn->s) * 2; sbuf = (UChar* )xmalloc(sbuf_size); CHECK_NULL_RETURN_MEMERR(sbuf); ebuf = sbuf + sbuf_size; @@ -3688,8 +3704,7 @@ update_string_node_case_fold(regex_t* reg, Node *node) } static int 
-expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, - regex_t* reg) +expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg) { int r; Node *node; @@ -3711,8 +3726,8 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], - UChar *p, int slen, UChar *end, - regex_t* reg, Node **rnode) + UChar *p, int slen, UChar *end, regex_t* reg, + Node **rnode) { int r, i, j, len, varlen; Node *anode, *var_anode, *snode, *xnode, *an; @@ -3843,8 +3858,8 @@ expand_case_fold_string(Node* node, regex_t* reg) alt_num = 1; p = start; while (p < end) { - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end, + items); if (n < 0) { r = n; goto err; @@ -3993,11 +4008,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) case NODE_QUANT: { + int var_num; int child_state = state; int add_state = 0; QuantNode* qn = QUANT_(node); Node* target = NODE_QUANT_BODY(qn); - int var_num; if (! IS_REPEAT_INFINITE(qn->upper)) { if (qn->upper > 1) { @@ -4203,7 +4218,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state) if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) { + ! 
ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) { return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; } @@ -4904,11 +4919,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Boyer-Moore search */ static int set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - UChar skip[], int** int_skip) + UChar skip[], int** int_skip) { int i, len; - len = end - s; + len = (int )(end - s); if (len < ONIG_CHAR_TABLE_SIZE) { for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; @@ -4944,38 +4959,34 @@ typedef struct { } OptEnv; typedef struct { - int left_anchor; - int right_anchor; -} OptAncInfo; + int left; + int right; +} OptAnc; typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - + MinMaxLen mmd; /* info position */ + OptAnc anc; int reach_end; int ignore_case; int len; UChar s[OPT_EXACT_MAXLEN]; -} OptExactInfo; +} OptExact; typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - + MinMaxLen mmd; /* info position */ + OptAnc anc; int value; /* weighted value */ UChar map[ONIG_CHAR_TABLE_SIZE]; -} OptMapInfo; +} OptMap; typedef struct { - MinMaxLen len; - - OptAncInfo anc; - OptExactInfo exb; /* boundary */ - OptExactInfo exm; /* middle */ - OptExactInfo expr; /* prec read (?=...) */ - - OptMapInfo map; /* boundary */ -} NodeOptInfo; + MinMaxLen len; + OptAnc anc; + OptExact exb; /* boundary */ + OptExact exm; /* middle */ + OptExact expr; /* prec read (?=...) */ + OptMap map; /* boundary */ +} NodeOpt; static int @@ -5054,7 +5065,6 @@ is_equal_mml(MinMaxLen* a, MinMaxLen* b) return (a->min == b->min && a->max == b->max) ? 
1 : 0; } - static void set_mml(MinMaxLen* mml, OnigLen min, OnigLen max) { @@ -5082,15 +5092,6 @@ add_mml(MinMaxLen* to, MinMaxLen* from) to->max = distance_add(to->max, from->max); } -#if 0 -static void -add_len_mml(MinMaxLen* to, OnigLen len) -{ - to->min = distance_add(to->min, len); - to->max = distance_add(to->max, len); -} -#endif - static void alt_merge_mml(MinMaxLen* to, MinMaxLen* from) { @@ -5105,42 +5106,42 @@ copy_opt_env(OptEnv* to, OptEnv* from) } static void -clear_opt_anc_info(OptAncInfo* anc) +clear_opt_anc_info(OptAnc* anc) { - anc->left_anchor = 0; - anc->right_anchor = 0; + anc->left = 0; + anc->right = 0; } static void -copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) +copy_opt_anc_info(OptAnc* to, OptAnc* from) { *to = *from; } static void -concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, - OnigLen left_len, OnigLen right_len) +concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right, + OnigLen left_len, OnigLen right_len) { clear_opt_anc_info(to); - to->left_anchor = left->left_anchor; + to->left = left->left; if (left_len == 0) { - to->left_anchor |= right->left_anchor; + to->left |= right->left; } - to->right_anchor = right->right_anchor; + to->right = right->right; if (right_len == 0) { - to->right_anchor |= left->right_anchor; + to->right |= left->right; } else { - to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT); + to->right |= (left->right & ANCHOR_PREC_READ_NOT); } } static int -is_left_anchor(int anc) +is_left(int anc) { - if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || + if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || anc == ANCHOR_PREC_READ_NOT) return 0; @@ -5149,46 +5150,46 @@ is_left_anchor(int anc) } static int -is_set_opt_anc_info(OptAncInfo* to, int anc) +is_set_opt_anc_info(OptAnc* to, int anc) { - if ((to->left_anchor & anc) != 0) return 1; + if ((to->left & anc) != 0) return 1; - return ((to->right_anchor 
& anc) != 0 ? 1 : 0); + return ((to->right & anc) != 0 ? 1 : 0); } static void -add_opt_anc_info(OptAncInfo* to, int anc) +add_opt_anc_info(OptAnc* to, int anc) { - if (is_left_anchor(anc)) - to->left_anchor |= anc; + if (is_left(anc)) + to->left |= anc; else - to->right_anchor |= anc; + to->right |= anc; } static void -remove_opt_anc_info(OptAncInfo* to, int anc) +remove_opt_anc_info(OptAnc* to, int anc) { - if (is_left_anchor(anc)) - to->left_anchor &= ~anc; + if (is_left(anc)) + to->left &= ~anc; else - to->right_anchor &= ~anc; + to->right &= ~anc; } static void -alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) +alt_merge_opt_anc_info(OptAnc* to, OptAnc* add) { - to->left_anchor &= add->left_anchor; - to->right_anchor &= add->right_anchor; + to->left &= add->left; + to->right &= add->right; } static int -is_full_opt_exact_info(OptExactInfo* ex) +is_full_opt_exact(OptExact* ex) { return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); } static void -clear_opt_exact_info(OptExactInfo* ex) +clear_opt_exact(OptExact* ex) { clear_mml(&ex->mmd); clear_opt_anc_info(&ex->anc); @@ -5199,17 +5200,17 @@ clear_opt_exact_info(OptExactInfo* ex) } static void -copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) +copy_opt_exact(OptExact* to, OptExact* from) { *to = *from; } static void -concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) +concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) { int i, j, len; UChar *p, *end; - OptAncInfo tanc; + OptAnc tanc; if (! to->ignore_case && add->ignore_case) { if (to->len >= add->len) return ; /* avoid */ @@ -5230,13 +5231,13 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) to->reach_end = (p == end ? add->reach_end : 0); concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); - if (! to->reach_end) tanc.right_anchor = 0; + if (! 
to->reach_end) tanc.right = 0; copy_opt_anc_info(&to->anc, &tanc); } static void -concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, - int raw ARG_UNUSED, OnigEncoding enc) +concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, + int raw ARG_UNUSED, OnigEncoding enc) { int i, j, len; UChar *p; @@ -5252,17 +5253,17 @@ concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, } static void -alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) +alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env) { int i, j, len; if (add->len == 0 || to->len == 0) { - clear_opt_exact_info(to); + clear_opt_exact(to); return ; } if (! is_equal_mml(&to->mmd, &add->mmd)) { - clear_opt_exact_info(to); + clear_opt_exact(to); return ; } @@ -5284,11 +5285,11 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) to->ignore_case |= add->ignore_case; alt_merge_opt_anc_info(&to->anc, &add->anc); - if (! to->reach_end) to->anc.right_anchor = 0; + if (! 
to->reach_end) to->anc.right = 0; } static void -select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) +select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt) { int v1, v2; @@ -5299,7 +5300,7 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) return ; } else if (v1 == 0) { - copy_opt_exact_info(now, alt); + copy_opt_exact(now, alt); return ; } else if (v1 <= 2 && v2 <= 2) { @@ -5315,13 +5316,13 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) if (alt->ignore_case == 0) v2 *= 2; if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_exact_info(now, alt); + copy_opt_exact(now, alt); } static void -clear_opt_map_info(OptMapInfo* map) +clear_opt_map(OptMap* map) { - static const OptMapInfo clean_info = { + static const OptMap clean_info = { {0, 0}, {0, 0}, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -5343,17 +5344,17 @@ clear_opt_map_info(OptMapInfo* map) } }; - xmemcpy(map, &clean_info, sizeof(OptMapInfo)); + xmemcpy(map, &clean_info, sizeof(OptMap)); } static void -copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) +copy_opt_map(OptMap* to, OptMap* from) { *to = *from; } static void -add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) +add_char_opt_map(OptMap* map, UChar c, OnigEncoding enc) { if (map->map[c] == 0) { map->map[c] = 1; @@ -5362,14 +5363,14 @@ add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) } static int -add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, - OnigEncoding enc, OnigCaseFoldType case_fold_flag) +add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end, + OnigEncoding enc, OnigCaseFoldType case_fold_flag) { OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; int i, n; - add_char_opt_map_info(map, p[0], enc); + add_char_opt_map(map, p[0], enc); case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); n = 
ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); @@ -5377,14 +5378,14 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, for (i = 0; i < n; i++) { ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); - add_char_opt_map_info(map, buf[0], enc); + add_char_opt_map(map, buf[0], enc); } return 0; } static void -select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) +select_opt_map(OptMap* now, OptMap* alt) { static int z = 1<<15; /* 32768: something big value */ @@ -5392,18 +5393,18 @@ select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) if (alt->value == 0) return ; if (now->value == 0) { - copy_opt_map_info(now, alt); + copy_opt_map(now, alt); return ; } v1 = z / now->value; v2 = z / alt->value; if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_map_info(now, alt); + copy_opt_map(now, alt); } static int -comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) +comp_opt_exact_or_map(OptExact* e, OptMap* m) { #define COMP_EM_BASE 20 int ve, vm; @@ -5416,14 +5417,14 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) } static void -alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) +alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add) { int i, val; /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ if (to->value == 0) return ; if (add->value == 0 || to->mmd.max < add->mmd.min) { - clear_opt_map_info(to); + clear_opt_map(to); return ; } @@ -5443,7 +5444,7 @@ alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) } static void -set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) +set_bound_node_opt_info(NodeOpt* opt, MinMaxLen* mmd) { copy_mml(&(opt->exb.mmd), mmd); copy_mml(&(opt->expr.mmd), mmd); @@ -5451,40 +5452,39 @@ set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) } static void -clear_node_opt_info(NodeOptInfo* opt) +clear_node_opt_info(NodeOpt* opt) { clear_mml(&opt->len); clear_opt_anc_info(&opt->anc); - clear_opt_exact_info(&opt->exb); - clear_opt_exact_info(&opt->exm); - clear_opt_exact_info(&opt->expr); - clear_opt_map_info(&opt->map); + clear_opt_exact(&opt->exb); + clear_opt_exact(&opt->exm); + clear_opt_exact(&opt->expr); + clear_opt_map(&opt->map); } static void -copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) +copy_node_opt_info(NodeOpt* to, NodeOpt* from) { *to = *from; } static void -concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) +concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add) { int exb_reach, exm_reach; - OptAncInfo tanc; + OptAnc tanc; concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); copy_opt_anc_info(&to->anc, &tanc); if (add->exb.len > 0 && to->len.max == 0) { - concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, - to->len.max, add->len.max); + concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max); copy_opt_anc_info(&add->exb.anc, &tanc); } if (add->map.value > 0 && to->len.max == 0) { if (add->map.mmd.max == 0) - add->map.anc.left_anchor |= to->anc.left_anchor; + add->map.anc.left |= to->anc.left; } exb_reach = to->exb.reach_end; @@ -5495,16 +5495,16 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) if 
(add->exb.len > 0) { if (exb_reach) { - concat_opt_exact_info(&to->exb, &add->exb, enc); - clear_opt_exact_info(&add->exb); + concat_opt_exact(&to->exb, &add->exb, enc); + clear_opt_exact(&add->exb); } else if (exm_reach) { - concat_opt_exact_info(&to->exm, &add->exb, enc); - clear_opt_exact_info(&add->exb); + concat_opt_exact(&to->exm, &add->exb, enc); + clear_opt_exact(&add->exb); } } - select_opt_exact_info(enc, &to->exm, &add->exb); - select_opt_exact_info(enc, &to->exm, &add->exm); + select_opt_exact(enc, &to->exm, &add->exb); + select_opt_exact(enc, &to->exm, &add->exm); if (to->expr.len > 0) { if (add->len.max > 0) { @@ -5512,28 +5512,27 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) to->expr.len = add->len.max; if (to->expr.mmd.max == 0) - select_opt_exact_info(enc, &to->exb, &to->expr); + select_opt_exact(enc, &to->exb, &to->expr); else - select_opt_exact_info(enc, &to->exm, &to->expr); + select_opt_exact(enc, &to->exm, &to->expr); } } else if (add->expr.len > 0) { - copy_opt_exact_info(&to->expr, &add->expr); + copy_opt_exact(&to->expr, &add->expr); } - select_opt_map_info(&to->map, &add->map); - + select_opt_map(&to->map, &add->map); add_mml(&to->len, &add->len); } static void -alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) +alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env) { - alt_merge_opt_anc_info (&to->anc, &add->anc); - alt_merge_opt_exact_info(&to->exb, &add->exb, env); - alt_merge_opt_exact_info(&to->exm, &add->exm, env); - alt_merge_opt_exact_info(&to->expr, &add->expr, env); - alt_merge_opt_map_info(env->enc, &to->map, &add->map); + alt_merge_opt_anc_info(&to->anc, &add->anc); + alt_merge_opt_exact(&to->exb, &add->exb, env); + alt_merge_opt_exact(&to->exm, &add->exm, env); + alt_merge_opt_exact(&to->expr, &add->expr, env); + alt_merge_opt_map(env->enc, &to->map, &add->map); alt_merge_mml(&to->len, &add->len); } @@ -5542,10 +5541,13 @@ alt_merge_node_opt_info(NodeOptInfo* 
to, NodeOptInfo* add, OptEnv* env) #define MAX_NODE_OPT_INFO_REF_COUNT 5 static int -optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) +optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) { + OnigEncoding enc; + int i; int r = 0; + enc = env->enc; clear_node_opt_info(opt); set_bound_node_opt_info(opt, &env->mmd); @@ -5553,15 +5555,15 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_LIST: { OptEnv nenv; - NodeOptInfo nopt; + NodeOpt nopt; Node* nd = node; copy_opt_env(&nenv, env); do { - r = optimize_node_left(NODE_CAR(nd), &nopt, &nenv); + r = optimize_nodes(NODE_CAR(nd), &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(env->enc, opt, &nopt); + concat_left_node_opt_info(enc, opt, &nopt); } } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd))); } @@ -5569,11 +5571,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_ALT: { - NodeOptInfo nopt; + NodeOpt nopt; Node* nd = node; do { - r = optimize_node_left(NODE_CAR(nd), &nopt, env); + r = optimize_nodes(NODE_CAR(nd), &nopt, env); if (r == 0) { if (nd == node) copy_node_opt_info(opt, &nopt); else alt_merge_node_opt_info(opt, &nopt, env); @@ -5585,14 +5587,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_STRING: { StrNode* sn = STR_(node); - int slen = sn->end - sn->s; + int slen = (int )(sn->end - sn->s); int is_raw = NODE_STRING_IS_RAW(node); if (! 
NODE_STRING_IS_AMBIG(node)) { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NODE_STRING_IS_RAW(node), env->enc); + concat_opt_exact_str(&opt->exb, sn->s, sn->end, + NODE_STRING_IS_RAW(node), enc); if (slen > 0) { - add_char_opt_map_info(&opt->map, *(sn->s), env->enc); + add_char_opt_map(&opt->map, *(sn->s), enc); } set_mml(&opt->len, slen, slen); } @@ -5600,17 +5602,16 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) int max; if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) { - int n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + int n = onigenc_strlen(enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(enc) * n; } else { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); + concat_opt_exact_str(&opt->exb, sn->s, sn->end, is_raw, enc); opt->exb.ignore_case = 1; if (slen > 0) { - r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, - env->enc, env->case_fold_flag); + r = add_char_amb_opt_map(&opt->map, sn->s, sn->end, + enc, env->case_fold_flag); if (r != 0) break; } @@ -5627,22 +5628,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_CCLASS: { - int i, z; + int z; CClassNode* cc = CCLASS_(node); /* no need to check ignore case. (set in setup_tree()) */ if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { - OnigLen min = ONIGENC_MBC_MINLEN(env->enc); - OnigLen max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + OnigLen min = ONIGENC_MBC_MINLEN(enc); + OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc); set_mml(&opt->len, min, max); } else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { z = BITSET_AT(cc->bs, i); - if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + if ((z && ! IS_NCCLASS_NOT(cc)) || (! 
z && IS_NCCLASS_NOT(cc))) { + add_char_opt_map(&opt->map, (UChar )i, enc); } } set_mml(&opt->len, 1, 1); @@ -5652,10 +5653,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_CTYPE: { - int i, min, max; + int min, max; int range; - max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + max = ONIGENC_MBC_MAXLEN_DIST(enc); if (max == 1) { min = 1; @@ -5668,18 +5669,18 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE; if (CTYPE_(node)->not != 0) { for (i = 0; i < range; i++) { - if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + if (! ONIGENC_IS_CODE_WORD(enc, i)) { + add_char_opt_map(&opt->map, (UChar )i, enc); } } for (i = range; i < SINGLE_BYTE_SIZE; i++) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + add_char_opt_map(&opt->map, (UChar )i, enc); } } else { for (i = 0; i < range; i++) { - if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + if (ONIGENC_IS_CODE_WORD(enc, i)) { + add_char_opt_map(&opt->map, (UChar )i, enc); } } } @@ -5687,7 +5688,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } } else { - min = ONIGENC_MBC_MINLEN(env->enc); + min = ONIGENC_MBC_MINLEN(enc); } set_mml(&opt->len, min, max); } @@ -5708,19 +5709,19 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_PREC_READ: { - NodeOptInfo nopt; + NodeOpt nopt; - r = optimize_node_left(NODE_BODY(node), &nopt, env); + r = optimize_nodes(NODE_BODY(node), &nopt, env); if (r == 0) { if (nopt.exb.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exb); + copy_opt_exact(&opt->expr, &nopt.exb); else if (nopt.exm.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exm); + copy_opt_exact(&opt->expr, &nopt.exm); opt->expr.reach_end = 0; if (nopt.map.value > 0) - copy_opt_map_info(&opt->map, &nopt.map); + copy_opt_map(&opt->map, &nopt.map); } } break; @@ -5732,7 +5733,6 @@ 
optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_BACKREF: if (! NODE_IS_CHECKER(node)) { - int i; int* backs; OnigLen min, max, tmin, tmax; MemEnv* mem_env = SCANENV_MEMENV(env->scan_env); @@ -5762,7 +5762,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else { OnigOptionType save = env->options; env->options = ENCLOSURE_(NODE_BODY(node))->o.options; - r = optimize_node_left(NODE_BODY(node), opt, env); + r = optimize_nodes(NODE_BODY(node), opt, env); env->options = save; } break; @@ -5770,12 +5770,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_QUANT: { - int i; OnigLen min, max; - NodeOptInfo nopt; + NodeOpt nopt; QuantNode* qn = QUANT_(node); - r = optimize_node_left(NODE_BODY(node), &nopt, env); + r = optimize_nodes(NODE_BODY(node), &nopt, env); if (r != 0) break; if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { @@ -5792,13 +5791,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_node_opt_info(opt, &nopt); if (nopt.exb.len > 0) { if (nopt.exb.reach_end) { - for (i = 2; i <= qn->lower && - ! is_full_opt_exact_info(&opt->exb); i++) { - concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); - } - if (i < qn->lower) { - opt->exb.reach_end = 0; + for (i = 2; i <= qn->lower && ! 
is_full_opt_exact(&opt->exb); i++) { + concat_opt_exact(&opt->exb, &nopt.exb, enc); } + if (i < qn->lower) opt->exb.reach_end = 0; } } @@ -5831,7 +5827,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) OnigOptionType save = env->options; env->options = en->o.options; - r = optimize_node_left(NODE_BODY(node), opt, env); + r = optimize_nodes(NODE_BODY(node), opt, env); env->options = save; } break; @@ -5851,8 +5847,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else #endif { - r = optimize_node_left(NODE_BODY(node), opt, env); - + r = optimize_nodes(NODE_BODY(node), opt, env); if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum)) remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); @@ -5861,28 +5856,28 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) break; case ENCLOSURE_STOP_BACKTRACK: - r = optimize_node_left(NODE_BODY(node), opt, env); + r = optimize_nodes(NODE_BODY(node), opt, env); break; case ENCLOSURE_IF_ELSE: { OptEnv nenv; - NodeOptInfo nopt; + NodeOpt nopt; copy_opt_env(&nenv, env); - r = optimize_node_left(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); + r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(env->enc, opt, &nopt); + concat_left_node_opt_info(enc, opt, &nopt); if (IS_NOT_NULL(en->te.Then)) { - r = optimize_node_left(en->te.Then, &nopt, &nenv); + r = optimize_nodes(en->te.Then, &nopt, &nenv); if (r == 0) { - concat_left_node_opt_info(env->enc, opt, &nopt); + concat_left_node_opt_info(enc, opt, &nopt); } } if (IS_NOT_NULL(en->te.Else)) { - r = optimize_node_left(en->te.Else, &nopt, env); + r = optimize_nodes(en->te.Else, &nopt, env); if (r == 0) alt_merge_node_opt_info(opt, &nopt, env); } @@ -5898,7 +5893,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) default: #ifdef ONIG_DEBUG - fprintf(stderr, "optimize_node_left: 
undefined node type %d\n", NODE_TYPE(node)); + fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node)); #endif r = ONIGERR_TYPE_BUG; break; @@ -5908,7 +5903,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } static int -set_optimize_exact_info(regex_t* reg, OptExactInfo* e) +set_optimize_exact(regex_t* reg, OptExact* e) { int r; @@ -5933,11 +5928,11 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, - reg->map, &(reg->int_map)); + reg->map, &(reg->int_map)); if (r != 0) return r; reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } else { reg->optimize = ONIG_OPTIMIZE_EXACT; @@ -5948,14 +5943,14 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) reg->dmax = e->mmd.max; if (reg->dmin != INFINITE_LEN) { - reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); + reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact); } return 0; } static void -set_optimize_map_info(regex_t* reg, OptMapInfo* m) +set_optimize_map(regex_t* reg, OptMap* m) { int i; @@ -5972,10 +5967,10 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m) } static void -set_sub_anchor(regex_t* reg, OptAncInfo* anc) +set_sub_anchor(regex_t* reg, OptAnc* anc) { - reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; - reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; + reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= anc->right & ANCHOR_END_LINE; } #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) @@ -5985,28 +5980,27 @@ static void print_optimize_info(FILE* f, regex_t* reg); static int set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) { - int r; - NodeOptInfo opt; + NodeOpt opt; OptEnv env; env.enc = reg->enc; env.options = reg->options; env.case_fold_flag 
= reg->case_fold_flag; - env.scan_env = scan_env; + env.scan_env = scan_env; clear_mml(&env.mmd); - r = optimize_node_left(node, &opt, &env); + r = optimize_nodes(node, &opt, &env); if (r != 0) return r; - reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | + reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | ANCHOR_LOOK_BEHIND); - if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) + if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | + reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | ANCHOR_PREC_READ_NOT); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { @@ -6015,25 +6009,24 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) } if (opt.exb.len > 0 || opt.exm.len > 0) { - select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); - if (opt.map.value > 0 && - comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + select_opt_exact(reg->enc, &opt.exb, &opt.exm); + if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) { goto set_map; } else { - r = set_optimize_exact_info(reg, &opt.exb); + r = set_optimize_exact(reg, &opt.exb); set_sub_anchor(reg, &opt.exb.anc); } } else if (opt.map.value > 0) { set_map: - set_optimize_map_info(reg, &opt.map); + set_optimize_map(reg, &opt.map); set_sub_anchor(reg, &opt.map.anc); } else { - reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE; if (opt.len.max == 0) - reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; + reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE; } #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) @@ -6061,7 +6054,7 @@ clear_optimize_info(regex_t* reg) #ifdef ONIG_DEBUG static void print_enc_string(FILE* fp, OnigEncoding enc, - 
const UChar *s, const UChar *end) + const UChar *s, const UChar *end) { fprintf(fp, "\nPATTERN: /"); @@ -6261,7 +6254,7 @@ static void print_tree P_((FILE* f, Node* node)); extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigErrorInfo* einfo) + OnigErrorInfo* einfo) { #define COMPILE_INIT_SIZE 20 @@ -6280,7 +6273,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #endif if (reg->alloc == 0) { - init_size = (pattern_end - pattern) * 2; + init_size = (int )(pattern_end - pattern) * 2; if (init_size <= 0) init_size = COMPILE_INIT_SIZE; r = BB_INIT(reg, init_size); if (r != 0) goto end; @@ -6460,8 +6453,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, static int onig_inited = 0; extern int -onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, +onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax) { int r; @@ -6521,9 +6513,10 @@ onig_reg_init(regex_t* reg, OnigOptionType option, } extern int -onig_new_without_alloc(regex_t* reg, const UChar* pattern, - const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, - OnigSyntaxType* syntax, OnigErrorInfo* einfo) +onig_new_without_alloc(regex_t* reg, + const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, + OnigSyntaxType* syntax, OnigErrorInfo* einfo) { int r; @@ -6536,8 +6529,8 @@ onig_new_without_alloc(regex_t* reg, const UChar* pattern, extern int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, - OnigErrorInfo* einfo) + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) { int r; @@ -6775,13 +6768,13 @@ OnigOpInfoType OnigOpInfo[] = { { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, { OP_PREC_READ_START, "push-pos", ARG_NON 
}, { OP_PREC_READ_END, "pop-pos", ARG_NON }, - { OP_PUSH_PREC_READ_NOT, "push-prec-read-not", ARG_RELADDR }, - { OP_FAIL_PREC_READ_NOT, "fail-prec-read-not", ARG_NON }, - { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, - { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, + { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR }, + { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON }, + { OP_ATOMIC_START, "atomic-start", ARG_NON }, + { OP_ATOMIC_END, "atomic-end", ARG_NON }, { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, - { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, - { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, + { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON }, { OP_CALL, "call", ARG_ABSADDR }, { OP_RETURN, "return", ARG_NON }, { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, @@ -7077,7 +7070,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, fprintf(f, ":%d", len); break; - case OP_PUSH_LOOK_BEHIND_NOT: + case OP_LOOK_BEHIND_NOT_START: GET_RELADDR_INC(addr, bp); GET_LENGTH_INC(len, bp); fprintf(f, ":%d:", len); @@ -7113,8 +7106,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, break; default: - fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", - *--bp); + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); } } if (nextp) *nextp = bp; @@ -7188,8 +7180,7 @@ print_indent_tree(FILE* f, Node* node, int indent) break; case NODE_STRING: - fprintf(f, "", - (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node); + fprintf(f, "", (NODE_STRING_IS_RAW(node) ? 
"-raw" : ""), node); for (p = STR_(node)->s; p < STR_(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) fputc(*p, f); @@ -7303,8 +7294,8 @@ print_indent_tree(FILE* f, Node* node, int indent) case NODE_QUANT: fprintf(f, "{%d,%d}%s\n", node, - QUANT_(node)->lower, QUANT_(node)->upper, - (QUANT_(node)->greedy ? "" : "?")); + QUANT_(node)->lower, QUANT_(node)->upper, + (QUANT_(node)->greedy ? "" : "?")); print_indent_tree(f, NODE_BODY(node), indent + add); break; diff --git a/src/regenc.c b/src/regenc.c index 97b5052..7ded5a8 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -2,7 +2,7 @@ regenc.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2017 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -780,7 +780,7 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) if (enclen(enc, buf) != (p - buf)) return ONIGERR_INVALID_CODE_POINT_VALUE; #endif - return p - buf; + return (int )(p - buf); } extern int @@ -803,7 +803,7 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) if (enclen(enc, buf) != (p - buf)) return ONIGERR_INVALID_CODE_POINT_VALUE; #endif - return p - buf; + return (int )(p - buf); } extern int diff --git a/src/regerror.c b/src/regerror.c index c0d2346..e7d2570 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -236,7 +236,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, *is_over = ((p < end) ? 1 : 0); } else { - len = MIN((end - s), buf_size); + len = MIN((int )(end - s), buf_size); xmemcpy(buf, s, (size_t )len); *is_over = ((buf_size < (end - s)) ? 
1 : 0); } @@ -300,7 +300,7 @@ onig_error_code_to_str(s, code, va_alist) } } *p = '\0'; - len = p - s; + len = (int )(p - s); break; default: @@ -340,7 +340,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); va_end(args); - need = (pat_end - pat) * 4 + 4; + need = (int )(pat_end - pat) * 4 + 4; if (n + need < bufsize) { xstrcat((char* )buf, ": /", bufsize); diff --git a/src/regexec.c b/src/regexec.c index 9dbef70..53f42ee 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -326,12 +326,11 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_EMPTY_CHECK_START 0x3000 #define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */ #define STK_MEM_END_MARK 0x8400 -#define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BACKTRACK 0x0600 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0700 -#define STK_CALL_FRAME 0x0800 -#define STK_RETURN 0x0900 -#define STK_SAVE_VAL 0x0a00 +#define STK_TO_VOID_START 0x0500 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0600 +#define STK_CALL_FRAME 0x0700 +#define STK_RETURN 0x0800 +#define STK_SAVE_VAL 0x0900 /* stack type check mask */ #define STK_MASK_POP_USED STK_ALT_FLAG @@ -342,6 +341,7 @@ typedef intptr_t StackIndex; typedef struct _StackType { unsigned int type; + int id; union { struct { UChar *pcode; /* byte code position */ @@ -354,31 +354,26 @@ typedef struct _StackType { struct { int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ UChar *pcode; /* byte code position (head of repeated target) */ - int num; /* repeat id */ } repeat; struct { StackIndex si; /* index of stack */ } repeat_inc; struct { - int num; /* memory num */ UChar *pstr; /* start/end position */ /* Following information is set, if this stack type is MEM-START */ StackIndex start; /* prev. info (for backtrack "(...)*" ) */ StackIndex end; /* prev. 
info (for backtrack "(...)*" ) */ } mem; struct { - int num; /* null check id */ UChar *pstr; /* start position */ } empty_check; #ifdef USE_CALL struct { UChar *ret_addr; /* byte code position */ - int num; /* null check id */ UChar *pstr; /* string position */ } call_frame; #endif struct { - int id; enum SaveType type; UChar* v; UChar* v2; @@ -410,7 +405,7 @@ typedef struct _StackType { #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\ if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ offset = ((offset) * (state_num)) >> 3;\ @@ -481,7 +476,7 @@ typedef struct _StackType { #define STACK_SAVE do{\ - msa->stack_n = stk_end - stk_base;\ + msa->stack_n = (int )(stk_end - stk_base);\ if (is_alloca != 0) {\ size_t size = sizeof(StackIndex) * msa->ptr_num \ + sizeof(StackType) * msa->stack_n;\ @@ -517,9 +512,8 @@ onig_set_match_stack_limit_size(unsigned int size) static int stack_double(int is_alloca, char** arg_alloc_base, - StackType** arg_stk_base, - StackType** arg_stk_end, StackType** arg_stk, - OnigMatchArg* msa) + StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, + OnigMatchArg* msa) { unsigned int n; int used; @@ -534,7 +528,7 @@ stack_double(int is_alloca, char** arg_alloc_base, stk_end = *arg_stk_end; stk = *arg_stk; - n = stk_end - stk_base; + n = (unsigned int )(stk_end - stk_base); size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; n *= 2; new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; @@ -561,17 +555,17 @@ stack_double(int is_alloca, char** arg_alloc_base, } alloc_base = new_alloc_base; - used = stk - stk_base; + used = (int )(stk - stk_base); *arg_alloc_base = alloc_base; *arg_stk_base = (StackType* )(alloc_base - + (sizeof(StackIndex) * msa->ptr_num)); + + 
(sizeof(StackIndex) * msa->ptr_num)); *arg_stk = *arg_stk_base + used; *arg_stk_end = *arg_stk_base + n; return 0; } -#define STACK_ENSURE(n) do {\ - if (stk_end - stk < (n)) {\ +#define STACK_ENSURE(n) do {\ + if ((int )(stk_end - stk) < (n)) {\ int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\ if (r != 0) { STACK_SAVE; return r; } \ is_alloca = 0;\ @@ -605,7 +599,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define ELSE_IF_STATE_CHECK_MARK(stk) \ else if ((stk)->type == STK_STATE_CHECK_MARK) { \ int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ - state_check_buff[x/8] |= (1<<(x%8)); \ + state_check_buff[x/8] |= (1<<(x%8));\ } #define STACK_PUSH(stack_type,pat,s,sprev) do {\ @@ -667,17 +661,18 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) -#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) +#define STACK_PUSH_POS(s,sprev) \ + STACK_PUSH(STK_TO_VOID_START,NULL_UCHARP,s,sprev) #define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \ STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev) -#define STACK_PUSH_STOP_BACKTRACK STACK_PUSH_TYPE(STK_STOP_BACKTRACK) +#define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START) #define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \ STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev) -#define STACK_PUSH_REPEAT(id, pat) do {\ +#define STACK_PUSH_REPEAT(sid, pat) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT;\ - stk->u.repeat.num = (id);\ + stk->id = (sid);\ stk->u.repeat.pcode = (pat);\ stk->u.repeat.count = 0;\ STACK_INC;\ @@ -693,7 +688,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_START(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_START;\ - stk->u.mem.num = (mnum);\ + stk->id = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ 
stk->u.mem.end = mem_end_stk[mnum];\ @@ -705,7 +700,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_END(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END;\ - stk->u.mem.num = (mnum);\ + stk->id = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ stk->u.mem.end = mem_end_stk[mnum];\ @@ -716,7 +711,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_END_MARK(mnum) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END_MARK;\ - stk->u.mem.num = (mnum);\ + stk->id = (mnum);\ STACK_INC;\ } while(0) @@ -726,10 +721,10 @@ stack_double(int is_alloca, char** arg_alloc_base, while (k > stk_base) {\ k--;\ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ - && k->u.mem.num == (mnum)) {\ + && k->id == (mnum)) {\ level++;\ }\ - else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + else if (k->type == STK_MEM_START && k->id == (mnum)) {\ if (level == 0) break;\ level--;\ }\ @@ -757,7 +752,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_EMPTY_CHECK_START;\ - stk->u.empty_check.num = (cnum);\ + stk->id = (cnum);\ stk->u.empty_check.pstr = (s);\ STACK_INC;\ } while(0) @@ -765,7 +760,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\ STACK_ENSURE(1);\ stk->type = STK_EMPTY_CHECK_END;\ - stk->u.empty_check.num = (cnum);\ + stk->id = (cnum);\ STACK_INC;\ } while(0) @@ -785,7 +780,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ - stk->u.val.id = (sid);\ + stk->id = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ STACK_INC;\ @@ -794,7 +789,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ - stk->u.val.id = (sid);\ + 
stk->id = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ stk->u.val.v2 = sprev;\ @@ -820,7 +815,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->u.val.id == (sid)) {\ + && k->id == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ break;\ @@ -840,7 +835,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->u.val.id == (sid)) {\ + && k->id == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ sprev = k->u.val.v2;\ @@ -905,8 +900,8 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + mem_start_stk[stk->id] = stk->u.mem.start;\ + mem_end_stk[stk->id] = stk->u.mem.end;\ }\ ELSE_IF_STATE_CHECK_MARK(stk);\ }\ @@ -917,15 +912,15 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP 3"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + mem_start_stk[stk->id] = stk->u.mem.start;\ + mem_end_stk[stk->id] = stk->u.mem.end;\ }\ else if (stk->type == STK_REPEAT_INC) {\ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ }\ else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + mem_start_stk[stk->id] = stk->u.mem.start;\ + mem_end_stk[stk->id] = stk->u.mem.end;\ }\ ELSE_IF_STATE_CHECK_MARK(stk);\ }\ @@ -939,15 +934,15 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, 
"STACK_POP_TIL_ALT_PREC_READ_NOT"); \ if (stk->type == STK_ALT_PREC_READ_NOT) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + mem_start_stk[stk->id] = stk->u.mem.start;\ + mem_end_stk[stk->id] = stk->u.mem.end;\ }\ else if (stk->type == STK_REPEAT_INC) {\ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ }\ else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + mem_start_stk[stk->id] = stk->u.mem.start;\ + mem_end_stk[stk->id] = stk->u.mem.end;\ }\ ELSE_IF_STATE_CHECK_MARK(stk);\ }\ @@ -959,57 +954,42 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \ if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + mem_start_stk[stk->id] = stk->u.mem.start;\ + mem_end_stk[stk->id] = stk->u.mem.end;\ }\ else if (stk->type == STK_REPEAT_INC) {\ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ }\ else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + mem_start_stk[stk->id] = stk->u.mem.start;\ + mem_end_stk[stk->id] = stk->u.mem.end;\ }\ ELSE_IF_STATE_CHECK_MARK(stk);\ }\ } while(0) -#define STACK_POS_END(k) do {\ +#define STACK_EXEC_TO_VOID(k) do {\ k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_POS_END"); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_POS) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_STOP_BACKTRACK_END do {\ - StackType *k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_STOP_BACKTRACK_END"); \ + STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \ if (IS_TO_VOID_TARGET(k)) {\ k->type = STK_VOID;\ }\ 
- else if (k->type == STK_STOP_BACKTRACK) {\ + else if (k->type == STK_TO_VOID_START) {\ k->type = STK_VOID;\ break;\ }\ }\ } while(0) -#define STACK_EMPTY_CHECK(isnull,id,s) do {\ +#define STACK_EMPTY_CHECK(isnull,sid,s) do {\ StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->u.empty_check.num == (id)) {\ + if (k->id == (sid)) {\ (isnull) = (k->u.empty_check.pstr == (s));\ break;\ }\ @@ -1018,13 +998,13 @@ stack_double(int is_alloca, char** arg_alloc_base, } while(0) #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT -#define STACK_EMPTY_CHECK_MEMST(isnull,id,s,reg) do {\ +#define STACK_EMPTY_CHECK_MEMST(isnull,sid,s,reg) do {\ StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->u.empty_check.num == (id)) {\ + if (k->id == (sid)) {\ if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ break;\ @@ -1037,7 +1017,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -1058,14 +1038,14 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#define STACK_EMPTY_CHECK_MEMST_REC(isnull,id,s,reg) do {\ +#define STACK_EMPTY_CHECK_MEMST_REC(isnull,sid,s,reg) do {\ int level = 0;\ StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->u.empty_check.num == (id)) {\ + if (k->id == (sid)) {\ if (level == 0) {\ if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ @@ -1079,7 +1059,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ 
+ if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -1101,7 +1081,7 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ }\ else if (k->type == STK_EMPTY_CHECK_END) {\ - if (k->u.empty_check.num == (id)) level++;\ + if (k->id == (sid)) level++;\ }\ }\ } while(0) @@ -1128,7 +1108,7 @@ stack_double(int is_alloca, char** arg_alloc_base, } while(0) #endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */ -#define STACK_GET_REPEAT(id, k) do {\ +#define STACK_GET_REPEAT(sid, k) do {\ int level = 0;\ k = stk;\ while (1) {\ @@ -1136,7 +1116,7 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ if (k->type == STK_REPEAT) {\ if (level == 0) {\ - if (k->u.repeat.num == (id)) {\ + if (k->id == (sid)) {\ break;\ }\ }\ @@ -1177,7 +1157,7 @@ stack_double(int is_alloca, char** arg_alloc_base, } while(0) static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, - UChar* s1, UChar** ps2, int mblen) + UChar* s1, UChar** ps2, int mblen) { UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1245,7 +1225,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, while (k < stk_top) { if (k->type == STK_MEM_START) { - n = k->u.mem.num; + n = k->id; if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && MEM_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); @@ -1263,7 +1243,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } } else if (k->type == STK_MEM_END) { - if (k->u.mem.num == node->group) { + if (k->id == node->group) { node->end = (int )(k->u.mem.pstr - str); *kp = k; return 0; @@ -1289,10 +1269,12 @@ static int mem_is_in_memp(int mem, int num, UChar* memp) return 0; } -static int backref_match_at_nested_level(regex_t* reg - , StackType* top, StackType* stk_base - , int ignore_case, int case_fold_flag - , int nest, int mem_num, UChar* memp, UChar** s, const 
UChar* send) +static int +backref_match_at_nested_level(regex_t* reg, + StackType* top, StackType* stk_base, + int ignore_case, int case_fold_flag, + int nest, int mem_num, UChar* memp, + UChar** s, const UChar* send) { UChar *ss, *p, *pstart, *pend = NULL_UCHARP; int level; @@ -1310,7 +1292,7 @@ static int backref_match_at_nested_level(regex_t* reg } else if (level == nest) { if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + if (mem_is_in_memp(k->id, mem_num, memp)) { pstart = k->u.mem.pstr; if (IS_NOT_NULL(pend)) { if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ @@ -1334,7 +1316,7 @@ static int backref_match_at_nested_level(regex_t* reg } } else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + if (mem_is_in_memp(k->id, mem_num, memp)) { pend = k->u.mem.pstr; } } @@ -1365,7 +1347,7 @@ backref_check_at_nested_level(regex_t* reg, } else if (level == nest) { if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + if (mem_is_in_memp(k->id, mem_num, memp)) { return 1; } } @@ -1478,9 +1460,9 @@ typedef struct { static int match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* in_right_range, + const UChar* in_right_range, #endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) + const UChar* sstart, UChar* sprev, OnigMatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1521,7 +1503,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", str, end, sstart, sprev); fprintf(stderr, "size: %d, start offset: %d\n", - (int )(end - str), (int )(sstart - str)); + (int )(end - str), (int )(sstart - str)); #endif STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ @@ -1560,7 +1542,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sbegin = s; switch (*p++) { case OP_END: 
MOP_IN(OP_END); - n = s - sstart; + n = (int )(s - sstart); if (n > best_len) { OnigRegion* region; #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE @@ -1582,18 +1564,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (IS_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; - rmt[0].rm_so = keep - str; - rmt[0].rm_eo = s - str; + rmt[0].rm_so = (regoff_t )(keep - str); + rmt[0].rm_eo = (regoff_t )(s - str); for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { if (MEM_STATUS_AT(reg->bt_mem_start, i)) - rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); else - rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; + rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str); - rmt[i].rm_eo = (MEM_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; + rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) + - str); } else { rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; @@ -1602,18 +1585,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else { #endif /* USE_POSIX_API_REGION_OPTION */ - region->beg[0] = keep - str; - region->end[0] = s - str; + region->beg[0] = (int )(keep - str); + region->end[0] = (int )(s - str); for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { if (MEM_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); else - region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str); - region->end[i] = (MEM_STATUS_AT(reg->bt_mem_end, i) - ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; + region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str); } else { region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; @@ -1635,8 +1618,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } node->group = 0; - node->beg = keep - str; - node->end = s - str; + node->beg = (int )(keep - str); + node->end = (int )(s - str); stkp = stk_base; r = make_capture_history_tree(region->history_root, &stkp, @@ -2053,6 +2036,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } MOP_OUT; + continue; break; case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); @@ -2070,6 +2054,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } MOP_OUT; + continue; break; case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); @@ -2454,7 +2439,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; + n = (int )(pend - pstart); DATA_ENSURE(n); sprev = s; STRING_CMP(pstart, s, n); @@ -2483,7 +2468,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; + n = (int )(pend - pstart); DATA_ENSURE(n); sprev = s; STRING_CMP_IC(case_fold_flag, pstart, &s, n); @@ -2515,7 +2500,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; + n = (int )(pend - pstart); DATA_ENSURE(n); sprev = s; swork = s; @@ -2554,7 +2539,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; + n = (int )(pend - pstart); DATA_ENSURE(n); sprev = s; swork = s; @@ -2708,7 +2693,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fprintf(stderr, "EMPTY_CHECK_END_MEMST: skip id:%d, s:%p\n", (int)mem, s); #endif if (is_empty == -1) goto fail; - goto empty_check_found; + goto empty_check_found; } } MOP_OUT; @@ -2734,7 +2719,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, (int )mem, s); #endif if (is_empty == -1) goto fail; - goto empty_check_found; + goto empty_check_found; } else { STACK_PUSH_EMPTY_CHECK_END(mem); @@ -2943,7 +2928,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_PREC_READ_END: MOP_IN(OP_PREC_READ_END); { - STACK_POS_END(stkp); + STACK_EXEC_TO_VOID(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } @@ -2951,26 +2936,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, continue; break; - case OP_PUSH_PREC_READ_NOT: MOP_IN(OP_PUSH_PREC_READ_NOT); + case OP_PREC_READ_NOT_START: MOP_IN(OP_PREC_READ_NOT_START); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); MOP_OUT; continue; break; - case OP_FAIL_PREC_READ_NOT: MOP_IN(OP_FAIL_PREC_READ_NOT); + case OP_PREC_READ_NOT_END: MOP_IN(OP_PREC_READ_NOT_END); STACK_POP_TIL_ALT_PREC_READ_NOT; goto fail; break; - case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); - STACK_PUSH_STOP_BACKTRACK; + case OP_ATOMIC_START: MOP_IN(OP_ATOMIC_START); + STACK_PUSH_TO_VOID_START; MOP_OUT; continue; break; - case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); - STACK_STOP_BACKTRACK_END; + case OP_ATOMIC_END: MOP_IN(OP_ATOMIC_END); + STACK_EXEC_TO_VOID(stkp); MOP_OUT; continue; break; @@ -2984,7 +2969,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, continue; break; - case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); + case OP_LOOK_BEHIND_NOT_START: MOP_IN(OP_LOOK_BEHIND_NOT_START); GET_RELADDR_INC(addr, 
p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -3003,7 +2988,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, continue; break; - case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); + case OP_LOOK_BEHIND_NOT_END: MOP_IN(OP_LOOK_BEHIND_NOT_END); STACK_POP_TIL_ALT_LOOK_BEHIND_NOT; goto fail; break; @@ -3051,6 +3036,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR); { UpdateVarType type; + enum SaveType save_type; + GET_UPDATE_VAR_TYPE_INC(type, p); GET_MEMNUM_INC(mem, p); /* mem: save id */ switch ((enum UpdateVarType )type) { @@ -3061,10 +3048,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s); break; case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK: - STACK_GET_SAVE_VAL_TYPE_LAST_ID(SAVE_S, mem, right_range); + save_type = SAVE_S; + goto get_save_val_type_last_id; break; case UPDATE_VAR_RIGHT_RANGE_FROM_STACK: - STACK_GET_SAVE_VAL_TYPE_LAST_ID(SAVE_RIGHT_RANGE, mem, right_range); + save_type = SAVE_RIGHT_RANGE; + get_save_val_type_last_id: + STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range); break; case UPDATE_VAR_RIGHT_RANGE_INIT: INIT_RIGHT_RANGE; @@ -3128,7 +3118,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, static UChar* slow_search(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) + const UChar* text, const UChar* text_end, UChar* text_range) { UChar *t, *p, *s, *end; @@ -3160,7 +3150,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, static int str_lower_case_match(OnigEncoding enc, int case_fold_flag, const UChar* t, const UChar* tend, - const UChar* p, const UChar* end) + const UChar* p, const UChar* end) { int lowlen; UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -3169,7 +3159,7 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag, lowlen = 
ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { - if (*t++ != *q++) return 0; + if (*t++ != *q++) return 0; lowlen--; } } @@ -3179,8 +3169,8 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag, static UChar* slow_search_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) { UChar *s, *end; @@ -3193,7 +3183,7 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag, while (s < end) { if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, text_end)) + s, text_end)) return s; s += enclen(enc, s); @@ -3204,8 +3194,8 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag, static UChar* slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *t, *p, *s; @@ -3236,9 +3226,9 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, static UChar* slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *s; @@ -3262,8 +3252,8 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, static UChar* bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *se, *t, *p, *end; const UChar *tail; @@ -3275,7 +3265,7 @@ bm_search_notrev(regex_t* 
reg, const UChar* target, const UChar* target_end, #endif tail = target_end - 1; - tlen1 = tail - target; + tlen1 = (int )(tail - target); end = text_range; if (end + tlen1 > text_end) end = text_end - tlen1; @@ -3318,7 +3308,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, static UChar* bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) + const UChar* text, const UChar* text_end, const UChar* text_range) { const UChar *s, *t, *p, *end; const UChar *tail; @@ -3356,9 +3346,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, #ifdef USE_INT_MAP_BACKWARD static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) - +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip) { int i, len; @@ -3379,8 +3367,8 @@ set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, static UChar* bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { const UChar *s, *t, *p; @@ -3409,7 +3397,7 @@ bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, static UChar* map_search(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* text_range) + const UChar* text, const UChar* text_range) { const UChar *s = text; @@ -3423,8 +3411,8 @@ map_search(OnigEncoding enc, UChar map[], static UChar* map_search_backward(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* adjust_text, - const UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_start) { const UChar *s = text_start; @@ -3437,8 +3425,8 @@ map_search_backward(OnigEncoding enc, UChar map[], } extern int -onig_match(regex_t* 
reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, - OnigOptionType option) +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, + OnigRegion* region, OnigOptionType option) { int r; UChar *prev; @@ -3473,9 +3461,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); r = match_at(reg, str, end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, + end, #endif - at, prev, &msa); + at, prev, &msa); } end: @@ -3485,7 +3473,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On static int forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, - UChar* range, UChar** low, UChar** high, UChar** low_prev) + UChar* range, UChar** low, UChar** high, UChar** low_prev) { UChar *p, *pprev = (UChar* )NULL; @@ -3609,8 +3597,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, - "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", - (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); + "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", + (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); #endif return 1; /* success */ } @@ -3623,8 +3611,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, static int backward_search_range(regex_t* reg, const UChar* str, const UChar* end, - UChar* s, const UChar* range, UChar* adjrange, - UChar** low, UChar** high) + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) { UChar *p; @@ -3636,7 +3624,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ONIG_OPTIMIZE_EXACT: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, - range, adjrange, end, p); + range, adjrange, end, p); break; case 
ONIG_OPTIMIZE_EXACT_IC: @@ -3718,7 +3706,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "backward_search_range: low: %d, high: %d\n", - (int )(*low - str), (int )(*high - str)); + (int )(*low - str), (int )(*high - str)); #endif return 1; /* success */ } @@ -3733,7 +3721,8 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, extern int onig_search(regex_t* reg, const UChar* str, const UChar* end, - const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option) { int r; UChar *s, *prev; @@ -3926,7 +3915,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", - (int )(end - str), (int )(start - str), (int )(range - str)); + (int )(end - str), (int )(start - str), (int )(range - str)); #endif MATCH_ARG_INIT(msa, reg, option, region, orig_start); @@ -4110,14 +4099,14 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, match: MATCH_ARG_FREE(msa); - return s - str; + return (int )(s - str); } extern int onig_scan(regex_t* reg, const UChar* str, const UChar* end, - OnigRegion* region, OnigOptionType option, - int (*scan_callback)(int, int, OnigRegion*, void*), - void* callback_arg) + OnigRegion* region, OnigOptionType option, + int (*scan_callback)(int, int, OnigRegion*, void*), + void* callback_arg) { int r; int n; diff --git a/src/regext.c b/src/regext.c index 62a557c..996d043 100644 --- a/src/regext.c +++ b/src/regext.c @@ -95,7 +95,7 @@ static int conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end, UChar** conv, UChar** conv_end) { - int len = end - s; + int len = (int )(end - s); if (to == ONIG_ENCODING_UTF16_BE) { if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { diff --git a/src/reggnu.c 
b/src/reggnu.c index 1de82fb..50eb9b4 100644 --- a/src/reggnu.c +++ b/src/reggnu.c @@ -2,7 +2,7 @@ reggnu.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako + * Copyright (c) 2002-2017 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -54,7 +54,7 @@ re_adjust_startpos(regex_t* reg, const char* string, int size, else { p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); } - return p - (UChar* )string; + return (int )(p - (UChar* )string); } return startpos; diff --git a/src/regint.h b/src/regint.h index 9dc1723..256b045 100644 --- a/src/regint.h +++ b/src/regint.h @@ -61,7 +61,6 @@ #define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR -/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ /* internal config */ #define USE_OP_PUSH_OR_JUMP_EXACT @@ -139,10 +138,16 @@ #endif +#include + #ifdef HAVE_STDLIB_H #include #endif +#ifdef HAVE_STDINT_H +#include +#endif + #if defined(HAVE_ALLOCA_H) && !defined(__GNUC__) #include #endif @@ -174,7 +179,8 @@ #ifdef _WIN32 #if defined(_MSC_VER) && (_MSC_VER < 1300) -typedef int intptr_t; +typedef int intptr_t; +typedef unsigned int uintptr_t; #endif #endif @@ -186,6 +192,7 @@ typedef int intptr_t; #ifdef MAX #undef MAX #endif + #define MIN(a,b) (((a)>(b))?(b):(a)) #define MAX(a,b) (((a)<(b))?(b):(a)) @@ -219,20 +226,19 @@ typedef int intptr_t; #endif #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ - (pad_size) = WORD_ALIGNMENT_SIZE \ - - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ + (pad_size) = WORD_ALIGNMENT_SIZE - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ } while (0) #define ALIGNMENT_RIGHT(addr) do {\ (addr) += 
(WORD_ALIGNMENT_SIZE - 1);\ - (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ + (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ } while (0) #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ typedef struct { - int num_keeper; + int num_keeper; int* keepers; } RegExt; @@ -241,9 +247,9 @@ typedef struct { /* stack pop level */ enum StackPopLevel { - STACK_POP_LEVEL_FREE = 0, + STACK_POP_LEVEL_FREE = 0, STACK_POP_LEVEL_MEM_START = 1, - STACK_POP_LEVEL_ALL =2 + STACK_POP_LEVEL_ALL = 2 }; /* optimize flags */ @@ -353,7 +359,7 @@ typedef Bits* BitSetRef; #define BITSET_CLEAR(bs) do {\ int i;\ - for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ + for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ } while (0) #define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] @@ -538,7 +544,7 @@ enum OpCode { OP_BACKREF_MULTI_IC, OP_BACKREF_WITH_LEVEL, /* \k, \k */ OP_BACKREF_CHECK, /* (?(n)), (?('name')) */ - OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n)), (?('name')) */ + OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */ OP_MEMORY_START, OP_MEMORY_START_PUSH, /* push back-tracker to stack */ @@ -565,15 +571,15 @@ enum OpCode { OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */ OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ - OP_PREC_READ_START, /* (?=...) start */ - OP_PREC_READ_END, /* (?=...) end */ - OP_PUSH_PREC_READ_NOT, /* (?!...) start */ - OP_FAIL_PREC_READ_NOT, /* (?!...) end */ - OP_PUSH_STOP_BT, /* (?>...) start */ - OP_POP_STOP_BT, /* (?>...) end */ - OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ - OP_PUSH_LOOK_BEHIND_NOT, /* (?...) start */ + OP_ATOMIC_END, /* (?>...) end */ + OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ + OP_LOOK_BEHIND_NOT_START, /* (? 
*/ OP_RETURN, @@ -663,9 +669,9 @@ typedef int ModeType; #define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_WORD_BOUNDARY (SIZE_OPCODE + SIZE_MODE) #define SIZE_OP_PREC_READ_START SIZE_OPCODE -#define SIZE_OP_PUSH_PREC_READ_NOT (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PREC_READ_NOT_START (SIZE_OPCODE + SIZE_RELADDR) #define SIZE_OP_PREC_READ_END SIZE_OPCODE -#define SIZE_OP_FAIL_PREC_READ_NOT SIZE_OPCODE +#define SIZE_OP_PREC_READ_NOT_END SIZE_OPCODE #define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) #define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) #define SIZE_OP_FAIL SIZE_OPCODE @@ -675,13 +681,13 @@ typedef int ModeType; #define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE -#define SIZE_OP_POP_STOP_BT SIZE_OPCODE +#define SIZE_OP_ATOMIC_START SIZE_OPCODE +#define SIZE_OP_ATOMIC_END SIZE_OPCODE #define SIZE_OP_EMPTY_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_EMPTY_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) -#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) -#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE +#define SIZE_OP_LOOK_BEHIND_NOT_START (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) +#define SIZE_OP_LOOK_BEHIND_NOT_END SIZE_OPCODE #define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) #define SIZE_OP_RETURN SIZE_OPCODE #define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM) @@ -790,16 +796,17 @@ extern int onig_print_statistics P_((FILE* f)); #endif #endif -extern void onig_warning(const char* s); +extern void onig_warning(const char* s); extern UChar* onig_error_code_to_format P_((int code)); -extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); -extern int 
onig_bbuf_init P_((BBuf* buf, int size)); -extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); -extern void onig_transfer P_((regex_t* to, regex_t* from)); -extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); +extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); +extern int onig_bbuf_init P_((BBuf* buf, int size)); +extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); +extern void onig_transfer P_((regex_t* to, regex_t* from)); +extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); /* strend hash */ typedef void hash_table_type; + #ifdef _WIN32 # include typedef ULONG_PTR hash_data_type; diff --git a/src/regparse.c b/src/regparse.c index 1fb2357..1e4dc30 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -38,7 +38,7 @@ #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS -OnigSyntaxType OnigSyntaxRuby = { +OnigSyntaxType OnigSyntaxOniguruma = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | @@ -60,7 +60,7 @@ OnigSyntaxType OnigSyntaxRuby = { ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | - ONIG_SYN_OP2_ESC_H_XDIGIT ) + ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 ) , ( SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | @@ -81,7 +81,49 @@ OnigSyntaxType OnigSyntaxRuby = { } }; -OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; +OnigSyntaxType OnigSyntaxRuby = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + 
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | + ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_RUBY | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | + ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER | + ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | + ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | + ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | + ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA; extern void onig_null_warn(const char* s ARG_UNUSED) { } @@ -282,7 +324,7 @@ onig_strncmp(const UChar* s1, const UChar* s2, int n) extern void onig_strcpy(UChar* dest, const UChar* src, const UChar* end) { - int len = end - src; + int len = (int )(end - src); if (len > 0) { xmemcpy(dest, src, len); dest[len] = (UChar )0; @@ -295,7 +337,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) int slen, term_len, i; UChar *r; - slen = end - s; + slen = (int )(end - s); term_len = ONIGENC_MBC_MINLEN(enc); r = (UChar* )xmalloc(slen + term_len); @@ -365,7 +407,7 @@ save_entry(ScanEnv* env, enum SaveType type, int* id) static UChar* strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, - int capa) + int capa) { UChar* r; @@ -382,7 +424,7 @@ strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end /* dest on static area */ static UChar* strcat_capa_from_static(UChar* dest, UChar* dest_end, - const UChar* src, const UChar* src_end, int capa) + const UChar* src, const UChar* src_end, int capa) { UChar* r; @@ -450,7 +492,7 @@ onig_st_init_strend_table_with_size(int size) extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type *value) + const UChar* end_key, hash_data_type *value) { st_str_end_key key; @@ -462,7 +504,7 @@ onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type value) + const UChar* end_key, hash_data_type value) { st_str_end_key* key; int result; @@ -834,7 +876,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) (HashDataType )e); if (r < 0) return r; - e->name_len = 
name_end - name; + e->name_len = (int )(name_end - name); e->back_num = 0; e->back_alloc = 0; e->back_refs = (int* )NULL; @@ -919,7 +961,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) extern int onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) + const UChar* name_end, int** nums) { NameEntry* e = name_find(reg, name, name_end); @@ -940,7 +982,7 @@ onig_name_to_group_numbers(regex_t* reg, const UChar* name, extern int onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion *region) + const UChar* name_end, OnigRegion *region) { int i, n, *nums; @@ -1361,9 +1403,9 @@ onig_node_new_anchor(int type, int ascii_mode) static Node* node_new_backref(int back_num, int* backrefs, int by_name, #ifdef USE_BACKREF_WITH_LEVEL - int exist_level, int nest_level, + int exist_level, int nest_level, #endif - ScanEnv* env) + ScanEnv* env) { int i; Node* node = node_new(); @@ -1411,9 +1453,9 @@ node_new_backref(int back_num, int* backrefs, int by_name, static Node* node_new_backref_checker(int back_num, int* backrefs, int by_name, #ifdef USE_BACKREF_WITH_LEVEL - int exist_level, int nest_level, + int exist_level, int nest_level, #endif - ScanEnv* env) + ScanEnv* env) { Node* node; @@ -2051,10 +2093,10 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, extern int onig_node_str_cat(Node* node, const UChar* s, const UChar* end) { - int addlen = end - s; + int addlen = (int )(end - s); if (addlen > 0) { - int len = STR_(node)->end - STR_(node)->s; + int len = (int )(STR_(node)->end - STR_(node)->s); if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) { UChar* p; @@ -2245,21 +2287,24 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) } static int -scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) +scan_unsigned_hexadecimal_number(UChar** src, UChar* end, 
int minlen, + int maxlen, OnigEncoding enc) { OnigCodePoint c; unsigned int num, val; + int n; UChar* p = *src; PFETCH_READY; num = 0; - while (! PEND && maxlen-- != 0) { + n = 0; + while (! PEND && n < maxlen) { PFETCH(c); if (IS_CODE_XDIGIT_ASCII(enc, c)) { + n++; val = (unsigned int )XDIGITVAL(enc,c); if ((INT_MAX_LIMIT - val) / 16UL < num) - return -1; /* overflow */ + return ONIGERR_TOO_BIG_NUMBER; /* overflow */ num = (num << 4) + XDIGITVAL(enc,c); } @@ -2268,6 +2313,10 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, break; } } + + if (n < minlen) + return ONIGERR_INVALID_CODE_POINT_VALUE; + *src = p; return num; } @@ -3477,8 +3526,9 @@ CC_ESC_WARN(ScanEnv* env, UChar *c) IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { UChar buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"character class has '%s' without escape", c); + env->pattern, env->pattern_end, + (UChar* )"character class has '%s' without escape", + c); (*onig_warn)((char* )buf); } } @@ -3491,8 +3541,8 @@ CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { UChar buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, - (env)->pattern, (env)->pattern_end, - (UChar* )"regular expression has '%s' without escape", c); + (env)->pattern, (env)->pattern_end, + (UChar* )"regular expression has '%s' without escape", c); (*onig_warn)((char* )buf); } } @@ -3700,8 +3750,13 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); + if (num < 0) { + if (num == ONIGERR_TOO_BIG_NUMBER) + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + else + 
return num; + } if (!PEND) { c2 = PPEEK; if (IS_CODE_XDIGIT_ASCII(enc, c2)) @@ -3720,8 +3775,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } } else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); + if (num < 0) return num; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -3736,8 +3791,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); + if (num < 0) return num; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -4103,8 +4158,13 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); + if (num < 0) { + if (num == ONIGERR_TOO_BIG_NUMBER) + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + else + return num; + } if (!PEND) { if (IS_CODE_XDIGIT_ASCII(enc, PPEEK)) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; @@ -4121,8 +4181,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } } else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); + if (num < 0) return num; if (p == prev) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } @@ -4137,8 +4197,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); + if (num < 0) return num; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -4442,20 +4502,91 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (!PEND && PPEEK_IS('?') && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { PINC; - if (!PEND && PPEEK_IS('#')) { - PFETCH(c); - while (1) { - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + if (! PEND) { + c = PPEEK; + if (c == '#') { PFETCH(c); - if (c == MC_ESC(syn)) { - if (!PEND) PFETCH(c); + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + if (c == MC_ESC(syn)) { + if (! PEND) PFETCH(c); + } + else { + if (c == ')') break; + } } - else { - if (c == ')') break; + goto start; + } + else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) { + int gnum; + UChar* name; + UChar* name_end; + enum REF_NUM num_type; + + switch (c) { + case '&': + { + PINC; + name = p; + r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, + &num_type, 0); + if (r < 0) return r; + + tok->type = TK_CALL; + tok->u.call.by_number = 0; + tok->u.call.gnum = 0; + tok->u.call.name = name; + tok->u.call.name_end = name_end; + } + break; + + case 'R': + tok->type = TK_CALL; + tok->u.call.by_number = 1; + tok->u.call.gnum = 0; + tok->u.call.name = p; + PINC; + if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME; + tok->u.call.name_end = p; + break; + + case '-': + case '+': + goto lparen_qmark_num; + break; + default: + if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end; + + lparen_qmark_num: + { + name = p; + r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, + &gnum, &num_type, 1); + if (r < 0) return r; + + if (num_type == IS_NOT_NUM) { + return ONIGERR_INVALID_GROUP_NAME; + } + else { + if (num_type == IS_REL_NUM) { + gnum = backref_rel_to_abs(gnum, env); + if (gnum < 0) + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + tok->u.call.by_number = 1; + tok->u.call.gnum = gnum; + } + + tok->type = TK_CALL; + tok->u.call.name = name; + tok->u.call.name_end = name_end; + } + break; } } - goto start; } + lparen_qmark_end: PUNFETCH; } @@ -4472,14 +4603,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->options) - ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); break; case '$': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->options) - ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); break; case '[': @@ -4731,7 +4862,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) case ONIGENC_CTYPE_ASCII: case ONIGENC_CTYPE_ALNUM: if (not != 0) { - for (c = 0; c < limit; c++) { + for (c = 0; c < (int )limit; c++) { if (! 
ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } @@ -4742,7 +4873,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { - for (c = 0; c < limit; c++) { + for (c = 0; c < (int )limit; c++) { if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } @@ -4753,7 +4884,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) case ONIGENC_CTYPE_PRINT: case ONIGENC_CTYPE_WORD: if (not != 0) { - for (c = 0; c < limit; c++) { + for (c = 0; c < (int )limit; c++) { if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */ && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); @@ -4764,7 +4895,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) } } else { - for (c = 0; c < limit; c++) { + for (c = 0; c < (int )limit; c++) { if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } @@ -5037,8 +5168,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, } static int -parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) +parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r, neg, len, fetched, and_start; OnigCodePoint v, vs; @@ -5171,7 +5301,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); val_entry2: r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, - &state, env); + &state, env); if (r != 0) goto err; break; @@ -5260,7 +5390,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } - + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ @@ -5333,7 +5463,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (state == CCS_VALUE) { r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); + &val_type, &state, env); if (r != 0) goto err; } @@ -6053,7 +6183,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) if (onig_is_code_in_cc(env->enc, from, cc) #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - && !IS_NCCLASS_NOT(cc) + && !IS_NCCLASS_NOT(cc) #endif ) { for (i = 0; i < to_len; i++) { @@ -6316,10 +6446,10 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, (len > 1 ? 
tok->u.backref.refs : &(tok->u.backref.ref1)), tok->u.backref.by_name, #ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level, - tok->u.backref.level, + tok->u.backref.exist_level, + tok->u.backref.level, #endif - env); + env); CHECK_NULL_RETURN_MEMERR(*np); break; diff --git a/src/regparse.h b/src/regparse.h index 57219a4..99fe7c9 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -115,7 +115,7 @@ enum EnclosureType { #define STRING_AMBIG (1<<1) #define STRING_DONT_GET_OPT_INFO (1<<2) -#define NODE_STRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) +#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s) #define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW #define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW #define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG @@ -256,7 +256,7 @@ typedef struct { OnigLen min_len; /* min length (byte) */ OnigLen max_len; /* max length (byte) */ int char_len; /* character length */ - int opt_count; /* referenced count in optimize_node_left() */ + int opt_count; /* referenced count in optimize_nodes() */ } EnclosureNode; #ifdef USE_CALL diff --git a/src/regposix.c b/src/regposix.c index 32b11b5..0fdbcbb 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -40,7 +40,7 @@ if (ONIGENC_MBC_MINLEN(enc) == 1) { \ UChar* tmps = (UChar* )(s); \ while (*tmps != 0) tmps++; \ - len = tmps - (UChar* )(s); \ + len = (int )(tmps - (UChar* )(s));\ } \ else { \ len = onigenc_str_bytelen_null(enc, (UChar* )s); \ diff --git a/src/regsyntax.c b/src/regsyntax.c index 83989b6..3817d38 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -215,7 +215,8 @@ OnigSyntaxType OnigSyntaxPerl_NG = { ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | - ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT ) + ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT | + ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL ) , ( SYN_GNU_REGEX_BV | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP 
| ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) @@ -237,7 +238,7 @@ extern int onig_set_default_syntax(OnigSyntaxType* syntax) { if (IS_NULL(syntax)) - syntax = ONIG_SYNTAX_RUBY; + syntax = ONIG_SYNTAX_ONIGURUMA; OnigDefaultSyntax = syntax; return 0; diff --git a/src/sjis.c b/src/sjis.c index 3378474..88b8d02 100644 --- a/src/sjis.c +++ b/src/sjis.c @@ -2,7 +2,7 @@ sjis.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2017 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -156,7 +156,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf) if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf)) return REGERR_INVALID_CODE_POINT_VALUE; #endif - return p - buf; + return (int )(p - buf); } static int @@ -262,7 +262,7 @@ static int property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) { struct PropertyNameCtype* pc; - int len = end - p; + int len = (int )(end - p); char q[32]; if (len < sizeof(q) - 1) { diff --git a/src/unicode.c b/src/unicode.c index 65de74a..e585937 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -29,6 +29,11 @@ #include "regint.h" +struct PoolPropertyNameCtype { + short int name; + short int ctype; +}; + #define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) @@ -67,183 +72,10 @@ static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; -#ifdef USE_UNICODE_PROPERTIES -#include "unicode_property_data.c" -#else -#include "unicode_property_data_posix.c" -#endif - #include "st.h" -#define USER_DEFINED_PROPERTY_MAX_NUM 20 - -typedef struct { - int ctype; - OnigCodePoint* ranges; -} UserDefinedPropertyValue; - -static int UserDefinedPropertyNum; -static UserDefinedPropertyValue 
-UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM]; -static st_table* UserDefinedPropertyTable; - -extern int -onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) -{ - UserDefinedPropertyValue* e; - int r; - int i; - int n; - int len; - int c; - char* s; - - if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM) - return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS; - - len = strlen(name); - if (len >= PROPERTY_NAME_MAX_SIZE) - return ONIGERR_TOO_LONG_PROPERTY_NAME; - - s = (char* )xmalloc(len + 1); - if (s == 0) - return ONIGERR_MEMORY; - - n = 0; - for (i = 0; i < len; i++) { - c = name[i]; - if (c <= 0 || c >= 0x80) { - xfree(s); - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - } - - if (c != ' ' && c != '-' && c != '_') { - s[n] = c; - n++; - } - } - s[n] = '\0'; - - if (UserDefinedPropertyTable == 0) { - UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10); - } - - e = UserDefinedPropertyRanges + UserDefinedPropertyNum; - e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum; - e->ranges = ranges; - r = onig_st_insert_strend(UserDefinedPropertyTable, - (const UChar* )s, (const UChar* )s + n, - (hash_data_type )((void* )e)); - if (r < 0) return r; - - UserDefinedPropertyNum++; - return 0; -} - -extern int -onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if ( -#ifdef USE_UNICODE_PROPERTIES - ctype <= ONIGENC_MAX_STD_CTYPE && -#endif - code < 256) { - return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); - } - - if (ctype >= CODE_RANGES_NUM) { - int index = ctype - CODE_RANGES_NUM; - if (index < UserDefinedPropertyNum) - return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code); - else - return ONIGERR_TYPE_BUG; - } - - return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); -} - - -extern int -onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[]) -{ - if (ctype >= CODE_RANGES_NUM) { - int index = ctype - CODE_RANGES_NUM; - if (index 
< UserDefinedPropertyNum) { - *ranges = UserDefinedPropertyRanges[index].ranges; - return 0; - } - else - return ONIGERR_TYPE_BUG; - } - - *ranges = CodeRanges[ctype]; - return 0; -} - -extern int -onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, - const OnigCodePoint* ranges[]) -{ - *sb_out = 0x00; - return onigenc_unicode_ctype_code_range(ctype, ranges); -} - -extern int -onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) -{ - int len; - UChar *p; - OnigCodePoint code; - const struct PropertyNameCtype* pc; - char buf[PROPERTY_NAME_MAX_SIZE]; - - p = name; - len = 0; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - - if (code != ' ' && code != '-' && code != '_') { - buf[len++] = (char )code; - if (len >= PROPERTY_NAME_MAX_SIZE) - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - } - - p += enclen(enc, p); - } - - buf[len] = 0; - - if (UserDefinedPropertyTable != 0) { - UserDefinedPropertyValue* e; - e = (UserDefinedPropertyValue* )NULL; - onig_st_lookup_strend(UserDefinedPropertyTable, - (const UChar* )buf, (const UChar* )buf + len, - (hash_data_type* )((void* )(&e))); - if (e != 0) { - return e->ctype; - } - } - - pc = unicode_lookup_property_name(buf, len); - if (pc != 0) { - /* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */ -#ifndef USE_UNICODE_PROPERTIES - if (pc->ctype > ONIGENC_MAX_STD_CTYPE) - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; -#endif - - return pc->ctype; - } - - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; -} - -/* for use macros in unicode_fold_data.c */ #include "unicode_fold_data.c" - extern int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, @@ -389,7 +221,7 @@ apply_case_fold3(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg) extern int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, - OnigApplyAllCaseFoldFunc f, 
void* arg) + OnigApplyAllCaseFoldFunc f, void* arg) { int r; @@ -710,7 +542,7 @@ egcb_get_type(OnigCodePoint code) OnigCodePoint low, high, x; enum EGCB_TYPE type; - for (low = 0, high = EGCB_RANGE_NUM; low < high; ) { + for (low = 0, high = (OnigCodePoint )EGCB_RANGE_NUM; low < high; ) { x = (low + high) >> 1; if (code > EGCB_RANGES[x].end) low = x + 1; @@ -718,8 +550,10 @@ egcb_get_type(OnigCodePoint code) high = x; } - type = (low < EGCB_RANGE_NUM && code >= EGCB_RANGES[low].start) ? + type = (low < (OnigCodePoint )EGCB_RANGE_NUM && + code >= EGCB_RANGES[low].start) ? EGCB_RANGES[low].type : EGCB_Other; + return type; } @@ -864,3 +698,188 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev, else return 1; #endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */ } + + +/* + Undefine __GNUC__ for Escape warnings in Clang. + +./unicode_property_data.c:26730:44: warning: static variable + 'unicode_prop_name_pool_contents' is used in an inline function with + external linkage [-Wstatic-in-inline] + register const char *s = o + unicode_prop_name_pool; +*/ + +#ifdef __clang__ +#undef __GNUC__ +#endif + +#ifdef USE_UNICODE_PROPERTIES +#include "unicode_property_data.c" +#else +#include "unicode_property_data_posix.c" +#endif + +#define USER_DEFINED_PROPERTY_MAX_NUM 20 + +typedef struct { + int ctype; + OnigCodePoint* ranges; +} UserDefinedPropertyValue; + +static int UserDefinedPropertyNum; +static UserDefinedPropertyValue +UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM]; +static st_table* UserDefinedPropertyTable; + +extern int +onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) +{ + UserDefinedPropertyValue* e; + int r; + int i; + int n; + int len; + int c; + char* s; + + if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM) + return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS; + + len = (int )strlen(name); + if (len >= PROPERTY_NAME_MAX_SIZE) + return ONIGERR_TOO_LONG_PROPERTY_NAME; + + s = (char* )xmalloc(len + 1); + 
if (s == 0) + return ONIGERR_MEMORY; + + n = 0; + for (i = 0; i < len; i++) { + c = name[i]; + if (c <= 0 || c >= 0x80) { + xfree(s); + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + } + + if (c != ' ' && c != '-' && c != '_') { + s[n] = c; + n++; + } + } + s[n] = '\0'; + + if (UserDefinedPropertyTable == 0) { + UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10); + } + + e = UserDefinedPropertyRanges + UserDefinedPropertyNum; + e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum; + e->ranges = ranges; + r = onig_st_insert_strend(UserDefinedPropertyTable, + (const UChar* )s, (const UChar* )s + n, + (hash_data_type )((void* )e)); + if (r < 0) return r; + + UserDefinedPropertyNum++; + return 0; +} + +extern int +onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if ( +#ifdef USE_UNICODE_PROPERTIES + ctype <= ONIGENC_MAX_STD_CTYPE && +#endif + code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + + if (ctype >= CODE_RANGES_NUM) { + int index = ctype - CODE_RANGES_NUM; + if (index < UserDefinedPropertyNum) + return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code); + else + return ONIGERR_TYPE_BUG; + } + + return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); +} + + +extern int +onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[]) +{ + if (ctype >= CODE_RANGES_NUM) { + int index = ctype - CODE_RANGES_NUM; + if (index < UserDefinedPropertyNum) { + *ranges = UserDefinedPropertyRanges[index].ranges; + return 0; + } + else + return ONIGERR_TYPE_BUG; + } + + *ranges = CodeRanges[ctype]; + return 0; +} + +extern int +onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + *sb_out = 0x00; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + +extern int +onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) +{ + int len; + UChar *p; + OnigCodePoint code; 
+ const struct PoolPropertyNameCtype* pc; + char buf[PROPERTY_NAME_MAX_SIZE]; + + p = name; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + if (code != ' ' && code != '-' && code != '_') { + buf[len++] = (char )code; + if (len >= PROPERTY_NAME_MAX_SIZE) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + } + + p += enclen(enc, p); + } + + buf[len] = 0; + + if (UserDefinedPropertyTable != 0) { + UserDefinedPropertyValue* e; + e = (UserDefinedPropertyValue* )NULL; + onig_st_lookup_strend(UserDefinedPropertyTable, + (const UChar* )buf, (const UChar* )buf + len, + (hash_data_type* )((void* )(&e))); + if (e != 0) { + return e->ctype; + } + } + + pc = unicode_lookup_property_name(buf, len); + if (pc != 0) { + /* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */ +#ifndef USE_UNICODE_PROPERTIES + if (pc->ctype > ONIGENC_MAX_STD_CTYPE) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +#endif + + return (int )pc->ctype; + } + + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} diff --git a/src/unicode_egcb_data.c b/src/unicode_egcb_data.c index 144d9b3..2304407 100644 --- a/src/unicode_egcb_data.c +++ b/src/unicode_egcb_data.c @@ -1,5 +1,29 @@ -/* Copyright (c) 2017 K.Kosako */ -/* Generated by make_gcb_data.py. */ +/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */ +/*- + * Copyright (c) 2017 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ #define GRAPHEME_BREAK_PROPERTY_VERSION 10_0_0 diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c index 2151211..ffaa661 100644 --- a/src/unicode_fold1_key.c +++ b/src/unicode_fold1_key.c @@ -69,7 +69,7 @@ __attribute__ ((__gnu_inline__)) int unicode_fold1_key(OnigCodePoint codes[]) { - static const int wordlist[] = + static const short int wordlist[] = { -1, -1, -1, -1, -1, -1, diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c index 07cfa4e..8a45c9d 100644 --- a/src/unicode_fold2_key.c +++ b/src/unicode_fold2_key.c @@ -69,7 +69,7 @@ __attribute__ ((__gnu_inline__)) int unicode_fold2_key(OnigCodePoint codes[]) { - static const int wordlist[] = + static const short int wordlist[] = { 101, diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c index 1b4d9d4..deb9d22 100644 --- a/src/unicode_fold3_key.c +++ b/src/unicode_fold3_key.c @@ -69,7 +69,7 @@ __attribute__ ((__gnu_inline__)) int unicode_fold3_key(OnigCodePoint codes[]) { - static const int wordlist[] = + static const short int wordlist[] = { 62, diff --git a/src/unicode_property_data.c b/src/unicode_property_data.c index 1807de4..b25392b 100644 --- a/src/unicode_property_data.c +++ 
b/src/unicode_property_data.c @@ -1,5 +1,5 @@ /* ANSI-C code produced by gperf version 3.0.4 */ -/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case -N unicode_lookup_property_name --output-file gperf.tmp unicode_property_data.gperf */ +/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf.tmp unicode_property_data.gperf */ /* Computed positions: -k'1-3,5-6,12,16,$' */ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ @@ -22909,6 +22909,9 @@ const CodeRanges[] = { CR_In_No_Block, }; +#define pool_offset(s) offsetof(struct unicode_prop_name_pool_t, unicode_prop_name_pool_str##s) + + #define TOTAL_KEYWORDS 711 #define MIN_WORD_LENGTH 1 #define MAX_WORD_LENGTH 44 @@ -23038,2255 +23041,3684 @@ hash (register const char *str, register unsigned int len) return hval + asso_values[(unsigned char)str[len - 1]]; } +struct unicode_prop_name_pool_t + { + char unicode_prop_name_pool_str1[sizeof("n")]; + char unicode_prop_name_pool_str3[sizeof("z")]; + char unicode_prop_name_pool_str8[sizeof("zzzz")]; + char unicode_prop_name_pool_str9[sizeof("mn")]; + char unicode_prop_name_pool_str10[sizeof("cn")]; + char unicode_prop_name_pool_str14[sizeof("ci")]; + char unicode_prop_name_pool_str15[sizeof("m")]; + char unicode_prop_name_pool_str17[sizeof("c")]; + char unicode_prop_name_pool_str23[sizeof("inmanichaean")]; + char unicode_prop_name_pool_str24[sizeof("mani")]; + char unicode_prop_name_pool_str25[sizeof("mc")]; + char unicode_prop_name_pool_str26[sizeof("cc")]; + char unicode_prop_name_pool_str28[sizeof("qaai")]; + char unicode_prop_name_pool_str34[sizeof("qaac")]; + char unicode_prop_name_pool_str41[sizeof("incham")]; + char unicode_prop_name_pool_str46[sizeof("mandaic")]; + char unicode_prop_name_pool_str49[sizeof("sm")]; + char unicode_prop_name_pool_str51[sizeof("sc")]; + char unicode_prop_name_pool_str56[sizeof("cans")]; + char 
unicode_prop_name_pool_str61[sizeof("ascii")]; + char unicode_prop_name_pool_str64[sizeof("insamaritan")]; + char unicode_prop_name_pool_str66[sizeof("incuneiform")]; + char unicode_prop_name_pool_str67[sizeof("s")]; + char unicode_prop_name_pool_str68[sizeof("inarmenian")]; + char unicode_prop_name_pool_str69[sizeof("zs")]; + char unicode_prop_name_pool_str76[sizeof("cs")]; + char unicode_prop_name_pool_str85[sizeof("me")]; + char unicode_prop_name_pool_str90[sizeof("incommonindicnumberforms")]; + char unicode_prop_name_pool_str93[sizeof("inavestan")]; + char unicode_prop_name_pool_str94[sizeof("inthai")]; + char unicode_prop_name_pool_str103[sizeof("inipaextensions")]; + char unicode_prop_name_pool_str108[sizeof("initialpunctuation")]; + char unicode_prop_name_pool_str112[sizeof("inancientsymbols")]; + char unicode_prop_name_pool_str114[sizeof("inthaana")]; + char unicode_prop_name_pool_str116[sizeof("cf")]; + char unicode_prop_name_pool_str118[sizeof("incuneiformnumbersandpunctuation")]; + char unicode_prop_name_pool_str122[sizeof("mtei")]; + char unicode_prop_name_pool_str124[sizeof("inspecials")]; + char unicode_prop_name_pool_str133[sizeof("inmusicalsymbols")]; + char unicode_prop_name_pool_str141[sizeof("inmiscellaneousmathematicalsymbolsa")]; + char unicode_prop_name_pool_str145[sizeof("lm")]; + char unicode_prop_name_pool_str146[sizeof("lina")]; + char unicode_prop_name_pool_str147[sizeof("lc")]; + char unicode_prop_name_pool_str149[sizeof("inlycian")]; + char unicode_prop_name_pool_str155[sizeof("lana")]; + char unicode_prop_name_pool_str157[sizeof("intaixuanjingsymbols")]; + char unicode_prop_name_pool_str158[sizeof("inmyanmarextendeda")]; + char unicode_prop_name_pool_str159[sizeof("alnum")]; + char unicode_prop_name_pool_str161[sizeof("sterm")]; + char unicode_prop_name_pool_str162[sizeof("intaitham")]; + char unicode_prop_name_pool_str163[sizeof("intransportandmapsymbols")]; + char unicode_prop_name_pool_str165[sizeof("taile")]; + char 
unicode_prop_name_pool_str167[sizeof("inmalayalam")]; + char unicode_prop_name_pool_str176[sizeof("inmiscellaneoussymbols")]; + char unicode_prop_name_pool_str177[sizeof("incontrolpictures")]; + char unicode_prop_name_pool_str185[sizeof("inmiscellaneoussymbolsandarrows")]; + char unicode_prop_name_pool_str189[sizeof("inlineara")]; + char unicode_prop_name_pool_str190[sizeof("inmiscellaneoussymbolsandpictographs")]; + char unicode_prop_name_pool_str193[sizeof("sinhala")]; + char unicode_prop_name_pool_str202[sizeof("taiviet")]; + char unicode_prop_name_pool_str205[sizeof("ext")]; + char unicode_prop_name_pool_str215[sizeof("latn")]; + char unicode_prop_name_pool_str216[sizeof("latin")]; + char unicode_prop_name_pool_str217[sizeof("ital")]; + char unicode_prop_name_pool_str218[sizeof("intamil")]; + char unicode_prop_name_pool_str220[sizeof("inmultani")]; + char unicode_prop_name_pool_str222[sizeof("taml")]; + char unicode_prop_name_pool_str229[sizeof("inrunic")]; + char unicode_prop_name_pool_str230[sizeof("incarian")]; + char unicode_prop_name_pool_str232[sizeof("armn")]; + char unicode_prop_name_pool_str234[sizeof("armi")]; + char unicode_prop_name_pool_str235[sizeof("cari")]; + char unicode_prop_name_pool_str236[sizeof("inlatinextendedc")]; + char unicode_prop_name_pool_str238[sizeof("armenian")]; + char unicode_prop_name_pool_str239[sizeof("inmyanmar")]; + char unicode_prop_name_pool_str240[sizeof("incyrillic")]; + char unicode_prop_name_pool_str242[sizeof("inlatinextendeda")]; + char unicode_prop_name_pool_str246[sizeof("carian")]; + char unicode_prop_name_pool_str249[sizeof("intaile")]; + char unicode_prop_name_pool_str253[sizeof("tale")]; + char unicode_prop_name_pool_str256[sizeof("arabic")]; + char unicode_prop_name_pool_str259[sizeof("l")]; + char unicode_prop_name_pool_str260[sizeof("nl")]; + char unicode_prop_name_pool_str261[sizeof("zl")]; + char unicode_prop_name_pool_str263[sizeof("insyriac")]; + char unicode_prop_name_pool_str265[sizeof("samr")]; + 
char unicode_prop_name_pool_str267[sizeof("merc")]; + char unicode_prop_name_pool_str269[sizeof("inlinearbideograms")]; + char unicode_prop_name_pool_str272[sizeof("samaritan")]; + char unicode_prop_name_pool_str273[sizeof("lt")]; + char unicode_prop_name_pool_str276[sizeof("insharada")]; + char unicode_prop_name_pool_str283[sizeof("inmeeteimayekextensions")]; + char unicode_prop_name_pool_str292[sizeof("inruminumeralsymbols")]; + char unicode_prop_name_pool_str295[sizeof("miao")]; + char unicode_prop_name_pool_str296[sizeof("inlatinextendede")]; + char unicode_prop_name_pool_str300[sizeof("zinh")]; + char unicode_prop_name_pool_str304[sizeof("inahom")]; + char unicode_prop_name_pool_str306[sizeof("incherokee")]; + char unicode_prop_name_pool_str307[sizeof("han")]; + char unicode_prop_name_pool_str310[sizeof("hani")]; + char unicode_prop_name_pool_str311[sizeof("inosmanya")]; + char unicode_prop_name_pool_str312[sizeof("inmiscellaneoustechnical")]; + char unicode_prop_name_pool_str323[sizeof("cham")]; + char unicode_prop_name_pool_str325[sizeof("inmahajani")]; + char unicode_prop_name_pool_str326[sizeof("osma")]; + char unicode_prop_name_pool_str329[sizeof("manichaean")]; + char unicode_prop_name_pool_str330[sizeof("term")]; + char unicode_prop_name_pool_str332[sizeof("sinh")]; + char unicode_prop_name_pool_str342[sizeof("cntrl")]; + char unicode_prop_name_pool_str347[sizeof("chakma")]; + char unicode_prop_name_pool_str348[sizeof("insinhala")]; + char unicode_prop_name_pool_str352[sizeof("tamil")]; + char unicode_prop_name_pool_str353[sizeof("inethiopic")]; + char unicode_prop_name_pool_str356[sizeof("connectorpunctuation")]; + char unicode_prop_name_pool_str357[sizeof("shavian")]; + char unicode_prop_name_pool_str361[sizeof("joinc")]; + char unicode_prop_name_pool_str362[sizeof("inenclosedalphanumerics")]; + char unicode_prop_name_pool_str368[sizeof("ahex")]; + char unicode_prop_name_pool_str369[sizeof("inlatinextendedadditional")]; + char 
unicode_prop_name_pool_str370[sizeof("lineara")]; + char unicode_prop_name_pool_str377[sizeof("inenclosedcjklettersandmonths")]; + char unicode_prop_name_pool_str378[sizeof("hex")]; + char unicode_prop_name_pool_str381[sizeof("thai")]; + char unicode_prop_name_pool_str382[sizeof("mahj")]; + char unicode_prop_name_pool_str386[sizeof("math")]; + char unicode_prop_name_pool_str389[sizeof("ll")]; + char unicode_prop_name_pool_str390[sizeof("thaa")]; + char unicode_prop_name_pool_str392[sizeof("hatran")]; + char unicode_prop_name_pool_str399[sizeof("mahajani")]; + char unicode_prop_name_pool_str400[sizeof("insinhalaarchaicnumbers")]; + char unicode_prop_name_pool_str402[sizeof("taitham")]; + char unicode_prop_name_pool_str403[sizeof("thaana")]; + char unicode_prop_name_pool_str408[sizeof("ethi")]; + char unicode_prop_name_pool_str409[sizeof("inenclosedalphanumericsupplement")]; + char unicode_prop_name_pool_str414[sizeof("meroiticcursive")]; + char unicode_prop_name_pool_str415[sizeof("inideographicdescriptioncharacters")]; + char unicode_prop_name_pool_str419[sizeof("terminalpunctuation")]; + char unicode_prop_name_pool_str426[sizeof("inemoticons")]; + char unicode_prop_name_pool_str431[sizeof("intifinagh")]; + char unicode_prop_name_pool_str433[sizeof("asciihexdigit")]; + char unicode_prop_name_pool_str438[sizeof("tifinagh")]; + char unicode_prop_name_pool_str451[sizeof("inearlydynasticcuneiform")]; + char unicode_prop_name_pool_str457[sizeof("inopticalcharacterrecognition")]; + char unicode_prop_name_pool_str479[sizeof("loe")]; + char unicode_prop_name_pool_str486[sizeof("titlecaseletter")]; + char unicode_prop_name_pool_str489[sizeof("inscriptionalparthian")]; + char unicode_prop_name_pool_str510[sizeof("oriya")]; + char unicode_prop_name_pool_str520[sizeof("hira")]; + char unicode_prop_name_pool_str529[sizeof("sora")]; + char unicode_prop_name_pool_str530[sizeof("mero")]; + char unicode_prop_name_pool_str535[sizeof("inarrows")]; + char 
unicode_prop_name_pool_str537[sizeof("inhiragana")]; + char unicode_prop_name_pool_str544[sizeof("no")]; + char unicode_prop_name_pool_str545[sizeof("inmahjongtiles")]; + char unicode_prop_name_pool_str549[sizeof("innko")]; + char unicode_prop_name_pool_str552[sizeof("co")]; + char unicode_prop_name_pool_str553[sizeof("cher")]; + char unicode_prop_name_pool_str556[sizeof("inmro")]; + char unicode_prop_name_pool_str563[sizeof("common")]; + char unicode_prop_name_pool_str564[sizeof("cwcm")]; + char unicode_prop_name_pool_str568[sizeof("inmiao")]; + char unicode_prop_name_pool_str573[sizeof("inscriptionalpahlavi")]; + char unicode_prop_name_pool_str576[sizeof("inmathematicalalphanumericsymbols")]; + char unicode_prop_name_pool_str577[sizeof("so")]; + char unicode_prop_name_pool_str579[sizeof("hano")]; + char unicode_prop_name_pool_str580[sizeof("tirh")]; + char unicode_prop_name_pool_str584[sizeof("inhatran")]; + char unicode_prop_name_pool_str585[sizeof("inolchiki")]; + char unicode_prop_name_pool_str586[sizeof("ahom")]; + char unicode_prop_name_pool_str588[sizeof("di")]; + char unicode_prop_name_pool_str589[sizeof("hatr")]; + char unicode_prop_name_pool_str591[sizeof("inogham")]; + char unicode_prop_name_pool_str596[sizeof("intirhuta")]; + char unicode_prop_name_pool_str601[sizeof("inmodi")]; + char unicode_prop_name_pool_str603[sizeof("idc")]; + char unicode_prop_name_pool_str604[sizeof("mand")]; + char unicode_prop_name_pool_str608[sizeof("inmandaic")]; + char unicode_prop_name_pool_str609[sizeof("dia")]; + char unicode_prop_name_pool_str610[sizeof("cwcf")]; + char unicode_prop_name_pool_str618[sizeof("xidc")]; + char unicode_prop_name_pool_str621[sizeof("sind")]; + char unicode_prop_name_pool_str622[sizeof("newline")]; + char unicode_prop_name_pool_str631[sizeof("mend")]; + char unicode_prop_name_pool_str635[sizeof("newtailue")]; + char unicode_prop_name_pool_str638[sizeof("inshorthandformatcontrols")]; + char 
unicode_prop_name_pool_str642[sizeof("anatolianhieroglyphs")]; + char unicode_prop_name_pool_str643[sizeof("xids")]; + char unicode_prop_name_pool_str653[sizeof("ids")]; + char unicode_prop_name_pool_str659[sizeof("innewtailue")]; + char unicode_prop_name_pool_str660[sizeof("insylotinagri")]; + char unicode_prop_name_pool_str666[sizeof("insundanese")]; + char unicode_prop_name_pool_str673[sizeof("lo")]; + char unicode_prop_name_pool_str677[sizeof("indominotiles")]; + char unicode_prop_name_pool_str678[sizeof("inlao")]; + char unicode_prop_name_pool_str685[sizeof("lao")]; + char unicode_prop_name_pool_str686[sizeof("laoo")]; + char unicode_prop_name_pool_str687[sizeof("mongolian")]; + char unicode_prop_name_pool_str690[sizeof("cwt")]; + char unicode_prop_name_pool_str692[sizeof("idst")]; + char unicode_prop_name_pool_str693[sizeof("format")]; + char unicode_prop_name_pool_str696[sizeof("lineseparator")]; + char unicode_prop_name_pool_str702[sizeof("letter")]; + char unicode_prop_name_pool_str711[sizeof("idcontinue")]; + char unicode_prop_name_pool_str714[sizeof("inanatolianhieroglyphs")]; + char unicode_prop_name_pool_str723[sizeof("inlydian")]; + char unicode_prop_name_pool_str725[sizeof("pi")]; + char unicode_prop_name_pool_str726[sizeof("nchar")]; + char unicode_prop_name_pool_str731[sizeof("inspacingmodifierletters")]; + char unicode_prop_name_pool_str732[sizeof("oalpha")]; + char unicode_prop_name_pool_str735[sizeof("indeseret")]; + char unicode_prop_name_pool_str737[sizeof("pc")]; + char unicode_prop_name_pool_str756[sizeof("xdigit")]; + char unicode_prop_name_pool_str758[sizeof("inphaistosdisc")]; + char unicode_prop_name_pool_str759[sizeof("joincontrol")]; + char unicode_prop_name_pool_str760[sizeof("inlowsurrogates")]; + char unicode_prop_name_pool_str762[sizeof("mro")]; + char unicode_prop_name_pool_str763[sizeof("mroo")]; + char unicode_prop_name_pool_str767[sizeof("xidstart")]; + char unicode_prop_name_pool_str773[sizeof("wara")]; + char 
unicode_prop_name_pool_str776[sizeof("inwarangciti")]; + char unicode_prop_name_pool_str787[sizeof("ps")]; + char unicode_prop_name_pool_str797[sizeof("pe")]; + char unicode_prop_name_pool_str806[sizeof("cwl")]; + char unicode_prop_name_pool_str816[sizeof("incoptic")]; + char unicode_prop_name_pool_str824[sizeof("diacritic")]; + char unicode_prop_name_pool_str827[sizeof("pf")]; + char unicode_prop_name_pool_str844[sizeof("space")]; + char unicode_prop_name_pool_str853[sizeof("hanunoo")]; + char unicode_prop_name_pool_str856[sizeof("incyrillicextendeda")]; + char unicode_prop_name_pool_str860[sizeof("odi")]; + char unicode_prop_name_pool_str866[sizeof("modi")]; + char unicode_prop_name_pool_str867[sizeof("oidc")]; + char unicode_prop_name_pool_str868[sizeof("incopticepactnumbers")]; + char unicode_prop_name_pool_str870[sizeof("palm")]; + char unicode_prop_name_pool_str871[sizeof("insundanesesupplement")]; + char unicode_prop_name_pool_str872[sizeof("patws")]; + char unicode_prop_name_pool_str876[sizeof("inolditalic")]; + char unicode_prop_name_pool_str877[sizeof("inlepcha")]; + char unicode_prop_name_pool_str878[sizeof("shaw")]; + char unicode_prop_name_pool_str880[sizeof("omath")]; + char unicode_prop_name_pool_str886[sizeof("alpha")]; + char unicode_prop_name_pool_str892[sizeof("oids")]; + char unicode_prop_name_pool_str896[sizeof("control")]; + char unicode_prop_name_pool_str897[sizeof("ideo")]; + char unicode_prop_name_pool_str898[sizeof("lepc")]; + char unicode_prop_name_pool_str899[sizeof("psalterpahlavi")]; + char unicode_prop_name_pool_str900[sizeof("dsrt")]; + char unicode_prop_name_pool_str902[sizeof("otheridcontinue")]; + char unicode_prop_name_pool_str906[sizeof("inpalmyrene")]; + char unicode_prop_name_pool_str907[sizeof("decimalnumber")]; + char unicode_prop_name_pool_str915[sizeof("noncharactercodepoint")]; + char unicode_prop_name_pool_str916[sizeof("idstart")]; + char unicode_prop_name_pool_str917[sizeof("otheralphabetic")]; + char 
unicode_prop_name_pool_str918[sizeof("letternumber")]; + char unicode_prop_name_pool_str923[sizeof("dash")]; + char unicode_prop_name_pool_str926[sizeof("xidcontinue")]; + char unicode_prop_name_pool_str930[sizeof("finalpunctuation")]; + char unicode_prop_name_pool_str931[sizeof("extender")]; + char unicode_prop_name_pool_str935[sizeof("siddham")]; + char unicode_prop_name_pool_str945[sizeof("inoldturkic")]; + char unicode_prop_name_pool_str948[sizeof("sharada")]; + char unicode_prop_name_pool_str951[sizeof("whitespace")]; + char unicode_prop_name_pool_str956[sizeof("inmodifiertoneletters")]; + char unicode_prop_name_pool_str958[sizeof("radical")]; + char unicode_prop_name_pool_str960[sizeof("canadianaboriginal")]; + char unicode_prop_name_pool_str969[sizeof("inethiopicextendeda")]; + char unicode_prop_name_pool_str970[sizeof("imperialaramaic")]; + char unicode_prop_name_pool_str978[sizeof("perm")]; + char unicode_prop_name_pool_str979[sizeof("deseret")]; + char unicode_prop_name_pool_str984[sizeof("casedletter")]; + char unicode_prop_name_pool_str998[sizeof("inimperialaramaic")]; + char unicode_prop_name_pool_str1001[sizeof("otheridstart")]; + char unicode_prop_name_pool_str1006[sizeof("prti")]; + char unicode_prop_name_pool_str1012[sizeof("cprt")]; + char unicode_prop_name_pool_str1018[sizeof("othersymbol")]; + char unicode_prop_name_pool_str1022[sizeof("coptic")]; + char unicode_prop_name_pool_str1037[sizeof("otherlowercase")]; + char unicode_prop_name_pool_str1038[sizeof("phnx")]; + char unicode_prop_name_pool_str1042[sizeof("inphoenician")]; + char unicode_prop_name_pool_str1049[sizeof("incaucasianalbanian")]; + char unicode_prop_name_pool_str1050[sizeof("inlatin1supplement")]; + char unicode_prop_name_pool_str1054[sizeof("xpeo")]; + char unicode_prop_name_pool_str1058[sizeof("ininscriptionalparthian")]; + char unicode_prop_name_pool_str1059[sizeof("ininscriptionalpahlavi")]; + char unicode_prop_name_pool_str1060[sizeof("other")]; + char 
unicode_prop_name_pool_str1061[sizeof("privateuse")]; + char unicode_prop_name_pool_str1066[sizeof("othernumber")]; + char unicode_prop_name_pool_str1071[sizeof("inenclosedideographicsupplement")]; + char unicode_prop_name_pool_str1073[sizeof("copt")]; + char unicode_prop_name_pool_str1078[sizeof("print")]; + char unicode_prop_name_pool_str1080[sizeof("cuneiform")]; + char unicode_prop_name_pool_str1081[sizeof("olditalic")]; + char unicode_prop_name_pool_str1082[sizeof("xsux")]; + char unicode_prop_name_pool_str1086[sizeof("inmongolian")]; + char unicode_prop_name_pool_str1091[sizeof("inancientgreekmusicalnotation")]; + char unicode_prop_name_pool_str1092[sizeof("sundanese")]; + char unicode_prop_name_pool_str1093[sizeof("inoldnortharabian")]; + char unicode_prop_name_pool_str1095[sizeof("patternsyntax")]; + char unicode_prop_name_pool_str1098[sizeof("inherited")]; + char unicode_prop_name_pool_str1099[sizeof("ingrantha")]; + char unicode_prop_name_pool_str1100[sizeof("inphoneticextensions")]; + char unicode_prop_name_pool_str1110[sizeof("saurashtra")]; + char unicode_prop_name_pool_str1116[sizeof("inancientgreeknumbers")]; + char unicode_prop_name_pool_str1122[sizeof("shrd")]; + char unicode_prop_name_pool_str1126[sizeof("inoldsoutharabian")]; + char unicode_prop_name_pool_str1127[sizeof("innabataean")]; + char unicode_prop_name_pool_str1128[sizeof("inalphabeticpresentationforms")]; + char unicode_prop_name_pool_str1129[sizeof("spaceseparator")]; + char unicode_prop_name_pool_str1135[sizeof("olower")]; + char unicode_prop_name_pool_str1137[sizeof("ingeneralpunctuation")]; + char unicode_prop_name_pool_str1138[sizeof("inunifiedcanadianaboriginalsyllabics")]; + char unicode_prop_name_pool_str1140[sizeof("bamum")]; + char unicode_prop_name_pool_str1143[sizeof("inarabic")]; + char unicode_prop_name_pool_str1147[sizeof("phli")]; + char unicode_prop_name_pool_str1148[sizeof("inphoneticextensionssupplement")]; + char 
unicode_prop_name_pool_str1150[sizeof("patternwhitespace")]; + char unicode_prop_name_pool_str1152[sizeof("inbasiclatin")]; + char unicode_prop_name_pool_str1154[sizeof("othermath")]; + char unicode_prop_name_pool_str1155[sizeof("idsbinaryoperator")]; + char unicode_prop_name_pool_str1156[sizeof("inmathematicaloperators")]; + char unicode_prop_name_pool_str1157[sizeof("multani")]; + char unicode_prop_name_pool_str1158[sizeof("ingujarati")]; + char unicode_prop_name_pool_str1163[sizeof("inelbasan")]; + char unicode_prop_name_pool_str1164[sizeof("inoldhungarian")]; + char unicode_prop_name_pool_str1165[sizeof("innumberforms")]; + char unicode_prop_name_pool_str1166[sizeof("nd")]; + char unicode_prop_name_pool_str1168[sizeof("yi")]; + char unicode_prop_name_pool_str1169[sizeof("lisu")]; + char unicode_prop_name_pool_str1172[sizeof("yiii")]; + char unicode_prop_name_pool_str1174[sizeof("inpsalterpahlavi")]; + char unicode_prop_name_pool_str1178[sizeof("tagbanwa")]; + char unicode_prop_name_pool_str1184[sizeof("bass")]; + char unicode_prop_name_pool_str1189[sizeof("nbat")]; + char unicode_prop_name_pool_str1190[sizeof("inbyzantinemusicalsymbols")]; + char unicode_prop_name_pool_str1192[sizeof("closepunctuation")]; + char unicode_prop_name_pool_str1194[sizeof("tfng")]; + char unicode_prop_name_pool_str1195[sizeof("otherletter")]; + char unicode_prop_name_pool_str1199[sizeof("sd")]; + char unicode_prop_name_pool_str1202[sizeof("lowercase")]; + char unicode_prop_name_pool_str1203[sizeof("sidd")]; + char unicode_prop_name_pool_str1205[sizeof("nabataean")]; + char unicode_prop_name_pool_str1207[sizeof("lepcha")]; + char unicode_prop_name_pool_str1208[sizeof("inarabicpresentationformsa")]; + char unicode_prop_name_pool_str1210[sizeof("modifierletter")]; + char unicode_prop_name_pool_str1211[sizeof("intags")]; + char unicode_prop_name_pool_str1212[sizeof("mult")]; + char unicode_prop_name_pool_str1215[sizeof("insiddham")]; + char unicode_prop_name_pool_str1216[sizeof("talu")]; 
+ char unicode_prop_name_pool_str1221[sizeof("cased")]; + char unicode_prop_name_pool_str1223[sizeof("intibetan")]; + char unicode_prop_name_pool_str1230[sizeof("separator")]; + char unicode_prop_name_pool_str1232[sizeof("runic")]; + char unicode_prop_name_pool_str1233[sizeof("inmiscellaneousmathematicalsymbolsb")]; + char unicode_prop_name_pool_str1238[sizeof("linb")]; + char unicode_prop_name_pool_str1239[sizeof("inbraillepatterns")]; + char unicode_prop_name_pool_str1240[sizeof("oldpersian")]; + char unicode_prop_name_pool_str1242[sizeof("inugaritic")]; + char unicode_prop_name_pool_str1243[sizeof("telu")]; + char unicode_prop_name_pool_str1245[sizeof("limb")]; + char unicode_prop_name_pool_str1247[sizeof("oldpermic")]; + char unicode_prop_name_pool_str1249[sizeof("bali")]; + char unicode_prop_name_pool_str1250[sizeof("inmyanmarextendedb")]; + char unicode_prop_name_pool_str1251[sizeof("tibt")]; + char unicode_prop_name_pool_str1259[sizeof("saur")]; + char unicode_prop_name_pool_str1263[sizeof("po")]; + char unicode_prop_name_pool_str1265[sizeof("tibetan")]; + char unicode_prop_name_pool_str1280[sizeof("insaurashtra")]; + char unicode_prop_name_pool_str1284[sizeof("inbalinese")]; + char unicode_prop_name_pool_str1285[sizeof("elba")]; + char unicode_prop_name_pool_str1290[sizeof("bengali")]; + char unicode_prop_name_pool_str1291[sizeof("gran")]; + char unicode_prop_name_pool_str1295[sizeof("phoenician")]; + char unicode_prop_name_pool_str1297[sizeof("ingurmukhi")]; + char unicode_prop_name_pool_str1300[sizeof("caseignorable")]; + char unicode_prop_name_pool_str1305[sizeof("lyci")]; + char unicode_prop_name_pool_str1307[sizeof("inyijinghexagramsymbols")]; + char unicode_prop_name_pool_str1309[sizeof("mlym")]; + char unicode_prop_name_pool_str1316[sizeof("lycian")]; + char unicode_prop_name_pool_str1321[sizeof("elbasan")]; + char unicode_prop_name_pool_str1327[sizeof("balinese")]; + char unicode_prop_name_pool_str1328[sizeof("narb")]; + char 
unicode_prop_name_pool_str1330[sizeof("brai")]; + char unicode_prop_name_pool_str1333[sizeof("insuttonsignwriting")]; + char unicode_prop_name_pool_str1335[sizeof("insorasompeng")]; + char unicode_prop_name_pool_str1336[sizeof("malayalam")]; + char unicode_prop_name_pool_str1339[sizeof("arab")]; + char unicode_prop_name_pool_str1341[sizeof("brahmi")]; + char unicode_prop_name_pool_str1343[sizeof("incountingrodnumerals")]; + char unicode_prop_name_pool_str1347[sizeof("rjng")]; + char unicode_prop_name_pool_str1348[sizeof("mong")]; + char unicode_prop_name_pool_str1352[sizeof("incyrillicsupplement")]; + char unicode_prop_name_pool_str1359[sizeof("ogam")]; + char unicode_prop_name_pool_str1361[sizeof("sarb")]; + char unicode_prop_name_pool_str1362[sizeof("lower")]; + char unicode_prop_name_pool_str1364[sizeof("inrejang")]; + char unicode_prop_name_pool_str1370[sizeof("hmng")]; + char unicode_prop_name_pool_str1373[sizeof("tirhuta")]; + char unicode_prop_name_pool_str1374[sizeof("hang")]; + char unicode_prop_name_pool_str1379[sizeof("wspace")]; + char unicode_prop_name_pool_str1384[sizeof("inlatinextendedd")]; + char unicode_prop_name_pool_str1387[sizeof("invai")]; + char unicode_prop_name_pool_str1388[sizeof("incherokeesupplement")]; + char unicode_prop_name_pool_str1390[sizeof("mymr")]; + char unicode_prop_name_pool_str1394[sizeof("vai")]; + char unicode_prop_name_pool_str1395[sizeof("vaii")]; + char unicode_prop_name_pool_str1408[sizeof("ethiopic")]; + char unicode_prop_name_pool_str1410[sizeof("sylotinagri")]; + char unicode_prop_name_pool_str1415[sizeof("myanmar")]; + char unicode_prop_name_pool_str1417[sizeof("syrc")]; + char unicode_prop_name_pool_str1419[sizeof("inbrahmi")]; + char unicode_prop_name_pool_str1423[sizeof("mathsymbol")]; + char unicode_prop_name_pool_str1425[sizeof("runr")]; + char unicode_prop_name_pool_str1431[sizeof("inshavian")]; + char unicode_prop_name_pool_str1432[sizeof("gothic")]; + char unicode_prop_name_pool_str1438[sizeof("syriac")]; + 
char unicode_prop_name_pool_str1439[sizeof("p")]; + char unicode_prop_name_pool_str1441[sizeof("zp")]; + char unicode_prop_name_pool_str1442[sizeof("induployan")]; + char unicode_prop_name_pool_str1444[sizeof("vs")]; + char unicode_prop_name_pool_str1447[sizeof("ingreekandcoptic")]; + char unicode_prop_name_pool_str1448[sizeof("ingothic")]; + char unicode_prop_name_pool_str1451[sizeof("lowercaseletter")]; + char unicode_prop_name_pool_str1455[sizeof("inoriya")]; + char unicode_prop_name_pool_str1461[sizeof("grext")]; + char unicode_prop_name_pool_str1462[sizeof("linearb")]; + char unicode_prop_name_pool_str1463[sizeof("inyiradicals")]; + char unicode_prop_name_pool_str1465[sizeof("inethiopicsupplement")]; + char unicode_prop_name_pool_str1466[sizeof("inletterlikesymbols")]; + char unicode_prop_name_pool_str1467[sizeof("ingeometricshapes")]; + char unicode_prop_name_pool_str1469[sizeof("java")]; + char unicode_prop_name_pool_str1470[sizeof("inblockelements")]; + char unicode_prop_name_pool_str1472[sizeof("number")]; + char unicode_prop_name_pool_str1473[sizeof("incombininghalfmarks")]; + char unicode_prop_name_pool_str1474[sizeof("inbassavah")]; + char unicode_prop_name_pool_str1489[sizeof("inlinearbsyllabary")]; + char unicode_prop_name_pool_str1491[sizeof("osmanya")]; + char unicode_prop_name_pool_str1495[sizeof("avst")]; + char unicode_prop_name_pool_str1500[sizeof("hexdigit")]; + char unicode_prop_name_pool_str1504[sizeof("injavanese")]; + char unicode_prop_name_pool_str1513[sizeof("cyrl")]; + char unicode_prop_name_pool_str1514[sizeof("avestan")]; + char unicode_prop_name_pool_str1531[sizeof("intaiviet")]; + char unicode_prop_name_pool_str1532[sizeof("inornamentaldingbats")]; + char unicode_prop_name_pool_str1533[sizeof("tavt")]; + char unicode_prop_name_pool_str1538[sizeof("javanese")]; + char unicode_prop_name_pool_str1539[sizeof("inethiopicextended")]; + char unicode_prop_name_pool_str1550[sizeof("inhalfwidthandfullwidthforms")]; + char 
unicode_prop_name_pool_str1559[sizeof("ingeorgian")]; + char unicode_prop_name_pool_str1561[sizeof("inarabicmathematicalalphabeticsymbols")]; + char unicode_prop_name_pool_str1569[sizeof("hangul")]; + char unicode_prop_name_pool_str1571[sizeof("insuperscriptsandsubscripts")]; + char unicode_prop_name_pool_str1576[sizeof("inhanunoo")]; + char unicode_prop_name_pool_str1578[sizeof("inhangulsyllables")]; + char unicode_prop_name_pool_str1586[sizeof("inoldpersian")]; + char unicode_prop_name_pool_str1589[sizeof("geor")]; + char unicode_prop_name_pool_str1591[sizeof("otherpunctuation")]; + char unicode_prop_name_pool_str1593[sizeof("inoldpermic")]; + char unicode_prop_name_pool_str1598[sizeof("inalchemicalsymbols")]; + char unicode_prop_name_pool_str1599[sizeof("sylo")]; + char unicode_prop_name_pool_str1601[sizeof("hiragana")]; + char unicode_prop_name_pool_str1604[sizeof("word")]; + char unicode_prop_name_pool_str1612[sizeof("inmeroitichieroglyphs")]; + char unicode_prop_name_pool_str1614[sizeof("insmallformvariants")]; + char unicode_prop_name_pool_str1620[sizeof("sund")]; + char unicode_prop_name_pool_str1621[sizeof("brah")]; + char unicode_prop_name_pool_str1623[sizeof("insupplementalmathematicaloperators")]; + char unicode_prop_name_pool_str1624[sizeof("glagolitic")]; + char unicode_prop_name_pool_str1627[sizeof("braille")]; + char unicode_prop_name_pool_str1639[sizeof("unassigned")]; + char unicode_prop_name_pool_str1640[sizeof("sgnw")]; + char unicode_prop_name_pool_str1644[sizeof("plrd")]; + char unicode_prop_name_pool_str1648[sizeof("hebr")]; + char unicode_prop_name_pool_str1649[sizeof("ogham")]; + char unicode_prop_name_pool_str1654[sizeof("cyrillic")]; + char unicode_prop_name_pool_str1658[sizeof("orya")]; + char unicode_prop_name_pool_str1669[sizeof("grantha")]; + char unicode_prop_name_pool_str1684[sizeof("invariationselectors")]; + char unicode_prop_name_pool_str1693[sizeof("indingbats")]; + char 
unicode_prop_name_pool_str1695[sizeof("inunifiedcanadianaboriginalsyllabicsextended")]; + char unicode_prop_name_pool_str1702[sizeof("inhanguljamoextendeda")]; + char unicode_prop_name_pool_str1705[sizeof("goth")]; + char unicode_prop_name_pool_str1706[sizeof("graphemebase")]; + char unicode_prop_name_pool_str1708[sizeof("bidic")]; + char unicode_prop_name_pool_str1717[sizeof("ogrext")]; + char unicode_prop_name_pool_str1724[sizeof("idsb")]; + char unicode_prop_name_pool_str1732[sizeof("invariationselectorssupplement")]; + char unicode_prop_name_pool_str1733[sizeof("assigned")]; + char unicode_prop_name_pool_str1737[sizeof("symbol")]; + char unicode_prop_name_pool_str1741[sizeof("ingreekextended")]; + char unicode_prop_name_pool_str1743[sizeof("pauc")]; + char unicode_prop_name_pool_str1752[sizeof("meroitichieroglyphs")]; + char unicode_prop_name_pool_str1754[sizeof("logicalorderexception")]; + char unicode_prop_name_pool_str1771[sizeof("incurrencysymbols")]; + char unicode_prop_name_pool_str1775[sizeof("inarabicextendeda")]; + char unicode_prop_name_pool_str1783[sizeof("insupplementalarrowsc")]; + char unicode_prop_name_pool_str1786[sizeof("insupplementalarrowsa")]; + char unicode_prop_name_pool_str1788[sizeof("inkannada")]; + char unicode_prop_name_pool_str1792[sizeof("kana")]; + char unicode_prop_name_pool_str1793[sizeof("phagspa")]; + char unicode_prop_name_pool_str1796[sizeof("cakm")]; + char unicode_prop_name_pool_str1797[sizeof("digit")]; + char unicode_prop_name_pool_str1802[sizeof("softdotted")]; + char unicode_prop_name_pool_str1806[sizeof("inchakma")]; + char unicode_prop_name_pool_str1818[sizeof("inphagspa")]; + char unicode_prop_name_pool_str1822[sizeof("inhighprivateusesurrogates")]; + char unicode_prop_name_pool_str1836[sizeof("warangciti")]; + char unicode_prop_name_pool_str1844[sizeof("incjkcompatibilityforms")]; + char unicode_prop_name_pool_str1847[sizeof("idstrinaryoperator")]; + char 
unicode_prop_name_pool_str1849[sizeof("incjkcompatibilityideographs")]; + char unicode_prop_name_pool_str1851[sizeof("inkaithi")]; + char unicode_prop_name_pool_str1857[sizeof("takri")]; + char unicode_prop_name_pool_str1864[sizeof("phlp")]; + char unicode_prop_name_pool_str1865[sizeof("mendekikakui")]; + char unicode_prop_name_pool_str1867[sizeof("punct")]; + char unicode_prop_name_pool_str1870[sizeof("otherdefaultignorablecodepoint")]; + char unicode_prop_name_pool_str1871[sizeof("inkatakana")]; + char unicode_prop_name_pool_str1878[sizeof("graph")]; + char unicode_prop_name_pool_str1879[sizeof("lydi")]; + char unicode_prop_name_pool_str1885[sizeof("pd")]; + char unicode_prop_name_pool_str1887[sizeof("incjkstrokes")]; + char unicode_prop_name_pool_str1890[sizeof("lydian")]; + char unicode_prop_name_pool_str1895[sizeof("sorasompeng")]; + char unicode_prop_name_pool_str1897[sizeof("incjkcompatibilityideographssupplement")]; + char unicode_prop_name_pool_str1900[sizeof("meeteimayek")]; + char unicode_prop_name_pool_str1909[sizeof("otheruppercase")]; + char unicode_prop_name_pool_str1912[sizeof("kali")]; + char unicode_prop_name_pool_str1915[sizeof("inhanguljamo")]; + char unicode_prop_name_pool_str1917[sizeof("ideographic")]; + char unicode_prop_name_pool_str1935[sizeof("inmeeteimayek")]; + char unicode_prop_name_pool_str1948[sizeof("incyrillicextendedb")]; + char unicode_prop_name_pool_str1964[sizeof("hluw")]; + char unicode_prop_name_pool_str1969[sizeof("patsyn")]; + char unicode_prop_name_pool_str1972[sizeof("incombiningdiacriticalmarks")]; + char unicode_prop_name_pool_str1973[sizeof("inarabicsupplement")]; + char unicode_prop_name_pool_str1975[sizeof("inkanasupplement")]; + char unicode_prop_name_pool_str1977[sizeof("deprecated")]; + char unicode_prop_name_pool_str1978[sizeof("inverticalforms")]; + char unicode_prop_name_pool_str1982[sizeof("incombiningdiacriticalmarksforsymbols")]; + char unicode_prop_name_pool_str1987[sizeof("indevanagari")]; + char 
unicode_prop_name_pool_str1989[sizeof("inbuhid")]; + char unicode_prop_name_pool_str1991[sizeof("alphabetic")]; + char unicode_prop_name_pool_str1995[sizeof("oldhungarian")]; + char unicode_prop_name_pool_str1998[sizeof("mark")]; + char unicode_prop_name_pool_str2005[sizeof("inkharoshthi")]; + char unicode_prop_name_pool_str2011[sizeof("deva")]; + char unicode_prop_name_pool_str2012[sizeof("invedicextensions")]; + char unicode_prop_name_pool_str2019[sizeof("devanagari")]; + char unicode_prop_name_pool_str2020[sizeof("incombiningdiacriticalmarkssupplement")]; + char unicode_prop_name_pool_str2024[sizeof("ingeometricshapesextended")]; + char unicode_prop_name_pool_str2030[sizeof("inkhmer")]; + char unicode_prop_name_pool_str2034[sizeof("changeswhencasemapped")]; + char unicode_prop_name_pool_str2058[sizeof("intakri")]; + char unicode_prop_name_pool_str2061[sizeof("dep")]; + char unicode_prop_name_pool_str2062[sizeof("takr")]; + char unicode_prop_name_pool_str2080[sizeof("changeswhencasefolded")]; + char unicode_prop_name_pool_str2082[sizeof("kaithi")]; + char unicode_prop_name_pool_str2093[sizeof("phag")]; + char unicode_prop_name_pool_str2104[sizeof("inpahawhhmong")]; + char unicode_prop_name_pool_str2106[sizeof("bidicontrol")]; + char unicode_prop_name_pool_str2116[sizeof("inkhojki")]; + char unicode_prop_name_pool_str2126[sizeof("bamu")]; + char unicode_prop_name_pool_str2127[sizeof("inbamum")]; + char unicode_prop_name_pool_str2128[sizeof("changeswhentitlecased")]; + char unicode_prop_name_pool_str2131[sizeof("inkhmersymbols")]; + char unicode_prop_name_pool_str2132[sizeof("uideo")]; + char unicode_prop_name_pool_str2133[sizeof("lu")]; + char unicode_prop_name_pool_str2136[sizeof("kthi")]; + char unicode_prop_name_pool_str2145[sizeof("limbu")]; + char unicode_prop_name_pool_str2147[sizeof("defaultignorablecodepoint")]; + char unicode_prop_name_pool_str2151[sizeof("inplayingcards")]; + char unicode_prop_name_pool_str2153[sizeof("inhebrew")]; + char 
unicode_prop_name_pool_str2161[sizeof("modifiersymbol")]; + char unicode_prop_name_pool_str2167[sizeof("ugaritic")]; + char unicode_prop_name_pool_str2172[sizeof("inlisu")]; + char unicode_prop_name_pool_str2173[sizeof("inglagolitic")]; + char unicode_prop_name_pool_str2175[sizeof("inprivateusearea")]; + char unicode_prop_name_pool_str2178[sizeof("olck")]; + char unicode_prop_name_pool_str2182[sizeof("inbengali")]; + char unicode_prop_name_pool_str2185[sizeof("olchiki")]; + char unicode_prop_name_pool_str2195[sizeof("caucasianalbanian")]; + char unicode_prop_name_pool_str2210[sizeof("oldturkic")]; + char unicode_prop_name_pool_str2211[sizeof("beng")]; + char unicode_prop_name_pool_str2212[sizeof("intelugu")]; + char unicode_prop_name_pool_str2218[sizeof("hyphen")]; + char unicode_prop_name_pool_str2221[sizeof("inbuginese")]; + char unicode_prop_name_pool_str2225[sizeof("intagalog")]; + char unicode_prop_name_pool_str2240[sizeof("cypriot")]; + char unicode_prop_name_pool_str2249[sizeof("inlimbu")]; + char unicode_prop_name_pool_str2252[sizeof("graphemeextend")]; + char unicode_prop_name_pool_str2255[sizeof("tagb")]; + char unicode_prop_name_pool_str2259[sizeof("incypriotsyllabary")]; + char unicode_prop_name_pool_str2263[sizeof("intagbanwa")]; + char unicode_prop_name_pool_str2268[sizeof("inaegeannumbers")]; + char unicode_prop_name_pool_str2275[sizeof("inkatakanaphoneticextensions")]; + char unicode_prop_name_pool_str2276[sizeof("glag")]; + char unicode_prop_name_pool_str2277[sizeof("combiningmark")]; + char unicode_prop_name_pool_str2278[sizeof("palmyrene")]; + char unicode_prop_name_pool_str2280[sizeof("khmr")]; + char unicode_prop_name_pool_str2284[sizeof("khar")]; + char unicode_prop_name_pool_str2292[sizeof("ugar")]; + char unicode_prop_name_pool_str2300[sizeof("inarabicpresentationformsb")]; + char unicode_prop_name_pool_str2304[sizeof("insupplementaryprivateuseareaa")]; + char unicode_prop_name_pool_str2306[sizeof("variationselector")]; + char 
unicode_prop_name_pool_str2311[sizeof("nko")]; + char unicode_prop_name_pool_str2312[sizeof("nkoo")]; + char unicode_prop_name_pool_str2330[sizeof("inbamumsupplement")]; + char unicode_prop_name_pool_str2336[sizeof("tglg")]; + char unicode_prop_name_pool_str2338[sizeof("any")]; + char unicode_prop_name_pool_str2339[sizeof("enclosingmark")]; + char unicode_prop_name_pool_str2348[sizeof("gujr")]; + char unicode_prop_name_pool_str2358[sizeof("changeswhenlowercased")]; + char unicode_prop_name_pool_str2363[sizeof("knda")]; + char unicode_prop_name_pool_str2364[sizeof("hung")]; + char unicode_prop_name_pool_str2365[sizeof("gujarati")]; + char unicode_prop_name_pool_str2368[sizeof("bopo")]; + char unicode_prop_name_pool_str2373[sizeof("inmendekikakui")]; + char unicode_prop_name_pool_str2376[sizeof("inbopomofo")]; + char unicode_prop_name_pool_str2386[sizeof("kharoshthi")]; + char unicode_prop_name_pool_str2388[sizeof("kannada")]; + char unicode_prop_name_pool_str2401[sizeof("khoj")]; + char unicode_prop_name_pool_str2422[sizeof("cherokee")]; + char unicode_prop_name_pool_str2426[sizeof("inlatinextendedb")]; + char unicode_prop_name_pool_str2432[sizeof("dashpunctuation")]; + char unicode_prop_name_pool_str2435[sizeof("dupl")]; + char unicode_prop_name_pool_str2443[sizeof("inyisyllables")]; + char unicode_prop_name_pool_str2451[sizeof("georgian")]; + char unicode_prop_name_pool_str2453[sizeof("rejang")]; + char unicode_prop_name_pool_str2455[sizeof("oupper")]; + char unicode_prop_name_pool_str2477[sizeof("aghb")]; + char unicode_prop_name_pool_str2491[sizeof("khmer")]; + char unicode_prop_name_pool_str2494[sizeof("grbase")]; + char unicode_prop_name_pool_str2503[sizeof("currencysymbol")]; + char unicode_prop_name_pool_str2529[sizeof("incombiningdiacriticalmarksextended")]; + char unicode_prop_name_pool_str2542[sizeof("spacingmark")]; + char unicode_prop_name_pool_str2544[sizeof("orkh")]; + char unicode_prop_name_pool_str2550[sizeof("cwu")]; + char 
unicode_prop_name_pool_str2552[sizeof("hebrew")]; + char unicode_prop_name_pool_str2570[sizeof("insupplementalpunctuation")]; + char unicode_prop_name_pool_str2581[sizeof("oldnortharabian")]; + char unicode_prop_name_pool_str2587[sizeof("incjksymbolsandpunctuation")]; + char unicode_prop_name_pool_str2590[sizeof("othergraphemeextend")]; + char unicode_prop_name_pool_str2613[sizeof("indevanagariextended")]; + char unicode_prop_name_pool_str2621[sizeof("tagalog")]; + char unicode_prop_name_pool_str2628[sizeof("surrogate")]; + char unicode_prop_name_pool_str2650[sizeof("bopomofo")]; + char unicode_prop_name_pool_str2679[sizeof("ingeorgiansupplement")]; + char unicode_prop_name_pool_str2704[sizeof("uppercase")]; + char unicode_prop_name_pool_str2711[sizeof("unifiedideograph")]; + char unicode_prop_name_pool_str2742[sizeof("inpaucinhau")]; + char unicode_prop_name_pool_str2743[sizeof("paucinhau")]; + char unicode_prop_name_pool_str2774[sizeof("insupplementalsymbolsandpictographs")]; + char unicode_prop_name_pool_str2792[sizeof("nonspacingmark")]; + char unicode_prop_name_pool_str2794[sizeof("inhanguljamoextendedb")]; + char unicode_prop_name_pool_str2802[sizeof("inhighsurrogates")]; + char unicode_prop_name_pool_str2803[sizeof("punctuation")]; + char unicode_prop_name_pool_str2834[sizeof("openpunctuation")]; + char unicode_prop_name_pool_str2835[sizeof("bassavah")]; + char unicode_prop_name_pool_str2864[sizeof("upper")]; + char unicode_prop_name_pool_str2878[sizeof("insupplementalarrowsb")]; + char unicode_prop_name_pool_str2879[sizeof("inkanbun")]; + char unicode_prop_name_pool_str2918[sizeof("inkangxiradicals")]; + char unicode_prop_name_pool_str2922[sizeof("inmeroiticcursive")]; + char unicode_prop_name_pool_str2925[sizeof("signwriting")]; + char unicode_prop_name_pool_str2928[sizeof("incjkradicalssupplement")]; + char unicode_prop_name_pool_str2930[sizeof("pahawhhmong")]; + char unicode_prop_name_pool_str2952[sizeof("inkayahli")]; + char 
unicode_prop_name_pool_str2953[sizeof("uppercaseletter")]; + char unicode_prop_name_pool_str2955[sizeof("batk")]; + char unicode_prop_name_pool_str2960[sizeof("inbatak")]; + char unicode_prop_name_pool_str2968[sizeof("incjkcompatibility")]; + char unicode_prop_name_pool_str2983[sizeof("buhd")]; + char unicode_prop_name_pool_str2989[sizeof("egyp")]; + char unicode_prop_name_pool_str3009[sizeof("innoblock")]; + char unicode_prop_name_pool_str3043[sizeof("inegyptianhieroglyphs")]; + char unicode_prop_name_pool_str3084[sizeof("grek")]; + char unicode_prop_name_pool_str3176[sizeof("bugi")]; + char unicode_prop_name_pool_str3230[sizeof("changeswhenuppercased")]; + char unicode_prop_name_pool_str3234[sizeof("paragraphseparator")]; + char unicode_prop_name_pool_str3254[sizeof("buginese")]; + char unicode_prop_name_pool_str3282[sizeof("guru")]; + char unicode_prop_name_pool_str3299[sizeof("inbopomofoextended")]; + char unicode_prop_name_pool_str3312[sizeof("telugu")]; + char unicode_prop_name_pool_str3343[sizeof("quotationmark")]; + char unicode_prop_name_pool_str3364[sizeof("inkhudawadi")]; + char unicode_prop_name_pool_str3370[sizeof("kayahli")]; + char unicode_prop_name_pool_str3372[sizeof("oldsoutharabian")]; + char unicode_prop_name_pool_str3396[sizeof("insupplementaryprivateuseareab")]; + char unicode_prop_name_pool_str3479[sizeof("inhangulcompatibilityjamo")]; + char unicode_prop_name_pool_str3491[sizeof("zyyy")]; + char unicode_prop_name_pool_str3555[sizeof("qmark")]; + char unicode_prop_name_pool_str3566[sizeof("buhid")]; + char unicode_prop_name_pool_str3567[sizeof("sk")]; + char unicode_prop_name_pool_str3582[sizeof("unknown")]; + char unicode_prop_name_pool_str3619[sizeof("khudawadi")]; + char unicode_prop_name_pool_str3644[sizeof("katakana")]; + char unicode_prop_name_pool_str3670[sizeof("incjkunifiedideographsextensionc")]; + char unicode_prop_name_pool_str3673[sizeof("incjkunifiedideographsextensiona")]; + char 
unicode_prop_name_pool_str3685[sizeof("incjkunifiedideographs")]; + char unicode_prop_name_pool_str3700[sizeof("incjkunifiedideographsextensione")]; + char unicode_prop_name_pool_str3743[sizeof("duployan")]; + char unicode_prop_name_pool_str3763[sizeof("egyptianhieroglyphs")]; + char unicode_prop_name_pool_str3853[sizeof("inboxdrawing")]; + char unicode_prop_name_pool_str4106[sizeof("khojki")]; + char unicode_prop_name_pool_str4244[sizeof("incjkunifiedideographsextensiond")]; + char unicode_prop_name_pool_str4722[sizeof("batak")]; + char unicode_prop_name_pool_str4765[sizeof("incjkunifiedideographsextensionb")]; + char unicode_prop_name_pool_str4780[sizeof("blank")]; + char unicode_prop_name_pool_str4851[sizeof("greek")]; + char unicode_prop_name_pool_str4943[sizeof("grlink")]; + char unicode_prop_name_pool_str5054[sizeof("gurmukhi")]; + char unicode_prop_name_pool_str5162[sizeof("graphemelink")]; + }; +static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = + { + "n", + "z", + "zzzz", + "mn", + "cn", + "ci", + "m", + "c", + "inmanichaean", + "mani", + "mc", + "cc", + "qaai", + "qaac", + "incham", + "mandaic", + "sm", + "sc", + "cans", + "ascii", + "insamaritan", + "incuneiform", + "s", + "inarmenian", + "zs", + "cs", + "me", + "incommonindicnumberforms", + "inavestan", + "inthai", + "inipaextensions", + "initialpunctuation", + "inancientsymbols", + "inthaana", + "cf", + "incuneiformnumbersandpunctuation", + "mtei", + "inspecials", + "inmusicalsymbols", + "inmiscellaneousmathematicalsymbolsa", + "lm", + "lina", + "lc", + "inlycian", + "lana", + "intaixuanjingsymbols", + "inmyanmarextendeda", + "alnum", + "sterm", + "intaitham", + "intransportandmapsymbols", + "taile", + "inmalayalam", + "inmiscellaneoussymbols", + "incontrolpictures", + "inmiscellaneoussymbolsandarrows", + "inlineara", + "inmiscellaneoussymbolsandpictographs", + "sinhala", + "taiviet", + "ext", + "latn", + "latin", + "ital", + "intamil", + "inmultani", + "taml", + "inrunic", 
+ "incarian", + "armn", + "armi", + "cari", + "inlatinextendedc", + "armenian", + "inmyanmar", + "incyrillic", + "inlatinextendeda", + "carian", + "intaile", + "tale", + "arabic", + "l", + "nl", + "zl", + "insyriac", + "samr", + "merc", + "inlinearbideograms", + "samaritan", + "lt", + "insharada", + "inmeeteimayekextensions", + "inruminumeralsymbols", + "miao", + "inlatinextendede", + "zinh", + "inahom", + "incherokee", + "han", + "hani", + "inosmanya", + "inmiscellaneoustechnical", + "cham", + "inmahajani", + "osma", + "manichaean", + "term", + "sinh", + "cntrl", + "chakma", + "insinhala", + "tamil", + "inethiopic", + "connectorpunctuation", + "shavian", + "joinc", + "inenclosedalphanumerics", + "ahex", + "inlatinextendedadditional", + "lineara", + "inenclosedcjklettersandmonths", + "hex", + "thai", + "mahj", + "math", + "ll", + "thaa", + "hatran", + "mahajani", + "insinhalaarchaicnumbers", + "taitham", + "thaana", + "ethi", + "inenclosedalphanumericsupplement", + "meroiticcursive", + "inideographicdescriptioncharacters", + "terminalpunctuation", + "inemoticons", + "intifinagh", + "asciihexdigit", + "tifinagh", + "inearlydynasticcuneiform", + "inopticalcharacterrecognition", + "loe", + "titlecaseletter", + "inscriptionalparthian", + "oriya", + "hira", + "sora", + "mero", + "inarrows", + "inhiragana", + "no", + "inmahjongtiles", + "innko", + "co", + "cher", + "inmro", + "common", + "cwcm", + "inmiao", + "inscriptionalpahlavi", + "inmathematicalalphanumericsymbols", + "so", + "hano", + "tirh", + "inhatran", + "inolchiki", + "ahom", + "di", + "hatr", + "inogham", + "intirhuta", + "inmodi", + "idc", + "mand", + "inmandaic", + "dia", + "cwcf", + "xidc", + "sind", + "newline", + "mend", + "newtailue", + "inshorthandformatcontrols", + "anatolianhieroglyphs", + "xids", + "ids", + "innewtailue", + "insylotinagri", + "insundanese", + "lo", + "indominotiles", + "inlao", + "lao", + "laoo", + "mongolian", + "cwt", + "idst", + "format", + "lineseparator", + "letter", + 
"idcontinue", + "inanatolianhieroglyphs", + "inlydian", + "pi", + "nchar", + "inspacingmodifierletters", + "oalpha", + "indeseret", + "pc", + "xdigit", + "inphaistosdisc", + "joincontrol", + "inlowsurrogates", + "mro", + "mroo", + "xidstart", + "wara", + "inwarangciti", + "ps", + "pe", + "cwl", + "incoptic", + "diacritic", + "pf", + "space", + "hanunoo", + "incyrillicextendeda", + "odi", + "modi", + "oidc", + "incopticepactnumbers", + "palm", + "insundanesesupplement", + "patws", + "inolditalic", + "inlepcha", + "shaw", + "omath", + "alpha", + "oids", + "control", + "ideo", + "lepc", + "psalterpahlavi", + "dsrt", + "otheridcontinue", + "inpalmyrene", + "decimalnumber", + "noncharactercodepoint", + "idstart", + "otheralphabetic", + "letternumber", + "dash", + "xidcontinue", + "finalpunctuation", + "extender", + "siddham", + "inoldturkic", + "sharada", + "whitespace", + "inmodifiertoneletters", + "radical", + "canadianaboriginal", + "inethiopicextendeda", + "imperialaramaic", + "perm", + "deseret", + "casedletter", + "inimperialaramaic", + "otheridstart", + "prti", + "cprt", + "othersymbol", + "coptic", + "otherlowercase", + "phnx", + "inphoenician", + "incaucasianalbanian", + "inlatin1supplement", + "xpeo", + "ininscriptionalparthian", + "ininscriptionalpahlavi", + "other", + "privateuse", + "othernumber", + "inenclosedideographicsupplement", + "copt", + "print", + "cuneiform", + "olditalic", + "xsux", + "inmongolian", + "inancientgreekmusicalnotation", + "sundanese", + "inoldnortharabian", + "patternsyntax", + "inherited", + "ingrantha", + "inphoneticextensions", + "saurashtra", + "inancientgreeknumbers", + "shrd", + "inoldsoutharabian", + "innabataean", + "inalphabeticpresentationforms", + "spaceseparator", + "olower", + "ingeneralpunctuation", + "inunifiedcanadianaboriginalsyllabics", + "bamum", + "inarabic", + "phli", + "inphoneticextensionssupplement", + "patternwhitespace", + "inbasiclatin", + "othermath", + "idsbinaryoperator", + "inmathematicaloperators", + 
"multani", + "ingujarati", + "inelbasan", + "inoldhungarian", + "innumberforms", + "nd", + "yi", + "lisu", + "yiii", + "inpsalterpahlavi", + "tagbanwa", + "bass", + "nbat", + "inbyzantinemusicalsymbols", + "closepunctuation", + "tfng", + "otherletter", + "sd", + "lowercase", + "sidd", + "nabataean", + "lepcha", + "inarabicpresentationformsa", + "modifierletter", + "intags", + "mult", + "insiddham", + "talu", + "cased", + "intibetan", + "separator", + "runic", + "inmiscellaneousmathematicalsymbolsb", + "linb", + "inbraillepatterns", + "oldpersian", + "inugaritic", + "telu", + "limb", + "oldpermic", + "bali", + "inmyanmarextendedb", + "tibt", + "saur", + "po", + "tibetan", + "insaurashtra", + "inbalinese", + "elba", + "bengali", + "gran", + "phoenician", + "ingurmukhi", + "caseignorable", + "lyci", + "inyijinghexagramsymbols", + "mlym", + "lycian", + "elbasan", + "balinese", + "narb", + "brai", + "insuttonsignwriting", + "insorasompeng", + "malayalam", + "arab", + "brahmi", + "incountingrodnumerals", + "rjng", + "mong", + "incyrillicsupplement", + "ogam", + "sarb", + "lower", + "inrejang", + "hmng", + "tirhuta", + "hang", + "wspace", + "inlatinextendedd", + "invai", + "incherokeesupplement", + "mymr", + "vai", + "vaii", + "ethiopic", + "sylotinagri", + "myanmar", + "syrc", + "inbrahmi", + "mathsymbol", + "runr", + "inshavian", + "gothic", + "syriac", + "p", + "zp", + "induployan", + "vs", + "ingreekandcoptic", + "ingothic", + "lowercaseletter", + "inoriya", + "grext", + "linearb", + "inyiradicals", + "inethiopicsupplement", + "inletterlikesymbols", + "ingeometricshapes", + "java", + "inblockelements", + "number", + "incombininghalfmarks", + "inbassavah", + "inlinearbsyllabary", + "osmanya", + "avst", + "hexdigit", + "injavanese", + "cyrl", + "avestan", + "intaiviet", + "inornamentaldingbats", + "tavt", + "javanese", + "inethiopicextended", + "inhalfwidthandfullwidthforms", + "ingeorgian", + "inarabicmathematicalalphabeticsymbols", + "hangul", + 
"insuperscriptsandsubscripts", + "inhanunoo", + "inhangulsyllables", + "inoldpersian", + "geor", + "otherpunctuation", + "inoldpermic", + "inalchemicalsymbols", + "sylo", + "hiragana", + "word", + "inmeroitichieroglyphs", + "insmallformvariants", + "sund", + "brah", + "insupplementalmathematicaloperators", + "glagolitic", + "braille", + "unassigned", + "sgnw", + "plrd", + "hebr", + "ogham", + "cyrillic", + "orya", + "grantha", + "invariationselectors", + "indingbats", + "inunifiedcanadianaboriginalsyllabicsextended", + "inhanguljamoextendeda", + "goth", + "graphemebase", + "bidic", + "ogrext", + "idsb", + "invariationselectorssupplement", + "assigned", + "symbol", + "ingreekextended", + "pauc", + "meroitichieroglyphs", + "logicalorderexception", + "incurrencysymbols", + "inarabicextendeda", + "insupplementalarrowsc", + "insupplementalarrowsa", + "inkannada", + "kana", + "phagspa", + "cakm", + "digit", + "softdotted", + "inchakma", + "inphagspa", + "inhighprivateusesurrogates", + "warangciti", + "incjkcompatibilityforms", + "idstrinaryoperator", + "incjkcompatibilityideographs", + "inkaithi", + "takri", + "phlp", + "mendekikakui", + "punct", + "otherdefaultignorablecodepoint", + "inkatakana", + "graph", + "lydi", + "pd", + "incjkstrokes", + "lydian", + "sorasompeng", + "incjkcompatibilityideographssupplement", + "meeteimayek", + "otheruppercase", + "kali", + "inhanguljamo", + "ideographic", + "inmeeteimayek", + "incyrillicextendedb", + "hluw", + "patsyn", + "incombiningdiacriticalmarks", + "inarabicsupplement", + "inkanasupplement", + "deprecated", + "inverticalforms", + "incombiningdiacriticalmarksforsymbols", + "indevanagari", + "inbuhid", + "alphabetic", + "oldhungarian", + "mark", + "inkharoshthi", + "deva", + "invedicextensions", + "devanagari", + "incombiningdiacriticalmarkssupplement", + "ingeometricshapesextended", + "inkhmer", + "changeswhencasemapped", + "intakri", + "dep", + "takr", + "changeswhencasefolded", + "kaithi", + "phag", + "inpahawhhmong", + 
"bidicontrol", + "inkhojki", + "bamu", + "inbamum", + "changeswhentitlecased", + "inkhmersymbols", + "uideo", + "lu", + "kthi", + "limbu", + "defaultignorablecodepoint", + "inplayingcards", + "inhebrew", + "modifiersymbol", + "ugaritic", + "inlisu", + "inglagolitic", + "inprivateusearea", + "olck", + "inbengali", + "olchiki", + "caucasianalbanian", + "oldturkic", + "beng", + "intelugu", + "hyphen", + "inbuginese", + "intagalog", + "cypriot", + "inlimbu", + "graphemeextend", + "tagb", + "incypriotsyllabary", + "intagbanwa", + "inaegeannumbers", + "inkatakanaphoneticextensions", + "glag", + "combiningmark", + "palmyrene", + "khmr", + "khar", + "ugar", + "inarabicpresentationformsb", + "insupplementaryprivateuseareaa", + "variationselector", + "nko", + "nkoo", + "inbamumsupplement", + "tglg", + "any", + "enclosingmark", + "gujr", + "changeswhenlowercased", + "knda", + "hung", + "gujarati", + "bopo", + "inmendekikakui", + "inbopomofo", + "kharoshthi", + "kannada", + "khoj", + "cherokee", + "inlatinextendedb", + "dashpunctuation", + "dupl", + "inyisyllables", + "georgian", + "rejang", + "oupper", + "aghb", + "khmer", + "grbase", + "currencysymbol", + "incombiningdiacriticalmarksextended", + "spacingmark", + "orkh", + "cwu", + "hebrew", + "insupplementalpunctuation", + "oldnortharabian", + "incjksymbolsandpunctuation", + "othergraphemeextend", + "indevanagariextended", + "tagalog", + "surrogate", + "bopomofo", + "ingeorgiansupplement", + "uppercase", + "unifiedideograph", + "inpaucinhau", + "paucinhau", + "insupplementalsymbolsandpictographs", + "nonspacingmark", + "inhanguljamoextendedb", + "inhighsurrogates", + "punctuation", + "openpunctuation", + "bassavah", + "upper", + "insupplementalarrowsb", + "inkanbun", + "inkangxiradicals", + "inmeroiticcursive", + "signwriting", + "incjkradicalssupplement", + "pahawhhmong", + "inkayahli", + "uppercaseletter", + "batk", + "inbatak", + "incjkcompatibility", + "buhd", + "egyp", + "innoblock", + "inegyptianhieroglyphs", + "grek", 
+ "bugi", + "changeswhenuppercased", + "paragraphseparator", + "buginese", + "guru", + "inbopomofoextended", + "telugu", + "quotationmark", + "inkhudawadi", + "kayahli", + "oldsoutharabian", + "insupplementaryprivateuseareab", + "inhangulcompatibilityjamo", + "zyyy", + "qmark", + "buhid", + "sk", + "unknown", + "khudawadi", + "katakana", + "incjkunifiedideographsextensionc", + "incjkunifiedideographsextensiona", + "incjkunifiedideographs", + "incjkunifiedideographsextensione", + "duployan", + "egyptianhieroglyphs", + "inboxdrawing", + "khojki", + "incjkunifiedideographsextensiond", + "batak", + "incjkunifiedideographsextensionb", + "blank", + "greek", + "grlink", + "gurmukhi", + "graphemelink" + }; +#define unicode_prop_name_pool ((const char *) &unicode_prop_name_pool_contents) #ifdef __GNUC__ __inline #if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ __attribute__ ((__gnu_inline__)) #endif #endif -const struct PropertyNameCtype * +const struct PoolPropertyNameCtype * unicode_lookup_property_name (register const char *str, register unsigned int len) { - static const struct PropertyNameCtype wordlist[] = + static const struct PoolPropertyNameCtype wordlist[] = { - {""}, + {-1}, - {"n", 144}, - {""}, + {pool_offset(1), 144}, + {-1}, - {"z", 234}, - {""}, {""}, {""}, {""}, + {pool_offset(3), 234}, + {-1}, {-1}, {-1}, {-1}, - {"zzzz", 225}, + {pool_offset(8), 225}, - {"mn", 138}, + {pool_offset(9), 138}, - {"cn", 51}, - {""}, {""}, {""}, + {pool_offset(10), 51}, + {-1}, {-1}, {-1}, - {"ci", 38}, + {pool_offset(14), 38}, - {"m", 125}, - {""}, + {pool_offset(15), 125}, + {-1}, - {"c", 35}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(17), 35}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"inmanichaean", 430}, + {pool_offset(23), 430}, - {"mani", 129}, + {pool_offset(24), 129}, - {"mc", 131}, + {pool_offset(25), 131}, - {"cc", 41}, - {""}, + {pool_offset(26), 41}, + {-1}, - {"qaai", 94}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(28), 94}, + {-1}, {-1}, {-1}, 
{-1}, {-1}, - {"qaac", 54}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(34), 54}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"incham", 373}, - {""}, {""}, {""}, {""}, + {pool_offset(41), 373}, + {-1}, {-1}, {-1}, {-1}, - {"mandaic", 128}, - {""}, {""}, + {pool_offset(46), 128}, + {-1}, {-1}, - {"sm", 202}, - {""}, + {pool_offset(49), 202}, + {-1}, - {"sc", 195}, - {""}, {""}, {""}, {""}, + {pool_offset(51), 195}, + {-1}, {-1}, {-1}, {-1}, - {"cans", 36}, - {""}, {""}, {""}, {""}, + {pool_offset(56), 36}, + {-1}, {-1}, {-1}, {-1}, - {"ascii", 14}, - {""}, {""}, + {pool_offset(61), 14}, + {-1}, {-1}, - {"insamaritan", 255}, - {""}, + {pool_offset(64), 255}, + {-1}, - {"incuneiform", 456}, + {pool_offset(66), 456}, - {"s", 191}, + {pool_offset(67), 191}, - {"inarmenian", 248}, + {pool_offset(68), 248}, - {"zs", 237}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(69), 237}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"cs", 55}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(76), 55}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"me", 132}, - {""}, {""}, {""}, {""}, + {pool_offset(85), 132}, + {-1}, {-1}, {-1}, {-1}, - {"incommonindicnumberforms", 364}, - {""}, {""}, + {pool_offset(90), 364}, + {-1}, {-1}, - {"inavestan", 431}, + {pool_offset(93), 431}, - {"inthai", 268}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(94), 268}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inipaextensions", 242}, - {""}, {""}, {""}, {""}, + {pool_offset(103), 242}, + {-1}, {-1}, {-1}, {-1}, - {"initialpunctuation", 183}, - {""}, {""}, {""}, + {pool_offset(108), 183}, + {-1}, {-1}, {-1}, - {"inancientsymbols", 402}, - {""}, + {pool_offset(112), 402}, + {-1}, - {"inthaana", 253}, - {""}, + {pool_offset(114), 253}, + {-1}, - {"cf", 42}, - {""}, + {pool_offset(116), 42}, + {-1}, - {"incuneiformnumbersandpunctuation", 457}, - {""}, {""}, {""}, + {pool_offset(118), 457}, + {-1}, {-1}, {-1}, - {"mtei", 133}, - {""}, + 
{pool_offset(122), 133}, + {-1}, - {"inspecials", 397}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(124), 397}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inmusicalsymbols", 470}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(133), 470}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inmiscellaneousmathematicalsymbolsa", 323}, - {""}, {""}, {""}, + {pool_offset(141), 323}, + {-1}, {-1}, {-1}, - {"lm", 117}, + {pool_offset(145), 117}, - {"lina", 113}, + {pool_offset(146), 113}, - {"lc", 108}, - {""}, + {pool_offset(147), 108}, + {-1}, - {"inlycian", 404}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(149), 404}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"lana", 212}, - {""}, + {pool_offset(155), 212}, + {-1}, - {"intaixuanjingsymbols", 472}, + {pool_offset(157), 472}, - {"inmyanmarextendeda", 374}, + {pool_offset(158), 374}, - {"alnum", 13}, - {""}, + {pool_offset(159), 13}, + {-1}, - {"sterm", 192}, + {pool_offset(161), 192}, - {"intaitham", 292}, + {pool_offset(162), 292}, - {"intransportandmapsymbols", 486}, - {""}, + {pool_offset(163), 486}, + {-1}, - {"taile", 211}, - {""}, + {pool_offset(165), 211}, + {-1}, - {"inmalayalam", 266}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(167), 266}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inmiscellaneoussymbols", 321}, + {pool_offset(176), 321}, - {"incontrolpictures", 315}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(177), 315}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inmiscellaneoussymbolsandarrows", 329}, - {""}, {""}, {""}, + {pool_offset(185), 329}, + {-1}, {-1}, {-1}, - {"inlineara", 417}, + {pool_offset(189), 417}, - {"inmiscellaneoussymbolsandpictographs", 483}, - {""}, {""}, + {pool_offset(190), 483}, + {-1}, {-1}, - {"sinhala", 200}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(193), 200}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"taiviet", 213}, - {""}, {""}, + {pool_offset(202), 213}, 
+ {-1}, {-1}, - {"ext", 69}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(205), 69}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"latn", 110}, + {pool_offset(215), 110}, - {"latin", 110}, + {pool_offset(216), 110}, - {"ital", 155}, + {pool_offset(217), 155}, - {"intamil", 263}, - {""}, + {pool_offset(218), 263}, + {-1}, - {"inmultani", 446}, - {""}, + {pool_offset(220), 446}, + {-1}, - {"taml", 215}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(222), 215}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inrunic", 279}, + {pool_offset(229), 279}, - {"incarian", 405}, - {""}, + {pool_offset(230), 405}, + {-1}, - {"armn", 21}, - {""}, + {pool_offset(232), 21}, + {-1}, - {"armi", 93}, + {pool_offset(234), 93}, - {"cari", 37}, + {pool_offset(235), 37}, - {"inlatinextendedc", 331}, - {""}, + {pool_offset(236), 331}, + {-1}, - {"armenian", 21}, + {pool_offset(238), 21}, - {"inmyanmar", 271}, + {pool_offset(239), 271}, - {"incyrillic", 246}, - {""}, + {pool_offset(240), 246}, + {-1}, - {"inlatinextendeda", 240}, - {""}, {""}, {""}, + {pool_offset(242), 240}, + {-1}, {-1}, {-1}, - {"carian", 37}, - {""}, {""}, + {pool_offset(246), 37}, + {-1}, {-1}, - {"intaile", 288}, - {""}, {""}, {""}, + {pool_offset(249), 288}, + {-1}, {-1}, {-1}, - {"tale", 211}, - {""}, {""}, + {pool_offset(253), 211}, + {-1}, {-1}, - {"arabic", 20}, - {""}, {""}, + {pool_offset(256), 20}, + {-1}, {-1}, - {"l", 107}, + {pool_offset(259), 107}, - {"nl", 149}, + {pool_offset(260), 149}, - {"zl", 235}, - {""}, + {pool_offset(261), 235}, + {-1}, - {"insyriac", 251}, - {""}, + {pool_offset(263), 251}, + {-1}, - {"samr", 193}, - {""}, + {pool_offset(265), 193}, + {-1}, - {"merc", 135}, - {""}, + {pool_offset(267), 135}, + {-1}, - {"inlinearbideograms", 399}, - {""}, {""}, + {pool_offset(269), 399}, + {-1}, {-1}, - {"samaritan", 193}, + {pool_offset(272), 193}, - {"lt", 121}, - {""}, {""}, + {pool_offset(273), 121}, + {-1}, {-1}, - {"insharada", 443}, - {""}, {""}, 
{""}, {""}, {""}, {""}, + {pool_offset(276), 443}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inmeeteimayekextensions", 376}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(283), 376}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inruminumeralsymbols", 437}, - {""}, {""}, + {pool_offset(292), 437}, + {-1}, {-1}, - {"miao", 137}, + {pool_offset(295), 137}, - {"inlatinextendede", 378}, - {""}, {""}, {""}, + {pool_offset(296), 378}, + {-1}, {-1}, {-1}, - {"zinh", 94}, - {""}, {""}, {""}, + {pool_offset(300), 94}, + {-1}, {-1}, {-1}, - {"inahom", 453}, - {""}, + {pool_offset(304), 453}, + {-1}, - {"incherokee", 276}, + {pool_offset(306), 276}, - {"han", 80}, - {""}, {""}, + {pool_offset(307), 80}, + {-1}, {-1}, - {"hani", 80}, + {pool_offset(310), 80}, - {"inosmanya", 414}, + {pool_offset(311), 414}, - {"inmiscellaneoustechnical", 314}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, + {pool_offset(312), 314}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {"cham", 44}, - {""}, + {pool_offset(323), 44}, + {-1}, - {"inmahajani", 442}, + {pool_offset(325), 442}, - {"osma", 162}, - {""}, {""}, + {pool_offset(326), 162}, + {-1}, {-1}, - {"manichaean", 129}, + {pool_offset(329), 129}, - {"term", 217}, - {""}, + {pool_offset(330), 217}, + {-1}, - {"sinh", 200}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(332), 200}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"cntrl", 3}, - {""}, {""}, {""}, {""}, + {pool_offset(342), 3}, + {-1}, {-1}, {-1}, {-1}, - {"chakma", 43}, + {pool_offset(347), 43}, - {"insinhala", 267}, - {""}, {""}, {""}, + {pool_offset(348), 267}, + {-1}, {-1}, {-1}, - {"tamil", 215}, + {pool_offset(352), 215}, - {"inethiopic", 274}, - {""}, {""}, + {pool_offset(353), 274}, + {-1}, {-1}, - {"connectorpunctuation", 177}, + {pool_offset(356), 177}, - {"shavian", 197}, - {""}, {""}, {""}, + {pool_offset(357), 197}, + {-1}, {-1}, {-1}, - {"joinc", 98}, + 
{pool_offset(361), 98}, - {"inenclosedalphanumerics", 317}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(362), 317}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"ahex", 15}, + {pool_offset(368), 15}, - {"inlatinextendedadditional", 304}, + {pool_offset(369), 304}, - {"lineara", 113}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(370), 113}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inenclosedcjklettersandmonths", 350}, + {pool_offset(377), 350}, - {"hex", 85}, - {""}, {""}, + {pool_offset(378), 85}, + {-1}, {-1}, - {"thai", 219}, + {pool_offset(381), 219}, - {"mahj", 126}, - {""}, {""}, {""}, + {pool_offset(382), 126}, + {-1}, {-1}, {-1}, - {"math", 130}, - {""}, {""}, + {pool_offset(386), 130}, + {-1}, {-1}, - {"ll", 116}, + {pool_offset(389), 116}, - {"thaa", 218}, - {""}, + {pool_offset(390), 218}, + {-1}, - {"hatran", 83}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(392), 83}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"mahajani", 126}, + {pool_offset(399), 126}, - {"insinhalaarchaicnumbers", 444}, - {""}, + {pool_offset(400), 444}, + {-1}, - {"taitham", 212}, + {pool_offset(402), 212}, - {"thaana", 218}, - {""}, {""}, {""}, {""}, + {pool_offset(403), 218}, + {-1}, {-1}, {-1}, {-1}, - {"ethi", 68}, + {pool_offset(408), 68}, - {"inenclosedalphanumericsupplement", 481}, - {""}, {""}, {""}, {""}, + {pool_offset(409), 481}, + {-1}, {-1}, {-1}, {-1}, - {"meroiticcursive", 135}, + {pool_offset(414), 135}, - {"inideographicdescriptioncharacters", 340}, - {""}, {""}, {""}, + {pool_offset(415), 340}, + {-1}, {-1}, {-1}, - {"terminalpunctuation", 217}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(419), 217}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inemoticons", 484}, - {""}, {""}, {""}, {""}, + {pool_offset(426), 484}, + {-1}, {-1}, {-1}, {-1}, - {"intifinagh", 334}, - {""}, + {pool_offset(431), 334}, + {-1}, - {"asciihexdigit", 15}, - {""}, {""}, {""}, {""}, + {pool_offset(433), 15}, + {-1}, {-1}, {-1}, {-1}, - {"tifinagh", 221}, - {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(438), 221}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"inearlydynasticcuneiform", 458}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(451), 458}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"inopticalcharacterrecognition", 316}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(457), 316}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"loe", 119}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(479), 119}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"titlecaseletter", 121}, - {""}, {""}, + {pool_offset(486), 121}, + {-1}, {-1}, - {"inscriptionalparthian", 96}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, + {pool_offset(489), 96}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {"oriya", 161}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(510), 161}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"hira", 86}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(520), 86}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"sora", 205}, + {pool_offset(529), 205}, - {"mero", 136}, - {""}, {""}, {""}, {""}, + {pool_offset(530), 136}, + {-1}, {-1}, {-1}, {-1}, - {"inarrows", 312}, - {""}, + {pool_offset(535), 312}, + {-1}, - {"inhiragana", 342}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(537), 342}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"no", 150}, + {pool_offset(544), 150}, - {"inmahjongtiles", 478}, - {""}, {""}, {""}, + {pool_offset(545), 478}, + {-1}, {-1}, {-1}, - {"innko", 254}, - {""}, {""}, + {pool_offset(549), 254}, + {-1}, {-1}, - {"co", 52}, + 
{pool_offset(552), 52}, - {"cher", 50}, - {""}, {""}, + {pool_offset(553), 50}, + {-1}, {-1}, - {"inmro", 462}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(556), 462}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"common", 53}, + {pool_offset(563), 53}, - {"cwcm", 46}, - {""}, {""}, {""}, + {pool_offset(564), 46}, + {-1}, {-1}, {-1}, - {"inmiao", 465}, - {""}, {""}, {""}, {""}, + {pool_offset(568), 465}, + {-1}, {-1}, {-1}, {-1}, - {"inscriptionalpahlavi", 95}, - {""}, {""}, + {pool_offset(573), 95}, + {-1}, {-1}, - {"inmathematicalalphanumericsymbols", 474}, + {pool_offset(576), 474}, - {"so", 203}, - {""}, + {pool_offset(577), 203}, + {-1}, - {"hano", 82}, + {pool_offset(579), 82}, - {"tirh", 222}, - {""}, {""}, {""}, + {pool_offset(580), 222}, + {-1}, {-1}, {-1}, - {"inhatran", 422}, + {pool_offset(584), 422}, - {"inolchiki", 298}, + {pool_offset(585), 298}, - {"ahom", 16}, - {""}, + {pool_offset(586), 16}, + {-1}, - {"di", 60}, + {pool_offset(588), 60}, - {"hatr", 83}, - {""}, + {pool_offset(589), 83}, + {-1}, - {"inogham", 278}, - {""}, {""}, {""}, {""}, + {pool_offset(591), 278}, + {-1}, {-1}, {-1}, {-1}, - {"intirhuta", 449}, - {""}, {""}, {""}, {""}, + {pool_offset(596), 449}, + {-1}, {-1}, {-1}, {-1}, - {"inmodi", 451}, - {""}, + {pool_offset(601), 451}, + {-1}, - {"idc", 90}, + {pool_offset(603), 90}, - {"mand", 128}, - {""}, {""}, {""}, + {pool_offset(604), 128}, + {-1}, {-1}, {-1}, - {"inmandaic", 256}, + {pool_offset(608), 256}, - {"dia", 64}, + {pool_offset(609), 64}, - {"cwcf", 45}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(610), 45}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"xidc", 231}, - {""}, {""}, + {pool_offset(618), 231}, + {-1}, {-1}, - {"sind", 106}, + {pool_offset(621), 106}, - {"newline", 0}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(622), 0}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"mend", 134}, - {""}, {""}, {""}, + {pool_offset(631), 134}, + {-1}, {-1}, {-1}, - 
{"newtailue", 147}, - {""}, {""}, + {pool_offset(635), 147}, + {-1}, {-1}, - {"inshorthandformatcontrols", 468}, - {""}, {""}, {""}, + {pool_offset(638), 468}, + {-1}, {-1}, {-1}, - {"anatolianhieroglyphs", 18}, + {pool_offset(642), 18}, - {"xids", 232}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(643), 232}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"ids", 91}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(653), 91}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"innewtailue", 289}, + {pool_offset(659), 289}, - {"insylotinagri", 363}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(660), 363}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"insundanese", 295}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(666), 295}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"lo", 118}, - {""}, {""}, {""}, + {pool_offset(673), 118}, + {-1}, {-1}, {-1}, - {"indominotiles", 479}, + {pool_offset(677), 479}, - {"inlao", 269}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(678), 269}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"lao", 109}, + {pool_offset(685), 109}, - {"laoo", 109}, + {pool_offset(686), 109}, - {"mongolian", 140}, - {""}, {""}, + {pool_offset(687), 140}, + {-1}, {-1}, - {"cwt", 48}, - {""}, + {pool_offset(690), 48}, + {-1}, - {"idst", 89}, + {pool_offset(692), 89}, - {"format", 42}, - {""}, {""}, + {pool_offset(693), 42}, + {-1}, {-1}, - {"lineseparator", 235}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(696), 235}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"letter", 107}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(702), 107}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"idcontinue", 90}, - {""}, {""}, + {pool_offset(711), 90}, + {-1}, {-1}, - {"inanatolianhieroglyphs", 460}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(714), 460}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inlydian", 424}, - {""}, + {pool_offset(723), 424}, + {-1}, - {"pi", 183}, + {pool_offset(725), 183}, - 
{"nchar", 151}, - {""}, {""}, {""}, {""}, + {pool_offset(726), 151}, + {-1}, {-1}, {-1}, {-1}, - {"inspacingmodifierletters", 243}, + {pool_offset(731), 243}, - {"oalpha", 163}, - {""}, {""}, + {pool_offset(732), 163}, + {-1}, {-1}, - {"indeseret", 412}, - {""}, + {pool_offset(735), 412}, + {-1}, - {"pc", 177}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(737), 177}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"xdigit", 11}, - {""}, + {pool_offset(756), 11}, + {-1}, - {"inphaistosdisc", 403}, + {pool_offset(758), 403}, - {"joincontrol", 98}, + {pool_offset(759), 98}, - {"inlowsurrogates", 385}, - {""}, + {pool_offset(760), 385}, + {-1}, - {"mro", 141}, + {pool_offset(762), 141}, - {"mroo", 141}, - {""}, {""}, {""}, + {pool_offset(763), 141}, + {-1}, {-1}, {-1}, - {"xidstart", 232}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(767), 232}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"wara", 229}, - {""}, {""}, + {pool_offset(773), 229}, + {-1}, {-1}, - {"inwarangciti", 454}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, + {pool_offset(776), 454}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {"ps", 185}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(787), 185}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"pe", 179}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(797), 179}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"cwl", 47}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(806), 47}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"incoptic", 332}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(816), 332}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"diacritic", 64}, - {""}, {""}, + {pool_offset(824), 64}, + {-1}, {-1}, - {"pf", 180}, - {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(827), 180}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"space", 9}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(844), 9}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"hanunoo", 82}, - {""}, {""}, + {pool_offset(853), 82}, + {-1}, {-1}, - {"incyrillicextendeda", 336}, - {""}, {""}, {""}, + {pool_offset(856), 336}, + {-1}, {-1}, {-1}, - {"odi", 164}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(860), 164}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"modi", 139}, + {pool_offset(866), 139}, - {"oidc", 166}, + {pool_offset(867), 166}, - {"incopticepactnumbers", 406}, - {""}, + {pool_offset(868), 406}, + {-1}, - {"palm", 173}, + {pool_offset(870), 173}, - {"insundanesesupplement", 299}, + {pool_offset(871), 299}, - {"patws", 175}, - {""}, {""}, {""}, + {pool_offset(872), 175}, + {-1}, {-1}, {-1}, - {"inolditalic", 407}, + {pool_offset(876), 407}, - {"inlepcha", 297}, + {pool_offset(877), 297}, - {"shaw", 197}, - {""}, + {pool_offset(878), 197}, + {-1}, - {"omath", 169}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(880), 169}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"alpha", 1}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(886), 1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"oids", 167}, - {""}, {""}, {""}, + {pool_offset(892), 167}, + {-1}, {-1}, {-1}, - {"control", 41}, + {pool_offset(896), 41}, - {"ideo", 92}, + {pool_offset(897), 92}, - {"lepc", 111}, + {pool_offset(898), 111}, - {"psalterpahlavi", 186}, + {pool_offset(899), 186}, - {"dsrt", 62}, - {""}, + {pool_offset(900), 62}, + {-1}, - {"otheridcontinue", 166}, - {""}, {""}, {""}, + {pool_offset(902), 166}, + {-1}, {-1}, {-1}, - {"inpalmyrene", 420}, + {pool_offset(906), 420}, - {"decimalnumber", 146}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(907), 146}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"noncharactercodepoint", 151}, + 
{pool_offset(915), 151}, - {"idstart", 91}, + {pool_offset(916), 91}, - {"otheralphabetic", 163}, + {pool_offset(917), 163}, - {"letternumber", 149}, - {""}, {""}, {""}, {""}, + {pool_offset(918), 149}, + {-1}, {-1}, {-1}, {-1}, - {"dash", 59}, - {""}, {""}, + {pool_offset(923), 59}, + {-1}, {-1}, - {"xidcontinue", 231}, - {""}, {""}, {""}, + {pool_offset(926), 231}, + {-1}, {-1}, {-1}, - {"finalpunctuation", 180}, + {pool_offset(930), 180}, - {"extender", 69}, - {""}, {""}, {""}, + {pool_offset(931), 69}, + {-1}, {-1}, {-1}, - {"siddham", 198}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(935), 198}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inoldturkic", 435}, - {""}, {""}, + {pool_offset(945), 435}, + {-1}, {-1}, - {"sharada", 196}, - {""}, {""}, + {pool_offset(948), 196}, + {-1}, {-1}, - {"whitespace", 230}, - {""}, {""}, {""}, {""}, + {pool_offset(951), 230}, + {-1}, {-1}, {-1}, {-1}, - {"inmodifiertoneletters", 361}, - {""}, + {pool_offset(956), 361}, + {-1}, - {"radical", 188}, - {""}, + {pool_offset(958), 188}, + {-1}, - {"canadianaboriginal", 36}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(960), 36}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inethiopicextendeda", 377}, + {pool_offset(969), 377}, - {"imperialaramaic", 93}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(970), 93}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"perm", 157}, + {pool_offset(978), 157}, - {"deseret", 62}, - {""}, {""}, {""}, {""}, + {pool_offset(979), 62}, + {-1}, {-1}, {-1}, {-1}, - {"casedletter", 108}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, + {pool_offset(984), 108}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, - {"inimperialaramaic", 419}, - {""}, {""}, + {pool_offset(998), 419}, + {-1}, {-1}, - {"otheridstart", 167}, - {""}, {""}, {""}, {""}, + {pool_offset(1001), 167}, + {-1}, {-1}, {-1}, {-1}, - {"prti", 
96}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1006), 96}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"cprt", 57}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1012), 57}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"othersymbol", 203}, - {""}, {""}, {""}, + {pool_offset(1018), 203}, + {-1}, {-1}, {-1}, - {"coptic", 54}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1022), 54}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"otherlowercase", 168}, + {pool_offset(1037), 168}, - {"phnx", 182}, - {""}, {""}, {""}, + {pool_offset(1038), 182}, + {-1}, {-1}, {-1}, - {"inphoenician", 423}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1042), 423}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"incaucasianalbanian", 416}, + {pool_offset(1049), 416}, - {"inlatin1supplement", 239}, - {""}, {""}, {""}, + {pool_offset(1050), 239}, + {-1}, {-1}, {-1}, - {"xpeo", 158}, - {""}, {""}, {""}, + {pool_offset(1054), 158}, + {-1}, {-1}, {-1}, - {"ininscriptionalparthian", 432}, + {pool_offset(1058), 432}, - {"ininscriptionalpahlavi", 433}, + {pool_offset(1059), 433}, - {"other", 35}, + {pool_offset(1060), 35}, - {"privateuse", 52}, - {""}, {""}, {""}, {""}, + {pool_offset(1061), 52}, + {-1}, {-1}, {-1}, {-1}, - {"othernumber", 150}, - {""}, {""}, {""}, {""}, + {pool_offset(1066), 150}, + {-1}, {-1}, {-1}, {-1}, - {"inenclosedideographicsupplement", 482}, - {""}, + {pool_offset(1071), 482}, + {-1}, - {"copt", 54}, - {""}, {""}, {""}, {""}, + {pool_offset(1073), 54}, + {-1}, {-1}, {-1}, {-1}, - {"print", 7}, - {""}, + {pool_offset(1078), 7}, + {-1}, - {"cuneiform", 56}, + {pool_offset(1080), 56}, - {"olditalic", 155}, + {pool_offset(1081), 155}, - {"xsux", 56}, - {""}, {""}, {""}, + {pool_offset(1082), 56}, + {-1}, {-1}, {-1}, - {"inmongolian", 285}, - {""}, {""}, {""}, {""}, + {pool_offset(1086), 285}, + {-1}, {-1}, {-1}, {-1}, - {"inancientgreekmusicalnotation", 471}, + {pool_offset(1091), 
471}, - {"sundanese", 206}, + {pool_offset(1092), 206}, - {"inoldnortharabian", 429}, - {""}, + {pool_offset(1093), 429}, + {-1}, - {"patternsyntax", 174}, - {""}, {""}, + {pool_offset(1095), 174}, + {-1}, {-1}, - {"inherited", 94}, + {pool_offset(1098), 94}, - {"ingrantha", 448}, + {pool_offset(1099), 448}, - {"inphoneticextensions", 301}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1100), 301}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"saurashtra", 194}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1110), 194}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"inancientgreeknumbers", 401}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1116), 401}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"shrd", 196}, - {""}, {""}, {""}, + {pool_offset(1122), 196}, + {-1}, {-1}, {-1}, - {"inoldsoutharabian", 428}, + {pool_offset(1126), 428}, - {"innabataean", 421}, + {pool_offset(1127), 421}, - {"inalphabeticpresentationforms", 388}, + {pool_offset(1128), 388}, - {"spaceseparator", 237}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1129), 237}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"olower", 168}, - {""}, + {pool_offset(1135), 168}, + {-1}, - {"ingeneralpunctuation", 306}, + {pool_offset(1137), 306}, - {"inunifiedcanadianaboriginalsyllabics", 277}, - {""}, + {pool_offset(1138), 277}, + {-1}, - {"bamum", 25}, - {""}, {""}, + {pool_offset(1140), 25}, + {-1}, {-1}, - {"inarabic", 250}, - {""}, {""}, {""}, + {pool_offset(1143), 250}, + {-1}, {-1}, {-1}, - {"phli", 95}, + {pool_offset(1147), 95}, - {"inphoneticextensionssupplement", 302}, - {""}, + {pool_offset(1148), 302}, + {-1}, - {"patternwhitespace", 175}, - {""}, + {pool_offset(1150), 175}, + {-1}, - {"inbasiclatin", 238}, - {""}, + {pool_offset(1152), 238}, + {-1}, - {"othermath", 169}, + {pool_offset(1154), 169}, - {"idsbinaryoperator", 88}, + {pool_offset(1155), 88}, - {"inmathematicaloperators", 313}, + {pool_offset(1156), 313}, - {"multani", 142}, + {pool_offset(1157), 142}, - {"ingujarati", 261}, 
- {""}, {""}, {""}, {""}, + {pool_offset(1158), 261}, + {-1}, {-1}, {-1}, {-1}, - {"inelbasan", 415}, + {pool_offset(1163), 415}, - {"inoldhungarian", 436}, + {pool_offset(1164), 436}, - {"innumberforms", 311}, + {pool_offset(1165), 311}, - {"nd", 146}, - {""}, + {pool_offset(1166), 146}, + {-1}, - {"yi", 233}, + {pool_offset(1168), 233}, - {"lisu", 115}, - {""}, {""}, + {pool_offset(1169), 115}, + {-1}, {-1}, - {"yiii", 233}, - {""}, + {pool_offset(1172), 233}, + {-1}, - {"inpsalterpahlavi", 434}, - {""}, {""}, {""}, + {pool_offset(1174), 434}, + {-1}, {-1}, {-1}, - {"tagbanwa", 210}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1178), 210}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"bass", 26}, - {""}, {""}, {""}, {""}, + {pool_offset(1184), 26}, + {-1}, {-1}, {-1}, {-1}, - {"nbat", 145}, + {pool_offset(1189), 145}, - {"inbyzantinemusicalsymbols", 469}, - {""}, + {pool_offset(1190), 469}, + {-1}, - {"closepunctuation", 179}, - {""}, + {pool_offset(1192), 179}, + {-1}, - {"tfng", 221}, + {pool_offset(1194), 221}, - {"otherletter", 118}, - {""}, {""}, {""}, + {pool_offset(1195), 118}, + {-1}, {-1}, {-1}, - {"sd", 204}, - {""}, {""}, + {pool_offset(1199), 204}, + {-1}, {-1}, - {"lowercase", 120}, + {pool_offset(1202), 120}, - {"sidd", 198}, - {""}, + {pool_offset(1203), 198}, + {-1}, - {"nabataean", 145}, - {""}, + {pool_offset(1205), 145}, + {-1}, - {"lepcha", 111}, + {pool_offset(1207), 111}, - {"inarabicpresentationformsa", 389}, - {""}, + {pool_offset(1208), 389}, + {-1}, - {"modifierletter", 117}, + {pool_offset(1210), 117}, - {"intags", 496}, + {pool_offset(1211), 496}, - {"mult", 142}, - {""}, {""}, + {pool_offset(1212), 142}, + {-1}, {-1}, - {"insiddham", 450}, + {pool_offset(1215), 450}, - {"talu", 147}, - {""}, {""}, {""}, {""}, + {pool_offset(1216), 147}, + {-1}, {-1}, {-1}, {-1}, - {"cased", 39}, - {""}, + {pool_offset(1221), 39}, + {-1}, - {"intibetan", 270}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1223), 270}, + {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, - {"separator", 234}, - {""}, + {pool_offset(1230), 234}, + {-1}, - {"runic", 190}, + {pool_offset(1232), 190}, - {"inmiscellaneousmathematicalsymbolsb", 327}, - {""}, {""}, {""}, {""}, + {pool_offset(1233), 327}, + {-1}, {-1}, {-1}, {-1}, - {"linb", 114}, + {pool_offset(1238), 114}, - {"inbraillepatterns", 325}, + {pool_offset(1239), 325}, - {"oldpersian", 158}, - {""}, + {pool_offset(1240), 158}, + {-1}, - {"inugaritic", 410}, + {pool_offset(1242), 410}, - {"telu", 216}, - {""}, + {pool_offset(1243), 216}, + {-1}, - {"limb", 112}, - {""}, + {pool_offset(1245), 112}, + {-1}, - {"oldpermic", 157}, - {""}, + {pool_offset(1247), 157}, + {-1}, - {"bali", 24}, + {pool_offset(1249), 24}, - {"inmyanmarextendedb", 372}, + {pool_offset(1250), 372}, - {"tibt", 220}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1251), 220}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"saur", 194}, - {""}, {""}, {""}, + {pool_offset(1259), 194}, + {-1}, {-1}, {-1}, - {"po", 184}, - {""}, + {pool_offset(1263), 184}, + {-1}, - {"tibetan", 220}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1265), 220}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"insaurashtra", 366}, - {""}, {""}, {""}, + {pool_offset(1280), 366}, + {-1}, {-1}, {-1}, - {"inbalinese", 294}, + {pool_offset(1284), 294}, - {"elba", 67}, - {""}, {""}, {""}, {""}, + {pool_offset(1285), 67}, + {-1}, {-1}, {-1}, {-1}, - {"bengali", 28}, + {pool_offset(1290), 28}, - {"gran", 73}, - {""}, {""}, {""}, + {pool_offset(1291), 73}, + {-1}, {-1}, {-1}, - {"phoenician", 182}, - {""}, + {pool_offset(1295), 182}, + {-1}, - {"ingurmukhi", 260}, - {""}, {""}, + {pool_offset(1297), 260}, + {-1}, {-1}, - {"caseignorable", 38}, - {""}, {""}, {""}, {""}, + {pool_offset(1300), 38}, + {-1}, {-1}, {-1}, {-1}, - {"lyci", 123}, - {""}, + {pool_offset(1305), 123}, + {-1}, - {"inyijinghexagramsymbols", 353}, - {""}, + 
{pool_offset(1307), 353}, + {-1}, - {"mlym", 127}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1309), 127}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"lycian", 123}, - {""}, {""}, {""}, {""}, + {pool_offset(1316), 123}, + {-1}, {-1}, {-1}, {-1}, - {"elbasan", 67}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1321), 67}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"balinese", 24}, + {pool_offset(1327), 24}, - {"narb", 156}, - {""}, + {pool_offset(1328), 156}, + {-1}, - {"brai", 32}, - {""}, {""}, + {pool_offset(1330), 32}, + {-1}, {-1}, - {"insuttonsignwriting", 475}, - {""}, + {pool_offset(1333), 475}, + {-1}, - {"insorasompeng", 440}, + {pool_offset(1335), 440}, - {"malayalam", 127}, - {""}, {""}, + {pool_offset(1336), 127}, + {-1}, {-1}, - {"arab", 20}, - {""}, + {pool_offset(1339), 20}, + {-1}, - {"brahmi", 31}, - {""}, + {pool_offset(1341), 31}, + {-1}, - {"incountingrodnumerals", 473}, - {""}, {""}, {""}, + {pool_offset(1343), 473}, + {-1}, {-1}, {-1}, - {"rjng", 189}, + {pool_offset(1347), 189}, - {"mong", 140}, - {""}, {""}, {""}, + {pool_offset(1348), 140}, + {-1}, {-1}, {-1}, - {"incyrillicsupplement", 247}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1352), 247}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"ogam", 152}, - {""}, + {pool_offset(1359), 152}, + {-1}, - {"sarb", 159}, + {pool_offset(1361), 159}, - {"lower", 6}, - {""}, + {pool_offset(1362), 6}, + {-1}, - {"inrejang", 369}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1364), 369}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"hmng", 172}, - {""}, {""}, + {pool_offset(1370), 172}, + {-1}, {-1}, - {"tirhuta", 222}, + {pool_offset(1373), 222}, - {"hang", 81}, - {""}, {""}, {""}, {""}, + {pool_offset(1374), 81}, + {-1}, {-1}, {-1}, {-1}, - {"wspace", 230}, - {""}, {""}, {""}, {""}, + {pool_offset(1379), 230}, + {-1}, {-1}, {-1}, {-1}, - {"inlatinextendedd", 362}, - {""}, {""}, + {pool_offset(1384), 362}, + {-1}, {-1}, - {"invai", 358}, + {pool_offset(1387), 358}, - {"incherokeesupplement", 379}, - 
{""}, + {pool_offset(1388), 379}, + {-1}, - {"mymr", 143}, - {""}, {""}, {""}, + {pool_offset(1390), 143}, + {-1}, {-1}, {-1}, - {"vai", 227}, + {pool_offset(1394), 227}, - {"vaii", 227}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(1395), 227}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"ethiopic", 68}, - {""}, + {pool_offset(1408), 68}, + {-1}, - {"sylotinagri", 207}, - {""}, {""}, {""}, {""}, + {pool_offset(1410), 207}, + {-1}, {-1}, {-1}, {-1}, - {"myanmar", 143}, - {""}, + {pool_offset(1415), 143}, + {-1}, - {"syrc", 208}, - {""}, + {pool_offset(1417), 208}, + {-1}, - {"inbrahmi", 438}, - {""}, {""}, {""}, + {pool_offset(1419), 438}, + {-1}, {-1}, {-1}, - {"mathsymbol", 202}, - {""}, + {pool_offset(1423), 202}, + {-1}, - {"runr", 190}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1425), 190}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"inshavian", 413}, + {pool_offset(1431), 413}, - {"gothic", 72}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1432), 72}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"syriac", 208}, + {pool_offset(1438), 208}, - {"p", 171}, - {""}, + {pool_offset(1439), 171}, + {-1}, - {"zp", 236}, + {pool_offset(1441), 236}, - {"induployan", 467}, - {""}, + {pool_offset(1442), 467}, + {-1}, - {"vs", 228}, - {""}, {""}, + {pool_offset(1444), 228}, + {-1}, {-1}, - {"ingreekandcoptic", 245}, + {pool_offset(1447), 245}, - {"ingothic", 408}, - {""}, {""}, + {pool_offset(1448), 408}, + {-1}, {-1}, - {"lowercaseletter", 116}, - {""}, {""}, {""}, + {pool_offset(1451), 116}, + {-1}, {-1}, {-1}, - {"inoriya", 262}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1455), 262}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"grext", 75}, + {pool_offset(1461), 75}, - {"linearb", 114}, + {pool_offset(1462), 114}, - {"inyiradicals", 356}, - {""}, + {pool_offset(1463), 356}, + {-1}, - {"inethiopicsupplement", 275}, + {pool_offset(1465), 275}, - {"inletterlikesymbols", 310}, + {pool_offset(1466), 310}, - 
{"ingeometricshapes", 320}, - {""}, + {pool_offset(1467), 320}, + {-1}, - {"java", 97}, + {pool_offset(1469), 97}, - {"inblockelements", 319}, - {""}, + {pool_offset(1470), 319}, + {-1}, - {"number", 144}, + {pool_offset(1472), 144}, - {"incombininghalfmarks", 392}, + {pool_offset(1473), 392}, - {"inbassavah", 463}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1474), 463}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"inlinearbsyllabary", 398}, - {""}, + {pool_offset(1489), 398}, + {-1}, - {"osmanya", 162}, - {""}, {""}, {""}, + {pool_offset(1491), 162}, + {-1}, {-1}, {-1}, - {"avst", 23}, - {""}, {""}, {""}, {""}, + {pool_offset(1495), 23}, + {-1}, {-1}, {-1}, {-1}, - {"hexdigit", 85}, - {""}, {""}, {""}, + {pool_offset(1500), 85}, + {-1}, {-1}, {-1}, - {"injavanese", 371}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1504), 371}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"cyrl", 58}, + {pool_offset(1513), 58}, - {"avestan", 23}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1514), 23}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"intaiviet", 375}, + {pool_offset(1531), 375}, - {"inornamentaldingbats", 485}, + {pool_offset(1532), 485}, - {"tavt", 213}, - {""}, {""}, {""}, {""}, + {pool_offset(1533), 213}, + {-1}, {-1}, {-1}, {-1}, - {"javanese", 97}, + {pool_offset(1538), 97}, - {"inethiopicextended", 335}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, + {pool_offset(1539), 335}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {"inhalfwidthandfullwidthforms", 396}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1550), 396}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"ingeorgian", 272}, - {""}, + {pool_offset(1559), 272}, + {-1}, - 
{"inarabicmathematicalalphabeticsymbols", 477}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1561), 477}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"hangul", 81}, - {""}, + {pool_offset(1569), 81}, + {-1}, - {"insuperscriptsandsubscripts", 307}, - {""}, {""}, {""}, {""}, + {pool_offset(1571), 307}, + {-1}, {-1}, {-1}, {-1}, - {"inhanunoo", 281}, - {""}, + {pool_offset(1576), 281}, + {-1}, - {"inhangulsyllables", 381}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1578), 381}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inoldpersian", 411}, - {""}, {""}, + {pool_offset(1586), 411}, + {-1}, {-1}, - {"geor", 70}, - {""}, + {pool_offset(1589), 70}, + {-1}, - {"otherpunctuation", 184}, - {""}, + {pool_offset(1591), 184}, + {-1}, - {"inoldpermic", 409}, - {""}, {""}, {""}, {""}, + {pool_offset(1593), 409}, + {-1}, {-1}, {-1}, {-1}, - {"inalchemicalsymbols", 487}, + {pool_offset(1598), 487}, - {"sylo", 207}, - {""}, + {pool_offset(1599), 207}, + {-1}, - {"hiragana", 86}, - {""}, {""}, + {pool_offset(1601), 86}, + {-1}, {-1}, - {"word", 12}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1604), 12}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inmeroitichieroglyphs", 425}, - {""}, + {pool_offset(1612), 425}, + {-1}, - {"insmallformvariants", 394}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1614), 394}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"sund", 206}, + {pool_offset(1620), 206}, - {"brah", 31}, - {""}, + {pool_offset(1621), 31}, + {-1}, - {"insupplementalmathematicaloperators", 328}, + {pool_offset(1623), 328}, - {"glagolitic", 71}, - {""}, {""}, + {pool_offset(1624), 71}, + {-1}, {-1}, - {"braille", 32}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, + {pool_offset(1627), 32}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {"unassigned", 51}, + {pool_offset(1639), 51}, - {"sgnw", 199}, - {""}, {""}, {""}, + {pool_offset(1640), 199}, + {-1}, {-1}, {-1}, - {"plrd", 137}, - 
{""}, {""}, {""}, + {pool_offset(1644), 137}, + {-1}, {-1}, {-1}, - {"hebr", 84}, + {pool_offset(1648), 84}, - {"ogham", 152}, - {""}, {""}, {""}, {""}, + {pool_offset(1649), 152}, + {-1}, {-1}, {-1}, {-1}, - {"cyrillic", 58}, - {""}, {""}, {""}, + {pool_offset(1654), 58}, + {-1}, {-1}, {-1}, - {"orya", 161}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, + {pool_offset(1658), 161}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {"grantha", 73}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1669), 73}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"invariationselectors", 390}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1684), 390}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"indingbats", 322}, - {""}, + {pool_offset(1693), 322}, + {-1}, - {"inunifiedcanadianaboriginalsyllabicsextended", 286}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1695), 286}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inhanguljamoextendeda", 370}, - {""}, {""}, + {pool_offset(1702), 370}, + {-1}, {-1}, - {"goth", 72}, + {pool_offset(1705), 72}, - {"graphemebase", 74}, - {""}, + {pool_offset(1706), 74}, + {-1}, - {"bidic", 29}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1708), 29}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"ogrext", 165}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1717), 165}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"idsb", 88}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1724), 88}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"invariationselectorssupplement", 497}, + {pool_offset(1732), 497}, - {"assigned", 22}, - {""}, {""}, {""}, + {pool_offset(1733), 22}, + {-1}, {-1}, {-1}, - {"symbol", 191}, - {""}, {""}, {""}, + {pool_offset(1737), 191}, + {-1}, {-1}, {-1}, - {"ingreekextended", 305}, - {""}, + {pool_offset(1741), 305}, + {-1}, - 
{"pauc", 176}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1743), 176}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"meroitichieroglyphs", 136}, - {""}, + {pool_offset(1752), 136}, + {-1}, - {"logicalorderexception", 119}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1754), 119}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"incurrencysymbols", 308}, - {""}, {""}, {""}, + {pool_offset(1771), 308}, + {-1}, {-1}, {-1}, - {"inarabicextendeda", 257}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1775), 257}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"insupplementalarrowsc", 489}, - {""}, {""}, + {pool_offset(1783), 489}, + {-1}, {-1}, - {"insupplementalarrowsa", 324}, - {""}, + {pool_offset(1786), 324}, + {-1}, - {"inkannada", 265}, - {""}, {""}, {""}, + {pool_offset(1788), 265}, + {-1}, {-1}, {-1}, - {"kana", 101}, + {pool_offset(1792), 101}, - {"phagspa", 181}, - {""}, {""}, + {pool_offset(1793), 181}, + {-1}, {-1}, - {"cakm", 43}, + {pool_offset(1796), 43}, - {"digit", 4}, - {""}, {""}, {""}, {""}, + {pool_offset(1797), 4}, + {-1}, {-1}, {-1}, {-1}, - {"softdotted", 204}, - {""}, {""}, {""}, + {pool_offset(1802), 204}, + {-1}, {-1}, {-1}, - {"inchakma", 441}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, + {pool_offset(1806), 441}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {"inphagspa", 365}, - {""}, {""}, {""}, + {pool_offset(1818), 365}, + {-1}, {-1}, {-1}, - {"inhighprivateusesurrogates", 384}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, + {pool_offset(1822), 384}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, - {"warangciti", 229}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1836), 229}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - 
{"incjkcompatibilityforms", 393}, - {""}, {""}, + {pool_offset(1844), 393}, + {-1}, {-1}, - {"idstrinaryoperator", 89}, - {""}, + {pool_offset(1847), 89}, + {-1}, - {"incjkcompatibilityideographs", 387}, - {""}, + {pool_offset(1849), 387}, + {-1}, - {"inkaithi", 439}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1851), 439}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"takri", 214}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1857), 214}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"phlp", 186}, + {pool_offset(1864), 186}, - {"mendekikakui", 134}, - {""}, + {pool_offset(1865), 134}, + {-1}, - {"punct", 8}, - {""}, {""}, + {pool_offset(1867), 8}, + {-1}, {-1}, - {"otherdefaultignorablecodepoint", 164}, + {pool_offset(1870), 164}, - {"inkatakana", 343}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1871), 343}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"graph", 5}, + {pool_offset(1878), 5}, - {"lydi", 124}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(1879), 124}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"pd", 178}, - {""}, + {pool_offset(1885), 178}, + {-1}, - {"incjkstrokes", 348}, - {""}, {""}, + {pool_offset(1887), 348}, + {-1}, {-1}, - {"lydian", 124}, - {""}, {""}, {""}, {""}, + {pool_offset(1890), 124}, + {-1}, {-1}, {-1}, {-1}, - {"sorasompeng", 205}, - {""}, + {pool_offset(1895), 205}, + {-1}, - {"incjkcompatibilityideographssupplement", 495}, - {""}, {""}, + {pool_offset(1897), 495}, + {-1}, {-1}, - {"meeteimayek", 133}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1900), 133}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"otheruppercase", 170}, - {""}, {""}, + {pool_offset(1909), 170}, + {-1}, {-1}, - {"kali", 102}, - {""}, {""}, + {pool_offset(1912), 102}, + {-1}, {-1}, - {"inhanguljamo", 273}, - {""}, + {pool_offset(1915), 273}, + {-1}, - {"ideographic", 92}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1917), 92}, + {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inmeeteimayek", 380}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(1935), 380}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"incyrillicextendedb", 359}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1948), 359}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"hluw", 18}, - {""}, {""}, {""}, {""}, + {pool_offset(1964), 18}, + {-1}, {-1}, {-1}, {-1}, - {"patsyn", 174}, - {""}, {""}, + {pool_offset(1969), 174}, + {-1}, {-1}, - {"incombiningdiacriticalmarks", 244}, + {pool_offset(1972), 244}, - {"inarabicsupplement", 252}, - {""}, + {pool_offset(1973), 252}, + {-1}, - {"inkanasupplement", 466}, - {""}, + {pool_offset(1975), 466}, + {-1}, - {"deprecated", 61}, + {pool_offset(1977), 61}, - {"inverticalforms", 391}, - {""}, {""}, {""}, + {pool_offset(1978), 391}, + {-1}, {-1}, {-1}, - {"incombiningdiacriticalmarksforsymbols", 309}, - {""}, {""}, {""}, {""}, + {pool_offset(1982), 309}, + {-1}, {-1}, {-1}, {-1}, - {"indevanagari", 258}, - {""}, + {pool_offset(1987), 258}, + {-1}, - {"inbuhid", 282}, - {""}, + {pool_offset(1989), 282}, + {-1}, - {"alphabetic", 17}, - {""}, {""}, {""}, + {pool_offset(1991), 17}, + {-1}, {-1}, {-1}, - {"oldhungarian", 154}, - {""}, {""}, + {pool_offset(1995), 154}, + {-1}, {-1}, - {"mark", 125}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(1998), 125}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inkharoshthi", 427}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2005), 427}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"deva", 63}, + {pool_offset(2011), 63}, - {"invedicextensions", 300}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2012), 300}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"devanagari", 63}, + {pool_offset(2019), 63}, - {"incombiningdiacriticalmarkssupplement", 
303}, - {""}, {""}, {""}, + {pool_offset(2020), 303}, + {-1}, {-1}, {-1}, - {"ingeometricshapesextended", 488}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2024), 488}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"inkhmer", 284}, - {""}, {""}, {""}, + {pool_offset(2030), 284}, + {-1}, {-1}, {-1}, - {"changeswhencasemapped", 46}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2034), 46}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"intakri", 452}, - {""}, {""}, + {pool_offset(2058), 452}, + {-1}, {-1}, - {"dep", 61}, + {pool_offset(2061), 61}, - {"takr", 214}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2062), 214}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"changeswhencasefolded", 45}, - {""}, + {pool_offset(2080), 45}, + {-1}, - {"kaithi", 99}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, + {pool_offset(2082), 99}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {"phag", 181}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, + {pool_offset(2093), 181}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {"inpahawhhmong", 464}, - {""}, + {pool_offset(2104), 464}, + {-1}, - {"bidicontrol", 29}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2106), 29}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inkhojki", 445}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2116), 445}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"bamu", 25}, + {pool_offset(2126), 25}, - {"inbamum", 360}, + {pool_offset(2127), 360}, - {"changeswhentitlecased", 48}, - {""}, {""}, + {pool_offset(2128), 48}, + 
{-1}, {-1}, - {"inkhmersymbols", 290}, + {pool_offset(2131), 290}, - {"uideo", 224}, + {pool_offset(2132), 224}, - {"lu", 122}, - {""}, {""}, + {pool_offset(2133), 122}, + {-1}, {-1}, - {"kthi", 99}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2136), 99}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"limbu", 112}, - {""}, + {pool_offset(2145), 112}, + {-1}, - {"defaultignorablecodepoint", 60}, - {""}, {""}, {""}, + {pool_offset(2147), 60}, + {-1}, {-1}, {-1}, - {"inplayingcards", 480}, - {""}, + {pool_offset(2151), 480}, + {-1}, - {"inhebrew", 249}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2153), 249}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"modifiersymbol", 201}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2161), 201}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"ugaritic", 223}, - {""}, {""}, {""}, {""}, + {pool_offset(2167), 223}, + {-1}, {-1}, {-1}, {-1}, - {"inlisu", 357}, + {pool_offset(2172), 357}, - {"inglagolitic", 330}, - {""}, + {pool_offset(2173), 330}, + {-1}, - {"inprivateusearea", 386}, - {""}, {""}, + {pool_offset(2175), 386}, + {-1}, {-1}, - {"olck", 153}, - {""}, {""}, {""}, + {pool_offset(2178), 153}, + {-1}, {-1}, {-1}, - {"inbengali", 259}, - {""}, {""}, + {pool_offset(2182), 259}, + {-1}, {-1}, - {"olchiki", 153}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2185), 153}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"caucasianalbanian", 40}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2195), 40}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"oldturkic", 160}, + {pool_offset(2210), 160}, - {"beng", 28}, + {pool_offset(2211), 28}, - {"intelugu", 264}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2212), 264}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"hyphen", 87}, - {""}, {""}, + {pool_offset(2218), 87}, + {-1}, {-1}, - {"inbuginese", 291}, - {""}, {""}, {""}, 
+ {pool_offset(2221), 291}, + {-1}, {-1}, {-1}, - {"intagalog", 280}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2225), 280}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"cypriot", 57}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2240), 57}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inlimbu", 287}, - {""}, {""}, + {pool_offset(2249), 287}, + {-1}, {-1}, - {"graphemeextend", 75}, - {""}, {""}, + {pool_offset(2252), 75}, + {-1}, {-1}, - {"tagb", 210}, - {""}, {""}, {""}, + {pool_offset(2255), 210}, + {-1}, {-1}, {-1}, - {"incypriotsyllabary", 418}, - {""}, {""}, {""}, + {pool_offset(2259), 418}, + {-1}, {-1}, {-1}, - {"intagbanwa", 283}, - {""}, {""}, {""}, {""}, + {pool_offset(2263), 283}, + {-1}, {-1}, {-1}, {-1}, - {"inaegeannumbers", 400}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2268), 400}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inkatakanaphoneticextensions", 349}, + {pool_offset(2275), 349}, - {"glag", 71}, + {pool_offset(2276), 71}, - {"combiningmark", 125}, + {pool_offset(2277), 125}, - {"palmyrene", 173}, - {""}, + {pool_offset(2278), 173}, + {-1}, - {"khmr", 104}, - {""}, {""}, {""}, + {pool_offset(2280), 104}, + {-1}, {-1}, {-1}, - {"khar", 103}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2284), 103}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"ugar", 223}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2292), 223}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inarabicpresentationformsb", 395}, - {""}, {""}, {""}, + {pool_offset(2300), 395}, + {-1}, {-1}, {-1}, - {"insupplementaryprivateuseareaa", 498}, - {""}, + {pool_offset(2304), 498}, + {-1}, - {"variationselector", 228}, - {""}, {""}, {""}, {""}, + {pool_offset(2306), 228}, + {-1}, {-1}, {-1}, {-1}, - {"nko", 148}, + {pool_offset(2311), 148}, - {"nkoo", 148}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
- {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2312), 148}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inbamumsupplement", 461}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2330), 461}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"tglg", 209}, - {""}, + {pool_offset(2336), 209}, + {-1}, - {"any", 19}, + {pool_offset(2338), 19}, - {"enclosingmark", 132}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2339), 132}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"gujr", 78}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2348), 78}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"changeswhenlowercased", 47}, - {""}, {""}, {""}, {""}, + {pool_offset(2358), 47}, + {-1}, {-1}, {-1}, {-1}, - {"knda", 100}, + {pool_offset(2363), 100}, - {"hung", 154}, + {pool_offset(2364), 154}, - {"gujarati", 78}, - {""}, {""}, + {pool_offset(2365), 78}, + {-1}, {-1}, - {"bopo", 30}, - {""}, {""}, {""}, {""}, + {pool_offset(2368), 30}, + {-1}, {-1}, {-1}, {-1}, - {"inmendekikakui", 476}, - {""}, {""}, + {pool_offset(2373), 476}, + {-1}, {-1}, - {"inbopomofo", 344}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2376), 344}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"kharoshthi", 103}, - {""}, + {pool_offset(2386), 103}, + {-1}, - {"kannada", 100}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(2388), 100}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"khoj", 105}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, + {pool_offset(2401), 105}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, - {"cherokee", 50}, - {""}, {""}, {""}, + {pool_offset(2422), 50}, + {-1}, {-1}, {-1}, - 
{"inlatinextendedb", 241}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2426), 241}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"dashpunctuation", 178}, - {""}, {""}, + {pool_offset(2432), 178}, + {-1}, {-1}, - {"dupl", 65}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2435), 65}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"inyisyllables", 355}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2443), 355}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"georgian", 70}, - {""}, + {pool_offset(2451), 70}, + {-1}, - {"rejang", 189}, - {""}, + {pool_offset(2453), 189}, + {-1}, - {"oupper", 170}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(2455), 170}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"aghb", 40}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, + {pool_offset(2477), 40}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, - {"khmer", 104}, - {""}, {""}, + {pool_offset(2491), 104}, + {-1}, {-1}, - {"grbase", 74}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2494), 74}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"currencysymbol", 195}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2503), 195}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"incombiningdiacriticalmarksextended", 293}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(2529), 293}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"spacingmark", 131}, - {""}, + {pool_offset(2542), 131}, + 
{-1}, - {"orkh", 160}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2544), 160}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"cwu", 49}, - {""}, + {pool_offset(2550), 49}, + {-1}, - {"hebrew", 84}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2552), 84}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"insupplementalpunctuation", 337}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, + {pool_offset(2570), 337}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {"oldnortharabian", 156}, - {""}, {""}, {""}, {""}, {""}, + {pool_offset(2581), 156}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"incjksymbolsandpunctuation", 341}, - {""}, {""}, + {pool_offset(2587), 341}, + {-1}, {-1}, - {"othergraphemeextend", 165}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, + {pool_offset(2590), 165}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, - {"indevanagariextended", 367}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2613), 367}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"tagalog", 209}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2621), 209}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"surrogate", 55}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(2628), 55}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"bopomofo", 30}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, + {pool_offset(2650), 30}, + {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, - {"ingeorgiansupplement", 333}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2679), 333}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"uppercase", 226}, - {""}, {""}, {""}, {""}, {""}, {""}, + {pool_offset(2704), 226}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, - {"unifiedideograph", 224}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(2711), 224}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"inpaucinhau", 455}, + {pool_offset(2742), 455}, - {"paucinhau", 176}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, + {pool_offset(2743), 176}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, - {"insupplementalsymbolsandpictographs", 490}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"nonspacingmark", 138}, - {""}, - - {"inhanguljamoextendedb", 382}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"inhighsurrogates", 383}, - - {"punctuation", 171}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
- {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, - - {"openpunctuation", 185}, - - {"bassavah", 26}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, - - {"upper", 10}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, - - {"insupplementalarrowsb", 326}, - - {"inkanbun", 346}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, - - {"inkangxiradicals", 339}, - {""}, {""}, {""}, - - {"inmeroiticcursive", 426}, - {""}, {""}, - - {"signwriting", 199}, - {""}, {""}, - - {"incjkradicalssupplement", 338}, - {""}, - - {"pahawhhmong", 172}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, - - {"inkayahli", 368}, - - {"uppercaseletter", 122}, - {""}, - - {"batk", 27}, - {""}, {""}, {""}, {""}, - - {"inbatak", 296}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"incjkcompatibility", 351}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, - - {"buhd", 34}, - {""}, {""}, {""}, {""}, {""}, - - {"egyp", 66}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, - - {"innoblock", 500}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, - - {"inegyptianhieroglyphs", 459}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, - {""}, {""}, {""}, {""}, - - {"grek", 77}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, - - {"bugi", 33}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"changeswhenuppercased", 49}, - {""}, {""}, {""}, - - {"paragraphseparator", 236}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, - - {"buginese", 33}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"guru", 79}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"inbopomofoextended", 347}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, - - {"telugu", 216}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, - - {"quotationmark", 187}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, - - {"inkhudawadi", 447}, - {""}, 
{""}, {""}, {""}, {""}, - - {"kayahli", 102}, - {""}, - - {"oldsoutharabian", 159}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, - - {"insupplementaryprivateuseareab", 499}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, - - {"inhangulcompatibilityjamo", 345}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, - - {"zyyy", 53}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"qmark", 187}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, - - {"buhid", 34}, - - {"sk", 201}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, - - {"unknown", 225}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"khudawadi", 106}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, - - {"katakana", 101}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"incjkunifiedideographsextensionc", 492}, - {""}, {""}, - - {"incjkunifiedideographsextensiona", 352}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, - - {"incjkunifiedideographs", 354}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, - - {"incjkunifiedideographsextensione", 494}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, - - {"duployan", 65}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, - - {"egyptianhieroglyphs", 66}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"inboxdrawing", 318}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"khojki", 105}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, - - {"incjkunifiedideographsextensiond", 493}, - {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"batak", 27}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, - - {"incjkunifiedideographsextensionb", 491}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, - - {"blank", 2}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"greek", 77}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, - - {"grlink", 76}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, - - {"gurmukhi", 79}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, - - {"graphemelink", 76} + {pool_offset(2774), 490}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2792), 138}, + {-1}, + 
+ {pool_offset(2794), 382}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2802), 383}, + + {pool_offset(2803), 171}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + + {pool_offset(2834), 185}, + + {pool_offset(2835), 26}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + + {pool_offset(2864), 10}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2878), 326}, + + {pool_offset(2879), 346}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + + {pool_offset(2918), 339}, + {-1}, {-1}, {-1}, + + {pool_offset(2922), 426}, + {-1}, {-1}, + + {pool_offset(2925), 199}, + {-1}, {-1}, + + {pool_offset(2928), 338}, + {-1}, + + {pool_offset(2930), 172}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + + {pool_offset(2952), 368}, + + {pool_offset(2953), 122}, + {-1}, + + {pool_offset(2955), 27}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2960), 296}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2968), 351}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2983), 34}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(2989), 66}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + + {pool_offset(3009), 500}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3043), 459}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3084), 77}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + + {pool_offset(3176), 33}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3230), 49}, + {-1}, {-1}, {-1}, + + {pool_offset(3234), 236}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + + {pool_offset(3254), 33}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3282), 79}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3299), 347}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + + {pool_offset(3312), 216}, + {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, + + {pool_offset(3343), 187}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + + {pool_offset(3364), 447}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3370), 102}, + {-1}, + + {pool_offset(3372), 159}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3396), 499}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + + {pool_offset(3479), 345}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + + {pool_offset(3491), 53}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3555), 187}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + + {pool_offset(3566), 34}, + + {pool_offset(3567), 201}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3582), 225}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3619), 106}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3644), 101}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3670), 492}, + {-1}, {-1}, + + {pool_offset(3673), 352}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + + {pool_offset(3685), 354}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3700), 494}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3743), 65}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + + {pool_offset(3763), 66}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(3853), 318}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4106), 105}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + + {pool_offset(4244), 493}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + 
{-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4722), 27}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4765), 491}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4780), 2}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(4851), 77}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, + + {pool_offset(4943), 76}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, + + {pool_offset(5054), 79}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, 
{-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, + + {pool_offset(5162), 76} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) @@ -25295,10 +26727,14 @@ unicode_lookup_property_name (register const char *str, register unsigned int le if (key <= MAX_HASH_VALUE && key >= 0) { - register const char *s = wordlist[key].name; - - if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0') - return &wordlist[key]; + register int o = wordlist[key].name; + if (o >= 0) + { + register const char *s = o + unicode_prop_name_pool; + + if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0') + return &wordlist[key]; + } } } return 0; diff --git a/src/unicode_property_data_posix.c b/src/unicode_property_data_posix.c index 1cf082b..fffe2ef 100644 --- a/src/unicode_property_data_posix.c +++ b/src/unicode_property_data_posix.c @@ -1,5 +1,5 @@ /* ANSI-C code produced by gperf version 3.0.4 */ -/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case -N unicode_lookup_property_name --output-file gperf.tmp unicode_property_data_posix.gperf */ +/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf.tmp unicode_property_data_posix.gperf */ /* Computed positions: -k'1,3' */ #if !((' ' == 32) && ('!' 
== 33) && ('"' == 34) && ('#' == 35) \ @@ -4785,6 +4785,9 @@ const CodeRanges[] = { CR_ASCII, }; +#define pool_offset(s) offsetof(struct unicode_prop_name_pool_t, unicode_prop_name_pool_str##s) + + #define TOTAL_KEYWORDS 15 #define MIN_WORD_LENGTH 4 #define MAX_WORD_LENGTH 7 @@ -4879,48 +4882,85 @@ hash (register const char *str, register unsigned int len) return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]]; } +struct unicode_prop_name_pool_t + { + char unicode_prop_name_pool_str5[sizeof("print")]; + char unicode_prop_name_pool_str6[sizeof("punct")]; + char unicode_prop_name_pool_str7[sizeof("alpha")]; + char unicode_prop_name_pool_str8[sizeof("alnum")]; + char unicode_prop_name_pool_str9[sizeof("xdigit")]; + char unicode_prop_name_pool_str10[sizeof("newline")]; + char unicode_prop_name_pool_str11[sizeof("upper")]; + char unicode_prop_name_pool_str12[sizeof("ascii")]; + char unicode_prop_name_pool_str13[sizeof("cntrl")]; + char unicode_prop_name_pool_str14[sizeof("space")]; + char unicode_prop_name_pool_str15[sizeof("word")]; + char unicode_prop_name_pool_str16[sizeof("lower")]; + char unicode_prop_name_pool_str17[sizeof("graph")]; + char unicode_prop_name_pool_str18[sizeof("digit")]; + char unicode_prop_name_pool_str19[sizeof("blank")]; + }; +static const struct unicode_prop_name_pool_t unicode_prop_name_pool_contents = + { + "print", + "punct", + "alpha", + "alnum", + "xdigit", + "newline", + "upper", + "ascii", + "cntrl", + "space", + "word", + "lower", + "graph", + "digit", + "blank" + }; +#define unicode_prop_name_pool ((const char *) &unicode_prop_name_pool_contents) #ifdef __GNUC__ __inline #if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ __attribute__ ((__gnu_inline__)) #endif #endif -const struct PropertyNameCtype * +const struct PoolPropertyNameCtype * unicode_lookup_property_name (register const char *str, register unsigned int len) { - static const struct PropertyNameCtype wordlist[] = + static const 
struct PoolPropertyNameCtype wordlist[] = { - {""}, {""}, {""}, {""}, {""}, + {-1}, {-1}, {-1}, {-1}, {-1}, - {"print", 7}, + {pool_offset(5), 7}, - {"punct", 8}, + {pool_offset(6), 8}, - {"alpha", 1}, + {pool_offset(7), 1}, - {"alnum", 13}, + {pool_offset(8), 13}, - {"xdigit", 11}, + {pool_offset(9), 11}, - {"newline", 0}, + {pool_offset(10), 0}, - {"upper", 10}, + {pool_offset(11), 10}, - {"ascii", 14}, + {pool_offset(12), 14}, - {"cntrl", 3}, + {pool_offset(13), 3}, - {"space", 9}, + {pool_offset(14), 9}, - {"word", 12}, + {pool_offset(15), 12}, - {"lower", 6}, + {pool_offset(16), 6}, - {"graph", 5}, + {pool_offset(17), 5}, - {"digit", 4}, + {pool_offset(18), 4}, - {"blank", 2} + {pool_offset(19), 2} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) @@ -4929,10 +4969,14 @@ unicode_lookup_property_name (register const char *str, register unsigned int le if (key <= MAX_HASH_VALUE && key >= 0) { - register const char *s = wordlist[key].name; + register int o = wordlist[key].name; + if (o >= 0) + { + register const char *s = o + unicode_prop_name_pool; - if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0') - return &wordlist[key]; + if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0') + return &wordlist[key]; + } } } return 0; diff --git a/src/utf8.c b/src/utf8.c index 1ad9653..e5e59b2 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2017 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -124,7 +124,7 @@ mbc_to_code(const UChar* p, const UChar* end) OnigCodePoint n; len = mbc_enc_len(p); - if (len > end - p) len = end - p; + if (len > (int )(end - p)) len = (int )(end - p); c = *p++; if (len > 1) { @@ -216,7 +216,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf) } *p++ = UTF8_TRAIL0(code); - return p - buf; + return (int )(p - buf); } } diff --git a/test/Makefile.am b/test/Makefile.am index 2930e7f..9643bb0 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -3,7 +3,7 @@ lib_onig = ../src/libonig.la AM_LDFLAGS = -L$(prefix)/lib AM_CFLAGS = -AM_CPPFLAGS = -I../src -I$(includedir) +AM_CPPFLAGS = -I$(top_srcdir)/src -I$(includedir) TESTS = test_utf8 testc testp testcu diff --git a/test/test_utf8.c b/test/test_utf8.c index 31fcfcd..a14cacc 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -1051,7 +1051,20 @@ extern int main(int argc, char* argv[]) n("^\\X.$", "\xE0\xAE\xA8\xE0\xAE\xBF"); + // a + COMBINING GRAVE ACCENT (U+0300) + x2("h\\Xllo", "ha\xCC\x80llo", 0, 7); + x2("\\x40", "@", 0, 1); + x2("\\x1", "\x01", 0, 1); + x2("\\x{1}", "\x01", 0, 1); + x2("\\x{4E38}", "\xE4\xB8\xB8", 0, 3); + x2("\\u4E38", "\xE4\xB8\xB8", 0, 3); + x2("\\u0040", "@", 0, 1); + + x2("c.*\\b", "abc", 2, 3); + x2("\\b.*abc.*\\b", "abc", 0, 3); + + e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); e("(?\\g)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); e("(?<=(?>abc))", "abc", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); -- cgit v1.2.3