diff options
author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2014-08-12 21:06:31 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2014-08-12 21:06:31 +0200 |
commit | 0dbf3761f6b98e5c16c9bfa0306ae5d3257d081f (patch) | |
tree | b122da5e29b81eb5489734f477afb645422980d0 /enc |
Imported Upstream version 5.9.5upstream/5.9.5
Diffstat (limited to 'enc')
-rw-r--r-- | enc/ascii.c | 58 | ||||
-rw-r--r-- | enc/big5.c | 162 | ||||
-rw-r--r-- | enc/cp1251.c | 200 | ||||
-rw-r--r-- | enc/euc_jp.c | 285 | ||||
-rw-r--r-- | enc/euc_kr.c | 158 | ||||
-rw-r--r-- | enc/euc_tw.c | 129 | ||||
-rw-r--r-- | enc/gb18030.c | 495 | ||||
-rw-r--r-- | enc/iso8859_1.c | 272 | ||||
-rw-r--r-- | enc/iso8859_10.c | 239 | ||||
-rw-r--r-- | enc/iso8859_11.c | 96 | ||||
-rw-r--r-- | enc/iso8859_13.c | 228 | ||||
-rw-r--r-- | enc/iso8859_14.c | 241 | ||||
-rw-r--r-- | enc/iso8859_15.c | 235 | ||||
-rw-r--r-- | enc/iso8859_16.c | 237 | ||||
-rw-r--r-- | enc/iso8859_2.c | 235 | ||||
-rw-r--r-- | enc/iso8859_3.c | 235 | ||||
-rw-r--r-- | enc/iso8859_4.c | 237 | ||||
-rw-r--r-- | enc/iso8859_5.c | 226 | ||||
-rw-r--r-- | enc/iso8859_6.c | 96 | ||||
-rw-r--r-- | enc/iso8859_7.c | 222 | ||||
-rw-r--r-- | enc/iso8859_8.c | 96 | ||||
-rw-r--r-- | enc/iso8859_9.c | 228 | ||||
-rw-r--r-- | enc/koi8.c | 250 | ||||
-rw-r--r-- | enc/koi8_r.c | 212 | ||||
-rw-r--r-- | enc/mktable.c | 1162 | ||||
-rw-r--r-- | enc/sjis.c | 318 | ||||
-rw-r--r-- | enc/unicode.c | 11356 | ||||
-rw-r--r-- | enc/utf16_be.c | 225 | ||||
-rw-r--r-- | enc/utf16_le.c | 226 | ||||
-rw-r--r-- | enc/utf32_be.c | 184 | ||||
-rw-r--r-- | enc/utf32_le.c | 184 | ||||
-rw-r--r-- | enc/utf8.c | 305 |
32 files changed, 19032 insertions, 0 deletions
diff --git a/enc/ascii.c b/enc/ascii.c new file mode 100644 index 0000000..c2715f4 --- /dev/null +++ b/enc/ascii.c @@ -0,0 +1,58 @@ +/********************************************************************** + ascii.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingASCII = { + onigenc_single_byte_mbc_enc_len, + "US-ASCII", /* name */ + 1, /* max byte length */ + 1, /* min byte length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/big5.c b/enc/big5.c new file mode 100644 index 0000000..ca1e01b --- /dev/null +++ b/enc/big5.c @@ -0,0 +1,162 @@ +/********************************************************************** + big5.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_BIG5[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +big5_mbc_enc_len(const UChar* p) +{ + return EncLen_BIG5[*p]; +} + +static OnigCodePoint +big5_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end); +} + +static int +big5_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf); +} + +static int +big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag, + pp, end, lower); +} + +#if 0 +static int +big5_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end); +} +#endif + +static int +big5_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype); +} + +static const char BIG5_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 +}; + +#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1) +#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)] + +static UChar* +big5_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (BIG5_ISMB_TRAIL(*p)) { + while (p > start) { + if (! BIG5_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enclen(ONIG_ENCODING_BIG5, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) +{ + const UChar c = *s; + + return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE); +} + +OnigEncodingType OnigEncodingBIG5 = { + big5_mbc_enc_len, + "Big5", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + big5_mbc_to_code, + onigenc_mb2_code_to_mbclen, + big5_code_to_mbc, + big5_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + big5_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + big5_left_adjust_char_head, + big5_is_allowed_reverse_match +}; diff --git a/enc/cp1251.c b/enc/cp1251.c new file mode 100644 index 0000000..63e58d2 --- /dev/null +++ b/enc/cp1251.c @@ -0,0 +1,200 @@ +/********************************************************************** + cp1251.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2006-2007 Byte <byte AT mail DOT kna DOT ru> + * K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c] +#define ENC_IS_CP1251_CTYPE(code,ctype) \ + ((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncCP1251_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247', + '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', + '\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncCP1251_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, + 0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, + 0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, + 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2, + 0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0, + 0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_CP1251_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +static int +cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_CP1251_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xb8, 0xa8 }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xdf } +}; + +static int +cp1251_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingCP1251 = { + onigenc_single_byte_mbc_enc_len, + "CP1251", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + cp1251_mbc_case_fold, + cp1251_apply_all_case_fold, + cp1251_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + cp1251_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/euc_jp.c b/enc/euc_jp.c new file mode 100644 index 0000000..f605297 --- /dev/null +++ b/enc/euc_jp.c @@ -0,0 +1,285 @@ +/********************************************************************** + euc_jp.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) + +static const int EncLen_EUCJP[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +mbc_enc_len(const UChar* p) +{ + return EncLen_EUCJP[*p]; +} + +static OnigCodePoint +mbc_to_code(const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enclen(ONIG_ENCODING_EUC_JP, p); + n = (OnigCodePoint )*p++; + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +static int +code_to_mbclen(OnigCodePoint code) +{ + if (ONIGENC_IS_CODE_ASCII(code)) return 1; + else if ((code & 0xff0000) != 0) return 3; + else if ((code & 0xff00) != 0) return 2; + else + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +#if 0 +static int +code_to_mbc_first(OnigCodePoint code) +{ + int first; + + if ((code & 0xff0000) != 0) { + first = (code >> 16) & 0xff; + } + else if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} +#endif + +static int +code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); + if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; +#endif + return p - buf; +} + +static int +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + int len; + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; + } + else { + int i; + + len = enclen(ONIG_ENCODING_EUC_JP, p); + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static UChar* +left_adjust_char_head(const UChar* start, const UChar* s) +{ + /* In this encoding + mb-trail bytes doesn't mix with single bytes. + */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + while (!eucjp_islead(*p) && p > start) p--; + len = enclen(ONIG_ENCODING_EUC_JP, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) +{ + const UChar c = *s; + if (c <= 0x7e || c == 0x8e || c == 0x8f) + return TRUE; + else + return FALSE; +} + + +static int PropertyInited = 0; +static const OnigCodePoint** PropertyList; +static int PropertyListNum; +static int PropertyListSize; +static hash_table_type* PropertyNameTable; + +static const OnigCodePoint CR_Hiragana[] = { + 1, + 0xa4a1, 0xa4f3 +}; /* CR_Hiragana */ + +static const OnigCodePoint CR_Katakana[] = { + 3, + 0xa5a1, 0xa5f6, + 0xaaa6, 0xaaaf, + 0xaab1, 0xaadd +}; /* CR_Katakana */ + +static int +init_property_list(void) +{ + int r; + + PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); + PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); + PropertyInited = 1; + + end: + return r; +} + +static int +property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + hash_data_type ctype; + + PROPERTY_LIST_INIT_CHECK; + + if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { + return onigenc_minimum_property_name_to_ctype(enc, p, end); + } + + return (int )ctype; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? TRUE : FALSE); + } + } + } + else { + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (unsigned int )PropertyListNum) + return ONIGERR_TYPE_BUG; + + return onig_is_in_code_range((UChar* )PropertyList[ctype], code); + } + + return FALSE; +} + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + return ONIG_NO_SUPPORT_CONFIG; + } + else { + *sb_out = 0x80; + + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (OnigCtype )PropertyListNum) + return ONIGERR_TYPE_BUG; + + *ranges = PropertyList[ctype]; + return 0; + } +} + + +OnigEncodingType OnigEncodingEUC_JP = { + mbc_enc_len, + "EUC-JP", /* name */ + 3, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + property_name_to_ctype, + is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + is_allowed_reverse_match +}; diff --git a/enc/euc_kr.c b/enc/euc_kr.c new file mode 100644 index 0000000..1beef09 --- /dev/null +++ b/enc/euc_kr.c @@ -0,0 +1,158 @@ +/********************************************************************** + euc_kr.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_EUCKR[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +euckr_mbc_enc_len(const UChar* p) +{ + return EncLen_EUCKR[*p]; +} + +static OnigCodePoint +euckr_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end); +} + +static int +euckr_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf); +} + +static int +euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag, + pp, end, lower); +} + +#if 0 +static int +euckr_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end); +} +#endif + +static int +euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype); +} + +#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff) + +static UChar* +euckr_left_adjust_char_head(const UChar* start, const UChar* s) +{ + /* Assumed in this encoding, + mb-trail bytes don't mix with single bytes. + */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + while (!euckr_islead(*p) && p > start) p--; + len = enclen(ONIG_ENCODING_EUC_KR, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) +{ + const UChar c = *s; + if (c <= 0x7e) return TRUE; + else return FALSE; +} + +OnigEncodingType OnigEncodingEUC_KR = { + euckr_mbc_enc_len, + "EUC-KR", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + euckr_mbc_to_code, + onigenc_mb2_code_to_mbclen, + euckr_code_to_mbc, + euckr_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + euckr_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + euckr_left_adjust_char_head, + euckr_is_allowed_reverse_match +}; + +/* Same with OnigEncodingEUC_KR except the name */ +OnigEncodingType OnigEncodingEUC_CN = { + euckr_mbc_enc_len, + "EUC-CN", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + euckr_mbc_to_code, + onigenc_mb2_code_to_mbclen, + euckr_code_to_mbc, + euckr_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + euckr_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + euckr_left_adjust_char_head, + euckr_is_allowed_reverse_match +}; diff --git a/enc/euc_tw.c b/enc/euc_tw.c new file mode 100644 index 0000000..2ddeb93 --- /dev/null +++ b/enc/euc_tw.c @@ -0,0 +1,129 @@ +/********************************************************************** + euc_tw.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_EUCTW[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +euctw_mbc_enc_len(const UChar* p) +{ + return EncLen_EUCTW[*p]; +} + +static OnigCodePoint +euctw_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end); +} + +static int +euctw_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf); +} + +static int +euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag, + pp, end, lower); +} + +static int +euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype); +} + +#define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) + +static UChar* +euctw_left_adjust_char_head(const UChar* start, const UChar* s) +{ + /* Assumed in this encoding, + mb-trail bytes don't mix with single bytes. + */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + while (!euctw_islead(*p) && p > start) p--; + len = enclen(ONIG_ENCODING_EUC_TW, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) +{ + const UChar c = *s; + if (c <= 0x7e) return TRUE; + else return FALSE; +} + +OnigEncodingType OnigEncodingEUC_TW = { + euctw_mbc_enc_len, + "EUC-TW", /* name */ + 4, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + euctw_mbc_to_code, + onigenc_mb4_code_to_mbclen, + euctw_code_to_mbc, + euctw_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + euctw_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + euctw_left_adjust_char_head, + euctw_is_allowed_reverse_match +}; diff --git a/enc/gb18030.c b/enc/gb18030.c new file mode 100644 index 0000000..6bbd109 --- /dev/null +++ b/enc/gb18030.c @@ -0,0 +1,495 @@ +/********************************************************************** + gb18030.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2005-2007 KUBO Takehiro <kubo AT jiubao DOT org> + * K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#if 1 +#define DEBUG_GB18030(arg) +#else +#define DEBUG_GB18030(arg) printf arg +#endif + +enum { + C1, /* one-byte char */ + C2, /* one-byte or second of two-byte char */ + C4, /* one-byte or second or fourth of four-byte char */ + CM /* first of two- or four-byte char or second of two-byte char */ +}; + +static const char GB18030_MAP[] = { + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1, + C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 +}; + +static int +gb18030_mbc_enc_len(const UChar* p) +{ + if (GB18030_MAP[*p] != CM) + return 1; + p++; + if (GB18030_MAP[*p] == C4) + return 4; + if (GB18030_MAP[*p] == C1) + return 1; /* illegal sequence */ + return 2; +} + +static OnigCodePoint +gb18030_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end); +} + +static int +gb18030_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf); +} + +static int +gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_GB18030, flag, + pp, end, lower); +} + +#if 0 +static int +gb18030_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end); +} +#endif + +static int +gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype); +} + +enum state { + S_START, + S_one_C2, + S_one_C4, + S_one_CM, + + S_odd_CM_one_CX, + S_even_CM_one_CX, + + /* CMC4 : pair of "CM C4" */ + S_one_CMC4, + S_odd_CMC4, + S_one_C4_odd_CMC4, + S_even_CMC4, + S_one_C4_even_CMC4, + + S_odd_CM_odd_CMC4, + S_even_CM_odd_CMC4, + + S_odd_CM_even_CMC4, + S_even_CM_even_CMC4, + + /* C4CM : pair of "C4 CM" */ + S_odd_C4CM, + S_one_CM_odd_C4CM, + S_even_C4CM, + S_one_CM_even_C4CM, + + S_even_CM_odd_C4CM, + S_odd_CM_odd_C4CM, + S_even_CM_even_C4CM, + S_odd_CM_even_C4CM, +}; + +static UChar* +gb18030_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + enum state state = S_START; + + DEBUG_GB18030(("----------------\n")); + for (p = s; p >= start; p--) { + DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p)); + switch (state) { + case S_START: + switch (GB18030_MAP[*p]) { + case C1: + return (UChar *)s; + case C2: + state = S_one_C2; /* C2 */ + break; + case C4: + state = S_one_C4; /* C4 */ + break; + case CM: + state = S_one_CM; /* CM */ + break; + } + break; + case S_one_C2: /* C2 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_odd_CM_one_CX; /* CM C2 */ + break; + } + break; + case S_one_C4: /* C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_one_CMC4; + break; + } + break; + case S_one_CM: /* CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)s; + case C4: + state = S_odd_C4CM; + break; + case CM: + state = S_odd_CM_one_CX; /* CM CM */ + break; + } + break; + + case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CM_one_CX; + break; + } + break; + case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_odd_CM_one_CX; + break; + } + break; + + case S_one_CMC4: /* CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 1); + case C4: + state = S_one_C4_odd_CMC4; /* C4 CM C4 */ + break; + case CM: + state = S_even_CM_one_CX; /* CM CM C4 */ + break; + } + break; + case S_odd_CMC4: /* CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 1); + case C4: + state = S_one_C4_odd_CMC4; + break; + case CM: + state = S_odd_CM_odd_CMC4; + break; + } + break; + case S_one_C4_odd_CMC4: /* C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CMC4; /* CM C4 CM C4 */ + break; + } + break; + case S_even_CMC4: /* CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 3); + case C4: + state = S_one_C4_even_CMC4; + break; + case CM: + state = S_odd_CM_even_CMC4; + break; + } + break; + case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_odd_CMC4; + break; + } + break; + + case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_even_CM_odd_CMC4; + break; + } + break; + case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_odd_CM_odd_CMC4; + break; + } + break; + + case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CM_even_CMC4; + break; + } + break; + case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_odd_CM_even_CMC4; + break; + } + break; + + case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_one_CM_odd_C4CM; /* CM C4 CM */ + break; + } + break; + case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 2); /* |CM C4 CM */ + case C4: + state = S_even_C4CM; + break; + case CM: + state = S_even_CM_odd_C4CM; + break; + } + break; + case S_even_C4CM: /* C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* C4|CM C4 CM */ + case CM: + state = S_one_CM_even_C4CM; + break; + } + break; + case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 0); /*|CM C4 CM C4|CM */ + case C4: + state = S_odd_C4CM; + break; + case CM: + state = S_even_CM_even_C4CM; + break; + } + break; + + case S_even_CM_odd_C4CM: /* CM CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 0); /* |CM CM|C4|CM */ + case CM: + state = S_odd_CM_odd_C4CM; + break; + } + break; + case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* |CM CM|CM C4 CM */ + case CM: + state = S_even_CM_odd_C4CM; + break; + } + break; + + case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */ + case CM: + state = S_odd_CM_even_C4CM; + break; + } + break; + case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */ + case CM: + state = S_even_CM_even_C4CM; + break; + } + break; + } + } + + DEBUG_GB18030(("state %d\n", state)); + switch (state) { + case S_START: return (UChar *)(s - 0); + case S_one_C2: return (UChar *)(s - 0); + case S_one_C4: return (UChar *)(s - 0); + case S_one_CM: return (UChar *)(s - 0); + + case S_odd_CM_one_CX: return (UChar *)(s - 1); + case S_even_CM_one_CX: return (UChar *)(s - 0); + + case S_one_CMC4: return (UChar *)(s - 1); + case S_odd_CMC4: return (UChar *)(s - 1); + case S_one_C4_odd_CMC4: return (UChar *)(s - 1); + case S_even_CMC4: return (UChar *)(s - 3); + case S_one_C4_even_CMC4: return (UChar *)(s - 3); + + case S_odd_CM_odd_CMC4: return (UChar *)(s - 3); + case S_even_CM_odd_CMC4: return (UChar *)(s - 1); + + case S_odd_CM_even_CMC4: return (UChar *)(s - 1); + case S_even_CM_even_CMC4: return (UChar *)(s - 3); + + case S_odd_C4CM: return (UChar *)(s - 0); + case S_one_CM_odd_C4CM: return (UChar *)(s - 2); + case S_even_C4CM: return (UChar *)(s - 2); + case S_one_CM_even_C4CM: return (UChar *)(s - 0); + + case S_even_CM_odd_C4CM: return (UChar *)(s - 0); + case S_odd_CM_odd_C4CM: return (UChar *)(s - 2); + case S_even_CM_even_C4CM: return (UChar *)(s - 2); + case S_odd_CM_even_C4CM: return (UChar *)(s - 0); + } + + return (UChar* )s; /* never come here. (escape warning) */ +} + +static int +gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) +{ + return GB18030_MAP[*s] == C1 ? TRUE : FALSE; +} + +OnigEncodingType OnigEncodingGB18030 = { + gb18030_mbc_enc_len, + "GB18030", /* name */ + 4, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + gb18030_mbc_to_code, + onigenc_mb4_code_to_mbclen, + gb18030_code_to_mbc, + gb18030_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + gb18030_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + gb18030_left_adjust_char_head, + gb18030_is_allowed_reverse_match +}; diff --git a/enc/iso8859_1.c b/enc/iso8859_1.c new file mode 100644 index 0000000..174b97f --- /dev/null +++ b/enc/iso8859_1.c @@ -0,0 +1,272 @@ +/********************************************************************** + iso8859_1.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ + ((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_1_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + if (*p == 0x53 && end > p + 1 + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + if (*p == 0x73 && end > p + 1 + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0xc0 <= *p && *p <= 0xcf) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + else if (0xd0 <= *p && *p <= 0xdf) { + if (*p == 0xdf) { + items[0].byte_len = 1; + items[0].code_len = 2; + items[0].code[0] = (OnigCodePoint )'s'; + items[0].code[1] = (OnigCodePoint )'s'; + + items[1].byte_len = 1; + items[1].code_len = 2; + items[1].code[0] = (OnigCodePoint )'S'; + items[1].code[1] = (OnigCodePoint )'S'; + + items[2].byte_len = 1; + items[2].code_len = 2; + items[2].code[0] = (OnigCodePoint )'s'; + items[2].code[1] = (OnigCodePoint )'S'; + + items[3].byte_len = 1; + items[3].code_len = 2; + items[3].code[0] = (OnigCodePoint )'S'; + items[3].code[1] = (OnigCodePoint )'s'; + + return 4; + } + else if (*p != 0xd7) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + } + else if (0xe0 <= *p && *p <= 0xef) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + else if (0xf0 <= *p && *p <= 0xfe) { + if (*p != 0xf7) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + } + + return 0; +} + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p >= 0xaa && *p <= 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_1_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_1 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-1", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_10.c b/enc/iso8859_10.c new file mode 100644 index 0000000..e35c19d --- /dev/null +++ b/enc/iso8859_10.c @@ -0,0 +1,239 @@ +/********************************************************************** + iso8859_10.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \ + ((EncISO_8859_10_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247', + '\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_10_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_10_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa2, 0xb2 }, + { 0xa3, 0xb3 }, + { 0xa4, 0xb4 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa8, 0xb8 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_10 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-10", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_11.c b/enc/iso8859_11.c new file mode 100644 index 0000000..8a460a3 --- /dev/null +++ b/enc/iso8859_11.c @@ -0,0 +1,96 @@ +/********************************************************************** + iso8859_11.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ + ((EncISO_8859_11_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_11_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_11_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_11 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-11", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_13.c b/enc/iso8859_13.c new file mode 100644 index 0000000..3670d92 --- /dev/null +++ b/enc/iso8859_13.c @@ -0,0 +1,228 @@ +/********************************************************************** + iso8859_13.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \ + ((EncISO_8859_13_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_13_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0, + 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf, 0xb5 are lower case letter, but can't convert. */ + if (*p == 0xb5) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_13_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_13 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-13", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_14.c b/enc/iso8859_14.c new file mode 100644 index 0000000..3596d44 --- /dev/null +++ b/enc/iso8859_14.c @@ -0,0 +1,241 @@ +/********************************************************************** + iso8859_14.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \ + ((EncISO_8859_14_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247', + '\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377', + '\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271', + '\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_14_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0, + 0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2, + 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_14_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xa2 }, + { 0xa4, 0xa5 }, + { 0xa6, 0xab }, + { 0xa8, 0xb8 }, + { 0xaa, 0xba }, + { 0xac, 0xbc }, + { 0xaf, 0xff }, + + { 0xb0, 0xb1 }, + { 0xb2, 0xb3 }, + { 0xb4, 0xb5 }, + { 0xb7, 0xb9 }, + { 0xbb, 0xbf }, + { 0xbd, 0xbe }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_14 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-14", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_15.c b/enc/iso8859_15.c new file mode 100644 index 0000000..08492fb --- /dev/null +++ b/enc/iso8859_15.c @@ -0,0 +1,235 @@ +/********************************************************************** + iso8859_15.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \ + ((EncISO_8859_15_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_15_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, + 0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0, + 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf etc.. are lower case letter, but can't convert. */ + if (*p == 0xaa || *p == 0xb5 || *p == 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_15_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa6, 0xa8 }, + + { 0xb4, 0xb8 }, + { 0xbc, 0xbd }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_15 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-15", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_16.c b/enc/iso8859_16.c new file mode 100644 index 0000000..8b39c58 --- /dev/null +++ b/enc/iso8859_16.c @@ -0,0 +1,237 @@ +/********************************************************************** + iso8859_16.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \ + ((EncISO_8859_16_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\263', '\245', '\245', '\250', '\247', + '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277', + '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_16_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0, + 0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2, + 0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0, + 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_16_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xa2 }, + { 0xa3, 0xb3 }, + { 0xa6, 0xa8 }, + { 0xaa, 0xba }, + { 0xac, 0xae }, + { 0xaf, 0xbf }, + + { 0xb2, 0xb9 }, + { 0xb4, 0xb8 }, + { 0xbc, 0xbd }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_16 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-16", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_2.c b/enc/iso8859_2.c new file mode 100644 index 0000000..80b93ba --- /dev/null +++ b/enc/iso8859_2.c @@ -0,0 +1,235 @@ +/********************************************************************** + iso8859_2.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ + ((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_2_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa3, 0xb3 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_2_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_2 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-2", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_3.c b/enc/iso8859_3.c new file mode 100644 index 0000000..fd1168c --- /dev/null +++ b/enc/iso8859_3.c @@ -0,0 +1,235 @@ +/********************************************************************** + iso8859_3.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \ + ((EncISO_8859_3_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_3_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2, + 0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p == 0xb5) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_3_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_3 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-3", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_4.c b/enc/iso8859_4.c new file mode 100644 index 0000000..c124f56 --- /dev/null +++ b/enc/iso8859_4.c @@ -0,0 +1,237 @@ +/********************************************************************** + iso8859_4.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \ + ((EncISO_8859_4_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_4_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0, + 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + if (*p == 0xa2) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_4_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa3, 0xb3 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_4 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-4", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_5.c b/enc/iso8859_5.c new file mode 100644 index 0000000..1ca67e7 --- /dev/null +++ b/enc/iso8859_5.c @@ -0,0 +1,226 @@ +/********************************************************************** + iso8859_5.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \ + ((EncISO_8859_5_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_5_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_5_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xf1 }, + { 0xa2, 0xf2 }, + { 0xa3, 0xf3 }, + { 0xa4, 0xf4 }, + { 0xa5, 0xf5 }, + { 0xa6, 0xf6 }, + { 0xa7, 0xf7 }, + { 0xa8, 0xf8 }, + { 0xa9, 0xf9 }, + { 0xaa, 0xfa }, + { 0xab, 0xfb }, + { 0xac, 0xfc }, + { 0xae, 0xfe }, + { 0xaf, 0xff }, + + { 0xb0, 0xd0 }, + { 0xb1, 0xd1 }, + { 0xb2, 0xd2 }, + { 0xb3, 0xd3 }, + { 0xb4, 0xd4 }, + { 0xb5, 0xd5 }, + { 0xb6, 0xd6 }, + { 0xb7, 0xd7 }, + { 0xb8, 0xd8 }, + { 0xb9, 0xd9 }, + { 0xba, 0xda }, + { 0xbb, 0xdb }, + { 0xbc, 0xdc }, + { 0xbd, 0xdd }, + { 0xbe, 0xde }, + { 0xbf, 0xdf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_5 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-5", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_6.c b/enc/iso8859_6.c new file mode 100644 index 0000000..ab42eee --- /dev/null +++ b/enc/iso8859_6.c @@ -0,0 +1,96 @@ +/********************************************************************** + iso8859_6.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ + ((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_6_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0, + 0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_6_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_6 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-6", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_7.c b/enc/iso8859_7.c new file mode 100644 index 0000000..1090064 --- /dev/null +++ b/enc/iso8859_7.c @@ -0,0 +1,222 @@ +/********************************************************************** + iso8859_7.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ + ((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', + '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376', + '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_7_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2, + 0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + if (*p == 0xc0 || *p == 0xe0) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_7_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xb6, 0xdc }, + { 0xb8, 0xdd }, + { 0xb9, 0xde }, + { 0xba, 0xdf }, + { 0xbc, 0xfc }, + { 0xbe, 0xfd }, + { 0xbf, 0xfe }, + + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + + +OnigEncodingType OnigEncodingISO_8859_7 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-7", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_8.c b/enc/iso8859_8.c new file mode 100644 index 0000000..fb9846f --- /dev/null +++ b/enc/iso8859_8.c @@ -0,0 +1,96 @@ +/********************************************************************** + iso8859_8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ + ((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_8_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_8_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_8 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-8", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/iso8859_9.c b/enc/iso8859_9.c new file mode 100644 index 0000000..079d681 --- /dev/null +++ b/enc/iso8859_9.c @@ -0,0 +1,228 @@ +/********************************************************************** + iso8859_9.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \ + ((EncISO_8859_9_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_9_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf etc.. are lower case letter, but can't convert. */ + if (*p >= 0xaa && *p <= 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_9_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_9 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-9", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/koi8.c b/enc/koi8.c new file mode 100644 index 0000000..c664957 --- /dev/null +++ b/enc/koi8.c @@ -0,0 +1,250 @@ +/********************************************************************** + koi8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c] +#define ENC_IS_KOI8_CTYPE(code,ctype) \ + ((EncKOI8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncKOI8_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' +}; + +static const unsigned short EncKOI8_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 +}; + + +static int +koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_KOI8_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end) +{ + const OnigUChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncKOI8_CtypeTable[*p] & + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} +#endif + +static int +koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_KOI8_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfe, 0xde }, + { 0xff, 0xdf } +}; + +static int +koi8_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingKOI8 = { + onigenc_single_byte_mbc_enc_len, + "KOI8", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + koi8_mbc_case_fold, + koi8_apply_all_case_fold, + koi8_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + koi8_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/koi8_r.c b/enc/koi8_r.c new file mode 100644 index 0000000..364dda1 --- /dev/null +++ b/enc/koi8_r.c @@ -0,0 +1,212 @@ +/********************************************************************** + koi8_r.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c] +#define ENC_IS_KOI8_R_CTYPE(code,ctype) \ + ((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncKOI8_R_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' +}; + +static const unsigned short EncKOI8_R_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 +}; + +static int +koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_KOI8_R_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa3, 0xb3 }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff } +}; + +static int +koi8_r_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingKOI8_R = { + onigenc_single_byte_mbc_enc_len, + "KOI8-R", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + koi8_r_mbc_case_fold, + koi8_r_apply_all_case_fold, + koi8_r_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + koi8_r_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/mktable.c b/enc/mktable.c new file mode 100644 index 0000000..285216e --- /dev/null +++ b/enc/mktable.c @@ -0,0 +1,1162 @@ +/********************************************************************** + mktable.c +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <locale.h> + +#define __USE_ISOC99 +#include <ctype.h> + +#include "regenc.h" + +#define ASCII 0 +#define UNICODE_ISO_8859_1 1 +#define ISO_8859_1 2 +#define ISO_8859_2 3 +#define ISO_8859_3 4 +#define ISO_8859_4 5 +#define ISO_8859_5 6 +#define ISO_8859_6 7 +#define ISO_8859_7 8 +#define ISO_8859_8 9 +#define ISO_8859_9 10 +#define ISO_8859_10 11 +#define ISO_8859_11 12 +#define ISO_8859_13 13 +#define ISO_8859_14 14 +#define ISO_8859_15 15 +#define ISO_8859_16 16 +#define KOI8 17 +#define KOI8_R 18 + +typedef struct { + int num; + char* name; +} ENC_INFO; + +static ENC_INFO Info[] = { + { ASCII, "ASCII" }, + { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" }, + { ISO_8859_1, "ISO_8859_1" }, + { ISO_8859_2, "ISO_8859_2" }, + { ISO_8859_3, "ISO_8859_3" }, + { ISO_8859_4, "ISO_8859_4" }, + { ISO_8859_5, "ISO_8859_5" }, + { ISO_8859_6, "ISO_8859_6" }, + { ISO_8859_7, "ISO_8859_7" }, + { ISO_8859_8, "ISO_8859_8" }, + { ISO_8859_9, "ISO_8859_9" }, + { ISO_8859_10, "ISO_8859_10" }, + { ISO_8859_11, "ISO_8859_11" }, + { ISO_8859_13, "ISO_8859_13" }, + { ISO_8859_14, "ISO_8859_14" }, + { ISO_8859_15, "ISO_8859_15" }, + { ISO_8859_16, "ISO_8859_16" }, + { KOI8, "KOI8" }, + { KOI8_R, "KOI8_R" } +}; + + +static int IsAlpha(int enc, int c) +{ + if (enc == ASCII) + return isalpha(c); + + if (c >= 0x41 && c <= 0x5a) return 1; + if (c >= 0x61 && c <= 0x7a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c == 0xaa) return 1; + if (c == 0xb5) return 1; + if (c == 0xba) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + break; + + case ISO_8859_2: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c >= 0xae && c <= 0xaf) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_3: + if (c == 0xa1) return 1; + if (c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xaf) return 1; + if (c == 0xb1) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xc2) return 1; + if (c >= 0xc4 && c <= 0xcf) return 1; + if (c >= 0xd1 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xe2) return 1; + if (c >= 0xe4 && c <= 0xef) return 1; + if (c >= 0xf1 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_4: + if (c >= 0xa1 && c <= 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_5: + if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; + if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; + break; + + case ISO_8859_6: + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c == 0xb6) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c == 0xc0) return 1; + if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; + if (c >= 0xdc && c <= 0xfe) return 1; + break; + + case ISO_8859_8: + if (c == 0xb5) return 1; + if (c >= 0xe0 && c <= 0xfa) return 1; + break; + + case ISO_8859_10: + if (c >= 0xa1 && c <= 0xa6) return 1; + if (c >= 0xa8 && c <= 0xac) return 1; + if (c == 0xae || c == 0xaf) return 1; + if (c >= 0xb1 && c <= 0xb6) return 1; + if (c >= 0xb8 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xff) return 1; + break; + + case ISO_8859_11: + if (c >= 0xa1 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case ISO_8859_13: + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xaf) return 1; + if (c == 0xb5) return 1; + if (c == 0xb8) return 1; + if (c == 0xba) return 1; + if (c >= 0xbf && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_14: + if (c == 0xa1 || c == 0xa2) return 1; + if (c == 0xa4 || c == 0xa5) return 1; + if (c == 0xa6 || c == 0xa8) return 1; + if (c >= 0xaa && c <= 0xac) return 1; + if (c >= 0xaf && c <= 0xb5) return 1; + if (c >= 0xb7 && c <= 0xff) return 1; + break; + + case ISO_8859_15: + if (c == 0xaa) return 1; + if (c == 0xb5) return 1; + if (c == 0xba) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xb4) return 1; + if (c == 0xb8) return 1; + if (c == 0xbc) return 1; + if (c == 0xbd) return 1; + if (c == 0xbe) return 1; + break; + + case ISO_8859_16: + if (c == 0xa1) return 1; + if (c == 0xa2) return 1; + if (c == 0xa3) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2) return 1; + if (c == 0xb3) return 1; + if (c == 0xb4) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c == 0xbd) return 1; + if (c == 0xbe) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c == 0xa3 || c == 0xb3) return 1; + /* fall */ + case KOI8: + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsBlank(int enc, int c) +{ + if (enc == ASCII) + return isblank(c); + + if (c == 0x09 || c == 0x20) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_3: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_6: + case ISO_8859_7: + case ISO_8859_8: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_11: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + case KOI8: + if (c == 0xa0) return 1; + break; + + case KOI8_R: + if (c == 0x9a) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsCntrl(int enc, int c) +{ + if (enc == ASCII) + return iscntrl(c); + + if (c >= 0x00 && c <= 0x1F) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + if (c == 0xad) return 1; + /* fall */ + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_3: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_6: + case ISO_8859_7: + case ISO_8859_8: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_11: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + case KOI8: + if (c >= 0x7f && c <= 0x9F) return 1; + break; + + + case KOI8_R: + if (c == 0x7f) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsDigit(int enc ARG_UNUSED, int c) +{ + if (c >= 0x30 && c <= 0x39) return 1; + return 0; +} + +static int IsGraph(int enc, int c) +{ + if (enc == ASCII) + return isgraph(c); + + if (c >= 0x21 && c <= 0x7e) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + if (c >= 0xa1 && c <= 0xff) return 1; + break; + + case ISO_8859_3: + if (c >= 0xa1) { + if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || + c == 0xe3 || c == 0xf0) + return 0; + else + return 1; + } + break; + + case ISO_8859_6: + if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) + return 1; + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c >= 0xa1 && c <= 0xfe && + c != 0xa4 && c != 0xa5 && c != 0xaa && + c != 0xae && c != 0xd2) return 1; + break; + + case ISO_8859_8: + if (c >= 0xa2 && c <= 0xfa) { + if (c >= 0xbf && c <= 0xde) return 0; + return 1; + } + break; + + case ISO_8859_11: + if (c >= 0xa1 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case KOI8: + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsLower(int enc, int c) +{ + if (enc == ASCII) + return islower(c); + + if (c >= 0x61 && c <= 0x7a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c == 0xaa) return 1; + if (c == 0xb5) return 1; + if (c == 0xba) return 1; + if (c >= 0xdf && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + break; + + case ISO_8859_2: + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xdf && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_3: + if (c == 0xb1) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c == 0xbf) return 1; + if (c == 0xdf) return 1; + if (c >= 0xe0 && c <= 0xe2) return 1; + if (c >= 0xe4 && c <= 0xef) return 1; + if (c >= 0xf1 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_4: + if (c == 0xa2) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c == 0xdf) return 1; + if (c >= 0xe0 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_5: + if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; + break; + + case ISO_8859_6: + break; + + case ISO_8859_7: + if (c == 0xc0) return 1; + if (c >= 0xdc && c <= 0xfe) return 1; + break; + + case ISO_8859_8: + if (c == 0xb5) return 1; + break; + + case ISO_8859_10: + if (c >= 0xb1 && c <= 0xb6) return 1; + if (c >= 0xb8 && c <= 0xbc) return 1; + if (c == 0xbe || c == 0xbf) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case ISO_8859_11: + break; + + case ISO_8859_13: + if (c == 0xb5) return 1; + if (c == 0xb8) return 1; + if (c == 0xba) return 1; + if (c == 0xbf) return 1; + if (c >= 0xdf && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_14: + if (c == 0xa2) return 1; + if (c == 0xa5) return 1; + if (c == 0xab) return 1; + if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c == 0xbe || c == 0xbf) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case ISO_8859_15: + if (c == 0xaa) return 1; + if (c == 0xb5) return 1; + if (c == 0xba) return 1; + if (c >= 0xdf && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + if (c == 0xa8) return 1; + if (c == 0xb8) return 1; + if (c == 0xbd) return 1; + break; + + case ISO_8859_16: + if (c == 0xa2) return 1; + if (c == 0xa8) return 1; + if (c == 0xae) return 1; + if (c == 0xb3) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbd) return 1; + if (c == 0xbf) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c == 0xa3) return 1; + /* fall */ + case KOI8: + if (c >= 0xc0 && c <= 0xdf) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsPrint(int enc, int c) +{ + if (enc == ASCII) + return isprint(c); + + if (c >= 0x20 && c <= 0x7e) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + if (c >= 0x09 && c <= 0x0d) return 1; + if (c == 0x85) return 1; + /* fall */ + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + if (c >= 0xa0 && c <= 0xff) return 1; + break; + + case ISO_8859_3: + if (c >= 0xa0) { + if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || + c == 0xe3 || c == 0xf0) + return 0; + else + return 1; + } + break; + + case ISO_8859_6: + if (c == 0xa0) return 1; + if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) + return 1; + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c >= 0xa0 && c <= 0xfe && + c != 0xa4 && c != 0xa5 && c != 0xaa && + c != 0xae && c != 0xd2) return 1; + break; + + case ISO_8859_8: + if (c >= 0xa0 && c <= 0xfa) { + if (c >= 0xbf && c <= 0xde) return 0; + if (c == 0xa1) return 0; + return 1; + } + break; + + case ISO_8859_11: + if (c >= 0xa0 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case KOI8: + if (c == 0xa0) return 1; + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c >= 0x80 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsPunct(int enc, int c) +{ + if (enc == ASCII) + return ispunct(c); + + if (enc == UNICODE_ISO_8859_1) { + if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 || + c == 0x7c || c == 0x7e) return 1; + if (c >= 0x3c && c <= 0x3e) return 1; + } + + if (c >= 0x21 && c <= 0x2f) return 1; + if (c >= 0x3a && c <= 0x40) return 1; + if (c >= 0x5b && c <= 0x60) return 1; + if (c >= 0x7b && c <= 0x7e) return 1; + + switch (enc) { + case ISO_8859_1: + case ISO_8859_9: + case ISO_8859_15: + if (c == 0xad) return 1; + /* fall */ + case UNICODE_ISO_8859_1: + if (c == 0xa1) return 1; + if (c == 0xab) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xbf) return 1; + break; + + case ISO_8859_2: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_14: + if (c == 0xad) return 1; + break; + + case ISO_8859_3: + case ISO_8859_10: + if (c == 0xad) return 1; + if (c == 0xb7) return 1; + if (c == 0xbd) return 1; + break; + + case ISO_8859_6: + if (c == 0xac) return 1; + if (c == 0xad) return 1; + if (c == 0xbb) return 1; + if (c == 0xbf) return 1; + break; + + case ISO_8859_7: + if (c == 0xa1 || c == 0xa2) return 1; + if (c == 0xab) return 1; + if (c == 0xaf) return 1; + if (c == 0xad) return 1; + if (c == 0xb7 || c == 0xbb) return 1; + break; + + case ISO_8859_8: + if (c == 0xab) return 1; + if (c == 0xad) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xdf) return 1; + break; + + case ISO_8859_13: + if (c == 0xa1 || c == 0xa5) return 1; + if (c == 0xab || c == 0xad) return 1; + if (c == 0xb4 || c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xff) return 1; + break; + + case ISO_8859_16: + if (c == 0xa5) return 1; + if (c == 0xab) return 1; + if (c == 0xad) return 1; + if (c == 0xb5) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + break; + + case KOI8_R: + if (c == 0x9e) return 1; + break; + + case ISO_8859_11: + case KOI8: + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsSpace(int enc, int c) +{ + if (enc == ASCII) + return isspace(c); + + if (c >= 0x09 && c <= 0x0d) return 1; + if (c == 0x20) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + if (c == 0x85) return 1; + /* fall */ + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_3: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_6: + case ISO_8859_7: + case ISO_8859_8: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_11: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + case KOI8: + if (c == 0xa0) return 1; + break; + + case KOI8_R: + if (c == 0x9a) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsUpper(int enc, int c) +{ + if (enc == ASCII) + return isupper(c); + + if (c >= 0x41 && c <= 0x5a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_2: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c >= 0xae && c <= 0xaf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_3: + if (c == 0xa1) return 1; + if (c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xc2) return 1; + if (c >= 0xc4 && c <= 0xcf) return 1; + if (c >= 0xd1 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_4: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xbd) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_5: + if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; + break; + + case ISO_8859_6: + break; + + case ISO_8859_7: + if (c == 0xb6) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; + break; + + case ISO_8859_8: + case ISO_8859_11: + break; + + case ISO_8859_10: + if (c >= 0xa1 && c <= 0xa6) return 1; + if (c >= 0xa8 && c <= 0xac) return 1; + if (c == 0xae || c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case ISO_8859_13: + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_14: + if (c == 0xa1) return 1; + if (c == 0xa4 || c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xaa || c == 0xac) return 1; + if (c == 0xaf || c == 0xb0) return 1; + if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1; + if (c == 0xbb || c == 0xbd) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case ISO_8859_15: + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + if (c == 0xa6) return 1; + if (c == 0xb4) return 1; + if (c == 0xbc) return 1; + if (c == 0xbe) return 1; + break; + + case ISO_8859_16: + if (c == 0xa1) return 1; + if (c == 0xa3) return 1; + if (c == 0xa6) return 1; + if (c == 0xaa) return 1; + if (c == 0xac) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2) return 1; + if (c == 0xb4) return 1; + if (c == 0xbc) return 1; + if (c == 0xbe) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case KOI8_R: + if (c == 0xb3) return 1; + /* fall */ + case KOI8: + if (c >= 0xe0 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsXDigit(int enc, int c) +{ + if (enc == ASCII) + return isxdigit(c); + + if (c >= 0x30 && c <= 0x39) return 1; + if (c >= 0x41 && c <= 0x46) return 1; + if (c >= 0x61 && c <= 0x66) return 1; + return 0; +} + +static int IsWord(int enc, int c) +{ + if (enc == ASCII) { + return (isalpha(c) || isdigit(c) || c == 0x5f); + } + + if (c >= 0x30 && c <= 0x39) return 1; + if (c >= 0x41 && c <= 0x5a) return 1; + if (c == 0x5f) return 1; + if (c >= 0x61 && c <= 0x7a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c == 0xaa) return 1; + if (c >= 0xb2 && c <= 0xb3) return 1; + if (c == 0xb5) return 1; + if (c >= 0xb9 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + break; + + case ISO_8859_2: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c >= 0xae && c <= 0xaf) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_3: + if (c == 0xa1) return 1; + if (c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xaf) return 1; + if (c >= 0xb1 && c <= 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbd) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xc2) return 1; + if (c >= 0xc4 && c <= 0xcf) return 1; + if (c >= 0xd1 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xe2) return 1; + if (c >= 0xe4 && c <= 0xef) return 1; + if (c >= 0xf1 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_4: + if (c >= 0xa1 && c <= 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_5: + if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; + if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; + break; + + case ISO_8859_6: + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xea) return 1; + if (c >= 0xeb && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c == 0xb2 || c == 0xb3) return 1; + if (c == 0xb6) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbf) return 1; + if (c == 0xc0) return 1; + if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; + if (c >= 0xdc && c <= 0xfe) return 1; + break; + + case ISO_8859_8: + if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xe0 && c <= 0xfa) return 1; + break; + + case ISO_8859_10: + if (c >= 0xa1 && c <= 0xff) { + if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd) + return 1; + } + break; + + case ISO_8859_11: + if (c >= 0xa1 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case ISO_8859_13: + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c == 0xb8) return 1; + if (c == 0xba) return 1; + if (c >= 0xbf && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_14: + if (c >= 0xa1 && c <= 0xff) { + if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae || + c == 0xb6) return 0; + return 1; + } + break; + + case ISO_8859_15: + if (c == 0xaa) return 1; + if (c >= 0xb2 && c <= 0xb3) return 1; + if (c == 0xb5) return 1; + if (c >= 0xb9 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xb4) return 1; + if (c == 0xb8) return 1; + break; + + case ISO_8859_16: + if (c == 0xa1) return 1; + if (c == 0xa2) return 1; + if (c == 0xa3) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2) return 1; + if (c == 0xb3) return 1; + if (c == 0xb4) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c == 0xbd) return 1; + if (c == 0xbe) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c == 0x9d) return 1; + if (c == 0xa3 || c == 0xb3) return 1; + /* fall */ + case KOI8: + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsAscii(int enc ARG_UNUSED, int c) +{ + if (c >= 0x00 && c <= 0x7f) return 1; + return 0; +} + +static int IsNewline(int enc ARG_UNUSED, int c) +{ + if (c == 0x0a) return 1; + return 0; +} + +static int exec(FILE* fp, ENC_INFO* einfo) +{ +#define NCOL 8 + + int c, val, enc; + + enc = einfo->num; + + fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n", + einfo->name); + + for (c = 0; c < 256; c++) { + val = 0; + if (IsNewline(enc, c)) val |= BIT_CTYPE_NEWLINE; + if (IsAlpha (enc, c)) val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM); + if (IsBlank (enc, c)) val |= BIT_CTYPE_BLANK; + if (IsCntrl (enc, c)) val |= BIT_CTYPE_CNTRL; + if (IsDigit (enc, c)) val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM); + if (IsGraph (enc, c)) val |= BIT_CTYPE_GRAPH; + if (IsLower (enc, c)) val |= BIT_CTYPE_LOWER; + if (IsPrint (enc, c)) val |= BIT_CTYPE_PRINT; + if (IsPunct (enc, c)) val |= BIT_CTYPE_PUNCT; + if (IsSpace (enc, c)) val |= BIT_CTYPE_SPACE; + if (IsUpper (enc, c)) val |= BIT_CTYPE_UPPER; + if (IsXDigit(enc, c)) val |= BIT_CTYPE_XDIGIT; + if (IsWord (enc, c)) val |= BIT_CTYPE_WORD; + if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII; + + if (c % NCOL == 0) fputs(" ", fp); + fprintf(fp, "0x%04x", val); + if (c != 255) fputs(",", fp); + if (c != 0 && c % NCOL == (NCOL-1)) + fputs("\n", fp); + else + fputs(" ", fp); + } + fprintf(fp, "};\n"); + return 0; +} + +extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED) +{ + int i; + FILE* fp = stdout; + + setlocale(LC_ALL, "C"); + /* setlocale(LC_ALL, "POSIX"); */ + /* setlocale(LC_ALL, "en_GB.iso88591"); */ + /* setlocale(LC_ALL, "de_BE.iso88591"); */ + /* setlocale(LC_ALL, "fr_FR.iso88591"); */ + + for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) { + exec(fp, &Info[i]); + } + + return 0; +} diff --git a/enc/sjis.c b/enc/sjis.c new file mode 100644 index 0000000..7a54c9f --- /dev/null +++ b/enc/sjis.c @@ -0,0 +1,318 @@ +/********************************************************************** + sjis.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +static const int EncLen_SJIS[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 +}; + +static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 +}; + +#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1) +#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] + +static int +mbc_enc_len(const UChar* p) +{ + return EncLen_SJIS[*p]; +} + +static int +code_to_mbclen(OnigCodePoint code) +{ + if (code < 256) { + if (EncLen_SJIS[(int )code] == 1) + return 1; + else + return 0; + } + else if (code <= 0xffff) { + return 2; + } + else + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static OnigCodePoint +mbc_to_code(const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enclen(ONIG_ENCODING_SJIS, p); + c = *p++; + n = c; + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +static int +code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); + +#if 0 + if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf)) + return REGERR_INVALID_CODE_POINT_VALUE; +#endif + return p - buf; +} + +static int +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; + } + else { + int i; + int len = enclen(ONIG_ENCODING_SJIS, p); + + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); + +} +#endif + +#if 0 +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} +#endif + +static UChar* +left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (SJIS_ISMB_TRAIL(*p)) { + while (p > start) { + if (! SJIS_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enclen(ONIG_ENCODING_SJIS, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) +{ + const UChar c = *s; + return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); +} + + +static int PropertyInited = 0; +static const OnigCodePoint** PropertyList; +static int PropertyListNum; +static int PropertyListSize; +static hash_table_type* PropertyNameTable; + +static const OnigCodePoint CR_Hiragana[] = { + 1, + 0x829f, 0x82f1 +}; /* CR_Hiragana */ + +static const OnigCodePoint CR_Katakana[] = { + 4, + 0x00a6, 0x00af, + 0x00b1, 0x00dd, + 0x8340, 0x837e, + 0x8380, 0x8396, +}; /* CR_Katakana */ + +static int +init_property_list(void) +{ + int r; + + PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); + PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); + PropertyInited = 1; + + end: + return r; +} + +static int +property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + hash_data_type ctype; + + PROPERTY_LIST_INIT_CHECK; + + if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { + return onigenc_minimum_property_name_to_ctype(enc, p, end); + } + + return (int )ctype; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? TRUE : FALSE); + } + } + } + else { + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (unsigned int )PropertyListNum) + return ONIGERR_TYPE_BUG; + + return onig_is_in_code_range((UChar* )PropertyList[ctype], code); + } + + return FALSE; +} + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + return ONIG_NO_SUPPORT_CONFIG; + } + else { + *sb_out = 0x80; + + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (OnigCtype )PropertyListNum) + return ONIGERR_TYPE_BUG; + + *ranges = PropertyList[ctype]; + return 0; + } +} + +OnigEncodingType OnigEncodingSJIS = { + mbc_enc_len, + "Shift_JIS", /* name */ + 2, /* max byte length */ + 1, /* min byte length */ + onigenc_is_mbc_newline_0x0a, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + property_name_to_ctype, + is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + is_allowed_reverse_match +}; diff --git a/enc/unicode.c b/enc/unicode.c new file mode 100644 index 0000000..af7a86e --- /dev/null +++ b/enc/unicode.c @@ -0,0 +1,11356 @@ +/********************************************************************** + unicode.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) +#if 0 +#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0) +#endif + +static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +/* 'NEWLINE' */ +static const OnigCodePoint CR_NEWLINE[] = { + 1, + 0x000a, 0x000a +}; /* CR_NEWLINE */ + +/* 'Alpha': [[:Alpha:]] */ +static const OnigCodePoint CR_Alpha[] = { + 418, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09f0, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a70, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x180b, 0x180d, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Alpha */ + +/* 'Blank': [[:Blank:]] */ +static const OnigCodePoint CR_Blank[] = { + 9, + 0x0009, 0x0009, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Blank */ + +/* 'Cntrl': [[:Cntrl:]] */ +static const OnigCodePoint CR_Cntrl[] = { + 19, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Cntrl */ + +/* 'Digit': [[:Digit:]] */ +static const OnigCodePoint CR_Digit[] = { + 23, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +}; /* CR_Digit */ + +/* 'Graph': [[:Graph:]] */ +static const OnigCodePoint CR_Graph[] = { + 424, + 0x0021, 0x007e, + 0x00a1, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1681, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x205e, + 0x2060, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Graph */ + +/* 'Lower': [[:Lower:]] */ +static const OnigCodePoint CR_Lower[] = { + 480, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2c30, 0x2c5e, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +}; /* CR_Lower */ + +/* 'Print': [[:Print:]] */ +static const OnigCodePoint CR_Print[] = { + 423, + 0x0009, 0x000d, + 0x0020, 0x007e, + 0x0085, 0x0085, + 0x00a0, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Print */ + +/* 'Punct': [[:Punct:]] */ +static const OnigCodePoint CR_Punct[] = { + 96, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_Punct */ + +/* 'Space': [[:Space:]] */ +static const OnigCodePoint CR_Space[] = { + 11, + 0x0009, 0x000d, + 0x0020, 0x0020, + 0x0085, 0x0085, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Space */ + +/* 'Upper': [[:Upper:]] */ +static const OnigCodePoint CR_Upper[] = { + 476, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2c00, 0x2c2e, + 0x2c80, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +}; /* CR_Upper */ + +/* 'XDigit': [[:XDigit:]] */ +static const OnigCodePoint CR_XDigit[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066 +}; /* CR_XDigit */ + +/* 'Word': [[:Word:]] */ +static const OnigCodePoint CR_Word[] = { + 464, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b2, 0x00b3, + 0x00b5, 0x00b5, + 0x00b9, 0x00ba, + 0x00bc, 0x00be, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x09f4, 0x09f9, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bf2, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f33, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1369, 0x137c, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2cfd, 0x2cfd, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Word */ + +/* 'Alnum': [[:Alnum:]] */ +static const OnigCodePoint CR_Alnum[] = { + 436, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Alnum */ + +/* 'ASCII': [[:ASCII:]] */ +static const OnigCodePoint CR_ASCII[] = { + 1, + 0x0000, 0x007f +}; /* CR_ASCII */ + +#ifdef USE_UNICODE_PROPERTIES + +/* 'Any': - */ +static const OnigCodePoint CR_Any[] = { + 1, + 0x0000, 0x10ffff +}; /* CR_Any */ + +/* 'Assigned': - */ +static const OnigCodePoint CR_Assigned[] = { + 420, + 0x0000, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xd800, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Assigned */ + +/* 'C': Major Category */ +static const OnigCodePoint CR_C[] = { + 422, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0242, 0x024f, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037b, 0x037d, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x03cf, 0x03cf, + 0x0487, 0x0487, + 0x04cf, 0x04cf, + 0x04fa, 0x04ff, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05ba, 0x05ba, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x060a, + 0x0616, 0x061a, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x063b, 0x063f, + 0x065f, 0x065f, + 0x06dd, 0x06dd, + 0x070e, 0x070f, + 0x074b, 0x074c, + 0x076e, 0x077f, + 0x07b2, 0x0900, + 0x093a, 0x093b, + 0x094e, 0x094f, + 0x0955, 0x0957, + 0x0971, 0x097c, + 0x097e, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fb, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, + 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a75, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, + 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b44, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b62, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3d, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c5f, + 0x0c62, 0x0c65, + 0x0c70, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce2, 0x0ce5, + 0x0cf0, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3d, + 0x0d44, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d62, 0x0d65, + 0x0d70, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6b, 0x0f70, + 0x0f8c, 0x0f8f, + 0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fce, + 0x0fd2, 0x0fff, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102b, 0x102b, + 0x1033, 0x1035, + 0x103a, 0x103f, + 0x105a, 0x109f, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x115a, 0x115e, + 0x11a3, 0x11a7, + 0x11fa, 0x11ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x1400, + 0x1677, 0x167f, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17b4, 0x17b5, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18aa, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19aa, 0x19af, + 0x19ca, 0x19cf, + 0x19da, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a20, 0x1cff, + 0x1dc4, 0x1dff, + 0x1e9c, 0x1e9f, + 0x1efa, 0x1eff, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x206f, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b6, 0x20cf, + 0x20ec, 0x20ff, + 0x214d, 0x2152, + 0x2184, 0x218f, + 0x23dc, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x269d, 0x269f, + 0x26b2, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27c7, 0x27cf, + 0x27ec, 0x27ef, + 0x2b14, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c7f, + 0x2ceb, 0x2cf8, + 0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2dff, + 0x2e18, 0x2e1b, + 0x2e1e, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312d, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31d0, 0x31ef, + 0x321f, 0x321f, + 0x3244, 0x324f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fbc, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa6ff, + 0xa717, 0xa7ff, + 0xa82c, 0xabff, + 0xd7a4, 0xf8ff, + 0xfa2e, 0xfa2f, + 0xfa6b, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe24, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfffb, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10840, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d129, + 0x1d173, 0x1d17a, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7ca, 0x1d7cd, + 0x1d800, 0x1ffff, + 0x2a6d7, 0x2f7ff, + 0x2fa1e, 0xe00ff, + 0xe01f0, 0x10ffff +}; /* CR_C */ + +/* 'Cc': General Category */ +static const OnigCodePoint CR_Cc[] = { + 2, + 0x0000, 0x001f, + 0x007f, 0x009f +}; /* CR_Cc */ + +/* 'Cf': General Category */ +static const OnigCodePoint CR_Cf[] = { + 14, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f +}; /* CR_Cf */ + +/* 'Cn': General Category */ +static const OnigCodePoint CR_Cn[] = { + 420, + 0x0242, 0x024f, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037b, 0x037d, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x03cf, 0x03cf, + 0x0487, 0x0487, + 0x04cf, 0x04cf, + 0x04fa, 0x04ff, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05ba, 0x05ba, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x05ff, + 0x0604, 0x060a, + 0x0616, 0x061a, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x063b, 0x063f, + 0x065f, 0x065f, + 0x070e, 0x070e, + 0x074b, 0x074c, + 0x076e, 0x077f, + 0x07b2, 0x0900, + 0x093a, 0x093b, + 0x094e, 0x094f, + 0x0955, 0x0957, + 0x0971, 0x097c, + 0x097e, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fb, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, + 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a75, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, + 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b44, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b62, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3d, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c5f, + 0x0c62, 0x0c65, + 0x0c70, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce2, 0x0ce5, + 0x0cf0, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3d, + 0x0d44, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d62, 0x0d65, + 0x0d70, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6b, 0x0f70, + 0x0f8c, 0x0f8f, + 0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fce, + 0x0fd2, 0x0fff, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102b, 0x102b, + 0x1033, 0x1035, + 0x103a, 0x103f, + 0x105a, 0x109f, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x115a, 0x115e, + 0x11a3, 0x11a7, + 0x11fa, 0x11ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x1400, + 0x1677, 0x167f, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18aa, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19aa, 0x19af, + 0x19ca, 0x19cf, + 0x19da, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a20, 0x1cff, + 0x1dc4, 0x1dff, + 0x1e9c, 0x1e9f, + 0x1efa, 0x1eff, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x2064, 0x2069, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b6, 0x20cf, + 0x20ec, 0x20ff, + 0x214d, 0x2152, + 0x2184, 0x218f, + 0x23dc, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x269d, 0x269f, + 0x26b2, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27c7, 0x27cf, + 0x27ec, 0x27ef, + 0x2b14, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c7f, + 0x2ceb, 0x2cf8, + 0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2dff, + 0x2e18, 0x2e1b, + 0x2e1e, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312d, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31d0, 0x31ef, + 0x321f, 0x321f, + 0x3244, 0x324f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fbc, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa6ff, + 0xa717, 0xa7ff, + 0xa82c, 0xabff, + 0xd7a4, 0xd7ff, + 0xfa2e, 0xfa2f, + 0xfa6b, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe24, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xfefe, + 0xff00, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfff8, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10840, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d129, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7ca, 0x1d7cd, + 0x1d800, 0x1ffff, + 0x2a6d7, 0x2f7ff, + 0x2fa1e, 0xe0000, + 0xe0002, 0xe001f, + 0xe0080, 0xe00ff, + 0xe01f0, 0xeffff, + 0xffffe, 0xfffff, + 0x10fffe, 0x10ffff +}; /* CR_Cn */ + +/* 'Co': General Category */ +static const OnigCodePoint CR_Co[] = { + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Co */ + +/* 'Cs': General Category */ +static const OnigCodePoint CR_Cs[] = { + 1, + 0xd800, 0xdfff +}; /* CR_Cs */ + +/* 'L': Major Category */ +static const OnigCodePoint CR_L[] = { + 347, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0640, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07b1, 0x07b1, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x097d, 0x097d, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e46, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1d00, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_L */ + +/* 'Ll': General Category */ +static const OnigCodePoint CR_Ll[] = { + 480, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2c30, 0x2c5e, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +}; /* CR_Ll */ + +/* 'Lm': General Category */ +static const OnigCodePoint CR_Lm[] = { + 26, + 0x02b0, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x037a, 0x037a, + 0x0559, 0x0559, + 0x0640, 0x0640, + 0x06e5, 0x06e6, + 0x0e46, 0x0e46, + 0x0ec6, 0x0ec6, + 0x10fc, 0x10fc, + 0x17d7, 0x17d7, + 0x1843, 0x1843, + 0x1d2c, 0x1d61, + 0x1d78, 0x1d78, + 0x1d9b, 0x1dbf, + 0x2090, 0x2094, + 0x2d6f, 0x2d6f, + 0x3005, 0x3005, + 0x3031, 0x3035, + 0x303b, 0x303b, + 0x309d, 0x309e, + 0x30fc, 0x30fe, + 0xa015, 0xa015, + 0xff70, 0xff70, + 0xff9e, 0xff9f +}; /* CR_Lm */ + +/* 'Lo': General Category */ +static const OnigCodePoint CR_Lo[] = { + 245, + 0x01bb, 0x01bb, + 0x01c0, 0x01c3, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0641, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07b1, 0x07b1, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x097d, 0x097d, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e45, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10d0, 0x10fa, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17dc, 0x17dc, + 0x1820, 0x1842, + 0x1844, 0x1877, + 0x1880, 0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x2135, 0x2138, + 0x2d30, 0x2d65, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3006, 0x3006, + 0x303c, 0x303c, + 0x3041, 0x3096, + 0x309f, 0x309f, + 0x30a1, 0x30fa, + 0x30ff, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa014, + 0xa016, 0xa48c, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff66, 0xff6f, + 0xff71, 0xff9d, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10450, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_Lo */ + +/* 'Lt': General Category */ +static const OnigCodePoint CR_Lt[] = { + 10, + 0x01c5, 0x01c5, + 0x01c8, 0x01c8, + 0x01cb, 0x01cb, + 0x01f2, 0x01f2, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fbc, 0x1fbc, + 0x1fcc, 0x1fcc, + 0x1ffc, 0x1ffc +}; /* CR_Lt */ + +/* 'Lu': General Category */ +static const OnigCodePoint CR_Lu[] = { + 476, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2c00, 0x2c2e, + 0x2c80, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +}; /* CR_Lu */ + +/* 'M': Major Category */ +static const OnigCodePoint CR_M[] = { + 133, + 0x0300, 0x036f, + 0x0483, 0x0486, + 0x0488, 0x0489, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x0615, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06de, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x0901, 0x0903, + 0x093c, 0x093c, + 0x093e, 0x094d, + 0x0951, 0x0954, + 0x0962, 0x0963, + 0x0981, 0x0983, + 0x09bc, 0x09bc, + 0x09be, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09e2, 0x09e3, + 0x0a01, 0x0a03, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a70, 0x0a71, + 0x0a81, 0x0a83, + 0x0abc, 0x0abc, + 0x0abe, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b3c, 0x0b3c, + 0x0b3e, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b82, 0x0b82, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c82, 0x0c83, + 0x0cbc, 0x0cbc, + 0x0cbe, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f3f, + 0x0f71, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1056, 0x1059, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b6, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a17, 0x1a1b, + 0x1dc0, 0x1dc3, + 0x20d0, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa823, 0xa827, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef +}; /* CR_M */ + +/* 'Mc': General Category */ +static const OnigCodePoint CR_Mc[] = { + 63, + 0x0903, 0x0903, + 0x093e, 0x0940, + 0x0949, 0x094c, + 0x0982, 0x0983, + 0x09be, 0x09c0, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09d7, 0x09d7, + 0x0a03, 0x0a03, + 0x0a3e, 0x0a40, + 0x0a83, 0x0a83, + 0x0abe, 0x0ac0, + 0x0ac9, 0x0ac9, + 0x0acb, 0x0acc, + 0x0b02, 0x0b03, + 0x0b3e, 0x0b3e, + 0x0b40, 0x0b40, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b57, 0x0b57, + 0x0bbe, 0x0bbf, + 0x0bc1, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c41, 0x0c44, + 0x0c82, 0x0c83, + 0x0cbe, 0x0cbe, + 0x0cc0, 0x0cc4, + 0x0cc7, 0x0cc8, + 0x0cca, 0x0ccb, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d40, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dcf, 0x0dd1, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0f3e, 0x0f3f, + 0x0f7f, 0x0f7f, + 0x102c, 0x102c, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x1056, 0x1057, + 0x17b6, 0x17b6, + 0x17be, 0x17c5, + 0x17c7, 0x17c8, + 0x1923, 0x1926, + 0x1929, 0x192b, + 0x1930, 0x1931, + 0x1933, 0x1938, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a19, 0x1a1b, + 0xa802, 0xa802, + 0xa823, 0xa824, + 0xa827, 0xa827, + 0x1d165, 0x1d166, + 0x1d16d, 0x1d172 +}; /* CR_Mc */ + +/* 'Me': General Category */ +static const OnigCodePoint CR_Me[] = { + 4, + 0x0488, 0x0489, + 0x06de, 0x06de, + 0x20dd, 0x20e0, + 0x20e2, 0x20e4 +}; /* CR_Me */ + +/* 'Mn': General Category */ +static const OnigCodePoint CR_Mn[] = { + 124, + 0x0300, 0x036f, + 0x0483, 0x0486, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x0615, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06df, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x0901, 0x0902, + 0x093c, 0x093c, + 0x0941, 0x0948, + 0x094d, 0x094d, + 0x0951, 0x0954, + 0x0962, 0x0963, + 0x0981, 0x0981, + 0x09bc, 0x09bc, + 0x09c1, 0x09c4, + 0x09cd, 0x09cd, + 0x09e2, 0x09e3, + 0x0a01, 0x0a02, + 0x0a3c, 0x0a3c, + 0x0a41, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a70, 0x0a71, + 0x0a81, 0x0a82, + 0x0abc, 0x0abc, + 0x0ac1, 0x0ac5, + 0x0ac7, 0x0ac8, + 0x0acd, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b01, + 0x0b3c, 0x0b3c, + 0x0b3f, 0x0b3f, + 0x0b41, 0x0b43, + 0x0b4d, 0x0b4d, + 0x0b56, 0x0b56, + 0x0b82, 0x0b82, + 0x0bc0, 0x0bc0, + 0x0bcd, 0x0bcd, + 0x0c3e, 0x0c40, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0cbc, 0x0cbc, + 0x0cbf, 0x0cbf, + 0x0cc6, 0x0cc6, + 0x0ccc, 0x0ccd, + 0x0d41, 0x0d43, + 0x0d4d, 0x0d4d, + 0x0dca, 0x0dca, + 0x0dd2, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f71, 0x0f7e, + 0x0f80, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102d, 0x1030, + 0x1032, 0x1032, + 0x1036, 0x1037, + 0x1039, 0x1039, + 0x1058, 0x1059, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b7, 0x17bd, + 0x17c6, 0x17c6, + 0x17c9, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193b, + 0x1a17, 0x1a18, + 0x1dc0, 0x1dc3, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa825, 0xa826, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef +}; /* CR_Mn */ + +/* 'N': Major Category */ +static const OnigCodePoint CR_N[] = { + 53, + 0x0030, 0x0039, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x09f4, 0x09f9, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bf2, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f33, + 0x1040, 0x1049, + 0x1369, 0x137c, + 0x16ee, 0x16f0, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0xff10, 0xff19, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5, + 0x104a0, 0x104a9, + 0x10a40, 0x10a47, + 0x1d7ce, 0x1d7ff +}; /* CR_N */ + +/* 'Nd': General Category */ +static const OnigCodePoint CR_Nd[] = { + 23, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +}; /* CR_Nd */ + +/* 'Nl': General Category */ +static const OnigCodePoint CR_Nl[] = { + 8, + 0x16ee, 0x16f0, + 0x2160, 0x2183, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x10140, 0x10174, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5 +}; /* CR_Nl */ + +/* 'No': General Category */ +static const OnigCodePoint CR_No[] = { + 26, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x09f4, 0x09f9, + 0x0bf0, 0x0bf2, + 0x0f2a, 0x0f33, + 0x1369, 0x137c, + 0x17f0, 0x17f9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2153, 0x215f, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x10107, 0x10133, + 0x10175, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x10a40, 0x10a47 +}; /* CR_No */ + +/* 'P': Major Category */ +static const OnigCodePoint CR_P[] = { + 96, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_P */ + +/* 'Pc': General Category */ +static const OnigCodePoint CR_Pc[] = { + 6, + 0x005f, 0x005f, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xff3f, 0xff3f +}; /* CR_Pc */ + +/* 'Pd': General Category */ +static const OnigCodePoint CR_Pd[] = { + 12, + 0x002d, 0x002d, + 0x058a, 0x058a, + 0x1806, 0x1806, + 0x2010, 0x2015, + 0x2e17, 0x2e17, + 0x301c, 0x301c, + 0x3030, 0x3030, + 0x30a0, 0x30a0, + 0xfe31, 0xfe32, + 0xfe58, 0xfe58, + 0xfe63, 0xfe63, + 0xff0d, 0xff0d +}; /* CR_Pd */ + +/* 'Pe': General Category */ +static const OnigCodePoint CR_Pe[] = { + 65, + 0x0029, 0x0029, + 0x005d, 0x005d, + 0x007d, 0x007d, + 0x0f3b, 0x0f3b, + 0x0f3d, 0x0f3d, + 0x169c, 0x169c, + 0x2046, 0x2046, + 0x207e, 0x207e, + 0x208e, 0x208e, + 0x232a, 0x232a, + 0x23b5, 0x23b5, + 0x2769, 0x2769, + 0x276b, 0x276b, + 0x276d, 0x276d, + 0x276f, 0x276f, + 0x2771, 0x2771, + 0x2773, 0x2773, + 0x2775, 0x2775, + 0x27c6, 0x27c6, + 0x27e7, 0x27e7, + 0x27e9, 0x27e9, + 0x27eb, 0x27eb, + 0x2984, 0x2984, + 0x2986, 0x2986, + 0x2988, 0x2988, + 0x298a, 0x298a, + 0x298c, 0x298c, + 0x298e, 0x298e, + 0x2990, 0x2990, + 0x2992, 0x2992, + 0x2994, 0x2994, + 0x2996, 0x2996, + 0x2998, 0x2998, + 0x29d9, 0x29d9, + 0x29db, 0x29db, + 0x29fd, 0x29fd, + 0x3009, 0x3009, + 0x300b, 0x300b, + 0x300d, 0x300d, + 0x300f, 0x300f, + 0x3011, 0x3011, + 0x3015, 0x3015, + 0x3017, 0x3017, + 0x3019, 0x3019, + 0x301b, 0x301b, + 0x301e, 0x301f, + 0xfd3f, 0xfd3f, + 0xfe18, 0xfe18, + 0xfe36, 0xfe36, + 0xfe38, 0xfe38, + 0xfe3a, 0xfe3a, + 0xfe3c, 0xfe3c, + 0xfe3e, 0xfe3e, + 0xfe40, 0xfe40, + 0xfe42, 0xfe42, + 0xfe44, 0xfe44, + 0xfe48, 0xfe48, + 0xfe5a, 0xfe5a, + 0xfe5c, 0xfe5c, + 0xfe5e, 0xfe5e, + 0xff09, 0xff09, + 0xff3d, 0xff3d, + 0xff5d, 0xff5d, + 0xff60, 0xff60, + 0xff63, 0xff63 +}; /* CR_Pe */ + +/* 'Pf': General Category */ +static const OnigCodePoint CR_Pf[] = { + 9, + 0x00bb, 0x00bb, + 0x2019, 0x2019, + 0x201d, 0x201d, + 0x203a, 0x203a, + 0x2e03, 0x2e03, + 0x2e05, 0x2e05, + 0x2e0a, 0x2e0a, + 0x2e0d, 0x2e0d, + 0x2e1d, 0x2e1d +}; /* CR_Pf */ + +/* 'Pi': General Category */ +static const OnigCodePoint CR_Pi[] = { + 10, + 0x00ab, 0x00ab, + 0x2018, 0x2018, + 0x201b, 0x201c, + 0x201f, 0x201f, + 0x2039, 0x2039, + 0x2e02, 0x2e02, + 0x2e04, 0x2e04, + 0x2e09, 0x2e09, + 0x2e0c, 0x2e0c, + 0x2e1c, 0x2e1c +}; /* CR_Pi */ + +/* 'Po': General Category */ +static const OnigCodePoint CR_Po[] = { + 88, + 0x0021, 0x0023, + 0x0025, 0x0027, + 0x002a, 0x002a, + 0x002c, 0x002c, + 0x002e, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005c, 0x005c, + 0x00a1, 0x00a1, + 0x00b7, 0x00b7, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x0589, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x1805, + 0x1807, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2016, 0x2017, + 0x2020, 0x2027, + 0x2030, 0x2038, + 0x203b, 0x203e, + 0x2041, 0x2043, + 0x2047, 0x2051, + 0x2053, 0x2053, + 0x2055, 0x205e, + 0x23b6, 0x23b6, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e01, + 0x2e06, 0x2e08, + 0x2e0b, 0x2e0b, + 0x2e0e, 0x2e16, + 0x3001, 0x3003, + 0x303d, 0x303d, + 0x30fb, 0x30fb, + 0xfe10, 0xfe16, + 0xfe19, 0xfe19, + 0xfe30, 0xfe30, + 0xfe45, 0xfe46, + 0xfe49, 0xfe4c, + 0xfe50, 0xfe52, + 0xfe54, 0xfe57, + 0xfe5f, 0xfe61, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff07, + 0xff0a, 0xff0a, + 0xff0c, 0xff0c, + 0xff0e, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3c, 0xff3c, + 0xff61, 0xff61, + 0xff64, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_Po */ + +/* 'Ps': General Category */ +static const OnigCodePoint CR_Ps[] = { + 67, + 0x0028, 0x0028, + 0x005b, 0x005b, + 0x007b, 0x007b, + 0x0f3a, 0x0f3a, + 0x0f3c, 0x0f3c, + 0x169b, 0x169b, + 0x201a, 0x201a, + 0x201e, 0x201e, + 0x2045, 0x2045, + 0x207d, 0x207d, + 0x208d, 0x208d, + 0x2329, 0x2329, + 0x23b4, 0x23b4, + 0x2768, 0x2768, + 0x276a, 0x276a, + 0x276c, 0x276c, + 0x276e, 0x276e, + 0x2770, 0x2770, + 0x2772, 0x2772, + 0x2774, 0x2774, + 0x27c5, 0x27c5, + 0x27e6, 0x27e6, + 0x27e8, 0x27e8, + 0x27ea, 0x27ea, + 0x2983, 0x2983, + 0x2985, 0x2985, + 0x2987, 0x2987, + 0x2989, 0x2989, + 0x298b, 0x298b, + 0x298d, 0x298d, + 0x298f, 0x298f, + 0x2991, 0x2991, + 0x2993, 0x2993, + 0x2995, 0x2995, + 0x2997, 0x2997, + 0x29d8, 0x29d8, + 0x29da, 0x29da, + 0x29fc, 0x29fc, + 0x3008, 0x3008, + 0x300a, 0x300a, + 0x300c, 0x300c, + 0x300e, 0x300e, + 0x3010, 0x3010, + 0x3014, 0x3014, + 0x3016, 0x3016, + 0x3018, 0x3018, + 0x301a, 0x301a, + 0x301d, 0x301d, + 0xfd3e, 0xfd3e, + 0xfe17, 0xfe17, + 0xfe35, 0xfe35, + 0xfe37, 0xfe37, + 0xfe39, 0xfe39, + 0xfe3b, 0xfe3b, + 0xfe3d, 0xfe3d, + 0xfe3f, 0xfe3f, + 0xfe41, 0xfe41, + 0xfe43, 0xfe43, + 0xfe47, 0xfe47, + 0xfe59, 0xfe59, + 0xfe5b, 0xfe5b, + 0xfe5d, 0xfe5d, + 0xff08, 0xff08, + 0xff3b, 0xff3b, + 0xff5b, 0xff5b, + 0xff5f, 0xff5f, + 0xff62, 0xff62 +}; /* CR_Ps */ + +/* 'S': Major Category */ +static const OnigCodePoint CR_S[] = { + 162, + 0x0024, 0x0024, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00a2, 0x00a9, + 0x00ac, 0x00ac, + 0x00ae, 0x00b1, + 0x00b4, 0x00b4, + 0x00b6, 0x00b6, + 0x00b8, 0x00b8, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02c2, 0x02c5, + 0x02d2, 0x02df, + 0x02e5, 0x02ed, + 0x02ef, 0x02ff, + 0x0374, 0x0375, + 0x0384, 0x0385, + 0x03f6, 0x03f6, + 0x0482, 0x0482, + 0x060b, 0x060b, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x09f2, 0x09f3, + 0x09fa, 0x09fa, + 0x0af1, 0x0af1, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bfa, + 0x0e3f, 0x0e3f, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x17db, 0x17db, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x20a0, 0x20b5, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x2132, 0x2132, + 0x213a, 0x213b, + 0x2140, 0x2144, + 0x214a, 0x214c, + 0x2190, 0x2328, + 0x232b, 0x23b3, + 0x23b7, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c4, + 0x27d0, 0x27e5, + 0x27f0, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2b13, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x309b, 0x309c, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31cf, + 0x3200, 0x321e, + 0x322a, 0x3243, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa828, 0xa82b, + 0xfb29, 0xfb29, + 0xfdfc, 0xfdfd, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x103d0, 0x103d0, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3 +}; /* CR_S */ + +/* 'Sc': General Category */ +static const OnigCodePoint CR_Sc[] = { + 14, + 0x0024, 0x0024, + 0x00a2, 0x00a5, + 0x060b, 0x060b, + 0x09f2, 0x09f3, + 0x0af1, 0x0af1, + 0x0bf9, 0x0bf9, + 0x0e3f, 0x0e3f, + 0x17db, 0x17db, + 0x20a0, 0x20b5, + 0xfdfc, 0xfdfc, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xffe0, 0xffe1, + 0xffe5, 0xffe6 +}; /* CR_Sc */ + +/* 'Sk': General Category */ +static const OnigCodePoint CR_Sk[] = { + 23, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x00a8, 0x00a8, + 0x00af, 0x00af, + 0x00b4, 0x00b4, + 0x00b8, 0x00b8, + 0x02c2, 0x02c5, + 0x02d2, 0x02df, + 0x02e5, 0x02ed, + 0x02ef, 0x02ff, + 0x0374, 0x0375, + 0x0384, 0x0385, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x309b, 0x309c, + 0xa700, 0xa716, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xffe3, 0xffe3 +}; /* CR_Sk */ + +/* 'Sm': General Category */ +static const OnigCodePoint CR_Sm[] = { + 59, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00ac, 0x00ac, + 0x00b1, 0x00b1, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x03f6, 0x03f6, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x2140, 0x2144, + 0x214b, 0x214b, + 0x2190, 0x2194, + 0x219a, 0x219b, + 0x21a0, 0x21a0, + 0x21a3, 0x21a3, + 0x21a6, 0x21a6, + 0x21ae, 0x21ae, + 0x21ce, 0x21cf, + 0x21d2, 0x21d2, + 0x21d4, 0x21d4, + 0x21f4, 0x22ff, + 0x2308, 0x230b, + 0x2320, 0x2321, + 0x237c, 0x237c, + 0x239b, 0x23b3, + 0x25b7, 0x25b7, + 0x25c1, 0x25c1, + 0x25f8, 0x25ff, + 0x266f, 0x266f, + 0x27c0, 0x27c4, + 0x27d0, 0x27e5, + 0x27f0, 0x27ff, + 0x2900, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2aff, + 0xfb29, 0xfb29, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe2, 0xffe2, + 0xffe9, 0xffec, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3 +}; /* CR_Sm */ + +/* 'So': General Category */ +static const OnigCodePoint CR_So[] = { + 120, + 0x00a6, 0x00a7, + 0x00a9, 0x00a9, + 0x00ae, 0x00ae, + 0x00b0, 0x00b0, + 0x00b6, 0x00b6, + 0x0482, 0x0482, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x09fa, 0x09fa, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bf8, + 0x0bfa, 0x0bfa, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x2132, 0x2132, + 0x213a, 0x213b, + 0x214a, 0x214a, + 0x214c, 0x214c, + 0x2195, 0x2199, + 0x219c, 0x219f, + 0x21a1, 0x21a2, + 0x21a4, 0x21a5, + 0x21a7, 0x21ad, + 0x21af, 0x21cd, + 0x21d0, 0x21d1, + 0x21d3, 0x21d3, + 0x21d5, 0x21f3, + 0x2300, 0x2307, + 0x230c, 0x231f, + 0x2322, 0x2328, + 0x232b, 0x237b, + 0x237d, 0x239a, + 0x23b7, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x25b6, + 0x25b8, 0x25c0, + 0x25c2, 0x25f7, + 0x2600, 0x266e, + 0x2670, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x2800, 0x28ff, + 0x2b00, 0x2b13, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31cf, + 0x3200, 0x321e, + 0x322a, 0x3243, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa828, 0xa82b, + 0xfdfd, 0xfdfd, + 0xffe4, 0xffe4, + 0xffe8, 0xffe8, + 0xffed, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x103d0, 0x103d0, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356 +}; /* CR_So */ + +/* 'Z': Major Category */ +static const OnigCodePoint CR_Z[] = { + 9, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Z */ + +/* 'Zl': General Category */ +static const OnigCodePoint CR_Zl[] = { + 1, + 0x2028, 0x2028 +}; /* CR_Zl */ + +/* 'Zp': General Category */ +static const OnigCodePoint CR_Zp[] = { + 1, + 0x2029, 0x2029 +}; /* CR_Zp */ + +/* 'Zs': General Category */ +static const OnigCodePoint CR_Zs[] = { + 8, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Zs */ + +/* 'Arabic': Script */ +static const OnigCodePoint CR_Arabic[] = { + 17, + 0x060b, 0x060b, + 0x060d, 0x0615, + 0x061e, 0x061e, + 0x0621, 0x063a, + 0x0641, 0x064a, + 0x0656, 0x065e, + 0x066a, 0x066f, + 0x0671, 0x06dc, + 0x06de, 0x06ff, + 0x0750, 0x076d, + 0xfb50, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfc, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc +}; /* CR_Arabic */ + +/* 'Armenian': Script */ +static const OnigCodePoint CR_Armenian[] = { + 5, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x058a, 0x058a, + 0xfb13, 0xfb17 +}; /* CR_Armenian */ + +/* 'Bengali': Script */ +static const OnigCodePoint CR_Bengali[] = { + 14, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa +}; /* CR_Bengali */ + +/* 'Bopomofo': Script */ +static const OnigCodePoint CR_Bopomofo[] = { + 2, + 0x3105, 0x312c, + 0x31a0, 0x31b7 +}; /* CR_Bopomofo */ + +/* 'Braille': Script */ +static const OnigCodePoint CR_Braille[] = { + 1, + 0x2800, 0x28ff +}; /* CR_Braille */ + +/* 'Buginese': Script */ +static const OnigCodePoint CR_Buginese[] = { + 2, + 0x1a00, 0x1a1b, + 0x1a1e, 0x1a1f +}; /* CR_Buginese */ + +/* 'Buhid': Script */ +static const OnigCodePoint CR_Buhid[] = { + 1, + 0x1740, 0x1753 +}; /* CR_Buhid */ + +/* 'Canadian_Aboriginal': Script */ +static const OnigCodePoint CR_Canadian_Aboriginal[] = { + 1, + 0x1401, 0x1676 +}; /* CR_Canadian_Aboriginal */ + +/* 'Cherokee': Script */ +static const OnigCodePoint CR_Cherokee[] = { + 1, + 0x13a0, 0x13f4 +}; /* CR_Cherokee */ + +/* 'Common': Script */ +static const OnigCodePoint CR_Common[] = { + 126, + 0x0000, 0x0040, + 0x005b, 0x0060, + 0x007b, 0x00a9, + 0x00ab, 0x00b9, + 0x00bb, 0x00bf, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02b9, 0x02df, + 0x02e5, 0x02ff, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x0589, 0x0589, + 0x0600, 0x0603, + 0x060c, 0x060c, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0640, 0x0640, + 0x0660, 0x0669, + 0x06dd, 0x06dd, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0e3f, 0x0e3f, + 0x10fb, 0x10fb, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x2000, 0x200b, + 0x200e, 0x2063, + 0x206a, 0x2070, + 0x2074, 0x207e, + 0x2080, 0x208e, + 0x20a0, 0x20b5, + 0x2100, 0x2125, + 0x2127, 0x2129, + 0x212c, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x27ff, + 0x2900, 0x2b13, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2ff0, 0x2ffb, + 0x3000, 0x3004, + 0x3006, 0x3006, + 0x3008, 0x3020, + 0x3030, 0x3037, + 0x303c, 0x303f, + 0x309b, 0x309c, + 0x30a0, 0x30a0, + 0x30fb, 0x30fc, + 0x3190, 0x319f, + 0x31c0, 0x31cf, + 0x3220, 0x3243, + 0x3250, 0x325f, + 0x327e, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa700, 0xa716, + 0xe000, 0xf8ff, + 0xfd3e, 0xfd3f, + 0xfdfd, 0xfdfd, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfeff, 0xfeff, + 0xff01, 0xff20, + 0xff3b, 0xff40, + 0xff5b, 0xff65, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d166, + 0x1d16a, 0x1d17a, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Common */ + +/* 'Coptic': Script */ +static const OnigCodePoint CR_Coptic[] = { + 3, + 0x03e2, 0x03ef, + 0x2c80, 0x2cea, + 0x2cf9, 0x2cff +}; /* CR_Coptic */ + +/* 'Cypriot': Script */ +static const OnigCodePoint CR_Cypriot[] = { + 6, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f +}; /* CR_Cypriot */ + +/* 'Cyrillic': Script */ +static const OnigCodePoint CR_Cyrillic[] = { + 6, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x1d2b, 0x1d2b, + 0x1d78, 0x1d78 +}; /* CR_Cyrillic */ + +/* 'Deseret': Script */ +static const OnigCodePoint CR_Deseret[] = { + 1, + 0x10400, 0x1044f +}; /* CR_Deseret */ + +/* 'Devanagari': Script */ +static const OnigCodePoint CR_Devanagari[] = { + 6, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d +}; /* CR_Devanagari */ + +/* 'Ethiopic': Script */ +static const OnigCodePoint CR_Ethiopic[] = { + 27, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde +}; /* CR_Ethiopic */ + +/* 'Georgian': Script */ +static const OnigCodePoint CR_Georgian[] = { + 4, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x2d00, 0x2d25 +}; /* CR_Georgian */ + +/* 'Glagolitic': Script */ +static const OnigCodePoint CR_Glagolitic[] = { + 2, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e +}; /* CR_Glagolitic */ + +/* 'Gothic': Script */ +static const OnigCodePoint CR_Gothic[] = { + 1, + 0x10330, 0x1034a +}; /* CR_Gothic */ + +/* 'Greek': Script */ +static const OnigCodePoint CR_Greek[] = { + 31, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x0384, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03e1, + 0x03f0, 0x03ff, + 0x1d26, 0x1d2a, + 0x1d5d, 0x1d61, + 0x1d66, 0x1d6a, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2126, 0x2126, + 0x10140, 0x1018a, + 0x1d200, 0x1d245 +}; /* CR_Greek */ + +/* 'Gujarati': Script */ +static const OnigCodePoint CR_Gujarati[] = { + 14, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1 +}; /* CR_Gujarati */ + +/* 'Gurmukhi': Script */ +static const OnigCodePoint CR_Gurmukhi[] = { + 15, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74 +}; /* CR_Gurmukhi */ + +/* 'Han': Script */ +static const OnigCodePoint CR_Han[] = { + 14, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x3005, 0x3005, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303b, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_Han */ + +/* 'Hangul': Script */ +static const OnigCodePoint CR_Hangul[] = { + 12, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x3131, 0x318e, + 0x3200, 0x321e, + 0x3260, 0x327d, + 0xac00, 0xd7a3, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc +}; /* CR_Hangul */ + +/* 'Hanunoo': Script */ +static const OnigCodePoint CR_Hanunoo[] = { + 1, + 0x1720, 0x1734 +}; /* CR_Hanunoo */ + +/* 'Hebrew': Script */ +static const OnigCodePoint CR_Hebrew[] = { + 10, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfb4f +}; /* CR_Hebrew */ + +/* 'Hiragana': Script */ +static const OnigCodePoint CR_Hiragana[] = { + 2, + 0x3041, 0x3096, + 0x309d, 0x309f +}; /* CR_Hiragana */ + +/* 'Inherited': Script */ +static const OnigCodePoint CR_Inherited[] = { + 15, + 0x0300, 0x036f, + 0x064b, 0x0655, + 0x0670, 0x0670, + 0x1dc0, 0x1dc3, + 0x200c, 0x200d, + 0x20d0, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0xe0100, 0xe01ef +}; /* CR_Inherited */ + +/* 'Kannada': Script */ +static const OnigCodePoint CR_Kannada[] = { + 13, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef +}; /* CR_Kannada */ + +/* 'Katakana': Script */ +static const OnigCodePoint CR_Katakana[] = { + 5, + 0x30a1, 0x30fa, + 0x30fd, 0x30ff, + 0x31f0, 0x31ff, + 0xff66, 0xff6f, + 0xff71, 0xff9d +}; /* CR_Katakana */ + +/* 'Kharoshthi': Script */ +static const OnigCodePoint CR_Kharoshthi[] = { + 8, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58 +}; /* CR_Kharoshthi */ + +/* 'Khmer': Script */ +static const OnigCodePoint CR_Khmer[] = { + 4, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x19e0, 0x19ff +}; /* CR_Khmer */ + +/* 'Lao': Script */ +static const OnigCodePoint CR_Lao[] = { + 18, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd +}; /* CR_Lao */ + +/* 'Latin': Script */ +static const OnigCodePoint CR_Latin[] = { + 23, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02b8, + 0x02e0, 0x02e4, + 0x1d00, 0x1d25, + 0x1d2c, 0x1d5c, + 0x1d62, 0x1d65, + 0x1d6b, 0x1d77, + 0x1d79, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x212a, 0x212b, + 0xfb00, 0xfb06, + 0xff21, 0xff3a, + 0xff41, 0xff5a +}; /* CR_Latin */ + +/* 'Limbu': Script */ +static const OnigCodePoint CR_Limbu[] = { + 5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x194f +}; /* CR_Limbu */ + +/* 'Linear_B': Script */ +static const OnigCodePoint CR_Linear_B[] = { + 7, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa +}; /* CR_Linear_B */ + +/* 'Malayalam': Script */ +static const OnigCodePoint CR_Malayalam[] = { + 11, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f +}; /* CR_Malayalam */ + +/* 'Mongolian': Script */ +static const OnigCodePoint CR_Mongolian[] = { + 4, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9 +}; /* CR_Mongolian */ + +/* 'Myanmar': Script */ +static const OnigCodePoint CR_Myanmar[] = { + 6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059 +}; /* CR_Myanmar */ + +/* 'New_Tai_Lue': Script */ +static const OnigCodePoint CR_New_Tai_Lue[] = { + 4, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x19df +}; /* CR_New_Tai_Lue */ + +/* 'Ogham': Script */ +static const OnigCodePoint CR_Ogham[] = { + 1, + 0x1680, 0x169c +}; /* CR_Ogham */ + +/* 'Old_Italic': Script */ +static const OnigCodePoint CR_Old_Italic[] = { + 2, + 0x10300, 0x1031e, + 0x10320, 0x10323 +}; /* CR_Old_Italic */ + +/* 'Old_Persian': Script */ +static const OnigCodePoint CR_Old_Persian[] = { + 2, + 0x103a0, 0x103c3, + 0x103c8, 0x103d5 +}; /* CR_Old_Persian */ + +/* 'Oriya': Script */ +static const OnigCodePoint CR_Oriya[] = { + 14, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71 +}; /* CR_Oriya */ + +/* 'Osmanya': Script */ +static const OnigCodePoint CR_Osmanya[] = { + 2, + 0x10480, 0x1049d, + 0x104a0, 0x104a9 +}; /* CR_Osmanya */ + +/* 'Runic': Script */ +static const OnigCodePoint CR_Runic[] = { + 2, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0 +}; /* CR_Runic */ + +/* 'Shavian': Script */ +static const OnigCodePoint CR_Shavian[] = { + 1, + 0x10450, 0x1047f +}; /* CR_Shavian */ + +/* 'Sinhala': Script */ +static const OnigCodePoint CR_Sinhala[] = { + 11, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4 +}; /* CR_Sinhala */ + +/* 'Syloti_Nagri': Script */ +static const OnigCodePoint CR_Syloti_Nagri[] = { + 1, + 0xa800, 0xa82b +}; /* CR_Syloti_Nagri */ + +/* 'Syriac': Script */ +static const OnigCodePoint CR_Syriac[] = { + 3, + 0x0700, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f +}; /* CR_Syriac */ + +/* 'Tagalog': Script */ +static const OnigCodePoint CR_Tagalog[] = { + 2, + 0x1700, 0x170c, + 0x170e, 0x1714 +}; /* CR_Tagalog */ + +/* 'Tagbanwa': Script */ +static const OnigCodePoint CR_Tagbanwa[] = { + 3, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773 +}; /* CR_Tagbanwa */ + +/* 'Tai_Le': Script */ +static const OnigCodePoint CR_Tai_Le[] = { + 2, + 0x1950, 0x196d, + 0x1970, 0x1974 +}; /* CR_Tai_Le */ + +/* 'Tamil': Script */ +static const OnigCodePoint CR_Tamil[] = { + 15, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa +}; /* CR_Tamil */ + +/* 'Telugu': Script */ +static const OnigCodePoint CR_Telugu[] = { + 12, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f +}; /* CR_Telugu */ + +/* 'Thaana': Script */ +static const OnigCodePoint CR_Thaana[] = { + 1, + 0x0780, 0x07b1 +}; /* CR_Thaana */ + +/* 'Thai': Script */ +static const OnigCodePoint CR_Thai[] = { + 2, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e5b +}; /* CR_Thai */ + +/* 'Tibetan': Script */ +static const OnigCodePoint CR_Tibetan[] = { + 7, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1 +}; /* CR_Tibetan */ + +/* 'Tifinagh': Script */ +static const OnigCodePoint CR_Tifinagh[] = { + 2, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f +}; /* CR_Tifinagh */ + +/* 'Ugaritic': Script */ +static const OnigCodePoint CR_Ugaritic[] = { + 2, + 0x10380, 0x1039d, + 0x1039f, 0x1039f +}; /* CR_Ugaritic */ + +/* 'Yi': Script */ +static const OnigCodePoint CR_Yi[] = { + 2, + 0xa000, 0xa48c, + 0xa490, 0xa4c6 +}; /* CR_Yi */ + + +#endif /* USE_UNICODE_PROPERTIES */ + + +typedef struct { + int n; + OnigCodePoint code[3]; +} CodePointList3; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseFold_11_Type; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseUnfold_11_Type; + +typedef struct { + int n; + OnigCodePoint code[2]; +} CodePointList2; + +typedef struct { + OnigCodePoint from[2]; + CodePointList2 to; +} CaseUnfold_12_Type; + +typedef struct { + OnigCodePoint from[3]; + CodePointList2 to; +} CaseUnfold_13_Type; + +static const CaseFold_11_Type CaseFold[] = { + { 0x0041, {1, {0x0061}}}, + { 0x0042, {1, {0x0062}}}, + { 0x0043, {1, {0x0063}}}, + { 0x0044, {1, {0x0064}}}, + { 0x0045, {1, {0x0065}}}, + { 0x0046, {1, {0x0066}}}, + { 0x0047, {1, {0x0067}}}, + { 0x0048, {1, {0x0068}}}, + { 0x004a, {1, {0x006a}}}, + { 0x004b, {1, {0x006b}}}, + { 0x004c, {1, {0x006c}}}, + { 0x004d, {1, {0x006d}}}, + { 0x004e, {1, {0x006e}}}, + { 0x004f, {1, {0x006f}}}, + { 0x0050, {1, {0x0070}}}, + { 0x0051, {1, {0x0071}}}, + { 0x0052, {1, {0x0072}}}, + { 0x0053, {1, {0x0073}}}, + { 0x0054, {1, {0x0074}}}, + { 0x0055, {1, {0x0075}}}, + { 0x0056, {1, {0x0076}}}, + { 0x0057, {1, {0x0077}}}, + { 0x0058, {1, {0x0078}}}, + { 0x0059, {1, {0x0079}}}, + { 0x005a, {1, {0x007a}}}, + { 0x00b5, {1, {0x03bc}}}, + { 0x00c0, {1, {0x00e0}}}, + { 0x00c1, {1, {0x00e1}}}, + { 0x00c2, {1, {0x00e2}}}, + { 0x00c3, {1, {0x00e3}}}, + { 0x00c4, {1, {0x00e4}}}, + { 0x00c5, {1, {0x00e5}}}, + { 0x00c6, {1, {0x00e6}}}, + { 0x00c7, {1, {0x00e7}}}, + { 0x00c8, {1, {0x00e8}}}, + { 0x00c9, {1, {0x00e9}}}, + { 0x00ca, {1, {0x00ea}}}, + { 0x00cb, {1, {0x00eb}}}, + { 0x00cc, {1, {0x00ec}}}, + { 0x00cd, {1, {0x00ed}}}, + { 0x00ce, {1, {0x00ee}}}, + { 0x00cf, {1, {0x00ef}}}, + { 0x00d0, {1, {0x00f0}}}, + { 0x00d1, {1, {0x00f1}}}, + { 0x00d2, {1, {0x00f2}}}, + { 0x00d3, {1, {0x00f3}}}, + { 0x00d4, {1, {0x00f4}}}, + { 0x00d5, {1, {0x00f5}}}, + { 0x00d6, {1, {0x00f6}}}, + { 0x00d8, {1, {0x00f8}}}, + { 0x00d9, {1, {0x00f9}}}, + { 0x00da, {1, {0x00fa}}}, + { 0x00db, {1, {0x00fb}}}, + { 0x00dc, {1, {0x00fc}}}, + { 0x00dd, {1, {0x00fd}}}, + { 0x00de, {1, {0x00fe}}}, + { 0x00df, {2, {0x0073, 0x0073}}}, + { 0x0100, {1, {0x0101}}}, + { 0x0102, {1, {0x0103}}}, + { 0x0104, {1, {0x0105}}}, + { 0x0106, {1, {0x0107}}}, + { 0x0108, {1, {0x0109}}}, + { 0x010a, {1, {0x010b}}}, + { 0x010c, {1, {0x010d}}}, + { 0x010e, {1, {0x010f}}}, + { 0x0110, {1, {0x0111}}}, + { 0x0112, {1, {0x0113}}}, + { 0x0114, {1, {0x0115}}}, + { 0x0116, {1, {0x0117}}}, + { 0x0118, {1, {0x0119}}}, + { 0x011a, {1, {0x011b}}}, + { 0x011c, {1, {0x011d}}}, + { 0x011e, {1, {0x011f}}}, + { 0x0120, {1, {0x0121}}}, + { 0x0122, {1, {0x0123}}}, + { 0x0124, {1, {0x0125}}}, + { 0x0126, {1, {0x0127}}}, + { 0x0128, {1, {0x0129}}}, + { 0x012a, {1, {0x012b}}}, + { 0x012c, {1, {0x012d}}}, + { 0x012e, {1, {0x012f}}}, + { 0x0132, {1, {0x0133}}}, + { 0x0134, {1, {0x0135}}}, + { 0x0136, {1, {0x0137}}}, + { 0x0139, {1, {0x013a}}}, + { 0x013b, {1, {0x013c}}}, + { 0x013d, {1, {0x013e}}}, + { 0x013f, {1, {0x0140}}}, + { 0x0141, {1, {0x0142}}}, + { 0x0143, {1, {0x0144}}}, + { 0x0145, {1, {0x0146}}}, + { 0x0147, {1, {0x0148}}}, + { 0x0149, {2, {0x02bc, 0x006e}}}, + { 0x014a, {1, {0x014b}}}, + { 0x014c, {1, {0x014d}}}, + { 0x014e, {1, {0x014f}}}, + { 0x0150, {1, {0x0151}}}, + { 0x0152, {1, {0x0153}}}, + { 0x0154, {1, {0x0155}}}, + { 0x0156, {1, {0x0157}}}, + { 0x0158, {1, {0x0159}}}, + { 0x015a, {1, {0x015b}}}, + { 0x015c, {1, {0x015d}}}, + { 0x015e, {1, {0x015f}}}, + { 0x0160, {1, {0x0161}}}, + { 0x0162, {1, {0x0163}}}, + { 0x0164, {1, {0x0165}}}, + { 0x0166, {1, {0x0167}}}, + { 0x0168, {1, {0x0169}}}, + { 0x016a, {1, {0x016b}}}, + { 0x016c, {1, {0x016d}}}, + { 0x016e, {1, {0x016f}}}, + { 0x0170, {1, {0x0171}}}, + { 0x0172, {1, {0x0173}}}, + { 0x0174, {1, {0x0175}}}, + { 0x0176, {1, {0x0177}}}, + { 0x0178, {1, {0x00ff}}}, + { 0x0179, {1, {0x017a}}}, + { 0x017b, {1, {0x017c}}}, + { 0x017d, {1, {0x017e}}}, + { 0x017f, {1, {0x0073}}}, + { 0x0181, {1, {0x0253}}}, + { 0x0182, {1, {0x0183}}}, + { 0x0184, {1, {0x0185}}}, + { 0x0186, {1, {0x0254}}}, + { 0x0187, {1, {0x0188}}}, + { 0x0189, {1, {0x0256}}}, + { 0x018a, {1, {0x0257}}}, + { 0x018b, {1, {0x018c}}}, + { 0x018e, {1, {0x01dd}}}, + { 0x018f, {1, {0x0259}}}, + { 0x0190, {1, {0x025b}}}, + { 0x0191, {1, {0x0192}}}, + { 0x0193, {1, {0x0260}}}, + { 0x0194, {1, {0x0263}}}, + { 0x0196, {1, {0x0269}}}, + { 0x0197, {1, {0x0268}}}, + { 0x0198, {1, {0x0199}}}, + { 0x019c, {1, {0x026f}}}, + { 0x019d, {1, {0x0272}}}, + { 0x019f, {1, {0x0275}}}, + { 0x01a0, {1, {0x01a1}}}, + { 0x01a2, {1, {0x01a3}}}, + { 0x01a4, {1, {0x01a5}}}, + { 0x01a6, {1, {0x0280}}}, + { 0x01a7, {1, {0x01a8}}}, + { 0x01a9, {1, {0x0283}}}, + { 0x01ac, {1, {0x01ad}}}, + { 0x01ae, {1, {0x0288}}}, + { 0x01af, {1, {0x01b0}}}, + { 0x01b1, {1, {0x028a}}}, + { 0x01b2, {1, {0x028b}}}, + { 0x01b3, {1, {0x01b4}}}, + { 0x01b5, {1, {0x01b6}}}, + { 0x01b7, {1, {0x0292}}}, + { 0x01b8, {1, {0x01b9}}}, + { 0x01bc, {1, {0x01bd}}}, + { 0x01c4, {1, {0x01c6}}}, + { 0x01c5, {1, {0x01c6}}}, + { 0x01c7, {1, {0x01c9}}}, + { 0x01c8, {1, {0x01c9}}}, + { 0x01ca, {1, {0x01cc}}}, + { 0x01cb, {1, {0x01cc}}}, + { 0x01cd, {1, {0x01ce}}}, + { 0x01cf, {1, {0x01d0}}}, + { 0x01d1, {1, {0x01d2}}}, + { 0x01d3, {1, {0x01d4}}}, + { 0x01d5, {1, {0x01d6}}}, + { 0x01d7, {1, {0x01d8}}}, + { 0x01d9, {1, {0x01da}}}, + { 0x01db, {1, {0x01dc}}}, + { 0x01de, {1, {0x01df}}}, + { 0x01e0, {1, {0x01e1}}}, + { 0x01e2, {1, {0x01e3}}}, + { 0x01e4, {1, {0x01e5}}}, + { 0x01e6, {1, {0x01e7}}}, + { 0x01e8, {1, {0x01e9}}}, + { 0x01ea, {1, {0x01eb}}}, + { 0x01ec, {1, {0x01ed}}}, + { 0x01ee, {1, {0x01ef}}}, + { 0x01f0, {2, {0x006a, 0x030c}}}, + { 0x01f1, {1, {0x01f3}}}, + { 0x01f2, {1, {0x01f3}}}, + { 0x01f4, {1, {0x01f5}}}, + { 0x01f6, {1, {0x0195}}}, + { 0x01f7, {1, {0x01bf}}}, + { 0x01f8, {1, {0x01f9}}}, + { 0x01fa, {1, {0x01fb}}}, + { 0x01fc, {1, {0x01fd}}}, + { 0x01fe, {1, {0x01ff}}}, + { 0x0200, {1, {0x0201}}}, + { 0x0202, {1, {0x0203}}}, + { 0x0204, {1, {0x0205}}}, + { 0x0206, {1, {0x0207}}}, + { 0x0208, {1, {0x0209}}}, + { 0x020a, {1, {0x020b}}}, + { 0x020c, {1, {0x020d}}}, + { 0x020e, {1, {0x020f}}}, + { 0x0210, {1, {0x0211}}}, + { 0x0212, {1, {0x0213}}}, + { 0x0214, {1, {0x0215}}}, + { 0x0216, {1, {0x0217}}}, + { 0x0218, {1, {0x0219}}}, + { 0x021a, {1, {0x021b}}}, + { 0x021c, {1, {0x021d}}}, + { 0x021e, {1, {0x021f}}}, + { 0x0220, {1, {0x019e}}}, + { 0x0222, {1, {0x0223}}}, + { 0x0224, {1, {0x0225}}}, + { 0x0226, {1, {0x0227}}}, + { 0x0228, {1, {0x0229}}}, + { 0x022a, {1, {0x022b}}}, + { 0x022c, {1, {0x022d}}}, + { 0x022e, {1, {0x022f}}}, + { 0x0230, {1, {0x0231}}}, + { 0x0232, {1, {0x0233}}}, + { 0x023b, {1, {0x023c}}}, + { 0x023d, {1, {0x019a}}}, + { 0x0241, {1, {0x0294}}}, + { 0x0345, {1, {0x03b9}}}, + { 0x0386, {1, {0x03ac}}}, + { 0x0388, {1, {0x03ad}}}, + { 0x0389, {1, {0x03ae}}}, + { 0x038a, {1, {0x03af}}}, + { 0x038c, {1, {0x03cc}}}, + { 0x038e, {1, {0x03cd}}}, + { 0x038f, {1, {0x03ce}}}, + { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x0391, {1, {0x03b1}}}, + { 0x0392, {1, {0x03b2}}}, + { 0x0393, {1, {0x03b3}}}, + { 0x0394, {1, {0x03b4}}}, + { 0x0395, {1, {0x03b5}}}, + { 0x0396, {1, {0x03b6}}}, + { 0x0397, {1, {0x03b7}}}, + { 0x0398, {1, {0x03b8}}}, + { 0x0399, {1, {0x03b9}}}, + { 0x039a, {1, {0x03ba}}}, + { 0x039b, {1, {0x03bb}}}, + { 0x039c, {1, {0x03bc}}}, + { 0x039d, {1, {0x03bd}}}, + { 0x039e, {1, {0x03be}}}, + { 0x039f, {1, {0x03bf}}}, + { 0x03a0, {1, {0x03c0}}}, + { 0x03a1, {1, {0x03c1}}}, + { 0x03a3, {1, {0x03c3}}}, + { 0x03a4, {1, {0x03c4}}}, + { 0x03a5, {1, {0x03c5}}}, + { 0x03a6, {1, {0x03c6}}}, + { 0x03a7, {1, {0x03c7}}}, + { 0x03a8, {1, {0x03c8}}}, + { 0x03a9, {1, {0x03c9}}}, + { 0x03aa, {1, {0x03ca}}}, + { 0x03ab, {1, {0x03cb}}}, + { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x03c2, {1, {0x03c3}}}, + { 0x03d0, {1, {0x03b2}}}, + { 0x03d1, {1, {0x03b8}}}, + { 0x03d5, {1, {0x03c6}}}, + { 0x03d6, {1, {0x03c0}}}, + { 0x03d8, {1, {0x03d9}}}, + { 0x03da, {1, {0x03db}}}, + { 0x03dc, {1, {0x03dd}}}, + { 0x03de, {1, {0x03df}}}, + { 0x03e0, {1, {0x03e1}}}, + { 0x03e2, {1, {0x03e3}}}, + { 0x03e4, {1, {0x03e5}}}, + { 0x03e6, {1, {0x03e7}}}, + { 0x03e8, {1, {0x03e9}}}, + { 0x03ea, {1, {0x03eb}}}, + { 0x03ec, {1, {0x03ed}}}, + { 0x03ee, {1, {0x03ef}}}, + { 0x03f0, {1, {0x03ba}}}, + { 0x03f1, {1, {0x03c1}}}, + { 0x03f4, {1, {0x03b8}}}, + { 0x03f5, {1, {0x03b5}}}, + { 0x03f7, {1, {0x03f8}}}, + { 0x03f9, {1, {0x03f2}}}, + { 0x03fa, {1, {0x03fb}}}, + { 0x0400, {1, {0x0450}}}, + { 0x0401, {1, {0x0451}}}, + { 0x0402, {1, {0x0452}}}, + { 0x0403, {1, {0x0453}}}, + { 0x0404, {1, {0x0454}}}, + { 0x0405, {1, {0x0455}}}, + { 0x0406, {1, {0x0456}}}, + { 0x0407, {1, {0x0457}}}, + { 0x0408, {1, {0x0458}}}, + { 0x0409, {1, {0x0459}}}, + { 0x040a, {1, {0x045a}}}, + { 0x040b, {1, {0x045b}}}, + { 0x040c, {1, {0x045c}}}, + { 0x040d, {1, {0x045d}}}, + { 0x040e, {1, {0x045e}}}, + { 0x040f, {1, {0x045f}}}, + { 0x0410, {1, {0x0430}}}, + { 0x0411, {1, {0x0431}}}, + { 0x0412, {1, {0x0432}}}, + { 0x0413, {1, {0x0433}}}, + { 0x0414, {1, {0x0434}}}, + { 0x0415, {1, {0x0435}}}, + { 0x0416, {1, {0x0436}}}, + { 0x0417, {1, {0x0437}}}, + { 0x0418, {1, {0x0438}}}, + { 0x0419, {1, {0x0439}}}, + { 0x041a, {1, {0x043a}}}, + { 0x041b, {1, {0x043b}}}, + { 0x041c, {1, {0x043c}}}, + { 0x041d, {1, {0x043d}}}, + { 0x041e, {1, {0x043e}}}, + { 0x041f, {1, {0x043f}}}, + { 0x0420, {1, {0x0440}}}, + { 0x0421, {1, {0x0441}}}, + { 0x0422, {1, {0x0442}}}, + { 0x0423, {1, {0x0443}}}, + { 0x0424, {1, {0x0444}}}, + { 0x0425, {1, {0x0445}}}, + { 0x0426, {1, {0x0446}}}, + { 0x0427, {1, {0x0447}}}, + { 0x0428, {1, {0x0448}}}, + { 0x0429, {1, {0x0449}}}, + { 0x042a, {1, {0x044a}}}, + { 0x042b, {1, {0x044b}}}, + { 0x042c, {1, {0x044c}}}, + { 0x042d, {1, {0x044d}}}, + { 0x042e, {1, {0x044e}}}, + { 0x042f, {1, {0x044f}}}, + { 0x0460, {1, {0x0461}}}, + { 0x0462, {1, {0x0463}}}, + { 0x0464, {1, {0x0465}}}, + { 0x0466, {1, {0x0467}}}, + { 0x0468, {1, {0x0469}}}, + { 0x046a, {1, {0x046b}}}, + { 0x046c, {1, {0x046d}}}, + { 0x046e, {1, {0x046f}}}, + { 0x0470, {1, {0x0471}}}, + { 0x0472, {1, {0x0473}}}, + { 0x0474, {1, {0x0475}}}, + { 0x0476, {1, {0x0477}}}, + { 0x0478, {1, {0x0479}}}, + { 0x047a, {1, {0x047b}}}, + { 0x047c, {1, {0x047d}}}, + { 0x047e, {1, {0x047f}}}, + { 0x0480, {1, {0x0481}}}, + { 0x048a, {1, {0x048b}}}, + { 0x048c, {1, {0x048d}}}, + { 0x048e, {1, {0x048f}}}, + { 0x0490, {1, {0x0491}}}, + { 0x0492, {1, {0x0493}}}, + { 0x0494, {1, {0x0495}}}, + { 0x0496, {1, {0x0497}}}, + { 0x0498, {1, {0x0499}}}, + { 0x049a, {1, {0x049b}}}, + { 0x049c, {1, {0x049d}}}, + { 0x049e, {1, {0x049f}}}, + { 0x04a0, {1, {0x04a1}}}, + { 0x04a2, {1, {0x04a3}}}, + { 0x04a4, {1, {0x04a5}}}, + { 0x04a6, {1, {0x04a7}}}, + { 0x04a8, {1, {0x04a9}}}, + { 0x04aa, {1, {0x04ab}}}, + { 0x04ac, {1, {0x04ad}}}, + { 0x04ae, {1, {0x04af}}}, + { 0x04b0, {1, {0x04b1}}}, + { 0x04b2, {1, {0x04b3}}}, + { 0x04b4, {1, {0x04b5}}}, + { 0x04b6, {1, {0x04b7}}}, + { 0x04b8, {1, {0x04b9}}}, + { 0x04ba, {1, {0x04bb}}}, + { 0x04bc, {1, {0x04bd}}}, + { 0x04be, {1, {0x04bf}}}, + { 0x04c1, {1, {0x04c2}}}, + { 0x04c3, {1, {0x04c4}}}, + { 0x04c5, {1, {0x04c6}}}, + { 0x04c7, {1, {0x04c8}}}, + { 0x04c9, {1, {0x04ca}}}, + { 0x04cb, {1, {0x04cc}}}, + { 0x04cd, {1, {0x04ce}}}, + { 0x04d0, {1, {0x04d1}}}, + { 0x04d2, {1, {0x04d3}}}, + { 0x04d4, {1, {0x04d5}}}, + { 0x04d6, {1, {0x04d7}}}, + { 0x04d8, {1, {0x04d9}}}, + { 0x04da, {1, {0x04db}}}, + { 0x04dc, {1, {0x04dd}}}, + { 0x04de, {1, {0x04df}}}, + { 0x04e0, {1, {0x04e1}}}, + { 0x04e2, {1, {0x04e3}}}, + { 0x04e4, {1, {0x04e5}}}, + { 0x04e6, {1, {0x04e7}}}, + { 0x04e8, {1, {0x04e9}}}, + { 0x04ea, {1, {0x04eb}}}, + { 0x04ec, {1, {0x04ed}}}, + { 0x04ee, {1, {0x04ef}}}, + { 0x04f0, {1, {0x04f1}}}, + { 0x04f2, {1, {0x04f3}}}, + { 0x04f4, {1, {0x04f5}}}, + { 0x04f6, {1, {0x04f7}}}, + { 0x04f8, {1, {0x04f9}}}, + { 0x0500, {1, {0x0501}}}, + { 0x0502, {1, {0x0503}}}, + { 0x0504, {1, {0x0505}}}, + { 0x0506, {1, {0x0507}}}, + { 0x0508, {1, {0x0509}}}, + { 0x050a, {1, {0x050b}}}, + { 0x050c, {1, {0x050d}}}, + { 0x050e, {1, {0x050f}}}, + { 0x0531, {1, {0x0561}}}, + { 0x0532, {1, {0x0562}}}, + { 0x0533, {1, {0x0563}}}, + { 0x0534, {1, {0x0564}}}, + { 0x0535, {1, {0x0565}}}, + { 0x0536, {1, {0x0566}}}, + { 0x0537, {1, {0x0567}}}, + { 0x0538, {1, {0x0568}}}, + { 0x0539, {1, {0x0569}}}, + { 0x053a, {1, {0x056a}}}, + { 0x053b, {1, {0x056b}}}, + { 0x053c, {1, {0x056c}}}, + { 0x053d, {1, {0x056d}}}, + { 0x053e, {1, {0x056e}}}, + { 0x053f, {1, {0x056f}}}, + { 0x0540, {1, {0x0570}}}, + { 0x0541, {1, {0x0571}}}, + { 0x0542, {1, {0x0572}}}, + { 0x0543, {1, {0x0573}}}, + { 0x0544, {1, {0x0574}}}, + { 0x0545, {1, {0x0575}}}, + { 0x0546, {1, {0x0576}}}, + { 0x0547, {1, {0x0577}}}, + { 0x0548, {1, {0x0578}}}, + { 0x0549, {1, {0x0579}}}, + { 0x054a, {1, {0x057a}}}, + { 0x054b, {1, {0x057b}}}, + { 0x054c, {1, {0x057c}}}, + { 0x054d, {1, {0x057d}}}, + { 0x054e, {1, {0x057e}}}, + { 0x054f, {1, {0x057f}}}, + { 0x0550, {1, {0x0580}}}, + { 0x0551, {1, {0x0581}}}, + { 0x0552, {1, {0x0582}}}, + { 0x0553, {1, {0x0583}}}, + { 0x0554, {1, {0x0584}}}, + { 0x0555, {1, {0x0585}}}, + { 0x0556, {1, {0x0586}}}, + { 0x0587, {2, {0x0565, 0x0582}}}, + { 0x10a0, {1, {0x2d00}}}, + { 0x10a1, {1, {0x2d01}}}, + { 0x10a2, {1, {0x2d02}}}, + { 0x10a3, {1, {0x2d03}}}, + { 0x10a4, {1, {0x2d04}}}, + { 0x10a5, {1, {0x2d05}}}, + { 0x10a6, {1, {0x2d06}}}, + { 0x10a7, {1, {0x2d07}}}, + { 0x10a8, {1, {0x2d08}}}, + { 0x10a9, {1, {0x2d09}}}, + { 0x10aa, {1, {0x2d0a}}}, + { 0x10ab, {1, {0x2d0b}}}, + { 0x10ac, {1, {0x2d0c}}}, + { 0x10ad, {1, {0x2d0d}}}, + { 0x10ae, {1, {0x2d0e}}}, + { 0x10af, {1, {0x2d0f}}}, + { 0x10b0, {1, {0x2d10}}}, + { 0x10b1, {1, {0x2d11}}}, + { 0x10b2, {1, {0x2d12}}}, + { 0x10b3, {1, {0x2d13}}}, + { 0x10b4, {1, {0x2d14}}}, + { 0x10b5, {1, {0x2d15}}}, + { 0x10b6, {1, {0x2d16}}}, + { 0x10b7, {1, {0x2d17}}}, + { 0x10b8, {1, {0x2d18}}}, + { 0x10b9, {1, {0x2d19}}}, + { 0x10ba, {1, {0x2d1a}}}, + { 0x10bb, {1, {0x2d1b}}}, + { 0x10bc, {1, {0x2d1c}}}, + { 0x10bd, {1, {0x2d1d}}}, + { 0x10be, {1, {0x2d1e}}}, + { 0x10bf, {1, {0x2d1f}}}, + { 0x10c0, {1, {0x2d20}}}, + { 0x10c1, {1, {0x2d21}}}, + { 0x10c2, {1, {0x2d22}}}, + { 0x10c3, {1, {0x2d23}}}, + { 0x10c4, {1, {0x2d24}}}, + { 0x10c5, {1, {0x2d25}}}, + { 0x1e00, {1, {0x1e01}}}, + { 0x1e02, {1, {0x1e03}}}, + { 0x1e04, {1, {0x1e05}}}, + { 0x1e06, {1, {0x1e07}}}, + { 0x1e08, {1, {0x1e09}}}, + { 0x1e0a, {1, {0x1e0b}}}, + { 0x1e0c, {1, {0x1e0d}}}, + { 0x1e0e, {1, {0x1e0f}}}, + { 0x1e10, {1, {0x1e11}}}, + { 0x1e12, {1, {0x1e13}}}, + { 0x1e14, {1, {0x1e15}}}, + { 0x1e16, {1, {0x1e17}}}, + { 0x1e18, {1, {0x1e19}}}, + { 0x1e1a, {1, {0x1e1b}}}, + { 0x1e1c, {1, {0x1e1d}}}, + { 0x1e1e, {1, {0x1e1f}}}, + { 0x1e20, {1, {0x1e21}}}, + { 0x1e22, {1, {0x1e23}}}, + { 0x1e24, {1, {0x1e25}}}, + { 0x1e26, {1, {0x1e27}}}, + { 0x1e28, {1, {0x1e29}}}, + { 0x1e2a, {1, {0x1e2b}}}, + { 0x1e2c, {1, {0x1e2d}}}, + { 0x1e2e, {1, {0x1e2f}}}, + { 0x1e30, {1, {0x1e31}}}, + { 0x1e32, {1, {0x1e33}}}, + { 0x1e34, {1, {0x1e35}}}, + { 0x1e36, {1, {0x1e37}}}, + { 0x1e38, {1, {0x1e39}}}, + { 0x1e3a, {1, {0x1e3b}}}, + { 0x1e3c, {1, {0x1e3d}}}, + { 0x1e3e, {1, {0x1e3f}}}, + { 0x1e40, {1, {0x1e41}}}, + { 0x1e42, {1, {0x1e43}}}, + { 0x1e44, {1, {0x1e45}}}, + { 0x1e46, {1, {0x1e47}}}, + { 0x1e48, {1, {0x1e49}}}, + { 0x1e4a, {1, {0x1e4b}}}, + { 0x1e4c, {1, {0x1e4d}}}, + { 0x1e4e, {1, {0x1e4f}}}, + { 0x1e50, {1, {0x1e51}}}, + { 0x1e52, {1, {0x1e53}}}, + { 0x1e54, {1, {0x1e55}}}, + { 0x1e56, {1, {0x1e57}}}, + { 0x1e58, {1, {0x1e59}}}, + { 0x1e5a, {1, {0x1e5b}}}, + { 0x1e5c, {1, {0x1e5d}}}, + { 0x1e5e, {1, {0x1e5f}}}, + { 0x1e60, {1, {0x1e61}}}, + { 0x1e62, {1, {0x1e63}}}, + { 0x1e64, {1, {0x1e65}}}, + { 0x1e66, {1, {0x1e67}}}, + { 0x1e68, {1, {0x1e69}}}, + { 0x1e6a, {1, {0x1e6b}}}, + { 0x1e6c, {1, {0x1e6d}}}, + { 0x1e6e, {1, {0x1e6f}}}, + { 0x1e70, {1, {0x1e71}}}, + { 0x1e72, {1, {0x1e73}}}, + { 0x1e74, {1, {0x1e75}}}, + { 0x1e76, {1, {0x1e77}}}, + { 0x1e78, {1, {0x1e79}}}, + { 0x1e7a, {1, {0x1e7b}}}, + { 0x1e7c, {1, {0x1e7d}}}, + { 0x1e7e, {1, {0x1e7f}}}, + { 0x1e80, {1, {0x1e81}}}, + { 0x1e82, {1, {0x1e83}}}, + { 0x1e84, {1, {0x1e85}}}, + { 0x1e86, {1, {0x1e87}}}, + { 0x1e88, {1, {0x1e89}}}, + { 0x1e8a, {1, {0x1e8b}}}, + { 0x1e8c, {1, {0x1e8d}}}, + { 0x1e8e, {1, {0x1e8f}}}, + { 0x1e90, {1, {0x1e91}}}, + { 0x1e92, {1, {0x1e93}}}, + { 0x1e94, {1, {0x1e95}}}, + { 0x1e96, {2, {0x0068, 0x0331}}}, + { 0x1e97, {2, {0x0074, 0x0308}}}, + { 0x1e98, {2, {0x0077, 0x030a}}}, + { 0x1e99, {2, {0x0079, 0x030a}}}, + { 0x1e9a, {2, {0x0061, 0x02be}}}, + { 0x1e9b, {1, {0x1e61}}}, + { 0x1ea0, {1, {0x1ea1}}}, + { 0x1ea2, {1, {0x1ea3}}}, + { 0x1ea4, {1, {0x1ea5}}}, + { 0x1ea6, {1, {0x1ea7}}}, + { 0x1ea8, {1, {0x1ea9}}}, + { 0x1eaa, {1, {0x1eab}}}, + { 0x1eac, {1, {0x1ead}}}, + { 0x1eae, {1, {0x1eaf}}}, + { 0x1eb0, {1, {0x1eb1}}}, + { 0x1eb2, {1, {0x1eb3}}}, + { 0x1eb4, {1, {0x1eb5}}}, + { 0x1eb6, {1, {0x1eb7}}}, + { 0x1eb8, {1, {0x1eb9}}}, + { 0x1eba, {1, {0x1ebb}}}, + { 0x1ebc, {1, {0x1ebd}}}, + { 0x1ebe, {1, {0x1ebf}}}, + { 0x1ec0, {1, {0x1ec1}}}, + { 0x1ec2, {1, {0x1ec3}}}, + { 0x1ec4, {1, {0x1ec5}}}, + { 0x1ec6, {1, {0x1ec7}}}, + { 0x1ec8, {1, {0x1ec9}}}, + { 0x1eca, {1, {0x1ecb}}}, + { 0x1ecc, {1, {0x1ecd}}}, + { 0x1ece, {1, {0x1ecf}}}, + { 0x1ed0, {1, {0x1ed1}}}, + { 0x1ed2, {1, {0x1ed3}}}, + { 0x1ed4, {1, {0x1ed5}}}, + { 0x1ed6, {1, {0x1ed7}}}, + { 0x1ed8, {1, {0x1ed9}}}, + { 0x1eda, {1, {0x1edb}}}, + { 0x1edc, {1, {0x1edd}}}, + { 0x1ede, {1, {0x1edf}}}, + { 0x1ee0, {1, {0x1ee1}}}, + { 0x1ee2, {1, {0x1ee3}}}, + { 0x1ee4, {1, {0x1ee5}}}, + { 0x1ee6, {1, {0x1ee7}}}, + { 0x1ee8, {1, {0x1ee9}}}, + { 0x1eea, {1, {0x1eeb}}}, + { 0x1eec, {1, {0x1eed}}}, + { 0x1eee, {1, {0x1eef}}}, + { 0x1ef0, {1, {0x1ef1}}}, + { 0x1ef2, {1, {0x1ef3}}}, + { 0x1ef4, {1, {0x1ef5}}}, + { 0x1ef6, {1, {0x1ef7}}}, + { 0x1ef8, {1, {0x1ef9}}}, + { 0x1f08, {1, {0x1f00}}}, + { 0x1f09, {1, {0x1f01}}}, + { 0x1f0a, {1, {0x1f02}}}, + { 0x1f0b, {1, {0x1f03}}}, + { 0x1f0c, {1, {0x1f04}}}, + { 0x1f0d, {1, {0x1f05}}}, + { 0x1f0e, {1, {0x1f06}}}, + { 0x1f0f, {1, {0x1f07}}}, + { 0x1f18, {1, {0x1f10}}}, + { 0x1f19, {1, {0x1f11}}}, + { 0x1f1a, {1, {0x1f12}}}, + { 0x1f1b, {1, {0x1f13}}}, + { 0x1f1c, {1, {0x1f14}}}, + { 0x1f1d, {1, {0x1f15}}}, + { 0x1f28, {1, {0x1f20}}}, + { 0x1f29, {1, {0x1f21}}}, + { 0x1f2a, {1, {0x1f22}}}, + { 0x1f2b, {1, {0x1f23}}}, + { 0x1f2c, {1, {0x1f24}}}, + { 0x1f2d, {1, {0x1f25}}}, + { 0x1f2e, {1, {0x1f26}}}, + { 0x1f2f, {1, {0x1f27}}}, + { 0x1f38, {1, {0x1f30}}}, + { 0x1f39, {1, {0x1f31}}}, + { 0x1f3a, {1, {0x1f32}}}, + { 0x1f3b, {1, {0x1f33}}}, + { 0x1f3c, {1, {0x1f34}}}, + { 0x1f3d, {1, {0x1f35}}}, + { 0x1f3e, {1, {0x1f36}}}, + { 0x1f3f, {1, {0x1f37}}}, + { 0x1f48, {1, {0x1f40}}}, + { 0x1f49, {1, {0x1f41}}}, + { 0x1f4a, {1, {0x1f42}}}, + { 0x1f4b, {1, {0x1f43}}}, + { 0x1f4c, {1, {0x1f44}}}, + { 0x1f4d, {1, {0x1f45}}}, + { 0x1f50, {2, {0x03c5, 0x0313}}}, + { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}}, + { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}}, + { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}}, + { 0x1f59, {1, {0x1f51}}}, + { 0x1f5b, {1, {0x1f53}}}, + { 0x1f5d, {1, {0x1f55}}}, + { 0x1f5f, {1, {0x1f57}}}, + { 0x1f68, {1, {0x1f60}}}, + { 0x1f69, {1, {0x1f61}}}, + { 0x1f6a, {1, {0x1f62}}}, + { 0x1f6b, {1, {0x1f63}}}, + { 0x1f6c, {1, {0x1f64}}}, + { 0x1f6d, {1, {0x1f65}}}, + { 0x1f6e, {1, {0x1f66}}}, + { 0x1f6f, {1, {0x1f67}}}, + { 0x1f80, {2, {0x1f00, 0x03b9}}}, + { 0x1f81, {2, {0x1f01, 0x03b9}}}, + { 0x1f82, {2, {0x1f02, 0x03b9}}}, + { 0x1f83, {2, {0x1f03, 0x03b9}}}, + { 0x1f84, {2, {0x1f04, 0x03b9}}}, + { 0x1f85, {2, {0x1f05, 0x03b9}}}, + { 0x1f86, {2, {0x1f06, 0x03b9}}}, + { 0x1f87, {2, {0x1f07, 0x03b9}}}, + { 0x1f88, {2, {0x1f00, 0x03b9}}}, + { 0x1f89, {2, {0x1f01, 0x03b9}}}, + { 0x1f8a, {2, {0x1f02, 0x03b9}}}, + { 0x1f8b, {2, {0x1f03, 0x03b9}}}, + { 0x1f8c, {2, {0x1f04, 0x03b9}}}, + { 0x1f8d, {2, {0x1f05, 0x03b9}}}, + { 0x1f8e, {2, {0x1f06, 0x03b9}}}, + { 0x1f8f, {2, {0x1f07, 0x03b9}}}, + { 0x1f90, {2, {0x1f20, 0x03b9}}}, + { 0x1f91, {2, {0x1f21, 0x03b9}}}, + { 0x1f92, {2, {0x1f22, 0x03b9}}}, + { 0x1f93, {2, {0x1f23, 0x03b9}}}, + { 0x1f94, {2, {0x1f24, 0x03b9}}}, + { 0x1f95, {2, {0x1f25, 0x03b9}}}, + { 0x1f96, {2, {0x1f26, 0x03b9}}}, + { 0x1f97, {2, {0x1f27, 0x03b9}}}, + { 0x1f98, {2, {0x1f20, 0x03b9}}}, + { 0x1f99, {2, {0x1f21, 0x03b9}}}, + { 0x1f9a, {2, {0x1f22, 0x03b9}}}, + { 0x1f9b, {2, {0x1f23, 0x03b9}}}, + { 0x1f9c, {2, {0x1f24, 0x03b9}}}, + { 0x1f9d, {2, {0x1f25, 0x03b9}}}, + { 0x1f9e, {2, {0x1f26, 0x03b9}}}, + { 0x1f9f, {2, {0x1f27, 0x03b9}}}, + { 0x1fa0, {2, {0x1f60, 0x03b9}}}, + { 0x1fa1, {2, {0x1f61, 0x03b9}}}, + { 0x1fa2, {2, {0x1f62, 0x03b9}}}, + { 0x1fa3, {2, {0x1f63, 0x03b9}}}, + { 0x1fa4, {2, {0x1f64, 0x03b9}}}, + { 0x1fa5, {2, {0x1f65, 0x03b9}}}, + { 0x1fa6, {2, {0x1f66, 0x03b9}}}, + { 0x1fa7, {2, {0x1f67, 0x03b9}}}, + { 0x1fa8, {2, {0x1f60, 0x03b9}}}, + { 0x1fa9, {2, {0x1f61, 0x03b9}}}, + { 0x1faa, {2, {0x1f62, 0x03b9}}}, + { 0x1fab, {2, {0x1f63, 0x03b9}}}, + { 0x1fac, {2, {0x1f64, 0x03b9}}}, + { 0x1fad, {2, {0x1f65, 0x03b9}}}, + { 0x1fae, {2, {0x1f66, 0x03b9}}}, + { 0x1faf, {2, {0x1f67, 0x03b9}}}, + { 0x1fb2, {2, {0x1f70, 0x03b9}}}, + { 0x1fb3, {2, {0x03b1, 0x03b9}}}, + { 0x1fb4, {2, {0x03ac, 0x03b9}}}, + { 0x1fb6, {2, {0x03b1, 0x0342}}}, + { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}}, + { 0x1fb8, {1, {0x1fb0}}}, + { 0x1fb9, {1, {0x1fb1}}}, + { 0x1fba, {1, {0x1f70}}}, + { 0x1fbb, {1, {0x1f71}}}, + { 0x1fbc, {2, {0x03b1, 0x03b9}}}, + { 0x1fbe, {1, {0x03b9}}}, + { 0x1fc2, {2, {0x1f74, 0x03b9}}}, + { 0x1fc3, {2, {0x03b7, 0x03b9}}}, + { 0x1fc4, {2, {0x03ae, 0x03b9}}}, + { 0x1fc6, {2, {0x03b7, 0x0342}}}, + { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}}, + { 0x1fc8, {1, {0x1f72}}}, + { 0x1fc9, {1, {0x1f73}}}, + { 0x1fca, {1, {0x1f74}}}, + { 0x1fcb, {1, {0x1f75}}}, + { 0x1fcc, {2, {0x03b7, 0x03b9}}}, + { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}}, + { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x1fd6, {2, {0x03b9, 0x0342}}}, + { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}}, + { 0x1fd8, {1, {0x1fd0}}}, + { 0x1fd9, {1, {0x1fd1}}}, + { 0x1fda, {1, {0x1f76}}}, + { 0x1fdb, {1, {0x1f77}}}, + { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}}, + { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x1fe4, {2, {0x03c1, 0x0313}}}, + { 0x1fe6, {2, {0x03c5, 0x0342}}}, + { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}}, + { 0x1fe8, {1, {0x1fe0}}}, + { 0x1fe9, {1, {0x1fe1}}}, + { 0x1fea, {1, {0x1f7a}}}, + { 0x1feb, {1, {0x1f7b}}}, + { 0x1fec, {1, {0x1fe5}}}, + { 0x1ff2, {2, {0x1f7c, 0x03b9}}}, + { 0x1ff3, {2, {0x03c9, 0x03b9}}}, + { 0x1ff4, {2, {0x03ce, 0x03b9}}}, + { 0x1ff6, {2, {0x03c9, 0x0342}}}, + { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}}, + { 0x1ff8, {1, {0x1f78}}}, + { 0x1ff9, {1, {0x1f79}}}, + { 0x1ffa, {1, {0x1f7c}}}, + { 0x1ffb, {1, {0x1f7d}}}, + { 0x1ffc, {2, {0x03c9, 0x03b9}}}, + { 0x2126, {1, {0x03c9}}}, + { 0x212a, {1, {0x006b}}}, + { 0x212b, {1, {0x00e5}}}, + { 0x2160, {1, {0x2170}}}, + { 0x2161, {1, {0x2171}}}, + { 0x2162, {1, {0x2172}}}, + { 0x2163, {1, {0x2173}}}, + { 0x2164, {1, {0x2174}}}, + { 0x2165, {1, {0x2175}}}, + { 0x2166, {1, {0x2176}}}, + { 0x2167, {1, {0x2177}}}, + { 0x2168, {1, {0x2178}}}, + { 0x2169, {1, {0x2179}}}, + { 0x216a, {1, {0x217a}}}, + { 0x216b, {1, {0x217b}}}, + { 0x216c, {1, {0x217c}}}, + { 0x216d, {1, {0x217d}}}, + { 0x216e, {1, {0x217e}}}, + { 0x216f, {1, {0x217f}}}, + { 0x24b6, {1, {0x24d0}}}, + { 0x24b7, {1, {0x24d1}}}, + { 0x24b8, {1, {0x24d2}}}, + { 0x24b9, {1, {0x24d3}}}, + { 0x24ba, {1, {0x24d4}}}, + { 0x24bb, {1, {0x24d5}}}, + { 0x24bc, {1, {0x24d6}}}, + { 0x24bd, {1, {0x24d7}}}, + { 0x24be, {1, {0x24d8}}}, + { 0x24bf, {1, {0x24d9}}}, + { 0x24c0, {1, {0x24da}}}, + { 0x24c1, {1, {0x24db}}}, + { 0x24c2, {1, {0x24dc}}}, + { 0x24c3, {1, {0x24dd}}}, + { 0x24c4, {1, {0x24de}}}, + { 0x24c5, {1, {0x24df}}}, + { 0x24c6, {1, {0x24e0}}}, + { 0x24c7, {1, {0x24e1}}}, + { 0x24c8, {1, {0x24e2}}}, + { 0x24c9, {1, {0x24e3}}}, + { 0x24ca, {1, {0x24e4}}}, + { 0x24cb, {1, {0x24e5}}}, + { 0x24cc, {1, {0x24e6}}}, + { 0x24cd, {1, {0x24e7}}}, + { 0x24ce, {1, {0x24e8}}}, + { 0x24cf, {1, {0x24e9}}}, + { 0x2c00, {1, {0x2c30}}}, + { 0x2c01, {1, {0x2c31}}}, + { 0x2c02, {1, {0x2c32}}}, + { 0x2c03, {1, {0x2c33}}}, + { 0x2c04, {1, {0x2c34}}}, + { 0x2c05, {1, {0x2c35}}}, + { 0x2c06, {1, {0x2c36}}}, + { 0x2c07, {1, {0x2c37}}}, + { 0x2c08, {1, {0x2c38}}}, + { 0x2c09, {1, {0x2c39}}}, + { 0x2c0a, {1, {0x2c3a}}}, + { 0x2c0b, {1, {0x2c3b}}}, + { 0x2c0c, {1, {0x2c3c}}}, + { 0x2c0d, {1, {0x2c3d}}}, + { 0x2c0e, {1, {0x2c3e}}}, + { 0x2c0f, {1, {0x2c3f}}}, + { 0x2c10, {1, {0x2c40}}}, + { 0x2c11, {1, {0x2c41}}}, + { 0x2c12, {1, {0x2c42}}}, + { 0x2c13, {1, {0x2c43}}}, + { 0x2c14, {1, {0x2c44}}}, + { 0x2c15, {1, {0x2c45}}}, + { 0x2c16, {1, {0x2c46}}}, + { 0x2c17, {1, {0x2c47}}}, + { 0x2c18, {1, {0x2c48}}}, + { 0x2c19, {1, {0x2c49}}}, + { 0x2c1a, {1, {0x2c4a}}}, + { 0x2c1b, {1, {0x2c4b}}}, + { 0x2c1c, {1, {0x2c4c}}}, + { 0x2c1d, {1, {0x2c4d}}}, + { 0x2c1e, {1, {0x2c4e}}}, + { 0x2c1f, {1, {0x2c4f}}}, + { 0x2c20, {1, {0x2c50}}}, + { 0x2c21, {1, {0x2c51}}}, + { 0x2c22, {1, {0x2c52}}}, + { 0x2c23, {1, {0x2c53}}}, + { 0x2c24, {1, {0x2c54}}}, + { 0x2c25, {1, {0x2c55}}}, + { 0x2c26, {1, {0x2c56}}}, + { 0x2c27, {1, {0x2c57}}}, + { 0x2c28, {1, {0x2c58}}}, + { 0x2c29, {1, {0x2c59}}}, + { 0x2c2a, {1, {0x2c5a}}}, + { 0x2c2b, {1, {0x2c5b}}}, + { 0x2c2c, {1, {0x2c5c}}}, + { 0x2c2d, {1, {0x2c5d}}}, + { 0x2c2e, {1, {0x2c5e}}}, + { 0x2c80, {1, {0x2c81}}}, + { 0x2c82, {1, {0x2c83}}}, + { 0x2c84, {1, {0x2c85}}}, + { 0x2c86, {1, {0x2c87}}}, + { 0x2c88, {1, {0x2c89}}}, + { 0x2c8a, {1, {0x2c8b}}}, + { 0x2c8c, {1, {0x2c8d}}}, + { 0x2c8e, {1, {0x2c8f}}}, + { 0x2c90, {1, {0x2c91}}}, + { 0x2c92, {1, {0x2c93}}}, + { 0x2c94, {1, {0x2c95}}}, + { 0x2c96, {1, {0x2c97}}}, + { 0x2c98, {1, {0x2c99}}}, + { 0x2c9a, {1, {0x2c9b}}}, + { 0x2c9c, {1, {0x2c9d}}}, + { 0x2c9e, {1, {0x2c9f}}}, + { 0x2ca0, {1, {0x2ca1}}}, + { 0x2ca2, {1, {0x2ca3}}}, + { 0x2ca4, {1, {0x2ca5}}}, + { 0x2ca6, {1, {0x2ca7}}}, + { 0x2ca8, {1, {0x2ca9}}}, + { 0x2caa, {1, {0x2cab}}}, + { 0x2cac, {1, {0x2cad}}}, + { 0x2cae, {1, {0x2caf}}}, + { 0x2cb0, {1, {0x2cb1}}}, + { 0x2cb2, {1, {0x2cb3}}}, + { 0x2cb4, {1, {0x2cb5}}}, + { 0x2cb6, {1, {0x2cb7}}}, + { 0x2cb8, {1, {0x2cb9}}}, + { 0x2cba, {1, {0x2cbb}}}, + { 0x2cbc, {1, {0x2cbd}}}, + { 0x2cbe, {1, {0x2cbf}}}, + { 0x2cc0, {1, {0x2cc1}}}, + { 0x2cc2, {1, {0x2cc3}}}, + { 0x2cc4, {1, {0x2cc5}}}, + { 0x2cc6, {1, {0x2cc7}}}, + { 0x2cc8, {1, {0x2cc9}}}, + { 0x2cca, {1, {0x2ccb}}}, + { 0x2ccc, {1, {0x2ccd}}}, + { 0x2cce, {1, {0x2ccf}}}, + { 0x2cd0, {1, {0x2cd1}}}, + { 0x2cd2, {1, {0x2cd3}}}, + { 0x2cd4, {1, {0x2cd5}}}, + { 0x2cd6, {1, {0x2cd7}}}, + { 0x2cd8, {1, {0x2cd9}}}, + { 0x2cda, {1, {0x2cdb}}}, + { 0x2cdc, {1, {0x2cdd}}}, + { 0x2cde, {1, {0x2cdf}}}, + { 0x2ce0, {1, {0x2ce1}}}, + { 0x2ce2, {1, {0x2ce3}}}, + { 0xfb00, {2, {0x0066, 0x0066}}}, + { 0xfb01, {2, {0x0066, 0x0069}}}, + { 0xfb02, {2, {0x0066, 0x006c}}}, + { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}}, + { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}}, + { 0xfb05, {2, {0x0073, 0x0074}}}, + { 0xfb06, {2, {0x0073, 0x0074}}}, + { 0xfb13, {2, {0x0574, 0x0576}}}, + { 0xfb14, {2, {0x0574, 0x0565}}}, + { 0xfb15, {2, {0x0574, 0x056b}}}, + { 0xfb16, {2, {0x057e, 0x0576}}}, + { 0xfb17, {2, {0x0574, 0x056d}}}, + { 0xff21, {1, {0xff41}}}, + { 0xff22, {1, {0xff42}}}, + { 0xff23, {1, {0xff43}}}, + { 0xff24, {1, {0xff44}}}, + { 0xff25, {1, {0xff45}}}, + { 0xff26, {1, {0xff46}}}, + { 0xff27, {1, {0xff47}}}, + { 0xff28, {1, {0xff48}}}, + { 0xff29, {1, {0xff49}}}, + { 0xff2a, {1, {0xff4a}}}, + { 0xff2b, {1, {0xff4b}}}, + { 0xff2c, {1, {0xff4c}}}, + { 0xff2d, {1, {0xff4d}}}, + { 0xff2e, {1, {0xff4e}}}, + { 0xff2f, {1, {0xff4f}}}, + { 0xff30, {1, {0xff50}}}, + { 0xff31, {1, {0xff51}}}, + { 0xff32, {1, {0xff52}}}, + { 0xff33, {1, {0xff53}}}, + { 0xff34, {1, {0xff54}}}, + { 0xff35, {1, {0xff55}}}, + { 0xff36, {1, {0xff56}}}, + { 0xff37, {1, {0xff57}}}, + { 0xff38, {1, {0xff58}}}, + { 0xff39, {1, {0xff59}}}, + { 0xff3a, {1, {0xff5a}}}, + { 0x10400, {1, {0x10428}}}, + { 0x10401, {1, {0x10429}}}, + { 0x10402, {1, {0x1042a}}}, + { 0x10403, {1, {0x1042b}}}, + { 0x10404, {1, {0x1042c}}}, + { 0x10405, {1, {0x1042d}}}, + { 0x10406, {1, {0x1042e}}}, + { 0x10407, {1, {0x1042f}}}, + { 0x10408, {1, {0x10430}}}, + { 0x10409, {1, {0x10431}}}, + { 0x1040a, {1, {0x10432}}}, + { 0x1040b, {1, {0x10433}}}, + { 0x1040c, {1, {0x10434}}}, + { 0x1040d, {1, {0x10435}}}, + { 0x1040e, {1, {0x10436}}}, + { 0x1040f, {1, {0x10437}}}, + { 0x10410, {1, {0x10438}}}, + { 0x10411, {1, {0x10439}}}, + { 0x10412, {1, {0x1043a}}}, + { 0x10413, {1, {0x1043b}}}, + { 0x10414, {1, {0x1043c}}}, + { 0x10415, {1, {0x1043d}}}, + { 0x10416, {1, {0x1043e}}}, + { 0x10417, {1, {0x1043f}}}, + { 0x10418, {1, {0x10440}}}, + { 0x10419, {1, {0x10441}}}, + { 0x1041a, {1, {0x10442}}}, + { 0x1041b, {1, {0x10443}}}, + { 0x1041c, {1, {0x10444}}}, + { 0x1041d, {1, {0x10445}}}, + { 0x1041e, {1, {0x10446}}}, + { 0x1041f, {1, {0x10447}}}, + { 0x10420, {1, {0x10448}}}, + { 0x10421, {1, {0x10449}}}, + { 0x10422, {1, {0x1044a}}}, + { 0x10423, {1, {0x1044b}}}, + { 0x10424, {1, {0x1044c}}}, + { 0x10425, {1, {0x1044d}}}, + { 0x10426, {1, {0x1044e}}}, + { 0x10427, {1, {0x1044f}}} +}; + +static const CaseFold_11_Type CaseFold_Locale[] = { + { 0x0049, {1, {0x0069}}}, + { 0x0130, {2, {0x0069, 0x0307}}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11[] = { + { 0x0061, {1, {0x0041 }}}, + { 0x0062, {1, {0x0042 }}}, + { 0x0063, {1, {0x0043 }}}, + { 0x0064, {1, {0x0044 }}}, + { 0x0065, {1, {0x0045 }}}, + { 0x0066, {1, {0x0046 }}}, + { 0x0067, {1, {0x0047 }}}, + { 0x0068, {1, {0x0048 }}}, + { 0x006a, {1, {0x004a }}}, + { 0x006b, {2, {0x212a, 0x004b }}}, + { 0x006c, {1, {0x004c }}}, + { 0x006d, {1, {0x004d }}}, + { 0x006e, {1, {0x004e }}}, + { 0x006f, {1, {0x004f }}}, + { 0x0070, {1, {0x0050 }}}, + { 0x0071, {1, {0x0051 }}}, + { 0x0072, {1, {0x0052 }}}, + { 0x0073, {2, {0x0053, 0x017f }}}, + { 0x0074, {1, {0x0054 }}}, + { 0x0075, {1, {0x0055 }}}, + { 0x0076, {1, {0x0056 }}}, + { 0x0077, {1, {0x0057 }}}, + { 0x0078, {1, {0x0058 }}}, + { 0x0079, {1, {0x0059 }}}, + { 0x007a, {1, {0x005a }}}, + { 0x00e0, {1, {0x00c0 }}}, + { 0x00e1, {1, {0x00c1 }}}, + { 0x00e2, {1, {0x00c2 }}}, + { 0x00e3, {1, {0x00c3 }}}, + { 0x00e4, {1, {0x00c4 }}}, + { 0x00e5, {2, {0x212b, 0x00c5 }}}, + { 0x00e6, {1, {0x00c6 }}}, + { 0x00e7, {1, {0x00c7 }}}, + { 0x00e8, {1, {0x00c8 }}}, + { 0x00e9, {1, {0x00c9 }}}, + { 0x00ea, {1, {0x00ca }}}, + { 0x00eb, {1, {0x00cb }}}, + { 0x00ec, {1, {0x00cc }}}, + { 0x00ed, {1, {0x00cd }}}, + { 0x00ee, {1, {0x00ce }}}, + { 0x00ef, {1, {0x00cf }}}, + { 0x00f0, {1, {0x00d0 }}}, + { 0x00f1, {1, {0x00d1 }}}, + { 0x00f2, {1, {0x00d2 }}}, + { 0x00f3, {1, {0x00d3 }}}, + { 0x00f4, {1, {0x00d4 }}}, + { 0x00f5, {1, {0x00d5 }}}, + { 0x00f6, {1, {0x00d6 }}}, + { 0x00f8, {1, {0x00d8 }}}, + { 0x00f9, {1, {0x00d9 }}}, + { 0x00fa, {1, {0x00da }}}, + { 0x00fb, {1, {0x00db }}}, + { 0x00fc, {1, {0x00dc }}}, + { 0x00fd, {1, {0x00dd }}}, + { 0x00fe, {1, {0x00de }}}, + { 0x00ff, {1, {0x0178 }}}, + { 0x0101, {1, {0x0100 }}}, + { 0x0103, {1, {0x0102 }}}, + { 0x0105, {1, {0x0104 }}}, + { 0x0107, {1, {0x0106 }}}, + { 0x0109, {1, {0x0108 }}}, + { 0x010b, {1, {0x010a }}}, + { 0x010d, {1, {0x010c }}}, + { 0x010f, {1, {0x010e }}}, + { 0x0111, {1, {0x0110 }}}, + { 0x0113, {1, {0x0112 }}}, + { 0x0115, {1, {0x0114 }}}, + { 0x0117, {1, {0x0116 }}}, + { 0x0119, {1, {0x0118 }}}, + { 0x011b, {1, {0x011a }}}, + { 0x011d, {1, {0x011c }}}, + { 0x011f, {1, {0x011e }}}, + { 0x0121, {1, {0x0120 }}}, + { 0x0123, {1, {0x0122 }}}, + { 0x0125, {1, {0x0124 }}}, + { 0x0127, {1, {0x0126 }}}, + { 0x0129, {1, {0x0128 }}}, + { 0x012b, {1, {0x012a }}}, + { 0x012d, {1, {0x012c }}}, + { 0x012f, {1, {0x012e }}}, + { 0x0133, {1, {0x0132 }}}, + { 0x0135, {1, {0x0134 }}}, + { 0x0137, {1, {0x0136 }}}, + { 0x013a, {1, {0x0139 }}}, + { 0x013c, {1, {0x013b }}}, + { 0x013e, {1, {0x013d }}}, + { 0x0140, {1, {0x013f }}}, + { 0x0142, {1, {0x0141 }}}, + { 0x0144, {1, {0x0143 }}}, + { 0x0146, {1, {0x0145 }}}, + { 0x0148, {1, {0x0147 }}}, + { 0x014b, {1, {0x014a }}}, + { 0x014d, {1, {0x014c }}}, + { 0x014f, {1, {0x014e }}}, + { 0x0151, {1, {0x0150 }}}, + { 0x0153, {1, {0x0152 }}}, + { 0x0155, {1, {0x0154 }}}, + { 0x0157, {1, {0x0156 }}}, + { 0x0159, {1, {0x0158 }}}, + { 0x015b, {1, {0x015a }}}, + { 0x015d, {1, {0x015c }}}, + { 0x015f, {1, {0x015e }}}, + { 0x0161, {1, {0x0160 }}}, + { 0x0163, {1, {0x0162 }}}, + { 0x0165, {1, {0x0164 }}}, + { 0x0167, {1, {0x0166 }}}, + { 0x0169, {1, {0x0168 }}}, + { 0x016b, {1, {0x016a }}}, + { 0x016d, {1, {0x016c }}}, + { 0x016f, {1, {0x016e }}}, + { 0x0171, {1, {0x0170 }}}, + { 0x0173, {1, {0x0172 }}}, + { 0x0175, {1, {0x0174 }}}, + { 0x0177, {1, {0x0176 }}}, + { 0x017a, {1, {0x0179 }}}, + { 0x017c, {1, {0x017b }}}, + { 0x017e, {1, {0x017d }}}, + { 0x0183, {1, {0x0182 }}}, + { 0x0185, {1, {0x0184 }}}, + { 0x0188, {1, {0x0187 }}}, + { 0x018c, {1, {0x018b }}}, + { 0x0192, {1, {0x0191 }}}, + { 0x0195, {1, {0x01f6 }}}, + { 0x0199, {1, {0x0198 }}}, + { 0x019a, {1, {0x023d }}}, + { 0x019e, {1, {0x0220 }}}, + { 0x01a1, {1, {0x01a0 }}}, + { 0x01a3, {1, {0x01a2 }}}, + { 0x01a5, {1, {0x01a4 }}}, + { 0x01a8, {1, {0x01a7 }}}, + { 0x01ad, {1, {0x01ac }}}, + { 0x01b0, {1, {0x01af }}}, + { 0x01b4, {1, {0x01b3 }}}, + { 0x01b6, {1, {0x01b5 }}}, + { 0x01b9, {1, {0x01b8 }}}, + { 0x01bd, {1, {0x01bc }}}, + { 0x01bf, {1, {0x01f7 }}}, + { 0x01c6, {2, {0x01c4, 0x01c5 }}}, + { 0x01c9, {2, {0x01c7, 0x01c8 }}}, + { 0x01cc, {2, {0x01ca, 0x01cb }}}, + { 0x01ce, {1, {0x01cd }}}, + { 0x01d0, {1, {0x01cf }}}, + { 0x01d2, {1, {0x01d1 }}}, + { 0x01d4, {1, {0x01d3 }}}, + { 0x01d6, {1, {0x01d5 }}}, + { 0x01d8, {1, {0x01d7 }}}, + { 0x01da, {1, {0x01d9 }}}, + { 0x01dc, {1, {0x01db }}}, + { 0x01dd, {1, {0x018e }}}, + { 0x01df, {1, {0x01de }}}, + { 0x01e1, {1, {0x01e0 }}}, + { 0x01e3, {1, {0x01e2 }}}, + { 0x01e5, {1, {0x01e4 }}}, + { 0x01e7, {1, {0x01e6 }}}, + { 0x01e9, {1, {0x01e8 }}}, + { 0x01eb, {1, {0x01ea }}}, + { 0x01ed, {1, {0x01ec }}}, + { 0x01ef, {1, {0x01ee }}}, + { 0x01f3, {2, {0x01f1, 0x01f2 }}}, + { 0x01f5, {1, {0x01f4 }}}, + { 0x01f9, {1, {0x01f8 }}}, + { 0x01fb, {1, {0x01fa }}}, + { 0x01fd, {1, {0x01fc }}}, + { 0x01ff, {1, {0x01fe }}}, + { 0x0201, {1, {0x0200 }}}, + { 0x0203, {1, {0x0202 }}}, + { 0x0205, {1, {0x0204 }}}, + { 0x0207, {1, {0x0206 }}}, + { 0x0209, {1, {0x0208 }}}, + { 0x020b, {1, {0x020a }}}, + { 0x020d, {1, {0x020c }}}, + { 0x020f, {1, {0x020e }}}, + { 0x0211, {1, {0x0210 }}}, + { 0x0213, {1, {0x0212 }}}, + { 0x0215, {1, {0x0214 }}}, + { 0x0217, {1, {0x0216 }}}, + { 0x0219, {1, {0x0218 }}}, + { 0x021b, {1, {0x021a }}}, + { 0x021d, {1, {0x021c }}}, + { 0x021f, {1, {0x021e }}}, + { 0x0223, {1, {0x0222 }}}, + { 0x0225, {1, {0x0224 }}}, + { 0x0227, {1, {0x0226 }}}, + { 0x0229, {1, {0x0228 }}}, + { 0x022b, {1, {0x022a }}}, + { 0x022d, {1, {0x022c }}}, + { 0x022f, {1, {0x022e }}}, + { 0x0231, {1, {0x0230 }}}, + { 0x0233, {1, {0x0232 }}}, + { 0x023c, {1, {0x023b }}}, + { 0x0253, {1, {0x0181 }}}, + { 0x0254, {1, {0x0186 }}}, + { 0x0256, {1, {0x0189 }}}, + { 0x0257, {1, {0x018a }}}, + { 0x0259, {1, {0x018f }}}, + { 0x025b, {1, {0x0190 }}}, + { 0x0260, {1, {0x0193 }}}, + { 0x0263, {1, {0x0194 }}}, + { 0x0268, {1, {0x0197 }}}, + { 0x0269, {1, {0x0196 }}}, + { 0x026f, {1, {0x019c }}}, + { 0x0272, {1, {0x019d }}}, + { 0x0275, {1, {0x019f }}}, + { 0x0280, {1, {0x01a6 }}}, + { 0x0283, {1, {0x01a9 }}}, + { 0x0288, {1, {0x01ae }}}, + { 0x028a, {1, {0x01b1 }}}, + { 0x028b, {1, {0x01b2 }}}, + { 0x0292, {1, {0x01b7 }}}, + { 0x0294, {1, {0x0241 }}}, + { 0x03ac, {1, {0x0386 }}}, + { 0x03ad, {1, {0x0388 }}}, + { 0x03ae, {1, {0x0389 }}}, + { 0x03af, {1, {0x038a }}}, + { 0x03b1, {1, {0x0391 }}}, + { 0x03b2, {2, {0x0392, 0x03d0 }}}, + { 0x03b3, {1, {0x0393 }}}, + { 0x03b4, {1, {0x0394 }}}, + { 0x03b5, {2, {0x03f5, 0x0395 }}}, + { 0x03b6, {1, {0x0396 }}}, + { 0x03b7, {1, {0x0397 }}}, + { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}}, + { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}}, + { 0x03ba, {2, {0x03f0, 0x039a }}}, + { 0x03bb, {1, {0x039b }}}, + { 0x03bc, {2, {0x00b5, 0x039c }}}, + { 0x03bd, {1, {0x039d }}}, + { 0x03be, {1, {0x039e }}}, + { 0x03bf, {1, {0x039f }}}, + { 0x03c0, {2, {0x03a0, 0x03d6 }}}, + { 0x03c1, {2, {0x03f1, 0x03a1 }}}, + { 0x03c3, {2, {0x03a3, 0x03c2 }}}, + { 0x03c4, {1, {0x03a4 }}}, + { 0x03c5, {1, {0x03a5 }}}, + { 0x03c6, {2, {0x03a6, 0x03d5 }}}, + { 0x03c7, {1, {0x03a7 }}}, + { 0x03c8, {1, {0x03a8 }}}, + { 0x03c9, {2, {0x03a9, 0x2126 }}}, + { 0x03ca, {1, {0x03aa }}}, + { 0x03cb, {1, {0x03ab }}}, + { 0x03cc, {1, {0x038c }}}, + { 0x03cd, {1, {0x038e }}}, + { 0x03ce, {1, {0x038f }}}, + { 0x03d9, {1, {0x03d8 }}}, + { 0x03db, {1, {0x03da }}}, + { 0x03dd, {1, {0x03dc }}}, + { 0x03df, {1, {0x03de }}}, + { 0x03e1, {1, {0x03e0 }}}, + { 0x03e3, {1, {0x03e2 }}}, + { 0x03e5, {1, {0x03e4 }}}, + { 0x03e7, {1, {0x03e6 }}}, + { 0x03e9, {1, {0x03e8 }}}, + { 0x03eb, {1, {0x03ea }}}, + { 0x03ed, {1, {0x03ec }}}, + { 0x03ef, {1, {0x03ee }}}, + { 0x03f2, {1, {0x03f9 }}}, + { 0x03f8, {1, {0x03f7 }}}, + { 0x03fb, {1, {0x03fa }}}, + { 0x0430, {1, {0x0410 }}}, + { 0x0431, {1, {0x0411 }}}, + { 0x0432, {1, {0x0412 }}}, + { 0x0433, {1, {0x0413 }}}, + { 0x0434, {1, {0x0414 }}}, + { 0x0435, {1, {0x0415 }}}, + { 0x0436, {1, {0x0416 }}}, + { 0x0437, {1, {0x0417 }}}, + { 0x0438, {1, {0x0418 }}}, + { 0x0439, {1, {0x0419 }}}, + { 0x043a, {1, {0x041a }}}, + { 0x043b, {1, {0x041b }}}, + { 0x043c, {1, {0x041c }}}, + { 0x043d, {1, {0x041d }}}, + { 0x043e, {1, {0x041e }}}, + { 0x043f, {1, {0x041f }}}, + { 0x0440, {1, {0x0420 }}}, + { 0x0441, {1, {0x0421 }}}, + { 0x0442, {1, {0x0422 }}}, + { 0x0443, {1, {0x0423 }}}, + { 0x0444, {1, {0x0424 }}}, + { 0x0445, {1, {0x0425 }}}, + { 0x0446, {1, {0x0426 }}}, + { 0x0447, {1, {0x0427 }}}, + { 0x0448, {1, {0x0428 }}}, + { 0x0449, {1, {0x0429 }}}, + { 0x044a, {1, {0x042a }}}, + { 0x044b, {1, {0x042b }}}, + { 0x044c, {1, {0x042c }}}, + { 0x044d, {1, {0x042d }}}, + { 0x044e, {1, {0x042e }}}, + { 0x044f, {1, {0x042f }}}, + { 0x0450, {1, {0x0400 }}}, + { 0x0451, {1, {0x0401 }}}, + { 0x0452, {1, {0x0402 }}}, + { 0x0453, {1, {0x0403 }}}, + { 0x0454, {1, {0x0404 }}}, + { 0x0455, {1, {0x0405 }}}, + { 0x0456, {1, {0x0406 }}}, + { 0x0457, {1, {0x0407 }}}, + { 0x0458, {1, {0x0408 }}}, + { 0x0459, {1, {0x0409 }}}, + { 0x045a, {1, {0x040a }}}, + { 0x045b, {1, {0x040b }}}, + { 0x045c, {1, {0x040c }}}, + { 0x045d, {1, {0x040d }}}, + { 0x045e, {1, {0x040e }}}, + { 0x045f, {1, {0x040f }}}, + { 0x0461, {1, {0x0460 }}}, + { 0x0463, {1, {0x0462 }}}, + { 0x0465, {1, {0x0464 }}}, + { 0x0467, {1, {0x0466 }}}, + { 0x0469, {1, {0x0468 }}}, + { 0x046b, {1, {0x046a }}}, + { 0x046d, {1, {0x046c }}}, + { 0x046f, {1, {0x046e }}}, + { 0x0471, {1, {0x0470 }}}, + { 0x0473, {1, {0x0472 }}}, + { 0x0475, {1, {0x0474 }}}, + { 0x0477, {1, {0x0476 }}}, + { 0x0479, {1, {0x0478 }}}, + { 0x047b, {1, {0x047a }}}, + { 0x047d, {1, {0x047c }}}, + { 0x047f, {1, {0x047e }}}, + { 0x0481, {1, {0x0480 }}}, + { 0x048b, {1, {0x048a }}}, + { 0x048d, {1, {0x048c }}}, + { 0x048f, {1, {0x048e }}}, + { 0x0491, {1, {0x0490 }}}, + { 0x0493, {1, {0x0492 }}}, + { 0x0495, {1, {0x0494 }}}, + { 0x0497, {1, {0x0496 }}}, + { 0x0499, {1, {0x0498 }}}, + { 0x049b, {1, {0x049a }}}, + { 0x049d, {1, {0x049c }}}, + { 0x049f, {1, {0x049e }}}, + { 0x04a1, {1, {0x04a0 }}}, + { 0x04a3, {1, {0x04a2 }}}, + { 0x04a5, {1, {0x04a4 }}}, + { 0x04a7, {1, {0x04a6 }}}, + { 0x04a9, {1, {0x04a8 }}}, + { 0x04ab, {1, {0x04aa }}}, + { 0x04ad, {1, {0x04ac }}}, + { 0x04af, {1, {0x04ae }}}, + { 0x04b1, {1, {0x04b0 }}}, + { 0x04b3, {1, {0x04b2 }}}, + { 0x04b5, {1, {0x04b4 }}}, + { 0x04b7, {1, {0x04b6 }}}, + { 0x04b9, {1, {0x04b8 }}}, + { 0x04bb, {1, {0x04ba }}}, + { 0x04bd, {1, {0x04bc }}}, + { 0x04bf, {1, {0x04be }}}, + { 0x04c2, {1, {0x04c1 }}}, + { 0x04c4, {1, {0x04c3 }}}, + { 0x04c6, {1, {0x04c5 }}}, + { 0x04c8, {1, {0x04c7 }}}, + { 0x04ca, {1, {0x04c9 }}}, + { 0x04cc, {1, {0x04cb }}}, + { 0x04ce, {1, {0x04cd }}}, + { 0x04d1, {1, {0x04d0 }}}, + { 0x04d3, {1, {0x04d2 }}}, + { 0x04d5, {1, {0x04d4 }}}, + { 0x04d7, {1, {0x04d6 }}}, + { 0x04d9, {1, {0x04d8 }}}, + { 0x04db, {1, {0x04da }}}, + { 0x04dd, {1, {0x04dc }}}, + { 0x04df, {1, {0x04de }}}, + { 0x04e1, {1, {0x04e0 }}}, + { 0x04e3, {1, {0x04e2 }}}, + { 0x04e5, {1, {0x04e4 }}}, + { 0x04e7, {1, {0x04e6 }}}, + { 0x04e9, {1, {0x04e8 }}}, + { 0x04eb, {1, {0x04ea }}}, + { 0x04ed, {1, {0x04ec }}}, + { 0x04ef, {1, {0x04ee }}}, + { 0x04f1, {1, {0x04f0 }}}, + { 0x04f3, {1, {0x04f2 }}}, + { 0x04f5, {1, {0x04f4 }}}, + { 0x04f7, {1, {0x04f6 }}}, + { 0x04f9, {1, {0x04f8 }}}, + { 0x0501, {1, {0x0500 }}}, + { 0x0503, {1, {0x0502 }}}, + { 0x0505, {1, {0x0504 }}}, + { 0x0507, {1, {0x0506 }}}, + { 0x0509, {1, {0x0508 }}}, + { 0x050b, {1, {0x050a }}}, + { 0x050d, {1, {0x050c }}}, + { 0x050f, {1, {0x050e }}}, + { 0x0561, {1, {0x0531 }}}, + { 0x0562, {1, {0x0532 }}}, + { 0x0563, {1, {0x0533 }}}, + { 0x0564, {1, {0x0534 }}}, + { 0x0565, {1, {0x0535 }}}, + { 0x0566, {1, {0x0536 }}}, + { 0x0567, {1, {0x0537 }}}, + { 0x0568, {1, {0x0538 }}}, + { 0x0569, {1, {0x0539 }}}, + { 0x056a, {1, {0x053a }}}, + { 0x056b, {1, {0x053b }}}, + { 0x056c, {1, {0x053c }}}, + { 0x056d, {1, {0x053d }}}, + { 0x056e, {1, {0x053e }}}, + { 0x056f, {1, {0x053f }}}, + { 0x0570, {1, {0x0540 }}}, + { 0x0571, {1, {0x0541 }}}, + { 0x0572, {1, {0x0542 }}}, + { 0x0573, {1, {0x0543 }}}, + { 0x0574, {1, {0x0544 }}}, + { 0x0575, {1, {0x0545 }}}, + { 0x0576, {1, {0x0546 }}}, + { 0x0577, {1, {0x0547 }}}, + { 0x0578, {1, {0x0548 }}}, + { 0x0579, {1, {0x0549 }}}, + { 0x057a, {1, {0x054a }}}, + { 0x057b, {1, {0x054b }}}, + { 0x057c, {1, {0x054c }}}, + { 0x057d, {1, {0x054d }}}, + { 0x057e, {1, {0x054e }}}, + { 0x057f, {1, {0x054f }}}, + { 0x0580, {1, {0x0550 }}}, + { 0x0581, {1, {0x0551 }}}, + { 0x0582, {1, {0x0552 }}}, + { 0x0583, {1, {0x0553 }}}, + { 0x0584, {1, {0x0554 }}}, + { 0x0585, {1, {0x0555 }}}, + { 0x0586, {1, {0x0556 }}}, + { 0x1e01, {1, {0x1e00 }}}, + { 0x1e03, {1, {0x1e02 }}}, + { 0x1e05, {1, {0x1e04 }}}, + { 0x1e07, {1, {0x1e06 }}}, + { 0x1e09, {1, {0x1e08 }}}, + { 0x1e0b, {1, {0x1e0a }}}, + { 0x1e0d, {1, {0x1e0c }}}, + { 0x1e0f, {1, {0x1e0e }}}, + { 0x1e11, {1, {0x1e10 }}}, + { 0x1e13, {1, {0x1e12 }}}, + { 0x1e15, {1, {0x1e14 }}}, + { 0x1e17, {1, {0x1e16 }}}, + { 0x1e19, {1, {0x1e18 }}}, + { 0x1e1b, {1, {0x1e1a }}}, + { 0x1e1d, {1, {0x1e1c }}}, + { 0x1e1f, {1, {0x1e1e }}}, + { 0x1e21, {1, {0x1e20 }}}, + { 0x1e23, {1, {0x1e22 }}}, + { 0x1e25, {1, {0x1e24 }}}, + { 0x1e27, {1, {0x1e26 }}}, + { 0x1e29, {1, {0x1e28 }}}, + { 0x1e2b, {1, {0x1e2a }}}, + { 0x1e2d, {1, {0x1e2c }}}, + { 0x1e2f, {1, {0x1e2e }}}, + { 0x1e31, {1, {0x1e30 }}}, + { 0x1e33, {1, {0x1e32 }}}, + { 0x1e35, {1, {0x1e34 }}}, + { 0x1e37, {1, {0x1e36 }}}, + { 0x1e39, {1, {0x1e38 }}}, + { 0x1e3b, {1, {0x1e3a }}}, + { 0x1e3d, {1, {0x1e3c }}}, + { 0x1e3f, {1, {0x1e3e }}}, + { 0x1e41, {1, {0x1e40 }}}, + { 0x1e43, {1, {0x1e42 }}}, + { 0x1e45, {1, {0x1e44 }}}, + { 0x1e47, {1, {0x1e46 }}}, + { 0x1e49, {1, {0x1e48 }}}, + { 0x1e4b, {1, {0x1e4a }}}, + { 0x1e4d, {1, {0x1e4c }}}, + { 0x1e4f, {1, {0x1e4e }}}, + { 0x1e51, {1, {0x1e50 }}}, + { 0x1e53, {1, {0x1e52 }}}, + { 0x1e55, {1, {0x1e54 }}}, + { 0x1e57, {1, {0x1e56 }}}, + { 0x1e59, {1, {0x1e58 }}}, + { 0x1e5b, {1, {0x1e5a }}}, + { 0x1e5d, {1, {0x1e5c }}}, + { 0x1e5f, {1, {0x1e5e }}}, + { 0x1e61, {2, {0x1e9b, 0x1e60 }}}, + { 0x1e63, {1, {0x1e62 }}}, + { 0x1e65, {1, {0x1e64 }}}, + { 0x1e67, {1, {0x1e66 }}}, + { 0x1e69, {1, {0x1e68 }}}, + { 0x1e6b, {1, {0x1e6a }}}, + { 0x1e6d, {1, {0x1e6c }}}, + { 0x1e6f, {1, {0x1e6e }}}, + { 0x1e71, {1, {0x1e70 }}}, + { 0x1e73, {1, {0x1e72 }}}, + { 0x1e75, {1, {0x1e74 }}}, + { 0x1e77, {1, {0x1e76 }}}, + { 0x1e79, {1, {0x1e78 }}}, + { 0x1e7b, {1, {0x1e7a }}}, + { 0x1e7d, {1, {0x1e7c }}}, + { 0x1e7f, {1, {0x1e7e }}}, + { 0x1e81, {1, {0x1e80 }}}, + { 0x1e83, {1, {0x1e82 }}}, + { 0x1e85, {1, {0x1e84 }}}, + { 0x1e87, {1, {0x1e86 }}}, + { 0x1e89, {1, {0x1e88 }}}, + { 0x1e8b, {1, {0x1e8a }}}, + { 0x1e8d, {1, {0x1e8c }}}, + { 0x1e8f, {1, {0x1e8e }}}, + { 0x1e91, {1, {0x1e90 }}}, + { 0x1e93, {1, {0x1e92 }}}, + { 0x1e95, {1, {0x1e94 }}}, + { 0x1ea1, {1, {0x1ea0 }}}, + { 0x1ea3, {1, {0x1ea2 }}}, + { 0x1ea5, {1, {0x1ea4 }}}, + { 0x1ea7, {1, {0x1ea6 }}}, + { 0x1ea9, {1, {0x1ea8 }}}, + { 0x1eab, {1, {0x1eaa }}}, + { 0x1ead, {1, {0x1eac }}}, + { 0x1eaf, {1, {0x1eae }}}, + { 0x1eb1, {1, {0x1eb0 }}}, + { 0x1eb3, {1, {0x1eb2 }}}, + { 0x1eb5, {1, {0x1eb4 }}}, + { 0x1eb7, {1, {0x1eb6 }}}, + { 0x1eb9, {1, {0x1eb8 }}}, + { 0x1ebb, {1, {0x1eba }}}, + { 0x1ebd, {1, {0x1ebc }}}, + { 0x1ebf, {1, {0x1ebe }}}, + { 0x1ec1, {1, {0x1ec0 }}}, + { 0x1ec3, {1, {0x1ec2 }}}, + { 0x1ec5, {1, {0x1ec4 }}}, + { 0x1ec7, {1, {0x1ec6 }}}, + { 0x1ec9, {1, {0x1ec8 }}}, + { 0x1ecb, {1, {0x1eca }}}, + { 0x1ecd, {1, {0x1ecc }}}, + { 0x1ecf, {1, {0x1ece }}}, + { 0x1ed1, {1, {0x1ed0 }}}, + { 0x1ed3, {1, {0x1ed2 }}}, + { 0x1ed5, {1, {0x1ed4 }}}, + { 0x1ed7, {1, {0x1ed6 }}}, + { 0x1ed9, {1, {0x1ed8 }}}, + { 0x1edb, {1, {0x1eda }}}, + { 0x1edd, {1, {0x1edc }}}, + { 0x1edf, {1, {0x1ede }}}, + { 0x1ee1, {1, {0x1ee0 }}}, + { 0x1ee3, {1, {0x1ee2 }}}, + { 0x1ee5, {1, {0x1ee4 }}}, + { 0x1ee7, {1, {0x1ee6 }}}, + { 0x1ee9, {1, {0x1ee8 }}}, + { 0x1eeb, {1, {0x1eea }}}, + { 0x1eed, {1, {0x1eec }}}, + { 0x1eef, {1, {0x1eee }}}, + { 0x1ef1, {1, {0x1ef0 }}}, + { 0x1ef3, {1, {0x1ef2 }}}, + { 0x1ef5, {1, {0x1ef4 }}}, + { 0x1ef7, {1, {0x1ef6 }}}, + { 0x1ef9, {1, {0x1ef8 }}}, + { 0x1f00, {1, {0x1f08 }}}, + { 0x1f01, {1, {0x1f09 }}}, + { 0x1f02, {1, {0x1f0a }}}, + { 0x1f03, {1, {0x1f0b }}}, + { 0x1f04, {1, {0x1f0c }}}, + { 0x1f05, {1, {0x1f0d }}}, + { 0x1f06, {1, {0x1f0e }}}, + { 0x1f07, {1, {0x1f0f }}}, + { 0x1f10, {1, {0x1f18 }}}, + { 0x1f11, {1, {0x1f19 }}}, + { 0x1f12, {1, {0x1f1a }}}, + { 0x1f13, {1, {0x1f1b }}}, + { 0x1f14, {1, {0x1f1c }}}, + { 0x1f15, {1, {0x1f1d }}}, + { 0x1f20, {1, {0x1f28 }}}, + { 0x1f21, {1, {0x1f29 }}}, + { 0x1f22, {1, {0x1f2a }}}, + { 0x1f23, {1, {0x1f2b }}}, + { 0x1f24, {1, {0x1f2c }}}, + { 0x1f25, {1, {0x1f2d }}}, + { 0x1f26, {1, {0x1f2e }}}, + { 0x1f27, {1, {0x1f2f }}}, + { 0x1f30, {1, {0x1f38 }}}, + { 0x1f31, {1, {0x1f39 }}}, + { 0x1f32, {1, {0x1f3a }}}, + { 0x1f33, {1, {0x1f3b }}}, + { 0x1f34, {1, {0x1f3c }}}, + { 0x1f35, {1, {0x1f3d }}}, + { 0x1f36, {1, {0x1f3e }}}, + { 0x1f37, {1, {0x1f3f }}}, + { 0x1f40, {1, {0x1f48 }}}, + { 0x1f41, {1, {0x1f49 }}}, + { 0x1f42, {1, {0x1f4a }}}, + { 0x1f43, {1, {0x1f4b }}}, + { 0x1f44, {1, {0x1f4c }}}, + { 0x1f45, {1, {0x1f4d }}}, + { 0x1f51, {1, {0x1f59 }}}, + { 0x1f53, {1, {0x1f5b }}}, + { 0x1f55, {1, {0x1f5d }}}, + { 0x1f57, {1, {0x1f5f }}}, + { 0x1f60, {1, {0x1f68 }}}, + { 0x1f61, {1, {0x1f69 }}}, + { 0x1f62, {1, {0x1f6a }}}, + { 0x1f63, {1, {0x1f6b }}}, + { 0x1f64, {1, {0x1f6c }}}, + { 0x1f65, {1, {0x1f6d }}}, + { 0x1f66, {1, {0x1f6e }}}, + { 0x1f67, {1, {0x1f6f }}}, + { 0x1f70, {1, {0x1fba }}}, + { 0x1f71, {1, {0x1fbb }}}, + { 0x1f72, {1, {0x1fc8 }}}, + { 0x1f73, {1, {0x1fc9 }}}, + { 0x1f74, {1, {0x1fca }}}, + { 0x1f75, {1, {0x1fcb }}}, + { 0x1f76, {1, {0x1fda }}}, + { 0x1f77, {1, {0x1fdb }}}, + { 0x1f78, {1, {0x1ff8 }}}, + { 0x1f79, {1, {0x1ff9 }}}, + { 0x1f7a, {1, {0x1fea }}}, + { 0x1f7b, {1, {0x1feb }}}, + { 0x1f7c, {1, {0x1ffa }}}, + { 0x1f7d, {1, {0x1ffb }}}, + { 0x1fb0, {1, {0x1fb8 }}}, + { 0x1fb1, {1, {0x1fb9 }}}, + { 0x1fd0, {1, {0x1fd8 }}}, + { 0x1fd1, {1, {0x1fd9 }}}, + { 0x1fe0, {1, {0x1fe8 }}}, + { 0x1fe1, {1, {0x1fe9 }}}, + { 0x1fe5, {1, {0x1fec }}}, + { 0x2170, {1, {0x2160 }}}, + { 0x2171, {1, {0x2161 }}}, + { 0x2172, {1, {0x2162 }}}, + { 0x2173, {1, {0x2163 }}}, + { 0x2174, {1, {0x2164 }}}, + { 0x2175, {1, {0x2165 }}}, + { 0x2176, {1, {0x2166 }}}, + { 0x2177, {1, {0x2167 }}}, + { 0x2178, {1, {0x2168 }}}, + { 0x2179, {1, {0x2169 }}}, + { 0x217a, {1, {0x216a }}}, + { 0x217b, {1, {0x216b }}}, + { 0x217c, {1, {0x216c }}}, + { 0x217d, {1, {0x216d }}}, + { 0x217e, {1, {0x216e }}}, + { 0x217f, {1, {0x216f }}}, + { 0x24d0, {1, {0x24b6 }}}, + { 0x24d1, {1, {0x24b7 }}}, + { 0x24d2, {1, {0x24b8 }}}, + { 0x24d3, {1, {0x24b9 }}}, + { 0x24d4, {1, {0x24ba }}}, + { 0x24d5, {1, {0x24bb }}}, + { 0x24d6, {1, {0x24bc }}}, + { 0x24d7, {1, {0x24bd }}}, + { 0x24d8, {1, {0x24be }}}, + { 0x24d9, {1, {0x24bf }}}, + { 0x24da, {1, {0x24c0 }}}, + { 0x24db, {1, {0x24c1 }}}, + { 0x24dc, {1, {0x24c2 }}}, + { 0x24dd, {1, {0x24c3 }}}, + { 0x24de, {1, {0x24c4 }}}, + { 0x24df, {1, {0x24c5 }}}, + { 0x24e0, {1, {0x24c6 }}}, + { 0x24e1, {1, {0x24c7 }}}, + { 0x24e2, {1, {0x24c8 }}}, + { 0x24e3, {1, {0x24c9 }}}, + { 0x24e4, {1, {0x24ca }}}, + { 0x24e5, {1, {0x24cb }}}, + { 0x24e6, {1, {0x24cc }}}, + { 0x24e7, {1, {0x24cd }}}, + { 0x24e8, {1, {0x24ce }}}, + { 0x24e9, {1, {0x24cf }}}, + { 0x2c30, {1, {0x2c00 }}}, + { 0x2c31, {1, {0x2c01 }}}, + { 0x2c32, {1, {0x2c02 }}}, + { 0x2c33, {1, {0x2c03 }}}, + { 0x2c34, {1, {0x2c04 }}}, + { 0x2c35, {1, {0x2c05 }}}, + { 0x2c36, {1, {0x2c06 }}}, + { 0x2c37, {1, {0x2c07 }}}, + { 0x2c38, {1, {0x2c08 }}}, + { 0x2c39, {1, {0x2c09 }}}, + { 0x2c3a, {1, {0x2c0a }}}, + { 0x2c3b, {1, {0x2c0b }}}, + { 0x2c3c, {1, {0x2c0c }}}, + { 0x2c3d, {1, {0x2c0d }}}, + { 0x2c3e, {1, {0x2c0e }}}, + { 0x2c3f, {1, {0x2c0f }}}, + { 0x2c40, {1, {0x2c10 }}}, + { 0x2c41, {1, {0x2c11 }}}, + { 0x2c42, {1, {0x2c12 }}}, + { 0x2c43, {1, {0x2c13 }}}, + { 0x2c44, {1, {0x2c14 }}}, + { 0x2c45, {1, {0x2c15 }}}, + { 0x2c46, {1, {0x2c16 }}}, + { 0x2c47, {1, {0x2c17 }}}, + { 0x2c48, {1, {0x2c18 }}}, + { 0x2c49, {1, {0x2c19 }}}, + { 0x2c4a, {1, {0x2c1a }}}, + { 0x2c4b, {1, {0x2c1b }}}, + { 0x2c4c, {1, {0x2c1c }}}, + { 0x2c4d, {1, {0x2c1d }}}, + { 0x2c4e, {1, {0x2c1e }}}, + { 0x2c4f, {1, {0x2c1f }}}, + { 0x2c50, {1, {0x2c20 }}}, + { 0x2c51, {1, {0x2c21 }}}, + { 0x2c52, {1, {0x2c22 }}}, + { 0x2c53, {1, {0x2c23 }}}, + { 0x2c54, {1, {0x2c24 }}}, + { 0x2c55, {1, {0x2c25 }}}, + { 0x2c56, {1, {0x2c26 }}}, + { 0x2c57, {1, {0x2c27 }}}, + { 0x2c58, {1, {0x2c28 }}}, + { 0x2c59, {1, {0x2c29 }}}, + { 0x2c5a, {1, {0x2c2a }}}, + { 0x2c5b, {1, {0x2c2b }}}, + { 0x2c5c, {1, {0x2c2c }}}, + { 0x2c5d, {1, {0x2c2d }}}, + { 0x2c5e, {1, {0x2c2e }}}, + { 0x2c81, {1, {0x2c80 }}}, + { 0x2c83, {1, {0x2c82 }}}, + { 0x2c85, {1, {0x2c84 }}}, + { 0x2c87, {1, {0x2c86 }}}, + { 0x2c89, {1, {0x2c88 }}}, + { 0x2c8b, {1, {0x2c8a }}}, + { 0x2c8d, {1, {0x2c8c }}}, + { 0x2c8f, {1, {0x2c8e }}}, + { 0x2c91, {1, {0x2c90 }}}, + { 0x2c93, {1, {0x2c92 }}}, + { 0x2c95, {1, {0x2c94 }}}, + { 0x2c97, {1, {0x2c96 }}}, + { 0x2c99, {1, {0x2c98 }}}, + { 0x2c9b, {1, {0x2c9a }}}, + { 0x2c9d, {1, {0x2c9c }}}, + { 0x2c9f, {1, {0x2c9e }}}, + { 0x2ca1, {1, {0x2ca0 }}}, + { 0x2ca3, {1, {0x2ca2 }}}, + { 0x2ca5, {1, {0x2ca4 }}}, + { 0x2ca7, {1, {0x2ca6 }}}, + { 0x2ca9, {1, {0x2ca8 }}}, + { 0x2cab, {1, {0x2caa }}}, + { 0x2cad, {1, {0x2cac }}}, + { 0x2caf, {1, {0x2cae }}}, + { 0x2cb1, {1, {0x2cb0 }}}, + { 0x2cb3, {1, {0x2cb2 }}}, + { 0x2cb5, {1, {0x2cb4 }}}, + { 0x2cb7, {1, {0x2cb6 }}}, + { 0x2cb9, {1, {0x2cb8 }}}, + { 0x2cbb, {1, {0x2cba }}}, + { 0x2cbd, {1, {0x2cbc }}}, + { 0x2cbf, {1, {0x2cbe }}}, + { 0x2cc1, {1, {0x2cc0 }}}, + { 0x2cc3, {1, {0x2cc2 }}}, + { 0x2cc5, {1, {0x2cc4 }}}, + { 0x2cc7, {1, {0x2cc6 }}}, + { 0x2cc9, {1, {0x2cc8 }}}, + { 0x2ccb, {1, {0x2cca }}}, + { 0x2ccd, {1, {0x2ccc }}}, + { 0x2ccf, {1, {0x2cce }}}, + { 0x2cd1, {1, {0x2cd0 }}}, + { 0x2cd3, {1, {0x2cd2 }}}, + { 0x2cd5, {1, {0x2cd4 }}}, + { 0x2cd7, {1, {0x2cd6 }}}, + { 0x2cd9, {1, {0x2cd8 }}}, + { 0x2cdb, {1, {0x2cda }}}, + { 0x2cdd, {1, {0x2cdc }}}, + { 0x2cdf, {1, {0x2cde }}}, + { 0x2ce1, {1, {0x2ce0 }}}, + { 0x2ce3, {1, {0x2ce2 }}}, + { 0x2d00, {1, {0x10a0 }}}, + { 0x2d01, {1, {0x10a1 }}}, + { 0x2d02, {1, {0x10a2 }}}, + { 0x2d03, {1, {0x10a3 }}}, + { 0x2d04, {1, {0x10a4 }}}, + { 0x2d05, {1, {0x10a5 }}}, + { 0x2d06, {1, {0x10a6 }}}, + { 0x2d07, {1, {0x10a7 }}}, + { 0x2d08, {1, {0x10a8 }}}, + { 0x2d09, {1, {0x10a9 }}}, + { 0x2d0a, {1, {0x10aa }}}, + { 0x2d0b, {1, {0x10ab }}}, + { 0x2d0c, {1, {0x10ac }}}, + { 0x2d0d, {1, {0x10ad }}}, + { 0x2d0e, {1, {0x10ae }}}, + { 0x2d0f, {1, {0x10af }}}, + { 0x2d10, {1, {0x10b0 }}}, + { 0x2d11, {1, {0x10b1 }}}, + { 0x2d12, {1, {0x10b2 }}}, + { 0x2d13, {1, {0x10b3 }}}, + { 0x2d14, {1, {0x10b4 }}}, + { 0x2d15, {1, {0x10b5 }}}, + { 0x2d16, {1, {0x10b6 }}}, + { 0x2d17, {1, {0x10b7 }}}, + { 0x2d18, {1, {0x10b8 }}}, + { 0x2d19, {1, {0x10b9 }}}, + { 0x2d1a, {1, {0x10ba }}}, + { 0x2d1b, {1, {0x10bb }}}, + { 0x2d1c, {1, {0x10bc }}}, + { 0x2d1d, {1, {0x10bd }}}, + { 0x2d1e, {1, {0x10be }}}, + { 0x2d1f, {1, {0x10bf }}}, + { 0x2d20, {1, {0x10c0 }}}, + { 0x2d21, {1, {0x10c1 }}}, + { 0x2d22, {1, {0x10c2 }}}, + { 0x2d23, {1, {0x10c3 }}}, + { 0x2d24, {1, {0x10c4 }}}, + { 0x2d25, {1, {0x10c5 }}}, + { 0xff41, {1, {0xff21 }}}, + { 0xff42, {1, {0xff22 }}}, + { 0xff43, {1, {0xff23 }}}, + { 0xff44, {1, {0xff24 }}}, + { 0xff45, {1, {0xff25 }}}, + { 0xff46, {1, {0xff26 }}}, + { 0xff47, {1, {0xff27 }}}, + { 0xff48, {1, {0xff28 }}}, + { 0xff49, {1, {0xff29 }}}, + { 0xff4a, {1, {0xff2a }}}, + { 0xff4b, {1, {0xff2b }}}, + { 0xff4c, {1, {0xff2c }}}, + { 0xff4d, {1, {0xff2d }}}, + { 0xff4e, {1, {0xff2e }}}, + { 0xff4f, {1, {0xff2f }}}, + { 0xff50, {1, {0xff30 }}}, + { 0xff51, {1, {0xff31 }}}, + { 0xff52, {1, {0xff32 }}}, + { 0xff53, {1, {0xff33 }}}, + { 0xff54, {1, {0xff34 }}}, + { 0xff55, {1, {0xff35 }}}, + { 0xff56, {1, {0xff36 }}}, + { 0xff57, {1, {0xff37 }}}, + { 0xff58, {1, {0xff38 }}}, + { 0xff59, {1, {0xff39 }}}, + { 0xff5a, {1, {0xff3a }}}, + { 0x10428, {1, {0x10400 }}}, + { 0x10429, {1, {0x10401 }}}, + { 0x1042a, {1, {0x10402 }}}, + { 0x1042b, {1, {0x10403 }}}, + { 0x1042c, {1, {0x10404 }}}, + { 0x1042d, {1, {0x10405 }}}, + { 0x1042e, {1, {0x10406 }}}, + { 0x1042f, {1, {0x10407 }}}, + { 0x10430, {1, {0x10408 }}}, + { 0x10431, {1, {0x10409 }}}, + { 0x10432, {1, {0x1040a }}}, + { 0x10433, {1, {0x1040b }}}, + { 0x10434, {1, {0x1040c }}}, + { 0x10435, {1, {0x1040d }}}, + { 0x10436, {1, {0x1040e }}}, + { 0x10437, {1, {0x1040f }}}, + { 0x10438, {1, {0x10410 }}}, + { 0x10439, {1, {0x10411 }}}, + { 0x1043a, {1, {0x10412 }}}, + { 0x1043b, {1, {0x10413 }}}, + { 0x1043c, {1, {0x10414 }}}, + { 0x1043d, {1, {0x10415 }}}, + { 0x1043e, {1, {0x10416 }}}, + { 0x1043f, {1, {0x10417 }}}, + { 0x10440, {1, {0x10418 }}}, + { 0x10441, {1, {0x10419 }}}, + { 0x10442, {1, {0x1041a }}}, + { 0x10443, {1, {0x1041b }}}, + { 0x10444, {1, {0x1041c }}}, + { 0x10445, {1, {0x1041d }}}, + { 0x10446, {1, {0x1041e }}}, + { 0x10447, {1, {0x1041f }}}, + { 0x10448, {1, {0x10420 }}}, + { 0x10449, {1, {0x10421 }}}, + { 0x1044a, {1, {0x10422 }}}, + { 0x1044b, {1, {0x10423 }}}, + { 0x1044c, {1, {0x10424 }}}, + { 0x1044d, {1, {0x10425 }}}, + { 0x1044e, {1, {0x10426 }}}, + { 0x1044f, {1, {0x10427 }}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = { + { 0x0069, {1, {0x0049 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12[] = { + { {0x0061, 0x02be}, {1, {0x1e9a }}}, + { {0x0066, 0x0066}, {1, {0xfb00 }}}, + { {0x0066, 0x0069}, {1, {0xfb01 }}}, + { {0x0066, 0x006c}, {1, {0xfb02 }}}, + { {0x0068, 0x0331}, {1, {0x1e96 }}}, + { {0x006a, 0x030c}, {1, {0x01f0 }}}, + { {0x0073, 0x0073}, {1, {0x00df }}}, + { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}}, + { {0x0074, 0x0308}, {1, {0x1e97 }}}, + { {0x0077, 0x030a}, {1, {0x1e98 }}}, + { {0x0079, 0x030a}, {1, {0x1e99 }}}, + { {0x02bc, 0x006e}, {1, {0x0149 }}}, + { {0x03ac, 0x03b9}, {1, {0x1fb4 }}}, + { {0x03ae, 0x03b9}, {1, {0x1fc4 }}}, + { {0x03b1, 0x0342}, {1, {0x1fb6 }}}, + { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}}, + { {0x03b7, 0x0342}, {1, {0x1fc6 }}}, + { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}}, + { {0x03b9, 0x0342}, {1, {0x1fd6 }}}, + { {0x03c1, 0x0313}, {1, {0x1fe4 }}}, + { {0x03c5, 0x0313}, {1, {0x1f50 }}}, + { {0x03c5, 0x0342}, {1, {0x1fe6 }}}, + { {0x03c9, 0x0342}, {1, {0x1ff6 }}}, + { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}}, + { {0x03ce, 0x03b9}, {1, {0x1ff4 }}}, + { {0x0565, 0x0582}, {1, {0x0587 }}}, + { {0x0574, 0x0565}, {1, {0xfb14 }}}, + { {0x0574, 0x056b}, {1, {0xfb15 }}}, + { {0x0574, 0x056d}, {1, {0xfb17 }}}, + { {0x0574, 0x0576}, {1, {0xfb13 }}}, + { {0x057e, 0x0576}, {1, {0xfb16 }}}, + { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}}, + { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}}, + { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}}, + { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}}, + { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}}, + { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}}, + { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}}, + { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}}, + { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}}, + { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}}, + { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}}, + { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}}, + { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}}, + { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}}, + { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}}, + { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}}, + { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}}, + { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}}, + { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}}, + { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}}, + { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}}, + { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}}, + { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}}, + { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}}, + { {0x1f70, 0x03b9}, {1, {0x1fb2 }}}, + { {0x1f74, 0x03b9}, {1, {0x1fc2 }}}, + { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = { + { {0x0069, 0x0307}, {1, {0x0130 }}} +}; + +static const CaseUnfold_13_Type CaseUnfold_13[] = { + { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}}, + { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}}, + { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}}, + { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}}, + { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}}, + { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}}, + { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}}, + { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}}, + { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}}, + { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}}, + { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}}, + { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}}, + { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}}, + { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}} +}; + + +static PosixBracketEntryType HashEntryData[] = { + { (UChar* )"NEWLINE", 0, 7 }, + { (UChar* )"Alpha", 1, 5 }, + { (UChar* )"Blank", 2, 5 }, + { (UChar* )"Cntrl", 3, 5 }, + { (UChar* )"Digit", 4, 5 }, + { (UChar* )"Graph", 5, 5 }, + { (UChar* )"Lower", 6, 5 }, + { (UChar* )"Print", 7, 5 }, + { (UChar* )"Punct", 8, 5 }, + { (UChar* )"Space", 9, 5 }, + { (UChar* )"Upper", 10, 5 }, + { (UChar* )"XDigit", 11, 6 }, + { (UChar* )"Word", 12, 4 }, + { (UChar* )"Alnum", 13, 5 }, + { (UChar* )"ASCII", 14, 5 }, + +#ifdef USE_UNICODE_PROPERTIES + { (UChar* )"Any", 15, 3 }, + { (UChar* )"Assigned", 16, 8 }, + { (UChar* )"C", 17, 1 }, + { (UChar* )"Cc", 18, 2 }, + { (UChar* )"Cf", 19, 2 }, + { (UChar* )"Cn", 20, 2 }, + { (UChar* )"Co", 21, 2 }, + { (UChar* )"Cs", 22, 2 }, + { (UChar* )"L", 23, 1 }, + { (UChar* )"Ll", 24, 2 }, + { (UChar* )"Lm", 25, 2 }, + { (UChar* )"Lo", 26, 2 }, + { (UChar* )"Lt", 27, 2 }, + { (UChar* )"Lu", 28, 2 }, + { (UChar* )"M", 29, 1 }, + { (UChar* )"Mc", 30, 2 }, + { (UChar* )"Me", 31, 2 }, + { (UChar* )"Mn", 32, 2 }, + { (UChar* )"N", 33, 1 }, + { (UChar* )"Nd", 34, 2 }, + { (UChar* )"Nl", 35, 2 }, + { (UChar* )"No", 36, 2 }, + { (UChar* )"P", 37, 1 }, + { (UChar* )"Pc", 38, 2 }, + { (UChar* )"Pd", 39, 2 }, + { (UChar* )"Pe", 40, 2 }, + { (UChar* )"Pf", 41, 2 }, + { (UChar* )"Pi", 42, 2 }, + { (UChar* )"Po", 43, 2 }, + { (UChar* )"Ps", 44, 2 }, + { (UChar* )"S", 45, 1 }, + { (UChar* )"Sc", 46, 2 }, + { (UChar* )"Sk", 47, 2 }, + { (UChar* )"Sm", 48, 2 }, + { (UChar* )"So", 49, 2 }, + { (UChar* )"Z", 50, 1 }, + { (UChar* )"Zl", 51, 2 }, + { (UChar* )"Zp", 52, 2 }, + { (UChar* )"Zs", 53, 2 }, + { (UChar* )"Arabic", 54, 6 }, + { (UChar* )"Armenian", 55, 8 }, + { (UChar* )"Bengali", 56, 7 }, + { (UChar* )"Bopomofo", 57, 8 }, + { (UChar* )"Braille", 58, 7 }, + { (UChar* )"Buginese", 59, 8 }, + { (UChar* )"Buhid", 60, 5 }, + { (UChar* )"Canadian_Aboriginal", 61, 19 }, + { (UChar* )"Cherokee", 62, 8 }, + { (UChar* )"Common", 63, 6 }, + { (UChar* )"Coptic", 64, 6 }, + { (UChar* )"Cypriot", 65, 7 }, + { (UChar* )"Cyrillic", 66, 8 }, + { (UChar* )"Deseret", 67, 7 }, + { (UChar* )"Devanagari", 68, 10 }, + { (UChar* )"Ethiopic", 69, 8 }, + { (UChar* )"Georgian", 70, 8 }, + { (UChar* )"Glagolitic", 71, 10 }, + { (UChar* )"Gothic", 72, 6 }, + { (UChar* )"Greek", 73, 5 }, + { (UChar* )"Gujarati", 74, 8 }, + { (UChar* )"Gurmukhi", 75, 8 }, + { (UChar* )"Han", 76, 3 }, + { (UChar* )"Hangul", 77, 6 }, + { (UChar* )"Hanunoo", 78, 7 }, + { (UChar* )"Hebrew", 79, 6 }, + { (UChar* )"Hiragana", 80, 8 }, + { (UChar* )"Inherited", 81, 9 }, + { (UChar* )"Kannada", 82, 7 }, + { (UChar* )"Katakana", 83, 8 }, + { (UChar* )"Kharoshthi", 84, 10 }, + { (UChar* )"Khmer", 85, 5 }, + { (UChar* )"Lao", 86, 3 }, + { (UChar* )"Latin", 87, 5 }, + { (UChar* )"Limbu", 88, 5 }, + { (UChar* )"Linear_B", 89, 8 }, + { (UChar* )"Malayalam", 90, 9 }, + { (UChar* )"Mongolian", 91, 9 }, + { (UChar* )"Myanmar", 92, 7 }, + { (UChar* )"New_Tai_Lue", 93, 11 }, + { (UChar* )"Ogham", 94, 5 }, + { (UChar* )"Old_Italic", 95, 10 }, + { (UChar* )"Old_Persian", 96, 11 }, + { (UChar* )"Oriya", 97, 5 }, + { (UChar* )"Osmanya", 98, 7 }, + { (UChar* )"Runic", 99, 5 }, + { (UChar* )"Shavian", 100, 7 }, + { (UChar* )"Sinhala", 101, 7 }, + { (UChar* )"Syloti_Nagri", 102, 12 }, + { (UChar* )"Syriac", 103, 6 }, + { (UChar* )"Tagalog", 104, 7 }, + { (UChar* )"Tagbanwa", 105, 8 }, + { (UChar* )"Tai_Le", 106, 6 }, + { (UChar* )"Tamil", 107, 5 }, + { (UChar* )"Telugu", 108, 6 }, + { (UChar* )"Thaana", 109, 6 }, + { (UChar* )"Thai", 110, 4 }, + { (UChar* )"Tibetan", 111, 7 }, + { (UChar* )"Tifinagh", 112, 8 }, + { (UChar* )"Ugaritic", 113, 8 }, + { (UChar* )"Yi", 114, 2 }, +#endif /* USE_UNICODE_PROPERTIES */ + { (UChar* )NULL, -1, 0 } +}; + +#ifdef USE_UNICODE_PROPERTIES +#define CODE_RANGES_NUM 115 +#else +#define CODE_RANGES_NUM 15 +#endif + +static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM]; +static int CodeRangeTableInited = 0; + +static void init_code_range_array(void) { + THREAD_ATOMIC_START; + + CodeRanges[0] = CR_NEWLINE; + CodeRanges[1] = CR_Alpha; + CodeRanges[2] = CR_Blank; + CodeRanges[3] = CR_Cntrl; + CodeRanges[4] = CR_Digit; + CodeRanges[5] = CR_Graph; + CodeRanges[6] = CR_Lower; + CodeRanges[7] = CR_Print; + CodeRanges[8] = CR_Punct; + CodeRanges[9] = CR_Space; + CodeRanges[10] = CR_Upper; + CodeRanges[11] = CR_XDigit; + CodeRanges[12] = CR_Word; + CodeRanges[13] = CR_Alnum; + CodeRanges[14] = CR_ASCII; + +#ifdef USE_UNICODE_PROPERTIES + CodeRanges[15] = CR_Any; + CodeRanges[16] = CR_Assigned; + CodeRanges[17] = CR_C; + CodeRanges[18] = CR_Cc; + CodeRanges[19] = CR_Cf; + CodeRanges[20] = CR_Cn; + CodeRanges[21] = CR_Co; + CodeRanges[22] = CR_Cs; + CodeRanges[23] = CR_L; + CodeRanges[24] = CR_Ll; + CodeRanges[25] = CR_Lm; + CodeRanges[26] = CR_Lo; + CodeRanges[27] = CR_Lt; + CodeRanges[28] = CR_Lu; + CodeRanges[29] = CR_M; + CodeRanges[30] = CR_Mc; + CodeRanges[31] = CR_Me; + CodeRanges[32] = CR_Mn; + CodeRanges[33] = CR_N; + CodeRanges[34] = CR_Nd; + CodeRanges[35] = CR_Nl; + CodeRanges[36] = CR_No; + CodeRanges[37] = CR_P; + CodeRanges[38] = CR_Pc; + CodeRanges[39] = CR_Pd; + CodeRanges[40] = CR_Pe; + CodeRanges[41] = CR_Pf; + CodeRanges[42] = CR_Pi; + CodeRanges[43] = CR_Po; + CodeRanges[44] = CR_Ps; + CodeRanges[45] = CR_S; + CodeRanges[46] = CR_Sc; + CodeRanges[47] = CR_Sk; + CodeRanges[48] = CR_Sm; + CodeRanges[49] = CR_So; + CodeRanges[50] = CR_Z; + CodeRanges[51] = CR_Zl; + CodeRanges[52] = CR_Zp; + CodeRanges[53] = CR_Zs; + CodeRanges[54] = CR_Arabic; + CodeRanges[55] = CR_Armenian; + CodeRanges[56] = CR_Bengali; + CodeRanges[57] = CR_Bopomofo; + CodeRanges[58] = CR_Braille; + CodeRanges[59] = CR_Buginese; + CodeRanges[60] = CR_Buhid; + CodeRanges[61] = CR_Canadian_Aboriginal; + CodeRanges[62] = CR_Cherokee; + CodeRanges[63] = CR_Common; + CodeRanges[64] = CR_Coptic; + CodeRanges[65] = CR_Cypriot; + CodeRanges[66] = CR_Cyrillic; + CodeRanges[67] = CR_Deseret; + CodeRanges[68] = CR_Devanagari; + CodeRanges[69] = CR_Ethiopic; + CodeRanges[70] = CR_Georgian; + CodeRanges[71] = CR_Glagolitic; + CodeRanges[72] = CR_Gothic; + CodeRanges[73] = CR_Greek; + CodeRanges[74] = CR_Gujarati; + CodeRanges[75] = CR_Gurmukhi; + CodeRanges[76] = CR_Han; + CodeRanges[77] = CR_Hangul; + CodeRanges[78] = CR_Hanunoo; + CodeRanges[79] = CR_Hebrew; + CodeRanges[80] = CR_Hiragana; + CodeRanges[81] = CR_Inherited; + CodeRanges[82] = CR_Kannada; + CodeRanges[83] = CR_Katakana; + CodeRanges[84] = CR_Kharoshthi; + CodeRanges[85] = CR_Khmer; + CodeRanges[86] = CR_Lao; + CodeRanges[87] = CR_Latin; + CodeRanges[88] = CR_Limbu; + CodeRanges[89] = CR_Linear_B; + CodeRanges[90] = CR_Malayalam; + CodeRanges[91] = CR_Mongolian; + CodeRanges[92] = CR_Myanmar; + CodeRanges[93] = CR_New_Tai_Lue; + CodeRanges[94] = CR_Ogham; + CodeRanges[95] = CR_Old_Italic; + CodeRanges[96] = CR_Old_Persian; + CodeRanges[97] = CR_Oriya; + CodeRanges[98] = CR_Osmanya; + CodeRanges[99] = CR_Runic; + CodeRanges[100] = CR_Shavian; + CodeRanges[101] = CR_Sinhala; + CodeRanges[102] = CR_Syloti_Nagri; + CodeRanges[103] = CR_Syriac; + CodeRanges[104] = CR_Tagalog; + CodeRanges[105] = CR_Tagbanwa; + CodeRanges[106] = CR_Tai_Le; + CodeRanges[107] = CR_Tamil; + CodeRanges[108] = CR_Telugu; + CodeRanges[109] = CR_Thaana; + CodeRanges[110] = CR_Thai; + CodeRanges[111] = CR_Tibetan; + CodeRanges[112] = CR_Tifinagh; + CodeRanges[113] = CR_Ugaritic; + CodeRanges[114] = CR_Yi; +#endif /* USE_UNICODE_PROPERTIES */ + + CodeRangeTableInited = 1; + THREAD_ATOMIC_END; +} + +extern int +onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if ( +#ifdef USE_UNICODE_PROPERTIES + ctype <= ONIGENC_MAX_STD_CTYPE && +#endif + code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + + if (ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + if (CodeRangeTableInited == 0) init_code_range_array(); + + return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); +} + + +extern int +onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) +{ + if (ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + if (CodeRangeTableInited == 0) init_code_range_array(); + + *ranges = CodeRanges[ctype]; + + return 0; +} + +extern int +onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + *sb_out = 0x00; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + +#include "st.h" + +#define PROPERTY_NAME_MAX_SIZE 20 + +static st_table* NameCtypeTable; +static int NameTableInited = 0; + +static int init_name_ctype_table(void) +{ + PosixBracketEntryType *pb; + + THREAD_ATOMIC_START; + + NameCtypeTable = onig_st_init_strend_table_with_size(100); + if (ONIG_IS_NULL(NameCtypeTable)) return ONIGERR_MEMORY; + + for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) { + onig_st_insert_strend(NameCtypeTable, pb->name, pb->name + pb->len, + (st_data_t )pb->ctype); + } + + NameTableInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +extern int +onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) +{ + int len; + hash_data_type ctype; + UChar buf[PROPERTY_NAME_MAX_SIZE]; + UChar *p; + OnigCodePoint code; + + p = name; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + buf[len++] = (UChar )code; + if (len >= PROPERTY_NAME_MAX_SIZE) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + p += enclen(enc, p); + } + + buf[len] = 0; + + if (NameTableInited == 0) init_name_ctype_table(); + + if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len, &ctype) == 0) { + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + } + + return (int )ctype; +} + + +static int +code2_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1]) return 0; + return 1; +} + +static int +code2_hash(OnigCodePoint* x) +{ + return (int )(x[0] + x[1]); +} + +static struct st_hash_type type_code2_hash = { + code2_cmp, + code2_hash, +}; + +static int +code3_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0; + return 1; +} + +static int +code3_hash(OnigCodePoint* x) +{ + return (int )(x[0] + x[1] + x[2]); +} + +static struct st_hash_type type_code3_hash = { + code3_cmp, + code3_hash, +}; + + +static st_table* FoldTable; /* fold-1, fold-2, fold-3 */ +static st_table* Unfold1Table; +static st_table* Unfold2Table; +static st_table* Unfold3Table; +static int CaseFoldInited = 0; + +static int init_case_fold_table(void) +{ + const CaseFold_11_Type *p; + const CaseUnfold_11_Type *p1; + const CaseUnfold_12_Type *p2; + const CaseUnfold_13_Type *p3; + int i; + + THREAD_ATOMIC_START; + + FoldTable = st_init_numtable_with_size(1200); + if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY; + for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) { + p = &CaseFold[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type)); + i++) { + p = &CaseFold_Locale[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + + Unfold1Table = st_init_numtable_with_size(1000); + if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { + p1 = &CaseUnfold_11[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); + i++) { + p1 = &CaseUnfold_11_Locale[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + + Unfold2Table = st_init_table_with_size(&type_code2_hash, 200); + if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { + p2 = &CaseUnfold_12[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); + i++) { + p2 = &CaseUnfold_12_Locale[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + + Unfold3Table = st_init_table_with_size(&type_code3_hash, 30); + if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { + p3 = &CaseUnfold_13[i]; + st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to)); + } + + CaseFoldInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +extern int +onigenc_unicode_mbc_case_fold(OnigEncoding enc, + OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, + UChar* fold) +{ + CodePointList3 *to; + OnigCodePoint code; + int i, len, rlen; + const UChar *p = *pp; + + if (CaseFoldInited == 0) init_case_fold_table(); + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p); + *pp += len; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold); + } + else if (code == 0x0130) { + return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold); + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold); + } +#if 0 + /* NO NEEDS TO CHECK */ + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { +#else + else { +#endif + rlen = 0; + for (i = 0; i < to->n; i++) { + len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold); + fold += len; + rlen += len; + } + return rlen; + } + } + + for (i = 0; i < len; i++) { + *fold++ = *p++; + } + return len; +} + +extern int +onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + const CaseUnfold_11_Type* p11; + OnigCodePoint code; + int i, j, k, r; + + /* if (CaseFoldInited == 0) init_case_fold_table(); */ + + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { + p11 = &CaseUnfold_11[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg); + if (r != 0) return r; + } + } + } + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + code = 0x0131; + r = (*f)(0x0049, &code, 1, arg); + if (r != 0) return r; + code = 0x0049; + r = (*f)(0x0131, &code, 1, arg); + if (r != 0) return r; + + code = 0x0130; + r = (*f)(0x0069, &code, 1, arg); + if (r != 0) return r; + code = 0x0069; + r = (*f)(0x0130, &code, 1, arg); + if (r != 0) return r; + } + else { +#endif + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); + i++) { + p11 = &CaseUnfold_11_Locale[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), + 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { + for (j = 0; j < CaseUnfold_12[i].to.n; j++) { + r = (*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) { +#endif + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); + i++) { + for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) { + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { + for (j = 0; j < CaseUnfold_13[i].to.n; j++) { + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_13[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + } + + return 0; +} + +extern int +onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, + OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + int n, i, j, k, len; + OnigCodePoint code, codes[3]; + CodePointList3 *to, *z3; + CodePointList2 *z2; + + if (CaseFoldInited == 0) init_case_fold_table(); + + n = 0; + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p); + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0131; + return 1; + } + else if (code == 0x0130) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0069; + return 1; + } + else if (code == 0x0131) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0049; + return 1; + } + else if (code == 0x0069) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0130; + return 1; + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + OnigCodePoint orig_code = code; + + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = to->code[0]; + n++; + + code = to->code[0]; + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + if (to->code[i] != orig_code) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + } + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + OnigCodePoint cs[3][4]; + int fn, ncs[3]; + + for (fn = 0; fn < to->n; fn++) { + cs[fn][0] = to->code[fn]; + if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0], + (void* )&z3) != 0) { + for (i = 0; i < z3->n; i++) { + cs[fn][i+1] = z3->code[i]; + } + ncs[fn] = z3->n + 1; + } + else + ncs[fn] = 1; + } + + if (fn == 2) { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + items[n].byte_len = len; + items[n].code_len = 2; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + n++; + } + } + + if (onig_st_lookup(Unfold2Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + else { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + for (k = 0; k < ncs[2]; k++) { + items[n].byte_len = len; + items[n].code_len = 3; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + items[n].code[2] = cs[2][k]; + n++; + } + } + } + + if (onig_st_lookup(Unfold3Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + + /* multi char folded code is not head of another folded multi char */ + flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */ + } + } + else { + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + + + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + p += len; + if (p < end) { + int clen; + + codes[0] = code; + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[1] = to->code[0]; + } + else + codes[1] = code; + + clen = enclen(enc, p); + len += clen; + if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + + p += clen; + if (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[2] = to->code[0]; + } + else + codes[2] = code; + + clen = enclen(enc, p); + len += clen; + if (onig_st_lookup(Unfold3Table, (st_data_t )codes, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + } + } + + return n; +} diff --git a/enc/utf16_be.c b/enc/utf16_be.c new file mode 100644 index 0000000..1e909eb --- /dev/null +++ b/enc/utf16_be.c @@ -0,0 +1,225 @@ +/********************************************************************** + utf16_be.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_UTF16[] = { + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static int +utf16be_mbc_enc_len(const UChar* p) +{ + return EncLen_UTF16[*p]; +} + +static int +utf16be_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 1 < end) { + if (*(p+1) == 0x0a && *p == 0x00) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *(p+1) == 0x0d || +#endif + *(p+1) == 0x85) && *p == 0x00) + return 1; + if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28)) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + OnigCodePoint code; + + if (UTF16_IS_SURROGATE_FIRST(*p)) { + code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16) + + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8) + + p[3]; + } + else { + code = p[0] * 256 + p[1]; + } + return code; +} + +static int +utf16be_code_to_mbclen(OnigCodePoint code) +{ + return (code > 0xffff ? 4 : 2); +} + +static int +utf16be_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + if (code > 0xffff) { + unsigned int plane, high; + + plane = (code >> 16) - 1; + *p++ = (plane >> 2) + 0xd8; + high = (code & 0xff00) >> 8; + *p++ = ((plane & 0x03) << 6) + (high >> 2); + *p++ = (high & 0x03) + 0xdc; + *p = (UChar )(code & 0xff); + return 4; + } + else { + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar )(code & 0xff); + return 2; + } +} + +static int +utf16be_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*(p+1)) && *p == 0) { + p++; +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x01; + *fold = 0x31; + (*pp) += 2; + return 2; + } + } +#endif + + *fold++ = 0; + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *pp += 2; + return 2; + } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag, + pp, end, fold); +} + +#if 0 +static int +utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += EncLen_UTF16[*p]; + + if (*p == 0) { + int c, v; + + p++; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf16be_left_adjust_char_head(const UChar* start, const UChar* s) +{ + if (s <= start) return (UChar* )s; + + if ((s - start) % 2 == 1) { + s--; + } + + if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1) + s -= 2; + + return (UChar* )s; +} + +static int +utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF16_BE = { + utf16be_mbc_enc_len, + "UTF-16BE", /* name */ + 4, /* max byte length */ + 2, /* min byte length */ + utf16be_is_mbc_newline, + utf16be_mbc_to_code, + utf16be_code_to_mbclen, + utf16be_code_to_mbc, + utf16be_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf16be_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf16be_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/enc/utf16_le.c b/enc/utf16_le.c new file mode 100644 index 0000000..5cc0759 --- /dev/null +++ b/enc/utf16_le.c @@ -0,0 +1,226 @@ +/********************************************************************** + utf16_le.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_UTF16[] = { + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static int +utf16le_code_to_mbclen(OnigCodePoint code) +{ + return (code > 0xffff ? 4 : 2); +} + +static int +utf16le_mbc_enc_len(const UChar* p) +{ + return EncLen_UTF16[*(p+1)]; +} + +static int +utf16le_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 1 < end) { + if (*p == 0x0a && *(p+1) == 0x00) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *p == 0x0d || +#endif + *p == 0x85) && *(p+1) == 0x00) + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + OnigCodePoint code; + UChar c0 = *p; + UChar c1 = *(p+1); + + if (UTF16_IS_SURROGATE_FIRST(c1)) { + code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16) + + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8) + + p[2]; + } + else { + code = c1 * 256 + p[0]; + } + return code; +} + +static int +utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + if (code > 0xffff) { + unsigned int plane, high; + + plane = (code >> 16) - 1; + high = (code & 0xff00) >> 8; + + *p++ = ((plane & 0x03) << 6) + (high >> 2); + *p++ = (plane >> 2) + 0xd8; + *p++ = (UChar )(code & 0xff); + *p = (high & 0x03) + 0xdc; + return 4; + } + else { + *p++ = (UChar )(code & 0xff); + *p++ = (UChar )((code & 0xff00) >> 8); + return 2; + } +} + +static int +utf16le_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x31; + *fold = 0x01; + (*pp) += 2; + return 2; + } + } +#endif + + *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *fold = 0; + *pp += 2; + return 2; + } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end, + fold); +} + +#if 0 +static int +utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, + const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += EncLen_UTF16[*(p+1)]; + + if (*(p+1) == 0) { + int c, v; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf16le_left_adjust_char_head(const UChar* start, const UChar* s) +{ + if (s <= start) return (UChar* )s; + + if ((s - start) % 2 == 1) { + s--; + } + + if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) + s -= 2; + + return (UChar* )s; +} + +static int +utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF16_LE = { + utf16le_mbc_enc_len, + "UTF-16LE", /* name */ + 4, /* max byte length */ + 2, /* min byte length */ + utf16le_is_mbc_newline, + utf16le_mbc_to_code, + utf16le_code_to_mbclen, + utf16le_code_to_mbc, + utf16le_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf16le_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf16le_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/enc/utf32_be.c b/enc/utf32_be.c new file mode 100644 index 0000000..b4f8226 --- /dev/null +++ b/enc/utf32_be.c @@ -0,0 +1,184 @@ +/********************************************************************** + utf32_be.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +utf32be_mbc_enc_len(const UChar* p ARG_UNUSED) +{ + return 4; +} + +static int +utf32be_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 3 < end) { + if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *(p+3) == 0x0d || +#endif + *(p+3) == 0x85) + && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) + return 1; + if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) + && *(p+1) == 0 && *p == 0) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); +} + +static int +utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED) +{ + return 4; +} + +static int +utf32be_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + *p++ = (UChar )((code & 0xff000000) >>24); + *p++ = (UChar )((code & 0xff0000) >>16); + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar ) (code & 0xff); + return 4; +} + +static int +utf32be_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) { + *fold++ = 0; + *fold++ = 0; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*(p+3) == 0x49) { + *fold++ = 0x01; + *fold = 0x31; + (*pp) += 4; + return 4; + } + } +#endif + + *fold++ = 0; + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3)); + *pp += 4; + return 4; + } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end, + fold); +} + +#if 0 +static int +utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += 4; + + if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { + int c, v; + + p += 3; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf32be_left_adjust_char_head(const UChar* start, const UChar* s) +{ + int rem; + + if (s <= start) return (UChar* )s; + + rem = (s - start) % 4; + return (UChar* )(s - rem); +} + +static int +utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF32_BE = { + utf32be_mbc_enc_len, + "UTF-32BE", /* name */ + 4, /* max byte length */ + 4, /* min byte length */ + utf32be_is_mbc_newline, + utf32be_mbc_to_code, + utf32be_code_to_mbclen, + utf32be_code_to_mbc, + utf32be_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf32be_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf32be_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/enc/utf32_le.c b/enc/utf32_le.c new file mode 100644 index 0000000..8f413bf --- /dev/null +++ b/enc/utf32_le.c @@ -0,0 +1,184 @@ +/********************************************************************** + utf32_le.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +utf32le_mbc_enc_len(const UChar* p ARG_UNUSED) +{ + return 4; +} + +static int +utf32le_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 3 < end) { + if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *p == 0x0d || +#endif + *p == 0x85) + && *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00) + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) + && *(p+2) == 0x00 && *(p+3) == 0x00) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); +} + +static int +utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED) +{ + return 4; +} + +static int +utf32le_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + *p++ = (UChar ) (code & 0xff); + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar )((code & 0xff0000) >>16); + *p++ = (UChar )((code & 0xff000000) >>24); + return 4; +} + +static int +utf32le_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x31; + *fold++ = 0x01; + } + } + else { +#endif + *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *fold++ = 0; +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + *fold++ = 0; + *fold = 0; + *pp += 4; + return 4; + } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end, + fold); +} + +#if 0 +static int +utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += 4; + + if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { + int c, v; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf32le_left_adjust_char_head(const UChar* start, const UChar* s) +{ + int rem; + + if (s <= start) return (UChar* )s; + + rem = (s - start) % 4; + return (UChar* )(s - rem); +} + +static int +utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF32_LE = { + utf32le_mbc_enc_len, + "UTF-32LE", /* name */ + 4, /* max byte length */ + 4, /* min byte length */ + utf32le_is_mbc_newline, + utf32le_mbc_to_code, + utf32le_code_to_mbclen, + utf32le_code_to_mbc, + utf32le_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf32le_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf32le_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/enc/utf8.c b/enc/utf8.c new file mode 100644 index 0000000..5e2c172 --- /dev/null +++ b/enc/utf8.c @@ -0,0 +1,305 @@ +/********************************************************************** + utf8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define USE_INVALID_CODE_SCHEME + +#ifdef USE_INVALID_CODE_SCHEME +/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ +#define INVALID_CODE_FE 0xfffffffe +#define INVALID_CODE_FF 0xffffffff +#define VALID_CODE_LIMIT 0x7fffffff +#endif + +#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) + +static const int EncLen_UTF8[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 +}; + +static int +mbc_enc_len(const UChar* p) +{ + return EncLen_UTF8[*p]; +} + +static int +is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p < end) { + if (*p == 0x0a) return 1; + +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS +#ifndef USE_CRNL_AS_LINE_TERMINATOR + if (*p == 0x0d) return 1; +#endif + if (p + 1 < end) { + if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ + return 1; + if (p + 2 < end) { + if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) + && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ + return 1; + } + } +#endif + } + + return 0; +} + +static OnigCodePoint +mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + int c, len; + OnigCodePoint n; + + len = enclen(ONIG_ENCODING_UTF8, p); + c = *p++; + if (len > 1) { + len--; + n = c & ((1 << (6 - len)) - 1); + while (len--) { + c = *p++; + n = (n << 6) | (c & ((1 << 6) - 1)); + } + return n; + } + else { +#ifdef USE_INVALID_CODE_SCHEME + if (c > 0xfd) { + return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF); + } +#endif + return (OnigCodePoint )c; + } +} + +static int +code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xffffff80) == 0) return 1; + else if ((code & 0xfffff800) == 0) return 2; + else if ((code & 0xffff0000) == 0) return 3; + else if ((code & 0xffe00000) == 0) return 4; + else if ((code & 0xfc000000) == 0) return 5; + else if ((code & 0x80000000) == 0) return 6; +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) return 1; + else if (code == INVALID_CODE_FF) return 1; +#endif + else + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static int +code_to_mbc(OnigCodePoint code, UChar *buf) +{ +#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) +#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) + + if ((code & 0xffffff80) == 0) { + *buf = (UChar )code; + return 1; + } + else { + UChar *p = buf; + + if ((code & 0xfffff800) == 0) { + *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); + } + else if ((code & 0xffff0000) == 0) { + *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xffe00000) == 0) { + *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xfc000000) == 0) { + *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0x80000000) == 0) { + *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); + *p++ = UTF8_TRAILS(code, 24); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) { + *p = 0xfe; + return 1; + } + else if (code == INVALID_CODE_FF) { + *p = 0xff; + return 1; + } +#endif + else { + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + } + + *p++ = UTF8_TRAIL0(code); + return p - buf; + } +} + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0xc4; + *fold = 0xb1; + (*pp)++; + return 2; + } + } +#endif + + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ + } + else { + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag, + pp, end, fold); + } +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + (*pp)++; + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + } + else { + (*pp) += enclen(ONIG_ENCODING_UTF8, p); + + if (*p == 0xc3) { + int c = *(p + 1); + if (c >= 0x80) { + if (c <= (UChar )0x9e) { /* upper */ + if (c == (UChar )0x97) return FALSE; + return TRUE; + } + else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */ + if (c == (UChar )'\267') return FALSE; + return TRUE; + } + else if (c == (UChar )0x9f && + (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + } + } + } + + return FALSE; +} +#endif + + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, + const OnigCodePoint* ranges[]) +{ + *sb_out = 0x80; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + + +static UChar* +left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + + if (s <= start) return (UChar* )s; + p = s; + + while (!utf8_islead(*p) && p > start) p--; + return (UChar* )p; +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF8 = { + mbc_enc_len, + "UTF-8", /* name */ + 6, /* max byte length */ + 1, /* min byte length */ + is_mbc_newline, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; |