diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2019-12-23 07:44:50 +0100 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2019-12-23 07:44:50 +0100 |
commit | 9e629c8f43b43617fa5b7d3654f7d81e81b8a427 (patch) | |
tree | 581dcb2708a7eac0bcc7bbfa6478cfa50dfcf5a8 /src/gb18030.c | |
parent | 7bbf4ae1401bc6e40f71a32d3f97952796d85690 (diff) | |
parent | 091456e1a135d4674701a264495bd34918779391 (diff) |
Merge branch 'release/debian/6.9.4-1' debian/6.9.4-1
Diffstat (limited to 'src/gb18030.c')
-rw-r--r-- | src/gb18030.c | 34 |
1 files changed, 20 insertions, 14 deletions
```diff
diff --git a/src/gb18030.c b/src/gb18030.c
index 7654432..50898eb 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -2,8 +2,8 @@
   gb18030.c - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2005-2018 KUBO Takehiro <kubo AT jiubao DOT org>
- *                         K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2005-2019 KUBO Takehiro <kubo AT jiubao DOT org>
+ *                         K.Kosako
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,7 @@
 #if 1
 #define DEBUG_GB18030(arg)
 #else
+#include <stdio.h>
 #define DEBUG_GB18030(arg) printf arg
 #endif
 
@@ -67,15 +68,29 @@ gb18030_mbc_enc_len(const UChar* p)
 {
   if (GB18030_MAP[*p] != CM)
     return 1;
+
   p++;
   if (GB18030_MAP[*p] == C4)
     return 4;
-  if (GB18030_MAP[*p] == C1)
-    return 1; /* illegal sequence */
+
   return 2;
 }
 
 static int
+gb18030_code_to_mbclen(OnigCodePoint code)
+{
+  if ((code & 0xff000000) != 0) return 4;
+  else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+  else if ((code & 0xff00) != 0) return 2;
+  else {
+    if (GB18030_MAP[(int )(code & 0xff)] == CM)
+      return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+    return 1;
+  }
+}
+
+static int
 is_valid_mbc_string(const UChar* p, const UChar* end)
 {
   while (p < end) {
@@ -135,15 +150,6 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
                                       pp, end, lower);
 }
 
-#if 0
-static int
-gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
-                         const UChar** pp, const UChar* end)
-{
-  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
-}
-#endif
-
 static int
 gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
@@ -522,7 +528,7 @@ OnigEncodingType OnigEncodingGB18030 = {
   1, /* min enc length */
   onigenc_is_mbc_newline_0x0a,
   gb18030_mbc_to_code,
-  onigenc_mb4_code_to_mbclen,
+  gb18030_code_to_mbclen,
   gb18030_code_to_mbc,
   gb18030_mbc_case_fold,
   onigenc_ascii_apply_all_case_fold,
```