diff options
Diffstat (limited to 'src/utf16_be.c')
-rw-r--r-- | src/utf16_be.c | 68 |
1 files changed, 31 insertions, 37 deletions
diff --git a/src/utf16_be.c b/src/utf16_be.c index 22bf74d..d99af71 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -103,7 +103,25 @@ utf16be_mbc_enc_len(const UChar* p) static int is_valid_mbc_string(const UChar* s, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF16_BE, s, end); + while (s < end) { + int len = utf16be_mbc_enc_len(s); + if (len == 4) { + if (s + 2 >= end) + return FALSE; + if (! UTF16_IS_SURROGATE_SECOND(*(s+2))) + return FALSE; + } + else + if (UTF16_IS_SURROGATE_SECOND(*s)) + return FALSE; + + s += len; + } + + if (s != end) + return FALSE; + else + return TRUE; } static int @@ -146,7 +164,15 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) static int utf16be_code_to_mbclen(OnigCodePoint code) { - return (code > 0xffff ? 4 : 2); + if (code > 0xffff) { + if (code > 0x10ffff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + else + return 4; + } + else { + return 2; + } } static int @@ -201,39 +227,6 @@ utf16be_mbc_case_fold(OnigCaseFoldType flag, pp, end, fold); } -#if 0 -static int -utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp) += EncLen_UTF16[*p]; - - if (*p == 0) { - int c, v; - - p++; - if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - return TRUE; - } - - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); - - if ((v | BIT_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); - } - - return FALSE; -} -#endif - static UChar* utf16be_left_adjust_char_head(const UChar* start, const UChar* s) { @@ -243,7 +236,8 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s) s--; } - if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1) + if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1 && + UTF16_IS_SURROGATE_FIRST(*(s-2))) s -= 2; return (UChar* )s; |