From 40f3d0030e6e98bcb02d6523e5ee48497dec49a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Wed, 7 Aug 2019 09:32:48 +0200 Subject: New upstream version 6.9.3 --- src/utf16_be.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'src/utf16_be.c') diff --git a/src/utf16_be.c b/src/utf16_be.c index 22bf74d..b66d868 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2002-2019 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -103,7 +103,25 @@ utf16be_mbc_enc_len(const UChar* p) static int is_valid_mbc_string(const UChar* s, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF16_BE, s, end); + while (s < end) { + int len = utf16be_mbc_enc_len(s); + if (len == 4) { + if (s + 2 >= end) + return FALSE; + if (! UTF16_IS_SURROGATE_SECOND(*(s+2))) + return FALSE; + } + else + if (UTF16_IS_SURROGATE_SECOND(*s)) + return FALSE; + + s += len; + } + + if (s != end) + return FALSE; + else + return TRUE; } static int @@ -146,7 +164,15 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) static int utf16be_code_to_mbclen(OnigCodePoint code) { - return (code > 0xffff ? 4 : 2); + if (code > 0xffff) { + if (code > 0x10ffff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + else + return 4; + } + else { + return 2; + } } static int @@ -243,7 +269,8 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s) s--; } - if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1) + if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1 && + UTF16_IS_SURROGATE_FIRST(*(s-2))) s -= 2; return (UChar* )s; -- cgit v1.2.3 From 4216de6a3336cbc6dddb572cb7e6ab6193bf3729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Fri, 29 Nov 2019 11:26:35 +0100 Subject: New upstream version 6.9.4 --- src/utf16_be.c | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) (limited to 'src/utf16_be.c') diff --git a/src/utf16_be.c b/src/utf16_be.c index b66d868..d99af71 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2019 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -227,39 +227,6 @@ utf16be_mbc_case_fold(OnigCaseFoldType flag, pp, end, fold); } -#if 0 -static int -utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp) += EncLen_UTF16[*p]; - - if (*p == 0) { - int c, v; - - p++; - if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - return TRUE; - } - - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); - - if ((v | BIT_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); - } - - return FALSE; -} -#endif - static UChar* utf16be_left_adjust_char_head(const UChar* start, const UChar* s) { -- cgit v1.2.3