summary | refs | log | tree | commit | diff
path: root/src/utf16_le.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utf16_le.c')
-rw-r--r-- src/utf16_le.c 59
1 files changed, 22 insertions, 37 deletions
diff --git a/src/utf16_le.c b/src/utf16_le.c
index 4b231c6..c6edd94 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = {
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
- return (code > 0xffff ? 4 : 2);
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 4;
+ }
+ else {
+ return 2;
+ }
}
static int
@@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
const UChar* end1 = end - 1;
while (p < end1) {
- p += utf16le_mbc_enc_len(p);
+ int len = utf16le_mbc_enc_len(p);
+ if (len == 4) {
+ if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
+ return FALSE;
+
+ p += len;
}
if (p != end)
@@ -210,39 +227,6 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag,
fold);
}
-#if 0
-static int
-utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
- const UChar* end)
-{
- const UChar* p = *pp;
-
- (*pp) += EncLen_UTF16[*(p+1)];
-
- if (*(p+1) == 0) {
- int c, v;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- return TRUE;
- }
-
- c = *p;
- v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
- (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (c >= 0xaa && c <= 0xba)
- return FALSE;
- else
- return TRUE;
- }
- return (v != 0 ? TRUE : FALSE);
- }
-
- return FALSE;
-}
-#endif
-
static UChar*
utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
{
@@ -252,7 +236,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-1)))
s -= 2;
return (UChar* )s;