summary refs log tree commit diff
path: root/src/gb18030.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gb18030.c')
-rw-r--r-- src/gb18030.c 34
1 files changed, 20 insertions, 14 deletions
diff --git a/src/gb18030.c b/src/gb18030.c
index 7654432..50898eb 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -2,8 +2,8 @@
gb18030.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2005-2018 KUBO Takehiro <kubo AT jiubao DOT org>
- * K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2005-2019 KUBO Takehiro <kubo AT jiubao DOT org>
+ * K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,7 @@
#if 1
#define DEBUG_GB18030(arg)
#else
+#include <stdio.h>
#define DEBUG_GB18030(arg) printf arg
#endif
@@ -67,15 +68,29 @@ gb18030_mbc_enc_len(const UChar* p)
{
if (GB18030_MAP[*p] != CM)
return 1;
+
p++;
if (GB18030_MAP[*p] == C4)
return 4;
- if (GB18030_MAP[*p] == C1)
- return 1; /* illegal sequence */
+
return 2;
}
static int
+gb18030_code_to_mbclen(OnigCodePoint code) /* maps code to 4, 2 or 1 (presumably its encoded byte length) or ONIGERR_INVALID_CODE_POINT_VALUE */
+{
+ if ((code & 0xff000000) != 0) return 4; /* any bit set in bits 24-31 */
+ else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; /* highest set bits are in 16-23: rejected, no length 3 is ever returned */
+ else if ((code & 0xff00) != 0) return 2; /* highest set bits are in 8-15 */
+ else { /* value fits in the low 8 bits */
+ if (GB18030_MAP[(int )(code & 0xff)] == CM) /* gb18030_mbc_enc_len only continues past the first byte when its map entry is CM, so CM looks like a multibyte lead-byte class; confirm against GB18030_MAP */
+ return ONIGERR_INVALID_CODE_POINT_VALUE; /* a CM-class byte is not accepted as a one-byte code */
+
+ return 1; /* NOTE(review): only this one-byte branch consults GB18030_MAP; the 4 and 2 branches above do not check individual bytes */
+ }
+}
+
+static int
is_valid_mbc_string(const UChar* p, const UChar* end)
{
while (p < end) {
@@ -135,15 +150,6 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
pp, end, lower);
}
-#if 0
-static int
-gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
-}
-#endif
-
static int
gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
@@ -522,7 +528,7 @@ OnigEncodingType OnigEncodingGB18030 = {
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
gb18030_mbc_to_code,
- onigenc_mb4_code_to_mbclen,
+ gb18030_code_to_mbclen,
gb18030_code_to_mbc,
gb18030_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,