summary | refs | log | tree | commit | diff
path: root/src/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/unicode.c')
-rw-r--r-- src/unicode.c 70
1 files changed, 29 insertions, 41 deletions
diff --git a/src/unicode.c b/src/unicode.c
index a8bae66..63bc65c 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -2,7 +2,7 @@
unicode.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -104,7 +104,7 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
}
#endif
- buk = unicode_unfold_key(code);
+ buk = onigenc_unicode_unfold_key(code);
if (buk != 0) {
if (buk->fold_len == 1) {
return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
@@ -316,7 +316,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
}
#endif
- buk = unicode_unfold_key(code);
+ buk = onigenc_unicode_unfold_key(code);
if (buk != 0) {
if (buk->fold_len == 1) {
int un;
@@ -356,7 +356,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (fn = 0; fn < 2; fn++) {
int index;
cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
- index = unicode_fold1_key(&cs[fn][0]);
+ index = onigenc_unicode_fold1_key(&cs[fn][0]);
if (index >= 0) {
int m = FOLDS1_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
@@ -393,7 +393,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (fn = 0; fn < 3; fn++) {
int index;
cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
- index = unicode_fold1_key(&cs[fn][0]);
+ index = onigenc_unicode_fold1_key(&cs[fn][0]);
if (index >= 0) {
int m = FOLDS1_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
@@ -424,7 +424,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
}
}
else {
- int index = unicode_fold1_key(&code);
+ int index = onigenc_unicode_fold1_key(&code);
if (index >= 0) {
int m = FOLDS1_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
@@ -447,7 +447,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
codes[0] = code;
code = ONIGENC_MBC_TO_CODE(enc, p, end);
- buk = unicode_unfold_key(code);
+ buk = onigenc_unicode_unfold_key(code);
if (buk != 0 && buk->fold_len == 1) {
codes[1] = *FOLDS1_FOLD(buk->index);
}
@@ -457,7 +457,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
clen = enclen(enc, p);
len += clen;
- index = unicode_fold2_key(codes);
+ index = onigenc_unicode_fold2_key(codes);
if (index >= 0) {
m = FOLDS2_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
@@ -471,7 +471,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
p += clen;
if (p < end) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
- buk = unicode_unfold_key(code);
+ buk = onigenc_unicode_unfold_key(code);
if (buk != 0 && buk->fold_len == 1) {
codes[2] = *FOLDS1_FOLD(buk->index);
}
@@ -481,7 +481,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
clen = enclen(enc, p);
len += clen;
- index = unicode_fold3_key(codes);
+ index = onigenc_unicode_fold3_key(codes);
if (index >= 0) {
m = FOLDS3_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
@@ -497,13 +497,19 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
return n;
}
+#ifdef USE_UNICODE_PROPERTIES
+#include "unicode_property_data.c"
+#else
+#include "unicode_property_data_posix.c"
+#endif
+
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
enum EGCB_BREAK_TYPE {
EGCB_NOT_BREAK = 0,
EGCB_BREAK = 1,
- EGCB_BREAK_UNDEF_E_MODIFIER = 2,
+ EGCB_BREAK_UNDEF_GB11 = 2,
EGCB_BREAK_UNDEF_RI_RI = 3
};
@@ -517,10 +523,13 @@ enum EGCB_TYPE {
EGCB_Regional_Indicator = 6,
EGCB_SpacingMark = 7,
EGCB_ZWJ = 8,
+#if 0
+ /* obsoleted */
EGCB_E_Base = 9,
EGCB_E_Base_GAZ = 10,
EGCB_E_Modifier = 11,
EGCB_Glue_After_Zwj = 12,
+#endif
EGCB_L = 13,
EGCB_LV = 14,
EGCB_LVT = 15,
@@ -588,7 +597,7 @@ unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)
&& (to == EGCB_V || to == EGCB_T)) return EGCB_NOT_BREAK;
/* GB8 */
- if ((from == EGCB_LVT || from == EGCB_T) && (to == EGCB_T))
+ if ((to == EGCB_T) && (from == EGCB_LVT || from == EGCB_T))
return EGCB_NOT_BREAK;
goto GB999;
@@ -602,16 +611,13 @@ unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)
/* GB9b */
if (from == EGCB_Prepend) return EGCB_NOT_BREAK;
- /* GB10 */
- if (to == EGCB_E_Modifier) {
- if (from == EGCB_E_Base || from == EGCB_E_Base_GAZ) return EGCB_NOT_BREAK;
- if (from == EGCB_Extend) return EGCB_BREAK_UNDEF_E_MODIFIER;
- goto GB999;
- }
+ /* GB10 removed */
/* GB11 */
if (from == EGCB_ZWJ) {
- if (to == EGCB_Glue_After_Zwj || to == EGCB_E_Base_GAZ) return EGCB_NOT_BREAK;
+ if (onigenc_unicode_is_code_ctype(to_code, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
+ return EGCB_BREAK_UNDEF_GB11;
+
goto GB999;
}
@@ -664,12 +670,13 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
return 1;
break;
- case EGCB_BREAK_UNDEF_E_MODIFIER:
+ case EGCB_BREAK_UNDEF_GB11:
while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
from = ONIGENC_MBC_TO_CODE(enc, prev, end);
- type = egcb_get_type(from);
- if (type == EGCB_E_Base || type == EGCB_E_Base_GAZ)
+ if (onigenc_unicode_is_code_ctype(from, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
return 0;
+
+ type = egcb_get_type(from);
if (type != EGCB_Extend)
break;
}
@@ -700,25 +707,6 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
}
-/*
- Undefine __GNUC__ for Escape warnings in Clang.
-
-./unicode_property_data.c:26730:44: warning: static variable
- 'unicode_prop_name_pool_contents' is used in an inline function with
- external linkage [-Wstatic-in-inline]
- register const char *s = o + unicode_prop_name_pool;
-*/
-
-#ifdef __clang__
-#undef __GNUC__
-#endif
-
-#ifdef USE_UNICODE_PROPERTIES
-#include "unicode_property_data.c"
-#else
-#include "unicode_property_data_posix.c"
-#endif
-
#define USER_DEFINED_PROPERTY_MAX_NUM 20
typedef struct {