summary | refs | log | tree | commit | diff
path: root/src/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/unicode.c')
-rw-r--r-- src/unicode.c 95
1 files changed, 60 insertions, 35 deletions
diff --git a/src/unicode.c b/src/unicode.c
index 6703d4b..efe5f73 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -2,7 +2,7 @@
unicode.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -77,9 +77,8 @@ static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
#include "unicode_fold_data.c"
extern int
-onigenc_unicode_mbc_case_fold(OnigEncoding enc,
- OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
- UChar* fold)
+onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end, UChar* fold)
{
const struct ByUnfoldKey* buk;
@@ -104,23 +103,27 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
}
#endif
- buk = onigenc_unicode_unfold_key(code);
- if (buk != 0) {
- if (buk->fold_len == 1) {
- return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
- }
- else {
- OnigCodePoint* addr;
-
- FOLDS_FOLD_ADDR_BUK(buk, addr);
- rlen = 0;
- for (i = 0; i < buk->fold_len; i++) {
- OnigCodePoint c = addr[i];
- len = ONIGENC_CODE_TO_MBC(enc, c, fold);
- fold += len;
- rlen += len;
+ if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) || ONIGENC_IS_ASCII_CODE(code)) {
+ buk = onigenc_unicode_unfold_key(code);
+ if (buk != 0) {
+ if (buk->fold_len == 1) {
+ if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
+ ONIGENC_IS_ASCII_CODE(*FOLDS1_FOLD(buk->index)))
+ return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
+ }
+ else {
+ OnigCodePoint* addr;
+
+ FOLDS_FOLD_ADDR_BUK(buk, addr);
+ rlen = 0;
+ for (i = 0; i < buk->fold_len; i++) {
+ OnigCodePoint c = addr[i];
+ len = ONIGENC_CODE_TO_MBC(enc, c, fold);
+ fold += len;
+ rlen += len;
+ }
+ return rlen;
}
- return rlen;
}
}
@@ -131,16 +134,22 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
}
static int
-apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
+apply_case_fold1(OnigCaseFoldType flag, int from, int to,
+ OnigApplyAllCaseFoldFunc f, void* arg)
{
int i, j, k, n, r;
for (i = from; i < to; ) {
OnigCodePoint fold = *FOLDS1_FOLD(i);
+ if (CASE_FOLD_IS_ASCII_ONLY(flag) && ! ONIGENC_IS_ASCII_CODE(fold)) break;
+
n = FOLDS1_UNFOLDS_NUM(i);
for (j = 0; j < n; j++) {
OnigCodePoint unfold = FOLDS1_UNFOLDS(i)[j];
+ if (CASE_FOLD_IS_ASCII_ONLY(flag) && ! ONIGENC_IS_ASCII_CODE(unfold))
+ continue;
+
r = (*f)(fold, &unfold, 1, arg);
if (r != 0) return r;
r = (*f)(unfold, &fold, 1, arg);
@@ -148,6 +157,9 @@ apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
for (k = 0; k < j; k++) {
OnigCodePoint unfold2 = FOLDS1_UNFOLDS(i)[k];
+ if (CASE_FOLD_IS_ASCII_ONLY(flag) &&
+ ! ONIGENC_IS_ASCII_CODE(unfold2)) continue;
+
r = (*f)(unfold, &unfold2, 1, arg);
if (r != 0) return r;
r = (*f)(unfold2, &unfold, 1, arg);
@@ -225,7 +237,7 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
{
int r;
- r = apply_case_fold1(0, FOLDS1_NORMAL_END_INDEX, f, arg);
+ r = apply_case_fold1(flag, 0, FOLDS1_NORMAL_END_INDEX, f, arg);
if (r != 0) return r;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
@@ -246,7 +258,7 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
}
else {
#endif
- r = apply_case_fold1(FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);
+ r = apply_case_fold1(flag, FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);
if (r != 0) return r;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
}
@@ -288,6 +300,9 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
n = 0;
code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (CASE_FOLD_IS_ASCII_ONLY(flag)) {
+ if (! ONIGENC_IS_ASCII_CODE(code)) return n;
+ }
len = enclen(enc, p);
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
@@ -449,19 +464,26 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
if (buk1 != 0) {
if (buk1->fold_len == 1) {
int un;
- items[0].byte_len = lens[0];
- items[0].code_len = 1;
- items[0].code[0] = *FOLDS1_FOLD(buk1->index);
- n++;
+
+ if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
+ ONIGENC_IS_ASCII_CODE(*FOLDS1_FOLD(buk1->index))) {
+ items[0].byte_len = lens[0];
+ items[0].code_len = 1;
+ items[0].code[0] = *FOLDS1_FOLD(buk1->index);
+ n++;
+ }
un = FOLDS1_UNFOLDS_NUM(buk1->index);
for (i = 0; i < un; i++) {
OnigCodePoint unfold = FOLDS1_UNFOLDS(buk1->index)[i];
if (unfold != orig_codes[0]) {
- items[n].byte_len = lens[0];
- items[n].code_len = 1;
- items[n].code[0] = unfold;
- n++;
+ if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
+ ONIGENC_IS_ASCII_CODE(unfold)) {
+ items[n].byte_len = lens[0];
+ items[n].code_len = 1;
+ items[n].code[0] = unfold;
+ n++;
+ }
}
}
}
@@ -548,10 +570,13 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
if (index >= 0) {
int m = FOLDS1_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
- items[n].byte_len = lens[0];
- items[n].code_len = 1;
- items[n].code[0] = FOLDS1_UNFOLDS(index)[i];
- n++;
+ code = FOLDS1_UNFOLDS(index)[i];
+ if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag)||ONIGENC_IS_ASCII_CODE(code)) {
+ items[n].byte_len = lens[0];
+ items[n].code_len = 1;
+ items[n].code[0] = code;
+ n++;
+ }
}
}
}