summary | refs | log | tree | commit | diff
path: root/src/regenc.c
diff options
context:
space:
mode:
author: Jörg Frings-Fürst <debian@jff.email> 2020-04-20 20:33:51 +0200
committer: Jörg Frings-Fürst <debian@jff.email> 2020-04-20 20:33:51 +0200
commit: 6b986090d954dbac91bbb3c43ce7c3328c91a780 (patch)
tree: 34b34e41a3f7b7f4794c75be4482bb14695f36a9 /src/regenc.c
parent: 4216de6a3336cbc6dddb572cb7e6ab6193bf3729 (diff)
New upstream version 6.9.5 [ref: upstream/6.9.5]
Diffstat (limited to 'src/regenc.c')
-rw-r--r-- src/regenc.c 66
1 files changed, 41 insertions, 25 deletions
diff --git a/src/regenc.c b/src/regenc.c
index 16ac313..dbfbc89 100644
--- a/src/regenc.c
+++ b/src/regenc.c
@@ -29,6 +29,9 @@
#include "regint.h"
+#define LARGE_S 0x53
+#define SMALL_S 0x73
+
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
#define INITED_LIST_SIZE 20
@@ -549,7 +552,7 @@ static int
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigApplyAllCaseFoldFunc f, void* arg)
{
- static OnigCodePoint ss[] = { 0x73, 0x73 };
+ static OnigCodePoint ss[] = { SMALL_S, SMALL_S };
return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
}
@@ -588,35 +591,48 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
- if (0x41 <= *p && *p <= 0x5a) {
+ int i, j, n;
+ static OnigUChar sa[] = { LARGE_S, SMALL_S };
+
+ if (0x41 <= *p && *p <= 0x5a) { /* A - Z */
+ if (*p == LARGE_S && ess_tsett_flag != 0 && end > p + 1
+ && (*(p+1) == LARGE_S || *(p+1) == SMALL_S)) { /* SS */
+ ss_combination:
+ items[0].byte_len = 2;
+ items[0].code_len = 1;
+ items[0].code[0] = (OnigCodePoint )0xdf;
+
+ n = 1;
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ if (sa[i] == *p && sa[j] == *(p+1))
+ continue;
+
+ items[n].byte_len = 2;
+ items[n].code_len = 2;
+ items[n].code[0] = (OnigCodePoint )sa[i];
+ items[n].code[1] = (OnigCodePoint )sa[j];
+ n++;
+ }
+ }
+ return 4;
+ }
+
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
- if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
- && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
- /* SS */
- items[1].byte_len = 2;
- items[1].code_len = 1;
- items[1].code[0] = (OnigCodePoint )0xdf;
- return 2;
- }
- else
- return 1;
+ return 1;
}
- else if (0x61 <= *p && *p <= 0x7a) {
+ else if (0x61 <= *p && *p <= 0x7a) { /* a - z */
+ if (*p == SMALL_S && ess_tsett_flag != 0 && end > p + 1
+ && (*(p+1) == SMALL_S || *(p+1) == LARGE_S)) {
+ goto ss_combination;
+ }
+
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
- if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
- && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
- /* ss */
- items[1].byte_len = 2;
- items[1].code_len = 1;
- items[1].code[0] = (OnigCodePoint )0xdf;
- return 2;
- }
- else
- return 1;
+ return 1;
}
else if (*p == 0xdf && ess_tsett_flag != 0) {
items[0].byte_len = 1;
@@ -676,7 +692,7 @@ extern int
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
{
if (p < end) {
- if (*p == 0x0a) return 1;
+ if (*p == NEWLINE_CODE) return 1;
}
return 0;
}
@@ -887,7 +903,7 @@ onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
{
OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
- if (code > 127) return 0;
+ if (code > ASCII_LIMIT) return 0;
return ONIGENC_IS_ASCII_CODE_WORD(code);
}