diff options
| author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2016-08-31 03:42:05 +0200 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2016-08-31 03:42:05 +0200 | 
| commit | a76fa337cc657dbe669ffb8dbdac606d4d6616f1 (patch) | |
| tree | a6f004237df60876d087f79ac369fdc2545697c9 /src/utf8.c | |
| parent | 5e01a4852b31d537307994248869caf38b4023cc (diff) | |
Imported Upstream version 6.1.0 (ref: upstream/6.1.0)
Diffstat (limited to 'src/utf8.c')
| -rw-r--r-- | src/utf8.c | 36 | 
1 files changed, 32 insertions, 4 deletions
```diff
@@ -29,7 +29,7 @@
 
 #include "regenc.h"
 
-#define USE_INVALID_CODE_SCHEME
+//#define USE_INVALID_CODE_SCHEME
 
 #ifdef USE_INVALID_CODE_SCHEME
 /* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
@@ -39,6 +39,7 @@
 #endif
 
 #define utf8_islead(c)     ((UChar )((c) & 0xc0) != 0x80)
+#define utf8_istail(c)     ((UChar )((c) & 0xc0) == 0x80)
 
 static const int EncLen_UTF8[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -66,6 +67,30 @@ mbc_enc_len(const UChar* p)
 }
 
 static int
+is_valid_mbc_string(const UChar* p, const UChar* end)
+{
+  int i, len;
+
+  while (p < end) {
+    if (! utf8_islead(*p))
+      return FALSE;
+
+    len = mbc_enc_len(p++);
+    if (len > 1) {
+      for (i = 1; i < len; i++) {
+	if (p == end)
+	  return FALSE;
+
+	if (! utf8_istail(*p++))
+	  return FALSE;
+      }
+    }
+  }
+
+  return TRUE;
+}
+
+static int
 is_mbc_newline(const UChar* p, const UChar* end)
 {
   if (p < end) {
@@ -91,12 +116,14 @@ is_mbc_newline(const UChar* p, const UChar* end)
 }
 
 static OnigCodePoint
-mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
+mbc_to_code(const UChar* p, const UChar* end)
 {
   int c, len;
   OnigCodePoint n;
 
-  len = enclen(ONIG_ENCODING_UTF8, p);
+  len = mbc_enc_len(p);
+  if (len > end - p) len = end - p;
+
   c = *p++;
   if (len > 1) {
     len--;
@@ -303,5 +330,6 @@ OnigEncodingType OnigEncodingUTF8 = {
   left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match,
   NULL, /* init */
-  NULL  /* is_initialized */
+  NULL, /* is_initialized */
+  is_valid_mbc_string
 };
```
