diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2018-09-05 22:06:58 +0200 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2018-09-05 22:06:58 +0200 | 
| commit | c9b3c26db59d89e320a6eb86d9558051ecba0900 (patch) | |
| tree | f262530a11155451c4d60b04493ea580e7d3e054 /src/unicode.c | |
| parent | 83ea1be016fd5fc27ee721cf62a18716675815dc (diff) | |
New upstream version 6.9.0 (upstream/6.9.0)
Diffstat (limited to 'src/unicode.c')
| -rw-r--r-- | src/unicode.c | 70 | 
1 files changed, 29 insertions, 41 deletions
| diff --git a/src/unicode.c b/src/unicode.c index a8bae66..63bc65c 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -2,7 +2,7 @@    unicode.c -  Oniguruma (regular expression library)  **********************************************************************/  /*- - * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>   * All rights reserved.   *   * Redistribution and use in source and binary forms, with or without @@ -104,7 +104,7 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,    }  #endif -  buk = unicode_unfold_key(code); +  buk = onigenc_unicode_unfold_key(code);    if (buk != 0) {      if (buk->fold_len == 1) {        return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold); @@ -316,7 +316,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,    }  #endif -  buk = unicode_unfold_key(code); +  buk = onigenc_unicode_unfold_key(code);    if (buk != 0) {      if (buk->fold_len == 1) {        int un; @@ -356,7 +356,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,          for (fn = 0; fn < 2; fn++) {            int index;            cs[fn][0] = FOLDS2_FOLD(buk->index)[fn]; -          index = unicode_fold1_key(&cs[fn][0]); +          index = onigenc_unicode_fold1_key(&cs[fn][0]);            if (index >= 0) {              int m = FOLDS1_UNFOLDS_NUM(index);              for (i = 0; i < m; i++) { @@ -393,7 +393,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,          for (fn = 0; fn < 3; fn++) {            int index;            cs[fn][0] = FOLDS3_FOLD(buk->index)[fn]; -          index = unicode_fold1_key(&cs[fn][0]); +          index = onigenc_unicode_fold1_key(&cs[fn][0]);            if (index >= 0) {              int m = FOLDS1_UNFOLDS_NUM(index);              for (i = 0; i < m; i++) { @@ -424,7 +424,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,      }    }    else { -    int index = 
unicode_fold1_key(&code); +    int index = onigenc_unicode_fold1_key(&code);      if (index >= 0) {        int m = FOLDS1_UNFOLDS_NUM(index);        for (i = 0; i < m; i++) { @@ -447,7 +447,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,      codes[0] = code;      code = ONIGENC_MBC_TO_CODE(enc, p, end); -    buk = unicode_unfold_key(code); +    buk = onigenc_unicode_unfold_key(code);      if (buk != 0 && buk->fold_len == 1) {        codes[1] = *FOLDS1_FOLD(buk->index);      } @@ -457,7 +457,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,      clen = enclen(enc, p);      len += clen; -    index = unicode_fold2_key(codes); +    index = onigenc_unicode_fold2_key(codes);      if (index >= 0) {        m = FOLDS2_UNFOLDS_NUM(index);        for (i = 0; i < m; i++) { @@ -471,7 +471,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,      p += clen;      if (p < end) {        code = ONIGENC_MBC_TO_CODE(enc, p, end); -      buk = unicode_unfold_key(code); +      buk = onigenc_unicode_unfold_key(code);        if (buk != 0 && buk->fold_len == 1) {          codes[2] = *FOLDS1_FOLD(buk->index);        } @@ -481,7 +481,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,        clen = enclen(enc, p);        len += clen; -      index = unicode_fold3_key(codes); +      index = onigenc_unicode_fold3_key(codes);        if (index >= 0) {          m = FOLDS3_UNFOLDS_NUM(index);          for (i = 0; i < m; i++) { @@ -497,13 +497,19 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,    return n;  } +#ifdef USE_UNICODE_PROPERTIES +#include "unicode_property_data.c" +#else +#include "unicode_property_data_posix.c" +#endif +  #ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER  enum EGCB_BREAK_TYPE {    EGCB_NOT_BREAK = 0,    EGCB_BREAK     = 1, -  EGCB_BREAK_UNDEF_E_MODIFIER = 2, +  EGCB_BREAK_UNDEF_GB11  = 2,    EGCB_BREAK_UNDEF_RI_RI = 3  }; @@ -517,10 +523,13 @@ enum EGCB_TYPE {    EGCB_Regional_Indicator = 6,    
EGCB_SpacingMark = 7,    EGCB_ZWJ         = 8, +#if 0 +  /* obsoleted */    EGCB_E_Base         = 9,    EGCB_E_Base_GAZ     = 10,    EGCB_E_Modifier     = 11,    EGCB_Glue_After_Zwj = 12, +#endif    EGCB_L   = 13,    EGCB_LV  = 14,    EGCB_LVT = 15, @@ -588,7 +597,7 @@ unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)          && (to == EGCB_V || to == EGCB_T)) return EGCB_NOT_BREAK;      /* GB8 */ -    if ((from == EGCB_LVT || from == EGCB_T) && (to == EGCB_T)) +    if ((to == EGCB_T) && (from == EGCB_LVT || from == EGCB_T))        return EGCB_NOT_BREAK;      goto GB999; @@ -602,16 +611,13 @@ unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)    /* GB9b */    if (from == EGCB_Prepend) return EGCB_NOT_BREAK; -  /* GB10 */ -  if (to == EGCB_E_Modifier) { -    if (from == EGCB_E_Base || from == EGCB_E_Base_GAZ) return EGCB_NOT_BREAK; -    if (from == EGCB_Extend) return EGCB_BREAK_UNDEF_E_MODIFIER; -    goto GB999; -  } +  /* GB10 removed */    /* GB11 */    if (from == EGCB_ZWJ) { -    if (to == EGCB_Glue_After_Zwj || to == EGCB_E_Base_GAZ) return EGCB_NOT_BREAK; +    if (onigenc_unicode_is_code_ctype(to_code, PROP_INDEX_EXTENDEDPICTOGRAPHIC)) +      return EGCB_BREAK_UNDEF_GB11; +      goto GB999;    } @@ -664,12 +670,13 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,      return 1;      break; -  case EGCB_BREAK_UNDEF_E_MODIFIER: +  case EGCB_BREAK_UNDEF_GB11:      while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {        from = ONIGENC_MBC_TO_CODE(enc, prev, end); -      type = egcb_get_type(from); -      if (type == EGCB_E_Base || type == EGCB_E_Base_GAZ) +      if (onigenc_unicode_is_code_ctype(from, PROP_INDEX_EXTENDEDPICTOGRAPHIC))          return 0; + +      type = egcb_get_type(from);        if (type != EGCB_Extend)          break;      } @@ -700,25 +707,6 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,  } -/* - Undefine __GNUC__ for 
Escape warnings in Clang. - -./unicode_property_data.c:26730:44: warning: static variable -      'unicode_prop_name_pool_contents' is used in an inline function with -      external linkage [-Wstatic-in-inline] -              register const char *s = o + unicode_prop_name_pool; -*/ - -#ifdef __clang__ -#undef __GNUC__ -#endif - -#ifdef USE_UNICODE_PROPERTIES -#include "unicode_property_data.c" -#else -#include "unicode_property_data_posix.c" -#endif -  #define USER_DEFINED_PROPERTY_MAX_NUM  20  typedef struct { | 
