diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2024-03-03 19:11:32 +0100 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2024-03-03 19:11:32 +0100 |
commit | 00893e79fc62966067af1a106567db96bd170338 (patch) | |
tree | 52b35cd0b42ca28e62a2ffbecade2e13dd8332cf /lib/unigbrk/u-grapheme-breaks.h | |
parent | 26112352a774737e1ce5580c93654a26c1e82b39 (diff) |
New upstream version 1.2 (tag: upstream/1.2)
Diffstat (limited to 'lib/unigbrk/u-grapheme-breaks.h')
-rw-r--r-- | lib/unigbrk/u-grapheme-breaks.h | 33 |
1 files changed, 32 insertions, 1 deletions
diff --git a/lib/unigbrk/u-grapheme-breaks.h b/lib/unigbrk/u-grapheme-breaks.h
index 9adce155..3e8f4953 100644
--- a/lib/unigbrk/u-grapheme-breaks.h
+++ b/lib/unigbrk/u-grapheme-breaks.h
@@ -1,5 +1,5 @@
 /* Grapheme cluster break function.
-   Copyright (C) 2010-2022 Free Software Foundation, Inc.
+   Copyright (C) 2010-2024 Free Software Foundation, Inc.
    Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
 
    This file is free software.
@@ -37,6 +37,16 @@ FUNC (const UNIT *s, size_t n, char *p)
          -1 at the very beginning of the string. */
       int last_char_prop = -1;
 
+      /* True if the last character ends a sequence of Indic_Conjunct_Break
+         values: consonant {extend|linker}* */
+      bool incb_consonant_extended = false;
+      /* True if the last character ends a sequence of Indic_Conjunct_Break
+         values: consonant {extend|linker}* linker */
+      bool incb_consonant_extended_linker = false;
+      /* True if the last character ends a sequence of Indic_Conjunct_Break
+         values: consonant {extend|linker}* linker {extend|linker}* */
+      bool incb_consonant_extended_linker_extended = false;
+
       /* True if the last character ends an emoji modifier sequence
          \p{Extended_Pictographic} Extend*. */
       bool emoji_modifier_sequence = false;
@@ -56,6 +66,7 @@ FUNC (const UNIT *s, size_t n, char *p)
           ucs4_t uc;
           int count = U_MBTOUC (&uc, s, s_end - s);
           int prop = uc_graphemeclusterbreak_property (uc);
+          int incb = uc_indic_conjunct_break (uc);
 
           /* Break at the start of the string (GB1). */
           if (last_char_prop < 0)
@@ -96,6 +107,15 @@ FUNC (const UNIT *s, size_t n, char *p)
               /* No break after Prepend characters (GB9b). */
               else if (last_char_prop == GBP_PREPEND)
                 /* *p = 0 */;
+              /* No break within certain combinations of Indic_Conjunct_Break
+                 values: Between
+                   consonant {extend|linker}* linker {extend|linker}*
+                 and
+                   consonant
+                 (GB9c). */
+              else if (incb_consonant_extended_linker_extended
+                       && incb == UC_INDIC_CONJUNCT_BREAK_CONSONANT)
+                /* *p = 0 */;
               /* No break within emoji modifier sequences or emoji zwj sequences
                  (GB11). */
               else if (last_char_prop == GBP_ZWJ
@@ -111,6 +131,17 @@ FUNC (const UNIT *s, size_t n, char *p)
                 *p = 1;
             }
 
+          incb_consonant_extended_linker =
+            incb_consonant_extended && incb == UC_INDIC_CONJUNCT_BREAK_LINKER;
+          incb_consonant_extended_linker_extended =
+            (incb_consonant_extended_linker
+             || (incb_consonant_extended_linker_extended
+                 && incb >= UC_INDIC_CONJUNCT_BREAK_LINKER));
+          incb_consonant_extended =
+            (incb == UC_INDIC_CONJUNCT_BREAK_CONSONANT
+             || (incb_consonant_extended
+                 && incb >= UC_INDIC_CONJUNCT_BREAK_LINKER));
+
           emoji_modifier_sequence_before_last_char = emoji_modifier_sequence;
           emoji_modifier_sequence =
             (emoji_modifier_sequence && prop == GBP_EXTEND)