diff options
Diffstat (limited to 'lib/unilbrk/u32-possible-linebreaks.c')
-rw-r--r-- | lib/unilbrk/u32-possible-linebreaks.c | 27 |
1 files changed, 13 insertions, 14 deletions
diff --git a/lib/unilbrk/u32-possible-linebreaks.c b/lib/unilbrk/u32-possible-linebreaks.c index f242c9a8..f953be75 100644 --- a/lib/unilbrk/u32-possible-linebreaks.c +++ b/lib/unilbrk/u32-possible-linebreaks.c @@ -1,5 +1,5 @@ /* Line breaking of UTF-32 strings. - Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2006-2025 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This file is free software. @@ -43,7 +43,7 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, { if (n > 0) { - int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID1 : LBP_AL1); + int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL1); const uint32_t *s_end = s + n; /* We need 2 characters of lookahead: @@ -79,7 +79,7 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, (= last character, ignoring intervening characters of class CM or ZWJ) */ int prev2_ea = 0; /* EastAsian property of character before the previous character */ bool prev_initial_hyphen = false; /* the previous character was a - word-initial hyphen or U+2010 */ + word-initial hyphen or unambiguous hyphen */ bool prev_nus = false; /* before the previous character, there was a character with line break property LBP_NU and since then only characters with line break property LBP_SY @@ -143,11 +143,11 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, break; case LBP_CB: /* This is arbitrary. */ - prop = LBP_ID1; + prop = LBP_ID; break; - case LBP_SA: + case LBP_SA1: /* We don't handle complex scripts yet. - Treat LBP_SA like LBP_XX. */ + Treat LBP_SA1 like LBP_XX. */ case LBP_XX: /* This is arbitrary. */ prop = LBP_AL1; @@ -168,7 +168,7 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, last_prop = LBP_ZW; seen_space = NULL; } - else if (prop == LBP_CM || prop == LBP_ZWJ) + else if (prop == LBP_CM || prop == LBP_SA2 || prop == LBP_ZWJ) { /* (LB9) Don't break just before a combining character or zero-width joiner, except immediately after a mandatory @@ -243,14 +243,14 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, *p = UC_BREAK_PROHIBITED; } else if (prev_initial_hyphen - && (prop == LBP_AL1 || prop == LBP_AL2)) + && (prop == LBP_AL1 || prop == LBP_AL2 || prop == LBP_HL)) { /* (LB20a) Don't break after a word-initial hyphen. */ *p = UC_BREAK_PROHIBITED; } - else if (prev_prop == LBP_HL_BA && prop != LBP_HL) + else if (prev_prop == LBP_HL_HY && prop != LBP_HL) { - /* (LB21a) Don't break after Hebrew + Hyphen/Break-After, + /* (LB21a) Don't break after Hebrew + Hyphen/Unambiguous hyphen, before non-Hebrew. */ *p = UC_BREAK_PROHIBITED; } @@ -385,7 +385,7 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, || prev_prop == LBP_SP || prev_prop == LBP_ZW))) { prev_initial_hyphen = - (prop == LBP_HY || uc == 0x2010) + (prop == LBP_HY || prop == LBP_HH) && (prev_prop == LBP_BK || prev_prop == LBP_CR || prev_prop == LBP_LF || prev_prop == LBP_SP || prev_prop == LBP_ZW || prev_prop == LBP_CB || prev_prop == LBP_GL); @@ -393,9 +393,8 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, || prev_prop == LBP_AL2 || prev_prop == LBP_AS) ? LBP_AKLS_VI : - prev_prop == LBP_HL && (prop == LBP_HY - || (prop == LBP_BA && !ea)) - ? LBP_HL_BA : + prev_prop == LBP_HL && (prop == LBP_HY || prop == LBP_HH) + ? LBP_HL_HY : prop); prev2_ea = prev_ea; prev_ea = ea; |