summary | refs | log | tree | commit | diff
path: root/lib/unilbrk/u8-possible-linebreaks.c
diff options
context:
space:
mode:
author: Jörg Frings-Fürst <debian@jff.email> 2025-10-18 19:07:35 +0200
committer: Jörg Frings-Fürst <debian@jff.email> 2025-10-18 19:07:35 +0200
commit: fa23d938c040bc8af305a31fa874df55b2a02576 (patch)
tree: 9704e2f7bd8962ea8911cd6f4e2d37227d7eff2e /lib/unilbrk/u8-possible-linebreaks.c
parent: df9dbf9b0915e432a4a2c4182f60af36374eaaab (diff)
parent: 693ae7b71dfdd1a8146266b5794a71c0dbe5dff0 (diff)
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'lib/unilbrk/u8-possible-linebreaks.c')
-rw-r--r-- lib/unilbrk/u8-possible-linebreaks.c 27
1 files changed, 13 insertions, 14 deletions
diff --git a/lib/unilbrk/u8-possible-linebreaks.c b/lib/unilbrk/u8-possible-linebreaks.c
index 90d64c51..bc4796f4 100644
--- a/lib/unilbrk/u8-possible-linebreaks.c
+++ b/lib/unilbrk/u8-possible-linebreaks.c
@@ -1,5 +1,5 @@
/* Line breaking of UTF-8 strings.
- Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2006-2025 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This file is free software.
@@ -45,7 +45,7 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
{
if (n > 0)
{
- int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID1 : LBP_AL1);
+ int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL1);
/* Don't break inside multibyte characters. */
memset (p, UC_BREAK_PROHIBITED, n);
@@ -85,7 +85,7 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
(= last character, ignoring intervening characters of class CM or ZWJ) */
int prev2_ea = 0; /* EastAsian property of character before the previous character */
bool prev_initial_hyphen = false; /* the previous character was a
- word-initial hyphen or U+2010 */
+ word-initial hyphen or unambiguous hyphen */
bool prev_nus = false; /* before the previous character, there was a character
with line break property LBP_NU and since then
only characters with line break property LBP_SY
@@ -150,11 +150,11 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
break;
case LBP_CB:
/* This is arbitrary. */
- prop = LBP_ID1;
+ prop = LBP_ID;
break;
- case LBP_SA:
+ case LBP_SA1:
/* We don't handle complex scripts yet.
- Treat LBP_SA like LBP_XX. */
+ Treat LBP_SA1 like LBP_XX. */
case LBP_XX:
/* This is arbitrary. */
prop = LBP_AL1;
@@ -175,7 +175,7 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
last_prop = LBP_ZW;
seen_space = NULL;
}
- else if (prop == LBP_CM || prop == LBP_ZWJ)
+ else if (prop == LBP_CM || prop == LBP_SA2 || prop == LBP_ZWJ)
{
/* (LB9) Don't break just before a combining character or
zero-width joiner, except immediately after a mandatory
@@ -250,14 +250,14 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
*p = UC_BREAK_PROHIBITED;
}
else if (prev_initial_hyphen
- && (prop == LBP_AL1 || prop == LBP_AL2))
+ && (prop == LBP_AL1 || prop == LBP_AL2 || prop == LBP_HL))
{
/* (LB20a) Don't break after a word-initial hyphen. */
*p = UC_BREAK_PROHIBITED;
}
- else if (prev_prop == LBP_HL_BA && prop != LBP_HL)
+ else if (prev_prop == LBP_HL_HY && prop != LBP_HL)
{
- /* (LB21a) Don't break after Hebrew + Hyphen/Break-After,
+ /* (LB21a) Don't break after Hebrew + Hyphen/Unambiguous hyphen,
before non-Hebrew. */
*p = UC_BREAK_PROHIBITED;
}
@@ -392,7 +392,7 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
|| prev_prop == LBP_SP || prev_prop == LBP_ZW)))
{
prev_initial_hyphen =
- (prop == LBP_HY || uc == 0x2010)
+ (prop == LBP_HY || prop == LBP_HH)
&& (prev_prop == LBP_BK || prev_prop == LBP_CR || prev_prop == LBP_LF
|| prev_prop == LBP_SP || prev_prop == LBP_ZW
|| prev_prop == LBP_CB || prev_prop == LBP_GL);
@@ -400,9 +400,8 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
|| prev_prop == LBP_AL2
|| prev_prop == LBP_AS)
? LBP_AKLS_VI :
- prev_prop == LBP_HL && (prop == LBP_HY
- || (prop == LBP_BA && !ea))
- ? LBP_HL_BA :
+ prev_prop == LBP_HL && (prop == LBP_HY || prop == LBP_HH)
+ ? LBP_HL_HY :
prop);
prev2_ea = prev_ea;
prev_ea = ea;