diff options
Diffstat (limited to 'lib/unilbrk/u8-possible-linebreaks.c')
-rw-r--r-- | lib/unilbrk/u8-possible-linebreaks.c | 67 |
1 files changed, 64 insertions, 3 deletions
diff --git a/lib/unilbrk/u8-possible-linebreaks.c b/lib/unilbrk/u8-possible-linebreaks.c index bc19f801..72d0749b 100644 --- a/lib/unilbrk/u8-possible-linebreaks.c +++ b/lib/unilbrk/u8-possible-linebreaks.c @@ -1,5 +1,5 @@ /* Line breaking of UTF-8 strings. - Copyright (C) 2001-2003, 2006-2022 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. This file is free software. @@ -62,6 +62,7 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding, { ucs4_t uc; int count = u8_mbtouc_unsafe (&uc, s, s_end - s); + s += count; int prop = unilbrkprop_lookup (uc); if (prop == LBP_BK || prop == LBP_LF || prop == LBP_CR) @@ -96,6 +97,62 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding, /* This is arbitrary. */ prop = LBP_AL; break; + case LBP_QU2: + /* For (LB15a): Replace LBP_QU2 with LBP_QU1 if the previous + character's line break property was not one of + BK, CR, LF, OP, QU, GL, SP, ZW. */ + switch (prev_prop) + { + case LBP_BK: + case LBP_CR: + case LBP_LF: + case LBP_OP1: case LBP_OP2: + case LBP_QU1: case LBP_QU2: case LBP_QU3: + case LBP_GL: + case LBP_SP: + case LBP_ZW: + break; + default: + prop = LBP_QU1; + break; + } + break; + case LBP_QU3: + /* For (LB15b): Replace LBP_QU3 with LBP_QU1 if the next + character's line break property is not one of + BK, CR, LF, SP, GL, WJ, CL, QU, CP, EX, IS, SY, ZW. */ + { + int next_prop; + if (s < s_end) + { + ucs4_t next_uc; + (void) u8_mbtouc_unsafe (&next_uc, s, s_end - s); + next_prop = unilbrkprop_lookup (next_uc); + } + else + next_prop = LBP_BK; + switch (next_prop) + { + case LBP_BK: + case LBP_CR: + case LBP_LF: + case LBP_SP: + case LBP_GL: + case LBP_WJ: + case LBP_CL: + case LBP_QU1: case LBP_QU2: case LBP_QU3: + case LBP_CP1: case LBP_CP2: + case LBP_EX: + case LBP_IS: + case LBP_SY: + case LBP_ZW: + break; + default: + prop = LBP_QU1; + break; + } + } + break; } /* Deal with spaces and combining characters. */ @@ -209,14 +266,16 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding, else ri_count = 0; - s += count; p += count; } while (s < s_end); } } -#undef u8_possible_linebreaks +#if defined IN_LIBUNISTRING +/* For backward compatibility with older versions of libunistring. */ + +# undef u8_possible_linebreaks void u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, @@ -225,6 +284,8 @@ u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, u8_possible_linebreaks_loop (s, n, encoding, -1, p); } +#endif + void u8_possible_linebreaks_v2 (const uint8_t *s, size_t n, const char *encoding, char *p) |