summary | refs | log | tree | commit | diff
path: root/lib/unilbrk/u16-possible-linebreaks.c
diff options
context:
space:
mode:
author Jörg Frings-Fürst <debian@jff.email> 2024-03-24 08:54:48 +0100
committer Jörg Frings-Fürst <debian@jff.email> 2024-03-24 08:54:48 +0100
commit 163a663518f33bab48b28431972e580b366b4d49 (patch)
tree f518ffabaca4a0b93f0103d617e803792d3b0b43 /lib/unilbrk/u16-possible-linebreaks.c
parent 1b3a8d5ad2ea2f099d514d9dd51ebf926a628076 (diff)
parent dd0000f7e25abe6c28d4329d324fd7fcab54094f (diff)
Merge branch 'release/debian/1.2-1' (HEAD, debian/1.2-1, master)
Diffstat (limited to 'lib/unilbrk/u16-possible-linebreaks.c')
-rw-r--r-- lib/unilbrk/u16-possible-linebreaks.c 67
1 files changed, 64 insertions, 3 deletions
diff --git a/lib/unilbrk/u16-possible-linebreaks.c b/lib/unilbrk/u16-possible-linebreaks.c
index 2b79d6aa..6a9c15b7 100644
--- a/lib/unilbrk/u16-possible-linebreaks.c
+++ b/lib/unilbrk/u16-possible-linebreaks.c
@@ -1,5 +1,5 @@
/* Line breaking of UTF-16 strings.
- Copyright (C) 2001-2003, 2006-2022 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This file is free software.
@@ -62,6 +62,7 @@ u16_possible_linebreaks_loop (const uint16_t *s, size_t n, const char *encoding,
{
ucs4_t uc;
int count = u16_mbtouc_unsafe (&uc, s, s_end - s);
+ s += count;
int prop = unilbrkprop_lookup (uc);
if (prop == LBP_BK || prop == LBP_LF || prop == LBP_CR)
@@ -96,6 +97,62 @@ u16_possible_linebreaks_loop (const uint16_t *s, size_t n, const char *encoding,
/* This is arbitrary. */
prop = LBP_AL;
break;
+ case LBP_QU2:
+ /* For (LB15a): Replace LBP_QU2 with LBP_QU1 if the previous
+ character's line break property was not one of
+ BK, CR, LF, OP, QU, GL, SP, ZW. */
+ switch (prev_prop)
+ {
+ case LBP_BK:
+ case LBP_CR:
+ case LBP_LF:
+ case LBP_OP1: case LBP_OP2:
+ case LBP_QU1: case LBP_QU2: case LBP_QU3:
+ case LBP_GL:
+ case LBP_SP:
+ case LBP_ZW:
+ break;
+ default:
+ prop = LBP_QU1;
+ break;
+ }
+ break;
+ case LBP_QU3:
+ /* For (LB15b): Replace LBP_QU3 with LBP_QU1 if the next
+ character's line break property is not one of
+ BK, CR, LF, SP, GL, WJ, CL, QU, CP, EX, IS, SY, ZW. */
+ {
+ int next_prop;
+ if (s < s_end)
+ {
+ ucs4_t next_uc;
+ (void) u16_mbtouc_unsafe (&next_uc, s, s_end - s);
+ next_prop = unilbrkprop_lookup (next_uc);
+ }
+ else
+ next_prop = LBP_BK;
+ switch (next_prop)
+ {
+ case LBP_BK:
+ case LBP_CR:
+ case LBP_LF:
+ case LBP_SP:
+ case LBP_GL:
+ case LBP_WJ:
+ case LBP_CL:
+ case LBP_QU1: case LBP_QU2: case LBP_QU3:
+ case LBP_CP1: case LBP_CP2:
+ case LBP_EX:
+ case LBP_IS:
+ case LBP_SY:
+ case LBP_ZW:
+ break;
+ default:
+ prop = LBP_QU1;
+ break;
+ }
+ }
+ break;
}
/* Deal with spaces and combining characters. */
@@ -209,14 +266,16 @@ u16_possible_linebreaks_loop (const uint16_t *s, size_t n, const char *encoding,
else
ri_count = 0;
- s += count;
p += count;
}
while (s < s_end);
}
}
-#undef u16_possible_linebreaks
+#if defined IN_LIBUNISTRING
+/* For backward compatibility with older versions of libunistring. */
+
+# undef u16_possible_linebreaks
void
u16_possible_linebreaks (const uint16_t *s, size_t n, const char *encoding,
@@ -225,6 +284,8 @@ u16_possible_linebreaks (const uint16_t *s, size_t n, const char *encoding,
u16_possible_linebreaks_loop (s, n, encoding, -1, p);
}
+#endif
+
void
u16_possible_linebreaks_v2 (const uint16_t *s, size_t n, const char *encoding,
char *p)