summary | refs | log | tree | commit | diff
path: root/lib/unilbrk/u8-possible-linebreaks.c
diff options
context:
space:
mode:
author: Jörg Frings-Fürst <debian@jff.email> 2024-03-03 19:11:58 +0100
committer: Jörg Frings-Fürst <debian@jff.email> 2024-03-03 19:11:58 +0100
commit: 9853b168f68cbb09b75a817343cedde2aca4c76c (patch)
tree: db628840acea83dbccaf5676b89579a80e02ef51 /lib/unilbrk/u8-possible-linebreaks.c
parent: d83e85a2e6064c36f6ad3c848e39d8b8c101c4f7 (diff)
parent: 7cf710f6587e71a193a55d84dd6d8ae1a8a69ce0 (diff)
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'lib/unilbrk/u8-possible-linebreaks.c')
-rw-r--r-- lib/unilbrk/u8-possible-linebreaks.c 67
1 files changed, 64 insertions, 3 deletions
diff --git a/lib/unilbrk/u8-possible-linebreaks.c b/lib/unilbrk/u8-possible-linebreaks.c
index bc19f801..72d0749b 100644
--- a/lib/unilbrk/u8-possible-linebreaks.c
+++ b/lib/unilbrk/u8-possible-linebreaks.c
@@ -1,5 +1,5 @@
/* Line breaking of UTF-8 strings.
- Copyright (C) 2001-2003, 2006-2022 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This file is free software.
@@ -62,6 +62,7 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
{
ucs4_t uc;
int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
+ s += count;
int prop = unilbrkprop_lookup (uc);
if (prop == LBP_BK || prop == LBP_LF || prop == LBP_CR)
@@ -96,6 +97,62 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
/* This is arbitrary. */
prop = LBP_AL;
break;
+ case LBP_QU2:
+ /* For (LB15a): Replace LBP_QU2 with LBP_QU1 if the previous
+ character's line break property was not one of
+ BK, CR, LF, OP, QU, GL, SP, ZW. */
+ switch (prev_prop)
+ {
+ case LBP_BK:
+ case LBP_CR:
+ case LBP_LF:
+ case LBP_OP1: case LBP_OP2:
+ case LBP_QU1: case LBP_QU2: case LBP_QU3:
+ case LBP_GL:
+ case LBP_SP:
+ case LBP_ZW:
+ break;
+ default:
+ prop = LBP_QU1;
+ break;
+ }
+ break;
+ case LBP_QU3:
+ /* For (LB15b): Replace LBP_QU3 with LBP_QU1 if the next
+ character's line break property is not one of
+ BK, CR, LF, SP, GL, WJ, CL, QU, CP, EX, IS, SY, ZW. */
+ {
+ int next_prop;
+ if (s < s_end)
+ {
+ ucs4_t next_uc;
+ (void) u8_mbtouc_unsafe (&next_uc, s, s_end - s);
+ next_prop = unilbrkprop_lookup (next_uc);
+ }
+ else
+ next_prop = LBP_BK;
+ switch (next_prop)
+ {
+ case LBP_BK:
+ case LBP_CR:
+ case LBP_LF:
+ case LBP_SP:
+ case LBP_GL:
+ case LBP_WJ:
+ case LBP_CL:
+ case LBP_QU1: case LBP_QU2: case LBP_QU3:
+ case LBP_CP1: case LBP_CP2:
+ case LBP_EX:
+ case LBP_IS:
+ case LBP_SY:
+ case LBP_ZW:
+ break;
+ default:
+ prop = LBP_QU1;
+ break;
+ }
+ }
+ break;
}
/* Deal with spaces and combining characters. */
@@ -209,14 +266,16 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,
else
ri_count = 0;
- s += count;
p += count;
}
while (s < s_end);
}
}
-#undef u8_possible_linebreaks
+#if defined IN_LIBUNISTRING
+/* For backward compatibility with older versions of libunistring. */
+
+# undef u8_possible_linebreaks
void
u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding,
@@ -225,6 +284,8 @@ u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding,
u8_possible_linebreaks_loop (s, n, encoding, -1, p);
}
+#endif
+
void
u8_possible_linebreaks_v2 (const uint8_t *s, size_t n, const char *encoding,
char *p)