summary | refs | log | tree | commit | diff
path: root/tests/unilbrk/test-uc-possible-linebreaks.c
diff options
context:
space:
mode:
Diffstat (limited to 'tests/unilbrk/test-uc-possible-linebreaks.c')
-rw-r--r-- tests/unilbrk/test-uc-possible-linebreaks.c 57
1 files changed, 31 insertions, 26 deletions
diff --git a/tests/unilbrk/test-uc-possible-linebreaks.c b/tests/unilbrk/test-uc-possible-linebreaks.c
index 31bab9eb..bf268286 100644
--- a/tests/unilbrk/test-uc-possible-linebreaks.c
+++ b/tests/unilbrk/test-uc-possible-linebreaks.c
@@ -1,5 +1,5 @@
/* Line break function test, using test data from UCD.
- Copyright (C) 2024 Free Software Foundation, Inc.
+ Copyright (C) 2024-2026 Free Software Foundation, Inc.
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -118,25 +118,33 @@ main (int argc, char *argv[])
u32_possible_linebreaks (input, i - 1, "UTF-8", breaks);
int matches = 1;
- {
- int j;
- for (j = 0; j < i - 1; j++)
- {
- /* The character U+FFFC has line break property CB, which according
- to rule (LB1) is resolved "into other line breaking classes
- depending on criteria outside the scope of this algorithm".
- Thus it makes no sense to check the breaks[] entry before or
- after such a character. */
- if (!(input[j] == 0xFFFC
- || (j > 0 && input[j - 1] == 0xFFFC)
- /* Also consider intervening characters with property LBP_CM
- or LBP_ZWJ, per (LB9). */
- || (j > 1 && (input[j - 1] == 0x0308 || input[j - 1] == 0x200D)
- && input[j - 2] == 0xFFFC)))
- /* A regional indicator with a combining character is nonsense,
- because regional indicators are supposed to come in pairs. */
- if (!(j >= 2 && (input[0] >= 0x1F1E6 && input[0] <= 0x1F1FF)
- && input[1] == 0x0308))
+ for (int j = 0; j < i - 1; j++)
+ {
+ /* The character U+FFFC has line break property CB, which according
+ to rule (LB1) is resolved "into other line breaking classes
+ depending on criteria outside the scope of this algorithm".
+ Thus it makes no sense to check the breaks[] entry before or
+ after such a character. */
+ if (!(input[j] == 0xFFFC
+ || (j > 0 && input[j - 1] == 0xFFFC)
+ /* Also consider intervening characters with property LBP_CM
+ or LBP_ZWJ, per (LB9). */
+ || (j > 1 && (input[j - 1] == 0x0308 || input[j - 1] == 0x200D)
+ && input[j - 2] == 0xFFFC)))
+ /* A regional indicator with a combining character is nonsense,
+ because regional indicators are supposed to come in pairs. */
+ if (!(j >= 2 && (input[0] >= 0x1F1E6 && input[0] <= 0x1F1FF)
+ && input[1] == 0x0308))
+ /* It is nonsense to treat U+1F8FF differently than U+1F02C.
+ Both are unassigned Extended_Pictographic characters and
+ should therefore be treated like LBP_EB (or LBP_ID, if you
+ want), not like LBP_AL. See rule (LB30b). */
+ if (!(input[j] == 0x1F8FF
+ || (j > 0 && input[j - 1] == 0x1F8FF)
+ /* Also consider intervening characters with property LBP_CM
+ or LBP_ZWJ, per (LB9). */
+ || (j > 1 && (input[j - 1] == 0x0308 || input[j - 1] == 0x200D)
+ && input[j - 2] == 0x1F8FF)))
/* There is a disagreement regarding whether to allow a line break
after a U+0020 SPACE character at the start of the text.
We consider that the start of the text is equivalent to the
@@ -159,20 +167,17 @@ main (int argc, char *argv[])
|| breaks[j] == UC_BREAK_CR_BEFORE_LF)
|| (j > 0 && breaks[j - 1] == UC_BREAK_MANDATORY))
== breaks_expected[j];
- }
- }
+ }
if (!matches)
{
- int j;
-
fprintf (stderr, "%s:%d: expected: ", filename, lineno);
- for (j = 0; j < i - 1; j++)
+ for (int j = 0; j < i - 1; j++)
fprintf (stderr, "%s U+%04X ",
breaks_expected[j] == 1 ? "\303\267" : "\303\227",
input[j]);
fprintf (stderr, "\n");
fprintf (stderr, "%s:%d: actual: ", filename, lineno);
- for (j = 0; j < i - 1; j++)
+ for (int j = 0; j < i - 1; j++)
fprintf (stderr, "%s U+%04X ",
(!(breaks[j] == UC_BREAK_PROHIBITED
|| breaks[j] == UC_BREAK_MANDATORY