summary | refs | log | tree | commit | diff
path: root/tests/unilbrk/test-uc-possible-linebreaks.c
diff options
context:
space:
mode:
Diffstat (limited to 'tests/unilbrk/test-uc-possible-linebreaks.c')
-rw-r--r-- tests/unilbrk/test-uc-possible-linebreaks.c 56
1 files changed, 33 insertions, 23 deletions
diff --git a/tests/unilbrk/test-uc-possible-linebreaks.c b/tests/unilbrk/test-uc-possible-linebreaks.c
index 31bab9eb..a54699b6 100644
--- a/tests/unilbrk/test-uc-possible-linebreaks.c
+++ b/tests/unilbrk/test-uc-possible-linebreaks.c
@@ -1,5 +1,5 @@
/* Line break function test, using test data from UCD.
- Copyright (C) 2024 Free Software Foundation, Inc.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -137,28 +137,38 @@ main (int argc, char *argv[])
because regional indicators are supposed to come in pairs. */
if (!(j >= 2 && (input[0] >= 0x1F1E6 && input[0] <= 0x1F1FF)
&& input[1] == 0x0308))
- /* There is a disagreement regarding whether to allow a line break
- after a U+0020 SPACE character at the start of the text.
- We consider that the start of the text is equivalent to the
- state after a newline was seen; hence the loop starts with
- property LBP_BK. By the rules (LB4,LB5,LB6) an extra line
- break after a mandatory line break is undesired, even with
- intervening spaces (because these rules come before (LB18)).
- Whereas the LineBreakTest.txt file allows a line break after
- the space.
- Similarly when the first two characters at the start of the
- text have property LBP_CM and LBP_ZWJ, respectively. (LB9). */
- if (!(((j == 1 || (j > 1 && ((input[j - 2] >= 0x000A && input[j - 2] <= 0x000D) || input[j - 2] == 0x0085)))
- && input[j - 1] == 0x0020)
- || ((j == 2 || (j > 2 && ((input[j - 3] >= 0x000A && input[j - 3] <= 0x000D) || input[j - 3] == 0x0085)))
- && ((input[j - 2] == 0x0020 && input[j - 1] == 0x0020)
- || (input[j - 2] == 0x0308 && input[j - 1] == 0x200D)
- || (input[j - 2] == 0x200D && input[j - 1] == 0x0308)))))
- matches &= (!(breaks[j] == UC_BREAK_PROHIBITED
- || breaks[j] == UC_BREAK_MANDATORY
- || breaks[j] == UC_BREAK_CR_BEFORE_LF)
- || (j > 0 && breaks[j - 1] == UC_BREAK_MANDATORY))
- == breaks_expected[j];
+ /* It is nonsense to treat U+1F8FF differently than U+1F02C.
+ Both are unassigned Extended_Pictographic characters and
+ should therefore be treated like LBP_EB (or LBP_ID, if you
+ want), not like LBP_AL. See rule (LB30b). */
+ if (!(input[j] == 0x1F8FF
+ || (j > 0 && input[j - 1] == 0x1F8FF)
+ /* Also consider intervening characters with property LBP_CM
+ or LBP_ZWJ, per (LB9). */
+ || (j > 1 && (input[j - 1] == 0x0308 || input[j - 1] == 0x200D)
+ && input[j - 2] == 0x1F8FF)))
+ /* There is a disagreement regarding whether to allow a line break
+ after a U+0020 SPACE character at the start of the text.
+ We consider that the start of the text is equivalent to the
+ state after a newline was seen; hence the loop starts with
+ property LBP_BK. By the rules (LB4,LB5,LB6) an extra line
+ break after a mandatory line break is undesired, even with
+ intervening spaces (because these rules come before (LB18)).
+ Whereas the LineBreakTest.txt file allows a line break after
+ the space.
+ Similarly when the first two characters at the start of the
+ text have property LBP_CM and LBP_ZWJ, respectively. (LB9). */
+ if (!(((j == 1 || (j > 1 && ((input[j - 2] >= 0x000A && input[j - 2] <= 0x000D) || input[j - 2] == 0x0085)))
+ && input[j - 1] == 0x0020)
+ || ((j == 2 || (j > 2 && ((input[j - 3] >= 0x000A && input[j - 3] <= 0x000D) || input[j - 3] == 0x0085)))
+ && ((input[j - 2] == 0x0020 && input[j - 1] == 0x0020)
+ || (input[j - 2] == 0x0308 && input[j - 1] == 0x200D)
+ || (input[j - 2] == 0x200D && input[j - 1] == 0x0308)))))
+ matches &= (!(breaks[j] == UC_BREAK_PROHIBITED
+ || breaks[j] == UC_BREAK_MANDATORY
+ || breaks[j] == UC_BREAK_CR_BEFORE_LF)
+ || (j > 0 && breaks[j - 1] == UC_BREAK_MANDATORY))
+ == breaks_expected[j];
}
}
if (!matches)