diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2022-01-08 11:53:52 +0100 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2022-01-08 11:53:52 +0100 |
commit | fa838e76139763f902c7d27cb9e1d393ed6a15e4 (patch) | |
tree | 7d0ae09775ea950056193eaa2ca93844299d46f1 /lib/uniwbrk/u-wordbreaks.h | |
parent | c78359d9542c86b972aac373efcf7bc7a8a560e5 (diff) | |
parent | 2959e59fab3bab834368adefd90bd4b1b094366b (diff) |
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'lib/uniwbrk/u-wordbreaks.h')
-rw-r--r-- | lib/uniwbrk/u-wordbreaks.h | 59 |
1 file changed, 35 insertions, 24 deletions
diff --git a/lib/uniwbrk/u-wordbreaks.h b/lib/uniwbrk/u-wordbreaks.h index 0d881c7b..e8eb01a7 100644 --- a/lib/uniwbrk/u-wordbreaks.h +++ b/lib/uniwbrk/u-wordbreaks.h @@ -1,28 +1,30 @@ /* Word breaks in UTF-8/UTF-16/UTF-32 strings. -*- coding: utf-8 -*- - Copyright (C) 2009-2018 Free Software Foundation, Inc. + Copyright (C) 2009-2022 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. - This program is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - This program is distributed in the hope that it will be useful, + This file is free software. + It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". + You can redistribute it and/or modify it under either + - the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version, or + - the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) + any later version, or + - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". + + This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. + Lesser General Public License and the GNU General Public License + for more details. - You should have received a copy of the GNU Lesser General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. 
*/ + You should have received a copy of the GNU Lesser General Public + License and of the GNU General Public License along with this + program. If not, see <https://www.gnu.org/licenses/>. */ + +/* This file implements section 4 "Word Boundaries" + of Unicode Standard Annex #29 <https://www.unicode.org/reports/tr29/>. */ void FUNC (const UNIT *s, size_t n, char *p) @@ -48,6 +50,8 @@ FUNC (const UNIT *s, size_t n, char *p) -1 at the very beginning of the string. */ int secondlast_compchar_prop = -1; + /* Number of consecutive regional indicator (RI) characters seen + immediately before the current point. */ size_t ri_count = 0; /* Don't break inside multibyte characters. */ @@ -74,11 +78,18 @@ FUNC (const UNIT *s, size_t n, char *p) || prop == WBP_NEWLINE)) *p = 1; /* No break within emoji zwj sequence (WB3c). */ - else if (last_char_prop == WBP_ZWJ && - (prop == WBP_GAZ || prop == WBP_EBG)) + else if (last_char_prop == WBP_ZWJ + && uc_is_property_extended_pictographic (uc)) + /* *p = 0 */; + /* Keep horizontal whitespace together (WB3d). */ + else if (last_char_prop == WBP_WSS && prop == WBP_WSS) /* *p = 0 */; - /* Ignore Format and Extend characters. */ - else if (!(prop == WBP_EXTEND || prop == WBP_FORMAT || prop == WBP_ZWJ)) + /* Ignore Format and Extend characters (WB4). */ + else if (prop == WBP_EXTEND + || prop == WBP_FORMAT + || prop == WBP_ZWJ) + /* *p = 0 */; + else { /* No break in these situations (see UAX #29): @@ -147,7 +158,7 @@ FUNC (const UNIT *s, size_t n, char *p) last_char_prop = prop; /* Ignore Format and Extend characters, except at the - start of the line. */ + start of the line (WB4). */ if (last_compchar_prop < 0 || last_compchar_prop == WBP_CR || last_compchar_prop == WBP_LF |