summaryrefslogtreecommitdiff
path: root/lib/unigbrk/u-grapheme-breaks.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/unigbrk/u-grapheme-breaks.h')
-rw-r--r--lib/unigbrk/u-grapheme-breaks.h131
1 files changed, 131 insertions, 0 deletions
diff --git a/lib/unigbrk/u-grapheme-breaks.h b/lib/unigbrk/u-grapheme-breaks.h
new file mode 100644
index 00000000..18d8bcb1
--- /dev/null
+++ b/lib/unigbrk/u-grapheme-breaks.h
@@ -0,0 +1,131 @@
+/* Grapheme cluster break function.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+void
+FUNC (const UNIT *s, size_t n, char *p)
+{
+ if (n > 0)
+ {
+ const UNIT *s_end = s + n;
+
+ /* Grapheme Cluster break property of the last character.
+ -1 at the very beginning of the string. */
+ int last_char_prop = -1;
+
+ /* Grapheme Cluster break property of the last complex character.
+ -1 at the very beginning of the string. */
+ int last_compchar_prop = -1;
+
+ size_t ri_count = 0;
+
+ /* Don't break inside multibyte characters. */
+ memset (p, 0, n);
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count = U_MBTOUC (&uc, s, s_end - s);
+ int prop = uc_graphemeclusterbreak_property (uc);
+
+ /* Break at the start of the string (GB1). */
+ if (last_char_prop < 0)
+ *p = 1;
+ else
+ {
+ /* No break between CR and LF (GB3). */
+ if (last_char_prop == GBP_CR && prop == GBP_LF)
+ /* *p = 0 */;
+ /* Break before and after newlines (GB4, GB5). */
+ else if ((last_char_prop == GBP_CR
+ || last_char_prop == GBP_LF
+ || last_char_prop == GBP_CONTROL)
+ || (prop == GBP_CR
+ || prop == GBP_LF
+ || prop == GBP_CONTROL))
+ *p = 1;
+ /* No break between Hangul syllable sequences (GB6, GB7, GB8). */
+ else if ((last_char_prop == GBP_L
+ && (prop == GBP_L
+ || prop == GBP_V
+ || prop == GBP_LV
+ || prop == GBP_LVT))
+ || ((last_char_prop == GBP_LV
+ || last_char_prop == GBP_V)
+ && (prop == GBP_V
+ || prop == GBP_T))
+ || ((last_char_prop == GBP_LVT
+ || last_char_prop == GBP_T)
+ && prop == GBP_T))
+ /* *p = 0 */;
+ /* No break before extending characters or ZWJ (GB9). */
+ else if (prop == GBP_EXTEND || prop == GBP_ZWJ)
+ /* *p = 0 */;
+ /* No break before SpacingMarks (GB9a). */
+ else if (prop == GBP_SPACINGMARK)
+ /* *p = 0 */;
+ /* No break after Prepend characters (GB9b). */
+ else if (last_char_prop == GBP_PREPEND)
+ /* *p = 0 */;
+ /* No break within emoji modifier sequences (GB10). */
+ else if ((last_compchar_prop == GBP_EB
+ || last_compchar_prop == GBP_EBG)
+ && prop == GBP_EM)
+ /* *p = 0 */;
+ /* No break within emoji zwj sequences (GB11). */
+ else if (last_char_prop == GBP_ZWJ
+ && (prop == GBP_GAZ
+ || prop == GBP_EBG))
+ /* *p = 0 */;
+ /* No break between RI if there is an odd number of RI
+ characters before (GB12, GB13). */
+ else if (prop == GBP_RI)
+ {
+ if (ri_count % 2 == 0)
+ *p = 1;
+ /* else *p = 0; */
+ }
+ /* Break everywhere (GBP999). */
+ else
+ *p = 1;
+ }
+
+ last_char_prop = prop;
+
+ if (!(prop == GBP_EXTEND
+ && (last_compchar_prop == GBP_EB
+ || last_compchar_prop == GBP_EBG)))
+ last_compchar_prop = prop;
+
+ if (prop == GBP_RI)
+ ri_count++;
+ else
+ ri_count = 0;
+
+ s += count;
+ p += count;
+ }
+ }
+}