summaryrefslogtreecommitdiff
path: root/lib/unilbrk/ulc-width-linebreaks.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/unilbrk/ulc-width-linebreaks.c')
-rw-r--r--lib/unilbrk/ulc-width-linebreaks.c94
1 files changed, 64 insertions, 30 deletions
diff --git a/lib/unilbrk/ulc-width-linebreaks.c b/lib/unilbrk/ulc-width-linebreaks.c
index fbe970de..b7e6c7c5 100644
--- a/lib/unilbrk/ulc-width-linebreaks.c
+++ b/lib/unilbrk/ulc-width-linebreaks.c
@@ -1,28 +1,27 @@
/* Line breaking of strings.
- Copyright (C) 2001-2003, 2006-2018 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2006-2022 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
- This program is free software: you can redistribute it and/or
- modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
- This program is distributed in the hope that it will be useful,
+ This file is free software.
+ It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
+ You can redistribute it and/or modify it under either
+ - the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version, or
+ - the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option)
+ any later version, or
+ - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
+
+ This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
+ Lesser General Public License and the GNU General Public License
+ for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>. */
+ You should have received a copy of the GNU Lesser General Public
+ License and of the GNU General Public License along with this
+ program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
@@ -34,6 +33,8 @@
#include "c-ctype.h"
#include "uniconv.h"
+#include "unilbrk/internal.h"
+#include "unilbrk/lbrktables.h"
#include "unilbrk/ulc-common.h"
/* Line breaking of a string in an arbitrary encoding.
@@ -49,16 +50,16 @@
but this is not backed by an RFC. So we use UTF-8. It supports
characters up to \U7FFFFFFF and is unambiguously defined. */
-int
-ulc_width_linebreaks (const char *s, size_t n,
- int width, int start_column, int at_end_columns,
- const char *o, const char *encoding,
- char *p)
+static int
+ulc_width_linebreaks_internal (const char *s, size_t n,
+ int width, int start_column, int at_end_columns,
+ const char *o, const char *encoding, int cr,
+ char *p)
{
if (n > 0)
{
if (is_utf8_encoding (encoding))
- return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p);
+ return u8_width_linebreaks_internal ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, cr, p);
else
{
/* Convert the string to UTF-8 and build a translation table
@@ -95,7 +96,7 @@ ulc_width_linebreaks (const char *s, size_t n,
/* Determine the line breaks of the UTF-8 string. */
res_column =
- u8_width_linebreaks (t, m, width, start_column, at_end_columns, o8, encoding, q);
+ u8_width_linebreaks_internal (t, m, width, start_column, at_end_columns, o8, encoding, cr, q);
/* Translate the result back to the original string. */
memset (p, UC_BREAK_PROHIBITED, n);
@@ -117,7 +118,7 @@ ulc_width_linebreaks (const char *s, size_t n,
if (is_all_ascii (s, n))
{
/* ASCII is a subset of UTF-8. */
- return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p);
+ return u8_width_linebreaks_internal ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, cr, p);
}
#endif
/* We have a non-ASCII string and cannot convert it.
@@ -128,9 +129,16 @@ ulc_width_linebreaks (const char *s, size_t n,
const char *s_end = s + n;
while (s < s_end)
{
- *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n'
+ *p = ((o != NULL && *o == UC_BREAK_MANDATORY)
+ || *s == '\n'
? UC_BREAK_MANDATORY
- : UC_BREAK_PROHIBITED);
+ : ((o != NULL && *o == UC_BREAK_CR_BEFORE_LF)
+ || (cr >= 0
+ && *s == '\r'
+ && s + 1 < s_end
+ && *(s + 1) == '\n')
+ ? UC_BREAK_CR_BEFORE_LF
+ : UC_BREAK_PROHIBITED));
s++;
p++;
if (o != NULL)
@@ -143,6 +151,30 @@ ulc_width_linebreaks (const char *s, size_t n,
return start_column;
}
+#undef ulc_width_linebreaks
+
+int
+ulc_width_linebreaks (const char *s, size_t n,
+ int width, int start_column, int at_end_columns,
+ const char *o, const char *encoding,
+ char *p)
+{
+ return ulc_width_linebreaks_internal (s, n,
+ width, start_column, at_end_columns,
+ o, encoding, -1, p);
+}
+
+int
+ulc_width_linebreaks_v2 (const char *s, size_t n,
+ int width, int start_column, int at_end_columns,
+ const char *o, const char *encoding,
+ char *p)
+{
+ return ulc_width_linebreaks_internal (s, n,
+ width, start_column, at_end_columns,
+ o, encoding, LBP_CR, p);
+}
+
#ifdef TEST
@@ -210,7 +242,7 @@ main (int argc, char * argv[])
char *breaks = malloc (length);
int i;
- ulc_width_linebreaks (input, length, width, 0, 0, NULL, locale_charset (), breaks);
+ ulc_width_linebreaks_v2 (input, length, width, 0, 0, NULL, locale_charset (), breaks);
for (i = 0; i < length; i++)
{
@@ -221,6 +253,8 @@ main (int argc, char * argv[])
break;
case UC_BREAK_MANDATORY:
break;
+ case UC_BREAK_CR_BEFORE_LF:
+ break;
case UC_BREAK_PROHIBITED:
break;
default: