New upstream version 1.0upstream/1.0

author: Jörg Frings-Fürst <debian@jff.email> 2022-01-08 11:51:07 +0100
committer: Jörg Frings-Fürst <debian@jff.email> 2022-01-08 11:51:07 +0100
commit: be8efac78d067c138ad8dda03df4336e73f94887 (patch)
tree: 5f5254a628ba0ef72065b93d949d1c985742ea8e /lib/unilbrk/ulc-width-linebreaks.c
parent: 7b65dbd4ebade81d504cfe5e681292a58ad1fdf0 (diff)
1 files changed, 64 insertions, 30 deletions
diff --git a/lib/unilbrk/ulc-width-linebreaks.c b/lib/unilbrk/ulc-width-linebreaks.c
index fbe970de..b7e6c7c5 100644
--- a/lib/unilbrk/ulc-width-linebreaks.c
+++ b/lib/unilbrk/ulc-width-linebreaks.c
@@ -1,28 +1,27 @@
 /* Line breaking of strings.
-   Copyright (C) 2001-2003, 2006-2018 Free Software Foundation, Inc.
+   Copyright (C) 2001-2003, 2006-2022 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2001.
 
-   This program is free software: you can redistribute it and/or
-   modify it under the terms of either:
-
-     * the GNU Lesser General Public License as published by the Free
-       Software Foundation; either version 3 of the License, or (at your
-       option) any later version.
-
-   or
-
-     * the GNU General Public License as published by the Free
-       Software Foundation; either version 2 of the License, or (at your
-       option) any later version.
-
-   or both in parallel, as here.
-   This program is distributed in the hope that it will be useful,
+   This file is free software.
+   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
+   You can redistribute it and/or modify it under either
+     - the terms of the GNU Lesser General Public License as published
+       by the Free Software Foundation; either version 3, or (at your
+       option) any later version, or
+     - the terms of the GNU General Public License as published by the
+       Free Software Foundation; either version 2, or (at your option)
+       any later version, or
+     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
+
+   This file is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
+   Lesser General Public License and the GNU General Public License
+   for more details.
 
-   You should have received a copy of the GNU Lesser General Public License
-   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+   You should have received a copy of the GNU Lesser General Public
+   License and of the GNU General Public License along with this
+   program.  If not, see <https://www.gnu.org/licenses/>.  */
 
 #include <config.h>
 
@@ -34,6 +33,8 @@
 
 #include "c-ctype.h"
 #include "uniconv.h"
+#include "unilbrk/internal.h"
+#include "unilbrk/lbrktables.h"
 #include "unilbrk/ulc-common.h"
 
 /* Line breaking of a string in an arbitrary encoding.
@@ -49,16 +50,16 @@
    but this is not backed by an RFC.  So we use UTF-8. It supports
    characters up to \U7FFFFFFF and is unambiguously defined.  */
 
-int
-ulc_width_linebreaks (const char *s, size_t n,
-                      int width, int start_column, int at_end_columns,
-                      const char *o, const char *encoding,
-                      char *p)
+static int
+ulc_width_linebreaks_internal (const char *s, size_t n,
+                               int width, int start_column, int at_end_columns,
+                               const char *o, const char *encoding, int cr,
+                               char *p)
 {
   if (n > 0)
     {
       if (is_utf8_encoding (encoding))
-        return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p);
+        return u8_width_linebreaks_internal ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, cr, p);
       else
         {
           /* Convert the string to UTF-8 and build a translation table
@@ -95,7 +96,7 @@ ulc_width_linebreaks (const char *s, size_t n,
 
                       /* Determine the line breaks of the UTF-8 string.  */
                       res_column =
-                        u8_width_linebreaks (t, m, width, start_column, at_end_columns, o8, encoding, q);
+                        u8_width_linebreaks_internal (t, m, width, start_column, at_end_columns, o8, encoding, cr, q);
 
                       /* Translate the result back to the original string.  */
                       memset (p, UC_BREAK_PROHIBITED, n);
@@ -117,7 +118,7 @@ ulc_width_linebreaks (const char *s, size_t n,
           if (is_all_ascii (s, n))
             {
               /* ASCII is a subset of UTF-8.  */
-              return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p);
+              return u8_width_linebreaks_internal ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, cr, p);
             }
 #endif
           /* We have a non-ASCII string and cannot convert it.
@@ -128,9 +129,16 @@ ulc_width_linebreaks (const char *s, size_t n,
             const char *s_end = s + n;
             while (s < s_end)
               {
-                *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n'
+                *p = ((o != NULL && *o == UC_BREAK_MANDATORY)
+                      || *s == '\n'
                       ? UC_BREAK_MANDATORY
-                      : UC_BREAK_PROHIBITED);
+                      : ((o != NULL && *o == UC_BREAK_CR_BEFORE_LF)
+                         || (cr >= 0
+                             && *s == '\r'
+                             && s + 1 < s_end
+                             && *(s + 1) == '\n')
+                         ? UC_BREAK_CR_BEFORE_LF
+                         : UC_BREAK_PROHIBITED));
                 s++;
                 p++;
                 if (o != NULL)
@@ -143,6 +151,30 @@ ulc_width_linebreaks (const char *s, size_t n,
   return start_column;
 }
 
+#undef ulc_width_linebreaks
+
+int
+ulc_width_linebreaks (const char *s, size_t n,
+                      int width, int start_column, int at_end_columns,
+                      const char *o, const char *encoding,
+                      char *p)
+{
+  return ulc_width_linebreaks_internal (s, n,
+                                        width, start_column, at_end_columns,
+                                        o, encoding, -1, p);
+}
+
+int
+ulc_width_linebreaks_v2 (const char *s, size_t n,
+                         int width, int start_column, int at_end_columns,
+                         const char *o, const char *encoding,
+                         char *p)
+{
+  return ulc_width_linebreaks_internal (s, n,
+                                        width, start_column, at_end_columns,
+                                        o, encoding, LBP_CR, p);
+}
+
 
 #ifdef TEST
 
@@ -210,7 +242,7 @@ main (int argc, char * argv[])
       char *breaks = malloc (length);
       int i;
 
-      ulc_width_linebreaks (input, length, width, 0, 0, NULL, locale_charset (), breaks);
+      ulc_width_linebreaks_v2 (input, length, width, 0, 0, NULL, locale_charset (), breaks);
 
       for (i = 0; i < length; i++)
         {
@@ -221,6 +253,8 @@ main (int argc, char * argv[])
               break;
             case UC_BREAK_MANDATORY:
               break;
+            case UC_BREAK_CR_BEFORE_LF:
+              break;
             case UC_BREAK_PROHIBITED:
               break;
             default:
author	Jörg Frings-Fürst <debian@jff.email>	2022-01-08 11:51:07 +0100
committer	Jörg Frings-Fürst <debian@jff.email>	2022-01-08 11:51:07 +0100
commit	be8efac78d067c138ad8dda03df4336e73f94887 (patch)
tree	5f5254a628ba0ef72065b93d949d1c985742ea8e /lib/unilbrk/ulc-width-linebreaks.c
parent	7b65dbd4ebade81d504cfe5e681292a58ad1fdf0 (diff)