diff options
Diffstat (limited to 'lib/unilbrk/u8-possible-linebreaks.c')
| -rw-r--r-- | lib/unilbrk/u8-possible-linebreaks.c | 67 | 
1 file changed, 64 insertions, 3 deletions
| diff --git a/lib/unilbrk/u8-possible-linebreaks.c b/lib/unilbrk/u8-possible-linebreaks.c index bc19f801..72d0749b 100644 --- a/lib/unilbrk/u8-possible-linebreaks.c +++ b/lib/unilbrk/u8-possible-linebreaks.c @@ -1,5 +1,5 @@  /* Line breaking of UTF-8 strings. -   Copyright (C) 2001-2003, 2006-2022 Free Software Foundation, Inc. +   Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc.     Written by Bruno Haible <bruno@clisp.org>, 2001.     This file is free software. @@ -62,6 +62,7 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,          {            ucs4_t uc;            int count = u8_mbtouc_unsafe (&uc, s, s_end - s); +          s += count;            int prop = unilbrkprop_lookup (uc);            if (prop == LBP_BK || prop == LBP_LF || prop == LBP_CR) @@ -96,6 +97,62 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,                    /* This is arbitrary.  */                    prop = LBP_AL;                    break; +                case LBP_QU2: +                  /* For (LB15a): Replace LBP_QU2 with LBP_QU1 if the previous +                     character's line break property was not one of +                     BK, CR, LF, OP, QU, GL, SP, ZW.  
*/ +                  switch (prev_prop) +                    { +                    case LBP_BK: +                    case LBP_CR: +                    case LBP_LF: +                    case LBP_OP1: case LBP_OP2: +                    case LBP_QU1: case LBP_QU2: case LBP_QU3: +                    case LBP_GL: +                    case LBP_SP: +                    case LBP_ZW: +                      break; +                    default: +                      prop = LBP_QU1; +                      break; +                    } +                  break; +                case LBP_QU3: +                  /* For (LB15b): Replace LBP_QU3 with LBP_QU1 if the next +                     character's line break property is not one of +                     BK, CR, LF, SP, GL, WJ, CL, QU, CP, EX, IS, SY, ZW.  */ +                  { +                    int next_prop; +                    if (s < s_end) +                      { +                        ucs4_t next_uc; +                        (void) u8_mbtouc_unsafe (&next_uc, s, s_end - s); +                        next_prop = unilbrkprop_lookup (next_uc); +                      } +                    else +                      next_prop = LBP_BK; +                    switch (next_prop) +                      { +                      case LBP_BK: +                      case LBP_CR: +                      case LBP_LF: +                      case LBP_SP: +                      case LBP_GL: +                      case LBP_WJ: +                      case LBP_CL: +                      case LBP_QU1: case LBP_QU2: case LBP_QU3: +                      case LBP_CP1: case LBP_CP2: +                      case LBP_EX: +                      case LBP_IS: +                      case LBP_SY: +                      case LBP_ZW: +                        break; +                      default: +                        prop = LBP_QU1; +                        break; +                      } +                  } +                  break;         
         }                /* Deal with spaces and combining characters.  */ @@ -209,14 +266,16 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding,            else              ri_count = 0; -          s += count;            p += count;          }        while (s < s_end);      }  } -#undef u8_possible_linebreaks +#if defined IN_LIBUNISTRING +/* For backward compatibility with older versions of libunistring.  */ + +# undef u8_possible_linebreaks  void  u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, @@ -225,6 +284,8 @@ u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding,    u8_possible_linebreaks_loop (s, n, encoding, -1, p);  } +#endif +  void  u8_possible_linebreaks_v2 (const uint8_t *s, size_t n, const char *encoding,                             char *p) | 
