diff options
Diffstat (limited to 'lib/unilbrk/u32-possible-linebreaks.c')
| -rw-r--r-- | lib/unilbrk/u32-possible-linebreaks.c | 66 | 
1 file changed, 63 insertions, 3 deletions
| diff --git a/lib/unilbrk/u32-possible-linebreaks.c b/lib/unilbrk/u32-possible-linebreaks.c index 05fda96e..eb28891d 100644 --- a/lib/unilbrk/u32-possible-linebreaks.c +++ b/lib/unilbrk/u32-possible-linebreaks.c @@ -1,5 +1,5 @@  /* Line breaking of UTF-32 strings. -   Copyright (C) 2001-2003, 2006-2022 Free Software Foundation, Inc. +   Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc.     Written by Bruno Haible <bruno@clisp.org>, 2001.     This file is free software. @@ -56,6 +56,7 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding,        do          {            ucs4_t uc = *s; +          s++;            int prop = unilbrkprop_lookup (uc);            if (prop == LBP_BK || prop == LBP_LF || prop == LBP_CR) @@ -90,6 +91,61 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding,                    /* This is arbitrary.  */                    prop = LBP_AL;                    break; +                case LBP_QU2: +                  /* For (LB15a): Replace LBP_QU2 with LBP_QU1 if the previous +                     character's line break property was not one of +                     BK, CR, LF, OP, QU, GL, SP, ZW.  
*/ +                  switch (prev_prop) +                    { +                    case LBP_BK: +                    case LBP_CR: +                    case LBP_LF: +                    case LBP_OP1: case LBP_OP2: +                    case LBP_QU1: case LBP_QU2: case LBP_QU3: +                    case LBP_GL: +                    case LBP_SP: +                    case LBP_ZW: +                      break; +                    default: +                      prop = LBP_QU1; +                      break; +                    } +                  break; +                case LBP_QU3: +                  /* For (LB15b): Replace LBP_QU3 with LBP_QU1 if the next +                     character's line break property is not one of +                     BK, CR, LF, SP, GL, WJ, CL, QU, CP, EX, IS, SY, ZW.  */ +                  { +                    int next_prop; +                    if (s < s_end) +                      { +                        ucs4_t next_uc = *s; +                        next_prop = unilbrkprop_lookup (next_uc); +                      } +                    else +                      next_prop = LBP_BK; +                    switch (next_prop) +                      { +                      case LBP_BK: +                      case LBP_CR: +                      case LBP_LF: +                      case LBP_SP: +                      case LBP_GL: +                      case LBP_WJ: +                      case LBP_CL: +                      case LBP_QU1: case LBP_QU2: case LBP_QU3: +                      case LBP_CP1: case LBP_CP2: +                      case LBP_EX: +                      case LBP_IS: +                      case LBP_SY: +                      case LBP_ZW: +                        break; +                      default: +                        prop = LBP_QU1; +                        break; +                      } +                  } +                  break;                  }                /* Deal with spaces and combining 
characters.  */ @@ -203,14 +259,16 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding,            else              ri_count = 0; -          s++;            p++;          }        while (s < s_end);      }  } -#undef u32_possible_linebreaks +#if defined IN_LIBUNISTRING +/* For backward compatibility with older versions of libunistring.  */ + +# undef u32_possible_linebreaks  void  u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, @@ -219,6 +277,8 @@ u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding,    u32_possible_linebreaks_loop (s, n, encoding, -1, p);  } +#endif +  void  u32_possible_linebreaks_v2 (const uint32_t *s, size_t n, const char *encoding,                              char *p) | 
