diff options
Diffstat (limited to 'lib/unistr/u8-mbtouc-unsafe.c')
| -rw-r--r-- | lib/unistr/u8-mbtouc-unsafe.c | 182 | 
1 files changed, 137 insertions, 45 deletions
| diff --git a/lib/unistr/u8-mbtouc-unsafe.c b/lib/unistr/u8-mbtouc-unsafe.c index 41583f96..01d12dc4 100644 --- a/lib/unistr/u8-mbtouc-unsafe.c +++ b/lib/unistr/u8-mbtouc-unsafe.c @@ -1,5 +1,5 @@  /* Look at first character in UTF-8 string. -   Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. +   Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.     Written by Bruno Haible <bruno@clisp.org>, 2001.     This program is free software: you can redistribute it and/or modify it @@ -52,13 +52,15 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)                           | (unsigned int) (s[1] ^ 0x80);                    return 2;                  } +#if CONFIG_UNICODE_SAFETY                /* invalid multibyte character */ +#endif              }            else              {                /* incomplete multibyte character */                *puc = 0xfffd; -              return n; +              return 1;              }          }        else if (c < 0xf0) @@ -66,23 +68,39 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)            if (n >= 3)              {  #if CONFIG_UNICODE_SAFETY -              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 -                  && (c >= 0xe1 || s[1] >= 0xa0) -                  && (c != 0xed || s[1] < 0xa0)) -#endif +              if ((s[1] ^ 0x80) < 0x40)                  { -                  *puc = ((unsigned int) (c & 0x0f) << 12) -                         | ((unsigned int) (s[1] ^ 0x80) << 6) -                         | (unsigned int) (s[2] ^ 0x80); -                  return 3; +                  if ((s[2] ^ 0x80) < 0x40) +                    { +                      if ((c >= 0xe1 || s[1] >= 0xa0) +                          && (c != 0xed || s[1] < 0xa0)) +#endif +                        { +                          *puc = ((unsigned int) (c & 0x0f) << 12) +                                 | ((unsigned int) (s[1] ^ 0x80) << 6) +                                 | (unsigned int) (s[2] ^ 0x80); +                          return 3; +                        } +#if CONFIG_UNICODE_SAFETY +                      /* invalid multibyte character */ +                      *puc = 0xfffd; +                      return 3; +                    } +                  /* invalid multibyte character */ +                  *puc = 0xfffd; +                  return 2;                  }                /* invalid multibyte character */ +#endif              }            else              {                /* incomplete multibyte character */                *puc = 0xfffd; -              return n; +              if (n == 1 || (s[1] ^ 0x80) >= 0x40) +                return 1; +              else +                return 2;              }          }        else if (c < 0xf8) @@ -90,28 +108,51 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)            if (n >= 4)              {  #if CONFIG_UNICODE_SAFETY -              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 -                  && (s[3] ^ 0x80) < 0x40 -                  && (c >= 0xf1 || s[1] >= 0x90) +              if ((s[1] ^ 0x80) < 0x40) +                { +                  if ((s[2] ^ 0x80) < 0x40) +                    { +                      if ((s[3] ^ 0x80) < 0x40) +                        { +                          if ((c >= 0xf1 || s[1] >= 0x90)  #if 1 -                  && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +                              && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))  #endif -                 ) +                             )  #endif -                { -                  *puc = ((unsigned int) (c & 0x07) << 18) -                         | ((unsigned int) (s[1] ^ 0x80) << 12) -                         | ((unsigned int) (s[2] ^ 0x80) << 6) -                         | (unsigned int) (s[3] ^ 0x80); -                  return 4; +                            { +                              *puc = ((unsigned int) (c & 0x07) << 18) +                                     | ((unsigned int) (s[1] ^ 0x80) << 12) +                                     | ((unsigned int) (s[2] ^ 0x80) << 6) +                                     | (unsigned int) (s[3] ^ 0x80); +                              return 4; +                            } +#if CONFIG_UNICODE_SAFETY +                          /* invalid multibyte character */ +                          *puc = 0xfffd; +                          return 4; +                        } +                      /* invalid multibyte character */ +                      *puc = 0xfffd; +                      return 3; +                    } +                  /* invalid multibyte character */ +                  *puc = 0xfffd; +                  return 2;                  }                /* invalid multibyte character */ +#endif              }            else              {                /* incomplete multibyte character */                *puc = 0xfffd; -              return n; +              if (n == 1 || (s[1] ^ 0x80) >= 0x40) +                return 1; +              else if (n == 2 || (s[2] ^ 0x80) >= 0x40) +                return 2; +              else +                return 3;              }          }  #if 0 @@ -120,19 +161,42 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)            if (n >= 5)              {  #if CONFIG_UNICODE_SAFETY -              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 -                  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 -                  && (c >= 0xf9 || s[1] >= 0x88)) -#endif +              if ((s[1] ^ 0x80) < 0x40)                  { -                  *puc = ((unsigned int) (c & 0x03) << 24) -                         | ((unsigned int) (s[1] ^ 0x80) << 18) -                         | ((unsigned int) (s[2] ^ 0x80) << 12) -                         | ((unsigned int) (s[3] ^ 0x80) << 6) -                         | (unsigned int) (s[4] ^ 0x80); -                  return 5; +                  if ((s[2] ^ 0x80) < 0x40) +                    { +                      if ((s[3] ^ 0x80) < 0x40) +                        { +                          if ((s[4] ^ 0x80) < 0x40) +                            { +                              if (c >= 0xf9 || s[1] >= 0x88) +#endif +                                { +                                  *puc = ((unsigned int) (c & 0x03) << 24) +                                         | ((unsigned int) (s[1] ^ 0x80) << 18) +                                         | ((unsigned int) (s[2] ^ 0x80) << 12) +                                         | ((unsigned int) (s[3] ^ 0x80) << 6) +                                         | (unsigned int) (s[4] ^ 0x80); +                                  return 5; +                                } +#if CONFIG_UNICODE_SAFETY +                              /* invalid multibyte character */ +                              *puc = 0xfffd; +                              return 5; +                            } +                          /* invalid multibyte character */ +                          *puc = 0xfffd; +                          return 4; +                        } +                      /* invalid multibyte character */ +                      *puc = 0xfffd; +                      return 3; +                    } +                  /* invalid multibyte character */ +                  return 2;                  }                /* invalid multibyte character */ +#endif              }            else              { @@ -146,21 +210,49 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)            if (n >= 6)              {  #if CONFIG_UNICODE_SAFETY -              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 -                  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 -                  && (s[5] ^ 0x80) < 0x40 -                  && (c >= 0xfd || s[1] >= 0x84)) -#endif +              if ((s[1] ^ 0x80) < 0x40)                  { -                  *puc = ((unsigned int) (c & 0x01) << 30) -                         | ((unsigned int) (s[1] ^ 0x80) << 24) -                         | ((unsigned int) (s[2] ^ 0x80) << 18) -                         | ((unsigned int) (s[3] ^ 0x80) << 12) -                         | ((unsigned int) (s[4] ^ 0x80) << 6) -                         | (unsigned int) (s[5] ^ 0x80); -                  return 6; +                  if ((s[2] ^ 0x80) < 0x40) +                    { +                      if ((s[3] ^ 0x80) < 0x40) +                        { +                          if ((s[4] ^ 0x80) < 0x40) +                            { +                              if ((s[5] ^ 0x80) < 0x40) +                                { +                                  if (c >= 0xfd || s[1] >= 0x84) +#endif +                                    { +                                      *puc = ((unsigned int) (c & 0x01) << 30) +                                             | ((unsigned int) (s[1] ^ 0x80) << 24) +                                             | ((unsigned int) (s[2] ^ 0x80) << 18) +                                             | ((unsigned int) (s[3] ^ 0x80) << 12) +                                             | ((unsigned int) (s[4] ^ 0x80) << 6) +                                             | (unsigned int) (s[5] ^ 0x80); +                                      return 6; +                                    } +#if CONFIG_UNICODE_SAFETY +                                  /* invalid multibyte character */ +                                  *puc = 0xfffd; +                                  return 6; +                                } +                              /* invalid multibyte character */ +                              *puc = 0xfffd; +                              return 5; +                            } +                          /* invalid multibyte character */ +                          *puc = 0xfffd; +                          return 4; +                        } +                      /* invalid multibyte character */ +                      *puc = 0xfffd; +                      return 3; +                    } +                  /* invalid multibyte character */ +                  return 2;                  }                /* invalid multibyte character */ +#endif              }            else              { | 
