diff options
Diffstat (limited to 'lib/unistr/u8-mbtouc-aux.c')
| -rw-r--r-- | lib/unistr/u8-mbtouc-aux.c | 166 | 
1 files changed, 42 insertions, 124 deletions
| diff --git a/lib/unistr/u8-mbtouc-aux.c b/lib/unistr/u8-mbtouc-aux.c index 834725fe..c9975896 100644 --- a/lib/unistr/u8-mbtouc-aux.c +++ b/lib/unistr/u8-mbtouc-aux.c @@ -1,5 +1,5 @@  /* Conversion UTF-8 to UCS-4. -   Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc. +   Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.     Written by Bruno Haible <bruno@clisp.org>, 2001.     This program is free software: you can redistribute it and/or modify it @@ -45,32 +45,21 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)              {                /* incomplete multibyte character */                *puc = 0xfffd; -              return 1; +              return n;              }          }        else if (c < 0xf0)          {            if (n >= 3)              { -              if ((s[1] ^ 0x80) < 0x40) +              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 +                  && (c >= 0xe1 || s[1] >= 0xa0) +                  && (c != 0xed || s[1] < 0xa0))                  { -                  if ((s[2] ^ 0x80) < 0x40) -                    { -                      if ((c >= 0xe1 || s[1] >= 0xa0) -                          && (c != 0xed || s[1] < 0xa0)) -                        { -                          *puc = ((unsigned int) (c & 0x0f) << 12) -                                 | ((unsigned int) (s[1] ^ 0x80) << 6) -                                 | (unsigned int) (s[2] ^ 0x80); -                          return 3; -                        } -                      /* invalid multibyte character */ -                      *puc = 0xfffd; -                      return 3; -                    } -                  /* invalid multibyte character */ -                  *puc = 0xfffd; -                  return 2; +                  *puc = ((unsigned int) (c & 0x0f) << 12) +                         | ((unsigned int) (s[1] ^ 0x80) << 6) +                         | (unsigned int) (s[2] ^ 0x80); +                  return 3;                  }                /* invalid multibyte character */              } @@ -78,45 +67,26 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)              {                /* incomplete multibyte character */                *puc = 0xfffd; -              if (n == 1 || (s[1] ^ 0x80) >= 0x40) -                return 1; -              else -                return 2; +              return n;              }          }        else if (c < 0xf8)          {            if (n >= 4)              { -              if ((s[1] ^ 0x80) < 0x40) -                { -                  if ((s[2] ^ 0x80) < 0x40) -                    { -                      if ((s[3] ^ 0x80) < 0x40) -                        { -                          if ((c >= 0xf1 || s[1] >= 0x90) +              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 +                  && (s[3] ^ 0x80) < 0x40 +                  && (c >= 0xf1 || s[1] >= 0x90)  #if 1 -                              && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +                  && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))  #endif -                             ) -                            { -                              *puc = ((unsigned int) (c & 0x07) << 18) -                                     | ((unsigned int) (s[1] ^ 0x80) << 12) -                                     | ((unsigned int) (s[2] ^ 0x80) << 6) -                                     | (unsigned int) (s[3] ^ 0x80); -                              return 4; -                            } -                          /* invalid multibyte character */ -                          *puc = 0xfffd; -                          return 4; -                        } -                      /* invalid multibyte character */ -                      *puc = 0xfffd; -                      return 3; -                    } -                  /* invalid multibyte character */ -                  *puc = 0xfffd; -                  return 2; +                 ) +                { +                  *puc = ((unsigned int) (c & 0x07) << 18) +                         | ((unsigned int) (s[1] ^ 0x80) << 12) +                         | ((unsigned int) (s[2] ^ 0x80) << 6) +                         | (unsigned int) (s[3] ^ 0x80); +                  return 4;                  }                /* invalid multibyte character */              } @@ -124,12 +94,7 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)              {                /* incomplete multibyte character */                *puc = 0xfffd; -              if (n == 1 || (s[1] ^ 0x80) >= 0x40) -                return 1; -              else if (n == 2 || (s[2] ^ 0x80) >= 0x40) -                return 2; -              else -                return 3; +              return n;              }          }  #if 0 @@ -137,37 +102,16 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)          {            if (n >= 5)              { -              if ((s[1] ^ 0x80) < 0x40) +              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 +                  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 +                  && (c >= 0xf9 || s[1] >= 0x88))                  { -                  if ((s[2] ^ 0x80) < 0x40) -                    { -                      if ((s[3] ^ 0x80) < 0x40) -                        { -                          if ((s[4] ^ 0x80) < 0x40) -                            { -                              if (c >= 0xf9 || s[1] >= 0x88) -                                { -                                  *puc = ((unsigned int) (c & 0x03) << 24) -                                         | ((unsigned int) (s[1] ^ 0x80) << 18) -                                         | ((unsigned int) (s[2] ^ 0x80) << 12) -                                         | ((unsigned int) (s[3] ^ 0x80) << 6) -                                         | (unsigned int) (s[4] ^ 0x80); -                                  return 5; -                                } -                              /* invalid multibyte character */ -                              *puc = 0xfffd; -                              return 5; -                            } -                          /* invalid multibyte character */ -                          *puc = 0xfffd; -                          return 4; -                        } -                      /* invalid multibyte character */ -                      *puc = 0xfffd; -                      return 3; -                    } -                  /* invalid multibyte character */ -                  return 2; +                  *puc = ((unsigned int) (c & 0x03) << 24) +                         | ((unsigned int) (s[1] ^ 0x80) << 18) +                         | ((unsigned int) (s[2] ^ 0x80) << 12) +                         | ((unsigned int) (s[3] ^ 0x80) << 6) +                         | (unsigned int) (s[4] ^ 0x80); +                  return 5;                  }                /* invalid multibyte character */              } @@ -182,44 +126,18 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)          {            if (n >= 6)              { -              if ((s[1] ^ 0x80) < 0x40) +              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 +                  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 +                  && (s[5] ^ 0x80) < 0x40 +                  && (c >= 0xfd || s[1] >= 0x84))                  { -                  if ((s[2] ^ 0x80) < 0x40) -                    { -                      if ((s[3] ^ 0x80) < 0x40) -                        { -                          if ((s[4] ^ 0x80) < 0x40) -                            { -                              if ((s[5] ^ 0x80) < 0x40) -                                { -                                  if (c >= 0xfd || s[1] >= 0x84) -                                    { -                                      *puc = ((unsigned int) (c & 0x01) << 30) -                                             | ((unsigned int) (s[1] ^ 0x80) << 24) -                                             | ((unsigned int) (s[2] ^ 0x80) << 18) -                                             | ((unsigned int) (s[3] ^ 0x80) << 12) -                                             | ((unsigned int) (s[4] ^ 0x80) << 6) -                                             | (unsigned int) (s[5] ^ 0x80); -                                      return 6; -                                    } -                                  /* invalid multibyte character */ -                                  *puc = 0xfffd; -                                  return 6; -                                } -                              /* invalid multibyte character */ -                              *puc = 0xfffd; -                              return 5; -                            } -                          /* invalid multibyte character */ -                          *puc = 0xfffd; -                          return 4; -                        } -                      /* invalid multibyte character */ -                      *puc = 0xfffd; -                      return 3; -                    } -                  /* invalid multibyte character */ -                  return 2; +                  *puc = ((unsigned int) (c & 0x01) << 30) +                         | ((unsigned int) (s[1] ^ 0x80) << 24) +                         | ((unsigned int) (s[2] ^ 0x80) << 18) +                         | ((unsigned int) (s[3] ^ 0x80) << 12) +                         | ((unsigned int) (s[4] ^ 0x80) << 6) +                         | (unsigned int) (s[5] ^ 0x80); +                  return 6;                  }                /* invalid multibyte character */              } | 
