Imported Upstream version 0.9.6+really0.9.3 [upstream/0.9.6+really0.9.3]

author: Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> 2016-05-27 14:28:30 +0100
committer: Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> 2016-05-27 14:28:30 +0100
commit: a9a31b1de5776a3b08a82101a4fa711294f0dd1d (patch)
tree: 159134a624e51509f40ed8823249f09a70d1dda3 /lib/uniname/uniname.c
parent: 5f2b09982312c98863eb9a8dfe2c608b81f58259 (diff)
1 files changed, 85 insertions, 193 deletions
diff --git a/lib/uniname/uniname.c b/lib/uniname/uniname.c
index 2191f09b..e4b81cc3 100644
--- a/lib/uniname/uniname.c
+++ b/lib/uniname/uniname.c
@@ -1,5 +1,5 @@
 /* Association between Unicode characters and their names.
-   Copyright (C) 2000-2002, 2005-2007, 2009-2015 Free Software Foundation, Inc.
+   Copyright (C) 2000-2002, 2005-2007, 2009-2010 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU Lesser General Public License as published
@@ -45,11 +45,10 @@
   #define UNICODE_CHARNAME_WORD_CJK 417
   #define UNICODE_CHARNAME_WORD_COMPATIBILITY 6107
   static const uint16_t unicode_names[68940] = ...;
-  static const struct { uint16_t index; uint32_t name:24; } unicode_name_to_index[16626] = ...;
-  static const struct { uint16_t index; uint32_t name:24; } unicode_index_to_name[16626] = ...;
+  static const struct { uint16_t code; uint32_t name:24; } unicode_name_to_code[16626] = ...;
+  static const struct { uint16_t code; uint32_t name:24; } unicode_code_to_name[16626] = ...;
   #define UNICODE_CHARNAME_MAX_LENGTH 83
   #define UNICODE_CHARNAME_MAX_WORDS 13
-  static const struct { uint32_t index; uint32_t gap; uint16_t length; } unicode_ranges[401] = ...;
 */
 
 /* Returns the word with a given index.  */
@@ -128,82 +127,6 @@ unicode_name_word_lookup (const char *word, unsigned int length)
   return -1;
 }
 
-#define UNINAME_INVALID_INDEX UINT16_MAX
-
-/* Looks up the internal index of a Unicode character.  */
-static uint16_t
-unicode_code_to_index (ucs4_t c)
-{
-  /* Binary search in unicode_ranges.  */
-  unsigned int i1 = 0;
-  unsigned int i2 = SIZEOF (unicode_ranges);
-
-  for (;;)
-    {
-      unsigned int i = (i1 + i2) >> 1;
-      ucs4_t start_code =
-        unicode_ranges[i].index + unicode_ranges[i].gap;
-      ucs4_t end_code =
-        start_code + unicode_ranges[i].length - 1;
-
-      if (start_code <= c && c <= end_code)
-        return c - unicode_ranges[i].gap;
-
-      if (end_code < c)
-        {
-          if (i1 == i)
-            break;
-          /* Note here: i1 < i < i2.  */
-          i1 = i;
-        }
-      else if (c < start_code)
-        {
-          if (i2 == i)
-            break;
-          /* Note here: i1 <= i < i2.  */
-          i2 = i;
-        }
-    }
-  return UNINAME_INVALID_INDEX;
-}
-
-/* Looks up the codepoint of a Unicode character, from the given
-   internal index.  */
-static ucs4_t
-unicode_index_to_code (uint16_t index)
-{
-  /* Binary search in unicode_ranges.  */
-  unsigned int i1 = 0;
-  unsigned int i2 = SIZEOF (unicode_ranges);
-
-  for (;;)
-    {
-      unsigned int i = (i1 + i2) >> 1;
-      uint16_t start_index = unicode_ranges[i].index;
-      uint16_t end_index = start_index + unicode_ranges[i].length - 1;
-
-      if (start_index <= index && index <= end_index)
-        return index + unicode_ranges[i].gap;
-
-      if (end_index < index)
-        {
-          if (i1 == i)
-            break;
-          /* Note here: i1 < i < i2.  */
-          i1 = i;
-        }
-      else if (index < start_index)
-        {
-          if (i2 == i)
-            break;
-          /* Note here: i1 <= i < i2.  */
-          i2 = i;
-        }
-    }
-  return UNINAME_INVALID;
-}
-
-
 /* Auxiliary tables for Hangul syllable names, see the Unicode 3.0 book,
    sections 3.11 and 4.4.  */
 static const char jamo_initial_short_name[19][3] =
@@ -278,59 +201,80 @@ unicode_character_name (ucs4_t c, char *buf)
       *ptr = '\0';
       return buf;
     }
-  else if ((c >= 0xFE00 && c <= 0xFE0F) || (c >= 0xE0100 && c <= 0xE01EF))
-    {
-      /* Special case for variation selectors. Keeps the tables
-         small.  */
-
-      /* buf needs to have at least 19 + 3 bytes here.  */
-      sprintf (buf, "VARIATION SELECTOR-%d",
-               c <= 0xFE0F ? c - 0xFE00 + 1 : c - 0xE0100 + 17);
-      return buf;
-    }
   else
     {
-      uint16_t index = unicode_code_to_index (c);
-      const uint16_t *words = NULL;
+      const uint16_t *words;
 
-      if (index != UNINAME_INVALID_INDEX)
+      /* Transform the code so that it fits in 16 bits.  */
+      switch (c >> 12)
         {
-          /* Binary search in unicode_code_to_name.  */
-          unsigned int i1 = 0;
-          unsigned int i2 = SIZEOF (unicode_index_to_name);
-          for (;;)
-            {
-              unsigned int i = (i1 + i2) >> 1;
-              if (unicode_index_to_name[i].index == index)
-                {
-                  words = &unicode_names[unicode_index_to_name[i].name];
-                  break;
-                }
-              else if (unicode_index_to_name[i].index < index)
-                {
-                  if (i1 == i)
-                    {
-                      words = NULL;
-                      break;
-                    }
-                  /* Note here: i1 < i < i2.  */
-                  i1 = i;
-                }
-              else if (unicode_index_to_name[i].index > index)
-                {
-                  if (i2 == i)
-                    {
-                      words = NULL;
-                      break;
-                    }
-                  /* Note here: i1 <= i < i2.  */
-                  i2 = i;
-                }
-            }
+        case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
+          break;
+        case 0x0A:
+          c -= 0x05000;
+          break;
+        case 0x0F:
+          c -= 0x09000;
+          break;
+        case 0x10:
+          c -= 0x09000;
+          break;
+        case 0x12:
+          c -= 0x0A000;
+          break;
+        case 0x1D:
+          c -= 0x14000;
+          break;
+        case 0x1F:
+          c -= 0x15000;
+          break;
+        case 0x2F:
+          c -= 0x24000;
+          break;
+        case 0xE0:
+          c -= 0xD4000;
+          break;
+        default:
+          return NULL;
         }
+
+      {
+        /* Binary search in unicode_code_to_name.  */
+        unsigned int i1 = 0;
+        unsigned int i2 = SIZEOF (unicode_code_to_name);
+        for (;;)
+          {
+            unsigned int i = (i1 + i2) >> 1;
+            if (unicode_code_to_name[i].code == c)
+              {
+                words = &unicode_names[unicode_code_to_name[i].name];
+                break;
+              }
+            else if (unicode_code_to_name[i].code < c)
+              {
+                if (i1 == i)
+                  {
+                    words = NULL;
+                    break;
+                  }
+                /* Note here: i1 < i < i2.  */
+                i1 = i;
+              }
+            else if (unicode_code_to_name[i].code > c)
+              {
+                if (i2 == i)
+                  {
+                    words = NULL;
+                    break;
+                  }
+                /* Note here: i1 <= i < i2.  */
+                i2 = i;
+              }
+          }
+      }
       if (words != NULL)
         {
-          /* Found it in unicode_index_to_name. Now concatenate the words.  */
+          /* Found it in unicode_code_to_name. Now concatenate the words.  */
           /* buf needs to have at least UNICODE_CHARNAME_MAX_LENGTH bytes.  */
           char *ptr = buf;
           for (;;)
@@ -375,37 +319,6 @@ unicode_name_character (const char *name)
       if (false)
       filled_buf:
         {
-          {
-            /* Special case for variation selector aliases. Keeps the
-               tables small.  */
-            const char *p1 = buf;
-            if (ptr >= buf + 3 && *p1++ == 'V')
-              {
-                if (*p1++ == 'S')
-                  {
-                    if (*p1 != '0')
-                      {
-                        unsigned int c = 0;
-                        for (;;)
-                          {
-                            if (*p1 >= '0' && *p1 <= '9')
-                              c += (*p1 - '0');
-                            p1++;
-                            if (p1 == ptr)
-                              {
-                                if (c >= 1 && c <= 16)
-                                  return c - 1 + 0xFE00;
-                                else if (c >= 17 && c <= 256)
-                                  return c - 17 + 0xE0100;
-                                else
-                                  break;
-                              }
-                            c = c * 10;
-                          }
-                      }
-                  }
-              }
-          }
           /* Convert the constituents to uint16_t words.  */
           uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
           uint16_t *wordptr = words;
@@ -537,38 +450,6 @@ unicode_name_character (const char *name)
                           }
                       }
                   }
-                /* Special case for variation selectors. Keeps the
-                   tables small.  */
-                if (wordptr == &words[1]
-                    && words[0] == UNICODE_CHARNAME_WORD_VARIATION
-                    && p1 + 10 <= ptr
-                    && p1 + 12 >= ptr
-                    && memcmp (p1, "SELECTOR-", 9) == 0)
-                  {
-                    const char *p2 = p1 + 9;
-
-                    if (*p2 != '0')
-                      {
-                        unsigned int c = 0;
-
-                        for (;;)
-                          {
-                            if (*p2 >= '0' && *p2 <= '9')
-                              c += (*p2 - '0');
-                            p2++;
-                            if (p2 == ptr)
-                              {
-                                if (c >= 1 && c <= 16)
-                                  return c - 1 + 0xFE00;
-                                else if (c >= 17 && c <= 256)
-                                  return c - 17 + 0xE0100;
-                                else
-                                  break;
-                              }
-                            c = c * 10;
-                          }
-                      }
-                  }
               }
           }
           if (false)
@@ -582,15 +463,15 @@ unicode_name_character (const char *name)
                 for (; --i >= 0; )
                   words[i] = 2 * words[i] + 1;
               }
-              /* Binary search in unicode_name_to_index.  */
+              /* Binary search in unicode_name_to_code.  */
               {
                 unsigned int i1 = 0;
-                unsigned int i2 = SIZEOF (unicode_name_to_index);
+                unsigned int i2 = SIZEOF (unicode_name_to_code);
                 for (;;)
                   {
                     unsigned int i = (i1 + i2) >> 1;
                     const uint16_t *w = words;
-                    const uint16_t *p = &unicode_names[unicode_name_to_index[i].name];
+                    const uint16_t *p = &unicode_names[unicode_name_to_code[i].name];
                     unsigned int n = words_length;
                     for (;;)
                       {
@@ -612,7 +493,18 @@ unicode_name_character (const char *name)
                           }
                         p++; w++; n--;
                         if (n == 0)
-                          return unicode_index_to_code (unicode_name_to_index[i].index);
+                          {
+                            unsigned int c = unicode_name_to_code[i].code;
+
+                            /* Undo the transformation to 16-bit space.  */
+                            static const unsigned int offset[13] =
+                              {
+                                0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
+                                0x05000, 0x09000, 0x09000, 0x0A000, 0x14000,
+                                0x15000, 0x24000, 0xD4000
+                              };
+                            return c + offset[c >> 12];
+                          }
                       }
                   }
               }
author	Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com>	2016-05-27 14:28:30 +0100
committer	Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com>	2016-05-27 14:28:30 +0100
commit	a9a31b1de5776a3b08a82101a4fa711294f0dd1d (patch)
tree	159134a624e51509f40ed8823249f09a70d1dda3 /lib/uniname/uniname.c
parent	5f2b09982312c98863eb9a8dfe2c608b81f58259 (diff)