summaryrefslogtreecommitdiff
path: root/lib/uninorm/decomposition-table.h
blob: 555f34e5896a68993f242d517f1157ab06fff3f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/* Decomposition of Unicode characters.
   Copyright (C) 2001-2003, 2009-2022 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2009.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */


#include "unitypes.h"

/* The decomposition table is made of two parts:
     - A table containing the actual arrays of decomposed equivalents.
       (This table is separate because the maximum length of a decomposition
       is 18, much larger than the average length 1.497 of a decomposition).
     - A 3-level table of indices into this array.  */

#include "decomposition-table1.h"

static inline unsigned short
decomp_index (ucs4_t uc)
{
  unsigned int index1 = uc >> decomp_header_0;
  if (index1 < decomp_header_1)
    {
      int lookup1 = gl_uninorm_decomp_index_table.level1[index1];
      if (lookup1 >= 0)
        {
          unsigned int index2 = (uc >> decomp_header_2) & decomp_header_3;
          int lookup2 = gl_uninorm_decomp_index_table.level2[lookup1 + index2];
          if (lookup2 >= 0)
            {
              unsigned int index3 = uc & decomp_header_4;
              return gl_uninorm_decomp_index_table.level3[lookup2 + index3];
            }
        }
    }
  return (unsigned short)(-1);
}