diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2019-11-29 11:26:57 +0100 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2019-11-29 11:26:57 +0100 | 
| commit | 7f4e90f2759d6a15812172ee19f3ad5b58940beb (patch) | |
| tree | 5f90c63b8ba73f4ecd23d6e642c1ab34dccea033 /src/make_unicode_property_data.py | |
| parent | 68d1ec60c90d27c511d51ce0bef44b132a7ddf11 (diff) | |
| parent | 7e149a97d276ce3b4c5e34f965766c8e40e03fef (diff) | |
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'src/make_unicode_property_data.py')
| -rwxr-xr-x | src/make_unicode_property_data.py | 77 | 
1 files changed, 47 insertions, 30 deletions
diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py index dc3071a..9776628 100755 --- a/src/make_unicode_property_data.py +++ b/src/make_unicode_property_data.py @@ -1,7 +1,7 @@  #!/usr/bin/python  # -*- coding: utf-8 -*-  # make_unicode_property_data.py -# Copyright (c) 2016-2018  K.Kosako +# Copyright (c) 2016-2019  K.Kosako  import sys  import re @@ -22,9 +22,12 @@ PR_LINE_REG  = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")  PA_LINE_REG  = re.compile("(\w+)\s*;\s*(\w+)")  PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")  BL_LINE_REG  = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)") -VERSION_REG  = re.compile("#\s*.*-(\d+\.\d+\.\d+)\.txt") +UNICODE_VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt") +EMOJI_VERSION_REG   = re.compile("(?i)#\s*Version:\s*(\d+)\.(\d+)") + +VERSION_INFO = [-1, -1, -1] +EMOJI_VERSION_INFO = [-1, -1] -VERSION_INFO = None  DIC  = { }  KDIC = { }  PropIndex = { } @@ -40,13 +43,6 @@ def fix_block_name(name):    s = re.sub(r'[- ]+', '_', name)    return 'In_' + s -def check_version_info(s): -  global VERSION_INFO -  m = VERSION_REG.match(s) -  if m is not None: -    VERSION_INFO = m.group(1) - -  def print_ranges(ranges):    for (start, end) in ranges:      print "0x%06x, 0x%06x" % (start, end) @@ -233,7 +229,8 @@ def parse_unicode_data_file(f):    normalize_ranges_in_dic(dic)    return dic, assigned -def parse_properties(path, klass, prop_prefix = None): +def parse_properties(path, klass, prop_prefix = None, version_reg = None): +  version_match = None    with open(path, 'r') as f:      dic = { }      prop = None @@ -243,9 +240,10 @@ def parse_properties(path, klass, prop_prefix = None):        if len(s) == 0:          continue -      if s[0] == '#': -        if VERSION_INFO is None: -          check_version_info(s) +      if s[0] == '#' and version_reg is not None and version_match is None: +        version_match = version_reg.match(s) +        if version_match is not None: +          continue        m = PR_LINE_REG.match(s)        if m: @@ -266,7 +264,7 @@ def parse_properties(path, klass, prop_prefix = None):          props.append(prop)    normalize_ranges_in_dic(dic) -  return (dic, props) +  return (dic, props, version_match)  def parse_property_aliases(path):    a = { } @@ -414,11 +412,11 @@ def entry_and_print_prop_and_index(name, index):    nname = normalize_prop_name(name)    print_prop_and_index(nname, index) -def parse_and_merge_properties(path, klass): -  dic, props = parse_properties(path, klass) +def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = None): +  dic, props, ver_m = parse_properties(path, klass, prop_prefix, version_reg)    merge_dic(DIC, dic)    merge_props(PROPS, props) -  return dic, props +  return dic, props, ver_m  ### main ###  argv = sys.argv @@ -447,11 +445,21 @@ with open('UnicodeData.txt', 'r') as f:  PROPS = DIC.keys()  PROPS = list_sub(PROPS, POSIX_LIST) -parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property') -dic, props = parse_and_merge_properties('Scripts.txt', 'Script') +_, _, ver_m = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property', None, UNICODE_VERSION_REG) +if ver_m is not None: +  VERSION_INFO[0] = int(ver_m.group(1)) +  VERSION_INFO[1] = int(ver_m.group(2)) +  VERSION_INFO[2] = int(ver_m.group(3)) + +dic, props, _ = parse_and_merge_properties('Scripts.txt', 'Script')  DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic)) +  parse_and_merge_properties('PropList.txt',   'Binary Property') -parse_and_merge_properties('emoji-data.txt', 'Emoji Property') + +_, _, ver_m = parse_and_merge_properties('emoji-data.txt', 'Emoji Property', None, EMOJI_VERSION_REG) +if ver_m is not None: +  EMOJI_VERSION_INFO[0] = int(ver_m.group(1)) +  EMOJI_VERSION_INFO[1] = int(ver_m.group(2))  PROPS.append('Unknown')  KDIC['Unknown'] = 'Script' @@ -464,9 +472,9 @@ dic, BLOCKS = parse_blocks('Blocks.txt')  merge_dic(DIC, dic)  if INCLUDE_GRAPHEME_CLUSTER_DATA: -  dic, props = parse_properties('GraphemeBreakProperty.txt', -                                'GraphemeBreak Property', -                                GRAPHEME_CLUSTER_BREAK_NAME_PREFIX) +  dic, props, _ = parse_properties('GraphemeBreakProperty.txt', +                                   'GraphemeBreak Property', +                                   GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)    merge_dic(DIC, dic)    merge_props(PROPS, props)    #prop = GRAPHEME_CLUSTER_BREAK_NAME_PREFIX + 'Other' @@ -533,9 +541,13 @@ sys.stdout.write(s)  if OUTPUT_LIST_MODE:    UPF = open("UNICODE_PROPERTIES", "w") -  if VERSION_INFO is not None: -    print >> UPF, "Unicode Properties (from Unicode Version: %s)" % VERSION_INFO -    print >> UPF, '' +  if VERSION_INFO[0] < 0: +    raise RuntimeError("Unicode Version is not found") +  if EMOJI_VERSION_INFO[0] < 0: +    raise RuntimeError("Emoji Version is not found") + +  print >> UPF, "Unicode Properties (Unicode Version: %d.%d.%d,  Emoji: %d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2], EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1]) +  print >> UPF, ''  index = -1  for prop in POSIX_LIST: @@ -569,9 +581,14 @@ if not(POSIX_ONLY):  print '%%'  print ''  if not(POSIX_ONLY): -  if VERSION_INFO is not None: -    print "#define UNICODE_PROPERTY_VERSION  %s" % re.sub(r'[\.-]', '_', VERSION_INFO) -    print '' +  if VERSION_INFO[0] < 0: +    raise RuntimeError("Unicode Version is not found") +  if EMOJI_VERSION_INFO[0] < 0: +    raise RuntimeError("Emoji Version is not found") + +  print "#define UNICODE_PROPERTY_VERSION  %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]) +  print "#define UNICODE_EMOJI_VERSION     %02d%02d" % (EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1]) +  print ''  print "#define PROPERTY_NAME_MAX_SIZE  %d" % (PROPERTY_NAME_MAX_LEN + 10)  print "#define CODE_RANGES_NUM         %d" % (index + 1)  | 
