summary | refs | log | tree | commit | diff
path: root/src/make_unicode_property_data.py
diff options
context:
space:
mode:
author: Jörg Frings-Fürst <debian@jff-webhosting.net> 2017-09-01 18:53:26 +0200
committer: Jörg Frings-Fürst <debian@jff-webhosting.net> 2017-09-01 18:53:26 +0200
commit: d077404170160a6936e448a40cbb74a95d19ff3c (patch)
tree: 1ab0d5a03a46659e39de2e611e89642d9de5867b /src/make_unicode_property_data.py
parent: 6b31069db6198cd50cc17f2c63917dd2df5775fb (diff)
parent: b62fc1758f4ae8459e6d7e8386ca547274b4daa2 (diff)
Updated version 6.6.1 from 'upstream/6.6.1'
with Debian dir 43da96f2ad45c214a26fc8b155a175b1c828e493
Diffstat (limited to 'src/make_unicode_property_data.py')
-rwxr-xr-x src/make_unicode_property_data.py 723
1 files changed, 369 insertions, 354 deletions
diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py
index 25ed092..51986b6 100755
--- a/src/make_unicode_property_data.py
+++ b/src/make_unicode_property_data.py
@@ -1,9 +1,13 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
+# make_unicode_property_data.py
+# Copyright (c) 2016-2017 K.Kosako
import sys
import re
+INCLUDE_GRAPHEME_CLUSTER_DATA = False
+
POSIX_LIST = [
'NEWLINE', 'Alpha', 'Blank', 'Cntrl', 'Digit', 'Graph', 'Lower',
'Print', 'Punct', 'Space', 'Upper', 'XDigit', 'Word', 'Alnum', 'ASCII'
@@ -11,6 +15,8 @@ POSIX_LIST = [
MAX_CODE_POINT = 0x10ffff
+GRAPHEME_CLUSTER_BREAK_NAME_PREFIX = 'Grapheme_Cluster_Break_'
+
UD_FIRST_REG = re.compile("<.+,\s*First>")
UD_LAST_REG = re.compile("<.+,\s*Last>")
PR_TOTAL_REG = re.compile("#\s*Total\s+code\s+points:")
@@ -18,7 +24,7 @@ PR_LINE_REG = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")
PA_LINE_REG = re.compile("(\w+)\s*;\s*(\w+)")
PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
BL_LINE_REG = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)")
-VERSION_REG = re.compile("#\s*.*-(\d\.\d\.\d)\.txt")
+VERSION_REG = re.compile("#\s*.*-(\d+\.\d+\.\d+)\.txt")
VERSION_INFO = None
DIC = { }
@@ -27,373 +33,377 @@ PropIndex = { }
PROPERTY_NAME_MAX_LEN = 0
def normalize_prop_name(name):
- name = re.sub(r'[ _]', '', name)
- name = name.lower()
- return name
+ name = re.sub(r'[ _]', '', name)
+ name = name.lower()
+ return name
def fix_block_name(name):
- s = re.sub(r'[- ]+', '_', name)
- return 'In_' + s
+ s = re.sub(r'[- ]+', '_', name)
+ return 'In_' + s
def check_version_info(s):
- global VERSION_INFO
- m = VERSION_REG.match(s)
- if m is not None:
- VERSION_INFO = m.group(1)
+ global VERSION_INFO
+ m = VERSION_REG.match(s)
+ if m is not None:
+ VERSION_INFO = m.group(1)
def print_ranges(ranges):
- for (start, end) in ranges:
- print "0x%06x, 0x%06x" % (start, end)
+ for (start, end) in ranges:
+ print "0x%06x, 0x%06x" % (start, end)
- print len(ranges)
+ print len(ranges)
def print_prop_and_index(prop, i):
- print "%-35s %3d" % (prop + ',', i)
- PropIndex[prop] = i
+ print "%-35s %3d" % (prop + ',', i)
+ PropIndex[prop] = i
-print_cache = { }
+PRINT_CACHE = { }
def print_property(prop, data, desc):
- print ''
- print "/* PROPERTY: '%s': %s */" % (prop, desc)
+ print ''
+ print "/* PROPERTY: '%s': %s */" % (prop, desc)
- prev_prop = dic_find_by_value(print_cache, data)
- if prev_prop is not None:
- print "#define CR_%s CR_%s" % (prop, prev_prop)
- else:
- print_cache[prop] = data
- print "static const OnigCodePoint"
- print "CR_%s[] = { %d," % (prop, len(data))
- for (start, end) in data:
- print "0x%04x, 0x%04x," % (start, end)
+ prev_prop = dic_find_by_value(PRINT_CACHE, data)
+ if prev_prop is not None:
+ print "#define CR_%s CR_%s" % (prop, prev_prop)
+ else:
+ PRINT_CACHE[prop] = data
+ print "static const OnigCodePoint"
+ print "CR_%s[] = { %d," % (prop, len(data))
+ for (start, end) in data:
+ print "0x%04x, 0x%04x," % (start, end)
- print "}; /* END of CR_%s */" % prop
+ print "}; /* END of CR_%s */" % prop
def dic_find_by_value(dic, v):
- for key, val in dic.items():
- if val == v:
- return key
+ for key, val in dic.items():
+ if val == v:
+ return key
- return None
+ return None
def normalize_ranges(in_ranges, sort=False):
- if sort:
- ranges = sorted(in_ranges)
- else:
- ranges = in_ranges
+ if sort:
+ ranges = sorted(in_ranges)
+ else:
+ ranges = in_ranges
- r = []
- prev = None
- for (start, end) in ranges:
- if prev >= start - 1:
- (pstart, pend) = r.pop()
- end = max(pend, end)
- start = pstart
+ r = []
+ prev = None
+ for (start, end) in ranges:
+ if prev >= start - 1:
+ (pstart, pend) = r.pop()
+ end = max(pend, end)
+ start = pstart
- r.append((start, end))
- prev = end
+ r.append((start, end))
+ prev = end
- return r
+ return r
def inverse_ranges(in_ranges):
- r = []
- prev = 0x000000
- for (start, end) in in_ranges:
- if prev < start:
- r.append((prev, start - 1))
+ r = []
+ prev = 0x000000
+ for (start, end) in in_ranges:
+ if prev < start:
+ r.append((prev, start - 1))
- prev = end + 1
+ prev = end + 1
- if prev < MAX_CODE_POINT:
- r.append((prev, MAX_CODE_POINT))
+ if prev < MAX_CODE_POINT:
+ r.append((prev, MAX_CODE_POINT))
- return r
+ return r
def add_ranges(r1, r2):
- r = r1 + r2
- return normalize_ranges(r, True)
+ r = r1 + r2
+ return normalize_ranges(r, True)
def sub_one_range(one_range, rs):
- r = []
- (s1, e1) = one_range
- n = len(rs)
- for i in range(0, n):
- (s2, e2) = rs[i]
- if s2 >= s1 and s2 <= e1:
- if s2 > s1:
- r.append((s1, s2 - 1))
- if e2 >= e1:
- return r
-
- s1 = e2 + 1
- elif s2 < s1 and e2 >= s1:
- if e2 < e1:
- s1 = e2 + 1
- else:
- return r
-
- r.append((s1, e1))
- return r
+ r = []
+ (s1, e1) = one_range
+ n = len(rs)
+ for i in range(0, n):
+ (s2, e2) = rs[i]
+ if s2 >= s1 and s2 <= e1:
+ if s2 > s1:
+ r.append((s1, s2 - 1))
+ if e2 >= e1:
+ return r
+
+ s1 = e2 + 1
+ elif s2 < s1 and e2 >= s1:
+ if e2 < e1:
+ s1 = e2 + 1
+ else:
+ return r
+
+ r.append((s1, e1))
+ return r
def sub_ranges(r1, r2):
- r = []
- for one_range in r1:
- rs = sub_one_range(one_range, r2)
- r.extend(rs)
+ r = []
+ for one_range in r1:
+ rs = sub_one_range(one_range, r2)
+ r.extend(rs)
- return r
+ return r
def add_ranges_in_dic(dic):
- r = []
- for k, v in dic.items():
- r = r + v
+ r = []
+ for k, v in dic.items():
+ r = r + v
- return normalize_ranges(r, True)
+ return normalize_ranges(r, True)
def normalize_ranges_in_dic(dic, sort=False):
- for k, v in dic.items():
- r = normalize_ranges(v, sort)
- dic[k] = r
+ for k, v in dic.items():
+ r = normalize_ranges(v, sort)
+ dic[k] = r
def merge_dic(to_dic, from_dic):
- to_keys = to_dic.keys()
- from_keys = from_dic.keys()
- common = list(set(to_keys) & set(from_keys))
- if len(common) != 0:
- print >> sys.stderr, "merge_dic: collision: %s" % sorted(common)
+ to_keys = to_dic.keys()
+ from_keys = from_dic.keys()
+ common = list(set(to_keys) & set(from_keys))
+ if len(common) != 0:
+ print >> sys.stderr, "merge_dic: collision: %s" % sorted(common)
- to_dic.update(from_dic)
+ to_dic.update(from_dic)
def merge_props(to_props, from_props):
- common = list(set(to_props) & set(from_props))
- if len(common) != 0:
- print >> sys.stderr, "merge_props: collision: %s" % sorted(common)
+ common = list(set(to_props) & set(from_props))
+ if len(common) != 0:
+ print >> sys.stderr, "merge_props: collision: %s" % sorted(common)
- to_props.extend(from_props)
+ to_props.extend(from_props)
def add_range_into_dic(dic, name, start, end):
- d = dic.get(name, None)
- if d is None:
- d = [(start, end)]
- dic[name] = d
- else:
- d.append((start, end))
+ d = dic.get(name, None)
+ if d is None:
+ d = [(start, end)]
+ dic[name] = d
+ else:
+ d.append((start, end))
def list_sub(a, b):
- x = set(a) - set(b)
- return list(x)
+ x = set(a) - set(b)
+ return list(x)
def parse_unicode_data_file(f):
+ dic = { }
+ assigned = []
+ for line in f:
+ s = line.strip()
+ if len(s) == 0:
+ continue
+ if s[0] == '#':
+ continue
+
+ a = s.split(';')
+ code = int(a[0], 16)
+ desc = a[1]
+ prop = a[2]
+ if UD_FIRST_REG.match(desc) is not None:
+ start = code
+ end = None
+ elif UD_LAST_REG.match(desc) is not None:
+ end = code
+ else:
+ start = end = code
+
+ if end is not None:
+ assigned.append((start, end))
+ add_range_into_dic(dic, prop, start, end)
+ if len(prop) == 2:
+ add_range_into_dic(dic, prop[0:1], start, end)
+
+ normalize_ranges_in_dic(dic)
+ return dic, assigned
+
+def parse_properties(path, klass, prop_prefix = None):
+ with open(path, 'r') as f:
dic = { }
- assigned = []
+ prop = None
+ props = []
for line in f:
- s = line.strip()
- if len(s) == 0:
- continue
- if s[0] == '#':
- continue
-
- a = s.split(';')
- code = int(a[0], 16)
- desc = a[1]
- prop = a[2]
- if UD_FIRST_REG.match(desc) is not None:
- start = code
- end = None
- elif UD_LAST_REG.match(desc) is not None:
- end = code
+ s = line.strip()
+ if len(s) == 0:
+ continue
+
+ if s[0] == '#':
+ if VERSION_INFO is None:
+ check_version_info(s)
+
+ m = PR_LINE_REG.match(s)
+ if m:
+ prop = m.group(3)
+ if prop_prefix is not None:
+ prop = prop_prefix + prop
+
+ if m.group(2):
+ start = int(m.group(1), 16)
+ end = int(m.group(2), 16)
+ add_range_into_dic(dic, prop, start, end)
else:
- start = end = code
-
- if end is not None:
- assigned.append((start, end))
- add_range_into_dic(dic, prop, start, end)
- if len(prop) == 2:
- add_range_into_dic(dic, prop[0:1], start, end)
-
- normalize_ranges_in_dic(dic)
- return dic, assigned
-
-def parse_properties(path, klass):
- with open(path, 'r') as f:
- dic = { }
- prop = None
- props = []
- for line in f:
- s = line.strip()
- if len(s) == 0:
- continue
-
- if s[0] == '#':
- if VERSION_INFO is None:
- check_version_info(s)
-
- m = PR_LINE_REG.match(s)
- if m:
- prop = m.group(3)
- if m.group(2):
- start = int(m.group(1), 16)
- end = int(m.group(2), 16)
- add_range_into_dic(dic, prop, start, end)
- else:
- start = int(m.group(1), 16)
- add_range_into_dic(dic, prop, start, start)
-
- elif PR_TOTAL_REG.match(s) is not None:
- KDIC[prop] = klass
- props.append(prop)
-
- normalize_ranges_in_dic(dic)
- return (dic, props)
+ start = int(m.group(1), 16)
+ add_range_into_dic(dic, prop, start, start)
+
+ elif PR_TOTAL_REG.match(s) is not None:
+ KDIC[prop] = klass
+ props.append(prop)
+
+ normalize_ranges_in_dic(dic)
+ return (dic, props)
def parse_property_aliases(path):
- a = { }
- with open(path, 'r') as f:
- for line in f:
- s = line.strip()
- if len(s) == 0:
- continue
+ a = { }
+ with open(path, 'r') as f:
+ for line in f:
+ s = line.strip()
+ if len(s) == 0:
+ continue
- m = PA_LINE_REG.match(s)
- if not(m):
- continue
+ m = PA_LINE_REG.match(s)
+ if not(m):
+ continue
- if m.group(1) == m.group(2):
- continue
+ if m.group(1) == m.group(2):
+ continue
- a[m.group(1)] = m.group(2)
+ a[m.group(1)] = m.group(2)
- return a
+ return a
def parse_property_value_aliases(path):
- a = { }
- with open(path, 'r') as f:
- for line in f:
- s = line.strip()
- if len(s) == 0:
- continue
-
- m = PVA_LINE_REG.match(s)
- if not(m):
- continue
-
- cat = m.group(1)
- x2 = m.group(2)
- x3 = m.group(3)
- x4 = m.group(4)
- if cat == 'sc':
- if x2 != x3:
- a[x2] = x3
- if x4 and x4 != x3:
- a[x4] = x3
- else:
- if x2 != x3:
- a[x3] = x2
- if x4 and x4 != x2:
- a[x4] = x2
-
- return a
+ a = { }
+ with open(path, 'r') as f:
+ for line in f:
+ s = line.strip()
+ if len(s) == 0:
+ continue
+
+ m = PVA_LINE_REG.match(s)
+ if not(m):
+ continue
+
+ cat = m.group(1)
+ x2 = m.group(2)
+ x3 = m.group(3)
+ x4 = m.group(4)
+ if cat == 'sc':
+ if x2 != x3:
+ a[x2] = x3
+ if x4 and x4 != x3:
+ a[x4] = x3
+ else:
+ if x2 != x3:
+ a[x3] = x2
+ if x4 and x4 != x2:
+ a[x4] = x2
+
+ return a
def parse_blocks(path):
- dic = { }
- blocks = []
- with open(path, 'r') as f:
- for line in f:
- s = line.strip()
- if len(s) == 0:
- continue
-
- m = BL_LINE_REG.match(s)
- if not(m):
- continue
-
- start = int(m.group(1), 16)
- end = int(m.group(2), 16)
- block = fix_block_name(m.group(3))
- add_range_into_dic(dic, block, start, end)
- blocks.append(block)
-
- noblock = fix_block_name('No_Block')
- dic[noblock] = inverse_ranges(add_ranges_in_dic(dic))
- blocks.append(noblock)
- return dic, blocks
+ dic = { }
+ blocks = []
+ with open(path, 'r') as f:
+ for line in f:
+ s = line.strip()
+ if len(s) == 0:
+ continue
+
+ m = BL_LINE_REG.match(s)
+ if not(m):
+ continue
+
+ start = int(m.group(1), 16)
+ end = int(m.group(2), 16)
+ block = fix_block_name(m.group(3))
+ add_range_into_dic(dic, block, start, end)
+ blocks.append(block)
+
+ noblock = fix_block_name('No_Block')
+ dic[noblock] = inverse_ranges(add_ranges_in_dic(dic))
+ blocks.append(noblock)
+ return dic, blocks
def add_primitive_props(assigned):
- DIC['Assigned'] = normalize_ranges(assigned)
- DIC['Any'] = [(0x000000, 0x10ffff)]
- DIC['ASCII'] = [(0x000000, 0x00007f)]
- DIC['NEWLINE'] = [(0x00000a, 0x00000a)]
- DIC['Cn'] = inverse_ranges(DIC['Assigned'])
- DIC['C'].extend(DIC['Cn'])
- DIC['C'] = normalize_ranges(DIC['C'], True)
-
- d = []
- d.extend(DIC['Ll'])
- d.extend(DIC['Lt'])
- d.extend(DIC['Lu'])
- DIC['LC'] = normalize_ranges(d, True)
+ DIC['Assigned'] = normalize_ranges(assigned)
+ DIC['Any'] = [(0x000000, 0x10ffff)]
+ DIC['ASCII'] = [(0x000000, 0x00007f)]
+ DIC['NEWLINE'] = [(0x00000a, 0x00000a)]
+ DIC['Cn'] = inverse_ranges(DIC['Assigned'])
+ DIC['C'].extend(DIC['Cn'])
+ DIC['C'] = normalize_ranges(DIC['C'], True)
+
+ d = []
+ d.extend(DIC['Ll'])
+ d.extend(DIC['Lt'])
+ d.extend(DIC['Lu'])
+ DIC['LC'] = normalize_ranges(d, True)
def add_posix_props(dic):
- alnum = []
- alnum.extend(dic['Alphabetic'])
- alnum.extend(dic['Nd']) # Nd == Decimal_Number
- alnum = normalize_ranges(alnum, True)
-
- blank = [(0x0009, 0x0009)]
- blank.extend(dic['Zs']) # Zs == Space_Separator
- blank = normalize_ranges(blank, True)
-
- word = []
- word.extend(dic['Alphabetic'])
- word.extend(dic['M']) # M == Mark
- word.extend(dic['Nd'])
- word.extend(dic['Pc']) # Pc == Connector_Punctuation
- word = normalize_ranges(word, True)
-
- graph = sub_ranges(dic['Any'], dic['White_Space'])
- graph = sub_ranges(graph, dic['Cc'])
- graph = sub_ranges(graph, dic['Cs']) # Cs == Surrogate
- graph = sub_ranges(graph, dic['Cn']) # Cn == Unassigned
- graph = normalize_ranges(graph, True)
-
- p = []
- p.extend(graph)
- p.extend(dic['Zs'])
- p = normalize_ranges(p, True)
-
- dic['Alpha'] = dic['Alphabetic']
- dic['Upper'] = dic['Uppercase']
- dic['Lower'] = dic['Lowercase']
- dic['Punct'] = dic['P'] # P == Punctuation
- dic['Digit'] = dic['Nd']
- dic['XDigit'] = [(0x0030, 0x0039), (0x0041, 0x0046), (0x0061, 0x0066)]
- dic['Alnum'] = alnum
- dic['Space'] = dic['White_Space']
- dic['Blank'] = blank
- dic['Cntrl'] = dic['Cc']
- dic['Word'] = word
- dic['Graph'] = graph
- dic['Print'] = p
+ alnum = []
+ alnum.extend(dic['Alphabetic'])
+ alnum.extend(dic['Nd']) # Nd == Decimal_Number
+ alnum = normalize_ranges(alnum, True)
+
+ blank = [(0x0009, 0x0009)]
+ blank.extend(dic['Zs']) # Zs == Space_Separator
+ blank = normalize_ranges(blank, True)
+
+ word = []
+ word.extend(dic['Alphabetic'])
+ word.extend(dic['M']) # M == Mark
+ word.extend(dic['Nd'])
+ word.extend(dic['Pc']) # Pc == Connector_Punctuation
+ word = normalize_ranges(word, True)
+
+ graph = sub_ranges(dic['Any'], dic['White_Space'])
+ graph = sub_ranges(graph, dic['Cc'])
+ graph = sub_ranges(graph, dic['Cs']) # Cs == Surrogate
+ graph = sub_ranges(graph, dic['Cn']) # Cn == Unassigned
+ graph = normalize_ranges(graph, True)
+
+ p = []
+ p.extend(graph)
+ p.extend(dic['Zs'])
+ p = normalize_ranges(p, True)
+
+ dic['Alpha'] = dic['Alphabetic']
+ dic['Upper'] = dic['Uppercase']
+ dic['Lower'] = dic['Lowercase']
+ dic['Punct'] = dic['P'] # P == Punctuation
+ dic['Digit'] = dic['Nd']
+ dic['XDigit'] = [(0x0030, 0x0039), (0x0041, 0x0046), (0x0061, 0x0066)]
+ dic['Alnum'] = alnum
+ dic['Space'] = dic['White_Space']
+ dic['Blank'] = blank
+ dic['Cntrl'] = dic['Cc']
+ dic['Word'] = word
+ dic['Graph'] = graph
+ dic['Print'] = p
def set_max_prop_name(name):
- global PROPERTY_NAME_MAX_LEN
- n = len(name)
- if n > PROPERTY_NAME_MAX_LEN:
- PROPERTY_NAME_MAX_LEN = n
+ global PROPERTY_NAME_MAX_LEN
+ n = len(name)
+ if n > PROPERTY_NAME_MAX_LEN:
+ PROPERTY_NAME_MAX_LEN = n
-LIST_COUNTER = 1
def entry_prop_name(name, index):
- global LIST_COUNTER
- set_max_prop_name(name)
- if OUTPUT_LIST and index >= len(POSIX_LIST):
- print >> UPF, "%3d: %s" % (LIST_COUNTER, name)
- LIST_COUNTER += 1
+ set_max_prop_name(name)
+ if OUTPUT_LIST_MODE and index >= len(POSIX_LIST):
+ print >> UPF, "%3d: %s" % (index, name)
+def entry_and_print_prop_and_index(name, index):
+ entry_prop_name(name, index)
+ nname = normalize_prop_name(name)
+ print_prop_and_index(nname, index)
### main ###
argv = sys.argv
@@ -401,19 +411,18 @@ argc = len(argv)
POSIX_ONLY = False
if argc >= 2:
- if argv[1] == '-posix':
- POSIX_ONLY = True
+ if argv[1] == '-posix':
+ POSIX_ONLY = True
-OUTPUT_LIST = not(POSIX_ONLY)
+OUTPUT_LIST_MODE = not(POSIX_ONLY)
with open('UnicodeData.txt', 'r') as f:
- dic, assigned = parse_unicode_data_file(f)
- DIC = dic
- add_primitive_props(assigned)
+ dic, assigned = parse_unicode_data_file(f)
+ DIC = dic
+ add_primitive_props(assigned)
PROPS = DIC.keys()
PROPS = list_sub(PROPS, POSIX_LIST)
-PROPS = sorted(PROPS)
dic, props = parse_properties('DerivedCoreProperties.txt', 'Derived Property')
merge_dic(DIC, dic)
@@ -437,47 +446,59 @@ merge_dic(ALIASES, a)
dic, BLOCKS = parse_blocks('Blocks.txt')
merge_dic(DIC, dic)
+if INCLUDE_GRAPHEME_CLUSTER_DATA:
+ dic, props = parse_properties('GraphemeBreakProperty.txt',
+ 'GraphemeBreak Property',
+ GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
+ merge_dic(DIC, dic)
+ merge_props(PROPS, props)
+ #prop = GRAPHEME_CLUSTER_BREAK_NAME_PREFIX + 'Other'
+ #DIC[prop] = inverse_ranges(add_ranges_in_dic(dic))
+ #PROPS.append(prop)
+ #KDIC[prop] = 'GrapemeBreak Property'
+
add_posix_props(DIC)
+PROPS = sorted(PROPS)
s = '''%{
/* Generated by make_unicode_property_data.py. */
'''
print s
for prop in POSIX_LIST:
- print_property(prop, DIC[prop], "POSIX [[:%s:]]" % prop)
+ print_property(prop, DIC[prop], "POSIX [[:%s:]]" % prop)
print ''
if not(POSIX_ONLY):
- for prop in PROPS:
- klass = KDIC.get(prop, None)
- if klass is None:
- n = len(prop)
- if n == 1:
- klass = 'Major Category'
- elif n == 2:
- klass = 'General Category'
- else:
- klass = '-'
+ for prop in PROPS:
+ klass = KDIC.get(prop, None)
+ if klass is None:
+ n = len(prop)
+ if n == 1:
+ klass = 'Major Category'
+ elif n == 2:
+ klass = 'General Category'
+ else:
+ klass = '-'
- print_property(prop, DIC[prop], klass)
+ print_property(prop, DIC[prop], klass)
- for block in BLOCKS:
- print_property(block, DIC[block], 'Block')
+ for block in BLOCKS:
+ print_property(block, DIC[block], 'Block')
print ''
print "static const OnigCodePoint*\nconst CodeRanges[] = {"
for prop in POSIX_LIST:
- print " CR_%s," % prop
+ print " CR_%s," % prop
if not(POSIX_ONLY):
- for prop in PROPS:
- print " CR_%s," % prop
+ for prop in PROPS:
+ print " CR_%s," % prop
- for prop in BLOCKS:
- print " CR_%s," % prop
+ for prop in BLOCKS:
+ print " CR_%s," % prop
s = '''};
%}
@@ -489,57 +510,51 @@ struct PropertyNameCtype {
'''
sys.stdout.write(s)
-if OUTPUT_LIST:
- UPF = open("UNICODE_PROPERTIES", "w")
- if VERSION_INFO is not None:
- print >> UPF, "Unicode Properties (from Unicode Version: %s)" % VERSION_INFO
- print >> UPF, ''
+if OUTPUT_LIST_MODE:
+ UPF = open("UNICODE_PROPERTIES", "w")
+ if VERSION_INFO is not None:
+ print >> UPF, "Unicode Properties (from Unicode Version: %s)" % VERSION_INFO
+ print >> UPF, ''
index = -1
for prop in POSIX_LIST:
index += 1
- entry_prop_name(prop, index)
- prop = normalize_prop_name(prop)
- print_prop_and_index(prop, index)
+ entry_and_print_prop_and_index(prop, index)
if not(POSIX_ONLY):
- for prop in PROPS:
- index += 1
- entry_prop_name(prop, index)
- prop = normalize_prop_name(prop)
- print_prop_and_index(prop, index)
-
- NALIASES = map(lambda (k,v):(normalize_prop_name(k), k, v), ALIASES.items())
- NALIASES = sorted(NALIASES)
- for (nk, k, v) in NALIASES:
- nv = normalize_prop_name(v)
- if PropIndex.get(nk, None) is not None:
- print >> sys.stderr, "ALIASES: already exists: %s => %s" % (k, v)
- continue
- index = PropIndex.get(nv, None)
- if index is None:
- #print >> sys.stderr, "ALIASES: value is not exist: %s => %s" % (k, v)
- continue
-
- entry_prop_name(k, index)
- print_prop_and_index(nk, index)
-
- for name in BLOCKS:
- index += 1
- entry_prop_name(name, index)
- name = normalize_prop_name(name)
- print_prop_and_index(name, index)
+ for prop in PROPS:
+ index += 1
+ entry_and_print_prop_and_index(prop, index)
+
+ NALIASES = map(lambda (k,v):(normalize_prop_name(k), k, v), ALIASES.items())
+ NALIASES = sorted(NALIASES)
+ for (nk, k, v) in NALIASES:
+ nv = normalize_prop_name(v)
+ if PropIndex.get(nk, None) is not None:
+ print >> sys.stderr, "ALIASES: already exists: %s => %s" % (k, v)
+ continue
+ aindex = PropIndex.get(nv, None)
+ if aindex is None:
+ #print >> sys.stderr, "ALIASES: value is not exist: %s => %s" % (k, v)
+ continue
+
+ entry_prop_name(k, aindex)
+ print_prop_and_index(nk, aindex)
+
+ for name in BLOCKS:
+ index += 1
+ entry_and_print_prop_and_index(name, index)
print '%%'
print ''
if VERSION_INFO is not None:
- print "#define PROPERTY_VERSION %s" % re.sub(r'[\.-]', '_', VERSION_INFO)
- print ''
+ print "#define PROPERTY_VERSION %s" % re.sub(r'[\.-]', '_', VERSION_INFO)
+ print ''
print "#define PROPERTY_NAME_MAX_SIZE %d" % (PROPERTY_NAME_MAX_LEN + 10)
print "#define CODE_RANGES_NUM %d" % (index + 1)
-if OUTPUT_LIST:
- UPF.close()
+if OUTPUT_LIST_MODE:
+ UPF.close()
sys.exit(0)