#!/usr/bin/env python3
"""Print Python dict literals mapping Unicode property value codes to names.

bc_ea_gc.py, 2012 Dec 23, Terry Reedy
Line parsing adapted from code by Pander.
Script for http://bugs.python.org/issue16684

For each of the 'bc', 'ea' and 'gc' blocks of PropertyValueAliases.txt the
script prints source code for a dict named ``dict<block>`` that maps each
abbreviated value to its long name.
"""
from urllib.error import URLError
from urllib.request import urlopen

url = 'http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt'

# Lines of the downloaded file.  Left as None until first needed so that
# importing this module does not touch the network.
aliases = None


def fetch_aliases(attempts=3):
    """Download ``url`` and return its lines as a list of bytes objects.

    In about 10 tries, urlopen twice failed with urllib.error.URLError and
    the next try succeeded, so the download is retried up to *attempts*
    times before the last URLError is allowed to propagate.

    Raises ValueError if *attempts* is less than 1.
    """
    if attempts < 1:
        raise ValueError('attempts must be at least 1')
    for attempt in range(1, attempts + 1):
        try:
            # The with statement closes the HTTP response even on error.
            with urlopen(url) as response:
                return response.readlines()
        except URLError:
            if attempt == attempts:
                raise


def _default_lines():
    """Return the cached download, fetching it on first use."""
    global aliases
    if aliases is None:
        aliases = fetch_aliases()
    return aliases


def _as_text(line):
    """Return *line* as str; bytes are decoded as ASCII, ignoring errors."""
    if isinstance(line, bytes):
        return line.decode('ascii', errors='ignore')
    return line


def parseblock(name, lines=None):
    """Print, and return, the code-to-long-name dict for block *name*.

    name  -- short property name as it appears in parentheses in the block
             header comment, e.g. 'bc' for a header containing '(bc)'.
    lines -- iterable of str or bytes lines to parse.  Defaults to the
             module-level ``aliases``, which is downloaded on first use.

    A list (the default) is scanned from the start on every call, so blocks
    may be requested in any order.  A one-shot iterator is consumed as the
    search proceeds, so with an iterator the blocks must be requested in
    the order in which they occur in the data.

    The block starts at the first non-blank line after the header and ends
    at the next blank line or at end of input.  Comment lines inside the
    block are skipped.

    Returns a dict mapping abbreviation to long name.  Raises ValueError if
    no header for *name* is found or if a data line has fewer than three
    ';'-separated fields.
    """
    if lines is None:
        lines = _default_lines()
    header_tag = '(' + name + ')'
    # Lazy decoding keeps an underlying iterator positioned right after the
    # last line this call actually looked at.
    stream = (_as_text(raw) for raw in lines)

    # Find header line of block.
    for line in stream:
        if line.startswith('#') and header_tag in line:
            break
    else:
        raise ValueError(
            'no block header containing {!r} found'.format(header_tag))

    pairs = []
    in_block = False
    for line in stream:
        if not line.strip():  # blank line, whether it ends in LF or CRLF
            if in_block:
                break
            continue  # blank line(s) between header and first entry
        in_block = True
        if line.startswith('#'):
            continue
        fields = line.split(';')
        if len(fields) < 3:
            raise ValueError('malformed alias line: {!r}'.format(line))
        abbr = fields[1].strip()
        # Drop any trailing '#' comment that follows the long name.
        long_name = fields[2].split('#')[0].strip()
        pairs.append((abbr, long_name))

    # Print code to make code to long name dict for block.
    print("dict{} = {{".format(name))
    for abbr, long_name in pairs:
        print(" '{}': '{}',".format(abbr, long_name))
    print("}\n")
    return dict(pairs)


def main():
    """Print the dicts for the 'bc', 'ea' and 'gc' blocks."""
    for name in ('bc', 'ea', 'gc'):
        parseblock(name)


if __name__ == '__main__':
    main()