"""Print tables of characters that the regex engine must treat as equivalent.

Characters are grouped by their ``str.upper()`` value.  Within each group the
distinct regex-engine lowercase code points are collected; a group that yields
more than one such code point is an "equivalence".  Two Python literals are
written to stdout:

* ``equivalences`` -- a tuple of sorted tuples of code points;
* ``mapping`` -- a dict from each code point to the other members of its tuple.
"""
import re
import sys
import unicodedata
from collections import defaultdict

try:
    from _sre import getlower
except ImportError:
    # _sre.getlower() no longer exists in CPython 3.7+; the same lowercase
    # conversions are exposed there as ascii_tolower() / unicode_tolower().
    from _sre import ascii_tolower, unicode_tolower

    def getlower(ch, flags):
        """Stand-in for the removed ``_sre.getlower(ch, flags)``.

        Only the re.U / no-flag cases are covered; locale-dependent
        lowering is not emulated.
        """
        if flags & re.U:
            return unicode_tolower(ch)
        return ascii_tolower(ch)

# Python 2 / 3 compatibility shims.
try:
    unichr
except NameError:
    unichr = chr
try:
    ascii
except NameError:
    ascii = repr


def re_lower(i):
    """Return the regex engine's Unicode lowercase of code point *i*."""
    return getlower(i, re.U)


def re_clower(i):
    """Like re_lower(), but take a one-character string instead of an int."""
    return getlower(ord(i), re.U)


def uname(i):
    """Return the Unicode name of code point *i*, or 'U+XXXX' if unnamed."""
    return unicodedata.name(unichr(i), r'U+%04X' % i)


class hexint(int):
    """An int whose repr() is hexadecimal, so tuples of it print as hex."""

    def __repr__(self):
        return hex(self)


def alpha(i):
    """Return code point *i* as a character if alphabetic, else its escape.

    The escape is ascii() of the character with the surrounding quotes
    stripped.
    """
    c = unichr(i)
    return c if c.isalpha() else ascii(c)[1:-1]


chars = ''.join(map(unichr, range(sys.maxunicode + 1)))
#chars = ''.join(map(unichr, range(0x10000)))

# Group every character under its uppercase form; each value is the string of
# all characters sharing that uppercase form.
by_upper = defaultdict(str)
for c in chars:
    by_upper[c.upper()] += c

# Reduce each multi-character group to the set of distinct regex-lowercase
# code points.  A frozenset is hashable, so duplicate sets collapse here.
lower_sets = {frozenset(map(re_clower, t))
              for t in by_upper.values() if len(t) > 1}

# Keep only groups that still have several distinct members after lowering,
# in a deterministic order.
equivalences = sorted(tuple(sorted(t)) for t in lower_sets if len(t) > 1)

print('equivalences = (')
for t in equivalences:
    print(' # %s' % ', '.join(map(uname, t)))
    print(' %r, # %s' % (tuple(map(hexint, t)), ''.join(map(alpha, t))))
print(')')

# For every member of an equivalence, list the other members of its tuple.
mapping = {i: tuple(j for j in t if i != j) for t in equivalences for i in t}

print('mapping = {')
for i, t in sorted(mapping.items()):
    print(' # %s: %s' % (uname(i), ', '.join(map(uname, t))))
    #print(' %s: %r,' % (hex(i), tuple(map(hexint, t))))
    print(" %s: %r, # '%s': '%s'" % (hex(i), tuple(map(hexint, t)), alpha(i), ''.join(map(alpha, t))))
print('}')