Index: Tools/unicode/gencjkextcodecs.py
===================================================================
--- Tools/unicode/gencjkextcodecs.py	(revision 0)
+++ Tools/unicode/gencjkextcodecs.py	(revision 0)
@@ -0,0 +1,320 @@
+"""Generate extension CJK codec modules (mac_korean, mac_japanese, ...).
+
+For every entry in ``codecs`` the extension mapping table is compared with
+the mapping table of its base codec, and a module holding only the
+differences is written out.  The generated module builds its codec with
+``_multibytecodec.create_extcodec()`` on top of the base codec.
+
+Usage: gencjkextcodecs.py [output-directory [-c]]
+
+``-c`` selects the condensed rendering of the maps (no comments).
+"""
+
+import os
+import re
+import string
+
+EUC_KR_NO_MAKE_UP = 1
+
+codecs = [
+    # (extcodec, encode buffer size, decode buffer size, flags,
+    #  base codec module, base codec, ext mapping, base mapping)
+    ('mac_korean', 5, 2, EUC_KR_NO_MAKE_UP, 'kr', 'euc_kr',
+     'http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/KOREAN.TXT',
+     'http://people.freebsd.org/~perky/i18n/EUC-KR.TXT'),
+    ('mac_japanese', 5, 2, 0, 'jp', 'shift_jis',
+     'http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/JAPANESE.TXT',
+     ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/'
+      'SHIFTJIS.TXT')),
+    ('mac_chinsimp', 2, 2, 0, 'cn', 'gb2312',
+     'http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CHINSIMP.TXT',
+     'http://people.freebsd.org/~perky/i18n/EUC-CN.TXT'),
+    ('mac_chintrad', 2, 2, 0, 'tw', 'big5',
+     'http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CHINTRAD.TXT',
+     'http://people.freebsd.org/~perky/i18n/BIG5.TXT'),
+]
+
+basemapfix = { # apply our real mapping
+    'mac_japanese': (
+        {'\x5c': b'\x5c', '\x7e': b'\x7e'},
+        {b'\x5c': '\x5c', b'\x7e': '\x7e'},
+    ),
+}
+
+additionaltune = {
+    'mac_japanese': """\
+    # User-defined range
+    lowset = list(range(0x40, 0x7f)) + list(range(0x80, 0xfd))
+    for high in range(0xf0, 0xfd):
+        decode_map[bytes([high])] = 2
+        for j, low in enumerate(lowset):
+            u = chr(high * 188 + j + 12224)
+            c = bytes([high, low])
+            encode_map[u] = c
+            decode_map[c] = u
+    del high, j, low, lowset, u, c
+    """
+}
+
+TEMPLATE = string.Template("""\
+#
+# $encoding.py: Python Unicode Codec for $ENCODING
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_$basemodule, codecs
+import _multibytecodec as mbc
+
+encode_map = {
+$encodemap
+}
+
+decode_map = {
+$decodemap
+}
+$additionaltune
+basecodec = _codecs_$basemodule.getcodec('$baseencoding')
+codec = mbc.create_extcodec('$encoding', basecodec, encode_map, decode_map,
+                            $encbuf, $decbuf, $flags)
+
+class Codec(codecs.Codec):
+    encode = codec.encode
+    decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='$encoding',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
+""")
+
+def tobytes(codepoint):
+    """Convert a '0xNN' or '0xNNNN' field into one or two bytes."""
+    codepoint = int(codepoint[2:], 16)
+    if codepoint >= 0x100:
+        return bytes([codepoint // 256, codepoint % 256])
+    else:
+        return bytes([codepoint])
+
+def tostr(codepoint):
+    """Convert '0xNNNN' fields joined by '+' into a string."""
+    return ''.join(
+        chr(int(point[2:], 16)) for point in codepoint.split('+'))
+
+def readmap(f):
+    """Parse a mapping table from the iterable of lines *f*.
+
+    Returns (encode map, decode map, comments); the comments are keyed
+    by the unicode string of each entry.
+    """
+    emap, dmap = {}, {}
+    comments = {}
+    for line in f:
+        if '#' in line:
+            content, comment = map(str.strip, line.split('#', 1))
+        else:
+            content, comment = line.strip(), ''
+
+        if not content:
+            continue
+        legacy, uni = content.split()
+        lch, uch = tobytes(legacy), tostr(uni)
+        emap[uch] = lch
+        dmap[lch] = uch
+        comments[uch] = comment
+
+    return emap, dmap, comments
+
+def openurl(url):
+    """Open the local copy of *url*, downloading it first if missing.
+
+    The copy is kept in the current directory under the basename of the
+    URL.  The caller is responsible for closing the returned file.
+    """
+    from urllib import request
+    basepart = os.path.basename(url)
+    if not os.path.exists(basepart):
+        request.urlretrieve(url, filename=basepart)
+    return open(basepart)
+
+def removeduplicates(extmap, basemap):
+    """Return the part of *extmap* that differs from *basemap*.
+
+    Keys that remain only in *basemap* are added with the value None
+    (blockers for codes the extension deletes).  Neither argument is
+    modified.
+    """
+    extmap, basemap = extmap.copy(), basemap.copy()
+    commonkeys = set(extmap) & set(basemap)
+
+    for key in commonkeys:
+        if extmap[key] == basemap[key]:
+            del extmap[key]
+            del basemap[key]
+
+    for dkey in basemap: # place blockers for deleted codes in extension
+        extmap.setdefault(dkey, None)
+
+    return extmap
+
+def codecdifference(encoding, extmapurl, basemapurl):
+    """Compute the encode/decode patches of *encoding* over its base codec.
+
+    Returns (encode patch, decode patch, comments).  Besides replacement
+    values the patches carry window size hints: an int, or an
+    (int, value) tuple in the encode patch.
+    """
+    # close the mapping files as soon as they have been parsed
+    with openurl(extmapurl) as f:
+        extemap, extdmap, extcomments = readmap(f)
+    with openurl(basemapurl) as f:
+        baseemap, basedmap, basecomments = readmap(f)
+    if encoding in basemapfix:
+        baseemap.update(basemapfix[encoding][0])
+        basedmap.update(basemapfix[encoding][1])
+
+    emappatch = removeduplicates(extemap, baseemap)
+    dmappatch = removeduplicates(extdmap, basedmap)
+
+    # remove control characters from the comparison (character maps miss them)
+    for i in list(range(0x20)) + [0x7f]:
+        emappatch.pop(chr(i), None)
+        dmappatch.pop(bytes([i]), None)
+
+    # place window size hints for multiple-byte codes
+    for dkey in list(dmappatch):
+        if len(dkey) > 2:
+            raise NotImplementedError
+        elif len(dkey) > 1:
+            if dmappatch.get(dkey[:1], 2) != 2:
+                raise ValueError("multiple candidates come for 0x%x" % dkey[0])
+            if dkey[:1] in extdmap or dkey[:1] in basedmap:
+                raise ValueError("collision of the first char in "
+                                 "decoding map")
+            dmappatch[dkey[:1]] = 2
+
+    # place window size hints for encode maps. These are a bit trickier
+    # because some unicode points have corresponding legacy codepoints for both
+    # unmodified and modified by following characters.
+    for ekey in list(emappatch):
+        if len(ekey) <= 1:
+            continue
+
+        for st in range(1, len(ekey)):
+            prefix = ekey[:st]
+            if prefix in emappatch:
+                if isinstance(emappatch[prefix], bytes):
+                    emappatch[prefix] = (st + 1, emappatch[prefix])
+                elif emappatch[prefix] is None:
+                    raise NotImplementedError
+            elif prefix in baseemap:
+                emappatch[prefix] = (st + 1, baseemap[prefix])
+            else:
+                emappatch[prefix] = st + 1
+
+    basecomments.update(extcomments)
+    return emappatch, dmappatch, basecomments
+
+def getadditionaltune(enc):
+    """Return the extra module code registered for *enc*, or ''.
+
+    The snippet from ``additionaltune`` is reindented to top level and
+    prefixed with a newline.
+    """
+    if enc not in additionaltune:
+        return ''
+
+    code = additionaltune[enc].splitlines()
+    # reindent to top-level
+    indent = re.compile('^' + re.match('^ *', code[0]).group())
+    return '\n' + '\n'.join(indent.sub('', line) for line in code)
+
+def reprmap(m, comments, commentfield=0):
+    """Render *m* as one "key: value," line per entry.
+
+    The comment looked up by the key (commentfield=0) or by the value
+    (commentfield=1) is appended to the line when there is one.
+    """
+    r = []
+    for k, v in sorted(m.items()):
+        comment = comments.get((k, v)[commentfield], '')
+        s = ' %s: %s,' % (repr(k), repr(v))
+        r.append(('%-35s # %s' % (s, comment)).rstrip('# \t'))
+
+    return '\n'.join(r)
+
+def reprmap_condensed(m, comments, commentfield=0):
+    """Render *m* as entries packed into lines of at most 79 columns.
+
+    *comments* and *commentfield* are unused; they keep the signature
+    interchangeable with reprmap().
+    """
+    linewidth = 79
+    r = ['']
+    def write(s):
+        if len(r[-1]) + len(s) <= linewidth:
+            r[-1] += s
+        else:
+            r.append(s)
+
+    for k, v in sorted(m.items()):
+        write(ascii(k))
+        write(':')
+        write(ascii(v) if not isinstance(v, tuple)
+              else '(%s,%s)' % (ascii(v[0]), ascii(v[1])))
+        write(',')
+
+    return '\n'.join(r).rstrip(',\n')
+
+def main(argv):
+    """Generate every codec listed in ``codecs``.
+
+    argv[1], when given, is the output directory (default '.'); a
+    following '-c' selects the condensed map rendering.
+    """
+    prefix = argv[1] if len(argv) >= 2 else '.'
+    condensed = (len(argv) >= 3 and argv[2] == '-c')
+    render = reprmap_condensed if condensed else reprmap
+
+    for (enc, encbuf, decbuf, flags, basemod, baseenc,
+         extmapurl, basemapurl) in codecs:
+        emap, dmap, comments = codecdifference(enc, extmapurl, basemapurl)
+
+        encodemap = render(emap, comments, 0)
+        decodemap = render(dmap, comments, 1)
+
+        tune = getadditionaltune(enc)
+
+        code = TEMPLATE.substitute(ENCODING=enc.upper(), encoding=enc.lower(),
+                                   baseencoding=baseenc, basemodule=basemod,
+                                   encodemap=encodemap, decodemap=decodemap,
+                                   encbuf=str(encbuf), decbuf=str(decbuf),
+                                   flags=flags, additionaltune=tune)
+        codecpath = os.path.join(prefix, enc + '.py')
+        # close (and flush) the generated module before moving on
+        with open(codecpath, 'w') as f:
+            f.write(code)
+
+if __name__ == '__main__':
+    import sys
+    main(sys.argv)
Index: Tools/unicode/Makefile
===================================================================
--- Tools/unicode/Makefile	(revision 64533)
+++ Tools/unicode/Makefile	(working copy)
@@ -15,7 +15,7 @@
 
 all:	distclean mappings codecs
 
-codecs:	misc windows iso apple ebcdic custom-mappings cjk
+codecs:	misc windows iso apple ebcdic custom-mappings cjk cjkext
 
 ### Mappings
 
@@ -75,6 +75,9 @@
 cjk:	build/
 	$(PYTHON) gencjkcodecs.py build/
 
+cjkext:	build/
+	$(PYTHON) gencjkextcodecs.py build/ -c
+
 ### Cleanup
 
 clean:
Index: Doc/library/codecs.rst
===================================================================
--- Doc/library/codecs.rst	(revision 64533)
+++ Doc/library/codecs.rst	(working copy)
@@ -1047,6 +1047,14 @@
 +-----------------+--------------------------------+--------------------------------+
 | koi8_u          |                                | Ukrainian                      |
 +-----------------+--------------------------------+--------------------------------+
+| mac_chinsimp    | cp10008, macchinsimp,          | Simplified Chinese             |
+|                 | macchinesesimplified,          |                                |
+|                 | x-mac-chinesesimp              |                                |
++-----------------+--------------------------------+--------------------------------+
+| mac_chintrad    | cp10002, macchintrad,          | Traditional Chinese            |
+|                 | macchinesetraditional,         |                                |
+|                 | x-mac-chinesetrad              |                                |
++-----------------+--------------------------------+--------------------------------+ | mac_cyrillic | maccyrillic | Bulgarian, Byelorussian, | | | | Macedonian, Russian, Serbian | +-----------------+--------------------------------+--------------------------------+ @@ -1056,6 +1064,12 @@ +-----------------+--------------------------------+--------------------------------+ | mac_latin2 | maclatin2, maccentraleurope | Central and Eastern Europe | +-----------------+--------------------------------+--------------------------------+ +| mac_japanese | cp10001, macjapanese, | Japanese | +| | x-mac-japanese | | ++-----------------+--------------------------------+--------------------------------+ +| mac_korean | cp10003, mackorean, | Korean | +| | x-mac-korean | | ++-----------------+--------------------------------+--------------------------------+ | mac_roman | macroman | Western Europe | +-----------------+--------------------------------+--------------------------------+ | mac_turkish | macturkish | Turkish | Index: Lib/encodings/mac_korean.py =================================================================== --- Lib/encodings/mac_korean.py (revision 0) +++ Lib/encodings/mac_korean.py (revision 0) @@ -0,0 +1,926 @@ +# +# mac_korean.py: Python Unicode Codec for MAC_KOREAN +# +# Written by Hye-Shik Chang +# + +import _codecs_kr, codecs +import _multibytecodec as mbc + +encode_map = { +'!':(2,b'!'),'!\uf877':b'\xa1\x8a','!\uf87f':b'\xad\xb0','(':(2,b'('),'(\uf87c' +:b'\xa1W','(\uf87f':b'\xa2K',')':(2,b')'),')\uf87c':b'\xa1X',')\uf87f':b'\xa2L' +,'*':(2,b'*'),'*\uf877':b'\xa1l','-':(2,b'-'),'-\u0308':b'\xa7g','0':(2,b'0'), +'0\u20de':b'\xa5A','1':(2,b'1'),'1\u20de':(3,b'\xa5B'),'1\u20de\uf875': +b'\xac\xc2','1\u20de\uf87a':b'\xa4U','1\u20de\uf87b':b'\xa3A','1\u20de\uf87c': +b'\xa2\xe6','1\u20de\uf87f':b'\xa4A','2':(2,b'2'),'2\u20de':(3,b'\xa5C'), +'2\u20de\uf875':b'\xac\xc3','2\u20de\uf87a':b'\xa4V','2\u20de\uf87b':b'\xa3B', 
+'2\u20de\uf87c':b'\xa2\xe7','2\u20de\uf87f':b'\xa4B','3':(2,b'3'),'3\u20de': +(3,b'\xa5D'),'3\u20de\uf875':b'\xac\xc4','3\u20de\uf87a':b'\xa4W', +'3\u20de\uf87b':b'\xa3C','3\u20de\uf87c':b'\xa2\xe8','3\u20de\uf87f':b'\xa4C', +'4':(2,b'4'),'4\u20de':(3,b'\xa5E'),'4\u20de\uf875':b'\xac\xc5','4\u20de\uf87a' +:b'\xa4X','4\u20de\uf87b':b'\xa3D','4\u20de\uf87c':b'\xa2\xe9','4\u20de\uf87f': +b'\xa4D','5':(2,b'5'),'5\u20de':(3,b'\xa5F'),'5\u20de\uf875':b'\xac\xc6', +'5\u20de\uf87a':b'\xa4Y','5\u20de\uf87b':b'\xa3E','5\u20de\uf87c':b'\xa2\xea', +'5\u20de\uf87f':b'\xa4E','6':(2,b'6'),'6\u20de':(3,b'\xa5G'),'6\u20de\uf875': +b'\xac\xc7','6\u20de\uf87a':b'\xa4Z','6\u20de\uf87b':b'\xa3F','6\u20de\uf87c': +b'\xa2\xeb','6\u20de\uf87f':b'\xa4F','7':(2,b'7'),'7\u20de':(3,b'\xa5H'), +'7\u20de\uf875':b'\xac\xc8','7\u20de\uf87a':b'\xa4[','7\u20de\uf87b':b'\xa3G', +'7\u20de\uf87c':b'\xa2\xec','7\u20de\uf87f':b'\xa4G','8':(2,b'8'),'8\u20de': +(3,b'\xa5I'),'8\u20de\uf875':b'\xac\xc9','8\u20de\uf87a':b'\xa4\\', +'8\u20de\uf87b':b'\xa3H','8\u20de\uf87c':b'\xa2\xed','8\u20de\uf87f':b'\xa4H', +'9':(2,b'9'),'9\u20de':(3,b'\xa5J'),'9\u20de\uf875':b'\xac\xca','9\u20de\uf87a' +:b'\xa4]','9\u20de\uf87b':b'\xa3I','9\u20de\uf87c':b'\xa2\xee','9\u20de\uf87f': +b'\xa4I','<':(2,b'<'),'<\uf877':b'\xa1y','=':(2,b'='),'=\u20d2':b'\xa7e', +'=\u20e5':b'\xa7b','>':(2,b'>'),'>\uf877':b'\xa1x','[':(2,b'['),'[\uf877': +b'\xa1\x83','[\uf87b':b'\xa1a','[\uf87c':b'\xa1c',']':(2,b']'),']\uf877': +b'\xa1\x84',']\uf87b':b'\xa1b',']\uf87c':b'\xa1d','{':(2,b'{'),'{\uf877': +b'\xa1\x81','}':(2,b'}'),'}\uf877':b'\xa1\x82','\x85':b'\x85','\x86':b'\x86', +'\x87':b'\x87','\x88':b'\x88','\x89':b'\x89','\x8a':b'\x8a','\x8b':b'\x8b', +'\x8c':b'\x8c','\x8d':b'\x8d','\x8e':b'\x8e','\x8f':b'\x8f','\x90':b'\x90', +'\x91':b'\x91','\x92':b'\x92','\x93':b'\x93','\x94':b'\x94','\x95':b'\x95', +'\x96':b'\x96','\x97':b'\x97','\x98':b'\x98','\x99':b'\x99','\x9a':b'\x9a', 
+'\x9b':b'\x9b','\x9c':b'\x9c','\x9d':b'\x9d','\x9e':b'\x9e','\x9f':b'\x9f', +'\xa0':b'\x80','\xa2':b'\xa1\xcb','\xa3':b'\xa1\xcc','\xa5':b'\xa1\xcd','\xa7': +(2,b'\xa1\xd7'),'\xa7\uf87c':b'\xa6G','\xa9':b'\x83','\xab':b'\xa6\\','\xac': +b'\xa1\xfe','\xad':None,'\xae':None,'\xb1':(2,b'\xa1\xbe'),'\xb1\uf877': +b'\xa1v','\xb6':(2,b'\xa2\xd2'),'\xb6\uf87f':b'\xa2\xfa','\xbb':b'\xa6]','\xd7' +:(2,b'\xa1\xbf'),'\xd7\uf877':b'\xa1s','\u02bc':b'\xa1\x98','\u02dc': +b'\xa2\xa6','\u03d5':b'\xa7j','\u2013':(2,b'\xa1\xa9'),'\u2013\uf87f':b'\x82', +'\u2014':b'\xa1\xaa','\u2015':None,'\u2016':(2,b'\xa1\xab'),'\u2016\uf87b': +b'\xa2\xfb','\u2016\uf87c':b'\xa2\xfc','\u201b':b'\xa1p','\u201f':b'\xa1o', +'\u2020':(2,b'\xa2\xd3'),'\u2020\uf877':b'\xa1k','\u2020\uf87b':b'\xa1i', +'\u2020\uf87c':b'\xa6C','\u2020\uf87f':b'\xa1g','\u2021':(2,b'\xa2\xd4'), +'\u2021\uf87c':b'\xa1j','\u2021\uf87f':b'\xa1h','\u2022':b'\xa6U','\u2026': +(2,b'\xa1\xa6'),'\u2026\uf87f':b'\xff','\u2032':(2,b'\xa1\xc7'),'\u2032\uf873': +b'\xad\xae','\u2032\uf87f':b'\xa5\xdc','\u2033':(2,b'\xa1\xc8'),'\u2033\uf873': +b'\xad\xac','\u2033\uf87f':b'\xa5\xdd','\u2034':b'\xa5\xde','\u2035': +b'\xad\xad','\u2036':b'\xad\xab','\u2039':b'\xa6Z','\u203a':b'\xa6[','\u203c': +(2,b'\xa7\x84'),'\u203c\uf87f':b'\xa7\x86','\u203e':b'\xa3\xfe','\u2042': +(2,b'\xa6M'),'\u2042\uf879':b'\xa6Q','\u2047':b'\xa7\x87','\u2049':b'\xa7\x85', +'\u204c':b'\xa1\x96','\u204d':b'\xa1\x97','\u204e':b'\xa6N','\u2051': +(2,b'\xa1m'),'\u2051\uf871':b'\xa6O','\u2051\uf874':b'\xa6K','\u2051\uf879': +b'\xa1\xa0','\u2051\uf87c':b'\xa1\x9d','\u207a':b'\xa1q','\u207b':b'\xa1r', +'\u207c':b'\xa1z','\u207d':b'\xa1|','\u207e':b'\xa1}','\u20a9':(2,b'\x81'), +'\u20a9\uf87f':b'\xa6\x90','\u20ac':None,'\u2190':(2,b'\xa1\xe7'), +'\u2190\uf870':b'\xac\x89','\u2190\uf871':b'\xa8i','\u2190\uf872':b'\xa8k', +'\u2190\uf873':b'\xac]','\u2190\uf874':b'\xacf','\u2190\uf875':b'\xa8c', +'\u2190\uf878':b'\xacb','\u2190\uf879':b'\xa8S','\u2190\uf87a':b'\xacU', 
+'\u2190\uf87b':b'\xa8B','\u2190\uf87c':b'\xa8N','\u2190\uf87f':b'\xa8_', +'\u2191':(2,b'\xa1\xe8'),'\u2191\uf870':b'\xac\x8b','\u2191\uf872':b'\xa8m', +'\u2191\uf873':b'\xac`','\u2191\uf874':b'\xach','\u2191\uf875':b'\xa8e', +'\u2191\uf878':b'\xacd','\u2191\uf879':b'\xa8U','\u2191\uf87a':b'\xacW', +'\u2191\uf87b':b'\xa8C','\u2191\uf87c':b'\xa8O','\u2191\uf87f':b'\xa8a', +'\u2192':(2,b'\xa1\xe6'),'\u2192\uf870':b'\xac\x8a','\u2192\uf872':b'\xa8l', +'\u2192\uf874':b'\xacg','\u2192\uf875':b'\xa8d','\u2192\uf878':b'\xacc', +'\u2192\uf879':b'\xa8T','\u2192\uf87a':b'\xacV','\u2192\uf87b':b'\xa8A', +'\u2192\uf87c':b'\xa8M','\u2193':(2,b'\xa1\xe9'),'\u2193\uf870':b'\xac\x8c', +'\u2193\uf872':b'\xa8n','\u2193\uf873':b'\xaca','\u2193\uf874':b'\xaci', +'\u2193\uf875':b'\xa8f','\u2193\uf878':b'\xace','\u2193\uf879':b'\xa8V', +'\u2193\uf87a':b'\xacX','\u2193\uf87b':b'\xa8D','\u2193\uf87c':b'\xa8P', +'\u2193\uf87f':b'\xa8b','\u2194':(2,b'\xa1\xea'),'\u2194\uf87c':b'\xa8Q', +'\u2195':(2,b'\xa2\xd5'),'\u2195\uf87c':b'\xa8R','\u2196':(2,b'\xa2\xd8'), +'\u2196\uf87b':b'\xa8E','\u2197':(2,b'\xa2\xd6'),'\u2197\uf87b':b'\xa8F', +'\u2198':(2,b'\xa2\xd9'),'\u2198\uf87b':b'\xa8G','\u2199':(2,b'\xa2\xd7'), +'\u2199\uf87b':b'\xa8H','\u219c':b'\xacS','\u219d':b'\xacR','\u21b0': +(2,b'\xa8\x82'),'\u21b0\uf87a':b'\xacz','\u21b0\uf87c':b'\xac\x85', +'\u21b0\uf87f':b'\xa8\x8a','\u21b1':(2,b'\xa8|'),'\u21b1\uf87a':b'\xacw', +'\u21b1\uf87c':b'\xac\x82','\u21b1\uf87f':b'\xa8\x87','\u21b2':b'\xa8{', +'\u21b3':b'\xa8\x83','\u21b4':b'\xa8\x81','\u21b6':b'\xacP','\u21b7':b'\xacQ', +'\u21bb':2,'\u21bb\uf87a':b'\xacx','\u21bb\uf87b':b'\xa8}','\u21bb\uf87c': +b'\xac\x83','\u21bb\uf87f':b'\xa8\x88','\u21bc':(2,b'\xa8\x92'),'\u21bc\uf879': +b'\xa8\x99','\u21bc\uf87f':b'\xacM','\u21c0':(2,b'\xa8\x93'),'\u21c0\uf879': +b'\xa8\x98','\u21c0\uf87f':b'\xacL','\u21c4':b'\xa8\x9e','\u21c5':b'\xa8\x9f', +'\u21cd':b'\xa8K','\u21cf':b'\xa8J','\u21d0':(2,b'\xa8I'),'\u21d0\uf87c': 
+b'\xa8\x9b','\u21d2':(2,b'\xa2\xa1'),'\u21d2\uf87c':b'\xa8\x9a','\u21d4': +(2,b'\xa2\xa2'),'\u21d4\uf879':b'\xa8\x95','\u21d4\uf87f':b'\xa8L','\u21e0': +b'\xacj','\u21e1':b'\xacl','\u21e2':b'\xack','\u21e3':b'\xacm','\u21e6': +(2,b'\xacr'),'\u21e6\u20dd':b'\xa8[','\u21e6\u20de':b'\xa8W','\u21e6\uf870': +b'\xacG','\u21e6\uf874':b'\xa8\x97','\u21e6\uf875':b'\xacn','\u21e6\uf878': +b'\xad\xa6','\u21e6\uf879':b'\xa8\x8e','\u21e6\uf87a':b'\xa8s','\u21e6\uf87b': +b'\xa8w','\u21e6\uf87c':b'\xacY','\u21e6\uf87f':b'\xacO','\u21e7':(2,b'\xact'), +'\u21e7\u20dd':b'\xa8]','\u21e7\u20de':b'\xa8Y','\u21e7\uf875':b'\xacp', +'\u21e7\uf878':b'\xad\xa7','\u21e7\uf879':b'\xa8\x90','\u21e7\uf87a':b'\xa8u', +'\u21e7\uf87b':b'\xa8y','\u21e7\uf87c':b'\xac[','\u21e7\uf87f':b'\xad\xaf', +'\u21e8':(2,b'\xacs'),'\u21e8\u20de':b'\xa8X','\u21e8\uf870':b'\xacF', +'\u21e8\uf874':b'\xa8\x96','\u21e8\uf875':b'\xaco','\u21e8\uf878':b'\xad\xa5', +'\u21e8\uf879':b'\xa8\x8f','\u21e8\uf87c':b'\xacZ','\u21e8\uf87f':b'\xacN', +'\u21e9':(2,b'\xacu'),'\u21e9\u20dd':b'\xa8^','\u21e9\u20de':b'\xa8Z', +'\u21e9\uf875':b'\xacq','\u21e9\uf878':b'\xad\xa8','\u21e9\uf879':b'\xa8\x91', +'\u21e9\uf87a':b'\xa8v','\u21e9\uf87b':b'\xa8z','\u21e9\uf87c':b'\xac\\', +'\u21f0':b'\xacA','\u2206':(2,b'\xa7Q'),'\u2206\uf87f':b'\xa7R','\u2208': +(2,b'\xa1\xf4'),'\u2208\uf877':b'\xa1\x88','\u2209':b'\xa7s','\u220c':b'\xa7t', +'\u2211':(2,b'\xa2\xb2'),'\u2211\uf877':b'\xa1\x89','\u2213':(2,b'\xa7\\'), +'\u2213\uf877':b'\xa1w','\u221e':(2,b'\xa1\xc4'),'\u221e\uf877':b'\xa1t', +'\u221f':b'\xa7S','\u2222':(2,b'\xa7h'),'\u2222\uf87f':b'\xa4\x98','\u2225': +(2,b'\xa7U'),'\u2225\u0347':b'\xa4\x9e','\u2226':b'\xa7V','\u2229': +(2,b'\xa1\xfb'),'\u2229\uf877':b'\xa1\x85','\u2229\uf87f':b'\xa7W','\u222a': +(2,b'\xa1\xfa'),'\u222a\uf877':b'\xa1\x86','\u222a\uf87f':b'\xa7T','\u2237': +b'\xa2\xfe','\u223c':None,'\u223d':(2,b'\xa1\xef'),'\u223d\u0336':b'\xa7y', +'\u223d\uf877':b'\xa1u','\u2243':b'\xa4\x9a','\u2245':b'\xa4\x99','\u2248': 
+b'\xa4\x9b','\u2250':b'\xa7i','\u2251':b'\xa7Y','\u2253':b'\xa7X','\u225a': +b'\xa7w','\u2260':(2,b'\xa1\xc1'),'\u2260\uf877':b'\xa1{','\u2261': +(2,b'\xa1\xd5'),'\u2261\u20d2':b'\xa7n','\u2261\u20e5':b'\xa7c','\u2262': +b'\xa7d','\u2266':b'\xa7Z','\u2267':b'\xa7[','\u226e':b'\xa7o','\u226f': +b'\xa7p','\u2270':b'\xa4\x87','\u2271':b'\xa4\x88','\u2272':b'\xa4\x89', +'\u2273':b'\xa4\x8a','\u2276':b'\xa4\x8f','\u2277':b'\xa4\x90','\u2279': +b'\xa4\x91','\u227a':b'\xa4\x81','\u227b':b'\xa4\x82','\u2280':b'\xa4\x85', +'\u2281':b'\xa4\x86','\u2282':(2,b'\xa1\xf8'),'\u2282\uf877':b'\xa1\x87', +'\u2284':b'\xa7r','\u2285':b'\xa7q','\u2295':b'\xa7]','\u2296':b'\xa7^', +'\u2297':b'\xa7_','\u2299':None,'\u22a3':b'\xa7l','\u22a4':b'\xa4\x9d','\u22a5' +:(2,b'\xa1\xd1'),'\u22a5\u0338':b'\xa7m','\u22bb':b'\xa7u','\u22bc':b'\xa7v', +'\u22ce':b'\xa4\x83','\u22cf':b'\xa4\x84','\u22da':b'\xa4\x92','\u22db': +b'\xa4\x93','\u22ee':b'\xa2\xfd','\u2306':b'\xa7x','\u2314':(2,b'\xa7a'), +'\u2314\uf87f':b'\xa7z','\u2394':2,'\u2394\uf876':b'\xa7H','\u2460': +(2,b'\xa8\xe7'),'\u2460\uf87f':b'\xa5L','\u2461':(2,b'\xa8\xe8'),'\u2461\uf87f' +:b'\xa5M','\u2462':(2,b'\xa8\xe9'),'\u2462\uf87f':b'\xa5N','\u2463': +(2,b'\xa8\xea'),'\u2463\uf87f':b'\xa5O','\u2464':(2,b'\xa8\xeb'),'\u2464\uf87f' +:b'\xa5P','\u2465':(2,b'\xa8\xec'),'\u2465\uf87f':b'\xa5Q','\u2466': +(2,b'\xa8\xed'),'\u2466\uf87f':b'\xa5R','\u2467':(2,b'\xa8\xee'),'\u2467\uf87f' +:b'\xa5S','\u2468':(2,b'\xa8\xef'),'\u2468\uf87f':b'\xa5T','\u246f':b'\xa7\xf0' +,'\u2470':b'\xa7\xf1','\u2471':b'\xa7\xf2','\u2472':b'\xa7\xf3','\u2473': +b'\xa7\xf4','\u2483':b'\xaa\xf4','\u2484':b'\xaa\xf5','\u2485':b'\xaa\xf6', +'\u2486':b'\xaa\xf7','\u2487':b'\xaa\xf8','\u24b6':b'\xa3\x86','\u24b7': +b'\xa3\x87','\u24b8':b'\xa3\x88','\u24b9':b'\xa3\x89','\u24ba':b'\xa3\x8a', +'\u24bb':b'\xa3\x8b','\u24bc':b'\xa3\x8c','\u24bd':b'\xa3\x8d','\u24be': +b'\xa3\x8e','\u24bf':b'\xa3\x8f','\u24c0':b'\xa3\x90','\u24c1':b'\xa3\x91', 
+'\u24c2':b'\xa3\x92','\u24c3':b'\xa3\x93','\u24c4':b'\xa3\x94','\u24c5': +b'\xa3\x95','\u24c6':b'\xa3\x96','\u24c7':b'\xa3\x97','\u24c8':b'\xa3\x98', +'\u24c9':b'\xa3\x99','\u24ca':b'\xa3\x9a','\u24cb':b'\xa3\x9b','\u24cc': +b'\xa3\x9c','\u24cd':b'\xa3\x9d','\u24ce':b'\xa3\x9e','\u24cf':b'\xa3\x9f', +'\u24ea':2,'\u24ea\uf87f':b'\xa5K','\u24eb':(2,b'\xa6\xef'),'\u24eb\uf878': +b'\xa4s','\u24eb\uf87f':b'\xa3_','\u24ec':(2,b'\xa6\xf0'),'\u24ec\uf878': +b'\xa4t','\u24ec\uf87f':b'\xa3`','\u24ed':(2,b'\xa6\xf1'),'\u24ed\uf878': +b'\xa4u','\u24ed\uf87f':b'\xa3a','\u24ee':(2,b'\xa6\xf2'),'\u24ee\uf878': +b'\xa4v','\u24ee\uf87f':b'\xa3b','\u24ef':(2,b'\xa6\xf3'),'\u24ef\uf878': +b'\xa4w','\u24ef\uf87f':b'\xa3c','\u24f0':(2,b'\xa6\xf4'),'\u24f0\uf878': +b'\xa4x','\u24f0\uf87f':b'\xa3d','\u24f1':(2,b'\xa6\xf5'),'\u24f1\uf878': +b'\xa4y','\u24f1\uf87f':b'\xa3e','\u24f2':(2,b'\xa6\xf6'),'\u24f2\uf878': +b'\xa4z','\u24f2\uf87f':b'\xa3f','\u24f3':(2,b'\xa6\xf7'),'\u24f3\uf878': +b'\xa4{','\u24f3\uf87f':b'\xa3g','\u24f4':(2,b'\xa6\xf8'),'\u24f4\uf878': +b'\xa4|','\u24f4\uf87f':b'\xa3h','\u2588':b'\xa7\x8f','\u25a0':(2,b'\xa1\xe1'), +'\u25a0\u20df':b'\xa6V','\u25a1':(2,b'\xa1\xe0'),'\u25a1\u20df':b'\xa6Y', +'\u25a1\uf879':b'\xa7\x8d','\u25a1\uf87b':b'\xa7\x8e','\u25a1\uf87c': +b'\xa7\x8c','\u25a2':b'\xa6x','\u25a8':(2,b'\xa2\xc9'),'\u25a8\uf87f': +b'\xa6\x8a','\u25ad':(2,b'\xa7J'),'\u25ad\uf878':b'\xa7I','\u25b1':b'\xa7f', +'\u25b2':(2,b'\xa1\xe3'),'\u25b2\u20dd':b'\xa6k','\u25b3':(2,b'\xa1\xe2'), +'\u25b3\u20dd':b'\xa6j','\u25b3\uf87f':b'\xa7E','\u25b4':2,'\u25b4\u20e4': +b'\xa7\x9b','\u25b5':b'\xa7\x95','\u25b9':b'\xa7\x96','\u25bf':b'\xa7\x94', +'\u25c3':b'\xa7\x97','\u25c6':(2,b'\xa1\xdf'),'\u25c6\u20de':b'\xa6e', +'\u25c6\uf879':b'\xa7\x8b','\u25c7':(2,b'\xa1\xde'),'\u25c7\u20de':b'\xa6b', +'\u25c7\u20df':(3,b'\xa6W'),'\u25c7\u20df\u20df':b'\xa6a','\u25c7\uf879': +b'\xa7\x89','\u25c7\uf87b':b'\xa7\x8a','\u25c7\uf87c':b'\xa7\x88', 
+'\u25c7\uf87f':b'\xa7N','\u25c8':(2,b'\xa2\xc2'),'\u25c8\uf87f':b'\xa6\x89', +'\u25c9':(2,b'\xa2\xc1'),'\u25c9\u20dd':b'\xa6\x82','\u25ca':b'\xa7\x9c', +'\u25cb':(2,b'\xa1\xdb'),'\u25cb\uf879':b'\xa7\x91','\u25cb\uf87b':b'\xa7\x92', +'\u25cb\uf87f':b'\xa7D','\u25cc':b'\xa6u','\u25cd':b'\xa6\x84','\u25ce': +(2,b'\xa1\xdd'),'\u25ce\u20dd':b'\xa6i','\u25cf':(2,b'\xa1\xdc'),'\u25cf\uf879' +:b'\xa7\x93','\u25e6':b'\xa7\x90','\u25ef':(2,b'\xa6o'),'\u25ef\uf87c':b'\xa6p' +,'\u25fb':b'\xa7F','\u25fc':b'\xa7\x9a','\u2610':(2,b'\xa6w'),'\u2610\uf87c': +b'\xa6q','\u2610\uf87f':b'\xa6v','\u261c':(2,b'\xa2\xd0'),'\u261c\uf87f': +b'\xa6^','\u261d':(2,b'\xac\x8d'),'\u261d\uf87f':b'\xac\x8f','\u261e': +(2,b'\xa2\xd1'),'\u261e\uf87f':b'\xa6_','\u261f':(2,b'\xac\x8e'),'\u261f\uf87f' +:b'\xac\x90','\u262f':(2,b'\xa6\x93'),'\u262f\uf876':b'\xa6\x98','\u262f\uf87a' +:b'\xa6\x97','\u2642':(2,b'\xa1\xce'),'\u2642\uf87f':b'\xa7A','\u2666': +b'\xa7\x98','\u266f':b'\xa6H','\u2716':b'\xa6m','\u271a':b'\xa6l','\u2720': +(2,b'\xa6\x88'),'\u2720\uf87a':b'\xa6\x87','\u2723':(2,b'\xa6r'),'\u2723\uf87a' +:b'\xa6y','\u2731':b'\xa6S','\u273d':b'\xa6R','\u273f':(2,b'\xa6|'), +'\u273f\uf87a':b'\xa6{','\u2740':b'\xa6\x99','\u2741':b'\xa6\x8d','\u2747': +b'\xa6T','\u2748':2,'\u2748\u20d8':b'\xa6\x9b','\u274d':b'\xa6\x83','\u2756': +(2,b'\xa6s'),'\u2756\uf87a':b'\xa6z','\u2756\uf87f':b'\xa6\x8e','\u2776': +b'\xa6\xe5','\u2777':b'\xa6\xe6','\u2778':b'\xa6\xe7','\u2779':b'\xa6\xe8', +'\u277a':b'\xa6\xe9','\u277b':b'\xa6\xea','\u277c':b'\xa6\xeb','\u277d': +b'\xa6\xec','\u277e':b'\xa6\xed','\u277f':b'\xa6\xee','\u278a':(2,b'\xa3U'), +'\u278a\uf87f':b'\xa4i','\u278b':(2,b'\xa3V'),'\u278b\uf87f':b'\xa4j','\u278c': +(2,b'\xa3W'),'\u278c\uf87f':b'\xa4k','\u278d':(2,b'\xa3X'),'\u278d\uf87f': +b'\xa4l','\u278e':(2,b'\xa3Y'),'\u278e\uf87f':b'\xa4m','\u278f':(2,b'\xa3Z'), +'\u278f\uf87f':b'\xa4n','\u2790':(2,b'\xa3['),'\u2790\uf87f':b'\xa4o','\u2791': 
+(2,b'\xa3\\'),'\u2791\uf87f':b'\xa4p','\u2792':(2,b'\xa3]'),'\u2792\uf87f': +b'\xa4q','\u2793':(2,b'\xa3^'),'\u2793\uf87f':b'\xa4r','\u2794':b'\xac^', +'\u279b':b'\xa8j','\u279c':b'\xa8`','\u279e':b'\xa8x','\u27a1':b'\xa8t', +'\u27a4':b'\xacH','\u27b2':b'\xa8\\','\u27b5':b'\xacC','\u27e1':2, +'\u27e1\u20dd':b'\xa6\x85','\u2934':(2,b'\xa8\x8d'),'\u2934\uf87a':b'\xac}', +'\u2934\uf87c':b'\xac\x88','\u2934\uf87f':b'\xa8\x85','\u2935':(2,b'\xa8\x89'), +'\u2935\uf87a':b'\xacy','\u2935\uf87c':b'\xac\x84','\u2936':(2,b'\xa8\x86'), +'\u2936\uf87a':b'\xacv','\u2936\uf87c':b'\xac\x81','\u2937':(2,b'\xa8\x8b'), +'\u2937\uf87a':b'\xac{','\u2937\uf87c':b'\xac\x86','\u2939':(2,b'\xa8\x8c'), +'\u2939\uf87a':b'\xac|','\u2939\uf87c':b'\xac\x87','\u2939\uf87f':b'\xa8\x84', +'\u2962':(2,b'\xa8o'),'\u2962\uf87f':b'\xacE','\u2963':b'\xa8q','\u2964': +(2,b'\xa8p'),'\u2964\uf87f':b'\xacD','\u2965':b'\xa8r','\u2981':b'\xa7\x99', +'\u2985':(2,b'\xa1Y'),'\u2985\uf873':b'\xa2C','\u2985\uf878':b'\xa2A', +'\u2985\uf879':b'\xa1S','\u2985\uf87b':b'\xa1e','\u2985\uf87c':b'\xa1U', +'\u2985\uf87f':b'\xa1Q','\u2986':(2,b'\xa1Z'),'\u2986\uf873':b'\xa2D', +'\u2986\uf878':b'\xa2B','\u2986\uf879':b'\xa1T','\u2986\uf87b':b'\xa1f', +'\u2986\uf87c':b'\xa1V','\u2986\uf87f':b'\xa1R','\u2997':b'\xa1\x99','\u2998': +b'\xa1\x9a','\u29a3':b'\xa4\x9c','\u29be':b'\xa6h','\u29bf':b'\xa6n','\u29c8': +(2,b'\xa6d'),'\u29c8\u20de':b'\xa6g','\u2a26':b'\xa4}','\u2a38':b'\xa7`', +'\u2a72':b'\xa7{','\u2a8b':b'\xa4\x94','\u2a8c':b'\xa4\x95','\u2a91': +b'\xa4\x96','\u2a92':b'\xa4\x97','\u2ac5':b'\xa4\x8b','\u2ac6':b'\xa4\x8d', +'\u2acb':b'\xa4\x8c','\u2acc':b'\xa4\x8e','\u2ae8':b'\xa7k','\u2afd':2, +'\u2afd\u0347':b'\xa4\x9f','\u3002':(2,b'\xa1\xa3'),'\u3002\uf87d':b'\xa5\xdb', +'\u3007':2,'\u3007\uf876':b'\xa1\x8b','\u3008':(2,b'\xa1\xb4'),'\u3008\uf878': +b'\xa1K','\u3009':(2,b'\xa1\xb5'),'\u3009\uf878':b'\xa1L','\u300a': +(2,b'\xa1\xb6'),'\u300a\uf878':b'\xa1I','\u300b':(2,b'\xa1\xb7'),'\u300b\uf878' 
+:b'\xa1J','\u300c':(2,b'\xa1\xb8'),'\u300c\uf879':b'\xad\xa1','\u300c\uf87b': +b'\xa1C','\u300c\uf87c':b'\xa1E','\u300c\uf87f':b'\xa1A','\u300d': +(2,b'\xa1\xb9'),'\u300d\uf879':b'\xad\xa2','\u300d\uf87b':b'\xa1D', +'\u300d\uf87c':b'\xa1F','\u300d\uf87f':b'\xa1B','\u300e':(2,b'\xa1\xba'), +'\u300e\uf879':b'\xad\xa3','\u300e\uf87c':b'\xa1G','\u300f':(2,b'\xa1\xbb'), +'\u300f\uf879':b'\xad\xa4','\u300f\uf87c':b'\xa1H','\u3010':(2,b'\xa1\xbc'), +'\u3010\uf878':b'\xa2I','\u3010\uf87f':b'\xa1[','\u3011':(2,b'\xa1\xbd'), +'\u3011\uf878':b'\xa2J','\u3011\uf87f':b'\xa1\\','\u3012':b'\xa7B','\u3013': +(2,b'\xa1\xeb'),'\u3013\uf87c':b'\xa6}','\u3016':(2,b'\xa1]'),'\u3016\uf878': +b'\xa2G','\u3017':(2,b'\xa1^'),'\u3017\uf878':b'\xa2H','\u3018':b'\xa1_', +'\u3019':b'\xa1`','\u301c':b'\xa1\xad','\u301e':b'\xad\xa9','\u301f': +b'\xad\xaa','\u3020':b'\xa6\x9e','\u3036':b'\xa7C','\u3231':b'\xa7\x9d', +'\u3239':b'\xa7\x9e','\u3251':(2,b'\xa7\xf5'),'\u3251\uf87a':b'\xa6\xf9', +'\u3252':(2,b'\xa7\xf6'),'\u3252\uf87a':b'\xa6\xfa','\u3253':(2,b'\xa7\xf7'), +'\u3253\uf87a':b'\xa6\xfb','\u3254':(2,b'\xa7\xf8'),'\u3254\uf87a':b'\xa6\xfc', +'\u3255':(2,b'\xa7\xf9'),'\u3255\uf87a':b'\xa6\xfd','\u3256':(2,b'\xa7\xfa'), +'\u3256\uf87a':b'\xa6\xfe','\u3257':(2,b'\xa7\xfb'),'\u3257\uf87a':b'\xa5\xf9', +'\u3258':(2,b'\xa7\xfc'),'\u3258\uf87a':b'\xa5\xfa','\u3259':(2,b'\xa7\xfd'), +'\u3259\uf87a':b'\xa5\xfb','\u325a':(2,b'\xa7\xfe'),'\u325a\uf87a':b'\xa5\xfc', +'\u328a':b'\xadq','\u328b':b'\xadr','\u328c':b'\xads','\u328d':b'\xadt', +'\u328e':b'\xadu','\u328f':b'\xadv','\u3290':b'\xadp','\u3294':b'\xab\\', +'\u329e':(2,b'\xa7\x82'),'\u329e\uf87f':b'\xa7\x83','\u32a5':b'\xabl','\u33cb': +b'\xa7\x9f','\u4e00':(2,b'\xec\xe9'),'\u4e00\u20de':(3,b'\xadU'), +'\u4e00\u20de\uf87a':b'\xadA','\u4e00\uf876':b'\xa1\x8c','\u4e03': +(2,b'\xf6\xd2'),'\u4e03\u20de':(3,b'\xad['),'\u4e03\u20de\uf87a':b'\xadG', +'\u4e03\uf876':b'\xa1\x92','\u4e09':(2,b'\xdf\xb2'),'\u4e09\u20de':(3,b'\xadW') 
+,'\u4e09\u20de\uf87a':b'\xadC','\u4e09\uf876':b'\xa1\x8e','\u4e5d': +(2,b'\xce\xfa'),'\u4e5d\u20de':(3,b'\xad]'),'\u4e5d\u20de\uf87a':b'\xadI', +'\u4e5d\uf876':b'\xa1\x94','\u4e8c':(2,b'\xec\xa3'),'\u4e8c\u20de':(3,b'\xadV') +,'\u4e8c\u20de\uf87a':b'\xadB','\u4e8c\uf876':b'\xa1\x8d','\u4e94': +(2,b'\xe7\xe9'),'\u4e94\u20de':(3,b'\xadY'),'\u4e94\u20de\uf87a':b'\xadE', +'\u4e94\uf876':b'\xa1\x90','\u4ed6':(2,b'\xf6\xe2'),'\u4ed6\u20dd':b'\xabm', +'\u4ed6\u20de':b'\xaan','\u4ee3':(2,b'\xd3\xdb'),'\u4ee3\u20dd':b'\xabZ', +'\u4ee3\u20de':b'\xaaZ','\u4f8b':(2,b'\xd6\xc7'),'\u4f8b\u20de':b'\xaaf', +'\u516b':(2,b'\xf8\xa2'),'\u516b\u20de':(3,b'\xad\\'),'\u516b\u20de\uf87a': +b'\xadH','\u516b\uf876':b'\xa1\x93','\u516d':(2,b'\xd7\xbf'),'\u516d\u20de': +(3,b'\xadZ'),'\u516d\u20de\uf87a':b'\xadF','\u516d\uf876':b'\xa1\x91','\u51a0': +(2,b'\xce\xae'),'\u51a0\u20dd':b'\xaa\x9f','\u51a0\u20de':b'\xaaX','\u51f8': +(2,b'\xf4\xc8'),'\u51f8\uf87f':b'\xa7P','\u51f9':(2,b'\xe8\xea'),'\u51f9\uf87f' +:b'\xa7O','\u524d':(2,b'\xee\xf1'),'\u524d\u20dd':b'\xab`','\u524d\u20de': +b'\xaai','\u526f':(2,b'\xdc\xf9'),'\u526f\u20dd':b'\xab^','\u526f\u20de': +b'\xaab','\u52a9':(2,b'\xf0\xbe'),'\u52a9\u20dd':b'\xabc','\u52a9\u20de': +b'\xaal','\u52d5':(2,b'\xd4\xd1'),'\u52d5\u20dd':b'\xab[','\u52d5\u20de': +b'\xaa\\','\u5341':(2,b'\xe4\xa8'),'\u5341\u20de':(3,b'\xad^'), +'\u5341\u20de\uf87a':b'\xadJ','\u5341\uf876':b'\xa1\x95','\u534d': +(2,b'\xd8\xb3'),'\u534d\uf87f':b'\xa6\x92','\u5370':(2,b'\xec\xd4'), +'\u5370\u20dd':b'\xabD','\u5370\u20de':b'\xaaT','\u53c3':(2,b'\xf3\xd1'), +'\u53c3\u20dd':b'\xabe','\u53cd':(2,b'\xda\xe3'),'\u53cd\u20dd':b'\xab]', +'\u53cd\u20de':b'\xaa_','\u540d':(2,b'\xd9\xa3'),'\u540d\u20de':b'\xaa]', +'\u56db':(2,b'\xde\xcc'),'\u56db\u20de':(3,b'\xadX'),'\u56db\u20de\uf87a': +b'\xadD','\u56db\uf876':b'\xa1\x8f','\u570b':(2,b'\xcf\xd0'),'\u570b\u20dd': +b'\xabk','\u571f':(2,b'\xf7\xcf'),'\u571f\u20de':(3,b'\xado'), 
+'\u571f\u20de\uf87c':b'\xad}','\u5b50':(2,b'\xed\xad'),'\u5b50\u20de':b'\xaah', +'\u5e8f':(2,b'\xdf\xed'),'\u5e8f\u20de':b'\xaac','\u5f62':(2,b'\xfb\xa1'), +'\u5f62\u20dd':b'\xabi','\u5f62\u20de':b'\xaap','\u5f71':(2,b'\xe7\xaf'), +'\u5f71\u20de':b'\xaae','\u611f':(2,b'\xca\xef'),'\u611f\u20dd':b'\xabX', +'\u611f\u20de':b'\xaaW','\u6163':(2,b'\xce\xb1'),'\u6163\u20dd':b'\xabY', +'\u6307':(2,b'\xf2\xa6'),'\u6307\u20de':b'\xaam','\u63a5':(2,b'\xef\xc8'), +'\u63a5\u20dd':b'\xabb','\u63a5\u20de':b'\xaak','\u65b0':(2,b'\xe3\xe6'), +'\u65b0\u20dd':b'\xabg','\u65e5':(2,b'\xec\xed'),'\u65e5\u20de':(3,b'\xadi'), +'\u65e5\u20de\uf87c':b'\xadw','\u6708':(2,b'\xea\xc5'),'\u6708\u20de': +(3,b'\xadj'),'\u6708\u20de\uf87c':b'\xadx','\u6728':(2,b'\xd9\xca'), +'\u6728\u20de':(3,b'\xadm'),'\u6728\u20de\uf87c':b'\xad{','\u672b': +(2,b'\xd8\xc7'),'\u672b\u20dd':b'\xabF','\u672c':(2,b'\xdc\xe2'),'\u672c\u20dd' +:b'\xabf','\u672c\u20de':b'\xaaa','\u6c34':(2,b'\xe2\xa9'),'\u6c34\u20de': +(3,b'\xadl'),'\u6c34\u20de\uf87c':b'\xadz','\u6ce8':(2,b'\xf1\xbc'), +'\u6ce8\u20dd':b'\xabd','\u6d3e':(2,b'\xf7\xef'),'\u6d3e\u20de':b'\xaao', +'\u6e90':(2,b'\xea\xb9'),'\u6e90\u20de':b'\xaag','\u706b':(2,b'\xfb\xfd'), +'\u706b\u20de':(3,b'\xadk'),'\u706b\u20de\uf87c':b'\xady','\u73fe': +(2,b'\xfa\xde'),'\u73fe\u20dd':b'\xabh','\u76ee':(2,b'\xd9\xcd'),'\u76ee\u20de' +:b'\xaa^','\u7b54':(2,b'\xd3\xcd'),'\u7b54\u20de':b'\xaaY','\u7bc0': +(2,b'\xef\xbd'),'\u7bc0\u20de':b'\xaaj','\u81ea':(2,b'\xed\xbb'),'\u81ea\u20dd' +:b'\xab_','\u8863':(2,b'\xeb\xfd'),'\u8863\u20dd':b'\xabE','\u88dc': +(2,b'\xdc\xcd'),'\u88dc\u20de':b'\xaa`','\u88dc\u20e4':b'\xa7|','\u8a3b': +(2,b'\xf1\xc9'),'\u8a3b\u20de':(3,b'\xaaU'),'\u8a3b\u20de\uf87a':b'\xaav', +'\u9023':(2,b'\xd6\xa7'),'\u9023\u20de':b'\xaad','\u91d1':(2,b'\xd1\xd1'), +'\u91d1\u20de':(3,b'\xadn'),'\u91d1\u20de\uf87c':b'\xad|','\u9593': +(2,b'\xca\xe0'),'\u9593\u20dd':b'\xabj','\u96fb':(2,b'\xef\xb3'),'\u96fb\u20dd' 
+:b'\xaba','\u982d':(2,b'\xd4\xe9'),'\u982d\u20de':b'\xaa[','\uac00': +(2,b'\xb0\xa1'),'\uac00\u20dd':(3,b'\xabu'),'\uac00\u20dd\uf87a':b'\xab\x9b', +'\uac04':(2,b'\xb0\xa3'),'\uac04\u20dd':b'\xabU','\uac10':(2,b'\xb0\xa8'), +'\uac10\u20dd':b'\xabC','\uac10\u20de':b'\xaaP','\uac19':(2,b'\xb0\xb0'), +'\uac19\u20dd':b'\xabV','\uac70':(2,b'\xb0\xc5'),'\uac70\u20dd':b'\xabG', +'\uac8c':(2,b'\xb0\xd4'),'\uac8c\u20dd':b'\xaa\x96','\uad00':(2,b'\xb0\xfc'), +'\uad00\u20dd':b'\xaa\x9e','\uad50':(2,b'\xb1\xb3'),'\uad50\u20de':3, +'\uad50\u20de\uf87a':b'\xaaw','\uad6d':(2,b'\xb1\xb9'),'\uad6d\u20dd':b'\xabB', +'\ub098':(2,b'\xb3\xaa'),'\ub098\u20dd':(3,b'\xabv'),'\ub098\u20dd\uf87a': +b'\xab\x9c','\ub0ae':(2,b'\xb3\xb7'),'\ub0ae\u20dd':(3,b'\xaa\x8c'), +'\ub0ae\u20dd\uf87a':b'\xab\x99','\ub192':(2,b'\xb3\xf4'),'\ub192\u20dd': +(3,b'\xaa\x8b'),'\ub192\u20dd\uf87a':b'\xab\x98','\ub290':(2,b'\xb4\xc0'), +'\ub290\u20dd':3,'\ub290\u20dd\uf87a':b'\xab\x97','\ub2e4':(2,b'\xb4\xd9'), +'\ub2e4\u20dd':(3,b'\xabw'),'\ub2e4\u20dd\uf87a':b'\xab\x9d','\ub2e8': +(2,b'\xb4\xdc'),'\ub2e8\u20dd':(3,b'\xac\x91'),'\ub2e8\u20dd\uf87a':b'\xab\x8a' +,'\ub2f5':(2,b'\xb4\xe4'),'\ub2f5\u20dd':(3,b'\xabH'),'\ub2f5\u20dd\uf87a': +b'\xab\x87','\ub2f5\u20de':(3,b'\xaaB'),'\ub2f5\u20de\uf87a':b'\xaas','\ub300': +(2,b'\xb4\xeb'),'\ub300\u20dd':b'\xaa\x8e','\ub300\u20de':b'\xaaE','\ub3d9': +(2,b'\xb5\xbf'),'\ub3d9\u20dd':b'\xaa\x94','\ub3d9\u20de':b'\xaaK','\ub73b': +(2,b'\xb6\xe6'),'\ub73b\u20de':(3,b'\xaaS'),'\ub73b\u20de\uf87a':b'\xaau', +'\ub77c':(2,b'\xb6\xf3'),'\ub77c\u20dd':(3,b'\xaa\x81'),'\ub77c\u20dd\uf87a': +b'\xab\x9e','\ub9c8':(2,b'\xb8\xb6'),'\ub9c8\u20dd':(3,b'\xaa\x82'), +'\ub9c8\u20dd\uf87a':b'\xaby','\uba85':(2,b'\xb8\xed'),'\uba85\u20dd': +b'\xaa\x8d','\uba85\u20de':b'\xaaD','\ubb38':(2,b'\xb9\xae'),'\ubb38\u20de':3, +'\ubb38\u20de\uf87a':b'\xaar','\ubc14':(2,b'\xb9\xd9'),'\ubc14\u20dd': +(3,b'\xaa\x83'),'\ubc14\u20dd\uf87a':b'\xabz','\ubc18':(2,b'\xb9\xdd'), 
+'\ubc18\u20dd':(3,b'\xaa\x97'),'\ubc18\u20dd\uf87a':b'\xab\x9a','\ubc18\u20de': +b'\xaaM','\ubcc0':(2,b'\xba\xaf'),'\ubcc0\u20dd':b'\xabI','\ubcf8': +(2,b'\xba\xbb'),'\ubcf8\u20dd':(3,b'\xaa\x9a'),'\ubcf8\u20dd\uf87a':b'\xab\x89' +,'\ubd80':(2,b'\xba\xce'),'\ubd80\u20dd':b'\xaa\x90','\ubd80\u20de':b'\xaaG', +'\ube44':(2,b'\xba\xf1'),'\ube44\u20dd':(3,b'\xaa\x95'),'\ube44\u20dd\uf87a': +b'\xab\x86','\ube44\u20de':b'\xaaL','\ube60':(2,b'\xba\xfc'),'\ube60\u20dd': +(3,b'\xabn'),'\ube60\u20dd\uf87a':b'\xab\x88','\uc0ac':(2,b'\xbb\xe7'), +'\uc0ac\u20dd':(3,b'\xaa\x84'),'\uc0ac\u20dd\uf87a':b'\xab{','\uc0c1': +(2,b'\xbb\xf3'),'\uc0c1\u20dd':b'\xabJ','\uc13c':(2,b'\xbc\xbe'),'\uc13c\u20dd' +:(3,b'\xabK'),'\uc13c\u20dd\uf87a':b'\xab\x8b','\uc18c':(2,b'\xbc\xd2'), +'\uc18c\u20dd':b'\xac\x93','\uc18d':(2,b'\xbc\xd3'),'\uc18d\u20dd':b'\xaa\x98', +'\uc218':(2,b'\xbc\xf6'),'\uc218\u20dd':b'\xaa\x93','\uc218\u20de':b'\xaaJ', +'\uc219':(2,b'\xbc\xf7'),'\uc219\u20dd':b'\xaa\x9c','\uc2dc':(2,b'\xbd\xc3'), +'\uc2dc\u20dd':(3,b'\xabo'),'\uc2dc\u20dd\uf87a':b'\xab\x8c','\uc2e0': +(2,b'\xbd\xc5'),'\uc2e0\u20dd':b'\xabL','\uc2e4':(2,b'\xbd\xc7'),'\uc2e4\u20dd' +:b'\xabW','\uc544':(2,b'\xbe\xc6'),'\uc544\u20dd':(3,b'\xaa\x85'), +'\uc544\u20dd\uf87a':b'\xab|','\uc57d':(2,b'\xbe\xe0'),'\uc57d\u20dd': +b'\xaa\x9b','\uc57d\u20de':b'\xaaQ','\uc5ec':(2,b'\xbf\xa9'),'\uc5ec\u20dd': +(3,b'\xabM'),'\uc5ec\u20dd\uf87a':b'\xab\x8d','\uc5ed':(2,b'\xbf\xaa'), +'\uc5ed\u20de':3,'\uc5ed\u20de\uf87a':b'\xaax','\uc608':(2,b'\xbf\xb9'), +'\uc608\u20dd':(3,b'\xabN'),'\uc608\u20dd\uf87a':b'\xab\x8e','\uc608\u20de': +(3,b'\xaaV'),'\uc608\u20de\uf87a':b'\xaa|','\uc678':(2,b'\xbf\xdc'), +'\uc678\u20dd':(3,b'\xabS'),'\uc678\u20dd\uf87a':b'\xab\x9f','\uc6b4': +(2,b'\xbf\xee'),'\uc6b4\u20de':b'\xaaA','\uc6d0':(2,b'\xbf\xf8'),'\uc6d0\u20dd' +:b'\xabO','\uc720':(2,b'\xc0\xaf'),'\uc720\u20dd':b'\xaa\x9d','\uc73c': +(2,b'\xc0\xb8'),'\uc73c\u20dd':(3,b'\xabq'),'\uc73c\u20dd\uf87a':b'\xab\x8f', 
+'\uc74c':(2,b'\xc0\xbd'),'\uc74c\u20dd':(3,b'\xabr'),'\uc74c\u20dd\uf87a': +b'\xab\x90','\uc74c\u20de':3,'\uc74c\u20de\uf87a':b'\xaay','\uc774': +(2,b'\xc0\xcc'),'\uc774\u20dd':b'\xac\x96','\uc778':(2,b'\xc0\xce'), +'\uc778\u20dd':b'\xaa\x99','\uc778\u20de':b'\xaaR','\uc77c':(2,b'\xc0\xcf'), +'\uc77c\u20dd':b'\xac\x95','\uc785':(2,b'\xc0\xd4'),'\uc785\u20dd':(3,b'\xabp') +,'\uc785\u20dd\uf87a':b'\xab\x91','\uc790':(2,b'\xc0\xda'),'\uc790\u20dd': +(3,b'\xaa\x86'),'\uc790\u20dd\uf87a':b'\xab}','\uc790\u20de':b'\xaaN','\uc791': +(2,b'\xc0\xdb'),'\uc791\u20dd':b'\xabP','\uc804':(2,b'\xc0\xfc'),'\uc804\u20dd' +:b'\xaa\x91','\uc804\u20de':b'\xaaH','\uc811':(2,b'\xc1\xa2'),'\uc811\u20dd': +b'\xaa\x92','\uc811\u20de':b'\xaaI','\uc815':(2,b'\xc1\xa4'),'\uc815\u20de':3, +'\uc815\u20de\uf87a':b'\xaaz','\uc81c':(2,b'\xc1\xa6'),'\uc81c\u20dd':3, +'\uc81c\u20dd\uf87a':b'\xab\x92','\uc870':(2,b'\xc1\xb6'),'\uc870\u20dd': +b'\xabA','\uc870\u20de':b'\xaaq','\uc874':(2,b'\xc1\xb8'),'\uc874\u20dd': +(3,b'\xaa}'),'\uc874\u20dd\uf87a':b'\xab\x93','\uc8fc':(2,b'\xc1\xd6'), +'\uc8fc\u20de':(3,b'\xaaC'),'\uc8fc\u20de\uf87a':b'\xaat','\uc900': +(2,b'\xc1\xd8'),'\uc900\u20dd':(3,b'\xabQ'),'\uc900\u20dd\uf87a':b'\xab\x94', +'\uc911':(2,b'\xc1\xdf'),'\uc911\u20dd':b'\xac\x94','\uc9c1':(2,b'\xc1\xf7'), +'\uc9c1\u20dd':b'\xabs','\ucc28':(2,b'\xc2\xf7'),'\ucc28\u20dd':(3,b'\xaa\x87') +,'\ucc28\u20dd\uf87a':b'\xab\x81','\ucc38':(2,b'\xc2\xfc'),'\ucc38\u20dd': +b'\xac\x92','\uce74':(2,b'\xc4\xab'),'\uce74\u20dd':(3,b'\xaa\x88'), +'\uce74\u20dd\uf87a':b'\xab\x82','\ud0b9':(2,b'\xc5\xb7'),'\ud0b9\u20dd': +b'\xabR','\ud0c0':(2,b'\xc5\xb8'),'\ud0c0\u20dd':(3,b'\xaa\x89'), +'\ud0c0\u20dd\uf87a':b'\xab\x83','\ud0c0\u20de':b'\xaaO','\ud30c': +(2,b'\xc6\xc4'),'\ud30c\u20dd':(3,b'\xaa\x8a'),'\ud30c\u20dd\uf87a':b'\xab\x84' +,'\ud45c':(2,b'\xc7\xa5'),'\ud45c\u20dd':(3,b'\xabt'),'\ud45c\u20dd\uf87a': +b'\xab\x95','\ud558':(2,b'\xc7\xcf'),'\ud558\u20dd':(3,b'\xabx'), 
+'\ud558\u20dd\uf87a':b'\xab\x85','\ud574':(2,b'\xc7\xd8'),'\ud574\u20dd':3, +'\ud574\u20dd\uf87a':b'\xab\x96','\ud574\u20de':3,'\ud574\u20de\uf87a':b'\xaa{' +,'\ud615':(2,b'\xc7\xfc'),'\ud615\u20dd':b'\xaa\x8f','\ud615\u20de':b'\xaaF', +'\ud65c':(2,b'\xc8\xb0'),'\ud65c\u20dd':b'\xabT','\ud734':(2,b'\xc8\xde'), +'\ud734\u20dd':b'\xac\x97','\uf805':(2,b'\xa6X'),'\uf805\u20de':b'\xa6f', +'\uf806':(2,b'\xa6c'),'\uf806\u20df':b'\xa6`','\uf807':b'\xa6\x9f','\uf808': +b'\xa6\x8f','\uf809':(2,b'\xa6\x81'),'\uf809\uf87a':b'\xa6\x91','\uf80a': +b'\xa6t','\uf80b':(2,b'\xa6\x96'),'\uf80b\uf87f':b'\xa6\x86','\uf80c': +b'\xa6\x9a','\uf83d':(2,b'\xa6B'),'\uf83d\uf87f':b'\xa6A','\uf840':b'\xa1n', +'\uf841':b'\xa8\x94','\uf842':b'\xacT','\uf843':b'\xacB','\uf844':b'\xacI', +'\uf845':b'\xac_','\uf846':b'\xa8g','\uf847':b'\xa8h','\uf848':b'\xa8\x9d', +'\uf849':b'\xa8\x9c','\uf84a':b'\xacK','\uf84b':b'\xacJ','\uf84c':b'\xa7G', +'\uf84d':b'\xa7K','\uf84e':b'\xa7L','\uf84f':b'\xa7M','\uf860':2,'\uf860*':3, +'\uf860**':b'\xa6L','\uf860A':3,'\uf860A)':b'\xa5U','\uf860A.':b'\xa9A', +'\uf860B':3,'\uf860B)':b'\xa5V','\uf860B.':b'\xa9B','\uf860C':3,'\uf860C)': +b'\xa5W','\uf860C.':b'\xa9C','\uf860D':3,'\uf860D)':b'\xa5X','\uf860D.': +b'\xa9D','\uf860E':3,'\uf860E)':b'\xa5Y','\uf860E.':b'\xa9E','\uf860F':3, +'\uf860F)':b'\xa5Z','\uf860F.':b'\xa9F','\uf860G':3,'\uf860G)':b'\xa5[', +'\uf860G.':b'\xa9G','\uf860H':3,'\uf860H)':b'\xa5\\','\uf860H.':b'\xa9H', +'\uf860I':3,'\uf860I)':b'\xa5]','\uf860I.':b'\xa9I','\uf860J':3,'\uf860J)': +b'\xa5^','\uf860J.':b'\xa9J','\uf860K':3,'\uf860K)':b'\xa5_','\uf860K.': +b'\xa9K','\uf860L':3,'\uf860L)':b'\xa5`','\uf860L.':b'\xa9L','\uf860M':3, +'\uf860M)':b'\xa5a','\uf860M.':b'\xa9M','\uf860N':3,'\uf860N)':b'\xa5b', +'\uf860N.':b'\xa9N','\uf860O':3,'\uf860O)':b'\xa5c','\uf860O.':b'\xa9O', +'\uf860P':3,'\uf860P)':b'\xa5d','\uf860P.':b'\xa9P','\uf860Q':3,'\uf860Q)': +b'\xa5e','\uf860Q.':b'\xa9Q','\uf860R':3,'\uf860R)':b'\xa5f','\uf860R.': 
+b'\xa9R','\uf860S':3,'\uf860S)':b'\xa5g','\uf860S.':b'\xa9S','\uf860T':3, +'\uf860T)':b'\xa5h','\uf860T.':b'\xa9T','\uf860U':3,'\uf860U)':b'\xa5i', +'\uf860U.':b'\xa9U','\uf860V':3,'\uf860V)':b'\xa5j','\uf860V.':b'\xa9V', +'\uf860W':3,'\uf860W)':b'\xa5k','\uf860W.':b'\xa9W','\uf860X':3,'\uf860X)': +b'\xa5l','\uf860X.':b'\xa9X','\uf860Y':3,'\uf860Y)':b'\xa5m','\uf860Y.': +b'\xa9Y','\uf860Z':3,'\uf860Z)':b'\xa5n','\uf860Z.':b'\xa9Z','\uf860a':3, +'\uf860a)':b'\xa5o','\uf860a.':b'\xa9[','\uf860b':3,'\uf860b)':b'\xa5p', +'\uf860b.':b'\xa9\\','\uf860c':3,'\uf860c)':b'\xa5q','\uf860c.':b'\xa9]', +'\uf860d':3,'\uf860d)':b'\xa5r','\uf860d.':b'\xa9^','\uf860e':3,'\uf860e)': +b'\xa5s','\uf860e.':b'\xa9_','\uf860f':3,'\uf860f)':b'\xa5t','\uf860f.': +b'\xa9`','\uf860g':3,'\uf860g)':b'\xa5u','\uf860g.':b'\xa9a','\uf860h':3, +'\uf860h)':b'\xa5v','\uf860h.':b'\xa9b','\uf860i':3,'\uf860i)':b'\xa5w', +'\uf860i.':b'\xa9c','\uf860j':3,'\uf860j)':b'\xa5x','\uf860j.':b'\xa9d', +'\uf860k':3,'\uf860k)':b'\xa5y','\uf860k.':b'\xa9e','\uf860l':3,'\uf860l)': +b'\xa5z','\uf860l.':b'\xa9f','\uf860m':3,'\uf860m)':b'\xa5{','\uf860m.': +b'\xa9g','\uf860n':3,'\uf860n)':b'\xa5|','\uf860n.':b'\xa9h','\uf860o':3, +'\uf860o)':b'\xa5}','\uf860o.':b'\xa9i','\uf860p':3,'\uf860p)':b'\xa5\x81', +'\uf860p.':b'\xa9j','\uf860q':3,'\uf860q)':b'\xa5\x82','\uf860q.':b'\xa9k', +'\uf860r':3,'\uf860r)':b'\xa5\x83','\uf860r.':b'\xa9l','\uf860s':3,'\uf860s)': +b'\xa5\x84','\uf860s.':b'\xa9m','\uf860t':3,'\uf860t)':b'\xa5\x85','\uf860t.': +b'\xa9n','\uf860u':3,'\uf860u)':b'\xa5\x86','\uf860u.':b'\xa9o','\uf860v':3, +'\uf860v)':b'\xa5\x87','\uf860v.':b'\xa9p','\uf860w':3,'\uf860w)':b'\xa5\x88', +'\uf860w.':b'\xa9q','\uf860x':3,'\uf860x)':b'\xa5\x89','\uf860x.':b'\xa9r', +'\uf860y':3,'\uf860y)':b'\xa5\x8a','\uf860y.':b'\xa9s','\uf860z':3,'\uf860z)': +b'\xa5\x8b','\uf860z.':b'\xa9t','\uf860\u2020':3,'\uf860\u2020\u2020':b'\xa6D', +'\uf860\u2021':3,'\uf860\u2021\u2021':b'\xa6E','\uf861':2,'\uf861(':3, 
+'\uf861(A':4,'\uf861(A)':b'\xa3i','\uf861(B':4,'\uf861(B)':b'\xa3j','\uf861(C': +4,'\uf861(C)':b'\xa3k','\uf861(D':4,'\uf861(D)':b'\xa3l','\uf861(E':4, +'\uf861(E)':b'\xa3m','\uf861(F':4,'\uf861(F)':b'\xa3n','\uf861(G':4,'\uf861(G)' +:b'\xa3o','\uf861(H':4,'\uf861(H)':b'\xa3p','\uf861(I':4,'\uf861(I)':b'\xa3q', +'\uf861(J':4,'\uf861(J)':b'\xa3r','\uf861(K':4,'\uf861(K)':b'\xa3s','\uf861(L': +4,'\uf861(L)':b'\xa3t','\uf861(M':4,'\uf861(M)':b'\xa3u','\uf861(N':4, +'\uf861(N)':b'\xa3v','\uf861(O':4,'\uf861(O)':b'\xa3w','\uf861(P':4,'\uf861(P)' +:b'\xa3x','\uf861(Q':4,'\uf861(Q)':b'\xa3y','\uf861(R':4,'\uf861(R)':b'\xa3z', +'\uf861(S':4,'\uf861(S)':b'\xa3{','\uf861(T':4,'\uf861(T)':b'\xa3|','\uf861(U': +4,'\uf861(U)':b'\xa3}','\uf861(V':4,'\uf861(V)':b'\xa3\x81','\uf861(W':4, +'\uf861(W)':b'\xa3\x82','\uf861(X':4,'\uf861(X)':b'\xa3\x83','\uf861(Y':4, +'\uf861(Y)':b'\xa3\x84','\uf861(Z':4,'\uf861(Z)':b'\xa3\x85','\uf861\u2020':3, +'\uf861\u2020\u2020':4,'\uf861\u2020\u2020\u2020':b'\xa6F','\uf862':2,'\uf862(' +:3,'\uf862(2':4,'\uf862(21':5,'\uf862(21)':b'\xaa\xf9','\uf862(22':5, +'\uf862(22)':b'\xaa\xfa','\uf862(23':5,'\uf862(23)':b'\xaa\xfb','\uf862(24':5, +'\uf862(24)':b'\xaa\xfc','\uf862(25':5,'\uf862(25)':b'\xaa\xfd','\uf862(26':5, +'\uf862(26)':b'\xaa\xfe','\uf862(27':5,'\uf862(27)':b'\xab\xf7','\uf862(28':5, +'\uf862(28)':b'\xab\xf8','\uf862(29':5,'\uf862(29)':b'\xab\xf9','\uf862(3':4, +'\uf862(30':5,'\uf862(30)':b'\xab\xfa','\uf862[':3,'\uf862[1':4,'\uf862[10':5, +'\uf862[10]':b'\xa4J','\uf862[11':5,'\uf862[11]':b'\xa4K','\uf862[12':5, +'\uf862[12]':b'\xa4L','\uf862[13':5,'\uf862[13]':b'\xa4M','\uf862[14':5, +'\uf862[14]':b'\xa4N','\uf862[15':5,'\uf862[15]':b'\xa4O','\uf862[16':5, +'\uf862[16]':b'\xa4P','\uf862[17':5,'\uf862[17]':b'\xa4Q','\uf862[18':5, +'\uf862[18]':b'\xa4R','\uf862[19':5,'\uf862[19]':b'\xa4S','\uf862[2':4, +'\uf862[20':5,'\uf862[20]':b'\xa4T','\uf862[\u4e8c':4,'\uf862[\u4e8c\u5341':5, 
+'\uf862[\u4e8c\u5341]':b'\xadh','\uf862[\u5341':4,'\uf862[\u5341\u4e00':5, +'\uf862[\u5341\u4e00]':b'\xad_','\uf862[\u5341\u4e03':5,'\uf862[\u5341\u4e03]': +b'\xade','\uf862[\u5341\u4e09':5,'\uf862[\u5341\u4e09]':b'\xada', +'\uf862[\u5341\u4e5d':5,'\uf862[\u5341\u4e5d]':b'\xadg','\uf862[\u5341\u4e8c':5 +,'\uf862[\u5341\u4e8c]':b'\xad`','\uf862[\u5341\u4e94':5,'\uf862[\u5341\u4e94]' +:b'\xadc','\uf862[\u5341\u516b':5,'\uf862[\u5341\u516b]':b'\xadf', +'\uf862[\u5341\u516d':5,'\uf862[\u5341\u516d]':b'\xadd','\uf862[\u5341\u56db':5 +,'\uf862[\u5341\u56db]':b'\xadb','\uf862\uc8fc':3,'\uf862\uc8fc\uc2dd':4, +'\uf862\uc8fc\uc2dd\ud68c':5,'\uf862\uc8fc\uc2dd\ud68c\uc0ac':b'\xa7}','\uf863' +:2,'\uf863[':3,'\uf863[1':4,'\uf863[10':5,'\uf863[10]':b'\xa2\xef','\uf863[11': +5,'\uf863[11]':b'\xa2\xf0','\uf863[12':5,'\uf863[12]':b'\xa2\xf1','\uf863[13':5 +,'\uf863[13]':b'\xa2\xf2','\uf863[14':5,'\uf863[14]':b'\xa2\xf3','\uf863[15':5, +'\uf863[15]':b'\xa2\xf4','\uf863[16':5,'\uf863[16]':b'\xa2\xf5','\uf863[17':5, +'\uf863[17]':b'\xa2\xf6','\uf863[18':5,'\uf863[18]':b'\xa2\xf7','\uf863[19':5, +'\uf863[19]':b'\xa2\xf8','\uf863[2':4,'\uf863[20':5,'\uf863[20]':b'\xa2\xf9', +'\uf863[\u4e8c':4,'\uf863[\u4e8c\u5341':5,'\uf863[\u4e8c\u5341]':b'\xadT', +'\uf863[\u5341':4,'\uf863[\u5341\u4e00':5,'\uf863[\u5341\u4e00]':b'\xadK', +'\uf863[\u5341\u4e03':5,'\uf863[\u5341\u4e03]':b'\xadQ','\uf863[\u5341\u4e09':5 +,'\uf863[\u5341\u4e09]':b'\xadM','\uf863[\u5341\u4e5d':5,'\uf863[\u5341\u4e5d]' +:b'\xadS','\uf863[\u5341\u4e8c':5,'\uf863[\u5341\u4e8c]':b'\xadL', +'\uf863[\u5341\u4e94':5,'\uf863[\u5341\u4e94]':b'\xadO','\uf863[\u5341\u516b':5 +,'\uf863[\u5341\u516b]':b'\xadR','\uf863[\u5341\u516d':5,'\uf863[\u5341\u516d]' +:b'\xadP','\uf863[\u5341\u56db':5,'\uf863[\u5341\u56db]':b'\xadN', +'\uf863\uc8fc':3,'\uf863\uc8fc\uc2dd':4,'\uf863\uc8fc\uc2dd\ud68c':5, +'\uf863\uc8fc\uc2dd\ud68c\uc0ac':b'\xa7\x81','\uf864':2,'\uf864[':3,'\uf864[1': 
+4,'\uf864[10':5,'\uf864[10]':b'\xa3J','\uf864[11':5,'\uf864[11]':b'\xa3K', +'\uf864[12':5,'\uf864[12]':b'\xa3L','\uf864[13':5,'\uf864[13]':b'\xa3M', +'\uf864[14':5,'\uf864[14]':b'\xa3N','\uf864[15':5,'\uf864[15]':b'\xa3O', +'\uf864[16':5,'\uf864[16]':b'\xa3P','\uf864[17':5,'\uf864[17]':b'\xa3Q', +'\uf864[18':5,'\uf864[18]':b'\xa3R','\uf864[19':5,'\uf864[19]':b'\xa3S', +'\uf864[2':4,'\uf864[20':5,'\uf864[20]':b'\xa3T','\uf865':2,'\uf865[':3, +'\uf865[1':4,'\uf865[10':5,'\uf865[10]':b'\xa4^','\uf865[11':5,'\uf865[11]': +b'\xa4_','\uf865[12':5,'\uf865[12]':b'\xa4`','\uf865[13':5,'\uf865[13]': +b'\xa4a','\uf865[14':5,'\uf865[14]':b'\xa4b','\uf865[15':5,'\uf865[15]': +b'\xa4c','\uf865[16':5,'\uf865[16]':b'\xa4d','\uf865[17':5,'\uf865[17]': +b'\xa4e','\uf865[18':5,'\uf865[18]':b'\xa4f','\uf865[19':5,'\uf865[19]': +b'\xa4g','\uf865[2':4,'\uf865[20':5,'\uf865[20]':b'\xa4h','\uf866':2,'\uf866[': +3,'\uf866[1':4,'\uf866[10':5,'\uf866[10]':b'\xac\xcb','\uf866[11':5, +'\uf866[11]':b'\xac\xcc','\uf866[12':5,'\uf866[12]':b'\xac\xcd','\uf866[13':5, +'\uf866[13]':b'\xac\xce','\uf866[14':5,'\uf866[14]':b'\xac\xcf','\uf866[15':5, +'\uf866[15]':b'\xac\xd0','\uf866[16':5,'\uf866[16]':b'\xac\xf2','\uf866[17':5, +'\uf866[17]':b'\xac\xf3','\uf866[18':5,'\uf866[18]':b'\xac\xf4','\uf866[19':5, +'\uf866[19]':b'\xac\xf5','\uf866[2':4,'\uf866[20':5,'\uf866[20]':b'\xac\xf6', +'\uf867':2,'\uf867*':3,'\uf867**':b'\xa6P','\ufe59':(2,b'\xa1M'),'\ufe59\uf87c' +:b'\xa2E','\ufe59\uf87f':b'\xa1O','\ufe5a':(2,b'\xa1N'),'\ufe5a\uf87c':b'\xa2F' +,'\ufe5a\uf87f':b'\xa1P','\uff01':(2,b'\xa3\xa1'),'\uff01\uf874':b'\xa5\xda', +'\uff0a':(2,b'\xa3\xaa'),'\uff0a\uf871':b'\xa1\x9f','\uff0a\uf873':b'\xa6J', +'\uff0a\uf874':b'\xa1\x9c','\uff0a\uf875':b'\xa1\x9e','\uff0a\uf87f':b'\xa6I', +'\uff3f':(2,b'\xa3\xdf'),'\uff3f\uf87f':b'\x84','\uff5e':None,'\uffe0':None, +'\uffe1':None,'\uffe2':None,'\uffe3':None,'\uffe5':None +} + +decode_map = { 
+b'\x80':'\xa0',b'\x81':'\u20a9',b'\x82':'\u2013\uf87f',b'\x83':'\xa9',b'\x84': +'\uff3f\uf87f',b'\x85':'\x85',b'\x86':'\x86',b'\x87':'\x87',b'\x88':'\x88', +b'\x89':'\x89',b'\x8a':'\x8a',b'\x8b':'\x8b',b'\x8c':'\x8c',b'\x8d':'\x8d', +b'\x8e':'\x8e',b'\x8f':'\x8f',b'\x90':'\x90',b'\x91':'\x91',b'\x92':'\x92', +b'\x93':'\x93',b'\x94':'\x94',b'\x95':'\x95',b'\x96':'\x96',b'\x97':'\x97', +b'\x98':'\x98',b'\x99':'\x99',b'\x9a':'\x9a',b'\x9b':'\x9b',b'\x9c':'\x9c', +b'\x9d':'\x9d',b'\x9e':'\x9e',b'\x9f':'\x9f',b'\xa1':2,b'\xa1A':'\u300c\uf87f', +b'\xa1B':'\u300d\uf87f',b'\xa1C':'\u300c\uf87b',b'\xa1D':'\u300d\uf87b', +b'\xa1E':'\u300c\uf87c',b'\xa1F':'\u300d\uf87c',b'\xa1G':'\u300e\uf87c', +b'\xa1H':'\u300f\uf87c',b'\xa1I':'\u300a\uf878',b'\xa1J':'\u300b\uf878', +b'\xa1K':'\u3008\uf878',b'\xa1L':'\u3009\uf878',b'\xa1M':'\ufe59',b'\xa1N': +'\ufe5a',b'\xa1O':'\ufe59\uf87f',b'\xa1P':'\ufe5a\uf87f',b'\xa1Q': +'\u2985\uf87f',b'\xa1R':'\u2986\uf87f',b'\xa1S':'\u2985\uf879',b'\xa1T': +'\u2986\uf879',b'\xa1U':'\u2985\uf87c',b'\xa1V':'\u2986\uf87c',b'\xa1W': +'(\uf87c',b'\xa1X':')\uf87c',b'\xa1Y':'\u2985',b'\xa1Z':'\u2986',b'\xa1[': +'\u3010\uf87f',b'\xa1\\':'\u3011\uf87f',b'\xa1]':'\u3016',b'\xa1^':'\u3017', +b'\xa1_':'\u3018',b'\xa1`':'\u3019',b'\xa1a':'[\uf87b',b'\xa1b':']\uf87b', +b'\xa1c':'[\uf87c',b'\xa1d':']\uf87c',b'\xa1e':'\u2985\uf87b',b'\xa1f': +'\u2986\uf87b',b'\xa1g':'\u2020\uf87f',b'\xa1h':'\u2021\uf87f',b'\xa1i': +'\u2020\uf87b',b'\xa1j':'\u2021\uf87c',b'\xa1k':'\u2020\uf877',b'\xa1l': +'*\uf877',b'\xa1m':'\u2051',b'\xa1n':'\uf840',b'\xa1o':'\u201f',b'\xa1p': +'\u201b',b'\xa1q':'\u207a',b'\xa1r':'\u207b',b'\xa1s':'\xd7\uf877',b'\xa1t': +'\u221e\uf877',b'\xa1u':'\u223d\uf877',b'\xa1v':'\xb1\uf877',b'\xa1w': +'\u2213\uf877',b'\xa1x':'>\uf877',b'\xa1y':'<\uf877',b'\xa1z':'\u207c',b'\xa1{' +:'\u2260\uf877',b'\xa1|':'\u207d',b'\xa1}':'\u207e',b'\xa1\x81':'{\uf877', +b'\xa1\x82':'}\uf877',b'\xa1\x83':'[\uf877',b'\xa1\x84':']\uf877',b'\xa1\x85': 
+'\u2229\uf877',b'\xa1\x86':'\u222a\uf877',b'\xa1\x87':'\u2282\uf877', +b'\xa1\x88':'\u2208\uf877',b'\xa1\x89':'\u2211\uf877',b'\xa1\x8a':'!\uf877', +b'\xa1\x8b':'\u3007\uf876',b'\xa1\x8c':'\u4e00\uf876',b'\xa1\x8d': +'\u4e8c\uf876',b'\xa1\x8e':'\u4e09\uf876',b'\xa1\x8f':'\u56db\uf876', +b'\xa1\x90':'\u4e94\uf876',b'\xa1\x91':'\u516d\uf876',b'\xa1\x92': +'\u4e03\uf876',b'\xa1\x93':'\u516b\uf876',b'\xa1\x94':'\u4e5d\uf876', +b'\xa1\x95':'\u5341\uf876',b'\xa1\x96':'\u204c',b'\xa1\x97':'\u204d', +b'\xa1\x98':'\u02bc',b'\xa1\x99':'\u2997',b'\xa1\x9a':'\u2998',b'\xa1\x9c': +'\uff0a\uf874',b'\xa1\x9d':'\u2051\uf87c',b'\xa1\x9e':'\uff0a\uf875', +b'\xa1\x9f':'\uff0a\uf871',b'\xa1\xa0':'\u2051\uf879',b'\xa1\xa9':'\u2013', +b'\xa1\xaa':'\u2014',b'\xa1\xab':'\u2016',b'\xa1\xad':'\u301c',b'\xa1\xcb': +'\xa2',b'\xa1\xcc':'\xa3',b'\xa1\xcd':'\xa5',b'\xa1\xfe':'\xac',b'\xa2':2, +b'\xa2A':'\u2985\uf878',b'\xa2B':'\u2986\uf878',b'\xa2C':'\u2985\uf873', +b'\xa2D':'\u2986\uf873',b'\xa2E':'\ufe59\uf87c',b'\xa2F':'\ufe5a\uf87c', +b'\xa2G':'\u3016\uf878',b'\xa2H':'\u3017\uf878',b'\xa2I':'\u3010\uf878', +b'\xa2J':'\u3011\uf878',b'\xa2K':'(\uf87f',b'\xa2L':')\uf87f',b'\xa2\xa6': +'\u02dc',b'\xa2\xc1':'\u25c9',b'\xa2\xe6':'1\u20de\uf87c',b'\xa2\xe7': +'2\u20de\uf87c',b'\xa2\xe8':'3\u20de\uf87c',b'\xa2\xe9':'4\u20de\uf87c', +b'\xa2\xea':'5\u20de\uf87c',b'\xa2\xeb':'6\u20de\uf87c',b'\xa2\xec': +'7\u20de\uf87c',b'\xa2\xed':'8\u20de\uf87c',b'\xa2\xee':'9\u20de\uf87c', +b'\xa2\xef':'\uf863[10]',b'\xa2\xf0':'\uf863[11]',b'\xa2\xf1':'\uf863[12]', +b'\xa2\xf2':'\uf863[13]',b'\xa2\xf3':'\uf863[14]',b'\xa2\xf4':'\uf863[15]', +b'\xa2\xf5':'\uf863[16]',b'\xa2\xf6':'\uf863[17]',b'\xa2\xf7':'\uf863[18]', +b'\xa2\xf8':'\uf863[19]',b'\xa2\xf9':'\uf863[20]',b'\xa2\xfa':'\xb6\uf87f', +b'\xa2\xfb':'\u2016\uf87b',b'\xa2\xfc':'\u2016\uf87c',b'\xa2\xfd':'\u22ee', +b'\xa2\xfe':'\u2237',b'\xa3':2,b'\xa3A':'1\u20de\uf87b',b'\xa3B': +'2\u20de\uf87b',b'\xa3C':'3\u20de\uf87b',b'\xa3D':'4\u20de\uf87b',b'\xa3E': 
+'5\u20de\uf87b',b'\xa3F':'6\u20de\uf87b',b'\xa3G':'7\u20de\uf87b',b'\xa3H': +'8\u20de\uf87b',b'\xa3I':'9\u20de\uf87b',b'\xa3J':'\uf864[10]',b'\xa3K': +'\uf864[11]',b'\xa3L':'\uf864[12]',b'\xa3M':'\uf864[13]',b'\xa3N':'\uf864[14]', +b'\xa3O':'\uf864[15]',b'\xa3P':'\uf864[16]',b'\xa3Q':'\uf864[17]',b'\xa3R': +'\uf864[18]',b'\xa3S':'\uf864[19]',b'\xa3T':'\uf864[20]',b'\xa3U':'\u278a', +b'\xa3V':'\u278b',b'\xa3W':'\u278c',b'\xa3X':'\u278d',b'\xa3Y':'\u278e', +b'\xa3Z':'\u278f',b'\xa3[':'\u2790',b'\xa3\\':'\u2791',b'\xa3]':'\u2792', +b'\xa3^':'\u2793',b'\xa3_':'\u24eb\uf87f',b'\xa3`':'\u24ec\uf87f',b'\xa3a': +'\u24ed\uf87f',b'\xa3b':'\u24ee\uf87f',b'\xa3c':'\u24ef\uf87f',b'\xa3d': +'\u24f0\uf87f',b'\xa3e':'\u24f1\uf87f',b'\xa3f':'\u24f2\uf87f',b'\xa3g': +'\u24f3\uf87f',b'\xa3h':'\u24f4\uf87f',b'\xa3i':'\uf861(A)',b'\xa3j': +'\uf861(B)',b'\xa3k':'\uf861(C)',b'\xa3l':'\uf861(D)',b'\xa3m':'\uf861(E)', +b'\xa3n':'\uf861(F)',b'\xa3o':'\uf861(G)',b'\xa3p':'\uf861(H)',b'\xa3q': +'\uf861(I)',b'\xa3r':'\uf861(J)',b'\xa3s':'\uf861(K)',b'\xa3t':'\uf861(L)', +b'\xa3u':'\uf861(M)',b'\xa3v':'\uf861(N)',b'\xa3w':'\uf861(O)',b'\xa3x': +'\uf861(P)',b'\xa3y':'\uf861(Q)',b'\xa3z':'\uf861(R)',b'\xa3{':'\uf861(S)', +b'\xa3|':'\uf861(T)',b'\xa3}':'\uf861(U)',b'\xa3\x81':'\uf861(V)',b'\xa3\x82': +'\uf861(W)',b'\xa3\x83':'\uf861(X)',b'\xa3\x84':'\uf861(Y)',b'\xa3\x85': +'\uf861(Z)',b'\xa3\x86':'\u24b6',b'\xa3\x87':'\u24b7',b'\xa3\x88':'\u24b8', +b'\xa3\x89':'\u24b9',b'\xa3\x8a':'\u24ba',b'\xa3\x8b':'\u24bb',b'\xa3\x8c': +'\u24bc',b'\xa3\x8d':'\u24bd',b'\xa3\x8e':'\u24be',b'\xa3\x8f':'\u24bf', +b'\xa3\x90':'\u24c0',b'\xa3\x91':'\u24c1',b'\xa3\x92':'\u24c2',b'\xa3\x93': +'\u24c3',b'\xa3\x94':'\u24c4',b'\xa3\x95':'\u24c5',b'\xa3\x96':'\u24c6', +b'\xa3\x97':'\u24c7',b'\xa3\x98':'\u24c8',b'\xa3\x99':'\u24c9',b'\xa3\x9a': +'\u24ca',b'\xa3\x9b':'\u24cb',b'\xa3\x9c':'\u24cc',b'\xa3\x9d':'\u24cd', +b'\xa3\x9e':'\u24ce',b'\xa3\x9f':'\u24cf',b'\xa3\xfe':'\u203e',b'\xa4':2, 
+b'\xa4A':'1\u20de\uf87f',b'\xa4B':'2\u20de\uf87f',b'\xa4C':'3\u20de\uf87f', +b'\xa4D':'4\u20de\uf87f',b'\xa4E':'5\u20de\uf87f',b'\xa4F':'6\u20de\uf87f', +b'\xa4G':'7\u20de\uf87f',b'\xa4H':'8\u20de\uf87f',b'\xa4I':'9\u20de\uf87f', +b'\xa4J':'\uf862[10]',b'\xa4K':'\uf862[11]',b'\xa4L':'\uf862[12]',b'\xa4M': +'\uf862[13]',b'\xa4N':'\uf862[14]',b'\xa4O':'\uf862[15]',b'\xa4P':'\uf862[16]', +b'\xa4Q':'\uf862[17]',b'\xa4R':'\uf862[18]',b'\xa4S':'\uf862[19]',b'\xa4T': +'\uf862[20]',b'\xa4U':'1\u20de\uf87a',b'\xa4V':'2\u20de\uf87a',b'\xa4W': +'3\u20de\uf87a',b'\xa4X':'4\u20de\uf87a',b'\xa4Y':'5\u20de\uf87a',b'\xa4Z': +'6\u20de\uf87a',b'\xa4[':'7\u20de\uf87a',b'\xa4\\':'8\u20de\uf87a',b'\xa4]': +'9\u20de\uf87a',b'\xa4^':'\uf865[10]',b'\xa4_':'\uf865[11]',b'\xa4`': +'\uf865[12]',b'\xa4a':'\uf865[13]',b'\xa4b':'\uf865[14]',b'\xa4c':'\uf865[15]', +b'\xa4d':'\uf865[16]',b'\xa4e':'\uf865[17]',b'\xa4f':'\uf865[18]',b'\xa4g': +'\uf865[19]',b'\xa4h':'\uf865[20]',b'\xa4i':'\u278a\uf87f',b'\xa4j': +'\u278b\uf87f',b'\xa4k':'\u278c\uf87f',b'\xa4l':'\u278d\uf87f',b'\xa4m': +'\u278e\uf87f',b'\xa4n':'\u278f\uf87f',b'\xa4o':'\u2790\uf87f',b'\xa4p': +'\u2791\uf87f',b'\xa4q':'\u2792\uf87f',b'\xa4r':'\u2793\uf87f',b'\xa4s': +'\u24eb\uf878',b'\xa4t':'\u24ec\uf878',b'\xa4u':'\u24ed\uf878',b'\xa4v': +'\u24ee\uf878',b'\xa4w':'\u24ef\uf878',b'\xa4x':'\u24f0\uf878',b'\xa4y': +'\u24f1\uf878',b'\xa4z':'\u24f2\uf878',b'\xa4{':'\u24f3\uf878',b'\xa4|': +'\u24f4\uf878',b'\xa4}':'\u2a26',b'\xa4\x81':'\u227a',b'\xa4\x82':'\u227b', +b'\xa4\x83':'\u22ce',b'\xa4\x84':'\u22cf',b'\xa4\x85':'\u2280',b'\xa4\x86': +'\u2281',b'\xa4\x87':'\u2270',b'\xa4\x88':'\u2271',b'\xa4\x89':'\u2272', +b'\xa4\x8a':'\u2273',b'\xa4\x8b':'\u2ac5',b'\xa4\x8c':'\u2acb',b'\xa4\x8d': +'\u2ac6',b'\xa4\x8e':'\u2acc',b'\xa4\x8f':'\u2276',b'\xa4\x90':'\u2277', +b'\xa4\x91':'\u2279',b'\xa4\x92':'\u22da',b'\xa4\x93':'\u22db',b'\xa4\x94': +'\u2a8b',b'\xa4\x95':'\u2a8c',b'\xa4\x96':'\u2a91',b'\xa4\x97':'\u2a92', 
+b'\xa4\x98':'\u2222\uf87f',b'\xa4\x99':'\u2245',b'\xa4\x9a':'\u2243', +b'\xa4\x9b':'\u2248',b'\xa4\x9c':'\u29a3',b'\xa4\x9d':'\u22a4',b'\xa4\x9e': +'\u2225\u0347',b'\xa4\x9f':'\u2afd\u0347',b'\xa5':2,b'\xa5A':'0\u20de',b'\xa5B' +:'1\u20de',b'\xa5C':'2\u20de',b'\xa5D':'3\u20de',b'\xa5E':'4\u20de',b'\xa5F': +'5\u20de',b'\xa5G':'6\u20de',b'\xa5H':'7\u20de',b'\xa5I':'8\u20de',b'\xa5J': +'9\u20de',b'\xa5K':'\u24ea\uf87f',b'\xa5L':'\u2460\uf87f',b'\xa5M': +'\u2461\uf87f',b'\xa5N':'\u2462\uf87f',b'\xa5O':'\u2463\uf87f',b'\xa5P': +'\u2464\uf87f',b'\xa5Q':'\u2465\uf87f',b'\xa5R':'\u2466\uf87f',b'\xa5S': +'\u2467\uf87f',b'\xa5T':'\u2468\uf87f',b'\xa5U':'\uf860A)',b'\xa5V':'\uf860B)', +b'\xa5W':'\uf860C)',b'\xa5X':'\uf860D)',b'\xa5Y':'\uf860E)',b'\xa5Z':'\uf860F)' +,b'\xa5[':'\uf860G)',b'\xa5\\':'\uf860H)',b'\xa5]':'\uf860I)',b'\xa5^': +'\uf860J)',b'\xa5_':'\uf860K)',b'\xa5`':'\uf860L)',b'\xa5a':'\uf860M)',b'\xa5b' +:'\uf860N)',b'\xa5c':'\uf860O)',b'\xa5d':'\uf860P)',b'\xa5e':'\uf860Q)', +b'\xa5f':'\uf860R)',b'\xa5g':'\uf860S)',b'\xa5h':'\uf860T)',b'\xa5i':'\uf860U)' +,b'\xa5j':'\uf860V)',b'\xa5k':'\uf860W)',b'\xa5l':'\uf860X)',b'\xa5m': +'\uf860Y)',b'\xa5n':'\uf860Z)',b'\xa5o':'\uf860a)',b'\xa5p':'\uf860b)',b'\xa5q' +:'\uf860c)',b'\xa5r':'\uf860d)',b'\xa5s':'\uf860e)',b'\xa5t':'\uf860f)', +b'\xa5u':'\uf860g)',b'\xa5v':'\uf860h)',b'\xa5w':'\uf860i)',b'\xa5x':'\uf860j)' +,b'\xa5y':'\uf860k)',b'\xa5z':'\uf860l)',b'\xa5{':'\uf860m)',b'\xa5|': +'\uf860n)',b'\xa5}':'\uf860o)',b'\xa5\x81':'\uf860p)',b'\xa5\x82':'\uf860q)', +b'\xa5\x83':'\uf860r)',b'\xa5\x84':'\uf860s)',b'\xa5\x85':'\uf860t)', +b'\xa5\x86':'\uf860u)',b'\xa5\x87':'\uf860v)',b'\xa5\x88':'\uf860w)', +b'\xa5\x89':'\uf860x)',b'\xa5\x8a':'\uf860y)',b'\xa5\x8b':'\uf860z)', +b'\xa5\xda':'\uff01\uf874',b'\xa5\xdb':'\u3002\uf87d',b'\xa5\xdc': +'\u2032\uf87f',b'\xa5\xdd':'\u2033\uf87f',b'\xa5\xde':'\u2034',b'\xa5\xf9': +'\u3257\uf87a',b'\xa5\xfa':'\u3258\uf87a',b'\xa5\xfb':'\u3259\uf87a', 
+b'\xa5\xfc':'\u325a\uf87a',b'\xa6':2,b'\xa6A':'\uf83d\uf87f',b'\xa6B':'\uf83d', +b'\xa6C':'\u2020\uf87c',b'\xa6D':'\uf860\u2020\u2020',b'\xa6E': +'\uf860\u2021\u2021',b'\xa6F':'\uf861\u2020\u2020\u2020',b'\xa6G':'\xa7\uf87c', +b'\xa6H':'\u266f',b'\xa6I':'\uff0a\uf87f',b'\xa6J':'\uff0a\uf873',b'\xa6K': +'\u2051\uf874',b'\xa6L':'\uf860**',b'\xa6M':'\u2042',b'\xa6N':'\u204e',b'\xa6O' +:'\u2051\uf871',b'\xa6P':'\uf867**',b'\xa6Q':'\u2042\uf879',b'\xa6R':'\u273d', +b'\xa6S':'\u2731',b'\xa6T':'\u2747',b'\xa6U':'\u2022',b'\xa6V':'\u25a0\u20df', +b'\xa6W':'\u25c7\u20df',b'\xa6X':'\uf805',b'\xa6Y':'\u25a1\u20df',b'\xa6Z': +'\u2039',b'\xa6[':'\u203a',b'\xa6\\':'\xab',b'\xa6]':'\xbb',b'\xa6^': +'\u261c\uf87f',b'\xa6_':'\u261e\uf87f',b'\xa6`':'\uf806\u20df',b'\xa6a': +'\u25c7\u20df\u20df',b'\xa6b':'\u25c7\u20de',b'\xa6c':'\uf806',b'\xa6d': +'\u29c8',b'\xa6e':'\u25c6\u20de',b'\xa6f':'\uf805\u20de',b'\xa6g': +'\u29c8\u20de',b'\xa6h':'\u29be',b'\xa6i':'\u25ce\u20dd',b'\xa6j': +'\u25b3\u20dd',b'\xa6k':'\u25b2\u20dd',b'\xa6l':'\u271a',b'\xa6m':'\u2716', +b'\xa6n':'\u29bf',b'\xa6o':'\u25ef',b'\xa6p':'\u25ef\uf87c',b'\xa6q': +'\u2610\uf87c',b'\xa6r':'\u2723',b'\xa6s':'\u2756',b'\xa6t':'\uf80a',b'\xa6u': +'\u25cc',b'\xa6v':'\u2610\uf87f',b'\xa6w':'\u2610',b'\xa6x':'\u25a2',b'\xa6y': +'\u2723\uf87a',b'\xa6z':'\u2756\uf87a',b'\xa6{':'\u273f\uf87a',b'\xa6|': +'\u273f',b'\xa6}':'\u3013\uf87c',b'\xa6\x81':'\uf809',b'\xa6\x82': +'\u25c9\u20dd',b'\xa6\x83':'\u274d',b'\xa6\x84':'\u25cd',b'\xa6\x85': +'\u27e1\u20dd',b'\xa6\x86':'\uf80b\uf87f',b'\xa6\x87':'\u2720\uf87a', +b'\xa6\x88':'\u2720',b'\xa6\x89':'\u25c8\uf87f',b'\xa6\x8a':'\u25a8\uf87f', +b'\xa6\x8d':'\u2741',b'\xa6\x8e':'\u2756\uf87f',b'\xa6\x8f':'\uf808', +b'\xa6\x90':'\u20a9\uf87f',b'\xa6\x91':'\uf809\uf87a',b'\xa6\x92': +'\u534d\uf87f',b'\xa6\x93':'\u262f',b'\xa6\x96':'\uf80b',b'\xa6\x97': +'\u262f\uf87a',b'\xa6\x98':'\u262f\uf876',b'\xa6\x99':'\u2740',b'\xa6\x9a': 
+'\uf80c',b'\xa6\x9b':'\u2748\u20d8',b'\xa6\x9e':'\u3020',b'\xa6\x9f':'\uf807', +b'\xa6\xe5':'\u2776',b'\xa6\xe6':'\u2777',b'\xa6\xe7':'\u2778',b'\xa6\xe8': +'\u2779',b'\xa6\xe9':'\u277a',b'\xa6\xea':'\u277b',b'\xa6\xeb':'\u277c', +b'\xa6\xec':'\u277d',b'\xa6\xed':'\u277e',b'\xa6\xee':'\u277f',b'\xa6\xef': +'\u24eb',b'\xa6\xf0':'\u24ec',b'\xa6\xf1':'\u24ed',b'\xa6\xf2':'\u24ee', +b'\xa6\xf3':'\u24ef',b'\xa6\xf4':'\u24f0',b'\xa6\xf5':'\u24f1',b'\xa6\xf6': +'\u24f2',b'\xa6\xf7':'\u24f3',b'\xa6\xf8':'\u24f4',b'\xa6\xf9':'\u3251\uf87a', +b'\xa6\xfa':'\u3252\uf87a',b'\xa6\xfb':'\u3253\uf87a',b'\xa6\xfc': +'\u3254\uf87a',b'\xa6\xfd':'\u3255\uf87a',b'\xa6\xfe':'\u3256\uf87a',b'\xa7':2, +b'\xa7A':'\u2642\uf87f',b'\xa7B':'\u3012',b'\xa7C':'\u3036',b'\xa7D': +'\u25cb\uf87f',b'\xa7E':'\u25b3\uf87f',b'\xa7F':'\u25fb',b'\xa7G':'\uf84c', +b'\xa7H':'\u2394\uf876',b'\xa7I':'\u25ad\uf878',b'\xa7J':'\u25ad',b'\xa7K': +'\uf84d',b'\xa7L':'\uf84e',b'\xa7M':'\uf84f',b'\xa7N':'\u25c7\uf87f',b'\xa7O': +'\u51f9\uf87f',b'\xa7P':'\u51f8\uf87f',b'\xa7Q':'\u2206',b'\xa7R': +'\u2206\uf87f',b'\xa7S':'\u221f',b'\xa7T':'\u222a\uf87f',b'\xa7U':'\u2225', +b'\xa7V':'\u2226',b'\xa7W':'\u2229\uf87f',b'\xa7X':'\u2253',b'\xa7Y':'\u2251', +b'\xa7Z':'\u2266',b'\xa7[':'\u2267',b'\xa7\\':'\u2213',b'\xa7]':'\u2295', +b'\xa7^':'\u2296',b'\xa7_':'\u2297',b'\xa7`':'\u2a38',b'\xa7a':'\u2314', +b'\xa7b':'=\u20e5',b'\xa7c':'\u2261\u20e5',b'\xa7d':'\u2262',b'\xa7e':'=\u20d2' +,b'\xa7f':'\u25b1',b'\xa7g':'-\u0308',b'\xa7h':'\u2222',b'\xa7i':'\u2250', +b'\xa7j':'\u03d5',b'\xa7k':'\u2ae8',b'\xa7l':'\u22a3',b'\xa7m':'\u22a5\u0338', +b'\xa7n':'\u2261\u20d2',b'\xa7o':'\u226e',b'\xa7p':'\u226f',b'\xa7q':'\u2285', +b'\xa7r':'\u2284',b'\xa7s':'\u2209',b'\xa7t':'\u220c',b'\xa7u':'\u22bb', +b'\xa7v':'\u22bc',b'\xa7w':'\u225a',b'\xa7x':'\u2306',b'\xa7y':'\u223d\u0336', +b'\xa7z':'\u2314\uf87f',b'\xa7{':'\u2a72',b'\xa7|':'\u88dc\u20e4',b'\xa7}': +'\uf862\uc8fc\uc2dd\ud68c\uc0ac',b'\xa7\x81':'\uf863\uc8fc\uc2dd\ud68c\uc0ac', 
+b'\xa7\x82':'\u329e',b'\xa7\x83':'\u329e\uf87f',b'\xa7\x84':'\u203c', +b'\xa7\x85':'\u2049',b'\xa7\x86':'\u203c\uf87f',b'\xa7\x87':'\u2047', +b'\xa7\x88':'\u25c7\uf87c',b'\xa7\x89':'\u25c7\uf879',b'\xa7\x8a': +'\u25c7\uf87b',b'\xa7\x8b':'\u25c6\uf879',b'\xa7\x8c':'\u25a1\uf87c', +b'\xa7\x8d':'\u25a1\uf879',b'\xa7\x8e':'\u25a1\uf87b',b'\xa7\x8f':'\u2588', +b'\xa7\x90':'\u25e6',b'\xa7\x91':'\u25cb\uf879',b'\xa7\x92':'\u25cb\uf87b', +b'\xa7\x93':'\u25cf\uf879',b'\xa7\x94':'\u25bf',b'\xa7\x95':'\u25b5', +b'\xa7\x96':'\u25b9',b'\xa7\x97':'\u25c3',b'\xa7\x98':'\u2666',b'\xa7\x99': +'\u2981',b'\xa7\x9a':'\u25fc',b'\xa7\x9b':'\u25b4\u20e4',b'\xa7\x9c':'\u25ca', +b'\xa7\x9d':'\u3231',b'\xa7\x9e':'\u3239',b'\xa7\x9f':'\u33cb',b'\xa7\xf0': +'\u246f',b'\xa7\xf1':'\u2470',b'\xa7\xf2':'\u2471',b'\xa7\xf3':'\u2472', +b'\xa7\xf4':'\u2473',b'\xa7\xf5':'\u3251',b'\xa7\xf6':'\u3252',b'\xa7\xf7': +'\u3253',b'\xa7\xf8':'\u3254',b'\xa7\xf9':'\u3255',b'\xa7\xfa':'\u3256', +b'\xa7\xfb':'\u3257',b'\xa7\xfc':'\u3258',b'\xa7\xfd':'\u3259',b'\xa7\xfe': +'\u325a',b'\xa8':2,b'\xa8A':'\u2192\uf87b',b'\xa8B':'\u2190\uf87b',b'\xa8C': +'\u2191\uf87b',b'\xa8D':'\u2193\uf87b',b'\xa8E':'\u2196\uf87b',b'\xa8F': +'\u2197\uf87b',b'\xa8G':'\u2198\uf87b',b'\xa8H':'\u2199\uf87b',b'\xa8I': +'\u21d0',b'\xa8J':'\u21cf',b'\xa8K':'\u21cd',b'\xa8L':'\u21d4\uf87f',b'\xa8M': +'\u2192\uf87c',b'\xa8N':'\u2190\uf87c',b'\xa8O':'\u2191\uf87c',b'\xa8P': +'\u2193\uf87c',b'\xa8Q':'\u2194\uf87c',b'\xa8R':'\u2195\uf87c',b'\xa8S': +'\u2190\uf879',b'\xa8T':'\u2192\uf879',b'\xa8U':'\u2191\uf879',b'\xa8V': +'\u2193\uf879',b'\xa8W':'\u21e6\u20de',b'\xa8X':'\u21e8\u20de',b'\xa8Y': +'\u21e7\u20de',b'\xa8Z':'\u21e9\u20de',b'\xa8[':'\u21e6\u20dd',b'\xa8\\': +'\u27b2',b'\xa8]':'\u21e7\u20dd',b'\xa8^':'\u21e9\u20dd',b'\xa8_': +'\u2190\uf87f',b'\xa8`':'\u279c',b'\xa8a':'\u2191\uf87f',b'\xa8b': +'\u2193\uf87f',b'\xa8c':'\u2190\uf875',b'\xa8d':'\u2192\uf875',b'\xa8e': 
+'\u2191\uf875',b'\xa8f':'\u2193\uf875',b'\xa8g':'\uf846',b'\xa8h':'\uf847', +b'\xa8i':'\u2190\uf871',b'\xa8j':'\u279b',b'\xa8k':'\u2190\uf872',b'\xa8l': +'\u2192\uf872',b'\xa8m':'\u2191\uf872',b'\xa8n':'\u2193\uf872',b'\xa8o': +'\u2962',b'\xa8p':'\u2964',b'\xa8q':'\u2963',b'\xa8r':'\u2965',b'\xa8s': +'\u21e6\uf87a',b'\xa8t':'\u27a1',b'\xa8u':'\u21e7\uf87a',b'\xa8v': +'\u21e9\uf87a',b'\xa8w':'\u21e6\uf87b',b'\xa8x':'\u279e',b'\xa8y': +'\u21e7\uf87b',b'\xa8z':'\u21e9\uf87b',b'\xa8{':'\u21b2',b'\xa8|':'\u21b1', +b'\xa8}':'\u21bb\uf87b',b'\xa8\x81':'\u21b4',b'\xa8\x82':'\u21b0',b'\xa8\x83': +'\u21b3',b'\xa8\x84':'\u2939\uf87f',b'\xa8\x85':'\u2934\uf87f',b'\xa8\x86': +'\u2936',b'\xa8\x87':'\u21b1\uf87f',b'\xa8\x88':'\u21bb\uf87f',b'\xa8\x89': +'\u2935',b'\xa8\x8a':'\u21b0\uf87f',b'\xa8\x8b':'\u2937',b'\xa8\x8c':'\u2939', +b'\xa8\x8d':'\u2934',b'\xa8\x8e':'\u21e6\uf879',b'\xa8\x8f':'\u21e8\uf879', +b'\xa8\x90':'\u21e7\uf879',b'\xa8\x91':'\u21e9\uf879',b'\xa8\x92':'\u21bc', +b'\xa8\x93':'\u21c0',b'\xa8\x94':'\uf841',b'\xa8\x95':'\u21d4\uf879', +b'\xa8\x96':'\u21e8\uf874',b'\xa8\x97':'\u21e6\uf874',b'\xa8\x98': +'\u21c0\uf879',b'\xa8\x99':'\u21bc\uf879',b'\xa8\x9a':'\u21d2\uf87c', +b'\xa8\x9b':'\u21d0\uf87c',b'\xa8\x9c':'\uf849',b'\xa8\x9d':'\uf848', +b'\xa8\x9e':'\u21c4',b'\xa8\x9f':'\u21c5',b'\xa9':2,b'\xa9A':'\uf860A.', +b'\xa9B':'\uf860B.',b'\xa9C':'\uf860C.',b'\xa9D':'\uf860D.',b'\xa9E':'\uf860E.' +,b'\xa9F':'\uf860F.',b'\xa9G':'\uf860G.',b'\xa9H':'\uf860H.',b'\xa9I': +'\uf860I.',b'\xa9J':'\uf860J.',b'\xa9K':'\uf860K.',b'\xa9L':'\uf860L.',b'\xa9M' +:'\uf860M.',b'\xa9N':'\uf860N.',b'\xa9O':'\uf860O.',b'\xa9P':'\uf860P.', +b'\xa9Q':'\uf860Q.',b'\xa9R':'\uf860R.',b'\xa9S':'\uf860S.',b'\xa9T':'\uf860T.' 
+,b'\xa9U':'\uf860U.',b'\xa9V':'\uf860V.',b'\xa9W':'\uf860W.',b'\xa9X': +'\uf860X.',b'\xa9Y':'\uf860Y.',b'\xa9Z':'\uf860Z.',b'\xa9[':'\uf860a.', +b'\xa9\\':'\uf860b.',b'\xa9]':'\uf860c.',b'\xa9^':'\uf860d.',b'\xa9_': +'\uf860e.',b'\xa9`':'\uf860f.',b'\xa9a':'\uf860g.',b'\xa9b':'\uf860h.',b'\xa9c' +:'\uf860i.',b'\xa9d':'\uf860j.',b'\xa9e':'\uf860k.',b'\xa9f':'\uf860l.', +b'\xa9g':'\uf860m.',b'\xa9h':'\uf860n.',b'\xa9i':'\uf860o.',b'\xa9j':'\uf860p.' +,b'\xa9k':'\uf860q.',b'\xa9l':'\uf860r.',b'\xa9m':'\uf860s.',b'\xa9n': +'\uf860t.',b'\xa9o':'\uf860u.',b'\xa9p':'\uf860v.',b'\xa9q':'\uf860w.',b'\xa9r' +:'\uf860x.',b'\xa9s':'\uf860y.',b'\xa9t':'\uf860z.',b'\xaa':2,b'\xaaA': +'\uc6b4\u20de',b'\xaaB':'\ub2f5\u20de',b'\xaaC':'\uc8fc\u20de',b'\xaaD': +'\uba85\u20de',b'\xaaE':'\ub300\u20de',b'\xaaF':'\ud615\u20de',b'\xaaG': +'\ubd80\u20de',b'\xaaH':'\uc804\u20de',b'\xaaI':'\uc811\u20de',b'\xaaJ': +'\uc218\u20de',b'\xaaK':'\ub3d9\u20de',b'\xaaL':'\ube44\u20de',b'\xaaM': +'\ubc18\u20de',b'\xaaN':'\uc790\u20de',b'\xaaO':'\ud0c0\u20de',b'\xaaP': +'\uac10\u20de',b'\xaaQ':'\uc57d\u20de',b'\xaaR':'\uc778\u20de',b'\xaaS': +'\ub73b\u20de',b'\xaaT':'\u5370\u20de',b'\xaaU':'\u8a3b\u20de',b'\xaaV': +'\uc608\u20de',b'\xaaW':'\u611f\u20de',b'\xaaX':'\u51a0\u20de',b'\xaaY': +'\u7b54\u20de',b'\xaaZ':'\u4ee3\u20de',b'\xaa[':'\u982d\u20de',b'\xaa\\': +'\u52d5\u20de',b'\xaa]':'\u540d\u20de',b'\xaa^':'\u76ee\u20de',b'\xaa_': +'\u53cd\u20de',b'\xaa`':'\u88dc\u20de',b'\xaaa':'\u672c\u20de',b'\xaab': +'\u526f\u20de',b'\xaac':'\u5e8f\u20de',b'\xaad':'\u9023\u20de',b'\xaae': +'\u5f71\u20de',b'\xaaf':'\u4f8b\u20de',b'\xaag':'\u6e90\u20de',b'\xaah': +'\u5b50\u20de',b'\xaai':'\u524d\u20de',b'\xaaj':'\u7bc0\u20de',b'\xaak': +'\u63a5\u20de',b'\xaal':'\u52a9\u20de',b'\xaam':'\u6307\u20de',b'\xaan': +'\u4ed6\u20de',b'\xaao':'\u6d3e\u20de',b'\xaap':'\u5f62\u20de',b'\xaaq': +'\uc870\u20de',b'\xaar':'\ubb38\u20de\uf87a',b'\xaas':'\ub2f5\u20de\uf87a', 
+b'\xaat':'\uc8fc\u20de\uf87a',b'\xaau':'\ub73b\u20de\uf87a',b'\xaav': +'\u8a3b\u20de\uf87a',b'\xaaw':'\uad50\u20de\uf87a',b'\xaax': +'\uc5ed\u20de\uf87a',b'\xaay':'\uc74c\u20de\uf87a',b'\xaaz': +'\uc815\u20de\uf87a',b'\xaa{':'\ud574\u20de\uf87a',b'\xaa|': +'\uc608\u20de\uf87a',b'\xaa}':'\uc874\u20dd',b'\xaa\x81':'\ub77c\u20dd', +b'\xaa\x82':'\ub9c8\u20dd',b'\xaa\x83':'\ubc14\u20dd',b'\xaa\x84': +'\uc0ac\u20dd',b'\xaa\x85':'\uc544\u20dd',b'\xaa\x86':'\uc790\u20dd', +b'\xaa\x87':'\ucc28\u20dd',b'\xaa\x88':'\uce74\u20dd',b'\xaa\x89': +'\ud0c0\u20dd',b'\xaa\x8a':'\ud30c\u20dd',b'\xaa\x8b':'\ub192\u20dd', +b'\xaa\x8c':'\ub0ae\u20dd',b'\xaa\x8d':'\uba85\u20dd',b'\xaa\x8e': +'\ub300\u20dd',b'\xaa\x8f':'\ud615\u20dd',b'\xaa\x90':'\ubd80\u20dd', +b'\xaa\x91':'\uc804\u20dd',b'\xaa\x92':'\uc811\u20dd',b'\xaa\x93': +'\uc218\u20dd',b'\xaa\x94':'\ub3d9\u20dd',b'\xaa\x95':'\ube44\u20dd', +b'\xaa\x96':'\uac8c\u20dd',b'\xaa\x97':'\ubc18\u20dd',b'\xaa\x98': +'\uc18d\u20dd',b'\xaa\x99':'\uc778\u20dd',b'\xaa\x9a':'\ubcf8\u20dd', +b'\xaa\x9b':'\uc57d\u20dd',b'\xaa\x9c':'\uc219\u20dd',b'\xaa\x9d': +'\uc720\u20dd',b'\xaa\x9e':'\uad00\u20dd',b'\xaa\x9f':'\u51a0\u20dd', +b'\xaa\xf4':'\u2483',b'\xaa\xf5':'\u2484',b'\xaa\xf6':'\u2485',b'\xaa\xf7': +'\u2486',b'\xaa\xf8':'\u2487',b'\xaa\xf9':'\uf862(21)',b'\xaa\xfa':'\uf862(22)' +,b'\xaa\xfb':'\uf862(23)',b'\xaa\xfc':'\uf862(24)',b'\xaa\xfd':'\uf862(25)', +b'\xaa\xfe':'\uf862(26)',b'\xab':2,b'\xabA':'\uc870\u20dd',b'\xabB': +'\uad6d\u20dd',b'\xabC':'\uac10\u20dd',b'\xabD':'\u5370\u20dd',b'\xabE': +'\u8863\u20dd',b'\xabF':'\u672b\u20dd',b'\xabG':'\uac70\u20dd',b'\xabH': +'\ub2f5\u20dd',b'\xabI':'\ubcc0\u20dd',b'\xabJ':'\uc0c1\u20dd',b'\xabK': +'\uc13c\u20dd',b'\xabL':'\uc2e0\u20dd',b'\xabM':'\uc5ec\u20dd',b'\xabN': +'\uc608\u20dd',b'\xabO':'\uc6d0\u20dd',b'\xabP':'\uc791\u20dd',b'\xabQ': +'\uc900\u20dd',b'\xabR':'\ud0b9\u20dd',b'\xabS':'\uc678\u20dd',b'\xabT': +'\ud65c\u20dd',b'\xabU':'\uac04\u20dd',b'\xabV':'\uac19\u20dd',b'\xabW': 
+'\uc2e4\u20dd',b'\xabX':'\u611f\u20dd',b'\xabY':'\u6163\u20dd',b'\xabZ': +'\u4ee3\u20dd',b'\xab[':'\u52d5\u20dd',b'\xab\\':'\u3294',b'\xab]': +'\u53cd\u20dd',b'\xab^':'\u526f\u20dd',b'\xab_':'\u81ea\u20dd',b'\xab`': +'\u524d\u20dd',b'\xaba':'\u96fb\u20dd',b'\xabb':'\u63a5\u20dd',b'\xabc': +'\u52a9\u20dd',b'\xabd':'\u6ce8\u20dd',b'\xabe':'\u53c3\u20dd',b'\xabf': +'\u672c\u20dd',b'\xabg':'\u65b0\u20dd',b'\xabh':'\u73fe\u20dd',b'\xabi': +'\u5f62\u20dd',b'\xabj':'\u9593\u20dd',b'\xabk':'\u570b\u20dd',b'\xabl': +'\u32a5',b'\xabm':'\u4ed6\u20dd',b'\xabn':'\ube60\u20dd',b'\xabo': +'\uc2dc\u20dd',b'\xabp':'\uc785\u20dd',b'\xabq':'\uc73c\u20dd',b'\xabr': +'\uc74c\u20dd',b'\xabs':'\uc9c1\u20dd',b'\xabt':'\ud45c\u20dd',b'\xabu': +'\uac00\u20dd',b'\xabv':'\ub098\u20dd',b'\xabw':'\ub2e4\u20dd',b'\xabx': +'\ud558\u20dd',b'\xaby':'\ub9c8\u20dd\uf87a',b'\xabz':'\ubc14\u20dd\uf87a', +b'\xab{':'\uc0ac\u20dd\uf87a',b'\xab|':'\uc544\u20dd\uf87a',b'\xab}': +'\uc790\u20dd\uf87a',b'\xab\x81':'\ucc28\u20dd\uf87a',b'\xab\x82': +'\uce74\u20dd\uf87a',b'\xab\x83':'\ud0c0\u20dd\uf87a',b'\xab\x84': +'\ud30c\u20dd\uf87a',b'\xab\x85':'\ud558\u20dd\uf87a',b'\xab\x86': +'\ube44\u20dd\uf87a',b'\xab\x87':'\ub2f5\u20dd\uf87a',b'\xab\x88': +'\ube60\u20dd\uf87a',b'\xab\x89':'\ubcf8\u20dd\uf87a',b'\xab\x8a': +'\ub2e8\u20dd\uf87a',b'\xab\x8b':'\uc13c\u20dd\uf87a',b'\xab\x8c': +'\uc2dc\u20dd\uf87a',b'\xab\x8d':'\uc5ec\u20dd\uf87a',b'\xab\x8e': +'\uc608\u20dd\uf87a',b'\xab\x8f':'\uc73c\u20dd\uf87a',b'\xab\x90': +'\uc74c\u20dd\uf87a',b'\xab\x91':'\uc785\u20dd\uf87a',b'\xab\x92': +'\uc81c\u20dd\uf87a',b'\xab\x93':'\uc874\u20dd\uf87a',b'\xab\x94': +'\uc900\u20dd\uf87a',b'\xab\x95':'\ud45c\u20dd\uf87a',b'\xab\x96': +'\ud574\u20dd\uf87a',b'\xab\x97':'\ub290\u20dd\uf87a',b'\xab\x98': +'\ub192\u20dd\uf87a',b'\xab\x99':'\ub0ae\u20dd\uf87a',b'\xab\x9a': +'\ubc18\u20dd\uf87a',b'\xab\x9b':'\uac00\u20dd\uf87a',b'\xab\x9c': +'\ub098\u20dd\uf87a',b'\xab\x9d':'\ub2e4\u20dd\uf87a',b'\xab\x9e': 
+'\ub77c\u20dd\uf87a',b'\xab\x9f':'\uc678\u20dd\uf87a',b'\xab\xf7':'\uf862(27)', +b'\xab\xf8':'\uf862(28)',b'\xab\xf9':'\uf862(29)',b'\xab\xfa':'\uf862(30)', +b'\xac':2,b'\xacA':'\u21f0',b'\xacB':'\uf843',b'\xacC':'\u27b5',b'\xacD': +'\u2964\uf87f',b'\xacE':'\u2962\uf87f',b'\xacF':'\u21e8\uf870',b'\xacG': +'\u21e6\uf870',b'\xacH':'\u27a4',b'\xacI':'\uf844',b'\xacJ':'\uf84b',b'\xacK': +'\uf84a',b'\xacL':'\u21c0\uf87f',b'\xacM':'\u21bc\uf87f',b'\xacN': +'\u21e8\uf87f',b'\xacO':'\u21e6\uf87f',b'\xacP':'\u21b6',b'\xacQ':'\u21b7', +b'\xacR':'\u219d',b'\xacS':'\u219c',b'\xacT':'\uf842',b'\xacU':'\u2190\uf87a', +b'\xacV':'\u2192\uf87a',b'\xacW':'\u2191\uf87a',b'\xacX':'\u2193\uf87a', +b'\xacY':'\u21e6\uf87c',b'\xacZ':'\u21e8\uf87c',b'\xac[':'\u21e7\uf87c', +b'\xac\\':'\u21e9\uf87c',b'\xac]':'\u2190\uf873',b'\xac^':'\u2794',b'\xac_': +'\uf845',b'\xac`':'\u2191\uf873',b'\xaca':'\u2193\uf873',b'\xacb': +'\u2190\uf878',b'\xacc':'\u2192\uf878',b'\xacd':'\u2191\uf878',b'\xace': +'\u2193\uf878',b'\xacf':'\u2190\uf874',b'\xacg':'\u2192\uf874',b'\xach': +'\u2191\uf874',b'\xaci':'\u2193\uf874',b'\xacj':'\u21e0',b'\xack':'\u21e2', +b'\xacl':'\u21e1',b'\xacm':'\u21e3',b'\xacn':'\u21e6\uf875',b'\xaco': +'\u21e8\uf875',b'\xacp':'\u21e7\uf875',b'\xacq':'\u21e9\uf875',b'\xacr': +'\u21e6',b'\xacs':'\u21e8',b'\xact':'\u21e7',b'\xacu':'\u21e9',b'\xacv': +'\u2936\uf87a',b'\xacw':'\u21b1\uf87a',b'\xacx':'\u21bb\uf87a',b'\xacy': +'\u2935\uf87a',b'\xacz':'\u21b0\uf87a',b'\xac{':'\u2937\uf87a',b'\xac|': +'\u2939\uf87a',b'\xac}':'\u2934\uf87a',b'\xac\x81':'\u2936\uf87c',b'\xac\x82': +'\u21b1\uf87c',b'\xac\x83':'\u21bb\uf87c',b'\xac\x84':'\u2935\uf87c', +b'\xac\x85':'\u21b0\uf87c',b'\xac\x86':'\u2937\uf87c',b'\xac\x87': +'\u2939\uf87c',b'\xac\x88':'\u2934\uf87c',b'\xac\x89':'\u2190\uf870', +b'\xac\x8a':'\u2192\uf870',b'\xac\x8b':'\u2191\uf870',b'\xac\x8c': +'\u2193\uf870',b'\xac\x8d':'\u261d',b'\xac\x8e':'\u261f',b'\xac\x8f': +'\u261d\uf87f',b'\xac\x90':'\u261f\uf87f',b'\xac\x91':'\ub2e8\u20dd', 
+b'\xac\x92':'\ucc38\u20dd',b'\xac\x93':'\uc18c\u20dd',b'\xac\x94': +'\uc911\u20dd',b'\xac\x95':'\uc77c\u20dd',b'\xac\x96':'\uc774\u20dd', +b'\xac\x97':'\ud734\u20dd',b'\xac\xc2':'1\u20de\uf875',b'\xac\xc3': +'2\u20de\uf875',b'\xac\xc4':'3\u20de\uf875',b'\xac\xc5':'4\u20de\uf875', +b'\xac\xc6':'5\u20de\uf875',b'\xac\xc7':'6\u20de\uf875',b'\xac\xc8': +'7\u20de\uf875',b'\xac\xc9':'8\u20de\uf875',b'\xac\xca':'9\u20de\uf875', +b'\xac\xcb':'\uf866[10]',b'\xac\xcc':'\uf866[11]',b'\xac\xcd':'\uf866[12]', +b'\xac\xce':'\uf866[13]',b'\xac\xcf':'\uf866[14]',b'\xac\xd0':'\uf866[15]', +b'\xac\xf2':'\uf866[16]',b'\xac\xf3':'\uf866[17]',b'\xac\xf4':'\uf866[18]', +b'\xac\xf5':'\uf866[19]',b'\xac\xf6':'\uf866[20]',b'\xad':2,b'\xadA': +'\u4e00\u20de\uf87a',b'\xadB':'\u4e8c\u20de\uf87a',b'\xadC': +'\u4e09\u20de\uf87a',b'\xadD':'\u56db\u20de\uf87a',b'\xadE': +'\u4e94\u20de\uf87a',b'\xadF':'\u516d\u20de\uf87a',b'\xadG': +'\u4e03\u20de\uf87a',b'\xadH':'\u516b\u20de\uf87a',b'\xadI': +'\u4e5d\u20de\uf87a',b'\xadJ':'\u5341\u20de\uf87a',b'\xadK': +'\uf863[\u5341\u4e00]',b'\xadL':'\uf863[\u5341\u4e8c]',b'\xadM': +'\uf863[\u5341\u4e09]',b'\xadN':'\uf863[\u5341\u56db]',b'\xadO': +'\uf863[\u5341\u4e94]',b'\xadP':'\uf863[\u5341\u516d]',b'\xadQ': +'\uf863[\u5341\u4e03]',b'\xadR':'\uf863[\u5341\u516b]',b'\xadS': +'\uf863[\u5341\u4e5d]',b'\xadT':'\uf863[\u4e8c\u5341]',b'\xadU':'\u4e00\u20de', +b'\xadV':'\u4e8c\u20de',b'\xadW':'\u4e09\u20de',b'\xadX':'\u56db\u20de', +b'\xadY':'\u4e94\u20de',b'\xadZ':'\u516d\u20de',b'\xad[':'\u4e03\u20de', +b'\xad\\':'\u516b\u20de',b'\xad]':'\u4e5d\u20de',b'\xad^':'\u5341\u20de', +b'\xad_':'\uf862[\u5341\u4e00]',b'\xad`':'\uf862[\u5341\u4e8c]',b'\xada': +'\uf862[\u5341\u4e09]',b'\xadb':'\uf862[\u5341\u56db]',b'\xadc': +'\uf862[\u5341\u4e94]',b'\xadd':'\uf862[\u5341\u516d]',b'\xade': +'\uf862[\u5341\u4e03]',b'\xadf':'\uf862[\u5341\u516b]',b'\xadg': +'\uf862[\u5341\u4e5d]',b'\xadh':'\uf862[\u4e8c\u5341]',b'\xadi':'\u65e5\u20de', 
+b'\xadj':'\u6708\u20de',b'\xadk':'\u706b\u20de',b'\xadl':'\u6c34\u20de', +b'\xadm':'\u6728\u20de',b'\xadn':'\u91d1\u20de',b'\xado':'\u571f\u20de', +b'\xadp':'\u3290',b'\xadq':'\u328a',b'\xadr':'\u328b',b'\xads':'\u328c', +b'\xadt':'\u328d',b'\xadu':'\u328e',b'\xadv':'\u328f',b'\xadw': +'\u65e5\u20de\uf87c',b'\xadx':'\u6708\u20de\uf87c',b'\xady': +'\u706b\u20de\uf87c',b'\xadz':'\u6c34\u20de\uf87c',b'\xad{': +'\u6728\u20de\uf87c',b'\xad|':'\u91d1\u20de\uf87c',b'\xad}': +'\u571f\u20de\uf87c',b'\xad\xa1':'\u300c\uf879',b'\xad\xa2':'\u300d\uf879', +b'\xad\xa3':'\u300e\uf879',b'\xad\xa4':'\u300f\uf879',b'\xad\xa5': +'\u21e8\uf878',b'\xad\xa6':'\u21e6\uf878',b'\xad\xa7':'\u21e7\uf878', +b'\xad\xa8':'\u21e9\uf878',b'\xad\xa9':'\u301e',b'\xad\xaa':'\u301f', +b'\xad\xab':'\u2036',b'\xad\xac':'\u2033\uf873',b'\xad\xad':'\u2035', +b'\xad\xae':'\u2032\uf873',b'\xad\xaf':'\u21e7\uf87f',b'\xad\xb0':'!\uf87f', +b'\xff':'\u2026\uf87f' +} + +basecodec = _codecs_kr.getcodec('euc_kr') +codec = mbc.create_extcodec('mac_korean', basecodec, encode_map, decode_map, + 5, 2, 1) + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='mac_korean', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) Index: Lib/encodings/mac_chintrad.py =================================================================== --- Lib/encodings/mac_chintrad.py (revision 0) +++ Lib/encodings/mac_chintrad.py 
(revision 0) @@ -0,0 +1,188 @@ +# +# mac_chintrad.py: Python Unicode Codec for MAC_CHINTRAD +# +# Written by Hye-Shik Chang +# + +import _codecs_tw, codecs +import _multibytecodec as mbc + +encode_map = { +'\\':(2,b'\\'),'\\\uf87f':b'\x80','\x83':b'\x83','\x84':b'\x84','\x85':b'\x85', +'\x86':b'\x86','\x87':b'\x87','\x88':b'\x88','\x89':b'\x89','\x8a':b'\x8a', +'\x8b':b'\x8b','\x8c':b'\x8c','\x8d':b'\x8d','\x8e':b'\x8e','\x8f':b'\x8f', +'\x90':b'\x90','\x91':b'\x91','\x92':b'\x92','\x93':b'\x93','\x94':b'\x94', +'\x95':b'\x95','\x96':b'\x96','\x97':b'\x97','\x98':b'\x98','\x99':b'\x99', +'\x9a':b'\x9a','\x9b':b'\x9b','\x9c':b'\x9c','\x9d':b'\x9d','\x9e':b'\x9e', +'\x9f':b'\x9f','\xa0':b'\xa0','\xa9':b'\xfd','\xb7':b'\xa1E','\u0401':None, +'\u0414':None,'\u0415':None,'\u0416':None,'\u0417':None,'\u0418':None,'\u0419': +None,'\u041a':None,'\u041b':None,'\u041c':None,'\u0423':None,'\u0424':None, +'\u0425':None,'\u0426':None,'\u0427':None,'\u0428':None,'\u0429':None,'\u042a': +None,'\u042b':None,'\u042c':None,'\u042d':None,'\u042e':None,'\u042f':None, +'\u0430':None,'\u0431':None,'\u0432':None,'\u0433':None,'\u0434':None,'\u0435': +None,'\u0436':None,'\u0437':None,'\u0438':None,'\u0439':None,'\u043a':None, +'\u043b':None,'\u043c':None,'\u043d':None,'\u043e':None,'\u043f':None,'\u0440': +None,'\u0441':None,'\u0442':None,'\u0443':None,'\u0444':None,'\u0445':None, +'\u0446':None,'\u0447':None,'\u0448':None,'\u0449':None,'\u044a':None,'\u044b': +None,'\u044c':None,'\u044d':None,'\u044e':None,'\u044f':None,'\u0451':None, +'\u2022':None,'\u2026':b'\xff','\u203e':(2,b'\xa1\xc2'),'\u203e\uf87c': +b'\xa1\xc3','\u2122':b'\xfe','\u2295':b'\xa1\xf2','\u22ef':b'\xa1K','\u2460': +None,'\u2461':None,'\u2462':None,'\u2463':None,'\u2464':None,'\u2465':None, +'\u2466':None,'\u2467':None,'\u2468':None,'\u2469':None,'\u2474':None,'\u2475': +None,'\u2476':None,'\u2477':None,'\u2478':None,'\u2479':None,'\u247a':None, 
+'\u247b':None,'\u247c':None,'\u247d':None,'\u2502':(2,b'\xa2x'),'\u2502\uf87f': +b'\xa2y','\u2595':None,'\u2641':None,'\u3001':(2,b'\xa1B'),'\u3001\uf87d': +b'\xa1N','\u3005':None,'\u3014':(2,b'\xa1e'),'\u3014\uf87f':b'\xa1\xa3', +'\u3015':(2,b'\xa1f'),'\u3015\uf87f':b'\xa1\xa4','\u3041':None,'\u3042':None, +'\u3043':None,'\u3044':None,'\u3045':None,'\u3046':None,'\u3047':None,'\u3048': +None,'\u3049':None,'\u304a':None,'\u304b':None,'\u304c':None,'\u304d':None, +'\u304e':None,'\u304f':None,'\u3050':None,'\u3051':None,'\u3052':None,'\u3053': +None,'\u3054':None,'\u3055':None,'\u3056':None,'\u3057':None,'\u3058':None, +'\u3059':None,'\u305a':None,'\u305b':None,'\u305c':None,'\u305d':None,'\u305e': +None,'\u305f':None,'\u3060':None,'\u3061':None,'\u3062':None,'\u3063':None, +'\u3064':None,'\u3065':None,'\u3066':None,'\u3067':None,'\u3068':None,'\u3069': +None,'\u306a':None,'\u306b':None,'\u306c':None,'\u306d':None,'\u306e':None, +'\u306f':None,'\u3070':None,'\u3071':None,'\u3072':None,'\u3073':None,'\u3074': +None,'\u3075':None,'\u3076':None,'\u3077':None,'\u3078':None,'\u3079':None, +'\u307a':None,'\u307b':None,'\u307c':None,'\u307d':None,'\u307e':None,'\u307f': +None,'\u3080':None,'\u3081':None,'\u3082':None,'\u3083':None,'\u3084':None, +'\u3085':None,'\u3086':None,'\u3087':None,'\u3088':None,'\u3089':None,'\u308a': +None,'\u308b':None,'\u308c':None,'\u308d':None,'\u308e':None,'\u308f':None, +'\u3090':None,'\u3091':None,'\u3092':None,'\u3093':None,'\u309d':None,'\u309e': +None,'\u30a1':None,'\u30a2':None,'\u30a3':None,'\u30a4':None,'\u30a5':None, +'\u30a6':None,'\u30a7':None,'\u30a8':None,'\u30a9':None,'\u30aa':None,'\u30ab': +None,'\u30ac':None,'\u30ad':None,'\u30ae':None,'\u30af':None,'\u30b0':None, +'\u30b1':None,'\u30b2':None,'\u30b3':None,'\u30b4':None,'\u30b5':None,'\u30b6': +None,'\u30b7':None,'\u30b8':None,'\u30b9':None,'\u30ba':None,'\u30bb':None, +'\u30bc':None,'\u30bd':None,'\u30be':None,'\u30bf':None,'\u30c0':None,'\u30c1': 
+None,'\u30c2':None,'\u30c3':None,'\u30c4':None,'\u30c5':None,'\u30c6':None, +'\u30c7':None,'\u30c8':None,'\u30c9':None,'\u30ca':None,'\u30cb':None,'\u30cc': +None,'\u30cd':None,'\u30ce':None,'\u30cf':None,'\u30d0':None,'\u30d1':None, +'\u30d2':None,'\u30d3':None,'\u30d4':None,'\u30d5':None,'\u30d6':None,'\u30d7': +None,'\u30d8':None,'\u30d9':None,'\u30da':None,'\u30db':None,'\u30dc':None, +'\u30dd':None,'\u30de':None,'\u30df':None,'\u30e0':None,'\u30e1':None,'\u30e2': +None,'\u30e3':None,'\u30e4':None,'\u30e5':None,'\u30e6':None,'\u30e7':None, +'\u30e8':None,'\u30e9':None,'\u30ea':None,'\u30eb':None,'\u30ec':None,'\u30ed': +None,'\u30ee':None,'\u30ef':None,'\u30f0':None,'\u30f1':None,'\u30f2':None, +'\u30f3':None,'\u30f4':None,'\u30f5':None,'\u30f6':None,'\u30fe':None,'\u5341': +(2,b'\xa4Q'),'\u5341\uf87f':b'\xa2\xcc','\u5345':(2,b'\xa4\xca'),'\u5345\uf87f' +:b'\xa2\xce','\uf880':b'\x81','\uf881':b'\x82','\ufe4b':(2,b'\xa1\xca'), +'\ufe4b\uf87c':b'\xa1\xcb','\ufe4c':None,'\ufe50':None,'\ufe52':None,'\ufe54': +None,'\ufe55':None,'\ufe56':None,'\ufe57':None,'\ufe59':None,'\ufe5a':None, +'\ufe5b':None,'\ufe5c':None,'\ufe5d':None,'\ufe5e':None,'\uff01':(2,b'\xa1I'), +'\uff01\uf87d':b'\xa1T','\uff08':(2,b'\xa1]'),'\uff08\uf87f':b'\xa1}','\uff09': +(2,b'\xa1^'),'\uff09\uf87f':b'\xa1~','\uff0c':(2,b'\xa1A'),'\uff0c\uf87d': +b'\xa1M','\uff0e':(2,b'\xa1D'),'\uff0e\uf87d':b'\xa1O','\uff0e\uf87e':b'\xa1P', +'\uff0f':(2,b'\xa2A'),'\uff0f\uf87f':b'\xa1\xfe','\uff1a':(2,b'\xa1G'), +'\uff1a\uf87d':b'\xa1R','\uff1b':(2,b'\xa1F'),'\uff1b\uf87d':b'\xa1Q','\uff1f': +(2,b'\xa1H'),'\uff1f\uf87d':b'\xa1S','\uff3c':(2,b'\xa2B'),'\uff3c\uf87f': +b'\xa2@','\uff3f':(2,b'\xa1\xc4'),'\uff3f\uf87c':b'\xa1\xc5','\uff3f\uf87f': +b'\xa1Z','\uff5b':(2,b'\xa1a'),'\uff5b\uf87f':b'\xa1\xa1','\uff5d':(2,b'\xa1b') +,'\uff5d\uf87f':b'\xa1\xa2','\uff64':None,'\ufffd':None +} + +decode_map = { +b'\x80':'\\\uf87f',b'\x81':'\uf880',b'\x82':'\uf881',b'\x83':'\x83',b'\x84': 
+'\x84',b'\x85':'\x85',b'\x86':'\x86',b'\x87':'\x87',b'\x88':'\x88',b'\x89': +'\x89',b'\x8a':'\x8a',b'\x8b':'\x8b',b'\x8c':'\x8c',b'\x8d':'\x8d',b'\x8e': +'\x8e',b'\x8f':'\x8f',b'\x90':'\x90',b'\x91':'\x91',b'\x92':'\x92',b'\x93': +'\x93',b'\x94':'\x94',b'\x95':'\x95',b'\x96':'\x96',b'\x97':'\x97',b'\x98': +'\x98',b'\x99':'\x99',b'\x9a':'\x9a',b'\x9b':'\x9b',b'\x9c':'\x9c',b'\x9d': +'\x9d',b'\x9e':'\x9e',b'\x9f':'\x9f',b'\xa0':'\xa0',b'\xa1':2,b'\xa1E':'\xb7', +b'\xa1K':'\u22ef',b'\xa1M':'\uff0c\uf87d',b'\xa1N':'\u3001\uf87d',b'\xa1O': +'\uff0e\uf87d',b'\xa1P':'\uff0e\uf87e',b'\xa1Q':'\uff1b\uf87d',b'\xa1R': +'\uff1a\uf87d',b'\xa1S':'\uff1f\uf87d',b'\xa1T':'\uff01\uf87d',b'\xa1Z': +'\uff3f\uf87f',b'\xa1}':'\uff08\uf87f',b'\xa1~':'\uff09\uf87f',b'\xa1\xa1': +'\uff5b\uf87f',b'\xa1\xa2':'\uff5d\uf87f',b'\xa1\xa3':'\u3014\uf87f', +b'\xa1\xa4':'\u3015\uf87f',b'\xa1\xc3':'\u203e\uf87c',b'\xa1\xc5': +'\uff3f\uf87c',b'\xa1\xcb':'\ufe4b\uf87c',b'\xa1\xf2':'\u2295',b'\xa1\xfe': +'\uff0f\uf87f',b'\xa2':2,b'\xa2@':'\uff3c\uf87f',b'\xa2y':'\u2502\uf87f', +b'\xa2\xcc':'\u5341\uf87f',b'\xa2\xce':'\u5345\uf87f',b'\xc6':2,b'\xc6\xa1': +None,b'\xc6\xa2':None,b'\xc6\xa3':None,b'\xc6\xa4':None,b'\xc6\xa5':None, +b'\xc6\xa6':None,b'\xc6\xa7':None,b'\xc6\xa8':None,b'\xc6\xa9':None,b'\xc6\xaa' +:None,b'\xc6\xab':None,b'\xc6\xac':None,b'\xc6\xad':None,b'\xc6\xae':None, +b'\xc6\xaf':None,b'\xc6\xb0':None,b'\xc6\xb1':None,b'\xc6\xb2':None,b'\xc6\xb3' +:None,b'\xc6\xb4':None,b'\xc6\xb5':None,b'\xc6\xb6':None,b'\xc6\xb7':None, +b'\xc6\xb8':None,b'\xc6\xb9':None,b'\xc6\xba':None,b'\xc6\xbb':None,b'\xc6\xbc' +:None,b'\xc6\xbd':None,b'\xc6\xbe':None,b'\xc6\xbf':None,b'\xc6\xc0':None, +b'\xc6\xc1':None,b'\xc6\xc2':None,b'\xc6\xc3':None,b'\xc6\xc4':None,b'\xc6\xc5' +:None,b'\xc6\xc6':None,b'\xc6\xc7':None,b'\xc6\xc8':None,b'\xc6\xc9':None, +b'\xc6\xca':None,b'\xc6\xcb':None,b'\xc6\xcc':None,b'\xc6\xcd':None,b'\xc6\xce' +:None,b'\xc6\xcf':None,b'\xc6\xd0':None,b'\xc6\xd1':None,b'\xc6\xd2':None, 
+b'\xc6\xd3':None,b'\xc6\xd4':None,b'\xc6\xd5':None,b'\xc6\xd6':None,b'\xc6\xd7' +:None,b'\xc6\xd8':None,b'\xc6\xd9':None,b'\xc6\xda':None,b'\xc6\xdb':None, +b'\xc6\xdc':None,b'\xc6\xdd':None,b'\xc6\xde':None,b'\xc6\xdf':None,b'\xc6\xe0' +:None,b'\xc6\xe1':None,b'\xc6\xe2':None,b'\xc6\xe3':None,b'\xc6\xe4':None, +b'\xc6\xe5':None,b'\xc6\xe6':None,b'\xc6\xe7':None,b'\xc6\xe8':None,b'\xc6\xe9' +:None,b'\xc6\xea':None,b'\xc6\xeb':None,b'\xc6\xec':None,b'\xc6\xed':None, +b'\xc6\xee':None,b'\xc6\xef':None,b'\xc6\xf0':None,b'\xc6\xf1':None,b'\xc6\xf2' +:None,b'\xc6\xf3':None,b'\xc6\xf4':None,b'\xc6\xf5':None,b'\xc6\xf6':None, +b'\xc6\xf7':None,b'\xc6\xf8':None,b'\xc6\xf9':None,b'\xc6\xfa':None,b'\xc6\xfb' +:None,b'\xc6\xfc':None,b'\xc6\xfd':None,b'\xc6\xfe':None,b'\xc7':2,b'\xc7@': +None,b'\xc7A':None,b'\xc7B':None,b'\xc7C':None,b'\xc7D':None,b'\xc7E':None, +b'\xc7F':None,b'\xc7G':None,b'\xc7H':None,b'\xc7I':None,b'\xc7J':None,b'\xc7K': +None,b'\xc7L':None,b'\xc7M':None,b'\xc7N':None,b'\xc7O':None,b'\xc7P':None, +b'\xc7Q':None,b'\xc7R':None,b'\xc7S':None,b'\xc7T':None,b'\xc7U':None,b'\xc7V': +None,b'\xc7W':None,b'\xc7X':None,b'\xc7Y':None,b'\xc7Z':None,b'\xc7[':None, +b'\xc7\\':None,b'\xc7]':None,b'\xc7^':None,b'\xc7_':None,b'\xc7`':None,b'\xc7a' +:None,b'\xc7b':None,b'\xc7c':None,b'\xc7d':None,b'\xc7e':None,b'\xc7f':None, +b'\xc7g':None,b'\xc7h':None,b'\xc7i':None,b'\xc7j':None,b'\xc7k':None,b'\xc7l': +None,b'\xc7m':None,b'\xc7n':None,b'\xc7o':None,b'\xc7p':None,b'\xc7q':None, +b'\xc7r':None,b'\xc7s':None,b'\xc7t':None,b'\xc7u':None,b'\xc7v':None,b'\xc7w': +None,b'\xc7x':None,b'\xc7y':None,b'\xc7z':None,b'\xc7{':None,b'\xc7|':None, +b'\xc7}':None,b'\xc7~':None,b'\xc7\xa1':None,b'\xc7\xa2':None,b'\xc7\xa3':None, +b'\xc7\xa4':None,b'\xc7\xa5':None,b'\xc7\xa6':None,b'\xc7\xa7':None,b'\xc7\xa8' +:None,b'\xc7\xa9':None,b'\xc7\xaa':None,b'\xc7\xab':None,b'\xc7\xac':None, +b'\xc7\xad':None,b'\xc7\xae':None,b'\xc7\xaf':None,b'\xc7\xb0':None,b'\xc7\xb1' 
+:None,b'\xc7\xb2':None,b'\xc7\xb3':None,b'\xc7\xb4':None,b'\xc7\xb5':None, +b'\xc7\xb6':None,b'\xc7\xb7':None,b'\xc7\xb8':None,b'\xc7\xb9':None,b'\xc7\xba' +:None,b'\xc7\xbb':None,b'\xc7\xbc':None,b'\xc7\xbd':None,b'\xc7\xbe':None, +b'\xc7\xbf':None,b'\xc7\xc0':None,b'\xc7\xc1':None,b'\xc7\xc2':None,b'\xc7\xc3' +:None,b'\xc7\xc4':None,b'\xc7\xc5':None,b'\xc7\xc6':None,b'\xc7\xc7':None, +b'\xc7\xc8':None,b'\xc7\xc9':None,b'\xc7\xca':None,b'\xc7\xcb':None,b'\xc7\xcc' +:None,b'\xc7\xcd':None,b'\xc7\xce':None,b'\xc7\xcf':None,b'\xc7\xd0':None, +b'\xc7\xd1':None,b'\xc7\xd2':None,b'\xc7\xd3':None,b'\xc7\xd4':None,b'\xc7\xd5' +:None,b'\xc7\xd6':None,b'\xc7\xd7':None,b'\xc7\xd8':None,b'\xc7\xd9':None, +b'\xc7\xda':None,b'\xc7\xdb':None,b'\xc7\xdc':None,b'\xc7\xdd':None,b'\xc7\xde' +:None,b'\xc7\xdf':None,b'\xc7\xe0':None,b'\xc7\xe1':None,b'\xc7\xe2':None, +b'\xc7\xe3':None,b'\xc7\xe4':None,b'\xc7\xe5':None,b'\xc7\xe6':None,b'\xc7\xe7' +:None,b'\xc7\xe8':None,b'\xc7\xe9':None,b'\xc7\xea':None,b'\xc7\xeb':None, +b'\xc7\xec':None,b'\xc7\xed':None,b'\xc7\xee':None,b'\xc7\xef':None,b'\xc7\xf0' +:None,b'\xc7\xf1':None,b'\xc7\xf2':None,b'\xc7\xf3':None,b'\xc7\xf4':None, +b'\xc7\xf5':None,b'\xc7\xf6':None,b'\xc7\xf7':None,b'\xc7\xf8':None,b'\xc7\xf9' +:None,b'\xc7\xfa':None,b'\xc7\xfb':None,b'\xc7\xfc':None,b'\xfd':'\xa9',b'\xfe' +:'\u2122',b'\xff':'\u2026' +} + +basecodec = _codecs_tw.getcodec('big5') +codec = mbc.create_extcodec('mac_chintrad', basecodec, encode_map, decode_map, + 2, 2, 0) + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def 
getregentry(): + return codecs.CodecInfo( + name='mac_chintrad', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) Index: Lib/encodings/aliases.py =================================================================== --- Lib/encodings/aliases.py (revision 64533) +++ Lib/encodings/aliases.py (working copy) @@ -416,6 +416,18 @@ 'latin' : 'latin_1', 'latin1' : 'latin_1', + # mac_chinsimp codec + 'cp10008' : 'mac_chinsimp', + 'macchinsimp' : 'mac_chinsimp', + 'macchinesesimplified' : 'mac_chinsimp', + 'x_mac_chinesesimp' : 'mac_chinsimp', + + # mac_chintrad codec + 'cp10002' : 'mac_chintrad', + 'macchintrad' : 'mac_chintrad', + 'macchinesetraditional' : 'mac_chintrad', + 'x_mac_chinesetrad' : 'mac_chintrad', + # mac_cyrillic codec 'maccyrillic' : 'mac_cyrillic', @@ -429,6 +441,16 @@ 'maccentraleurope' : 'mac_latin2', 'maclatin2' : 'mac_latin2', + # mac_japanese codec + 'cp10001' : 'mac_japanese', + 'macjapanese' : 'mac_japanese', + 'x_mac_japanese' : 'mac_japanese', + + # mac_korean codec + 'cp10003' : 'mac_korean', + 'mackorean' : 'mac_korean', + 'x_mac_korean' : 'mac_korean', + # mac_roman codec 'macroman' : 'mac_roman', Index: Lib/encodings/mac_chinsimp.py =================================================================== --- Lib/encodings/mac_chinsimp.py (revision 0) +++ Lib/encodings/mac_chinsimp.py (revision 0) @@ -0,0 +1,93 @@ +# +# mac_chinsimp.py: Python Unicode Codec for MAC_CHINSIMP +# +# Written by Hye-Shik Chang +# + +import _codecs_cn, codecs +import _multibytecodec as mbc + +encode_map = { +'\x83':b'\x83','\x84':b'\x84','\x85':b'\x85','\x86':b'\x86','\x87':b'\x87', +'\x88':b'\x88','\x89':b'\x89','\x8a':b'\x8a','\x8b':b'\x8b','\x8c':b'\x8c', +'\x8d':b'\x8d','\x8e':b'\x8e','\x8f':b'\x8f','\x90':b'\x90','\x91':b'\x91', +'\x92':b'\x92','\x93':b'\x93','\x94':b'\x94','\x95':b'\x95','\x96':b'\x96', 
+'\x97':b'\x97','\x98':b'\x98','\x99':b'\x99','\x9a':b'\x9a','\x9b':b'\x9b', +'\x9c':b'\x9c','\x9d':b'\x9d','\x9e':b'\x9e','\x9f':b'\x9f','\xa0':b'\xa0', +'\xa2':b'\xa1\xe9','\xa3':b'\xa1\xea','\xa5':b'\xa3\xa4','\xa9':b'\xfd','\xb7': +b'\xa1\xa4','\xfc':(2,b'\xa8\xb9'),'\xfc\uf87f':b'\x80','\u0144':b'\xa8\xbd', +'\u0148':b'\xa8\xbe','\u01f9':b'\xa8\xbf','\u0251':b'\xa8\xbb','\u0261': +b'\xa8\xc0','\u1e3f':b'\xa8\xbc','\u2014':b'\xa1\xaa','\u2015':None,'\u2026': +b'\xff','\u203e':b'\xa3\xfe','\u2122':b'\xfe','\u22ef':(2,b'\xa1\xad'), +'\u22ef\uf87e':b'\xa6\xf3','\u3001':(2,b'\xa1\xa2'),'\u3001\uf87e':b'\xa6\xdb', +'\u3002':(2,b'\xa1\xa3'),'\u3002\uf87e':b'\xa6\xda','\u3016':(2,b'\xa1\xbc'), +'\u3016\uf87e':b'\xa6\xec','\u3017':(2,b'\xa1\xbd'),'\u3017\uf87e':b'\xa6\xed', +'\u301c':b'\xa1\xab','\u30fb':None,'\uf880':b'\x81','\uf881':b'\x82','\ufe31': +b'\xa6\xf2','\ufe33':b'\xa6\xf4','\ufe34':b'\xa6\xf5','\ufe35':b'\xa6\xe0', +'\ufe36':b'\xa6\xe1','\ufe37':b'\xa6\xf0','\ufe38':b'\xa6\xf1','\ufe39': +b'\xa6\xe2','\ufe3a':b'\xa6\xe3','\ufe3b':b'\xa6\xee','\ufe3c':b'\xa6\xef', +'\ufe3d':b'\xa6\xe6','\ufe3e':b'\xa6\xe7','\ufe3f':b'\xa6\xe4','\ufe40': +b'\xa6\xe5','\ufe41':b'\xa6\xe8','\ufe42':b'\xa6\xe9','\ufe43':b'\xa6\xea', +'\ufe44':b'\xa6\xeb','\uff01':(2,b'\xa3\xa1'),'\uff01\uf87e':b'\xa6\xde', +'\uff0c':(2,b'\xa3\xac'),'\uff0c\uf87e':b'\xa6\xd9','\uff1a':(2,b'\xa3\xba'), +'\uff1a\uf87e':b'\xa6\xdc','\uff1b':(2,b'\xa3\xbb'),'\uff1b\uf87e':b'\xa6\xdd', +'\uff1f':(2,b'\xa3\xbf'),'\uff1f\uf87e':b'\xa6\xdf','\uff5e':None,'\uffe0':None +,'\uffe1':None,'\uffe3':None,'\uffe5':None +} + +decode_map = { +b'\x80':'\xfc\uf87f',b'\x81':'\uf880',b'\x82':'\uf881',b'\x83':'\x83',b'\x84': +'\x84',b'\x85':'\x85',b'\x86':'\x86',b'\x87':'\x87',b'\x88':'\x88',b'\x89': +'\x89',b'\x8a':'\x8a',b'\x8b':'\x8b',b'\x8c':'\x8c',b'\x8d':'\x8d',b'\x8e': +'\x8e',b'\x8f':'\x8f',b'\x90':'\x90',b'\x91':'\x91',b'\x92':'\x92',b'\x93': 
+'\x93',b'\x94':'\x94',b'\x95':'\x95',b'\x96':'\x96',b'\x97':'\x97',b'\x98': +'\x98',b'\x99':'\x99',b'\x9a':'\x9a',b'\x9b':'\x9b',b'\x9c':'\x9c',b'\x9d': +'\x9d',b'\x9e':'\x9e',b'\x9f':'\x9f',b'\xa0':'\xa0',b'\xa1':2,b'\xa1\xa4': +'\xb7',b'\xa1\xaa':'\u2014',b'\xa1\xab':'\u301c',b'\xa1\xad':'\u22ef', +b'\xa1\xe9':'\xa2',b'\xa1\xea':'\xa3',b'\xa3':2,b'\xa3\xa4':'\xa5',b'\xa3\xfe': +'\u203e',b'\xa6':2,b'\xa6\xd9':'\uff0c\uf87e',b'\xa6\xda':'\u3002\uf87e', +b'\xa6\xdb':'\u3001\uf87e',b'\xa6\xdc':'\uff1a\uf87e',b'\xa6\xdd': +'\uff1b\uf87e',b'\xa6\xde':'\uff01\uf87e',b'\xa6\xdf':'\uff1f\uf87e', +b'\xa6\xe0':'\ufe35',b'\xa6\xe1':'\ufe36',b'\xa6\xe2':'\ufe39',b'\xa6\xe3': +'\ufe3a',b'\xa6\xe4':'\ufe3f',b'\xa6\xe5':'\ufe40',b'\xa6\xe6':'\ufe3d', +b'\xa6\xe7':'\ufe3e',b'\xa6\xe8':'\ufe41',b'\xa6\xe9':'\ufe42',b'\xa6\xea': +'\ufe43',b'\xa6\xeb':'\ufe44',b'\xa6\xec':'\u3016\uf87e',b'\xa6\xed': +'\u3017\uf87e',b'\xa6\xee':'\ufe3b',b'\xa6\xef':'\ufe3c',b'\xa6\xf0':'\ufe37', +b'\xa6\xf1':'\ufe38',b'\xa6\xf2':'\ufe31',b'\xa6\xf3':'\u22ef\uf87e', +b'\xa6\xf4':'\ufe33',b'\xa6\xf5':'\ufe34',b'\xa8':2,b'\xa8\xbb':'\u0251', +b'\xa8\xbc':'\u1e3f',b'\xa8\xbd':'\u0144',b'\xa8\xbe':'\u0148',b'\xa8\xbf': +'\u01f9',b'\xa8\xc0':'\u0261',b'\xfd':'\xa9',b'\xfe':'\u2122',b'\xff':'\u2026' +} + +basecodec = _codecs_cn.getcodec('gb2312') +codec = mbc.create_extcodec('mac_chinsimp', basecodec, encode_map, decode_map, + 2, 2, 0) + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='mac_chinsimp', + encode=Codec().encode, + 
decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) Index: Lib/encodings/mac_japanese.py =================================================================== --- Lib/encodings/mac_japanese.py (revision 0) +++ Lib/encodings/mac_japanese.py (revision 0) @@ -0,0 +1,258 @@ +# +# mac_japanese.py: Python Unicode Codec for MAC_JAPANESE +# +# Written by Hye-Shik Chang +# + +import _codecs_jp, codecs +import _multibytecodec as mbc + +encode_map = { +'\\':b'\x80','\xa0':b'\xa0','\xa9':b'\xfd','\u2010':(2,b'\x81]'),'\u2010\uf87e' +:b'\xeb]','\u2014':b'\x81\\','\u2015':None,'\u2016':(2,b'\x81a'),'\u2016\uf87e' +:b'\xeba','\u2026':(2,b'\x81c'),'\u2026\uf87e':b'\xebc','\u2026\uf87f':b'\xff', +'\u203e':None,'\u2109':b'\x86V','\u2113':b'\x86P','\u2116':b'\x86\x9b','\u2121' +:b'\x86\x9d','\u2122':b'\xfe','\u2160':b'\x85\x9f','\u2161':b'\x85\xa0', +'\u2162':b'\x85\xa1','\u2163':b'\x85\xa2','\u2164':b'\x85\xa3','\u2165': +b'\x85\xa4','\u2166':b'\x85\xa5','\u2167':b'\x85\xa6','\u2168':b'\x85\xa7', +'\u2169':b'\x85\xa8','\u216a':b'\x85\xa9','\u216b':b'\x85\xaa','\u2170': +b'\x85\xb3','\u2171':b'\x85\xb4','\u2172':b'\x85\xb5','\u2173':b'\x85\xb6', +'\u2174':b'\x85\xb7','\u2175':b'\x85\xb8','\u2176':b'\x85\xb9','\u2177': +b'\x85\xba','\u2178':b'\x85\xbb','\u2179':b'\x85\xbc','\u217a':b'\x85\xbd', +'\u217b':b'\x85\xbe','\u21c4':b'\x86\xcc','\u21c5':b'\x86\xcd','\u21c6': +b'\x86\xcb','\u21e6':(2,b'\x86\xd0'),'\u21e6\uf87a':b'\x86\xd4','\u21e7': +(2,b'\x86\xd1'),'\u21e7\uf87a':b'\x86\xd5','\u21e8':(2,b'\x86\xcf'), +'\u21e8\uf87a':b'\x86\xd3','\u21e9':(2,b'\x86\xd2'),'\u21e9\uf87a':b'\x86\xd6', +'\u221f':b'\x88A','\u222e':b'\x88@','\u22bf':b'\x88B','\u2460':b'\x85@', +'\u2461':b'\x85A','\u2462':b'\x85B','\u2463':b'\x85C','\u2464':b'\x85D', +'\u2465':b'\x85E','\u2466':b'\x85F','\u2467':b'\x85G','\u2468':b'\x85H', 
+'\u2469':b'\x85I','\u246a':b'\x85J','\u246b':b'\x85K','\u246c':b'\x85L', +'\u246d':b'\x85M','\u246e':b'\x85N','\u246f':b'\x85O','\u2470':b'\x85P', +'\u2471':b'\x85Q','\u2472':b'\x85R','\u2473':b'\x85S','\u2474':b'\x85^', +'\u2475':b'\x85_','\u2476':b'\x85`','\u2477':b'\x85a','\u2478':b'\x85b', +'\u2479':b'\x85c','\u247a':b'\x85d','\u247b':b'\x85e','\u247c':b'\x85f', +'\u247d':b'\x85g','\u247e':b'\x85h','\u247f':b'\x85i','\u2480':b'\x85j', +'\u2481':b'\x85k','\u2482':b'\x85l','\u2483':b'\x85m','\u2484':b'\x85n', +'\u2485':b'\x85o','\u2486':b'\x85p','\u2487':b'\x85q','\u2488':b'\x85\x92', +'\u2489':b'\x85\x93','\u248a':b'\x85\x94','\u248b':b'\x85\x95','\u248c': +b'\x85\x96','\u248d':b'\x85\x97','\u248e':b'\x85\x98','\u248f':b'\x85\x99', +'\u2490':b'\x85\x9a','\u249c':b'\x85\xdb','\u249d':b'\x85\xdc','\u249e': +b'\x85\xdd','\u249f':b'\x85\xde','\u24a0':b'\x85\xdf','\u24a1':b'\x85\xe0', +'\u24a2':b'\x85\xe1','\u24a3':b'\x85\xe2','\u24a4':b'\x85\xe3','\u24a5': +b'\x85\xe4','\u24a6':b'\x85\xe5','\u24a7':b'\x85\xe6','\u24a8':b'\x85\xe7', +'\u24a9':b'\x85\xe8','\u24aa':b'\x85\xe9','\u24ab':b'\x85\xea','\u24ac': +b'\x85\xeb','\u24ad':b'\x85\xec','\u24ae':b'\x85\xed','\u24af':b'\x85\xee', +'\u24b0':b'\x85\xef','\u24b1':b'\x85\xf0','\u24b2':b'\x85\xf1','\u24b3': +b'\x85\xf2','\u24b4':b'\x85\xf3','\u24b5':b'\x85\xf4','\u260e':b'\x86\xb4', +'\u261c':b'\x86\xc8','\u261d':b'\x86\xc9','\u261e':b'\x86\xc7','\u261f': +b'\x86\xca','\u2660':b'\x86\xa3','\u2661':b'\x86\xa1','\u2662':b'\x86\xa2', +'\u2663':b'\x86\xa4','\u2664':b'\x86\x9f','\u2665':b'\x86\xa5','\u2666': +b'\x86\xa6','\u2667':b'\x86\xa0','\u2776':b'\x85|','\u2777':b'\x85}','\u2778': +b'\x85~','\u2779':b'\x85\x80','\u277a':b'\x85\x81','\u277b':b'\x85\x82', +'\u277c':b'\x85\x83','\u277d':b'\x85\x84','\u277e':b'\x85\x85','\u3001': +(2,b'\x81A'),'\u3001\uf87e':b'\xebA','\u3002':(2,b'\x81B'),'\u3002\uf87e': +b'\xebB','\u3004':b'\x86\xb5','\u301c':(2,b'\x81`'),'\u301c\uf87e':b'\xeb`', 
+'\u301d':b'\x88T','\u301f':b'\x88U','\u3020':b'\x86\xb3','\u3041': +(2,b'\x82\x9f'),'\u3041\uf87e':b'\xec\x9f','\u3043':(2,b'\x82\xa1'), +'\u3043\uf87e':b'\xec\xa1','\u3045':(2,b'\x82\xa3'),'\u3045\uf87e':b'\xec\xa3', +'\u3047':(2,b'\x82\xa5'),'\u3047\uf87e':b'\xec\xa5','\u3049':(2,b'\x82\xa7'), +'\u3049\uf87e':b'\xec\xa7','\u3063':(2,b'\x82\xc1'),'\u3063\uf87e':b'\xec\xc1', +'\u3083':(2,b'\x82\xe1'),'\u3083\uf87e':b'\xec\xe1','\u3085':(2,b'\x82\xe3'), +'\u3085\uf87e':b'\xec\xe3','\u3087':(2,b'\x82\xe5'),'\u3087\uf87e':b'\xec\xe5', +'\u308e':(2,b'\x82\xec'),'\u308e\uf87e':b'\xec\xec','\u3094':b'\x88h','\u30a1': +(2,b'\x83@'),'\u30a1\uf87e':b'\xed@','\u30a3':(2,b'\x83B'),'\u30a3\uf87e': +b'\xedB','\u30a5':(2,b'\x83D'),'\u30a5\uf87e':b'\xedD','\u30a7':(2,b'\x83F'), +'\u30a7\uf87e':b'\xedF','\u30a9':(2,b'\x83H'),'\u30a9\uf87e':b'\xedH','\u30c3': +(2,b'\x83b'),'\u30c3\uf87e':b'\xedb','\u30e3':(2,b'\x83\x83'),'\u30e3\uf87e': +b'\xed\x83','\u30e5':(2,b'\x83\x85'),'\u30e5\uf87e':b'\xed\x85','\u30e7': +(2,b'\x83\x87'),'\u30e7\uf87e':b'\xed\x87','\u30ee':(2,b'\x83\x8e'), +'\u30ee\uf87e':b'\xed\x8e','\u30f5':(2,b'\x83\x95'),'\u30f5\uf87e':b'\xed\x95', +'\u30f6':(2,b'\x83\x96'),'\u30f6\uf87e':b'\xed\x96','\u30f7':b'\x88j','\u30f8': +b'\x88k','\u30f9':b'\x88l','\u30fa':b'\x88m','\u30fc':(2,b'\x81['), +'\u30fc\uf87e':b'\xeb[','\u322a':b'\x87A','\u322b':b'\x87B','\u322c':b'\x87C', +'\u322d':b'\x87D','\u322e':b'\x87E','\u322f':b'\x87F','\u3230':b'\x87@', +'\u3231':b'\x87M','\u3232':b'\x87P','\u3233':b'\x87S','\u3234':b'\x87O', +'\u3235':b'\x87T','\u3236':b'\x87R','\u3237':b'\x87H','\u3238':b'\x87X', +'\u3239':b'\x87K','\u323a':b'\x87L','\u323b':b'\x87Q','\u323c':b'\x87U', +'\u323d':b'\x87V','\u323e':b'\x87N','\u323f':b'\x87W','\u3240':b'\x87G', +'\u3242':b'\x87I','\u3243':b'\x87J','\u3296':b'\x87\x99','\u3298':b'\x87\x9b', +'\u3299':b'\x87\x9e','\u329d':b'\x87\x9a','\u329e':b'\x87\x9c','\u32a4': +b'\x87\x93','\u32a5':b'\x87\x94','\u32a6':b'\x87\x95','\u32a7':b'\x87\x96', 
+'\u32a8':b'\x87\x97','\u32a9':b'\x87\x98','\u3300':b'\x87\xbd','\u3303': +b'\x87\xa7','\u3305':b'\x87\xa4','\u330d':b'\x87\xb0','\u3314':b'\x87\xa2', +'\u3315':b'\x87\xaa','\u3316':b'\x87\xa3','\u3318':b'\x87\xa9','\u331e': +b'\x87\xbe','\u3322':b'\x87\xa0','\u3323':b'\x87\xb2','\u3326':b'\x87\xb3', +'\u3327':b'\x87\xab','\u332a':b'\x87\xbf','\u332b':b'\x87\xb5','\u3331': +b'\x87\xc0','\u3333':b'\x87\xa5','\u3336':b'\x87\xa8','\u3339':b'\x87\xae', +'\u333b':b'\x87\xb4','\u3342':b'\x87\xb1','\u3347':b'\x87\xc1','\u3349': +b'\x87\x9f','\u334a':b'\x87\xad','\u334d':b'\x87\xa1','\u334e':b'\x87\xa6', +'\u3351':b'\x87\xac','\u3357':b'\x87\xaf','\u337b':b'\x87\xe8','\u337c': +b'\x87\xe7','\u337d':b'\x87\xe6','\u337e':b'\x87\xe5','\u337f':b'\x87\xfa', +'\u3385':b'\x86Z','\u3386':b'\x86[','\u3387':b'\x86\\','\u338e':b'\x86J', +'\u338f':b'\x86L','\u3390':b'\x86Y','\u3396':b'\x86N','\u3397':b'\x86O', +'\u3398':b'\x86Q','\u339c':b'\x86@','\u339d':b'\x86B','\u339e':b'\x86H', +'\u339f':b'\x86A','\u33a0':b'\x86C','\u33a1':b'\x86F','\u33a2':b'\x86I', +'\u33a4':b'\x86D','\u33a5':b'\x86G','\u33b0':b'\x86U','\u33b1':b'\x86T', +'\u33b2':b'\x86S','\u33b3':b'\x86R','\u33c4':b'\x86M','\u33cb':b'\x86X', +'\u33cd':b'\x86\x9c','\u33d4':b'\x86W','\u5927':(2,b'\x91\xe5'),'\u5927\u20dd': +b'\x87\x91','\u5c0f':(2,b'\x8f\xac'),'\u5c0f\u20dd':b'\x87\x92','\u63a7': +(2,b'\x8dT'),'\u63a7\u20dd':b'\x87\x9d','\uf860':2,'\uf8600':3,'\uf8600.': +b'\x85\x91','\uf860T':3,'\uf860TB':b'\x86]','\uf860X':3,'\uf860XV':b'\x85\xad', +'\uf860x':3,'\uf860xv':b'\x85\xc1','\uf860\u2193':3,'\uf860\u2193\u2191': +b'\x86\xce','\uf861':2,'\uf861F':3,'\uf861FA':4,'\uf861FAX':b'\x86\x9e', +'\uf861X':3,'\uf861XI':4,'\uf861XIV':b'\x85\xac','\uf861x':3,'\uf861xi':4, +'\uf861xiv':b'\x85\xc0','\uf862':2,'\uf862X':3,'\uf862XI':4,'\uf862XII':5, +'\uf862XIII':b'\x85\xab','\uf862x':3,'\uf862xi':4,'\uf862xii':5,'\uf862xiii': +b'\x85\xbf','\uf862\u6709':3,'\uf862\u6709\u9650':4,'\uf862\u6709\u9650\u4f1a': 
+5,'\uf862\u6709\u9650\u4f1a\u793e':b'\x87\xfb','\uf862\u8ca1':3, +'\uf862\u8ca1\u56e3':4,'\uf862\u8ca1\u56e3\u6cd5':5, +'\uf862\u8ca1\u56e3\u6cd5\u4eba':b'\x87\xfc','\ufe30':b'\xebd','\ufe31': +b'\xeb\\','\ufe33':b'\xebQ','\ufe35':b'\xebi','\ufe36':b'\xebj','\ufe37': +b'\xebo','\ufe38':b'\xebp','\ufe39':b'\xebk','\ufe3a':b'\xebl','\ufe3b': +b'\xeby','\ufe3c':b'\xebz','\ufe3d':b'\xebs','\ufe3e':b'\xebt','\ufe3f': +b'\xebq','\ufe40':b'\xebr','\ufe41':b'\xebu','\ufe42':b'\xebv','\ufe43': +b'\xebw','\ufe44':b'\xebx','\uff1d':(2,b'\x81\x81'),'\uff1d\uf87e':b'\xeb\x81', +'\uff3b':(2,b'\x81m'),'\uff3b\uf87e':b'\xebm','\uff3c':b'\x81_','\uff3d': +(2,b'\x81n'),'\uff3d\uf87e':b'\xebn','\uff47':(2,b'\x82\x87'),'\uff47\uf87f': +b'\x86K','\uff4d':(2,b'\x82\x8d'),'\uff4d\uf87f':b'\x86E','\uff5c':(2,b'\x81b') +,'\uff5c\uf87e':b'\xebb','\uffe3':(2,b'\x81P'),'\uffe3\uf87e':b'\xebP' +} + +decode_map = { +b'\\':'\xa5',b'\x80':'\\',b'\x81':2,b'\x81\\':'\u2014',b'\x81_':'\uff3c', +b'\x85':2,b'\x85@':'\u2460',b'\x85A':'\u2461',b'\x85B':'\u2462',b'\x85C': +'\u2463',b'\x85D':'\u2464',b'\x85E':'\u2465',b'\x85F':'\u2466',b'\x85G': +'\u2467',b'\x85H':'\u2468',b'\x85I':'\u2469',b'\x85J':'\u246a',b'\x85K': +'\u246b',b'\x85L':'\u246c',b'\x85M':'\u246d',b'\x85N':'\u246e',b'\x85O': +'\u246f',b'\x85P':'\u2470',b'\x85Q':'\u2471',b'\x85R':'\u2472',b'\x85S': +'\u2473',b'\x85^':'\u2474',b'\x85_':'\u2475',b'\x85`':'\u2476',b'\x85a': +'\u2477',b'\x85b':'\u2478',b'\x85c':'\u2479',b'\x85d':'\u247a',b'\x85e': +'\u247b',b'\x85f':'\u247c',b'\x85g':'\u247d',b'\x85h':'\u247e',b'\x85i': +'\u247f',b'\x85j':'\u2480',b'\x85k':'\u2481',b'\x85l':'\u2482',b'\x85m': +'\u2483',b'\x85n':'\u2484',b'\x85o':'\u2485',b'\x85p':'\u2486',b'\x85q': +'\u2487',b'\x85|':'\u2776',b'\x85}':'\u2777',b'\x85~':'\u2778',b'\x85\x80': +'\u2779',b'\x85\x81':'\u277a',b'\x85\x82':'\u277b',b'\x85\x83':'\u277c', +b'\x85\x84':'\u277d',b'\x85\x85':'\u277e',b'\x85\x91':'\uf8600.',b'\x85\x92': 
+'\u2488',b'\x85\x93':'\u2489',b'\x85\x94':'\u248a',b'\x85\x95':'\u248b', +b'\x85\x96':'\u248c',b'\x85\x97':'\u248d',b'\x85\x98':'\u248e',b'\x85\x99': +'\u248f',b'\x85\x9a':'\u2490',b'\x85\x9f':'\u2160',b'\x85\xa0':'\u2161', +b'\x85\xa1':'\u2162',b'\x85\xa2':'\u2163',b'\x85\xa3':'\u2164',b'\x85\xa4': +'\u2165',b'\x85\xa5':'\u2166',b'\x85\xa6':'\u2167',b'\x85\xa7':'\u2168', +b'\x85\xa8':'\u2169',b'\x85\xa9':'\u216a',b'\x85\xaa':'\u216b',b'\x85\xab': +'\uf862XIII',b'\x85\xac':'\uf861XIV',b'\x85\xad':'\uf860XV',b'\x85\xb3': +'\u2170',b'\x85\xb4':'\u2171',b'\x85\xb5':'\u2172',b'\x85\xb6':'\u2173', +b'\x85\xb7':'\u2174',b'\x85\xb8':'\u2175',b'\x85\xb9':'\u2176',b'\x85\xba': +'\u2177',b'\x85\xbb':'\u2178',b'\x85\xbc':'\u2179',b'\x85\xbd':'\u217a', +b'\x85\xbe':'\u217b',b'\x85\xbf':'\uf862xiii',b'\x85\xc0':'\uf861xiv', +b'\x85\xc1':'\uf860xv',b'\x85\xdb':'\u249c',b'\x85\xdc':'\u249d',b'\x85\xdd': +'\u249e',b'\x85\xde':'\u249f',b'\x85\xdf':'\u24a0',b'\x85\xe0':'\u24a1', +b'\x85\xe1':'\u24a2',b'\x85\xe2':'\u24a3',b'\x85\xe3':'\u24a4',b'\x85\xe4': +'\u24a5',b'\x85\xe5':'\u24a6',b'\x85\xe6':'\u24a7',b'\x85\xe7':'\u24a8', +b'\x85\xe8':'\u24a9',b'\x85\xe9':'\u24aa',b'\x85\xea':'\u24ab',b'\x85\xeb': +'\u24ac',b'\x85\xec':'\u24ad',b'\x85\xed':'\u24ae',b'\x85\xee':'\u24af', +b'\x85\xef':'\u24b0',b'\x85\xf0':'\u24b1',b'\x85\xf1':'\u24b2',b'\x85\xf2': +'\u24b3',b'\x85\xf3':'\u24b4',b'\x85\xf4':'\u24b5',b'\x86':2,b'\x86@':'\u339c', +b'\x86A':'\u339f',b'\x86B':'\u339d',b'\x86C':'\u33a0',b'\x86D':'\u33a4', +b'\x86E':'\uff4d\uf87f',b'\x86F':'\u33a1',b'\x86G':'\u33a5',b'\x86H':'\u339e', +b'\x86I':'\u33a2',b'\x86J':'\u338e',b'\x86K':'\uff47\uf87f',b'\x86L':'\u338f', +b'\x86M':'\u33c4',b'\x86N':'\u3396',b'\x86O':'\u3397',b'\x86P':'\u2113', +b'\x86Q':'\u3398',b'\x86R':'\u33b3',b'\x86S':'\u33b2',b'\x86T':'\u33b1', +b'\x86U':'\u33b0',b'\x86V':'\u2109',b'\x86W':'\u33d4',b'\x86X':'\u33cb', +b'\x86Y':'\u3390',b'\x86Z':'\u3385',b'\x86[':'\u3386',b'\x86\\':'\u3387', 
+b'\x86]':'\uf860TB',b'\x86\x9b':'\u2116',b'\x86\x9c':'\u33cd',b'\x86\x9d': +'\u2121',b'\x86\x9e':'\uf861FAX',b'\x86\x9f':'\u2664',b'\x86\xa0':'\u2667', +b'\x86\xa1':'\u2661',b'\x86\xa2':'\u2662',b'\x86\xa3':'\u2660',b'\x86\xa4': +'\u2663',b'\x86\xa5':'\u2665',b'\x86\xa6':'\u2666',b'\x86\xb3':'\u3020', +b'\x86\xb4':'\u260e',b'\x86\xb5':'\u3004',b'\x86\xc7':'\u261e',b'\x86\xc8': +'\u261c',b'\x86\xc9':'\u261d',b'\x86\xca':'\u261f',b'\x86\xcb':'\u21c6', +b'\x86\xcc':'\u21c4',b'\x86\xcd':'\u21c5',b'\x86\xce':'\uf860\u2193\u2191', +b'\x86\xcf':'\u21e8',b'\x86\xd0':'\u21e6',b'\x86\xd1':'\u21e7',b'\x86\xd2': +'\u21e9',b'\x86\xd3':'\u21e8\uf87a',b'\x86\xd4':'\u21e6\uf87a',b'\x86\xd5': +'\u21e7\uf87a',b'\x86\xd6':'\u21e9\uf87a',b'\x87':2,b'\x87@':'\u3230',b'\x87A': +'\u322a',b'\x87B':'\u322b',b'\x87C':'\u322c',b'\x87D':'\u322d',b'\x87E': +'\u322e',b'\x87F':'\u322f',b'\x87G':'\u3240',b'\x87H':'\u3237',b'\x87I': +'\u3242',b'\x87J':'\u3243',b'\x87K':'\u3239',b'\x87L':'\u323a',b'\x87M': +'\u3231',b'\x87N':'\u323e',b'\x87O':'\u3234',b'\x87P':'\u3232',b'\x87Q': +'\u323b',b'\x87R':'\u3236',b'\x87S':'\u3233',b'\x87T':'\u3235',b'\x87U': +'\u323c',b'\x87V':'\u323d',b'\x87W':'\u323f',b'\x87X':'\u3238',b'\x87\x91': +'\u5927\u20dd',b'\x87\x92':'\u5c0f\u20dd',b'\x87\x93':'\u32a4',b'\x87\x94': +'\u32a5',b'\x87\x95':'\u32a6',b'\x87\x96':'\u32a7',b'\x87\x97':'\u32a8', +b'\x87\x98':'\u32a9',b'\x87\x99':'\u3296',b'\x87\x9a':'\u329d',b'\x87\x9b': +'\u3298',b'\x87\x9c':'\u329e',b'\x87\x9d':'\u63a7\u20dd',b'\x87\x9e':'\u3299', +b'\x87\x9f':'\u3349',b'\x87\xa0':'\u3322',b'\x87\xa1':'\u334d',b'\x87\xa2': +'\u3314',b'\x87\xa3':'\u3316',b'\x87\xa4':'\u3305',b'\x87\xa5':'\u3333', +b'\x87\xa6':'\u334e',b'\x87\xa7':'\u3303',b'\x87\xa8':'\u3336',b'\x87\xa9': +'\u3318',b'\x87\xaa':'\u3315',b'\x87\xab':'\u3327',b'\x87\xac':'\u3351', +b'\x87\xad':'\u334a',b'\x87\xae':'\u3339',b'\x87\xaf':'\u3357',b'\x87\xb0': +'\u330d',b'\x87\xb1':'\u3342',b'\x87\xb2':'\u3323',b'\x87\xb3':'\u3326', 
+b'\x87\xb4':'\u333b',b'\x87\xb5':'\u332b',b'\x87\xbd':'\u3300',b'\x87\xbe': +'\u331e',b'\x87\xbf':'\u332a',b'\x87\xc0':'\u3331',b'\x87\xc1':'\u3347', +b'\x87\xe5':'\u337e',b'\x87\xe6':'\u337d',b'\x87\xe7':'\u337c',b'\x87\xe8': +'\u337b',b'\x87\xfa':'\u337f',b'\x87\xfb':'\uf862\u6709\u9650\u4f1a\u793e', +b'\x87\xfc':'\uf862\u8ca1\u56e3\u6cd5\u4eba',b'\x88':2,b'\x88@':'\u222e', +b'\x88A':'\u221f',b'\x88B':'\u22bf',b'\x88T':'\u301d',b'\x88U':'\u301f', +b'\x88h':'\u3094',b'\x88j':'\u30f7',b'\x88k':'\u30f8',b'\x88l':'\u30f9', +b'\x88m':'\u30fa',b'\xa0':'\xa0',b'\xeb':2,b'\xebA':'\u3001\uf87e',b'\xebB': +'\u3002\uf87e',b'\xebP':'\uffe3\uf87e',b'\xebQ':'\ufe33',b'\xeb[': +'\u30fc\uf87e',b'\xeb\\':'\ufe31',b'\xeb]':'\u2010\uf87e',b'\xeb`': +'\u301c\uf87e',b'\xeba':'\u2016\uf87e',b'\xebb':'\uff5c\uf87e',b'\xebc': +'\u2026\uf87e',b'\xebd':'\ufe30',b'\xebi':'\ufe35',b'\xebj':'\ufe36',b'\xebk': +'\ufe39',b'\xebl':'\ufe3a',b'\xebm':'\uff3b\uf87e',b'\xebn':'\uff3d\uf87e', +b'\xebo':'\ufe37',b'\xebp':'\ufe38',b'\xebq':'\ufe3f',b'\xebr':'\ufe40', +b'\xebs':'\ufe3d',b'\xebt':'\ufe3e',b'\xebu':'\ufe41',b'\xebv':'\ufe42', +b'\xebw':'\ufe43',b'\xebx':'\ufe44',b'\xeby':'\ufe3b',b'\xebz':'\ufe3c', +b'\xeb\x81':'\uff1d\uf87e',b'\xec':2,b'\xec\x9f':'\u3041\uf87e',b'\xec\xa1': +'\u3043\uf87e',b'\xec\xa3':'\u3045\uf87e',b'\xec\xa5':'\u3047\uf87e', +b'\xec\xa7':'\u3049\uf87e',b'\xec\xc1':'\u3063\uf87e',b'\xec\xe1': +'\u3083\uf87e',b'\xec\xe3':'\u3085\uf87e',b'\xec\xe5':'\u3087\uf87e', +b'\xec\xec':'\u308e\uf87e',b'\xed':2,b'\xed@':'\u30a1\uf87e',b'\xedB': +'\u30a3\uf87e',b'\xedD':'\u30a5\uf87e',b'\xedF':'\u30a7\uf87e',b'\xedH': +'\u30a9\uf87e',b'\xedb':'\u30c3\uf87e',b'\xed\x83':'\u30e3\uf87e',b'\xed\x85': +'\u30e5\uf87e',b'\xed\x87':'\u30e7\uf87e',b'\xed\x8e':'\u30ee\uf87e', +b'\xed\x95':'\u30f5\uf87e',b'\xed\x96':'\u30f6\uf87e',b'\xfd':'\xa9',b'\xfe': +'\u2122',b'\xff':'\u2026\uf87f' +} + +# User-defined range +lowset = list(range(0x40, 0x7f)) + list(range(0x80, 0xfd)) +for high in 
range(0xf0, 0xfd): + decode_map[bytes([high])] = 2 + for j, low in enumerate(lowset): + u = chr(high * 188 + j + 12224) + c = bytes([high, low]) + encode_map[u] = c + decode_map[c] = u +del high, j, low, lowset, u, c + +basecodec = _codecs_jp.getcodec('shift_jis') +codec = mbc.create_extcodec('mac_japanese', basecodec, encode_map, decode_map, + 5, 2, 0) + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='mac_japanese', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) Index: Lib/test/test_codecmaps_kr.py =================================================================== --- Lib/test/test_codecmaps_kr.py (revision 64533) +++ Lib/test/test_codecmaps_kr.py (working copy) @@ -11,8 +11,8 @@ class TestCP949Map(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'cp949' - mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT' \ - '/WINDOWS/CP949.TXT' + mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT' + '/WINDOWS/CP949.TXT') class TestEUCKRMap(test_multibytecodec_support.TestBase_Mapping, @@ -25,11 +25,18 @@ pass_dectest = [(b'\xa4\xd4', '\u3164')] +class TestMacKoreanMap(test_multibytecodec_support.TestBase_Mapping, + unittest.TestCase): + encoding = 'mac_korean' + mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/' + 'KOREAN.TXT') + + class 
TestJOHABMap(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'johab' - mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/' \ - 'KSC/JOHAB.TXT' + mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/' + 'KSC/JOHAB.TXT') # KS X 1001 standard assigned 0x5c as WON SIGN. # but, in early 90s that is the only era used johab widely, # the most softwares implements it as REVERSE SOLIDUS. @@ -41,4 +48,5 @@ support.run_unittest(__name__) if __name__ == "__main__": + support.use_resources = ['urlfetch'] test_main() Index: Lib/test/test_codecencodings_kr.py =================================================================== --- Lib/test/test_codecencodings_kr.py (revision 64533) +++ Lib/test/test_codecencodings_kr.py (working copy) @@ -50,6 +50,22 @@ (b"\xc1\xc4", "strict", "\uc894"), ) +class Test_MacKorean(test_multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'mac_korean' + tstring = test_multibytecodec_support.load_teststring('mac_korean') + codectests = ( + # invalid sequences + (b"abc\xfe\xfe\xc1\xc4", "strict", None), + (b"abc\xc8", "strict", None), + (b"abc\xfe\xfe\xc1\xc4", "replace", "abc\ufffd\uc894"), + (b"abc\xfe\xfe\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"), + (b"abc\xfe\xfe\xc1\xc4", "ignore", "abc\uc894"), + ) + xmlcharnametest = ( + "\xab\u211c\xbb = \u2329\u1234\u232a", + b"\xa6\\&real;\xa6] = &lang;&#4660;&rang;" + ) + class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase): encoding = 'johab' tstring = test_multibytecodec_support.load_teststring('johab') Index: Lib/test/test_codecmaps_tw.py =================================================================== --- Lib/test/test_codecmaps_tw.py (revision 64533) +++ Lib/test/test_codecmaps_tw.py (working copy) @@ -24,8 +24,15 @@ (b'\xa2\xce', '\u5345'), ] +class TestMacChineseTradMap(test_multibytecodec_support.TestBase_Mapping, + unittest.TestCase): + encoding = 'mac_chintrad' + mapfileurl =
('http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/' + 'CHINTRAD.TXT') + def test_main(): support.run_unittest(__name__) if __name__ == "__main__": + support.use_resources = ['urlfetch'] test_main() Index: Lib/test/test_codecmaps_cn.py =================================================================== --- Lib/test/test_codecmaps_cn.py (revision 64533) +++ Lib/test/test_codecmaps_cn.py (working copy) @@ -16,18 +16,24 @@ class TestGBKMap(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'gbk' - mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/' \ - 'MICSFT/WINDOWS/CP936.TXT' + mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/' + 'MICSFT/WINDOWS/CP936.TXT') class TestGB18030Map(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'gb18030' - mapfileurl = 'http://source.icu-project.org/repos/icu/data/' \ - 'trunk/charset/data/xml/gb-18030-2000.xml' + mapfileurl = ('http://source.icu-project.org/repos/icu/data/' + 'trunk/charset/data/xml/gb-18030-2000.xml') +class TestMacChineseSimpMap(test_multibytecodec_support.TestBase_Mapping, + unittest.TestCase): + encoding = 'mac_chinsimp' + mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/' + 'CHINSIMP.TXT') def test_main(): support.run_unittest(__name__) if __name__ == "__main__": + support.use_resources = ['urlfetch'] test_main() Index: Lib/test/test_codecmaps_jp.py =================================================================== --- Lib/test/test_codecmaps_jp.py (revision 64533) +++ Lib/test/test_codecmaps_jp.py (working copy) @@ -11,8 +11,8 @@ class TestCP932Map(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'cp932' - mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/' \ - 'WINDOWS/CP932.TXT' + mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/' + 'WINDOWS/CP932.TXT') supmaps = [ (b'\x80', '\u0080'), (b'\xa0', '\uf8f0'), @@ -27,16 +27,14 @@ class 
TestEUCJPCOMPATMap(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'euc_jp' - mapfilename = 'EUC-JP.TXT' mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JP.TXT' class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'shift_jis' - mapfilename = 'SHIFTJIS.TXT' - mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE' \ - '/EASTASIA/JIS/SHIFTJIS.TXT' + mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE' + '/EASTASIA/JIS/SHIFTJIS.TXT') pass_enctest = [ (b'\x81_', '\\'), ] @@ -49,19 +47,25 @@ class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'euc_jisx0213' - mapfilename = 'EUC-JISX0213.TXT' mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JISX0213.TXT' class TestSJISX0213Map(test_multibytecodec_support.TestBase_Mapping, unittest.TestCase): encoding = 'shift_jisx0213' - mapfilename = 'SHIFT_JISX0213.TXT' mapfileurl = 'http://people.freebsd.org/~perky/i18n/SHIFT_JISX0213.TXT' +class TestMacJapaneseMap(test_multibytecodec_support.TestBase_Mapping, + unittest.TestCase): + encoding = 'mac_japanese' + mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/' + 'JAPANESE.TXT') + + def test_main(): support.run_unittest(__name__) if __name__ == "__main__": + support.use_resources = ['urlfetch'] test_main() Index: Lib/test/test_multibytecodec.py =================================================================== --- Lib/test/test_multibytecodec.py (revision 64533) +++ Lib/test/test_multibytecodec.py (working copy) @@ -8,6 +8,7 @@ from test import test_multibytecodec_support from test.support import TESTFN import unittest, io, codecs, sys, os +import _multibytecodec ALL_CJKENCODINGS = [ # _codecs_cn @@ -24,6 +25,8 @@ # _codecs_iso2022 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr', +# extensions + 'mac_chinsimp', 'mac_chintrad', 'mac_japanese', 
'mac_korean', ] class Test_MultibyteCodec(unittest.TestCase): @@ -234,6 +237,71 @@ # Any ISO 2022 codec will cause the segfault myunichr(x).encode('iso_2022_jp', 'ignore') +class Test_ExtCodec(unittest.TestCase): + base_codec = codecs.lookup('euc_kr')[2].codec + + def test_flagpass(self): + codec = _multibytecodec.create_extcodec( + 'extcodec_testing', self.base_codec, {}, {}, 1, 2, 0) + self.assertEqual(codec.encode('\uc496'), + (b'\xa4\xd4\xa4\xb6\xa4\xcd\xa4\xb8', 1)) + + codec = _multibytecodec.create_extcodec( + 'extcodec_testing', self.base_codec, {}, {}, 1, 2, 1) + self.assertRaises(UnicodeEncodeError, codec.encode, '\uc496') + + def test_decoding(self): + decode_map = { + b'a': 'b', # replace original mapping + b'b': None, # invalidate original mapping + b'c': 2, # expand decoding window for the current character + b'cd': 3, # expand further + b'cde': 'spam', + b'cdf': None, + b'ce': 1, # decreasing is impossible: internal error is raised + b'cf': 'egg', + b'd': b'alien', # binary string is not accepted for decoding map + b'\xbc': 2, # window size hint is required here, too + b'\xbc\xf6': '\uc815', # replace part of mapping in a plane + b'\xbc\xbc': None, # invalidate part of mapping in a plane + } + + codec = _multibytecodec.create_extcodec( + 'extcodec_testing', self.base_codec, {}, decode_map, 1, 3, 1) + + self.assertEqual(codec.decode(b'acdecf\xbc\xf6z\xb1\xe8')[0], + 'bspamegg\uc815z\uae40') + self.assertEqual(codec.decode(b'bcdf\xbc\xbc', 'ignore')[0], '') + self.assertRaises(RuntimeError, codec.decode, b'ce') + self.assertRaises(RuntimeError, codec.decode, b'd') + + def test_encoding(self): + encode_map = { + 'a': b'b', # replace original mapping + 'b': None, # invalidate original mapping + 'c': 2, # expand encoding window for the current character + 'cd': (3, b'sausage'), # expanding with shorter candidate + 'cde': b'spam', + 'cdf': None, + 'cdg': 4, # expand more with keeping previous candidate + 'cdgh': b'ham', + 'cdgi': None, + 'ce': 1, #
decrease is impossible: internal error is raised + 'cf': b'egg', + 'd': 'alien', # unicode is not accepted for encoding map + '\ud800': 2, # widen window without any further mapping + } + + codec = _multibytecodec.create_extcodec( + 'extcodec_testing', self.base_codec, encode_map, {}, 4, 2, 1) + + self.assertEqual(codec.encode('\uc218acdcdecdghcf\uc815')[0], + b'\xbc\xf6bsausagespamhamegg\xc1\xa4') + self.assertEqual(codec.encode('b\ud800cdfcdgi\ubdc1\ud800', + 'ignore')[0], b'') + self.assertRaises(RuntimeError, codec.encode, 'ce') + self.assertRaises(RuntimeError, codec.encode, 'd') + def test_main(): support.run_unittest(__name__) Index: Lib/test/test_codecencodings_tw.py =================================================================== --- Lib/test/test_codecencodings_tw.py (revision 64533) +++ Lib/test/test_codecencodings_tw.py (working copy) @@ -20,6 +20,19 @@ (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"), ) +class Test_MacChineseTrad(test_multibytecodec_support.TestBase, + unittest.TestCase): + encoding = 'mac_chintrad' + tstring = test_multibytecodec_support.load_teststring('mac_chintrad') + codectests = ( + # invalid codepoints + (b"abc\xfc\xfc\xc1\xc4", "strict", None), + (b"abc\xc8", "strict", None), + (b"abc\xfc\xfc\xc1\xc4", "replace", "abc\ufffd\u8b10"), + (b"abc\xfc\xfc\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"), + (b"abc\xfc\xfc\xc1\xc4", "ignore", "abc\u8b10"), + ) + def test_main(): support.run_unittest(__name__) Index: Lib/test/test_codecencodings_cn.py =================================================================== --- Lib/test/test_codecencodings_cn.py (revision 64533) +++ Lib/test/test_codecencodings_cn.py (working copy) @@ -50,6 +50,19 @@ ) has_iso10646 = True +class Test_MacChineseSimp(test_multibytecodec_support.TestBase, + unittest.TestCase): + encoding = 'mac_chinsimp' + tstring = test_multibytecodec_support.load_teststring('mac_chinsimp') + codectests = ( + # invalid codepoints + (b"abc\xfc\xfc\xc1\xc4", "strict", None), + 
(b"abc\xc8", "strict", None), + (b"abc\xfc\xfc\xc1\xc4", "replace", "abc\ufffd\u804a"), + (b"abc\xfc\xfc\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), + (b"abc\xfc\xfc\xc1\xc4", "ignore", "abc\u804a"), + ) + def test_main(): support.run_unittest(__name__) Index: Lib/test/cjkencodings_test.py =================================================================== --- Lib/test/cjkencodings_test.py (revision 64533) +++ Lib/test/cjkencodings_test.py (working copy) @@ -775,6 +775,247 @@ b"\xe5\xb7\x9d\xef\xa6\x81\xe4\xb8\xad\xec\x8b\x81\xe2\x91\xa8\xeb" b"\x93\xa4\xec\x95\x9c\x21\x21\x20\xe3\x89\xaf\xe3\x89\xaf\xeb\x82" b"\xa9\xe2\x99\xa1\x20\xe2\x8c\x92\xe2\x8c\x92\x2a\x0a\x0a"), +'mac_chinsimp': ( +b"\xa1\xa4\xa1\xaa\xa1\xac\xa3\xfe\x80\xa8\xb9\xa8\xb9\x80\xa6\xdf" +b"\xa1\xa2\xa6\xdb\x0a\x50\x79\x74\x68\x6f\x6e\xa3\xa8\xc5\xc9\xc9" +b"\xad\xa3\xa9\xd3\xef\xd1\xd4\xca\xc7\xd2\xbb\xd6\xd6\xb9\xa6\xc4" +b"\xdc\xc7\xbf\xb4\xf3\xb6\xf8\xcd\xea\xc9\xc6\xb5\xc4\xcd\xa8\xd3" +b"\xc3\xd0\xcd\xbc\xc6\xcb\xe3\xbb\xfa\xb3\xcc\xd0\xf2\xc9\xe8\xbc" +b"\xc6\xd3\xef\xd1\xd4\xa3\xac\x0a\xd2\xd1\xbe\xad\xbe\xdf\xd3\xd0" +b"\xca\xae\xb6\xe0\xc4\xea\xb5\xc4\xb7\xa2\xd5\xb9\xc0\xfa\xca\xb7" +b"\xa3\xac\xb3\xc9\xca\xec\xc7\xd2\xce\xc8\xb6\xa8\xa1\xa3\xd5\xe2" +b"\xd6\xd6\xd3\xef\xd1\xd4\xbe\xdf\xd3\xd0\xb7\xc7\xb3\xa3\xbc\xf2" +b"\xbd\xdd\xb6\xf8\xc7\xe5\xce\xfa\x0a\xb5\xc4\xd3\xef\xb7\xa8\xcc" +b"\xd8\xb5\xe3\xa3\xac\xca\xca\xba\xcf\xcd\xea\xb3\xc9\xb8\xf7\xd6" +b"\xd6\xb8\xdf\xb2\xe3\xc8\xce\xce\xf1\xa3\xac\xbc\xb8\xba\xf5\xbf" +b"\xc9\xd2\xd4\xd4\xda\xcb\xf9\xd3\xd0\xb5\xc4\xb2\xd9\xd7\xf7\xcf" +b"\xb5\xcd\xb3\xd6\xd0\x0a\xd4\xcb\xd0\xd0\xa1\xa3\xd5\xe2\xd6\xd6" +b"\xd3\xef\xd1\xd4\xbc\xf2\xb5\xa5\xb6\xf8\xc7\xbf\xb4\xf3\xa3\xac" +b"\xca\xca\xba\xcf\xb8\xf7\xd6\xd6\xc8\xcb\xca\xbf\xd1\xa7\xcf\xb0" +b"\xca\xb9\xd3\xc3\xa1\xa3\xc4\xbf\xc7\xb0\xa3\xac\xbb\xf9\xd3\xda" +b"\xd5\xe2\x0a\xd6\xd6\xd3\xef\xd1\xd4\xb5\xc4\xcf\xe0\xb9\xd8\xbc" 
+b"\xbc\xca\xf5\xd5\xfd\xd4\xda\xb7\xc9\xcb\xd9\xb5\xc4\xb7\xa2\xd5" +b"\xb9\xa3\xac\xd3\xc3\xbb\xa7\xca\xfd\xc1\xbf\xbc\xb1\xbe\xe7\xc0" +b"\xa9\xb4\xf3\xa3\xac\xcf\xe0\xb9\xd8\xb5\xc4\xd7\xca\xd4\xb4\xb7" +b"\xc7\xb3\xa3\xb6\xe0\xa1\xa3\x0a", +b"\xc2\xb7\xe2\x80\x94\xe2\x80\x96\xe2\x80\xbe\xc3\xbc\xef\xa1\xbf" +b"\xc3\xbc\xc3\xbc\xc3\xbc\xef\xa1\xbf\xef\xbc\x9f\xef\xa1\xbe\xe3" +b"\x80\x81\xe3\x80\x81\xef\xa1\xbe\x0a\x50\x79\x74\x68\x6f\x6e\xef" +b"\xbc\x88\xe6\xb4\xbe\xe6\xa3\xae\xef\xbc\x89\xe8\xaf\xad\xe8\xa8" +b"\x80\xe6\x98\xaf\xe4\xb8\x80\xe7\xa7\x8d\xe5\x8a\x9f\xe8\x83\xbd" +b"\xe5\xbc\xba\xe5\xa4\xa7\xe8\x80\x8c\xe5\xae\x8c\xe5\x96\x84\xe7" +b"\x9a\x84\xe9\x80\x9a\xe7\x94\xa8\xe5\x9e\x8b\xe8\xae\xa1\xe7\xae" +b"\x97\xe6\x9c\xba\xe7\xa8\x8b\xe5\xba\x8f\xe8\xae\xbe\xe8\xae\xa1" +b"\xe8\xaf\xad\xe8\xa8\x80\xef\xbc\x8c\x0a\xe5\xb7\xb2\xe7\xbb\x8f" +b"\xe5\x85\xb7\xe6\x9c\x89\xe5\x8d\x81\xe5\xa4\x9a\xe5\xb9\xb4\xe7" +b"\x9a\x84\xe5\x8f\x91\xe5\xb1\x95\xe5\x8e\x86\xe5\x8f\xb2\xef\xbc" +b"\x8c\xe6\x88\x90\xe7\x86\x9f\xe4\xb8\x94\xe7\xa8\xb3\xe5\xae\x9a" +b"\xe3\x80\x82\xe8\xbf\x99\xe7\xa7\x8d\xe8\xaf\xad\xe8\xa8\x80\xe5" +b"\x85\xb7\xe6\x9c\x89\xe9\x9d\x9e\xe5\xb8\xb8\xe7\xae\x80\xe6\x8d" +b"\xb7\xe8\x80\x8c\xe6\xb8\x85\xe6\x99\xb0\x0a\xe7\x9a\x84\xe8\xaf" +b"\xad\xe6\xb3\x95\xe7\x89\xb9\xe7\x82\xb9\xef\xbc\x8c\xe9\x80\x82" +b"\xe5\x90\x88\xe5\xae\x8c\xe6\x88\x90\xe5\x90\x84\xe7\xa7\x8d\xe9" +b"\xab\x98\xe5\xb1\x82\xe4\xbb\xbb\xe5\x8a\xa1\xef\xbc\x8c\xe5\x87" +b"\xa0\xe4\xb9\x8e\xe5\x8f\xaf\xe4\xbb\xa5\xe5\x9c\xa8\xe6\x89\x80" +b"\xe6\x9c\x89\xe7\x9a\x84\xe6\x93\x8d\xe4\xbd\x9c\xe7\xb3\xbb\xe7" +b"\xbb\x9f\xe4\xb8\xad\x0a\xe8\xbf\x90\xe8\xa1\x8c\xe3\x80\x82\xe8" +b"\xbf\x99\xe7\xa7\x8d\xe8\xaf\xad\xe8\xa8\x80\xe7\xae\x80\xe5\x8d" +b"\x95\xe8\x80\x8c\xe5\xbc\xba\xe5\xa4\xa7\xef\xbc\x8c\xe9\x80\x82" +b"\xe5\x90\x88\xe5\x90\x84\xe7\xa7\x8d\xe4\xba\xba\xe5\xa3\xab\xe5" +b"\xad\xa6\xe4\xb9\xa0\xe4\xbd\xbf\xe7\x94\xa8\xe3\x80\x82\xe7\x9b" 
+b"\xae\xe5\x89\x8d\xef\xbc\x8c\xe5\x9f\xba\xe4\xba\x8e\xe8\xbf\x99" +b"\x0a\xe7\xa7\x8d\xe8\xaf\xad\xe8\xa8\x80\xe7\x9a\x84\xe7\x9b\xb8" +b"\xe5\x85\xb3\xe6\x8a\x80\xe6\x9c\xaf\xe6\xad\xa3\xe5\x9c\xa8\xe9" +b"\xa3\x9e\xe9\x80\x9f\xe7\x9a\x84\xe5\x8f\x91\xe5\xb1\x95\xef\xbc" +b"\x8c\xe7\x94\xa8\xe6\x88\xb7\xe6\x95\xb0\xe9\x87\x8f\xe6\x80\xa5" +b"\xe5\x89\xa7\xe6\x89\xa9\xe5\xa4\xa7\xef\xbc\x8c\xe7\x9b\xb8\xe5" +b"\x85\xb3\xe7\x9a\x84\xe8\xb5\x84\xe6\xba\x90\xe9\x9d\x9e\xe5\xb8" +b"\xb8\xe5\xa4\x9a\xe3\x80\x82\x0a"), +'mac_chintrad': ( +b"\xa1\x4e\xa1\x42\xa1\x50\xa1\x4f\xa1\x66\xa1\x66\xb8\x51\x0a\xa7" +b"\xda\xad\xcc\xab\x4b\xb1\x60\xa7\xc6\xb1\xe6\xaf\xe0\xa7\x51\xa5" +b"\xce\xa4\x40\xa8\xc7\xa4\x77\xb6\x7d\xb5\x6f\xa6\x6e\xaa\xba\x6c" +b"\x69\x62\x72\x61\x72\x79\x2c\x20\xa8\xc3\xa6\xb3\xa4\x40\xad\xd3" +b"\x20\x66\x61\x73\x74\x0a\x70\x72\x6f\x74\x6f\x74\x79\x70\x69\x6e" +b"\x67\x20\xaa\xba\x20\x70\x72\x6f\x67\x72\x61\x6d\x6d\x69\x6e\x67" +b"\x20\x6c\x61\x6e\x67\x75\x61\x67\x65\x20\xa5\x69\xa8\xd1\xa8\xcf" +b"\xa5\xce\x2e\x20\xa5\xd8\xab\x65\xa6\xb3\xb3\x5c\xb3\x5c\xa6\x68" +b"\xa6\x68\xaa\xba\x0a\x6c\x69\x62\x72\x61\x72\x79\x20\xac\x4f\xa5" +b"\x48\x20\x43\x20\xbc\x67\xa6\xa8\x2c\x20\xa6\xd3\x20\x50\x79\x74" +b"\x68\x6f\x6e\x20\xac\x4f\xa4\x40\xad\xd3\x20\x66\x61\x73\x74\x20" +b"\x70\x72\x6f\x74\x6f\x74\x79\x70\x69\x6e\x67\x20\xaa\xba\x0a\x70" +b"\x72\x6f\x67\x72\x61\x6d\x6d\x69\x6e\x67\x20\x6c\x61\x6e\x67\x75" +b"\x61\x67\x65\x2e\x20\xac\x47\xa7\xda\xad\xcc\xa7\xc6\xb1\xe6\xaf" +b"\xe0\xb1\x4e\xac\x4a\xa6\xb3\xaa\xba\x20\x43\x20\x6c\x69\x62\x72" +b"\x61\x72\x79\x20\xae\xb3\xa8\xec\x0a\x50\x79\x74\x68\x6f\x6e\x20" +b"\xaa\xba\xc0\xf4\xb9\xd2\xa4\xa4\xb4\xfa\xb8\xd5\xa4\xce\xbe\xe3" +b"\xa6\x58\x2e\x20\xa8\xe4\xa4\xa4\xb3\xcc\xa5\x44\xad\x6e\xa4\x5d" +b"\xac\x4f\xa7\xda\xad\xcc\xa9\xd2\xad\x6e\xb0\x51\xbd\xd7\xaa\xba" +b"\xb0\xdd\xc3\x44\xb4\x4e\xac\x4f\x3a\x0a", +b"\xe3\x80\x81\xef\xa1\xbd\xe3\x80\x81\xef\xbc\x8e\xef\xa1\xbe\xef" 
+b"\xbc\x8e\xef\xa1\xbd\xe3\x80\x95\xe3\x80\x95\xe7\xa1\xbf\x0a\xe6" +b"\x88\x91\xe5\x80\x91\xe4\xbe\xbf\xe5\xb8\xb8\xe5\xb8\x8c\xe6\x9c" +b"\x9b\xe8\x83\xbd\xe5\x88\xa9\xe7\x94\xa8\xe4\xb8\x80\xe4\xba\x9b" +b"\xe5\xb7\xb2\xe9\x96\x8b\xe7\x99\xbc\xe5\xa5\xbd\xe7\x9a\x84\x6c" +b"\x69\x62\x72\x61\x72\x79\x2c\x20\xe4\xb8\xa6\xe6\x9c\x89\xe4\xb8" +b"\x80\xe5\x80\x8b\x20\x66\x61\x73\x74\x0a\x70\x72\x6f\x74\x6f\x74" +b"\x79\x70\x69\x6e\x67\x20\xe7\x9a\x84\x20\x70\x72\x6f\x67\x72\x61" +b"\x6d\x6d\x69\x6e\x67\x20\x6c\x61\x6e\x67\x75\x61\x67\x65\x20\xe5" +b"\x8f\xaf\xe4\xbe\x9b\xe4\xbd\xbf\xe7\x94\xa8\x2e\x20\xe7\x9b\xae" +b"\xe5\x89\x8d\xe6\x9c\x89\xe8\xa8\xb1\xe8\xa8\xb1\xe5\xa4\x9a\xe5" +b"\xa4\x9a\xe7\x9a\x84\x0a\x6c\x69\x62\x72\x61\x72\x79\x20\xe6\x98" +b"\xaf\xe4\xbb\xa5\x20\x43\x20\xe5\xaf\xab\xe6\x88\x90\x2c\x20\xe8" +b"\x80\x8c\x20\x50\x79\x74\x68\x6f\x6e\x20\xe6\x98\xaf\xe4\xb8\x80" +b"\xe5\x80\x8b\x20\x66\x61\x73\x74\x20\x70\x72\x6f\x74\x6f\x74\x79" +b"\x70\x69\x6e\x67\x20\xe7\x9a\x84\x0a\x70\x72\x6f\x67\x72\x61\x6d" +b"\x6d\x69\x6e\x67\x20\x6c\x61\x6e\x67\x75\x61\x67\x65\x2e\x20\xe6" +b"\x95\x85\xe6\x88\x91\xe5\x80\x91\xe5\xb8\x8c\xe6\x9c\x9b\xe8\x83" +b"\xbd\xe5\xb0\x87\xe6\x97\xa2\xe6\x9c\x89\xe7\x9a\x84\x20\x43\x20" +b"\x6c\x69\x62\x72\x61\x72\x79\x20\xe6\x8b\xbf\xe5\x88\xb0\x0a\x50" +b"\x79\x74\x68\x6f\x6e\x20\xe7\x9a\x84\xe7\x92\xb0\xe5\xa2\x83\xe4" +b"\xb8\xad\xe6\xb8\xac\xe8\xa9\xa6\xe5\x8f\x8a\xe6\x95\xb4\xe5\x90" +b"\x88\x2e\x20\xe5\x85\xb6\xe4\xb8\xad\xe6\x9c\x80\xe4\xb8\xbb\xe8" +b"\xa6\x81\xe4\xb9\x9f\xe6\x98\xaf\xe6\x88\x91\xe5\x80\x91\xe6\x89" +b"\x80\xe8\xa6\x81\xe8\xa8\x8e\xe8\xab\x96\xe7\x9a\x84\xe5\x95\x8f" +b"\xe9\xa1\x8c\xe5\xb0\xb1\xe6\x98\xaf\x3a\x0a"), +'mac_japanese': ( +b"\x50\x79\x74\x68\x6f\x6e\x20\x82\xcc\x8a\x4a\x94\xad\x82\xcd\x81" +b"\x41\x31\x39\x39\x30\x20\x94\x4e\x82\xb2\x82\xeb\x82\xa9\x82\xe7" +b"\x8a\x4a\x8e\x6e\x82\xb3\x82\xea\x82\xc4\x82\xa2\x82\xdc\x82\xb7" 
+b"\x81\x42\x0a\x8a\x4a\x94\xad\x8e\xd2\x82\xcc\x20\x47\x75\x69\x64" +b"\x6f\x20\x76\x61\x6e\x20\x52\x6f\x73\x73\x75\x6d\x20\x82\xcd\x8b" +b"\xb3\x88\xe7\x97\x70\x82\xcc\x83\x76\x83\x8d\x83\x4f\x83\x89\x83" +b"\x7e\x83\x93\x83\x4f\x8c\xbe\x8c\xea\x81\x75\x41\x42\x43\x81\x76" +b"\x82\xcc\x8a\x4a\x94\xad\x82\xc9\x8e\x51\x89\xc1\x82\xb5\x82\xc4" +b"\x82\xa2\x82\xdc\x82\xb5\x20\x82\xbd\x82\xaa\x81\x41\x41\x42\x43" +b"\x20\x82\xcd\x8e\xc0\x97\x70\x8f\xe3\x82\xcc\x96\xda\x93\x49\x82" +b"\xc9\x82\xcd\x82\xa0\x82\xdc\x82\xe8\x93\x4b\x82\xb5\x82\xc4\x82" +b"\xa2\x82\xdc\x82\xb9\x82\xf1\x82\xc5\x82\xb5\x82\xbd\x81\x42\x0a" +b"\x82\xb1\x82\xcc\x82\xbd\x82\xdf\x81\x41\x47\x75\x69\x64\x6f\x20" +b"\x82\xcd\x82\xe6\x82\xe8\x8e\xc0\x97\x70\x93\x49\x82\xc8\x83\x76" +b"\x83\x8d\x83\x4f\x83\x89\x83\x7e\x83\x93\x83\x4f\x8c\xbe\x8c\xea" +b"\x82\xcc\x8a\x4a\x94\xad\x82\xf0\x8a\x4a\x8e\x6e\x82\xb5\x81\x41" +b"\x89\x70\x8d\x91\x20\x42\x42\x53\x20\x95\xfa\x91\x97\x82\xcc\x83" +b"\x52\x83\x81\x20\x83\x66\x83\x42\x94\xd4\x91\x67\x81\x75\x83\x82" +b"\x83\x93\x83\x65\x83\x42\x20\x83\x70\x83\x43\x83\x5c\x83\x93\x81" +b"\x76\x82\xcc\x83\x74\x83\x40\x83\x93\x82\xc5\x82\xa0\x82\xe9\x20" +b"\x47\x75\x69\x64\x6f\x20\x82\xcd\x82\xb1\x82\xcc\x8c\xbe\x8c\xea" +b"\x82\xf0\x81\x75\x50\x79\x74\x68\x6f\x6e\x81\x76\x82\xc6\x96\xbc" +b"\x82\xc3\x82\xaf\x82\xdc\x82\xb5\x82\xbd\x81\x42\x0a\x82\xb1\x82" +b"\xcc\x82\xe6\x82\xa4\x82\xc8\x94\x77\x8c\x69\x82\xa9\x82\xe7\x90" +b"\xb6\x82\xdc\x82\xea\x82\xbd\x20\x50\x79\x74\x68\x6f\x6e\x20\x82" +b"\xcc\x8c\xbe\x8c\xea\x90\xdd\x8c\x76\x82\xcd\x81\x41\x81\x75\x83" +b"\x56\x83\x93\x83\x76\x83\x8b\x81\x76\x82\xc5\x81\x75\x8f\x4b\x93" +b"\xbe\x82\xaa\x97\x65\x88\xd5\x81\x76\x82\xc6\x82\xa2\x82\xa4\x96" +b"\xda\x95\x57\x82\xc9\x8f\x64\x93\x5f\x82\xaa\x92\x75\x82\xa9\x82" +b"\xea\x82\xc4\x82\xa2\x82\xdc\x82\xb7\x81\x42\x0a\xf0\x41\xfc\x40" +b"\x87\x91\x7e\x81\x5c\x81\x5f\xa0\x86\x5d\x86\x9e\x46\x41\x58\x87" +b"\xfc\x0a", 
+b"\x50\x79\x74\x68\x6f\x6e\x20\xe3\x81\xae\xe9\x96\x8b\xe7\x99\xba" +b"\xe3\x81\xaf\xe3\x80\x81\x31\x39\x39\x30\x20\xe5\xb9\xb4\xe3\x81" +b"\x94\xe3\x82\x8d\xe3\x81\x8b\xe3\x82\x89\xe9\x96\x8b\xe5\xa7\x8b" +b"\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe\xe3" +b"\x81\x99\xe3\x80\x82\x0a\xe9\x96\x8b\xe7\x99\xba\xe8\x80\x85\xe3" +b"\x81\xae\x20\x47\x75\x69\x64\x6f\x20\x76\x61\x6e\x20\x52\x6f\x73" +b"\x73\x75\x6d\x20\xe3\x81\xaf\xe6\x95\x99\xe8\x82\xb2\xe7\x94\xa8" +b"\xe3\x81\xae\xe3\x83\x97\xe3\x83\xad\xe3\x82\xb0\xe3\x83\xa9\xe3" +b"\x83\x9f\xe3\x83\xb3\xe3\x82\xb0\xe8\xa8\x80\xe8\xaa\x9e\xe3\x80" +b"\x8c\x41\x42\x43\xe3\x80\x8d\xe3\x81\xae\xe9\x96\x8b\xe7\x99\xba" +b"\xe3\x81\xab\xe5\x8f\x82\xe5\x8a\xa0\xe3\x81\x97\xe3\x81\xa6\xe3" +b"\x81\x84\xe3\x81\xbe\xe3\x81\x97\x20\xe3\x81\x9f\xe3\x81\x8c\xe3" +b"\x80\x81\x41\x42\x43\x20\xe3\x81\xaf\xe5\xae\x9f\xe7\x94\xa8\xe4" +b"\xb8\x8a\xe3\x81\xae\xe7\x9b\xae\xe7\x9a\x84\xe3\x81\xab\xe3\x81" +b"\xaf\xe3\x81\x82\xe3\x81\xbe\xe3\x82\x8a\xe9\x81\xa9\xe3\x81\x97" +b"\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3" +b"\x81\xa7\xe3\x81\x97\xe3\x81\x9f\xe3\x80\x82\x0a\xe3\x81\x93\xe3" +b"\x81\xae\xe3\x81\x9f\xe3\x82\x81\xe3\x80\x81\x47\x75\x69\x64\x6f" +b"\x20\xe3\x81\xaf\xe3\x82\x88\xe3\x82\x8a\xe5\xae\x9f\xe7\x94\xa8" +b"\xe7\x9a\x84\xe3\x81\xaa\xe3\x83\x97\xe3\x83\xad\xe3\x82\xb0\xe3" +b"\x83\xa9\xe3\x83\x9f\xe3\x83\xb3\xe3\x82\xb0\xe8\xa8\x80\xe8\xaa" +b"\x9e\xe3\x81\xae\xe9\x96\x8b\xe7\x99\xba\xe3\x82\x92\xe9\x96\x8b" +b"\xe5\xa7\x8b\xe3\x81\x97\xe3\x80\x81\xe8\x8b\xb1\xe5\x9b\xbd\x20" +b"\x42\x42\x53\x20\xe6\x94\xbe\xe9\x80\x81\xe3\x81\xae\xe3\x82\xb3" +b"\xe3\x83\xa1\x20\xe3\x83\x87\xe3\x82\xa3\xe7\x95\xaa\xe7\xb5\x84" +b"\xe3\x80\x8c\xe3\x83\xa2\xe3\x83\xb3\xe3\x83\x86\xe3\x82\xa3\x20" +b"\xe3\x83\x91\xe3\x82\xa4\xe3\x82\xbd\xe3\x83\xb3\xe3\x80\x8d\xe3" +b"\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x83\xb3\xe3\x81\xa7\xe3\x81" 
+b"\x82\xe3\x82\x8b\x20\x47\x75\x69\x64\x6f\x20\xe3\x81\xaf\xe3\x81" +b"\x93\xe3\x81\xae\xe8\xa8\x80\xe8\xaa\x9e\xe3\x82\x92\xe3\x80\x8c" +b"\x50\x79\x74\x68\x6f\x6e\xe3\x80\x8d\xe3\x81\xa8\xe5\x90\x8d\xe3" +b"\x81\xa5\xe3\x81\x91\xe3\x81\xbe\xe3\x81\x97\xe3\x81\x9f\xe3\x80" +b"\x82\x0a\xe3\x81\x93\xe3\x81\xae\xe3\x82\x88\xe3\x81\x86\xe3\x81" +b"\xaa\xe8\x83\x8c\xe6\x99\xaf\xe3\x81\x8b\xe3\x82\x89\xe7\x94\x9f" +b"\xe3\x81\xbe\xe3\x82\x8c\xe3\x81\x9f\x20\x50\x79\x74\x68\x6f\x6e" +b"\x20\xe3\x81\xae\xe8\xa8\x80\xe8\xaa\x9e\xe8\xa8\xad\xe8\xa8\x88" +b"\xe3\x81\xaf\xe3\x80\x81\xe3\x80\x8c\xe3\x82\xb7\xe3\x83\xb3\xe3" +b"\x83\x97\xe3\x83\xab\xe3\x80\x8d\xe3\x81\xa7\xe3\x80\x8c\xe7\xbf" +b"\x92\xe5\xbe\x97\xe3\x81\x8c\xe5\xae\xb9\xe6\x98\x93\xe3\x80\x8d" +b"\xe3\x81\xa8\xe3\x81\x84\xe3\x81\x86\xe7\x9b\xae\xe6\xa8\x99\xe3" +b"\x81\xab\xe9\x87\x8d\xe7\x82\xb9\xe3\x81\x8c\xe7\xbd\xae\xe3\x81" +b"\x8b\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99" +b"\xe3\x80\x82\x0a\xee\x80\x81\xee\xa3\x90\xe5\xa4\xa7\xe2\x83\x9d" +b"\x7e\xe2\x80\x94\xef\xbc\xbc\xc2\xa0\xef\xa1\xa0\x54\x42\xef\xa1" +b"\xa1\x46\x41\x58\x46\x41\x58\xef\xa1\xa2\xe8\xb2\xa1\xe5\x9b\xa3" +b"\xe6\xb3\x95\xe4\xba\xba\x0a"), +'mac_korean': ( +b"\xa1\xdd\x20\xc6\xc4\xc0\xcc\xbd\xe3\x28\x50\x79\x74\x68\x6f\x6e" +b"\x29\xc0\xba\x20\xb9\xe8\xbf\xec\xb1\xe2\x20\xbd\xb1\xb0\xed\x2c" +b"\x20\xb0\xad\xb7\xc2\xc7\xd1\x20\xc7\xc1\xb7\xce\xb1\xd7\xb7\xa1" +b"\xb9\xd6\x20\xbe\xf0\xbe\xee\xc0\xd4\xb4\xcf\xb4\xd9\x2e\x20\xc6" +b"\xc4\xc0\xcc\xbd\xe3\xc0\xba\x0a\xc8\xbf\xc0\xb2\xc0\xfb\xc0\xce" +b"\x20\xb0\xed\xbc\xf6\xc1\xd8\x20\xb5\xa5\xc0\xcc\xc5\xcd\x20\xb1" +b"\xb8\xc1\xb6\xbf\xcd\x20\xb0\xa3\xb4\xdc\xc7\xcf\xc1\xf6\xb8\xb8" +b"\x20\xc8\xbf\xc0\xb2\xc0\xfb\xc0\xce\x20\xb0\xb4\xc3\xbc\xc1\xf6" +b"\xc7\xe2\xc7\xc1\xb7\xce\xb1\xd7\xb7\xa1\xb9\xd6\xc0\xbb\x0a\xc1" +b"\xf6\xbf\xf8\xc7\xd5\xb4\xcf\xb4\xd9\x2e\x20\xc6\xc4\xc0\xcc\xbd" +b"\xe3\xc0\xc7\x20\xbf\xec\xbe\xc6\x28\xe9\xd0\xe4\xba\x29\xc7\xd1" 
+b"\x20\xb9\xae\xb9\xfd\xb0\xfa\x20\xb5\xbf\xc0\xfb\x20\xc5\xb8\xc0" +b"\xcc\xc7\xce\x2c\x20\xb1\xd7\xb8\xae\xb0\xed\x20\xc0\xce\xc5\xcd" +b"\xc7\xc1\xb8\xae\xc6\xc3\x0a\xc8\xaf\xb0\xe6\xc0\xba\x20\xc6\xc4" +b"\xc0\xcc\xbd\xe3\xc0\xbb\x20\xbd\xba\xc5\xa9\xb8\xb3\xc6\xc3\xb0" +b"\xfa\x20\xbf\xa9\xb7\xaf\x20\xba\xd0\xbe\xdf\xbf\xa1\xbc\xad\xbf" +b"\xcd\x20\xb4\xeb\xba\xce\xba\xd0\xc0\xc7\x20\xc7\xc3\xb7\xa7\xc6" +b"\xfb\xbf\xa1\xbc\xad\xc0\xc7\x20\xba\xfc\xb8\xa5\x0a\xbe\xd6\xc7" +b"\xc3\xb8\xae\xc4\xc9\xc0\xcc\xbc\xc7\x20\xb0\xb3\xb9\xdf\xc0\xbb" +b"\x20\xc7\xd2\x20\xbc\xf6\x20\xc0\xd6\xb4\xc2\x20\xc0\xcc\xbb\xf3" +b"\xc0\xfb\xc0\xce\x20\xbe\xf0\xbe\xee\xb7\xce\x20\xb8\xb8\xb5\xe9" +b"\xbe\xee\xc1\xdd\xb4\xcf\xb4\xd9\x2e\x0a\xa1\xcb\xa1\xcc\xa3\xfe" +b"\x9a\xa1\x4b\xa1\xb4\xbc\xf6\x20\xa2\xe8\xc1\xa4\xa2\xf0\x5d\x31" +b"\xa3\x91\xaa\x50\x0a", +b"\xe2\x97\x8e\x20\xed\x8c\x8c\xec\x9d\xb4\xec\x8d\xac\x28\x50\x79" +b"\x74\x68\x6f\x6e\x29\xec\x9d\x80\x20\xeb\xb0\xb0\xec\x9a\xb0\xea" +b"\xb8\xb0\x20\xec\x89\xbd\xea\xb3\xa0\x2c\x20\xea\xb0\x95\xeb\xa0" +b"\xa5\xed\x95\x9c\x20\xed\x94\x84\xeb\xa1\x9c\xea\xb7\xb8\xeb\x9e" +b"\x98\xeb\xb0\x8d\x20\xec\x96\xb8\xec\x96\xb4\xec\x9e\x85\xeb\x8b" +b"\x88\xeb\x8b\xa4\x2e\x20\xed\x8c\x8c\xec\x9d\xb4\xec\x8d\xac\xec" +b"\x9d\x80\x0a\xed\x9a\xa8\xec\x9c\xa8\xec\xa0\x81\xec\x9d\xb8\x20" +b"\xea\xb3\xa0\xec\x88\x98\xec\xa4\x80\x20\xeb\x8d\xb0\xec\x9d\xb4" +b"\xed\x84\xb0\x20\xea\xb5\xac\xec\xa1\xb0\xec\x99\x80\x20\xea\xb0" +b"\x84\xeb\x8b\xa8\xed\x95\x98\xec\xa7\x80\xeb\xa7\x8c\x20\xed\x9a" +b"\xa8\xec\x9c\xa8\xec\xa0\x81\xec\x9d\xb8\x20\xea\xb0\x9d\xec\xb2" +b"\xb4\xec\xa7\x80\xed\x96\xa5\xed\x94\x84\xeb\xa1\x9c\xea\xb7\xb8" +b"\xeb\x9e\x98\xeb\xb0\x8d\xec\x9d\x84\x0a\xec\xa7\x80\xec\x9b\x90" +b"\xed\x95\xa9\xeb\x8b\x88\xeb\x8b\xa4\x2e\x20\xed\x8c\x8c\xec\x9d" +b"\xb4\xec\x8d\xac\xec\x9d\x98\x20\xec\x9a\xb0\xec\x95\x84\x28\xe5" +b"\x84\xaa\xe9\x9b\x85\x29\xed\x95\x9c\x20\xeb\xac\xb8\xeb\xb2\x95" 
+b"\xea\xb3\xbc\x20\xeb\x8f\x99\xec\xa0\x81\x20\xed\x83\x80\xec\x9d" +b"\xb4\xed\x95\x91\x2c\x20\xea\xb7\xb8\xeb\xa6\xac\xea\xb3\xa0\x20" +b"\xec\x9d\xb8\xed\x84\xb0\xed\x94\x84\xeb\xa6\xac\xed\x8c\x85\x0a" +b"\xed\x99\x98\xea\xb2\xbd\xec\x9d\x80\x20\xed\x8c\x8c\xec\x9d\xb4" +b"\xec\x8d\xac\xec\x9d\x84\x20\xec\x8a\xa4\xed\x81\xac\xeb\xa6\xbd" +b"\xed\x8c\x85\xea\xb3\xbc\x20\xec\x97\xac\xeb\x9f\xac\x20\xeb\xb6" +b"\x84\xec\x95\xbc\xec\x97\x90\xec\x84\x9c\xec\x99\x80\x20\xeb\x8c" +b"\x80\xeb\xb6\x80\xeb\xb6\x84\xec\x9d\x98\x20\xed\x94\x8c\xeb\x9e" +b"\xab\xed\x8f\xbc\xec\x97\x90\xec\x84\x9c\xec\x9d\x98\x20\xeb\xb9" +b"\xa0\xeb\xa5\xb8\x0a\xec\x95\xa0\xed\x94\x8c\xeb\xa6\xac\xec\xbc" +b"\x80\xec\x9d\xb4\xec\x85\x98\x20\xea\xb0\x9c\xeb\xb0\x9c\xec\x9d" +b"\x84\x20\xed\x95\xa0\x20\xec\x88\x98\x20\xec\x9e\x88\xeb\x8a\x94" +b"\x20\xec\x9d\xb4\xec\x83\x81\xec\xa0\x81\xec\x9d\xb8\x20\xec\x96" +b"\xb8\xec\x96\xb4\xeb\xa1\x9c\x20\xeb\xa7\x8c\xeb\x93\xa4\xec\x96" +b"\xb4\xec\xa4\x8d\xeb\x8b\x88\xeb\x8b\xa4\x2e\x0a\xc2\xa2\xc2\xa3" +b"\xe2\x80\xbe\xc2\x9a\xe3\x80\x88\xef\xa1\xb8\xe3\x80\x88\xec\x88" +b"\x98\x20\x33\xe2\x83\x9e\xef\xa1\xbc\xec\xa0\x95\xef\xa1\xa3\x5b" +b"\x31\x31\x5d\x5d\x31\xe2\x93\x81\xea\xb0\x90\xe2\x83\x9e\x0a"), 'shift_jis': ( b"\x50\x79\x74\x68\x6f\x6e\x20\x82\xcc\x8a\x4a\x94\xad\x82\xcd\x81" b"\x41\x31\x39\x39\x30\x20\x94\x4e\x82\xb2\x82\xeb\x82\xa9\x82\xe7" Index: Lib/test/test_codecencodings_jp.py =================================================================== --- Lib/test/test_codecencodings_jp.py (revision 64533) +++ Lib/test/test_codecencodings_jp.py (working copy) @@ -98,6 +98,19 @@ b"\x85Gℜ\x85Q = ⟨ሴ⟩" ) +class Test_MacJapanese(test_multibytecodec_support.TestBase, + unittest.TestCase): + encoding = 'mac_japanese' + tstring = test_multibytecodec_support.load_teststring('mac_japanese') + codectests = ( + # invalid codepoints + (b"abc\x81\xec\x81\xec\x82\x84", "strict", None), + (b"abc\xf8", "strict", None), + (b"abc\x81\xec\x82\x84", "replace", 
"abc\ufffd\uff44"),
+        (b"abc\x81\xec\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
+        (b"abc\x81\xec\x81\xec\x82\x84def", "ignore", "abc\uff44def"),
+    )
+
 def test_main():
     support.run_unittest(__name__)
 
Index: Lib/test/test_multibytecodec_support.py
===================================================================
--- Lib/test/test_multibytecodec_support.py	(revision 64533)
+++ Lib/test/test_multibytecodec_support.py	(working copy)
@@ -316,7 +316,7 @@
                 continue
 
             unich = unichrs(data[1])
-            if ord(unich) == 0xfffd or unich in urt_wa:
+            if unich == '\ufffd' or unich in urt_wa:
                 continue
             urt_wa[unich] = csetch
 
Index: Modules/cjkcodecs/_codecs_tw.c
===================================================================
--- Modules/cjkcodecs/_codecs_tw.c	(revision 64533)
+++ Modules/cjkcodecs/_codecs_tw.c	(working copy)
@@ -125,8 +125,8 @@
 END_MAPPINGS_LIST
 
 BEGIN_CODECS_LIST
-  CODEC_STATELESS(big5)
-  CODEC_STATELESS(cp950)
+  CODEC_STATELESS(big5, 1, 2)
+  CODEC_STATELESS(cp950, 1, 2)
 END_CODECS_LIST
 
 I_AM_A_MODULE_FOR(tw)
Index: Modules/cjkcodecs/multibytecodec.c
===================================================================
--- Modules/cjkcodecs/multibytecodec.c	(revision 64533)
+++ Modules/cjkcodecs/multibytecodec.c	(working copy)
@@ -684,6 +684,9 @@
 static void
 multibytecodec_dealloc(MultibyteCodecObject *self)
 {
+	if (self->dataref != NULL) {
+		Py_DECREF(self->dataref);
+	}
 	PyObject_Del(self);
 }
 
@@ -724,11 +727,24 @@
  * Utility functions for stateful codec mechanism
  */
 
-#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
-#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
+#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoder *)(o))
+#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoder *)(o))
 
 static PyObject *
-encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
+allocate_variable_codec(PyTypeObject *type, PyObject *codec, int enc)
+{
+	MultibyteCodecObject *c = (MultibyteCodecObject *)codec;
+	Py_ssize_t size = enc ? c->codec->encbufsize : c->codec->decbufsize;
+	PyObject *r;
+	/* allocate with room for encbufsize (enc) or decbufsize pendings */
+	r = type->tp_alloc(type, size - 1); /* pendings don't need sentinel */
+	if (r != NULL) /* on failure, just hand NULL back to the caller */
+		Py_SIZE(r) = size; /* fix up */
+	return r;
+}
+
+static PyObject *
+encoder_encode_stateful(MultibyteStatefulEncoder *ctx,
 			PyObject *unistr, int final)
 {
 	PyObject *ucvt, *r = NULL;
@@ -789,7 +805,7 @@
 
 	if (inbuf < inbuf_end) {
 		ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
-		if (ctx->pendingsize > MAXENCPENDING) {
+		if (ctx->pendingsize > Py_SIZE(ctx)) {
 			/* normal codecs can't reach here */
 			ctx->pendingsize = 0;
 			PyErr_SetString(PyExc_UnicodeError,
@@ -814,16 +830,16 @@
 }
 
 static int
-decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
+decoder_append_pending(MultibyteStatefulDecoder *ctx,
 		       MultibyteDecodeBuffer *buf)
 {
 	Py_ssize_t npendings;
 
 	npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
-    if (npendings + ctx->pendingsize > MAXDECPENDING ||
-        npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
-            PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
-            return -1;
+	if (npendings + ctx->pendingsize > Py_SIZE(ctx) ||
+	    npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
+		PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
+		return -1;
     }
 	memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
 	ctx->pendingsize += npendings;
@@ -849,7 +865,7 @@
 }
 
 static int
-decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
+decoder_feed_buffer(MultibyteStatefulDecoder *ctx,
 		    MultibyteDecodeBuffer *buf)
 {
 	while (buf->inbuf < buf->inbuf_end) {
@@ -919,18 +935,19 @@
 					 incnewkwarglist, &errors))
 		return NULL;
 
-	self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
-	if (self == NULL)
-		return NULL;
-
 	codec = PyObject_GetAttrString((PyObject *)type, "codec");
 	if (codec == NULL)
-		goto errorexit;
+		return NULL;
 	if (!MultibyteCodec_Check(codec)) {
 		PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
-		goto errorexit;
+		return NULL;
 	}
 
+	self = (MultibyteIncrementalEncoderObject *)
+
allocate_variable_codec(type, codec, 1); + if (self == NULL) + goto errorexit; + self->codec = ((MultibyteCodecObject *)codec)->codec; self->pendingsize = 0; self->errors = internal_error_callback(errors); @@ -975,8 +992,9 @@ static PyTypeObject MultibyteIncrementalEncoder_Type = { PyVarObject_HEAD_INIT(NULL, 0) "MultibyteIncrementalEncoder", /* tp_name */ - sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ + sizeof(MultibyteIncrementalEncoderObject) - sizeof(Py_UNICODE), + /* tp_basicsize */ + sizeof(Py_UNICODE), /* tp_itemsize */ /* methods */ (destructor)mbiencoder_dealloc, /* tp_dealloc */ 0, /* tp_print */ @@ -1123,18 +1141,19 @@ incnewkwarglist, &errors)) return NULL; - self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - codec = PyObject_GetAttrString((PyObject *)type, "codec"); if (codec == NULL) - goto errorexit; + return NULL; if (!MultibyteCodec_Check(codec)) { PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); - goto errorexit; + return NULL; } + self = (MultibyteIncrementalDecoderObject *) + allocate_variable_codec(type, codec, 0); + if (self == NULL) + goto errorexit; + self->codec = ((MultibyteCodecObject *)codec)->codec; self->pendingsize = 0; self->errors = internal_error_callback(errors); @@ -1179,8 +1198,9 @@ static PyTypeObject MultibyteIncrementalDecoder_Type = { PyVarObject_HEAD_INIT(NULL, 0) "MultibyteIncrementalDecoder", /* tp_name */ - sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ + sizeof(MultibyteIncrementalDecoderObject) - sizeof(unsigned char), + /* tp_basicsize */ + sizeof(unsigned char), /* tp_itemsize */ /* methods */ (destructor)mbidecoder_dealloc, /* tp_dealloc */ 0, /* tp_print */ @@ -1288,7 +1308,7 @@ goto errorexit; if (rsize > 0 && decoder_feed_buffer( - (MultibyteStatefulDecoderContext *)self, &buf)) + (MultibyteStatefulDecoder *)self, &buf)) goto errorexit; if (endoffile || sizehint < 0) { 
@@ -1449,18 +1469,19 @@ streamkwarglist, &stream, &errors)) return NULL; - self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - codec = PyObject_GetAttrString((PyObject *)type, "codec"); if (codec == NULL) - goto errorexit; + return NULL; if (!MultibyteCodec_Check(codec)) { PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); - goto errorexit; + return NULL; } + self = (MultibyteStreamReaderObject *) + allocate_variable_codec(type, codec, 0); + if (self == NULL) + goto errorexit; + self->codec = ((MultibyteCodecObject *)codec)->codec; self->stream = stream; Py_INCREF(stream); @@ -1509,8 +1530,9 @@ static PyTypeObject MultibyteStreamReader_Type = { PyVarObject_HEAD_INIT(NULL, 0) "MultibyteStreamReader", /* tp_name */ - sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ + sizeof(MultibyteStreamReaderObject) - sizeof(unsigned char), + /* tp_basicsize */ + sizeof(unsigned char), /* tp_itemsize */ /* methods */ (destructor)mbstreamreader_dealloc, /* tp_dealloc */ 0, /* tp_print */ @@ -1652,18 +1674,19 @@ streamkwarglist, &stream, &errors)) return NULL; - self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - codec = PyObject_GetAttrString((PyObject *)type, "codec"); if (codec == NULL) - goto errorexit; + return NULL; if (!MultibyteCodec_Check(codec)) { PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); - goto errorexit; + return NULL; } + self = (MultibyteStreamWriterObject *) + allocate_variable_codec(type, codec, 1); + if (self == NULL) + goto errorexit; + self->codec = ((MultibyteCodecObject *)codec)->codec; self->stream = stream; Py_INCREF(stream); @@ -1729,8 +1752,9 @@ static PyTypeObject MultibyteStreamWriter_Type = { PyVarObject_HEAD_INIT(NULL, 0) "MultibyteStreamWriter", /* tp_name */ - sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ - 0, /* tp_itemsize */ + sizeof(MultibyteStreamWriterObject) - sizeof(Py_UNICODE), + 
/* tp_basicsize */
+	sizeof(Py_UNICODE),		/* tp_itemsize */
 	/* methods */
 	(destructor)mbstreamwriter_dealloc, /* tp_dealloc */
 	0,				/* tp_print */
@@ -1769,7 +1793,6 @@
 	mbstreamwriter_new,		/* tp_new */
 };
 
-
 /**
  * Exposed factory function
  */
@@ -1793,12 +1816,439 @@
 	if (self == NULL)
 		return NULL;
 	self->codec = codec;
+	self->dataref = arg;
+	Py_INCREF(arg);
 
 	return (PyObject *)self;
 }
+/**
+ * Dictionary-driven codec extension adaptor
+ */
+
+struct extcodec_config {
+	PyObject *encovermap;		/* dictionary for encoding mapping */
+	PyObject *decovermap;		/* dictionary for decoding mapping */
+	MultibyteCodec *basecodec;	/* codec which will be overridden */
+	PyObject *dataref;		/* object reference to basecodec */
+	Py_ssize_t encbufsize;		/* maximum buffer size for encoding */
+	Py_ssize_t decbufsize;		/* maximum buffer size for decoding */
+	void *baseconfig;		/* configuration flag for basecodec */
+};
+
+static int
+extcodec_init(const void *config)
+{
+	/* init is already called when the extension wrapper is created. */
+	return 0;
+}
+
+/* Encode *inbuf, preferring entries of cfg->encovermap over the base codec.
+ * Map values: bytes (final output), None (invalid), int (retry with that
+ * window size) or (int, bytes) (candidate output plus a larger window to
+ * try).  Input not covered by the map is delegated to the base codec. */
+static Py_ssize_t
+extcodec_encode(MultibyteCodec_State *state,
+		const void *config,
+		const Py_UNICODE **inbuf, Py_ssize_t inleft,
+		unsigned char **outbuf, Py_ssize_t outleft,
+		int flags)
+{
+	const struct extcodec_config *cfg;
+	Py_ssize_t winsize;
+
+	cfg = (const struct extcodec_config *)config;
+
+	while (inleft > 0) {
+		Py_ssize_t written = -1, winputsize;
+
+		for (winsize = 1;;) {
+			PyObject *lookup, *found;
+			Py_ssize_t nextwinsize;
+
+			if (inleft < winsize) {
+				if (written >= 0 &&
+				    (flags & MBENC_FLUSH)) {
+					winsize = 0;
+					break;
+				}
+				else
+					return MBERR_TOOFEW;
+			}
+
+			lookup = PyUnicode_FromUnicode(*inbuf, winsize);
+			if (lookup == NULL)
+				return MBERR_INTERNAL;
+
+			found = PyDict_GetItem(cfg->encovermap, lookup);
+			Py_DECREF(lookup);
+			if (found == NULL) {
+				/* if already have a match for shorter input,
+				 * avoid delegation to the base codec. */
+				if (written >= 0)
+					winsize = 0;
+				break;
+			}
+			else if (found == Py_None) /* invalid sequence */
+				return winsize;
+			else if (PyBytes_Check(found)) {/* found in overmap */
+				if (outleft < PyBytes_GET_SIZE(found))
+					return MBERR_TOOSMALL;
+
+				memcpy(*outbuf, PyBytes_AS_STRING(found),
+				       PyBytes_GET_SIZE(found));
+				written = PyBytes_GET_SIZE(found);
+				winputsize = winsize;
+				winsize = 0;
+				break;
+			}
+			else if (PyLong_Check(found)) /* set window size */
+				nextwinsize = PyLong_AS_LONG(found);
+			else if (PyTuple_Check(found) &&
+				 PyTuple_GET_SIZE(found) == 2 &&
+				 PyLong_Check(PyTuple_GET_ITEM(found, 0)) &&
+				 PyBytes_Check(PyTuple_GET_ITEM(found, 1))) {
+				/* set window size and write shorter candidate
+				 * sequence in outbuf. It will be overwritten
+				 * if longer match is found in overmap, or
+				 * survive otherwise. */
+				PyObject *candidate;
+
+				candidate = PyTuple_GET_ITEM(found, 1);
+				written = PyBytes_GET_SIZE(candidate);
+				winputsize = winsize;
+
+				if (outleft < written)
+					return MBERR_TOOSMALL;
+				memcpy(*outbuf, PyBytes_AS_STRING(candidate),
+				       written);
+
+				nextwinsize = PyLong_AS_LONG(
+						PyTuple_GET_ITEM(found, 0));
+			}
+			else
+				return MBERR_INTERNAL;
+
+			/* window size must grow in order not to fall into an
+			 * infinite loop. */
+			if (nextwinsize <= winsize ||
+			    nextwinsize > cfg->encbufsize)
+				return MBERR_INTERNAL;
+
+			winsize = nextwinsize;
+		}
+
+		if (winsize > 0) { /* delegate to process a letter */
+			const Py_UNICODE *inpos;
+			const unsigned char *outpos;
+			int dflags;
+
+			/* try without FLUSH first; see MBERR_TOOFEW below */
+			dflags = flags & ~MBENC_FLUSH;
+			for (inpos = *inbuf, outpos = *outbuf;;) {
+				Py_ssize_t r;
+
+				r = cfg->basecodec->encode(state,
+					cfg->baseconfig, inbuf,
+					winsize > inleft ? inleft : winsize,
+					outbuf, outleft, dflags);
+				inleft -= (Py_ssize_t)(*inbuf - inpos);
+				outleft -= (Py_ssize_t)(*outbuf - outpos);
+				if (r == 0)
+					break;
+
+				if (r == MBERR_TOOFEW) {
+					if (winsize < inleft &&
+					    winsize <= cfg->decbufsize)
+						winsize++;
+					else if (!(dflags & MBENC_FLUSH) &&
+						 (flags & MBENC_FLUSH))
+						dflags |= MBENC_FLUSH;
+					else
+						return MBERR_TOOFEW;
+				}
+				else
+					return r;
+			}
+		}
+		else if (written >= 0) { /* processed by overmap */
+			(*outbuf) += written;
+			outleft -= written;
+			(*inbuf) += winputsize;
+			inleft -= winputsize;
+		}
+	}
+
+	return 0;
+}
+
+static int
+extcodec_encinit(MultibyteCodec_State *state, const void *config)
+{
+	const struct extcodec_config *cfg;
+
+	cfg = (const struct extcodec_config *)config;
+
+	if (cfg->basecodec->encinit != NULL)
+		return cfg->basecodec->encinit(state, cfg->baseconfig);
+	else
+		return 0;
+}
+
+static Py_ssize_t
+extcodec_encreset(MultibyteCodec_State *state,
+		  const void *config,
+		  unsigned char **outbuf, Py_ssize_t outleft)
+{
+	const struct extcodec_config *cfg;
+
+	cfg = (const struct extcodec_config *)config;
+
+	if (cfg->basecodec->encreset != NULL)
+		return cfg->basecodec->encreset(state, cfg->baseconfig,
+						outbuf, outleft);
+	else
+		return 0;
+}
+
+/* Decode *inbuf, preferring entries of cfg->decovermap over the base codec.
+ * Map values: str (final output), None (invalid) or int (retry with that
+ * window size).  Input not covered by the map is delegated to the base
+ * codec. */
+static Py_ssize_t
+extcodec_decode(MultibyteCodec_State *state,
+		const void *config,
+		const unsigned char **inbuf, Py_ssize_t inleft,
+		Py_UNICODE **outbuf, Py_ssize_t outleft)
+{
+	const struct extcodec_config *cfg;
+	Py_ssize_t winsize;
+
+	cfg = (const struct extcodec_config *)config;
+
+	while (inleft > 0) {
+		for (winsize = 1;;) {
+			PyObject *lookup, *found;
+			Py_UNICODE *decoded;
+			Py_ssize_t i;
+
+			if (inleft < winsize)
+				return MBERR_TOOFEW;
+
+			lookup = PyBytes_FromStringAndSize(
+					(const char *)*inbuf, winsize);
+			if (lookup == NULL)
+				return MBERR_INTERNAL;
+
+			found = PyDict_GetItem(cfg->decovermap, lookup);
+			Py_DECREF(lookup);
+			if (found == Py_None) /* invalid sequence */
+				return winsize;
+			else if (found == NULL) /* delegate */
+				break;
+			else if (PyLong_Check(found)) { /* set window size */
+				Py_ssize_t nextwinsize;
+
+				nextwinsize = PyLong_AS_LONG(found);
+				if (nextwinsize <= winsize ||
+				    nextwinsize > cfg->decbufsize)
+					return MBERR_INTERNAL;
+
+				winsize = nextwinsize;
+				continue;
+			}
+			else if (!PyUnicode_Check(found))
+				return MBERR_INTERNAL;
+
+			/* found in overmap */
+			if (outleft < PyUnicode_GET_SIZE(found))
+				return MBERR_TOOSMALL;
+
+			decoded = PyUnicode_AS_UNICODE(found);
+			for (i = PyUnicode_GET_SIZE(found); i > 0; i--)
+				*(*outbuf)++ = *decoded++;
+
+			(*inbuf) += winsize;
+			inleft -= winsize;
+			outleft -= PyUnicode_GET_SIZE(found);
+			winsize = 0;
+			break;
+		}
+
+		if (winsize > 0) { /* delegate to process a letter */
+			const unsigned char *inpos;
+			const Py_UNICODE *outpos;
+			Py_ssize_t r;
+
+			for (inpos = *inbuf, outpos = *outbuf;;) {
+				r = cfg->basecodec->decode(state,
+					cfg->baseconfig, inbuf,
+					winsize > inleft ? inleft : winsize,
+					outbuf, outleft);
+				inleft -= (Py_ssize_t)(*inbuf - inpos);
+				outleft -= (Py_ssize_t)(*outbuf - outpos);
+
+				if (r == 0)
+					break;
+
+				if (r == MBERR_TOOFEW && winsize < inleft &&
+				    winsize <= cfg->decbufsize)
+					winsize++;
+				else
+					return r;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int
+extcodec_decinit(MultibyteCodec_State *state, const void *config)
+{
+	const struct extcodec_config *cfg;
+
+	cfg = (const struct extcodec_config *)config;
+
+	if (cfg->basecodec->decinit != NULL)
+		return cfg->basecodec->decinit(state, cfg->baseconfig);
+	else
+		return 0;
+}
+
+static Py_ssize_t
+extcodec_decreset(MultibyteCodec_State *state, const void *config)
+{
+	const struct extcodec_config *cfg;
+
+	cfg = (const struct extcodec_config *)config;
+
+	if (cfg->basecodec->decreset != NULL)
+		return cfg->basecodec->decreset(state, cfg->baseconfig);
+	else
+		return 0;
+}
+
+static const MultibyteCodec extcodec_template = {
+	NULL,			/* encoding */
+	NULL,			/* config */
+	extcodec_init,		/* codecinit */
+	extcodec_encode,	/* encode */
+	extcodec_encinit,	/* encinit */
+	extcodec_encreset,	/* encreset */
+	0,			/* encbufsize */
+	extcodec_decode,	/* decode */
+	extcodec_decinit,	/* decinit */
+	extcodec_decreset,	/* decreset */
+	0,			/* decbufsize */
+};
+
+static void
+extcodec_dealloc(void *codec)
+{
+	struct extcodec_config *config;
+
+	if (((MultibyteCodec *)codec)->encoding != NULL)
+		PyMem_Del((void *)((MultibyteCodec *)codec)->encoding);
+
+	config = (struct extcodec_config *)((MultibyteCodec *)codec)->config;
+	Py_DECREF(config->encovermap);
+	Py_DECREF(config->decovermap);
+	Py_DECREF(config->dataref);
+	PyMem_Del(config);
+
+	PyMem_Del(codec);
+}
+
+/* create_extcodec(name, basecodec, encode_map, decode_map, encbufsize,
+ *                 decbufsize, flags) -> codec object layering the two
+ * override dictionaries on top of basecodec.  flags is handed to the base
+ * codec as its config pointer. */
+static PyObject *
+create_extcodec(PyObject *self, PyObject *args)
+{
+	PyObject *encovermap, *decovermap, *basecodec, *extcodec_cobj;
+	PyObject *r;
+	MultibyteCodec *codec, *extcodec;
+	Py_ssize_t encbufsize, decbufsize, encnamelen;
+	char *encname;
+	int flags;
+	struct extcodec_config *newconfig;
+
+	if (!PyArg_ParseTuple(args, "s#OOOnni:create_extcodec",
+			&encname, &encnamelen, &basecodec, &encovermap,
+			&decovermap, &encbufsize, &decbufsize, &flags))
+		return NULL;
+
+	if (!MultibyteCodec_Check(basecodec)) {
+		PyErr_SetString(PyExc_ValueError, "arg 2 must be a "
+				"MultibyteCodec object.");
+		return NULL;
+	}
+
+	if (!PyDict_Check(encovermap) || !PyDict_Check(decovermap)) {
+		PyErr_SetString(PyExc_ValueError,
+				"both arg 3 and 4 must be dictionaries.");
+		return NULL;
+	}
+
+	if (encbufsize < 1 || decbufsize < 1) {
+		PyErr_SetString(PyExc_ValueError,
+				"both arg 5 and 6 must be positive.");
+		return NULL;
+	}
+
+	codec = ((MultibyteCodecObject *)basecodec)->codec;
+	if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
+		return NULL;
+
+	newconfig = PyMem_New(struct extcodec_config, 1);
+	if (newconfig == NULL)
+		return PyErr_NoMemory();
+
+	extcodec = PyMem_New(MultibyteCodec, 1);
+	if (extcodec == NULL) {
+		PyMem_Del(newconfig);
+		return PyErr_NoMemory();
+	}
+
+	memcpy(extcodec, &extcodec_template, sizeof(extcodec_template));
+	extcodec->config = newconfig;
+	extcodec->encbufsize = newconfig->encbufsize = encbufsize;
+	extcodec->decbufsize = newconfig->decbufsize = decbufsize;
+
+	newconfig->encovermap = encovermap;
+	newconfig->decovermap = decovermap;
+	newconfig->basecodec = ((MultibyteCodecObject *)basecodec)->codec;
+	newconfig->dataref = ((MultibyteCodecObject *)basecodec)->dataref;
+	newconfig->baseconfig = (void *)flags;
+	Py_INCREF(encovermap);
+	Py_INCREF(decovermap);
+	Py_INCREF(newconfig->dataref);
+
+	extcodec_cobj = PyCObject_FromVoidPtr(extcodec, extcodec_dealloc);
+	if (extcodec_cobj == NULL) {
+		extcodec_dealloc(extcodec);
+		return NULL;
+	}
+
+	extcodec->encoding = PyMem_Malloc(encnamelen + 1);
+	if (extcodec->encoding == NULL) {
+		/* extcodec_cobj owns extcodec: its destructor frees it */
+		Py_DECREF(extcodec_cobj);
+		return PyErr_NoMemory();
+	}
+	memcpy((void *)extcodec->encoding, encname, encnamelen + 1);
+
+	r = __create_codec(NULL, extcodec_cobj);
+	Py_DECREF(extcodec_cobj);
+
+	return r;
+}
+
 
 static struct PyMethodDef __methods[] = {
 	{"__create_codec", (PyCFunction)__create_codec, METH_O},
+	{"create_extcodec", (PyCFunction)create_extcodec, METH_VARARGS},
 	{NULL, NULL},
 };
 
Index: Modules/cjkcodecs/_codecs_cn.c
===================================================================
--- Modules/cjkcodecs/_codecs_cn.c	(revision 64533)
+++ Modules/cjkcodecs/_codecs_cn.c	(working copy)
@@ -435,10 +435,10 @@
 END_MAPPINGS_LIST
 
 BEGIN_CODECS_LIST
-  CODEC_STATELESS(gb2312)
-  CODEC_STATELESS(gbk)
-  CODEC_STATELESS(gb18030)
-  CODEC_STATEFUL(hz)
+  CODEC_STATELESS(gb2312, 1, 2)
+  CODEC_STATELESS(gbk, 1, 2)
+  CODEC_STATELESS(gb18030, 2, 4)
+  CODEC_STATEFUL(hz, 1, 2)
 END_CODECS_LIST
 
 I_AM_A_MODULE_FOR(cn)
Index: Modules/cjkcodecs/_codecs_iso2022.c
===================================================================
--- Modules/cjkcodecs/_codecs_iso2022.c	(revision 64533)
+++ Modules/cjkcodecs/_codecs_iso2022.c	(working copy)
@@ -1115,7 +1115,7 @@
 	"iso2022_" #variation,				\
 	&iso2022_##variation##_config,			\
 	iso2022_codec_init,				\
-	_STATEFUL_METHODS(iso2022)			\
+	_STATEFUL_METHODS(iso2022, 2, 8)		\
 },
 
 BEGIN_CODECS_LIST
Index: Modules/cjkcodecs/multibytecodec.h
===================================================================
--- 
Modules/cjkcodecs/multibytecodec.h (revision 64533) +++ Modules/cjkcodecs/multibytecodec.h (working copy) @@ -54,23 +54,28 @@ const char *encoding; const void *config; mbcodec_init codecinit; + mbencode_func encode; mbencodeinit_func encinit; mbencodereset_func encreset; + Py_ssize_t encbufsize; + mbdecode_func decode; mbdecodeinit_func decinit; mbdecodereset_func decreset; + Py_ssize_t decbufsize; } MultibyteCodec; typedef struct { PyObject_HEAD MultibyteCodec *codec; + PyObject *dataref; } MultibyteCodecObject; #define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type) #define _MultibyteStatefulCodec_HEAD \ - PyObject_HEAD \ + PyObject_VAR_HEAD \ MultibyteCodec *codec; \ MultibyteCodec_State state; \ PyObject *errors; @@ -78,42 +83,26 @@ _MultibyteStatefulCodec_HEAD } MultibyteStatefulCodecContext; -#define MAXENCPENDING 2 -#define _MultibyteStatefulEncoder_HEAD \ - _MultibyteStatefulCodec_HEAD \ - Py_UNICODE pending[MAXENCPENDING]; \ - Py_ssize_t pendingsize; typedef struct { - _MultibyteStatefulEncoder_HEAD -} MultibyteStatefulEncoderContext; - -#define MAXDECPENDING 8 -#define _MultibyteStatefulDecoder_HEAD \ - _MultibyteStatefulCodec_HEAD \ - unsigned char pending[MAXDECPENDING]; \ + _MultibyteStatefulCodec_HEAD + PyObject *stream; /* not used in IncrementalEncoder */ Py_ssize_t pendingsize; -typedef struct { - _MultibyteStatefulDecoder_HEAD -} MultibyteStatefulDecoderContext; + Py_UNICODE pending[1]; +} MultibyteStatefulEncoder; -typedef struct { - _MultibyteStatefulEncoder_HEAD -} MultibyteIncrementalEncoderObject; +typedef MultibyteStatefulEncoder MultibyteIncrementalEncoderObject; +typedef MultibyteStatefulEncoder MultibyteStreamWriterObject; typedef struct { - _MultibyteStatefulDecoder_HEAD -} MultibyteIncrementalDecoderObject; + _MultibyteStatefulCodec_HEAD + PyObject *stream; /* not used in IncrementalDecoder */ + Py_ssize_t pendingsize; + unsigned char pending[1]; +} MultibyteStatefulDecoder; -typedef struct { - 
_MultibyteStatefulDecoder_HEAD - PyObject *stream; -} MultibyteStreamReaderObject; +typedef MultibyteStatefulDecoder MultibyteIncrementalDecoderObject; +typedef MultibyteStatefulDecoder MultibyteStreamReaderObject; -typedef struct { - _MultibyteStatefulEncoder_HEAD - PyObject *stream; -} MultibyteStreamWriterObject; - /* positive values for illegal sequences */ #define MBERR_TOOSMALL (-1) /* insufficient output buffer space */ #define MBERR_TOOFEW (-2) /* incomplete input buffer */ Index: Modules/cjkcodecs/_codecs_hk.c =================================================================== --- Modules/cjkcodecs/_codecs_hk.c (revision 64533) +++ Modules/cjkcodecs/_codecs_hk.c (working copy) @@ -177,7 +177,7 @@ END_MAPPINGS_LIST BEGIN_CODECS_LIST - CODEC_STATELESS_WINIT(big5hkscs) + CODEC_STATELESS_WINIT(big5hkscs, 2, 2) END_CODECS_LIST I_AM_A_MODULE_FOR(hk) Index: Modules/cjkcodecs/_codecs_jp.c =================================================================== --- Modules/cjkcodecs/_codecs_jp.c (revision 64533) +++ Modules/cjkcodecs/_codecs_jp.c (working copy) @@ -719,13 +719,15 @@ END_MAPPINGS_LIST BEGIN_CODECS_LIST - CODEC_STATELESS(shift_jis) - CODEC_STATELESS(cp932) - CODEC_STATELESS(euc_jp) - CODEC_STATELESS(shift_jis_2004) - CODEC_STATELESS(euc_jis_2004) - { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) }, - { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) }, + CODEC_STATELESS(shift_jis, 1, 2) + CODEC_STATELESS(cp932, 1, 2) + CODEC_STATELESS(euc_jp, 1, 3) + CODEC_STATELESS(shift_jis_2004, 2, 2) + CODEC_STATELESS(euc_jis_2004, 2, 3) + { "euc_jisx0213", (void *)2000, NULL, + _STATELESS_METHODS(euc_jis_2004, 2, 3) }, + { "shift_jisx0213", (void *)2000, NULL, + _STATELESS_METHODS(shift_jis_2004, 2, 2) }, END_CODECS_LIST I_AM_A_MODULE_FOR(jp) Index: Modules/cjkcodecs/_codecs_kr.c =================================================================== --- Modules/cjkcodecs/_codecs_kr.c (revision 64533) +++ 
Modules/cjkcodecs/_codecs_kr.c (working copy) @@ -31,6 +31,8 @@ 0xbb, 0xbc, 0xbd, 0xbe }; +#define EUC_KR_NO_MAKE_UP (void *)1 + ENCODER(euc_kr) { while (inleft > 0) { @@ -54,6 +56,8 @@ OUT2((code & 0xFF) | 0x80) NEXT(1, 2) } + else if (config == EUC_KR_NO_MAKE_UP) + return 1; else { /* Mapping is found in CP949 extension, * but we encode it in KS X 1001:1998 Annex 3, * make-up sequence for EUC-KR. */ @@ -114,8 +118,8 @@ REQUIRE_INBUF(2) - if (c == EUCKR_JAMO_FIRSTBYTE && - IN2 == EUCKR_JAMO_FILLER) { + if (c == EUCKR_JAMO_FIRSTBYTE && IN2 == EUCKR_JAMO_FILLER && + config != EUC_KR_NO_MAKE_UP) { /* KS X 1001:1998 Annex 3 make-up sequence */ DBCHAR cho, jung, jong; @@ -444,9 +448,9 @@ END_MAPPINGS_LIST BEGIN_CODECS_LIST - CODEC_STATELESS(euc_kr) - CODEC_STATELESS(cp949) - CODEC_STATELESS(johab) + CODEC_STATELESS(euc_kr, 1, 8) + CODEC_STATELESS(cp949, 1, 2) + CODEC_STATELESS(johab, 1, 2) END_CODECS_LIST I_AM_A_MODULE_FOR(kr) Index: Modules/cjkcodecs/cjkcodecs.h =================================================================== --- Modules/cjkcodecs/cjkcodecs.h (revision 64533) +++ Modules/cjkcodecs/cjkcodecs.h (working copy) @@ -211,28 +211,32 @@ (const struct dbcs_map *)_mapping_list; #define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = { -#define _STATEFUL_METHODS(enc) \ - enc##_encode, \ - enc##_encode_init, \ - enc##_encode_reset, \ - enc##_decode, \ - enc##_decode_init, \ - enc##_decode_reset, -#define _STATELESS_METHODS(enc) \ - enc##_encode, NULL, NULL, \ - enc##_decode, NULL, NULL, -#define CODEC_STATEFUL(enc) { \ - #enc, NULL, NULL, \ - _STATEFUL_METHODS(enc) \ + +#define _STATEFUL_METHODS(enc, encbufsize, decbufsize) \ + enc##_encode, \ + enc##_encode_init, \ + enc##_encode_reset, \ + encbufsize, \ + enc##_decode, \ + enc##_decode_init, \ + enc##_decode_reset, \ + decbufsize +#define _STATELESS_METHODS(enc, encbufsize, decbufsize) \ + enc##_encode, NULL, NULL, encbufsize, \ + enc##_decode, NULL, NULL, decbufsize + +#define 
CODEC_STATEFUL(enc, encbufsize, decbufsize) { \ + #enc, NULL, NULL, \ + _STATEFUL_METHODS(enc, encbufsize, decbufsize) \ }, -#define CODEC_STATELESS(enc) { \ - #enc, NULL, NULL, \ - _STATELESS_METHODS(enc) \ +#define CODEC_STATELESS(enc, encbufsize, decbufsize) { \ + #enc, NULL, NULL, \ + _STATELESS_METHODS(enc, encbufsize, decbufsize) \ }, -#define CODEC_STATELESS_WINIT(enc) { \ - #enc, NULL, \ - enc##_codec_init, \ - _STATELESS_METHODS(enc) \ +#define CODEC_STATELESS_WINIT(enc, encbufsize, decbufsize) { \ + #enc, NULL, \ + enc##_codec_init, \ + _STATELESS_METHODS(enc, encbufsize, decbufsize) \ }, #define END_CODECS_LIST \ {"", NULL,} }; \ @@ -408,5 +412,4 @@ (void)register_maps(m); \ return m; \ } - #endif