#!/usr/bin/env python
# -*- coding: utf_8 -*-
"""Dump the encodings known to this script and to the running interpreter.

Usage: <script> ENCODINGS_FILE CODECS_FILE

Every entry of ``encoding_names`` is written as a small text record (name,
comma-separated aliases, optional type name, description).  Entries whose
``bijectiveType`` is None go to ENCODINGS_FILE (``sys.argv[1]``); all other
entries go to CODECS_FILE (``sys.argv[2]``).

Aliases are taken from ``encodings.aliases.aliases``.  An encoding that the
runtime lists there but that is missing from ``encoding_names`` is reported
on stderr, still written out with a placeholder description, and makes the
script exit with status 1.
"""

import sys
import codecs
import encodings
import encodings.aliases  # imported explicitly instead of relying on the package doing it
import types

# Process exit status; switched to 1 when the runtime knows an encoding
# that is not described in ``encoding_names``.
exitvalue = 0


class enc_t:
    """Description record for one encoding.

    encodingDescription -- human readable text written to the output file.
    bijectiveType       -- None, or a type object from ``types``; entries
                           with a non-None value are written to the second
                           output file together with the type's name.
                           NOTE(review): presumably the operand type the
                           codec works on -- confirm with the consumer of
                           the generated files.
    aliaslist           -- alias names; filled in later from
                           ``encodings.aliases.aliases``.
    """

    def __init__(self, encodingDescription, bijectiveType=None):
        self.encodingDescription = encodingDescription
        self.bijectiveType = bijectiveType
        self.aliaslist = []


# Canonical encoding name -> description record.
encoding_names = {
    'ascii': enc_t(u'English'),
    'big5': enc_t(u'Traditional Chinese'),
    'big5hkscs': enc_t(u'Traditional Chinese'),
    'cp037': enc_t(u'English'),
    'cp424': enc_t(u'Hebrew'),
    'cp437': enc_t(u'English'),
    'cp500': enc_t(u'Western Europe'),
    'cp737': enc_t(u'Greek'),
    'cp775': enc_t(u'Baltic languages'),
    'cp850': enc_t(u'Western Europe'),
    'cp852': enc_t(u'Central and Eastern Europe'),
    'cp855': enc_t(u'Bulgarian, Byelorussian, Macedonian, Russian, Serbian'),
    'cp856': enc_t(u'Hebrew'),
    'cp857': enc_t(u'Turkish'),
    'cp860': enc_t(u'Portuguese'),
    'cp861': enc_t(u'Icelandic'),
    'cp862': enc_t(u'Hebrew'),
    'cp863': enc_t(u'Canadian'),
    'cp864': enc_t(u'Arabic'),
    'cp865': enc_t(u'Danish, Norwegian'),
    'cp866': enc_t(u'Russian'),
    'cp869': enc_t(u'Greek'),
    'cp874': enc_t(u'Thai'),
    'cp875': enc_t(u'Greek'),
    'cp932': enc_t(u'Japanese'),
    'cp949': enc_t(u'Korean'),
    'cp950': enc_t(u'Traditional Chinese'),
    'cp1006': enc_t(u'Urdu'),
    'cp1026': enc_t(u'Turkish'),
    'cp1140': enc_t(u'Western Europe'),
    'cp1250': enc_t(u'Central and Eastern Europe'),
    'cp1251': enc_t(u'Bulgarian, Byelorussian, Macedonian, Russian, Serbian'),
    'cp1252': enc_t(u'Western Europe'),
    'cp1253': enc_t(u'Greek'),
    'cp1254': enc_t(u'Turkish'),
    'cp1255': enc_t(u'Hebrew'),
    'cp1256': enc_t(u'Arabic'),
    'cp1257': enc_t(u'Baltic languages'),
    'cp1258': enc_t(u'Vietnamese'),
    'euc_jp': enc_t(u'Japanese'),
    'euc_jis_2004': enc_t(u'Japanese'),
    'euc_jisx0213': enc_t(u'Japanese'),
    'euc_kr': enc_t(u'Korean'),
    'gb2312': enc_t(u'Simplified Chinese'),
    'gbk': enc_t(u'Unified Chinese'),
    'gb18030': enc_t(u'Unified Chinese'),
    'hp_roman8': enc_t(u''),
    'hz': enc_t(u'Simplified Chinese'),
    'iso2022_jp': enc_t(u'Japanese'),
    'iso2022_jp_1': enc_t(u'Japanese'),
    'iso2022_jp_2': enc_t(u'Japanese, Korean, Simplified Chinese, Western Europe, Greek'),
    'iso2022_jp_2004': enc_t(u'Japanese'),
    'iso2022_jp_3': enc_t(u'Japanese'),
    'iso2022_jp_ext': enc_t(u'Japanese'),
    'iso2022_kr': enc_t(u'Korean'),
    'latin_1': enc_t(u'Western Europe'),
    'iso8859_2': enc_t(u'Central and Eastern Europe'),
    'iso8859_3': enc_t(u'Esperanto, Maltese'),
    # Spelling fixed (was 'languagues') to match cp775/cp1257/iso8859_13.
    'iso8859_4': enc_t(u'Baltic languages'),
    'iso8859_5': enc_t(u'Bulgarian, Byelorussian, Macedonian, Russian, Serbian'),
    'iso8859_6': enc_t(u'Arabic'),
    'iso8859_7': enc_t(u'Greek'),
    'iso8859_8': enc_t(u'Hebrew'),
    'iso8859_9': enc_t(u'Turkish'),
    'iso8859_10': enc_t(u'Nordic languages'),
    'iso8859_11': enc_t(u'Thai'),
    'iso8859_13': enc_t(u'Baltic languages'),
    'iso8859_14': enc_t(u'Celtic languages'),
    'iso8859_15': enc_t(u'Western Europe'),
    'iso8859_16': enc_t(u'Romanian'),
    'johab': enc_t(u'Korean'),
    'koi8_r': enc_t(u'Russian'),
    'koi8_u': enc_t(u'Ukrainian'),
    'mac_cyrillic': enc_t(u'Bulgarian, Byelorussian, Macedonian, Russian, Serbian'),
    'mac_greek': enc_t(u'Greek'),
    'mac_iceland': enc_t(u'Icelandic'),
    'mac_latin2': enc_t(u'Central and Eastern Europe'),
    'mac_roman': enc_t(u'Western Europe'),
    'mac_turkish': enc_t(u'Turkish'),
    'ptcp154': enc_t(u'Kazakh'),
    'shift_jis': enc_t(u'Japanese'),
    'shift_jis_2004': enc_t(u'Japanese'),
    'shift_jisx0213': enc_t(u'Japanese'),
    'tactis': enc_t(u'Thai'),
    'tis_620': enc_t(u'Thai'),
    'utf_16': enc_t(u'all languages'),
    'utf_16_be': enc_t(u'all languages (BMP only)'),
    'utf_16_le': enc_t(u'all languages (BMP only)'),
    'utf_7': enc_t(u'all languages'),
    'utf_8': enc_t(u'all languages'),
    'base64_codec': enc_t(u'Convert operand to MIME base64', types.StringType),
    'bz2_codec': enc_t(u'Compress the operand using bz2', types.StringType),
    'hex_codec': enc_t(
        u'Convert operand to hexadecimal representation, with two digits per byte',
        types.StringType),
    'idna': enc_t(
        u'Implements RFC 3490. New in version 2.3. See also encodings.idna',
        types.UnicodeType),
    'mbcs': enc_t(
        u'Windows only: Encode operand according to the ANSI codepage (CP_ACP)',
        types.UnicodeType),
    'palmos': enc_t(u'Encoding of PalmOS 3.5', types.UnicodeType),
    'punycode': enc_t(u'Implements RFC 3492. New in version 2.3.', types.UnicodeType),
    'quopri_codec': enc_t(u'Convert operand to MIME quoted printable', types.StringType),
    'raw_unicode_escape': enc_t(
        u'Produce a string that is suitable as raw Unicode literal in Python source code',
        types.UnicodeType),
    'rot_13': enc_t(
        u'Returns the Caesar-cypher encryption of the operand',
        types.StringType),
    'string_escape': enc_t(
        u'Produce a string that is suitable as string literal in Python source code',
        types.StringType),
    'undefined': enc_t(
        u'Raise an exception for all conversion. Can be used as the system '
        u'encoding if no automatic coercion between byte and Unicode strings '
        u'is desired.',
        types.NoneType),
    'unicode_escape': enc_t(
        u'Produce a string that is suitable as Unicode literal in Python source code',
        types.UnicodeType),
    'unicode_internal': enc_t(
        u'Return the internal representation of the operand',
        types.UnicodeType),
    'uu_codec': enc_t(u'Convert the operand using uuencode', types.StringType),
    'zlib_codec': enc_t(u'Compress the operand using gzip', types.StringType),
}

# Invert the runtime alias table: canonical encoding name -> list of aliases.
enchash = {}
for encalias, target in encodings.aliases.aliases.items():
    enchash.setdefault(target, []).append(encalias)

# Walk the runtime's encodings in sorted order so that stderr diagnostics
# come out in a stable order from run to run.
elist = sorted(enchash)
for enc in elist:
    aliaslist = enchash[enc]
    if enc in encoding_names:
        aliaslist.sort()
        encoding_names[enc].aliaslist = aliaslist
    else:
        # The runtime knows an encoding this table does not describe:
        # register a placeholder (its alias list stays empty), report it,
        # and remember to exit with a failure status.
        encoding_names[enc] = enc_t(u'Unknown encoding %s in runtime!' % (enc))
        print >>sys.stderr, encoding_names[enc].encodingDescription
        exitvalue = 1
    # try:
    #     assert len(codecs.lookup(enc)) == 4
    # except:
    #     print u'%s NOT SUPPORTED' % (enc)
    #     continue

# Fail with a readable message (exit status 1, same status an uncaught
# IndexError would have produced) when the output paths are missing.
if len(sys.argv) < 3:
    sys.exit('usage: %s ENCODINGS_FILE CODECS_FILE' % (sys.argv[0]))

# try/finally rather than ``with`` so the script keeps running on the old
# interpreters it was written for, while still closing both files.
cfh = open(sys.argv[1], 'w')
try:
    ofh = open(sys.argv[2], 'w')
    try:
        for k in sorted(encoding_names):
            v = encoding_names[k]
            # Entries carrying a type go to the second file, the rest to
            # the first one.
            if v.bijectiveType is not None:
                fh = ofh
            else:
                fh = cfh
            # NOTE(review): the record-separator literals below were broken
            # across physical lines in the received source; they are
            # reconstructed here as two newline characters.  Confirm
            # against the expected output format.
            print >>fh, '\n\n%s' % (k)
            print >>fh, '\t%s' % (", ".join(v.aliaslist))
            #if v.bijectiveType != None: print >>fh, '\t<%s>' % (str(v.bijectiveType)[1:-1])
            if v.bijectiveType is not None:
                print >>fh, '\t%s' % (v.bijectiveType.__name__)
            print >>fh, '\t%s' % (v.encodingDescription)
            print >>fh, '\n\n'
    finally:
        ofh.close()
finally:
    cfh.close()

sys.exit(exitvalue)