#!/usr/bin/env python
# -*- coding: utf_8 -*-

import codecs
import encodings
import sys
import types
from xml.sax.saxutils import escape

# Process exit status handed to sys.exit() at the end of the script; it is
# switched to 1 when the runtime reports an encoding that has no entry in
# encoding_names.
exitvalue = 0

class enc_t:
    """Plain record holding what the script knows about one codec.

    Attributes:
        encodingDescription -- text written into the description cell.
        bijectiveType -- optional type object (None when not given); the
            script uses it to pick the output file and prints its __name__.
        aliaslist -- list of alias names; always starts out empty.
    """

    def __init__(self, encodingDescription, bijectiveType=None):
        # A fresh list per instance, so records never share alias storage.
        self.aliaslist = []
        self.bijectiveType = bijectiveType
        self.encodingDescription = encodingDescription

# Table of the codecs this script knows how to describe, keyed by canonical
# codec name.  Each value is an enc_t record: the first argument is the text
# emitted in the description column; the optional second argument is a type
# from the `types` module.  Entries that carry such a type are written to the
# second output file (with the type's name in an extra column), all others to
# the first output file.
encoding_names = {
    'ascii': enc_t(u'English'),
    'big5': enc_t(u'Traditional Chinese'),
    'big5hkscs': enc_t(u'Traditional Chinese'),
    'cp037': enc_t(u'English'),
    'cp424': enc_t(u'Hebrew'),
    'cp437': enc_t(u'English'),
    'cp500': enc_t(u'Western Europe'),
    'cp737': enc_t(u'Greek'),
    'cp775': enc_t(u'Baltic languages'),
    'cp850': enc_t(u'Western Europe'),
    'cp852': enc_t(u'Central and Eastern Europe'),
    'cp855': enc_t(u'Bulgarian, Byelorussian, Macedonian, Russian, Serbian'),
    'cp856': enc_t(u'Hebrew'),
    'cp857': enc_t(u'Turkish'),
    'cp860': enc_t(u'Portuguese'),
    'cp861': enc_t(u'Icelandic'),
    'cp862': enc_t(u'Hebrew'),
    'cp863': enc_t(u'Canadian'),
    'cp864': enc_t(u'Arabic'),
    'cp865': enc_t(u'Danish, Norwegian'),
    'cp866': enc_t(u'Russian'),
    'cp869': enc_t(u'Greek'),
    'cp874': enc_t(u'Thai'),
    'cp875': enc_t(u'Greek'),
    'cp932': enc_t(u'Japanese'),
    'cp949': enc_t(u'Korean'),
    'cp950': enc_t(u'Traditional Chinese'),
    'cp1006': enc_t(u'Urdu'),
    'cp1026': enc_t(u'Turkish'),
    'cp1140': enc_t(u'Western Europe'),
    'cp1250': enc_t(u'Central and Eastern Europe'),
    'cp1251': enc_t(u'Bulgarian, Byelorussian, Macedonian, Russian, Serbian'),
    'cp1252': enc_t(u'Western Europe'),
    'cp1253': enc_t(u'Greek'),
    'cp1254': enc_t(u'Turkish'),
    'cp1255': enc_t(u'Hebrew'),
    'cp1256': enc_t(u'Arabic'),
    'cp1257': enc_t(u'Baltic languages'),
    'cp1258': enc_t(u'Vietnamese'),
    'euc_jp': enc_t(u'Japanese'),
    'euc_jis_2004': enc_t(u'Japanese'),
    'euc_jisx0213': enc_t(u'Japanese'),
    'euc_kr': enc_t(u'Korean'),
    'gb2312': enc_t(u'Simplified Chinese'),
    'gbk': enc_t(u'Unified Chinese'),
    'gb18030': enc_t(u'Unified Chinese'),
    'hp_roman8': enc_t(u'<undocumented>'),
    'hz': enc_t(u'Simplified Chinese'),
    'iso2022_jp': enc_t(u'Japanese'),
    'iso2022_jp_1': enc_t(u'Japanese'),
    'iso2022_jp_2': enc_t(u'Japanese, Korean, Simplified Chinese, Western Europe, Greek'),
    'iso2022_jp_2004': enc_t(u'Japanese'),
    'iso2022_jp_3': enc_t(u'Japanese'),
    'iso2022_jp_ext': enc_t(u'Japanese'),
    'iso2022_kr': enc_t(u'Korean'),
    'latin_1': enc_t(u'Western Europe'),
    'iso8859_2': enc_t(u'Central and Eastern Europe'),
    'iso8859_3': enc_t(u'Esperanto, Maltese'),
    # Spelling corrected from "languagues"; this text is emitted verbatim.
    'iso8859_4': enc_t(u'Baltic languages'),
    'iso8859_5': enc_t(u'Bulgarian, Byelorussian, Macedonian, Russian, Serbian'),
    'iso8859_6': enc_t(u'Arabic'),
    'iso8859_7': enc_t(u'Greek'),
    'iso8859_8': enc_t(u'Hebrew'),
    'iso8859_9': enc_t(u'Turkish'),
    'iso8859_10': enc_t(u'Nordic languages'),
    'iso8859_11': enc_t(u'Thai'),
    'iso8859_13': enc_t(u'Baltic languages'),
    'iso8859_14': enc_t(u'Celtic languages'),
    'iso8859_15': enc_t(u'Western Europe'),
    'iso8859_16': enc_t(u'Romanian'),
    'johab': enc_t(u'Korean'),
    'koi8_r': enc_t(u'Russian'),
    'koi8_u': enc_t(u'Ukrainian'),
    'mac_cyrillic': enc_t(u'Bulgarian, Byelorussian, Macedonian, Russian, Serbian'),
    'mac_greek': enc_t(u'Greek'),
    'mac_iceland': enc_t(u'Icelandic'),
    'mac_latin2': enc_t(u'Central and Eastern Europe'),
    'mac_roman': enc_t(u'Western Europe'),
    'mac_turkish': enc_t(u'Turkish'),
    'ptcp154': enc_t(u'Kazakh'),
    'shift_jis': enc_t(u'Japanese'),
    'shift_jis_2004': enc_t(u'Japanese'),
    'shift_jisx0213': enc_t(u'Japanese'),
    'tactis': enc_t(u'Thai'),
    'tis_620': enc_t(u'Thai'),
    'utf_16': enc_t(u'all languages'),
    'utf_16_be': enc_t(u'all languages (BMP only)'),
    'utf_16_le': enc_t(u'all languages (BMP only)'),
    'utf_7': enc_t(u'all languages'),
    'utf_8': enc_t(u'all languages'),

    # Codecs described by operation rather than by language; the second
    # argument is the type recorded as bijectiveType.
    'base64_codec': enc_t(u'Convert operand to MIME base64', types.StringType),
    'bz2_codec': enc_t(u'Compress the operand using bz2', types.StringType),
    'hex_codec': enc_t(u'Convert operand to hexadecimal representation, with two digits per byte', types.StringType),
    'idna': enc_t(u'Implements RFC 3490. New in version 2.3. See also encodings.idna', types.UnicodeType),
    'mbcs': enc_t(u'Windows only: Encode operand according to the ANSI codepage (CP_ACP)', types.UnicodeType),
    'palmos': enc_t(u'Encoding of PalmOS 3.5', types.UnicodeType),
    'punycode': enc_t(u'Implements RFC 3492. New in version 2.3.', types.UnicodeType),
    'quopri_codec': enc_t(u'Convert operand to MIME quoted printable', types.StringType),
    'raw_unicode_escape': enc_t(u'Produce a string that is suitable as raw Unicode literal in Python source code', types.UnicodeType),
    'rot_13': enc_t(u'Returns the Caesar-cypher encryption of the operand', types.StringType),
    'string_escape': enc_t(u'Produce a string that is suitable as string literal in Python source code', types.StringType),
    'undefined': enc_t(u'Raise an exception for all conversion. Can be used as the system encoding if no automatic coercion between byte and Unicode strings is desired.', types.NoneType),
    'unicode_escape': enc_t(u'Produce a string that is suitable as Unicode literal in Python source code', types.UnicodeType),
    'unicode_internal': enc_t(u'Return the internal representation of the operand', types.UnicodeType),
    'uu_codec': enc_t(u'Convert the operand using uuencode', types.StringType),
    'zlib_codec': enc_t(u'Compress the operand using gzip', types.StringType),
    }


# Invert the runtime alias table: every canonical codec name that some alias
# points at becomes a key whose value is the list of those aliases.
alias_table = encodings.aliases.aliases
enchash = dict((canonical, []) for canonical in set(alias_table.values()))
for encalias, canonical in alias_table.items():
    enchash[canonical].append(encalias)

elist = enchash.keys()

# Attach the sorted aliases to each known encoding.  A codec the runtime
# knows but encoding_names does not gets a placeholder entry, is reported on
# stderr, and makes the script exit with status 1.
for enc in elist:
    if enc not in encoding_names:
        encoding_names[enc] = enc_t(u'Unknown encoding %s in runtime!' % (enc))
        print >>sys.stderr, encoding_names[enc].encodingDescription
        exitvalue = 1
        continue

    aliaslist = enchash[enc]
    aliaslist.sort()
    encoding_names[enc].aliaslist = aliaslist

# Disabled check that each codec can actually be looked up:
#    try:
#        assert len(codecs.lookup(enc)) == 4
#    except:
#        print u'%s NOT SUPPORTED' % (enc)
#        continue

# Emit the HTML table rows, sorted by codec name.  Entries without a
# bijectiveType go to the file named by sys.argv[1]; entries with one go to
# the file named by sys.argv[2] and get an extra cell with the type's name.
# The nested try/finally blocks make sure both files are closed (and thus
# flushed) even if writing fails part-way through.
cfh = open(sys.argv[1], 'w')
try:
    ofh = open(sys.argv[2], 'w')
    try:
        for k in sorted(encoding_names):
            v = encoding_names[k]

            if v.bijectiveType is not None:
                fh = ofh
            else:
                fh = cfh

            print >>fh, '<P><tr><td class="left"   valign="baseline">%s</td>' % (k)
            print >>fh, '\t<td class="left">%s</td>' % (", ".join(v.aliaslist))
            if v.bijectiveType is not None:
                print >>fh, '\t<td class="left">%s</td>' % (v.bijectiveType.__name__)
            # Descriptions are free text and may contain markup characters
            # (e.g. u'<undocumented>'); escape them so they show up literally
            # instead of being parsed as an HTML tag.
            print >>fh, '\t<td class="left"  >%s</td>' % (escape(v.encodingDescription))
            print >>fh, '</tr></P>'
    finally:
        ofh.close()
finally:
    cfh.close()

sys.exit(exitvalue)