Author liturgist
Recipients
Date 2005-08-01.18:23:30
SpamBayes Score
Marked as misclassified
Message-id
In-reply-to
Content
The Python 2.4.1 documentation has a list of standard encodings in
section 4.9.2.  However, this list does not seem to match what
is returned by the runtime.  Below is code to dump out
the encodings and aliases.  Please tell me if anything
is incorrect.

In some cases, there are many more valid aliases than
listed in the documentation.  See 'cp037' as an example.

I see that the identifiers are intended to be case
insensitive.  I would prefer to see the documentation
provide the identifiers exactly as they appear in
encodings.aliases.aliases.  The only alias containing
any upper case letters appears to be 'csHPRoman8', an
alias of 'hp_roman8'.

$ cat encodingaliases.py
#!/usr/bin/env python
import sys
import encodings

def main():
    """Print every codec in ``encodings.aliases.aliases`` with its aliases.

    The alias table maps alias -> codec name.  This inverts it into
    codec name -> list of aliases and writes one line per codec to
    standard output, ordered by codec name, in the form::

        ascii ['646', 'us_ascii', ...]

    The order of the aliases inside each list follows the iteration order
    of the alias table itself.
    """
    # Import the submodule explicitly instead of relying on the
    # ``encodings`` package having loaded it as a side effect.
    import encodings.aliases

    alias_table = encodings.aliases.aliases

    # Invert alias -> codec into codec -> [aliases] in a single pass.
    enchash = {}
    for encalias, enc in alias_table.items():
        enchash.setdefault(enc, []).append(encalias)

    # sorted() accepts the dict directly and yields its keys in order.
    for enc in sorted(enchash):
        # A single pre-formatted string prints identically whether
        # ``print`` is a statement or a function.
        print("%s %s" % (enc, enchash[enc]))

# Run the dump only when executed as a script, then exit with status 0.
if __name__ == "__main__":
    main()
    raise SystemExit(0)
13:12 pwatson [
ruth.knightsbridge.com:/home/pwatson/src/python ] 366
$ ./encodingaliases.py
ascii ['iso_ir_6', 'ansi_x3_4_1968', 'ibm367',
'iso646_us', 'us', 'cp367', '646', 'us_ascii',
'csascii', 'ansi_x3.4_1986', 'iso_646.irv_1991',
'ansi_x3.4_1968']
base64_codec ['base_64', 'base64']
big5 ['csbig5', 'big5_tw']
big5hkscs ['hkscs', 'big5_hkscs']
bz2_codec ['bz2']
cp037 ['ebcdic_cp_wt', 'ebcdic_cp_us', 'ebcdic_cp_nl',
'037', 'ibm039', 'ibm037', 'csibm037', 'ebcdic_cp_ca']
cp1026 ['csibm1026', 'ibm1026', '1026']
cp1140 ['1140', 'ibm1140']
cp1250 ['1250', 'windows_1250']
cp1251 ['1251', 'windows_1251']
cp1252 ['windows_1252', '1252']
cp1253 ['1253', 'windows_1253']
cp1254 ['1254', 'windows_1254']
cp1255 ['1255', 'windows_1255']
cp1256 ['1256', 'windows_1256']
cp1257 ['1257', 'windows_1257']
cp1258 ['1258', 'windows_1258']
cp424 ['ebcdic_cp_he', 'ibm424', '424', 'csibm424']
cp437 ['ibm437', '437', 'cspc8codepage437']
cp500 ['csibm500', 'ibm500', '500', 'ebcdic_cp_ch',
'ebcdic_cp_be']
cp775 ['cspc775baltic', '775', 'ibm775']
cp850 ['ibm850', 'cspc850multilingual', '850']
cp852 ['ibm852', '852', 'cspcp852']
cp855 ['csibm855', 'ibm855', '855']
cp857 ['csibm857', 'ibm857', '857']
cp860 ['csibm860', 'ibm860', '860']
cp861 ['csibm861', 'cp_is', 'ibm861', '861']
cp862 ['cspc862latinhebrew', 'ibm862', '862']
cp863 ['csibm863', 'ibm863', '863']
cp864 ['csibm864', 'ibm864', '864']
cp865 ['csibm865', 'ibm865', '865']
cp866 ['csibm866', 'ibm866', '866']
cp869 ['csibm869', 'ibm869', '869', 'cp_gr']
cp932 ['mskanji', '932', 'ms932', 'ms_kanji']
cp949 ['uhc', 'ms949', '949']
cp950 ['ms950', '950']
euc_jis_2004 ['eucjis2004', 'jisx0213', 'euc_jis2004']
euc_jisx0213 ['eucjisx0213']
euc_jp ['eucjp', 'ujis', 'u_jis']
euc_kr ['ksc5601', 'korean', 'euckr', 'ksx1001',
'ks_c_5601', 'ks_c_5601_1987', 'ks_x_1001']
gb18030 ['gb18030_2000']
gb2312 ['chinese', 'euc_cn', 'csiso58gb231280',
'iso_ir_58', 'euccn', 'eucgb2312_cn', 'gb2312_1980',
'gb2312_80']
gbk ['cp936', 'ms936', '936']
hex_codec ['hex']
hp_roman8 ['csHPRoman8', 'r8', 'roman8']
hz ['hzgb', 'hz_gb_2312', 'hz_gb']
iso2022_jp ['iso2022jp', 'iso_2022_jp', 'csiso2022jp']
iso2022_jp_1 ['iso_2022_jp_1', 'iso2022jp_1']
iso2022_jp_2 ['iso_2022_jp_2', 'iso2022jp_2']
iso2022_jp_2004 ['iso_2022_jp_2004', 'iso2022jp_2004']
iso2022_jp_3 ['iso_2022_jp_3', 'iso2022jp_3']
iso2022_jp_ext ['iso2022jp_ext', 'iso_2022_jp_ext']
iso2022_kr ['iso_2022_kr', 'iso2022kr', 'csiso2022kr']
iso8859_10 ['csisolatin6', 'l6', 'iso_8859_10_1992',
'iso_ir_157', 'iso_8859_10', 'latin6']
iso8859_11 ['iso_8859_11', 'thai', 'iso_8859_11_2001']
iso8859_13 ['iso_8859_13']
iso8859_14 ['iso_celtic', 'iso_ir_199', 'l8',
'iso_8859_14_1998', 'iso_8859_14', 'latin8']
iso8859_15 ['iso_8859_15']
iso8859_16 ['iso_8859_16_2001', 'l10', 'iso_ir_226',
'latin10', 'iso_8859_16']
iso8859_2 ['l2', 'csisolatin2', 'iso_ir_101',
'iso_8859_2', 'iso_8859_2_1987', 'latin2']
iso8859_3 ['iso_8859_3_1988', 'l3', 'iso_ir_109',
'csisolatin3', 'iso_8859_3', 'latin3']
iso8859_4 ['csisolatin4', 'l4', 'iso_ir_110',
'iso_8859_4', 'iso_8859_4_1988', 'latin4']
iso8859_5 ['iso_8859_5_1988', 'iso_8859_5', 'cyrillic',
'csisolatincyrillic', 'iso_ir_144']
iso8859_6 ['iso_8859_6_1987', 'iso_ir_127',
'csisolatinarabic', 'asmo_708', 'iso_8859_6',
'ecma_114', 'arabic']
iso8859_7 ['ecma_118', 'greek8', 'iso_8859_7',
'iso_ir_126', 'elot_928', 'iso_8859_7_1987',
'csisolatingreek', 'greek']
iso8859_8 ['iso_8859_8_1988', 'iso_ir_138',
'iso_8859_8', 'csisolatinhebrew', 'hebrew']
iso8859_9 ['l5', 'iso_8859_9_1989', 'iso_8859_9',
'csisolatin5', 'latin5', 'iso_ir_148']
johab ['cp1361', 'ms1361']
koi8_r ['cskoi8r']
latin_1 ['iso8859', 'csisolatin1', 'latin', 'l1',
'iso_ir_100', 'ibm819', 'cp819', 'iso_8859_1',
'latin1', 'iso_8859_1_1987', '8859']
mac_cyrillic ['maccyrillic']
mac_greek ['macgreek']
mac_iceland ['maciceland']
mac_latin2 ['maccentraleurope', 'maclatin2']
mac_roman ['macroman']
mac_turkish ['macturkish']
mbcs ['dbcs']
ptcp154 ['cp154', 'cyrillic-asian', 'csptcp154', 'pt154']
quopri_codec ['quopri', 'quoted_printable',
'quotedprintable']
rot_13 ['rot13']
shift_jis ['s_jis', 'sjis', 'shiftjis', 'csshiftjis']
shift_jis_2004 ['shiftjis2004', 's_jis_2004', 'sjis_2004']
shift_jisx0213 ['shiftjisx0213', 'sjisx0213', 's_jisx0213']
tactis ['tis260']
tis_620 ['tis620', 'tis_620_2529_1', 'tis_620_2529_0',
'iso_ir_166', 'tis_620_0']
utf_16 ['utf16', 'u16']
utf_16_be ['utf_16be', 'unicodebigunmarked']
utf_16_le ['utf_16le', 'unicodelittleunmarked']
utf_7 ['u7', 'utf7']
utf_8 ['u8', 'utf', 'utf8_ucs4', 'utf8_ucs2', 'utf8']
uu_codec ['uu']
zlib_codec ['zlib', 'zip']
History
Date User Action Args
2007-08-23 14:33:29 admin link issue1249749 messages
2007-08-23 14:33:29 admin create