diff -r cc60d0283fad Lib/email/charset.py --- a/Lib/email/charset.py Fri May 20 16:55:06 2011 +0200 +++ b/Lib/email/charset.py Sun May 22 00:19:30 2011 +0200 @@ -10,6 +10,7 @@ ] from functools import partial +from codecs import lookup import email.base64mime import email.quoprimime @@ -63,36 +64,6 @@ 'utf-8': (SHORTEST, BASE64, 'utf-8'), } -# Aliases for other commonly-used names for character sets. Map -# them to the real ones used in email. -ALIASES = { - 'latin_1': 'iso-8859-1', - 'latin-1': 'iso-8859-1', - 'latin_2': 'iso-8859-2', - 'latin-2': 'iso-8859-2', - 'latin_3': 'iso-8859-3', - 'latin-3': 'iso-8859-3', - 'latin_4': 'iso-8859-4', - 'latin-4': 'iso-8859-4', - 'latin_5': 'iso-8859-9', - 'latin-5': 'iso-8859-9', - 'latin_6': 'iso-8859-10', - 'latin-6': 'iso-8859-10', - 'latin_7': 'iso-8859-13', - 'latin-7': 'iso-8859-13', - 'latin_8': 'iso-8859-14', - 'latin-8': 'iso-8859-14', - 'latin_9': 'iso-8859-15', - 'latin-9': 'iso-8859-15', - 'latin_10':'iso-8859-16', - 'latin-10':'iso-8859-16', - 'cp949': 'ks_c_5601-1987', - 'euc_jp': 'euc-jp', - 'euc_kr': 'euc-kr', - 'ascii': 'us-ascii', - } - - # Map charsets to their Unicode codec strings. CODEC_MAP = { 'gb2312': 'eucgb2312_cn', @@ -103,6 +74,8 @@ 'us-ascii': None, } +# Aliases defined by the user +ALIASES = dict() # Convenience functions for extending the above mappings @@ -220,9 +193,12 @@ input_charset = str(input_charset, 'ascii') except UnicodeError: raise errors.CharsetError(input_charset) - input_charset = input_charset.lower() - # Set the input charset after filtering through the aliases - self.input_charset = ALIASES.get(input_charset, input_charset) + # Set the input charset after filtering through its aliases defined in + # codecs library + try: + self.input_charset = lookup(input_charset).name + except LookupError: + self.input_charset = ALIASES.get(input_charset, input_charset) # We can try to guess which encoding and conversion to use by the # charset_map dictionary. Try that first, but let the user override # it. diff -r cc60d0283fad Lib/encodings/aliases.py --- a/Lib/encodings/aliases.py Fri May 20 16:55:06 2011 +0200 +++ b/Lib/encodings/aliases.py Sun May 22 00:19:30 2011 +0200 @@ -298,6 +298,7 @@ 'iso_ir_157' : 'iso8859_10', 'l6' : 'iso8859_10', 'latin6' : 'iso8859_10', + 'latin_6' : 'iso8859_10', # iso8859_11 codec 'thai' : 'iso8859_11', @@ -308,6 +309,7 @@ 'iso_8859_13' : 'iso8859_13', 'l7' : 'iso8859_13', 'latin7' : 'iso8859_13', + 'latin_7' : 'iso8859_13', # iso8859_14 codec 'iso_8859_14' : 'iso8859_14', @@ -316,11 +318,13 @@ 'iso_ir_199' : 'iso8859_14', 'l8' : 'iso8859_14', 'latin8' : 'iso8859_14', + 'latin_8' : 'iso8859_14', # iso8859_15 codec 'iso_8859_15' : 'iso8859_15', 'l9' : 'iso8859_15', 'latin9' : 'iso8859_15', + 'latin_9' : 'iso8859_15', # iso8859_16 codec 'iso_8859_16' : 'iso8859_16', @@ -328,6 +332,7 @@ 'iso_ir_226' : 'iso8859_16', 'l10' : 'iso8859_16', 'latin10' : 'iso8859_16', + 'latin_10' : 'iso8859_16', # iso8859_2 codec 'csisolatin2' : 'iso8859_2', @@ -336,6 +341,7 @@ 'iso_ir_101' : 'iso8859_2', 'l2' : 'iso8859_2', 'latin2' : 'iso8859_2', + 'latin_2' : 'iso8859_2', # iso8859_3 codec 'csisolatin3' : 'iso8859_3', @@ -344,6 +350,7 @@ 'iso_ir_109' : 'iso8859_3', 'l3' : 'iso8859_3', 'latin3' : 'iso8859_3', + 'latin_3' : 'iso8859_3', # iso8859_4 codec 'csisolatin4' : 'iso8859_4', @@ -352,6 +359,7 @@ 'iso_ir_110' : 'iso8859_4', 'l4' : 'iso8859_4', 'latin4' : 'iso8859_4', + 'latin_4' : 'iso8859_4', # iso8859_5 codec 'csisolatincyrillic' : 'iso8859_5', @@ -393,6 +401,7 @@ 'iso_ir_148' : 'iso8859_9', 'l5' : 'iso8859_9', 'latin5' : 'iso8859_9', + 'latin_5' : 'iso8859_9', # johab codec 'cp1361' : 'johab',