Index: email/test/test_email.py =================================================================== --- email/test/test_email.py (revision 42261) +++ email/test/test_email.py (working copy) @@ -2073,7 +2073,8 @@ charset = Charset(charsets[0]) eq(charset.get_body_encoding(), 'base64') msg.set_payload('hello world', charset=charset) - eq(msg.get_payload(), 'hello world') + eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') + eq(msg.get_payload(decode=True), 'hello world') eq(msg['content-transfer-encoding'], 'base64') # Try another one msg = Message() Index: email/test/test_email_codecs.py =================================================================== --- email/test/test_email_codecs.py (revision 42261) +++ email/test/test_email_codecs.py (working copy) @@ -1,17 +1,16 @@ -# Copyright (C) 2002 Python Software Foundation +# Copyright (C) 2002-2006 Python Software Foundation # email package unit tests for (optional) Asian codecs import unittest from test.test_support import TestSkipped, run_unittest from email.test.test_email import TestEmailBase -from email.Charset import Charset +from email.Charset import Charset, _find_asian_codec from email.Header import Header, decode_header +from email.Message import Message # See if we have the Japanese codecs package installed -try: - unicode('foo', 'japanese.iso-2022-jp') -except LookupError: +if not _find_asian_codec('iso-2022-jp', 'japanese'): raise TestSkipped, 'Optional Japanese codecs not installed' @@ -49,7 +48,15 @@ # TK: full decode comparison eq(h.__unicode__().encode('euc-jp'), long) + def test_payload_encoding(self): + jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa' + jcode = 'euc-jp' + msg = Message() + msg.set_payload(jhello, jcode) + ustr = unicode(msg.get_payload(), msg.get_content_charset()) + self.assertEqual(jhello, ustr.encode(jcode)) + def suite(): suite = unittest.TestSuite() Index: email/Charset.py =================================================================== --- email/Charset.py (revision 42261) +++ email/Charset.py (working copy) @@ -1,5 +1,5 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield), barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2006 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield), barry@python.org (Barry Warsaw) from types import UnicodeType from email.Encoders import encode_7or8bit @@ -99,20 +99,13 @@ # of stability and useability. CODEC_MAP = { - 'euc-jp': 'japanese.euc-jp', - 'iso-2022-jp': 'japanese.iso-2022-jp', - 'shift_jis': 'japanese.shift_jis', - 'euc-kr': 'korean.euc-kr', - 'ks_c_5601-1987': 'korean.cp949', - 'iso-2022-kr': 'korean.iso-2022-kr', - 'johab': 'korean.johab', - 'gb2132': 'eucgb2312_cn', - 'big5': 'big5_tw', - 'utf-8': 'utf-8', + 'gb2132': 'eucgb2312_cn', + 'big5': 'big5_tw', + 'utf-8': 'utf-8', # Hack: We don't want *any* conversion for stuff marked us-ascii, as all # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. # Let that stuff pass through without conversion to/from Unicode. - 'us-ascii': None, + 'us-ascii': None, } @@ -165,6 +158,26 @@ CODEC_MAP[charset] = codecname +def _find_asian_codec(charset, language): + try: + unicode('foo', charset) + return charset + except LookupError: + try: + codec = language + '.' + charset + unicode('foo', codec) + return codec + except LookupError: + return None + + +for _charset in ('euc-jp', 'iso-2022-jp', 'shift_jis'): + add_codec(_charset, _find_asian_codec(_charset, 'japanese') or _charset) + +for _charset in ('euc-kr', 'cp949', 'iso-2022-kr', 'johab'): + add_codec(_charset, _find_asian_codec(_charset, 'korean') or _charset) + + class Charset: """Map character sets to their email properties. @@ -229,7 +242,7 @@ self.input_codec = CODEC_MAP.get(self.input_charset, self.input_charset) self.output_codec = CODEC_MAP.get(self.output_charset, - self.input_codec) + self.input_codec) def __str__(self): return self.input_charset.lower() Index: email/Message.py =================================================================== --- email/Message.py (revision 42261) +++ email/Message.py (working copy) @@ -272,11 +272,14 @@ charset=charset.get_output_charset()) else: self.set_param('charset', charset.get_output_charset()) + if str(charset) <> charset.get_output_charset(): + self._payload = charset.body_encode(self._payload) if not self.has_key('Content-Transfer-Encoding'): cte = charset.get_body_encoding() if callable(cte): cte(self) else: + self._payload = charset.body_encode(self._payload) self.add_header('Content-Transfer-Encoding', cte) def get_charset(self): Index: email/Generator.py =================================================================== --- email/Generator.py (revision 42261) +++ email/Generator.py (working copy) @@ -1,8 +1,7 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2006 Python Software Foundation +# Author: barry@python.org (Barry Warsaw) -"""Classes to generate plain text from a message object tree. -""" +"""Classes to generate plain text from a message object tree.""" import re import sys @@ -192,9 +191,6 @@ payload = msg.get_payload() if payload is None: return - cset = msg.get_charset() - if cset is not None: - payload = cset.body_encode(payload) if not _isstring(payload): raise TypeError, 'string payload expected: %s' % type(payload) if self._mangle_from_: