diff -r 7b81a535ad14 -r 210967b20137 Lib/email/message.py --- a/Lib/email/message.py Wed Jul 17 13:41:39 2013 +0200 +++ b/Lib/email/message.py Sun Jul 21 22:55:02 2013 +0800 @@ -227,47 +227,40 @@ payload = self._payload # cte might be a Header, so for now stringify it. cte = str(self.get('content-transfer-encoding', '')).lower() - # payload may be bytes here. if isinstance(payload, str): if utils._has_surrogates(payload): bpayload = payload.encode('ascii', 'surrogateescape') + if cte == 'base64': + bpayload = base64.b64encode(bpayload) if not decode: try: - payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') + payload = bpayload.decode(self.get_param('charset', + 'ascii'), + 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') elif decode: try: bpayload = payload.encode('ascii') except UnicodeError: - # This won't happen for RFC compliant messages (messages - # containing only ASCII codepoints in the unicode input). - # If it does happen, turn the string into bytes in a way - # guaranteed not to fail. - bpayload = payload.encode('raw-unicode-escape') + bytes_charset = self.get_param('charset', + failobj='latin-1', + header='content-type') + try: + bpayload = payload.encode(bytes_charset) + except UnicodeError: + # This won't happen for RFC compliant messages + # (messages containing only ASCII codepoints in the + # unicode input). If it does happen, turn the + # string into bytes in a way guaranteed not to fail. + bpayload = payload.encode('raw-unicode-escape') + if cte == 'base64': + bpayload = base64.b64encode(bpayload) + elif isinstance(payload, bytes): + bpayload = payload if not decode: return payload - if cte == 'quoted-printable': - return utils._qdecode(bpayload) - elif cte == 'base64': - # XXX: this is a bit of a hack; decode_b should probably be factored - # out somewhere, but I haven't figured out where yet. - value, defects = decode_b(b''.join(bpayload.splitlines())) - for defect in defects: - self.policy.handle_defect(self, defect) - return value - elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): - in_file = BytesIO(bpayload) - out_file = BytesIO() - try: - uu.decode(in_file, out_file, quiet=True) - return out_file.getvalue() - except uu.Error: - # Some decoding problem - return bpayload - if isinstance(payload, str): - return bpayload - return payload + return self._decode_transfer_encoded_bytes(bpayload, cte) def set_payload(self, payload, charset=None): """Set the payload to the given value. @@ -275,10 +268,50 @@ Optional charset sets the message's default character set. See set_charset() for details. """ - self._payload = payload + cte = str(self.get('content-transfer-encoding', '')).lower() + if isinstance(payload, str): + # If the payload contains non-ascii character for base64, it has + # to be converted to bytes. + if utils._has_surrogates(payload) and cte == 'base64': + payload = payload.encode('ascii', 'surrogateescape') + else: + self._payload = payload + + if isinstance(payload, bytes) and cte: + payload = self._decode_transfer_encoded_bytes(payload, cte) + bytes_charset = self.get_param('charset', + failobj='latin-1', + header='content-type') + self._payload = payload.decode(bytes_charset, + 'surrogateescape') + else: + self._payload = payload + if charset is not None: self.set_charset(charset) + def _decode_transfer_encoded_bytes(self, payload, cte): + if cte == 'quoted-printable': + return utils._qdecode(payload) + elif cte == 'base64': + # XXX: this is a bit of a hack; decode_b should probably be + # factored out somewhere, but I haven't figured out where yet. + value, defects = decode_b(b''.join(payload.splitlines())) + for defect in defects: + self.policy.handle_defect(self, defect) + return value + elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): + in_file = BytesIO(payload) + out_file = BytesIO() + try: + uu.decode(in_file, out_file, quiet=True) + return out_file.getvalue() + except uu.Error: + # Some decoding problem + return payload + else: + return payload + def set_charset(self, charset): """Set the charset of the payload to a given character set. diff -r 7b81a535ad14 -r 210967b20137 Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py Wed Jul 17 13:41:39 2013 +0200 +++ b/Lib/test/test_email/test_email.py Sun Jul 21 22:55:02 2013 +0800 @@ -552,6 +552,42 @@ msg._payload = x self.assertEqual(msg.get_payload(decode=True), x) + def test_binary_quopri_payload(self): + for charset in ('latin-1', 'ascii'): + msg = Message() + msg['content-type'] = 'text/plain; charset=%s' % charset + msg['content-transfer-encoding'] = 'quoted-printable' + msg.set_payload(b'foo=e6=96=87bar') + self.assertEqual( + msg.get_payload(decode=True), + b'foo\xe6\x96\x87bar', + 'get_payload returns wrong result with charset %s.' % charset) + + def test_binary_base64_payload(self): + for charset in ('latin-1', 'ascii'): + msg = Message() + msg['content-type'] = 'text/plain; charset=%s' % charset + msg['content-transfer-encoding'] = 'base64' + msg.set_payload(b'Zm9v5paHYmFy') + self.assertEqual( + msg.get_payload(decode=True), + b'foo\xe6\x96\x87bar', + 'get_payload returns wrong result with charset %s.' % charset) + + def test_binary_uuencode_payload(self): + for charset in ('latin-1', 'ascii'): + for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): + msg = Message() + msg['content-type'] = 'text/plain; charset=%s' % charset + msg['content-transfer-encoding'] = encoding + msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n") + self.assertEqual( + msg.get_payload(decode=True), + b'foo\xe6\x96\x87bar', + str(('get_payload returns wrong result ', + 'with charset {0} and encoding {1}.')).\ + format(charset, encoding)) + # Issue 1078919 def test_ascii_add_header(self): msg = Message()