diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py --- a/Lib/email/encoders.py +++ b/Lib/email/encoders.py @@ -76,3 +76,9 @@ def encode_noop(msg): """Do nothing.""" + # Well, not quite *nothing*: in Python3 we have to turn bytes into a string + # in our internal surrogateescaped form in order to keep the model + # consistent. + orig = msg.get_payload() + if not isinstance(orig, str): + msg.set_payload(orig.decode('ascii', 'surrogateescape')) diff --git a/Lib/email/generator.py b/Lib/email/generator.py --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -397,6 +397,9 @@ else: super(BytesGenerator,self)._handle_text(msg) + # Default body handler + _writeBody = _handle_text + @classmethod def _compile_re(cls, s, flags): return re.compile(s.encode('ascii'), flags) diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1438,6 +1438,22 @@ eq(msg.get_payload().strip(), '+vv8/f7/') eq(msg.get_payload(decode=True), bytesdata) + def test_body_with_encode_noop(self): + # Issue 16564: This does not produce an RFC valid message, since to be + # valid it should have a CTE of binary. But the below works in + # Python2, and is documented as working this way. + bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' + msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop) + # Treated as a string, this will be invalid code points. + self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) + self.assertEqual(msg.get_payload(decode=True), bytesdata) + s = BytesIO() + g = BytesGenerator(s) + g.flatten(msg) + wireform = s.getvalue() + msg2 = email.message_from_bytes(wireform) + self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) + self.assertEqual(msg2.get_payload(decode=True), bytesdata) # Test the basic MIMEText class