# HG changeset patch # Parent 64ed56fbc5e752bcdc183d3a68fcc638a244c051 Issue #24364: Raise InvalidHeaderDefect and MisplacedEnvelopeHeaderDefect Also: * Run the tests in TestDefectsBase against “email.policy.compat32” as well as “email.policy.default” * Remove various tests from TestNonConformant, which are now redundant with methods of the same name in TestCompat32 * Alter test_multipart_no_cte_no_defect() and test_multipart_valid_cte_no_defect() to still test for the absence of defects in TestDefectRaising diff -r 64ed56fbc5e7 Lib/email/feedparser.py --- a/Lib/email/feedparser.py Sat Jun 25 03:06:58 2016 +0000 +++ b/Lib/email/feedparser.py Sat Jun 25 10:18:03 2016 +0000 @@ -502,10 +502,9 @@ self._input.unreadline(line) return else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. + # Weirdly placed unix-from line. Ignore it. defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue # Split the line on the colon separating field name from value. # There will always be a colon, because if there wasn't the part of @@ -514,10 +513,10 @@ # If the colon is on the start of the line the header is clearly # malformed, but we might be able to salvage the rest of the - # message. Track the error but keep going. + # message. 
if i == 0: defect = errors.InvalidHeaderDefect("Missing header name.") - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue assert i>0, "_parse_headers fed line with no : and no leading WS" diff -r 64ed56fbc5e7 Lib/test/test_email/test_defect_handling.py --- a/Lib/test/test_email/test_defect_handling.py Sat Jun 25 03:06:58 2016 +0000 +++ b/Lib/test/test_email/test_defect_handling.py Sat Jun 25 10:18:03 2016 +0000 @@ -15,6 +15,17 @@ def _raise_point(self, defect): yield + def get_defects(self, obj): + return obj.defects + + def check_defect(self, defect, string): + msg = None + with self._raise_point(defect): + msg = self._str_msg(string) + self.assertEqual(len(self.get_defects(msg)), 1) + self.assertDefectsEqual(self.get_defects(msg), [defect]) + return msg + def test_same_boundary_inner_outer(self): source = textwrap.dedent("""\ Subject: XX @@ -126,12 +137,10 @@ errors.InvalidMultipartContentTransferEncodingDefect) def test_multipart_no_cte_no_defect(self): - if self.raise_expected: return msg = self._str_msg(self.multipart_msg.format('')) self.assertEqual(len(self.get_defects(msg)), 0) def test_multipart_valid_cte_no_defect(self): - if self.raise_expected: return for cte in ('7bit', '8bit', 'BINary'): msg = self._str_msg( self.multipart_msg.format("\nContent-Transfer-Encoding: "+cte)) @@ -283,11 +292,38 @@ self.assertDefectsEqual(self.get_defects(msg), [errors.CloseBoundaryNotFoundDefect]) + def test_line_beginning_colon(self): + msg = self.check_defect(errors.InvalidHeaderDefect, + 'Subject: Dummy subject\r\n' + ': faulty header line\r\n' + '\r\n' + 'body\r\n' + ) + if msg: + self.assertEqual(msg.items(), [('Subject', 'Dummy subject')]) + self.assertEqual(msg.get_payload(), 'body\r\n') + + def test_misplaced_envelope(self): + msg = self.check_defect(errors.MisplacedEnvelopeHeaderDefect, + 'Subject: Dummy subject\r\n' + 'From wtf\r\n' + 'To: abc\r\n' + '\r\n' + 'body\r\n' + ) + if msg: + headers = [('Subject', 'Dummy 
subject'), ('To', 'abc')] + self.assertEqual(msg.items(), headers) + self.assertEqual(msg.get_payload(), 'body\r\n') + class TestDefectDetection(TestDefectsBase, TestEmailBase): + pass - def get_defects(self, obj): - return obj.defects + +class TestCompat32(TestDefectsBase, TestEmailBase): + + policy = policy.compat32 class TestDefectCapture(TestDefectsBase, TestEmailBase): diff -r 64ed56fbc5e7 Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py Sat Jun 25 03:06:58 2016 +0000 +++ b/Lib/test/test_email/test_email.py Sat Jun 25 10:18:03 2016 +0000 @@ -2094,50 +2094,6 @@ self.assertIsInstance(msg.defects[1], errors.MultipartInvariantViolationDefect) - multipart_msg = textwrap.dedent("""\ - Date: Wed, 14 Nov 2007 12:56:23 GMT - From: foo@bar.invalid - To: foo@bar.invalid - Subject: Content-Transfer-Encoding: base64 and multipart - MIME-Version: 1.0 - Content-Type: multipart/mixed; - boundary="===============3344438784458119861=="{} - - --===============3344438784458119861== - Content-Type: text/plain - - Test message - - --===============3344438784458119861== - Content-Type: application/octet-stream - Content-Transfer-Encoding: base64 - - YWJj - - --===============3344438784458119861==-- - """) - - # test_defect_handling - def test_multipart_invalid_cte(self): - msg = self._str_msg( - self.multipart_msg.format("\nContent-Transfer-Encoding: base64")) - self.assertEqual(len(msg.defects), 1) - self.assertIsInstance(msg.defects[0], - errors.InvalidMultipartContentTransferEncodingDefect) - - # test_defect_handling - def test_multipart_no_cte_no_defect(self): - msg = self._str_msg(self.multipart_msg.format('')) - self.assertEqual(len(msg.defects), 0) - - # test_defect_handling - def test_multipart_valid_cte_no_defect(self): - for cte in ('7bit', '8bit', 'BINary'): - msg = self._str_msg( - self.multipart_msg.format( - "\nContent-Transfer-Encoding: {}".format(cte))) - self.assertEqual(len(msg.defects), 0) - # test_headerregistry.TestContentTyopeHeader 
invalid_1 and invalid_2. def test_invalid_content_type(self): eq = self.assertEqual @@ -2214,30 +2170,6 @@ self.assertIsInstance(bad.defects[0], errors.StartBoundaryNotFoundDefect) - # test_defect_handling - def test_first_line_is_continuation_header(self): - eq = self.assertEqual - m = ' Line 1\nSubject: test\n\nbody' - msg = email.message_from_string(m) - eq(msg.keys(), ['Subject']) - eq(msg.get_payload(), 'body') - eq(len(msg.defects), 1) - self.assertDefectsEqual(msg.defects, - [errors.FirstHeaderLineIsContinuationDefect]) - eq(msg.defects[0].line, ' Line 1\n') - - # test_defect_handling - def test_missing_header_body_separator(self): - # Our heuristic if we see a line that doesn't look like a header (no - # leading whitespace but no ':') is to assume that the blank line that - # separates the header from the body is missing, and to stop parsing - # headers and start parsing the body. - msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') - self.assertEqual(msg.keys(), ['Subject']) - self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') - self.assertDefectsEqual(msg.defects, - [errors.MissingHeaderBodySeparatorDefect]) - # Test RFC 2047 header encoding and decoding class TestRFC2047(TestEmailBase): diff -r 64ed56fbc5e7 Misc/NEWS --- a/Misc/NEWS Sat Jun 25 03:06:58 2016 +0000 +++ b/Misc/NEWS Sat Jun 25 10:18:03 2016 +0000 @@ -10,6 +10,9 @@ Library ------- +- Issue #24364: In the "email" package, raise InvalidHeaderDefect and + MisplacedEnvelopeHeaderDefect as exceptions if directed by the policy. + - Issue #27038: Expose the DirEntry type as os.DirEntry. Code patch by Jelle Zijlstra.