diff --git a/Doc/library/email.parser.rst b/Doc/library/email.parser.rst --- a/Doc/library/email.parser.rst +++ b/Doc/library/email.parser.rst @@ -94,12 +94,14 @@ The :class:`Parser` class, imported from the :mod:`email.parser` module, provides an API that can be used to parse a message when the complete contents of the message are available in a string or file. The :mod:`email.parser` -module also provides a second class, called :class:`HeaderParser` which can be -used if you're only interested in the headers of the message. -:class:`HeaderParser` can be much faster in these situations, since it does not -attempt to parse the message body, instead setting the payload to the raw body -as a string. :class:`HeaderParser` has the same API as the :class:`Parser` -class. +module also provides header-only parsers, called :class:`HeaderParser` and +:class:`BytesHeaderParser`, which can be used if you're only interested in +the headers of the message. +:class:`HeaderParser` and :class:`BytesHeaderParser` can be much faster in +these situations, since they do not attempt to parse the message body, +instead setting the payload to the raw body as a string. +They do have the same API as the :class:`Parser` and :class:`BytesParser` +classes. .. class:: Parser(_class=email.message.Message) diff --git a/Lib/email/parser.py b/Lib/email/parser.py --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -4,7 +4,7 @@ """A parser of RFC 2822 and MIME email messages.""" -__all__ = ['Parser', 'HeaderParser'] +__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser'] import warnings from io import StringIO, TextIOWrapper @@ -114,3 +114,12 @@ """ text = text.decode('ASCII', errors='surrogateescape') return self.parser.parsestr(text, headersonly) + + +class BytesHeaderParser(BytesParser): + def parse(self, fp, headersonly=True): + return BytesParser.parse(self, fp, headersonly=True) + + def parsebytes(self, text, headersonly=True): + return BytesParser.parsebytes(self, text, headersonly=True) + diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -177,6 +177,40 @@ gen.flatten(msg, False) self.assertEqual(out.getvalue(), msgdata) + def test_message_rfc822_only_fp(self): + # Issue 7970: message/rfc822 not in multipart parsed by + # HeaderParser caused an exception when flattened. + with openfile('msg_46.txt') as fp: + msgdata = fp.read() + out = StringIO() + with openfile('msg_46.txt') as fp: + parser = HeaderParser() + msg = parser.parse(fp) + gen = Generator(out, True, 0) + gen.flatten(msg, False) + self.assertEqual(out.getvalue(), msgdata) + + def test_byte_message_rfc822_only(self): + with openfile('msg_46.txt', 'rb') as fp: + msgdata = fp.read() + parser = email.parser.BytesHeaderParser() + msg = parser.parsebytes(msgdata) + out = BytesIO() + gen = email.generator.BytesGenerator(out) + gen.flatten(msg) + self.assertEqual(out.getvalue(), msgdata) + + def test_byte_message_rfc822_only_fp(self): + with openfile('msg_46.txt', 'rb') as fp: + msgdata = fp.read() + out = BytesIO() + with openfile('msg_46.txt', 'rb') as fp: + parser = email.parser.BytesHeaderParser() + msg = parser.parse(fp) + gen = email.generator.BytesGenerator(out) + gen.flatten(msg) + self.assertEqual(out.getvalue(), msgdata) + def test_get_decoded_payload(self): eq = self.assertEqual msg = self._msgobj('msg_10.txt')