# Patch summary
#
# Lib/email/feedparser.py
#   * __init__: self._partial changes from a single string ('') to a list of
#     chunks ([]) that together form the last incomplete line.
#   * close(): instead of appending the leftover string to self._lines, the
#     accumulated chunks are joined, split with splitlines(True), and handed
#     to pushlines().
#   * push(): the leftover is no longer concatenated with every new piece of
#     data up front.  If the new data yields no parts, or its first part does
#     not end in '\n' or '\r', all parts are appended to self._partial and the
#     method returns.  Otherwise the stored chunks plus parts[0] are joined
#     and re-split, and the stored chunks are cleared.  As before, a final
#     part that does not end in '\n' is held back as the new partial line.
#     Presumably this avoids repeated string concatenation when one long line
#     arrives in many chunks (cf. test_long_lines) -- confirm against the
#     tracker issue.
#
# Lib/test/test_email/test_email.py
#   * Adds TestFeedParsers, which feeds lists of chunks to FeedParser and
#     checks header keys/items and payload for various line terminators
#     (test_newlines) and for very long lines split over many chunks
#     (test_long_lines).
#
# NOTE(review): this copy of the patch had lost its line structure.  Line
# breaks, indentation and blank context lines below were reconstructed from
# the hunk headers and Python syntax; runs of spaces inside comment text
# could not be recovered.  The last hunk header declares 6 old lines but
# fewer context lines are visible here, so the tail may be cut off.  Verify
# against the repository before applying.

diff -r 06cf4044a11a Lib/email/feedparser.py
--- a/Lib/email/feedparser.py	Sat Aug 09 09:34:25 2014 +0300
+++ b/Lib/email/feedparser.py	Sun Aug 10 20:50:07 2014 +0300
@@ -50,8 +50,8 @@
     simple abstraction -- it parses until EOF closes the current message.
     """
     def __init__(self):
-        # The last partial line pushed into this object.
-        self._partial = ''
+        # Chunks of the last partial line pushed into this object.
+        self._partial = []
         # The list of full, pushed lines, in reverse order
         self._lines = []
         # The stack of false-EOF checking predicates.
@@ -67,8 +67,8 @@
 
     def close(self):
         # Don't forget any trailing partial line.
-        self._lines.append(self._partial)
-        self._partial = ''
+        self.pushlines(''.join(self._partial).splitlines(True))
+        self._partial = []
         self._closed = True
 
     def readline(self):
@@ -96,16 +96,26 @@
 
     def push(self, data):
         """Push some new data into this object."""
-        # Handle any previous leftovers
-        data, self._partial = self._partial + data, ''
         # Crack into lines, but preserve the linesep characters on the end of each
         parts = data.splitlines(True)
+
+        if not parts or not parts[0].endswith(('\n', '\r')):
+            # No new complete lines, so just accumulate partials
+            self._partial += parts
+            return
+
+        if self._partial:
+            # If there are previous leftovers, complete them now
+            self._partial.append(parts[0])
+            parts[0:1] = ''.join(self._partial).splitlines(True)
+            del self._partial[:]
+
         # If the last element of the list does not end in a newline, then treat
         # it as a partial line. We only check for '\n' here because a line
         # ending with '\r' might be a line that was split in the middle of a
         # '\r\n' sequence (see bugs 1555570 and 1721862).
-        if parts and not parts[-1].endswith('\n'):
-            self._partial = parts.pop()
+        if not parts[-1].endswith('\n'):
+            self._partial = [parts.pop()]
         self.pushlines(parts)
 
     def pushlines(self, lines):
diff -r 06cf4044a11a Lib/test/test_email/test_email.py
--- a/Lib/test/test_email/test_email.py	Sat Aug 09 09:34:25 2014 +0300
+++ b/Lib/test/test_email/test_email.py	Sun Aug 10 20:50:07 2014 +0300
@@ -3364,6 +3364,44 @@
         self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
 
 
+class TestFeedParsers(TestEmailBase):
+
+    def parse(self, chunks):
+        from email.feedparser import FeedParser
+        feedparser = FeedParser()
+        for chunk in chunks:
+            feedparser.feed(chunk)
+        return feedparser.close()
+
+    def test_newlines(self):
+        m = self.parse(['a:\nb:\rc:\r\nd:\n'])
+        self.assertEqual(m.keys(), ['a', 'b', 'c', 'd'])
+        m = self.parse(['a:\nb:\rc:\r\nd:'])
+        self.assertEqual(m.keys(), ['a', 'b', 'c', 'd'])
+        m = self.parse(['a:\rb', 'c:\n'])
+        self.assertEqual(m.keys(), ['a', 'bc'])
+        m = self.parse(['a:\r', 'b:\n'])
+        self.assertEqual(m.keys(), ['a', 'b'])
+        m = self.parse(['a:\r', '\nb:\n'])
+        self.assertEqual(m.keys(), ['a', 'b'])
+        m = self.parse(['a:\x85b:\u2028c:\n'])
+        self.assertEqual(m.items(), [('a', '\x85'), ('b', '\u2028'), ('c', '')])
+        m = self.parse(['a:\r', 'b:\x85', 'c:\n'])
+        self.assertEqual(m.items(), [('a', ''), ('b', '\x85'), ('c', '')])
+
+    def test_long_lines(self):
+        m = self.parse(['a:b\n\n'] + ['x'*1000] * 10000)
+        self.assertEqual(m.items(), [('a', 'b')])
+        self.assertEqual(m.get_payload(), 'x'*10000000)
+        m = self.parse(['a:b\r\r'] + ['x'*1000] * 10000)
+        self.assertEqual(m.items(), [('a', 'b')])
+        self.assertEqual(m.get_payload(), 'x'*10000000)
+        m = self.parse(['a:b\r\r'] + ['x'*1000+'\x85'] * 10000)
+        self.assertEqual(m.items(), [('a', 'b')])
+        self.assertEqual(m.get_payload(), ('x'*1000+'\x85')*10000)
+        m = self.parse(['a:\r', 'b: '] + ['x'*1000] * 10000)
+        self.assertEqual(m.items(), [('a', ''), ('b', 'x'*10000000)])
+
 class TestParsers(TestEmailBase):