diff -r 6ede0824127d Doc/library/test.rst --- a/Doc/library/test.rst Thu Mar 17 12:30:47 2011 -0400 +++ b/Doc/library/test.rst Sat Mar 19 08:11:06 2011 -0400 @@ -350,7 +350,7 @@ .. function:: captured_stdout() This is a context manager that runs the :keyword:`with` statement body using - a :class:`StringIO.StringIO` object as sys.stdout. That object can be + a :class:`io.StringIO` object as sys.stdout. That object can be retrieved using the ``as`` clause of the :keyword:`with` statement. Example use:: diff -r 6ede0824127d Lib/email/quoprimime.py --- a/Lib/email/quoprimime.py Thu Mar 17 12:30:47 2011 -0400 +++ b/Lib/email/quoprimime.py Sat Mar 19 08:11:06 2011 -0400 @@ -40,6 +40,7 @@ ] import re +import io from string import ascii_letters, digits, hexdigits @@ -155,72 +156,101 @@ this to "\\r\\n" if you will be using the result of this function directly in an email. - Each line will be wrapped at, at most, maxlinelen characters (defaults to - 76 characters). Long lines will have the `soft linefeed' quoted-printable - character "=" appended to them, so the decoded text will be identical to - the original text. + Each line will be wrapped at, at most, maxlinelen characters before the + eol string (maxlinelen defaults to 76 characters, the maximum value + permitted by RFC 2045). Long lines will have the 'soft line break' + quoted-printable character "=" appended to them, so the decoded text will + be identical to the original text. + + The minimum maxlinelen is 4 to have room for a quoted character ("=XX") + followed by a soft line break. Smaller values will generate an + OverflowError. + """ + + if maxlinelen < 4: + raise OverflowError("maxlinelen must be at least 4") if not body: return body - # BAW: We're accumulating the body text by string concatenation. That - # can't be very efficient, but I don't have time now to rewrite it. It - # just feels like this algorithm could be more efficient. 
- encoded_body = '' - lineno = -1 - # Preserve line endings here so we can check later to see an eol needs to - # be added to the output later. - lines = body.splitlines(1) - for line in lines: - # But strip off line-endings for processing this line. - if line.endswith(CRLF): - line = line[:-2] - elif line[-1] in CRLF: - line = line[:-1] + # The last line may not end in eol, but all other lines do. + last_has_eol = (body[-1] in '\r\n') - lineno += 1 - encoded_line = '' - prev = None - linelen = len(line) - # Now we need to examine every character to see if it needs to be - # quopri encoded. BAW: again, string concatenation is inefficient. - for j in range(linelen): - c = line[j] - prev = c + # Room left in current encoded line. + room = maxlinelen + encoded_body = io.StringIO() + + def write(s): + """Write string s to encoded_body.""" + nonlocal room + encoded_body.write(s) + room -= len(s) + + def write_line(s=''): + """Write string s to encoded_body, then start new encoded line.""" + nonlocal room + write(s) + encoded_body.write(eol) + room = maxlinelen + + def write_soft_break(): + """Write a soft line break to encoded_body.""" + write_line('=') + + def write_wrapped(s, extra_room=0): + """Perform soft line break if needed, then write s to encoded_body.""" + nonlocal room + if room < len(s) + extra_room: + write_soft_break() + write(s) + + def each_last(seq): + """Return (item, last) for each item in seq. + + 'last' will be True only for the last item in seq. 
+ """ + seq = iter(seq) + item = next(seq) + for next_item in seq: + yield item, False + item = next_item + yield item, True + + for line, on_last_line in each_last(body.splitlines()): + for c, at_eol in each_last(line): if body_check(ord(c)): c = quote(c) - elif j+1 == linelen: - # Check for whitespace at end of line; special case - if c not in ' \t': - encoded_line += c - prev = c - continue - # Check to see to see if the line has reached its maximum length - if len(encoded_line) + len(c) >= maxlinelen: - encoded_body += encoded_line + '=' + eol - encoded_line = '' - encoded_line += c - # Now at end of line.. - if prev and prev in ' \t': - # Special case for whitespace at end of file - if lineno + 1 == len(lines): - prev = quote(prev) - if len(encoded_line) + len(prev) > maxlinelen: - encoded_body += encoded_line + '=' + eol + prev - else: - encoded_body += encoded_line + prev - # Just normal whitespace at end of line + if not at_eol: + # Another character follows on this line, so we must leave + # extra room, either for it or a soft break, and whitespace + # need not be quoted. + write_wrapped(c, extra_room=1) + elif c not in ' \t': + # For this and remaining cases, no more characters follow, + # so there is no need to reserve extra room (since a hard + # break immediately follows). + write_wrapped(c) + elif room >= 3: + # It's a whitespace character at end-of-line, and we have room + # for the three-character quoted encoding. + write(quote(c)) + elif room == 2: + # There's room for the whitespace character and a soft break. + write(c) + write_soft_break() else: - encoded_body += encoded_line + prev + '=' + eol - encoded_line = '' - # Now look at the line we just finished and it has a line ending, we - # need to add eol to the end of the line. - if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF: - encoded_body += encoded_line + eol - else: - encoded_body += encoded_line - encoded_line = '' - return encoded_body + # There's room only for a soft break. 
The quoted whitespace + # will be the only content on the subsequent line. + write_soft_break() + write(quote(c)) + + # Add an eol if input line had eol. All input lines have eol except + # possibly the last one. + if not on_last_line or last_has_eol: + write_line() + + return encoded_body.getvalue() + diff -r 6ede0824127d Lib/email/test/test_email.py --- a/Lib/email/test/test_email.py Thu Mar 17 12:30:47 2011 -0400 +++ b/Lib/email/test/test_email.py Sat Mar 19 08:11:06 2011 -0400 @@ -3453,6 +3453,14 @@ def test_encode_one_line_one_space(self): self.encode(' \n', '=20\n') +# XXX: body_encode() expects strings, but uses ord(char) from these strings +# to index into a 256-entry list. For code points above 255, this will fail. +# Should there be a check for 8-bit only ord() values in body, or at least +# a comment about the expected input? + + def test_encode_two_lines_one_space(self): + self.encode(' \n \n', '=20\n=20\n') + def test_encode_one_word_trailing_spaces(self): self.encode('hello ', 'hello =20') @@ -3468,8 +3476,14 @@ def test_encode_trailing_space_before_maxlinelen(self): self.encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6) + def test_encode_trailing_space_at_maxlinelen(self): + self.encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5) + def test_encode_trailing_space_beyond_maxlinelen(self): - self.encode('abcd \n1234', 'abc=\nd =\n\n1234', maxlinelen=4) + self.encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4) + + def test_encode_whitespace_lines(self): + self.encode(' \n' * 5, '=20\n' * 5) def test_encode_quoted_equals(self): self.encode('a = b', 'a =3D b') @@ -3490,6 +3504,9 @@ def test_encode_shortest_maxlinelen(self): self.encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4) + def test_encode_maxlinelen_too_small(self): + self.assertRaises(OverflowError, self.encode, '', '', maxlinelen=3) + def test_encode(self): eq = self.assertEqual eq(quoprimime.body_encode(''), '')