Index: Lib/nntplib.py =================================================================== --- Lib/nntplib.py (révision 85196) +++ Lib/nntplib.py (copie de travail) @@ -69,7 +69,7 @@ import datetime import warnings -from email.header import decode_header as _email_decode_header +from email.header import decode_header_as_string as _email_decode_header from socket import _GLOBAL_DEFAULT_TIMEOUT __all__ = ["NNTP", @@ -153,13 +153,7 @@ def decode_header(header_str): """Takes an unicode string representing a munged header value and decodes it as a (possibly non-ASCII) readable value.""" - parts = [] - for v, enc in _email_decode_header(header_str): - if isinstance(v, bytes): - parts.append(v.decode(enc or 'ascii')) - else: - parts.append(v) - return ' '.join(parts) + return _email_decode_header(header_str) def _parse_overview_fmt(lines): """Parse a list of string representing the response to LIST OVERVIEW.FMT Index: Lib/email/header.py =================================================================== --- Lib/email/header.py (révision 85196) +++ Lib/email/header.py (copie de travail) @@ -47,7 +47,6 @@ fcre = re.compile(r'[\041-\176]+:$') - # Helpers _max_append = email.quoprimime._max_append @@ -127,6 +126,57 @@ return collapsed +def decode_header_as_string(header, accept_8bit=False): + """Decode a message header value (as a str object). + Returns a str object of the decoded header. + + An email.errors.HeaderParseError may be raised when certain decoding + errors occur (e.g. a base64 decoding exception). + """ + # If no encoding, just return the header with no charset. + if not ecre.search(header): + return header + # First step is to parse all the encoded parts into triplets of the form + # (encoded_string, encoding, charset). For unencoded strings, the last + # two parts will be None. 
+ words = [] + for line in header.splitlines(): + parts = ecre.split(line) + while parts: + unencoded = parts.pop(0) + if unencoded: + words.append((unencoded, None, None)) + if parts: + charset = parts.pop(0).lower() + encoding = parts.pop(0).lower() + encoded = parts.pop(0) + words.append((encoded, encoding, charset)) + # The next step is to decode each encoded word by applying the reverse + # base64 or quopri transformation, and then the charset decoding. + decoded_words = [] + for encoded_string, encoding, charset in words: + if encoding is None: + # This is an unencoded word. + if encoded_string.strip(): + decoded_words.append(encoded_string) + elif encoding == 'q': + word = email.quoprimime.header_decode(encoded_string) + decoded_words.append(word.encode('latin1').decode(charset)) + elif encoding == 'b': + paderr = len(encoded_string) % 4 # Postel's law: add missing padding + if paderr: + encoded_string += '==='[:4 - paderr] + try: + word = email.base64mime.decode(encoded_string) + except binascii.Error: + raise HeaderParseError('Base64 decoding error') + else: + decoded_words.append(word.decode(charset)) + else: + raise AssertionError('Unexpected encoding: ' + encoding) + return ''.join(filter(None, decoded_words)) + + def make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' '): Index: Lib/email/test/test_email.py =================================================================== --- Lib/email/test/test_email.py (révision 85196) +++ Lib/email/test/test_email.py (copie de travail) @@ -16,7 +16,7 @@ import email from email.charset import Charset -from email.header import Header, decode_header, make_header +from email.header import Header, decode_header, decode_header_as_string, make_header from email.parser import Parser, HeaderParser from email.generator import Generator, DecodedGenerator from email.message import Message @@ -1618,6 +1618,8 @@ self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ Re: 
=?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= =?mac-iceland?q?=9Arg=8Cs?=""") + v = decode_header_as_string(s) + eq(v, 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') def test_whitespace_eater_unicode(self): eq = self.assertEqual @@ -1627,6 +1629,8 @@ (b'Pirard ', None)]) header = str(make_header(dh)) eq(header, 'Andr\xe9 Pirard ') + v = decode_header_as_string(s) + eq(v, 'Andr\xe9 Pirard ') def test_whitespace_eater_unicode_2(self): eq = self.assertEqual @@ -1636,11 +1640,15 @@ (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')]) hu = str(make_header(dh)) eq(hu, 'The quick brown fox jumped over the lazy dog') + v = decode_header_as_string(s) + eq(v, 'The quick brown fox jumped over the lazy dog') def test_rfc2047_missing_whitespace(self): s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' dh = decode_header(s) self.assertEqual(dh, [(s, None)]) + v = decode_header_as_string(s) + self.assertEqual(v, s) def test_rfc2047_with_whitespace(self): s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' @@ -1648,6 +1656,8 @@ self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'), (b'rg', None), (b'\xe5', 'iso-8859-1'), (b'sbord', None)]) + v = decode_header_as_string(s) + self.assertEqual(v, 'Sm \xf6 rg \xe5 sbord') def test_rfc2047_B_bad_padding(self): s = '=?iso-8859-1?B?%s?=' @@ -1658,13 +1668,27 @@ for q, a in data: dh = decode_header(s % q) self.assertEqual(dh, [(a, 'iso-8859-1')]) + v = decode_header_as_string(s % q) + self.assertEqual(v, a.decode('latin1')) def test_rfc2047_Q_invalid_digits(self): # issue 10004. - s = '=?iso-8659-1?Q?andr=e9=zz?=' + s = '=?iso-8859-1?Q?andr=e9=zz?=' self.assertEqual(decode_header(s), - [(b'andr\xe9=zz', 'iso-8659-1')]) + [(b'andr\xe9=zz', 'iso-8859-1')]) + v = decode_header_as_string(s) + self.assertEqual(v, 'andr\xe9=zz') + def test_unescaped_unicode(self): + # decode_header_as_string() accepts arbitrary unicode chars in + # unescaped parts. 
+ s = 'h\u1234' + v = decode_header_as_string(s) + self.assertEqual(v, s) + s = '=?iso-8859-1?Q?andr=e9?= h\u1234' + v = decode_header_as_string(s) + self.assertEqual(v, 'andré h\u1234') + # Test the MIMEMessage class class TestMIMEMessage(TestEmailBase):