Index: Lib/email/test/test_email.py =================================================================== --- Lib/email/test/test_email.py (revision 87768) +++ Lib/email/test/test_email.py (working copy) @@ -2886,52 +2886,61 @@ '<,.V maxlen: # This last character doesn't fit so pop it off. current_line.pop() @@ -345,12 +354,12 @@ else: separator = (' ' if lines else '') joined_line = EMPTYSTRING.join(current_line) - header_bytes = joined_line.encode(codec) + header_bytes = _encode(joined_line, codec) lines.append(encoder(header_bytes)) current_line = [character] maxlen = next(maxlengths) - extra joined_line = EMPTYSTRING.join(current_line) - header_bytes = joined_line.encode(codec) + header_bytes = _encode(joined_line, codec) lines.append(encoder(header_bytes)) return lines Index: Lib/email/message.py =================================================================== --- Lib/email/message.py (revision 87768) +++ Lib/email/message.py (working copy) @@ -16,6 +16,7 @@ # Intrapackage imports from email import utils from email import errors +from email import header from email.charset import Charset SEMISPACE = '; ' @@ -31,16 +32,14 @@ # Helper functions -def _sanitize_surrogates(value): - # If the value contains surrogates, re-decode and replace the original - # non-ascii bytes with '?'s. Used to sanitize header values before letting - # them escape as strings. +def _sanitize_header(name, value): + # If the header value contains surrogates, return a Header using + # the unknown-8bit charset to encode the bytes as encoded words. if not isinstance(value, str): - # Header object + # Assume it is already a header object return value if _has_surrogates(value): - original_bytes = value.encode('ascii', 'surrogateescape') - return original_bytes.decode('ascii', 'replace').replace('\ufffd', '?') + return header.Header(value, charset='unknown-8bit', header_name=name) else: return value @@ -398,7 +397,7 @@ Any fields deleted and re-inserted are always appended to the header list. """ - return [_sanitize_surrogates(v) for k, v in self._headers] + return [_sanitize_header(k, v) for k, v in self._headers] def items(self): """Get all the message's header fields and values. @@ -408,7 +407,7 @@ Any fields deleted and re-inserted are always appended to the header list. """ - return [(k, _sanitize_surrogates(v)) for k, v in self._headers] + return [(k, _sanitize_header(k, v)) for k, v in self._headers] def get(self, name, failobj=None): """Get a header value. @@ -419,7 +418,7 @@ name = name.lower() for k, v in self._headers: if k.lower() == name: - return _sanitize_surrogates(v) + return _sanitize_header(k, v) return failobj # @@ -439,7 +438,7 @@ name = name.lower() for k, v in self._headers: if k.lower() == name: - values.append(_sanitize_surrogates(v)) + values.append(_sanitize_header(k, v)) if not values: return failobj return values