diff -r a7e0a1dbfbb6 Lib/email/utils.py
--- a/Lib/email/utils.py Sun Mar 27 10:15:57 2011 +0200
+++ b/Lib/email/utils.py Sun Mar 27 18:59:31 2011 +0200
@@ -42,6 +42,7 @@
 
 # Intrapackage imports
 from email.encoders import _bencode, _qencode
+from email.charset import Charset
 
 COMMASPACE = ', '
 EMPTYSTRING = ''
@@ -56,21 +57,35 @@
 
 # Helpers
 
-def formataddr(pair):
+def formataddr(pair, charset='utf-8'):
     """The inverse of parseaddr(), this takes a 2-tuple of the form
     (realname, email_address) and returns the string value suitable
     for an RFC 2822 From, To or Cc header.
 
     If the first element of pair is false, then the second element is
     returned unmodified.
+
+    The optional charset is the character set used to RFC 2047 encode
+    the realname when it contains non-ASCII characters.  It may be a
+    charset name or a Charset instance and defaults to utf-8.
     """
     name, address = pair
+    # The address must be plain ASCII; raises UnicodeEncodeError if not.
+    address.encode('ascii')
     if name:
-        quotes = ''
-        if specialsre.search(name):
-            quotes = '"'
-        name = escapesre.sub(r'\\\g<0>', name)
-        return '%s%s%s <%s>' % (quotes, name, quotes, address)
+        try:
+            name.encode('ascii')
+        except UnicodeEncodeError:
+            if isinstance(charset, str):
+                charset = Charset(charset)
+            encoded_name = charset.header_encode(name)
+            return "%s <%s>" % (encoded_name, address)
+        else:
+            quotes = ''
+            if specialsre.search(name):
+                quotes = '"'
+            name = escapesre.sub(r'\\\g<0>', name)
+            return '%s%s%s <%s>' % (quotes, name, quotes, address)
     return address
 
 
diff -r a7e0a1dbfbb6 Lib/test/test_email/test_email.py
--- a/Lib/test/test_email/test_email.py Sun Mar 27 10:15:57 2011 +0200
+++ b/Lib/test/test_email/test_email.py Sun Mar 27 18:59:31 2011 +0200
@@ -2408,6 +2408,35 @@
         b = 'person@dom.ain'
         self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
 
+    def test_quotes_unicode_names(self):
+        # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
+        name = "H\u00e4ns W\u00fcrst"
+        addr = 'person@domain'
+        # upper and lower case b and q are allowed here:
+        utf8_base64 = r"=\?utf-8\?[bB]\?SMOkbnMgV8O8cnN0\?= "
+        iso_quopri = r"=\?iso-8859-1\?[qQ]\?H=E4ns_W=FCrst\?= "
+        self.assertTrue(re.match(utf8_base64, utils.formataddr((name, addr))))
+        self.assertTrue(re.match(iso_quopri, utils.formataddr((name, addr),
+                                                              'iso-8859-1')))
+
+    def test_unicode_address_raises_error(self):
+        # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
+        addr = 'pers\u00f6n@dom.in'
+        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
+        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
+
+    def test_unicode_names_work_with_message_class(self):
+        # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
+        name = "H\u00e4ns W\u00fcrst"
+        addr = 'person@dom.ain'
+        msg = Message()
+        msg['To'] = utils.formataddr((name, addr))
+        msg.set_payload("Hello World!")
+        self.assertEqual(msg.as_string(), '''\
+To: =?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>
+
+Hello World!''')
+
     def test_name_with_dot(self):
         x = 'John X. Doe '
         y = '"John X. Doe" '
diff -r a7e0a1dbfbb6 Misc/ACKS
--- a/Misc/ACKS Sun Mar 27 10:15:57 2011 +0200
+++ b/Misc/ACKS Sun Mar 27 18:59:31 2011 +0200
@@ -977,3 +977,4 @@
 Kai Zhu
 Tarek Ziadé
 Peter Åstrand
+Torsten Becker