diff -r 149cc6364180 Doc/library/codecs.rst --- a/Doc/library/codecs.rst Wed Jun 11 08:04:16 2014 +0100 +++ b/Doc/library/codecs.rst Wed Jun 11 13:17:33 2014 +0200 @@ -1359,7 +1359,9 @@ socket module. On top of that, modules that have host names as function parameters, such as :mod:`http.client` and :mod:`ftplib`, accept Unicode host names (:mod:`http.client` then also transparently sends an IDNA hostname in the -:mailheader:`Host` field if it sends that field at all). +:mailheader:`Host` field if it sends that field at all), and the :mod:`email` +module's :func:`~email.utils.parseaddr` and :func:`~email.utils.formataddr` +functions automatically apply IDNA to the hostnames in email addresses. .. _section 3.1: http://tools.ietf.org/html/rfc3490#section-3.1 diff -r 149cc6364180 Doc/library/email.util.rst --- a/Doc/library/email.util.rst Wed Jun 11 08:04:16 2014 +0100 +++ b/Doc/library/email.util.rst Wed Jun 11 13:17:33 2014 +0200 @@ -21,12 +21,17 @@ begins with angle brackets, they are stripped off. -.. function:: parseaddr(address) +.. function:: parseaddr(address, decode_idna=True) Parse address -- which should be the value of some address-containing field such as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. + If *decode_idna* is true (the default), the domain part of the address is + decoded from IDNA according to :rfc:`3490`. + + .. versionchanged:: 3.5 + Added the *decode_idna* option. .. function:: formataddr(pair, charset='utf-8') diff -r 149cc6364180 Lib/email/utils.py --- a/Lib/email/utils.py Wed Jun 11 08:04:16 2014 +0100 +++ b/Lib/email/utils.py Wed Jun 11 13:17:33 2014 +0200 @@ -73,13 +73,25 @@ # Helpers +def _encode_decode_addr(addr, encode_codec, decode_codec): + """Helper function for formataddr() and parseaddr() to encode and + decode via IDNA. 
+ """ + parts = addr.split("@") + if len(parts) <= 1: + return addr + parts[-1] = parts[-1].encode(encode_codec).decode(decode_codec) + return "@".join(parts) + + def formataddr(pair, charset='utf-8'): """The inverse of parseaddr(), this takes a 2-tuple of the form (realname, email_address) and returns the string value suitable for an RFC 2822 From, To or Cc header. If the first element of pair is false, then the second element is - returned unmodified. + returned without surrounding it with <>. The hostname portion of the + second element (if any) is encoded using the IDNA codec. Optional charset if given is the character set that is used to encode realname in case realname is not ASCII safe. Can be an instance of str or @@ -87,7 +99,9 @@ 'utf-8'. """ name, address = pair - # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't. + address = _encode_decode_addr(address, 'idna', 'ascii') + # The address MUST (per RFC) be ASCII, so if there's any non-ASCII left + # raise a UnicodeError. address.encode('ascii') if name: try: @@ -231,11 +245,14 @@ tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) -def parseaddr(addr): +def parseaddr(addr, decode_idna=True): addrs = _AddressList(addr).addresslist if not addrs: return '', '' - return addrs[0] + name, address = addrs[0] + if decode_idna: + address = _encode_decode_addr(address, 'ascii', 'idna') + return name, address # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. diff -r 149cc6364180 Lib/smtplib.py --- a/Lib/smtplib.py Wed Jun 11 08:04:16 2014 +0100 +++ b/Lib/smtplib.py Wed Jun 11 13:17:33 2014 +0200 @@ -139,7 +139,7 @@ Should be able to handle anything email.utils.parseaddr can handle. """ - displayname, addr = email.utils.parseaddr(addrstring) + displayname, addr = email.utils.parseaddr(addrstring, decode_idna=False) if (displayname, addr) == ('', ''): # parseaddr couldn't parse it, use it as is and hope for the best. 
if addrstring.strip().startswith('<'): @@ -148,7 +148,7 @@ return "<%s>" % addr def _addr_only(addrstring): - displayname, addr = email.utils.parseaddr(addrstring) + displayname, addr = email.utils.parseaddr(addrstring, decode_idna=False) if (displayname, addr) == ('', ''): # parseaddr couldn't parse it, so use it as is. return addrstring diff -r 149cc6364180 Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py Wed Jun 11 08:04:16 2014 +0100 +++ b/Lib/test/test_email/test_email.py Wed Jun 11 13:17:33 2014 +0200 @@ -3274,6 +3274,25 @@ g.flatten(msg, linesep='\r\n') self.assertEqual(s.getvalue(), msgtxt) + def test_formataddr_encodes_idnas(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + addr = "foo@d\u00f6m.ain" + puny = "foo@xn--dm-fka.ain" + self.assertEqual(utils.formataddr((None, addr)), puny) + + def test_parseaddr_decodes_idnas(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + puny = "Foo <bar@xn--dm-fka.ain>" + self.assertEqual(utils.parseaddr(puny), ("Foo", "bar@d\u00f6m.ain")) + + def test_parseaddr_formataddr_ignore_idn_in_local_part_only(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + addr = "xn--dm-fka" + name = "Foo" + pair = "%s <%s>" % (name, addr) + self.assertEqual(utils.parseaddr(pair), (name, addr)) + self.assertEqual(utils.formataddr((name, addr)), pair) + # Test the iterator/generators class TestIterators(TestEmailBase):