diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst
--- a/Doc/library/email.util.rst
+++ b/Doc/library/email.util.rst
@@ -28,13 +28,18 @@
    *email address* parts.  Returns a tuple of that information, unless the parse
    fails, in which case a 2-tuple of ``('', '')`` is returned.
 
+   If the host name of the *email address* part is an IDN, it is automatically
+   decoded to Unicode with the 'idna' codec.
+
 
 .. function:: formataddr(pair, charset='utf-8')
 
    The inverse of :meth:`parseaddr`, this takes a 2-tuple of the form ``(realname,
    email_address)`` and returns the string value suitable for a :mailheader:`To` or
    :mailheader:`Cc` header.  If the first element of *pair* is false, then the
-   second element is returned unmodified.
+   second element is returned unmodified.  If the host name of email_address
+   contains non-ASCII characters, it is automatically converted to an IDN
+   representation with the 'idna' codec.
 
    Optional *charset* is the character set that will be used in the :rfc:`2047`
    encoding of the ``realname`` if the ``realname`` contains non-ASCII
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -57,6 +57,19 @@
 
 # Helpers
 
+def _encode_decode_addr(addr, encode_codec, decode_codec):
+    """Re-code the host part of addr for formataddr() and parseaddr().
+
+    The text after the last "@" is encoded with encode_codec and the
+    resulting bytes are decoded with decode_codec; the local part is
+    left untouched.  An addr without "@" is returned unchanged.
+    """
+    local, sep, host = addr.rpartition("@")
+    if not sep:
+        return addr
+    return local + sep + host.encode(encode_codec).decode(decode_codec)
+
+
 def formataddr(pair, charset='utf-8'):
     """The inverse of parseaddr(), this takes a 2-tuple of the form
     (realname, email_address) and returns the string value suitable
@@ -65,12 +78,17 @@
     If the first element of pair is false, then the second element is
     returned unmodified.
 
+    If the host name of email_address contains non-ASCII characters,
+    it is automatically converted to an IDN representation with the
+    'idna' codec.
+
     Optional charset if given is the character set that is used to encode
     realname in case realname is not ASCII safe.  Can be an instance of str or
     a Charset-like object which has a header_encode method.  Default is
     'utf-8'.
     """
     name, address = pair
+    address = _encode_decode_addr(address, 'idna', 'ascii')
     # The address MUST (per RFC) be ascii, so throw a UnicodeError if it isn't.
     address.encode('ascii')
     if name:
@@ -208,7 +226,14 @@
     addrs = _AddressList(addr).addresslist
     if not addrs:
         return '', ''
-    return addrs[0]
+    name, address = addrs[0]
+    try:
+        address = _encode_decode_addr(address, 'ascii', 'idna')
+    except UnicodeError:
+        # Not a decodable IDN (non-ASCII or malformed host): keep the
+        # parsed address as is rather than making parseaddr() raise.
+        pass
+    return name, address
 
 
 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -2657,6 +2657,31 @@
             email.utils.make_msgid(domain='testdomain-string')[-19:],
             '@testdomain-string>')
 
+    def test_formataddr_encodes_idnas(self):
+        # issue 11783. email parseaddr and formataddr should be IDNA aware
+        addr = "foo@d\u00f6m.ain"
+        puny = "foo@xn--dm-fka.ain"
+        self.assertEqual(utils.formataddr((None, addr)), puny)
+
+    def test_parseaddr_decodes_idnas(self):
+        # issue 11783. email parseaddr and formataddr should be IDNA aware
+        puny = "Foo <bar@xn--dm-fka.ain>"
+        self.assertEqual(utils.parseaddr(puny), ("Foo", "bar@d\u00f6m.ain"))
+
+    def test_parseaddr_keeps_undecodable_host(self):
+        # parseaddr() must not start raising for hosts that are not
+        # ASCII-encoded IDNs, e.g. a host that is already Unicode.
+        addr = "bar@d\u00f6m.ain"
+        self.assertEqual(utils.parseaddr("Foo <%s>" % addr), ("Foo", addr))
+
+    def test_parseaddr_formataddr_ignore_idn_in_local_part_only(self):
+        # issue 11783. email parseaddr and formataddr should be IDNA aware
+        addr = "xn--dm-fka"
+        name = "Foo"
+        pair = "%s <%s>" % (name, addr)
+        self.assertEqual(utils.parseaddr(pair), (name, addr))
+        self.assertEqual(utils.formataddr((name, addr)), pair)
+
 
 # Test the iterator/generators
 class TestIterators(TestEmailBase):