diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1247,7 +1247,9 @@ socket module. On top of that, modules that have host names as function parameters, such as :mod:`http.client` and :mod:`ftplib`, accept Unicode host names (:mod:`http.client` then also transparently sends an IDNA hostname in the -:mailheader:`Host` field if it sends that field at all). +:mailheader:`Host` field if it sends that field at all), and the :mod:`email` +module's :func:`parseaddr`, :func:`getaddresses`, and :func:`formataddr` +functions automatically apply IDNA to the hostnames in email addresses. .. _section 3.1: http://tools.ietf.org/html/rfc3490#section-3.1 diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst --- a/Doc/library/email.util.rst +++ b/Doc/library/email.util.rst @@ -28,20 +28,29 @@ *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. + The host name of the *email address* is decoded using the + :mod:`~encodings.idna` codec, so if it is an IDN it will be converted to its + Unicode representation. + + .. versionchanged:: 3.3 added IDNA support + .. function:: formataddr(pair, charset='utf-8') The inverse of :meth:`parseaddr`, this takes a 2-tuple of the form ``(realname, email_address)`` and returns the string value suitable for a :mailheader:`To` or :mailheader:`Cc` header. If the first element of *pair* is false, then the - second element is returned unmodified. + second element is returned, without surrounding it with ``<>``. The hostname + portion of the second element (if any) is transformed using the + :mod:`~encodings.idna` codec, thereby converting any labels containing + non-ASCII characters into the equivalent ACE representation. Optional *charset* is the character set that will be used in the :rfc:`2047` encoding of the ``realname`` if the ``realname`` contains non-ASCII characters.
Can be an instance of :class:`str` or a :class:`~email.charset.Charset`. Defaults to ``utf-8``. - .. versionchanged: 3.3 added the *charset* option + .. versionchanged:: 3.3 added the *charset* option and IDNA support .. function:: getaddresses(fieldvalues) diff --git a/Lib/email/utils.py b/Lib/email/utils.py --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -57,13 +57,25 @@ # Helpers +def _encode_decode_addr(addr, encode_codec, decode_codec): + """Helper function for formataddr() and parseaddr() to encode and + decode via IDNA. + """ + parts = addr.split("@") + if len(parts) <= 1: + return addr + parts[-1] = parts[-1].encode(encode_codec).decode(decode_codec) + return "@".join(parts) + + def formataddr(pair, charset='utf-8'): """The inverse of parseaddr(), this takes a 2-tuple of the form (realname, email_address) and returns the string value suitable for an RFC 2822 From, To or Cc header. If the first element of pair is false, then the second element is - returned unmodified. + returned without surrounding it with <>. The hostname portion of the + second element (if any) is transformed using the IDNA codec. Optional charset if given is the character set that is used to encode realname in case realname is not ASCII safe. Can be an instance of str or @@ -71,7 +83,9 @@ 'utf-8'. """ name, address = pair - # The address MUST (per RFC) be ascii, so throw a UnicodeError if it isn't. + address = _encode_decode_addr(address, 'idna', 'ascii') + # The address MUST (per RFC) be ASCII, so if there's any non-ASCII left + # throw a UnicodeError. address.encode('ascii') if name: try: @@ -208,7 +222,9 @@ addrs = _AddressList(addr).addresslist if not addrs: return '', '' - return addrs[0] + name, address = addrs[0] + address = _encode_decode_addr(address, 'ascii', 'idna') + return name, address # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -2657,6 +2657,25 @@ email.utils.make_msgid(domain='testdomain-string')[-19:], '@testdomain-string>') + def test_formataddr_encodes_idnas(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + addr = "foo@d\u00f6m.ain" + puny = "foo@xn--dm-fka.ain" + self.assertEqual(utils.formataddr((None, addr)), puny) + + def test_parseaddr_decodes_idnas(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + puny = "Foo <bar@xn--dm-fka.ain>" + self.assertEqual(utils.parseaddr(puny), ("Foo", "bar@d\u00f6m.ain")) + + def test_parseaddr_formataddr_ignore_idn_in_local_part_only(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + addr = "xn--dm-fka" + name = "Foo" + pair = "%s <%s>" % (name, addr) + self.assertEqual(utils.parseaddr(pair), (name, addr)) + self.assertEqual(utils.formataddr((name, addr)), pair) + # Test the iterator/generators class TestIterators(TestEmailBase):