diff -r 3ae2cd85a908 Doc/library/codecs.rst --- a/Doc/library/codecs.rst Sun Mar 09 11:18:16 2014 +0100 +++ b/Doc/library/codecs.rst Wed Jun 11 10:53:32 2014 +0200 @@ -1358,7 +1358,9 @@ socket module. On top of that, modules that have host names as function parameters, such as :mod:`http.client` and :mod:`ftplib`, accept Unicode host names (:mod:`http.client` then also transparently sends an IDNA hostname in the -:mailheader:`Host` field if it sends that field at all). +:mailheader:`Host` field if it sends that field at all), and the :mod:`email` +module's :func:`parseaddr`, :func:`getaddresses`, and :func:`formataddr` +functions automatically apply IDNA to the hostnames in email addresses. .. _section 3.1: http://tools.ietf.org/html/rfc3490#section-3.1 diff -r 3ae2cd85a908 Doc/library/email.util.rst --- a/Doc/library/email.util.rst Sun Mar 09 11:18:16 2014 +0100 +++ b/Doc/library/email.util.rst Wed Jun 11 10:53:32 2014 +0200 @@ -28,13 +28,23 @@ *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. + The host name of the *email address* is decoded using the + :mod:`~encodings.idna` codec, so if it is an IDN it will be converted to its + unicode representation. + + .. versionchanged:: 3.5 + Added Internationalized Domain Name (IDN) support. + .. function:: formataddr(pair, charset='utf-8') The inverse of :meth:`parseaddr`, this takes a 2-tuple of the form ``(realname, email_address)`` and returns the string value suitable for a :mailheader:`To` or :mailheader:`Cc` header. If the first element of *pair* is false, then the - second element is returned unmodified. + second element is returned, without surrounding it with ``<>``. The hostname + portion of the second element (if any) is transformed using the + :mod:`~encodings.idna` codec, thereby converting any labels containing + non-ASCII characters into the equivalent ACE representation. 
Optional *charset* is the character set that will be used in the :rfc:`2047` encoding of the ``realname`` if the ``realname`` contains non-ASCII @@ -44,13 +54,19 @@ .. versionchanged:: 3.3 Added the *charset* option. + .. versionchanged:: 3.5 + Added Internationalized Domain Name (IDN) support. + .. function:: getaddresses(fieldvalues) This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple - example that gets all the recipients of a message:: + :meth:`Message.get_all`. The host name of each *email address* in the + returned list is decoded using the :mod:`~encodings.idna` codec, so if it is an + IDN it will be converted to its unicode representation. + + Here's a simple example that gets all the recipients of a message:: from email.utils import getaddresses @@ -60,6 +76,9 @@ resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + .. versionchanged:: 3.5 + Added Internationalized Domain Name (IDN) support. + .. function:: parsedate(date) diff -r 3ae2cd85a908 Lib/email/_parseaddr.py --- a/Lib/email/_parseaddr.py Sun Mar 09 11:18:16 2014 +0100 +++ b/Lib/email/_parseaddr.py Wed Jun 11 10:53:32 2014 +0200 @@ -200,6 +200,19 @@ return str.replace('\\', '\\\\').replace('"', '\\"') +def _encode_decode_addr(addr, encode_codec, decode_codec): + """Helper function to encode or decode IDNs.""" + parts = addr.split("@") + # No '@' means this is a local-part only address, don't do anything then. + if len(parts) <= 1: + return addr + parts[-1] = parts[-1].encode(encode_codec).decode(decode_codec) + return "@".join(parts) + +encode_idn_addr = lambda addr: _encode_decode_addr(addr, 'idna', 'ascii') +decode_idn_addr = lambda addr: _encode_decode_addr(addr, 'ascii', 'idna') + + class AddrlistClass: """Address parser class by Ben Escoto.
@@ -258,7 +271,11 @@ return result def getaddress(self): - """Parse the next address.""" + """Parse the next address. + + If the host name of the address is an IDN, it will be converted + to its unicode representation. + """ self.commentlist = [] self.gotonext() @@ -314,6 +331,8 @@ self.gotonext() if self.pos < len(self.field) and self.field[self.pos] == ',': self.pos += 1 + returnlist = [(name, decode_idn_addr(addr)) + for name, addr in returnlist] return returnlist def getrouteaddr(self): diff -r 3ae2cd85a908 Lib/email/utils.py --- a/Lib/email/utils.py Sun Mar 09 11:18:16 2014 +0100 +++ b/Lib/email/utils.py Wed Jun 11 10:53:32 2014 +0200 @@ -36,6 +36,7 @@ from email._parseaddr import quote from email._parseaddr import AddressList as _AddressList from email._parseaddr import mktime_tz +from email._parseaddr import encode_idn_addr from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz @@ -85,7 +86,8 @@ for an RFC 2822 From, To or Cc header. If the first element of pair is false, then the second element is - returned unmodified. + returned without surrounding it with <>. The hostname portion of the + second element (if any) is transformed using the IDNA codec. Optional charset if given is the character set that is used to encode realname in case realname is not ASCII safe. Can be an instance of str or @@ -93,7 +95,9 @@ 'utf-8'. """ name, address = pair - # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't. + address = encode_idn_addr(address) + # The address MUST (per RFC) be ASCII, so if there's any non-ASCII left + # throw a UnicodeError. address.encode('ascii') if name: try: diff -r 3ae2cd85a908 Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py Sun Mar 09 11:18:16 2014 +0100 +++ b/Lib/test/test_email/test_email.py Wed Jun 11 10:53:32 2014 +0200 @@ -3274,6 +3274,34 @@ g.flatten(msg, linesep='\r\n') self.assertEqual(s.getvalue(), msgtxt) + def test_formataddr_encodes_idns(self): + # issue 11783.
email parseaddr and formataddr should be IDNA aware + addr = "foo@d\u00f6m.ain" + puny = "foo@xn--dm-fka.ain" + self.assertEqual(utils.formataddr((None, addr)), puny) + + def test_parseaddr_decodes_idns(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + puny = "Foo <bar@xn--dm-fka.ain>" + self.assertEqual(utils.parseaddr(puny), ("Foo", "bar@d\u00f6m.ain")) + + def test_getaddresses_decodes_idns(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + self.assertEqual(utils.getaddresses( + ['aperson@xn--ol-cja.es (Aperson)', + 'Bperson <bperson@xn--ber-goa.com>']), + [('Aperson', 'aperson@ol\xe9.es'), + ('Bperson', 'bperson@\xfcber.com')]) + + def test_utils_ignore_idn_in_local_part_only(self): + # issue 11783. email parseaddr and formataddr should be IDNA aware + addr = "xn--dm-fka" + name = "Foo" + pair = "%s <%s>" % (name, addr) + self.assertEqual(utils.parseaddr(pair), (name, addr)) + self.assertEqual(utils.formataddr((name, addr)), pair) + self.assertEqual(utils.getaddresses([pair]), [(name, addr)]) + # Test the iterator/generators class TestIterators(TestEmailBase):