def test_quoting_iri(self):
    """Check quote_iri() against a table of (IRI, expected URI) pairs."""
    # Plain ASCII inputs must come back untouched.
    unchanged = (
        "http://localhost",
        "http://localhost/test.html",
    )
    # Inputs whose host and/or path contain non-ASCII characters.
    converted = (
        # Non-ASCII path only: percent-encoded as UTF-8.
        ("http://localhost:8080/åäö.html",
         "http://localhost:8080/%C3%A5%C3%A4%C3%B6.html"),
        # Non-ASCII host labels become "xn--" labels.
        ("http://slagvärket.örebro.se:8080/åäö.html",
         "http://xn--slagvrket-z2a.xn--rebro-iua.se:8080/%C3%A5%C3%A4%C3%B6.html"),
        # Userinfo containing an extra "@" is kept in front of the host.
        ("http://auth:a@sd@slagvärket.örebro.se:8080/å@äö.html",
         "http://auth:a@sd@xn--slagvrket-z2a.xn--rebro-iua.se:8080/%C3%A5@%C3%A4%C3%B6.html"),
        # No "//" after the scheme: everything is path, so it is
        # percent-encoded rather than treated as a host name.
        ("file:/auth:a@sd@slagvärket.örebro.se:8080/å@äö.html",
         "file:///auth:a@sd@slagv%C3%A4rket.%C3%B6rebro.se:8080/%C3%A5@%C3%A4%C3%B6.html"),
        ("mailto:åke@ärtan.org",
         "mailto:%C3%A5ke@%C3%A4rtan.org"),
    )
    cases = [(text, text) for text in unchanged]
    cases.extend(converted)
    for given, wanted in cases:
        self.assertEqual(wanted, urllib.parse.quote_iri(given))
def _iri_host_to_ascii(host):
    """Return *host* with each non-ASCII label replaced by its IDNA A-label."""
    labels = []
    for label in host.split("."):
        try:
            label.encode("ascii")
        except UnicodeEncodeError:
            # The "idna" codec applies nameprep (case folding, NFKC) before
            # punycode-encoding and adds the "xn--" prefix itself; the raw
            # "punycode" codec does neither, so upper-case or unnormalised
            # labels would otherwise yield invalid A-labels.
            label = label.encode("idna").decode("ascii")
        labels.append(label)
    return ".".join(labels)


def quote_iri(iri):
    """Convert an IRI string to an ASCII-only URI string.

    The IRI is split with urlsplit().  Non-ASCII host labels are converted
    to their IDNA ("xn--") form; the userinfo, path, query and fragment are
    percent-encoded as UTF-8.  Every character in string.punctuation is
    treated as safe, so delimiters and existing "%XX" escapes are preserved.

    iri -- the IRI to convert, as a str.

    Returns the quoted URI as a str.

    Raises UnicodeError if a non-ASCII host label cannot be encoded by the
    "idna" codec (for example when the encoded label is too long).
    """
    safe = string.punctuation
    split = urlsplit(iri)

    # Everything before the last "@" of the netloc is userinfo.
    userinfo, at, hostport = split.netloc.rpartition("@")
    # Bracketed IPv6 literals are normally pure ASCII, so cutting one at its
    # first colon is harmless: the pieces are reassembled unchanged.
    host, colon, port = hostport.partition(":")

    netloc = "".join((
        quote(userinfo, safe=safe),
        at,
        _iri_host_to_ascii(host),
        colon,
        port,
    ))
    path = quote(split.path, safe=safe)
    query = quote(split.query, safe=safe)
    fragment = quote(split.fragment, safe=safe)
    return urlunsplit((split.scheme, netloc, path, query, fragment))