From e62f4e83abc1f467110ace7337e65dab320c4e76 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Fri, 20 Sep 2019 15:55:03 -0700 Subject: [PATCH] bpo-38216: Only forbid CR, LF, and SP in http URLs --- Lib/http/client.py | 7 +++--- Lib/test/test_urllib.py | 47 ++++++++++++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/Lib/http/client.py b/Lib/http/client.py index dd23edcd59..aff8fe7f81 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -145,10 +145,9 @@ _is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search # https://tools.ietf.org/html/rfc3986#appendix-A pchar definition. # Prevents CVE-2019-9740. Includes control characters such as \r\n. # We don't restrict chars above \x7f as putrequest() limits us to ASCII. -_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]') -# Arguably only these _should_ allowed: -# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$") -# We are more lenient for assumed real world compatibility purposes. +_contains_disallowed_url_pchar_re = re.compile('[\r\n ]') +# Note that this is much more lenient than what is allowed on master, to +# ensure smoother upgrades within a minor release. # We always set the Content-Length header for these methods because some # servers will otherwise respond with a 411 diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 7ec365b928..33c26c476c 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -332,9 +332,11 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): @unittest.skipUnless(ssl, "ssl module required") def test_url_with_control_char_rejected(self): - for char_no in list(range(0, 0x21)) + [0x7f]: + for char_no in range(129): char = chr(char_no) + quoted_char = urllib.parse.quote(char) schemeless_url = f"//localhost:7777/test{char}/" + escaped_url = f"//localhost:7777/test{quoted_char}/" self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") try: # We explicitly test urllib.request.urlopen() instead of the top @@ -345,15 +347,40 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): # above attempts at injection within the url _path_ safe. escaped_char_repr = repr(char).replace('\\', r'\\') InvalidURL = http.client.InvalidURL - with self.assertRaisesRegex( - InvalidURL, f"contain control.*{escaped_char_repr}"): - urllib.request.urlopen(f"http:{schemeless_url}") - with self.assertRaisesRegex( - InvalidURL, f"contain control.*{escaped_char_repr}"): - urllib.request.urlopen(f"https:{schemeless_url}") - # This code path quotes the URL so there is no injection. - resp = urlopen(f"http:{schemeless_url}") - self.assertNotIn(char, resp.geturl()) + if char in '\r\n ': + with self.assertRaisesRegex( + InvalidURL, f"contain control.*{escaped_char_repr}"): + urllib.request.urlopen(f"http:{schemeless_url}") + with self.assertRaisesRegex( + InvalidURL, f"contain control.*{escaped_char_repr}"): + urllib.request.urlopen(f"https:{schemeless_url}") + # This code path quotes the URL so there is no injection. + resp = urlopen(f"http:{schemeless_url}") + self.assertNotEqual(f"http:{schemeless_url}", resp.geturl()) + self.assertEqual(f"http:{escaped_url}", resp.geturl()) + self.assertEqual(f"GET /test{quoted_char}/ HTTP/1.1\r".encode('ascii'), + http.client.HTTPConnection.buf.split(b'\n', 1)[0]) + elif char == '\x80': + with self.assertRaises(UnicodeEncodeError): + urllib.request.urlopen(f"http:{schemeless_url}") + with self.assertRaises(UnicodeEncodeError): + urllib.request.urlopen(f"https:{schemeless_url}") + with self.assertRaisesRegex( + UnicodeError, r"URL \'.*\' contains non-ASCII characters"): + urlopen(f"http:{schemeless_url}") + elif char == '#': + resp = urllib.request.urlopen(f"http:{schemeless_url}") + self.assertEqual(f"http:{schemeless_url}", resp.geturl()) + # Note that the number sign is used to delimit + # fragments, which should only be used by the client. + # As such, it does not show up on the wire. + self.assertEqual(f"GET /test HTTP/1.1\r".encode('ascii'), + http.client.HTTPConnection.buf.split(b'\n', 1)[0]) + else: + resp = urllib.request.urlopen(f"http:{schemeless_url}") + self.assertEqual(f"http:{schemeless_url}", resp.geturl()) + self.assertEqual(f"GET /test{char}/ HTTP/1.1\r".encode('ascii'), + http.client.HTTPConnection.buf.split(b'\n', 1)[0]) finally: self.unfakehttp() -- 2.21.0