diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst
index 6bb06d4..5cf2c0c 100644
--- a/Doc/howto/urllib2.rst
+++ b/Doc/howto/urllib2.rst
@@ -525,6 +525,11 @@ setting up a `Basic Authentication`_ handler: ::
     through a proxy. However, this can be enabled by extending urllib2 as
     shown in the recipe [#]_.
 
+.. note::
+
+   ``HTTP_PROXY`` will be ignored if a variable ``REQUEST_METHOD`` is set; see
+   the documentation on :func:`~urllib.getproxies`.
+
 Sockets and Layers
 ==================
 
diff --git a/Doc/library/urllib.rst b/Doc/library/urllib.rst
index 3b5dc16..8dd5be4 100644
--- a/Doc/library/urllib.rst
+++ b/Doc/library/urllib.rst
@@ -296,6 +296,15 @@ Utility functions
    lowercase is preferred.
 
 .. note::
+
+    If the environment variable ``REQUEST_METHOD`` is set, which usually
+    indicates your script is running in a CGI environment, the environment
+    variable ``HTTP_PROXY`` (uppercase) will be ignored. This is because that
+    variable can be injected by a client using the "Proxy:" HTTP header. If you
+    need to use an HTTP proxy in a CGI environment, either use ``ProxyHandler``
+    explicitly, or make sure the variable name is in lowercase (``http_proxy``).
+
+.. note::
     urllib also exposes certain utility functions like splittype, splithost and
     others parsing URL into various components. But it is recommended to use
     :mod:`urlparse` for parsing URLs rather than using these functions directly.
diff --git a/Doc/library/urllib2.rst b/Doc/library/urllib2.rst
index 8a4c80e..5b48993 100644
--- a/Doc/library/urllib2.rst
+++ b/Doc/library/urllib2.rst
@@ -229,6 +229,11 @@ The following classes are provided:
 
    To disable autodetected proxy pass an empty dictionary.
 
+.. note::
+
+   ``HTTP_PROXY`` will be ignored if a variable ``REQUEST_METHOD`` is set; see
+   the documentation on :func:`~urllib.getproxies`.
+
 
 .. class:: HTTPPasswordMgr()
 
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 434d533..27a1d38 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -170,6 +170,18 @@ class ProxyTests(unittest.TestCase):
         self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com:8888'))
         self.assertTrue(urllib.proxy_bypass_environment('newdomain.com:1234'))
 
+    def test_proxy_cgi_ignore(self):
+        try:
+            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
+            proxies = urllib.getproxies_environment()
+            self.assertEqual('http://somewhere:3128', proxies['http'])
+            self.env.set('REQUEST_METHOD', 'GET')
+            proxies = urllib.getproxies_environment()
+            self.assertNotIn('http', proxies)
+        finally:
+            self.env.unset('REQUEST_METHOD')
+            self.env.unset('HTTP_PROXY')
+
     def test_proxy_bypass_environment_host_match(self):
         bypass = urllib.proxy_bypass_environment
         self.env.set('NO_PROXY',
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 139fab9..c3ba2c9 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -1380,12 +1380,21 @@ def getproxies_environment():
     If you need a different way, you can pass a proxies dictionary to the
     [Fancy]URLopener constructor.
     """
+    # Get all variables
     proxies = {}
     for name, value in os.environ.items():
         name = name.lower()
         if value and name[-6:] == '_proxy':
             proxies[name[:-6]] = value
 
+    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
+    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
+    # header from the client
+    # If "proxy" is lowercase, it will still be used thanks to the next block
+    if 'REQUEST_METHOD' in os.environ:
+        proxies.pop('http', None)
+
+    # Get lowercase variables
     for name, value in os.environ.items():
         if name[-6:] == '_proxy':
             name = name.lower()