# HG changeset patch # Parent 469ff344f8fd13e98d8c104dff42d0c974b51576 # Parent 3e6792af95f08f0bb2bb2b9f9c5f9455783e436a Issue #23360: Clean up documentation of request data to urlopen() * Clarify that types other than application/x-www-form-urlencoded are allowed * Move discussion of headers from urlopen(data=...) to Request class, which is the only way to pass in header fields * Correct and update examples to do with request bodies * Add test for urlopen(data=...) setting the default Content-Type diff -r 3e6792af95f0 Doc/faq/library.rst --- a/Doc/faq/library.rst Fri Jun 17 00:01:30 2016 +0200 +++ b/Doc/faq/library.rst Fri Jun 17 05:23:50 2016 +0000 @@ -684,7 +684,7 @@ import urllib.request # build the query string - qs = "First=Josephine&MI=Q&Last=Public" + qs = b"First=Josephine&MI=Q&Last=Public" # connect and send the server a path req = urllib.request.urlopen('http://www.some-server.out-there' diff -r 3e6792af95f0 Doc/library/urllib.request.rst --- a/Doc/library/urllib.request.rst Fri Jun 17 00:01:30 2016 +0200 +++ b/Doc/library/urllib.request.rst Fri Jun 17 05:23:50 2016 +0000 @@ -30,18 +30,18 @@ Open the URL *url*, which can be either a string or a :class:`Request` object. - *data* must be a bytes object specifying additional data to be sent to the - server, or ``None`` if no such data is needed. *data* may also be an - iterable object and in that case Content-Length value must be specified in - the headers. Currently HTTP requests are the only ones that use *data*; the - HTTP request will be a POST instead of a GET when the *data* parameter is - provided. - - *data* should be a buffer in the standard + The *data* argument is normally ``None``, but may also be a bytes + object. It is passed to an internal :class:`Request` object. Currently + HTTP requests are the only ones that use *data*; the HTTP + request will be a GET when *data* is ``None``, and will be a + POST otherwise. 
For POST requests, *data* specifies additional + data to be sent to the server, and should be in the standard :mimetype:`application/x-www-form-urlencoded` format. The :func:`urllib.parse.urlencode` function takes a mapping or sequence of 2-tuples and returns an ASCII text string in this format. It should be encoded to bytes before being used as the *data* parameter. + Other *data* objects, formats, and HTTP request methods are possible + by passing a custom :class:`Request` object as the *url* parameter. urllib.request module uses HTTP/1.1 and includes ``Connection:close`` header in its HTTP requests. @@ -182,11 +182,16 @@ *url* should be a string containing a valid URL. - *data* must be a bytes object specifying additional data to send to the + The *data* argument must be a bytes or iterable + object specifying additional data to send to the server, or ``None`` if no such data is needed. Currently HTTP requests are - the only ones that use *data*; the HTTP request will be a POST instead of a - GET when the *data* parameter is provided. *data* should be a buffer in the - standard :mimetype:`application/x-www-form-urlencoded` format. + the only ones that use *data*. The HTTP request will be a GET when + *data* is ``None``, and will be a POST otherwise, unless overridden by the + *method* parameter. If *data* is an iterable object, a Content-Length + value must be specified in *headers*. + + If *data* is not ``None`` and no Content-Type field is provided, + the type is set to :mimetype:`application/x-www-form-urlencoded`. The :func:`urllib.parse.urlencode` function takes a mapping or sequence of 2-tuples and returns an ASCII string in this format. It should be encoded to bytes before being used as the *data* parameter. @@ -1165,6 +1170,7 @@ >>> import urllib.request >>> req = urllib.request.Request(url='https://localhost/cgi-bin/test.cgi', + ... headers={'Content-type': 'text/plain'}, ... 
data=b'This data is passed to stdin of the CGI') >>> with urllib.request.urlopen(req) as f: ... print(f.read().decode('utf-8')) @@ -1182,7 +1188,8 @@ import urllib.request DATA = b'some data' - req = urllib.request.Request(url='http://localhost:8080', data=DATA,method='PUT') + req = urllib.request.Request(url='http://localhost:8080', data=DATA, + headers={'Content-type': 'text/plain'}, method='PUT') with urllib.request.urlopen(req) as f: pass print(f.status) diff -r 3e6792af95f0 Lib/test/test_urllib2.py --- a/Lib/test/test_urllib2.py Fri Jun 17 00:01:30 2016 +0200 +++ b/Lib/test/test_urllib2.py Fri Jun 17 05:23:50 2016 +0000 @@ -844,8 +844,14 @@ def test_http(self): - h = urllib.request.AbstractHTTPHandler() - o = h.parent = MockOpener() + class DummyHandler(urllib.request.HTTPHandler): + def http_open(self, request): + self.open_request = request + http = MockHTTPClass() + return self.do_open(http, request) + h = DummyHandler() + o = OpenerDirector() + o.add_handler(h) url = "http://example.com/" for method, data in [("GET", None), ("POST", b"blah")]: @@ -882,17 +888,17 @@ # check adding of standard headers o.addheaders = [("Spam", "eggs")] - for data in b"", None: # POST, GET + tests = ( + (b"", "0", "application/x-www-form-urlencoded"), # POST + (None, None, None), # GET + ) + for data, expected_length, expected_type in tests: req = Request("http://example.com/", data) - r = MockResponse(200, "OK", {}, "") newreq = h.do_request_(req) - if data is None: # GET - self.assertNotIn("Content-length", req.unredirected_hdrs) - self.assertNotIn("Content-type", req.unredirected_hdrs) - else: # POST - self.assertEqual(req.unredirected_hdrs["Content-length"], "0") - self.assertEqual(req.unredirected_hdrs["Content-type"], - "application/x-www-form-urlencoded") + self.assertEqual(req.unredirected_hdrs.get("Content-length"), + expected_length) + self.assertEqual(req.unredirected_hdrs.get("Content-type"), + expected_type) # XXX the details of Host could be better tested 
self.assertEqual(req.unredirected_hdrs["Host"], "example.com") self.assertEqual(req.unredirected_hdrs["Spam"], "eggs") @@ -908,20 +914,29 @@ self.assertEqual(req.unredirected_hdrs["Host"], "baz") self.assertEqual(req.unredirected_hdrs["Spam"], "foo") + # open(data=...) should also set up Request object appropriately + o.open("http://dummy-host/", data=data) + self.assertEqual(h.open_request.get_header("Content-length"), + expected_length) + self.assertEqual(h.open_request.get_header("Content-type"), + expected_type) + # Check iterable body support def iterable_body(): yield b"one" yield b"two" yield b"three" - for headers in {}, {"Content-Length": 11}: - req = Request("http://example.com/", iterable_body(), headers) - if not headers: - # Having an iterable body without a Content-Length should - # raise an exception - self.assertRaises(ValueError, h.do_request_, req) - else: - newreq = h.do_request_(req) + # Having an iterable body without a Content-Length should + # raise an exception + req = Request("http://example.com/", iterable_body(), {}) + self.assertRaises(ValueError, h.do_request_, req) + + headers = {"Content-Length": 11} + req = Request("http://example.com/", iterable_body(), headers) + h.do_request_(req) + type = req.unredirected_hdrs.get("Content-type") + self.assertEqual(type, "application/x-www-form-urlencoded") # A file object. # Test only Content-Length attribute of request. diff -r 3e6792af95f0 Lib/urllib/request.py --- a/Lib/urllib/request.py Fri Jun 17 00:01:30 2016 +0200 +++ b/Lib/urllib/request.py Fri Jun 17 05:23:50 2016 +0000 @@ -141,17 +141,20 @@ *, cafile=None, capath=None, cadefault=False, context=None): '''Open the URL url, which can be either a string or a Request object. - *data* must be a bytes object specifying additional data to be sent to the - server, or None if no such data is needed. data may also be an iterable - object and in that case Content-Length value must be specified in the - headers. 
Currently HTTP requests are the only ones that use data; the HTTP - request will be a POST instead of a GET when the data parameter is - provided. - - *data* should be a buffer in the standard application/x-www-form-urlencoded - format. The urllib.parse.urlencode() function takes a mapping or sequence - of 2-tuples and returns an ASCII text string in this format. It should be - encoded to bytes before being used as the data parameter. + The *data* argument is normally None, but may also be a bytes object. It + is passed to an internal Request object. Currently HTTP requests are the + only ones that use *data*; the HTTP request will be a GET when *data* is + None, and will be a POST otherwise. For POST requests, *data* specifies + additional data to be sent to the server, and should be in the standard + application/x-www-form-urlencoded format. The urllib.parse.urlencode() + function takes a mapping or sequence of 2-tuples and returns an ASCII + text string in this format. It should be encoded to bytes before being + used as the *data* parameter. Other *data* objects, formats, and HTTP + request methods are possible by passing a custom Request object as the + *url* parameter. + + When using a Request object, *data* may also be an iterable object, and + in that case, a Content-Length value must be specified in the headers. urllib.request module uses HTTP/1.1 and includes a "Connection:close" header in its HTTP requests. @@ -528,7 +531,7 @@ meth = getattr(processor, meth_name) req = meth(req) - response = self._open(req, data) + response = self._open(req) # post-process response meth_name = protocol+"_response" @@ -538,7 +541,7 @@ return response - def _open(self, req, data=None): + def _open(self, req): result = self._call_chain(self.handle_open, 'default', 'default_open', req) if result: