# HG changeset patch # Parent 469ff344f8fd13e98d8c104dff42d0c974b51576 diff -r 469ff344f8fd Doc/faq/library.rst --- a/Doc/faq/library.rst Sat Jan 31 12:20:40 2015 -0800 +++ b/Doc/faq/library.rst Tue Mar 31 11:46:02 2015 +0000 @@ -682,7 +682,7 @@ import urllib.request ### build the query string - qs = "First=Josephine&MI=Q&Last=Public" + qs = b"First=Josephine&MI=Q&Last=Public" ### connect and send the server a path req = urllib.request.urlopen('http://www.some-server.out-there' diff -r 469ff344f8fd Doc/library/urllib.request.rst --- a/Doc/library/urllib.request.rst Sat Jan 31 12:20:40 2015 -0800 +++ b/Doc/library/urllib.request.rst Tue Mar 31 11:46:02 2015 +0000 @@ -21,23 +21,18 @@ Open the URL *url*, which can be either a string or a :class:`Request` object. - *data* must be a bytes object specifying additional data to be sent to the - server, or ``None`` if no such data is needed. *data* may also be an - iterable object and in that case Content-Length value must be specified in - the headers. Currently HTTP requests are the only ones that use *data*; the - HTTP request will be a POST instead of a GET when the *data* parameter is - provided. - - *data* should be a buffer in the standard + The *data* argument is normally ``None``, but may also be a bytes + object. It is passed to an internal :class:`Request` object. Currently + HTTP requests are the only ones that use *data*; the HTTP + request will be a GET when *data* is ``None``, and will be a + POST otherwise. For POST requests, *data* specifies additional + data to be sent to the server, and should be in the standard :mimetype:`application/x-www-form-urlencoded` format. The :func:`urllib.parse.urlencode` function takes a mapping or sequence of 2-tuples and returns a string in this format. It should be encoded to bytes - before being used as the *data* parameter. The charset parameter in - ``Content-Type`` header may be used to specify the encoding. If charset - parameter is not sent with the Content-Type header, the server following the - HTTP 1.1 recommendation may assume that the data is encoded in ISO-8859-1 - encoding. It is advisable to use charset parameter with encoding used in - ``Content-Type`` header with the :class:`Request`. + before being used as the *data* parameter. Other *data* objects, formats, + and HTTP request methods are possible by passing a custom :class:`Request` + object as the *url* parameter. urllib.request module uses HTTP/1.1 and includes ``Connection:close`` header in its HTTP requests. @@ -170,12 +165,16 @@ *url* should be a string containing a valid URL. - *data* must be a bytes object specifying additional data to send to the + The *data* argument must be a bytes or iterable + object specifying additional data to send to the server, or ``None`` if no such data is needed. Currently HTTP requests are - the only ones that use *data*; the HTTP request will be a POST instead of a - GET when the *data* parameter is provided. *data* should be a buffer in the - standard :mimetype:`application/x-www-form-urlencoded` format. + the only ones that use *data*. The HTTP request will be a GET when + *data* is ``None``, and will be a POST otherwise, unless overridden by the + *method* parameter. If *data* is an iterable object, a Content-Length + value must be specified in *headers*. + If *data* is not ``None`` and no Content-Type field is provided, + the type is set to :mimetype:`application/x-www-form-urlencoded`. The :func:`urllib.parse.urlencode` function takes a mapping or sequence of 2-tuples and returns a string in this format. It should be encoded to bytes before being used as the *data* parameter. The charset parameter in @@ -1102,6 +1101,7 @@ >>> import urllib.request >>> req = urllib.request.Request(url='https://localhost/cgi-bin/test.cgi', + ... headers={'Content-type': 'text/plain'}, ... data=b'This data is passed to stdin of the CGI') >>> f = urllib.request.urlopen(req) >>> print(f.read().decode('utf-8')) @@ -1112,13 +1112,14 @@ #!/usr/bin/env python import sys data = sys.stdin.read() - print('Content-type: text-plain\n\nGot Data: "%s"' % data) + print('Content-type: text/plain\n\nGot Data: "%s"' % data) Here is an example of doing a ``PUT`` request using :class:`Request`:: import urllib.request DATA=b'some data' - req = urllib.request.Request(url='http://localhost:8080', data=DATA,method='PUT') + req = urllib.request.Request(url='http://localhost:8080', data=DATA, + headers={'Content-type': 'text/plain'}, method='PUT') f = urllib.request.urlopen(req) print(f.status) print(f.reason) diff -r 469ff344f8fd Lib/test/test_urllib2.py --- a/Lib/test/test_urllib2.py Sat Jan 31 12:20:40 2015 -0800 +++ b/Lib/test/test_urllib2.py Tue Mar 31 11:46:02 2015 +0000 @@ -775,8 +775,14 @@ def test_http(self): - h = urllib.request.AbstractHTTPHandler() - o = h.parent = MockOpener() + class DummyHandler(urllib.request.HTTPHandler): + def http_open(self, request): + self.open_request = request + http = MockHTTPClass() + return self.do_open(http, request) + h = DummyHandler() + o = OpenerDirector() + o.add_handler(h) url = "http://example.com/" for method, data in [("GET", None), ("POST", b"blah")]: @@ -813,17 +819,17 @@ # check adding of standard headers o.addheaders = [("Spam", "eggs")] - for data in b"", None: # POST, GET + tests = ( + (b"", "0", "application/x-www-form-urlencoded"), # POST + (None, None, None), # GET + ) + for data, expected_length, expected_type in tests: req = Request("http://example.com/", data) - r = MockResponse(200, "OK", {}, "") newreq = h.do_request_(req) - if data is None: # GET - self.assertNotIn("Content-length", req.unredirected_hdrs) - self.assertNotIn("Content-type", req.unredirected_hdrs) - else: # POST - self.assertEqual(req.unredirected_hdrs["Content-length"], "0") - self.assertEqual(req.unredirected_hdrs["Content-type"], - "application/x-www-form-urlencoded") + self.assertEqual(req.unredirected_hdrs.get("Content-length"), + expected_length) + self.assertEqual(req.unredirected_hdrs.get("Content-type"), + expected_type) # XXX the details of Host could be better tested self.assertEqual(req.unredirected_hdrs["Host"], "example.com") self.assertEqual(req.unredirected_hdrs["Spam"], "eggs") @@ -839,20 +845,29 @@ self.assertEqual(req.unredirected_hdrs["Host"], "baz") self.assertEqual(req.unredirected_hdrs["Spam"], "foo") + # open(data=...) should also set up Request object appropriately + o.open("http://dummy-host/", data=data) + self.assertEqual(h.open_request.get_header("Content-length"), + expected_length) + self.assertEqual(h.open_request.get_header("Content-type"), + expected_type) + # Check iterable body support def iterable_body(): yield b"one" yield b"two" yield b"three" - for headers in {}, {"Content-Length": 11}: - req = Request("http://example.com/", iterable_body(), headers) - if not headers: - # Having an iterable body without a Content-Length should - # raise an exception - self.assertRaises(ValueError, h.do_request_, req) - else: - newreq = h.do_request_(req) + # Having an iterable body without a Content-Length should + # raise an exception + req = Request("http://example.com/", iterable_body(), {}) + self.assertRaises(ValueError, h.do_request_, req) + + headers = {"Content-Length": 11} + req = Request("http://example.com/", iterable_body(), headers) + h.do_request_(req) + type = req.unredirected_hdrs.get("Content-type") + self.assertEqual(type, "application/x-www-form-urlencoded") # A file object. # Test only Content-Length attribute of request. diff -r 469ff344f8fd Lib/urllib/request.py --- a/Lib/urllib/request.py Sat Jan 31 12:20:40 2015 -0800 +++ b/Lib/urllib/request.py Tue Mar 31 11:46:02 2015 +0000 @@ -460,7 +460,7 @@ meth = getattr(processor, meth_name) req = meth(req) - response = self._open(req, data) + response = self._open(req) # post-process response meth_name = protocol+"_response" @@ -470,7 +470,7 @@ return response - def _open(self, req, data=None): + def _open(self, req): result = self._call_chain(self.handle_open, 'default', 'default_open', req) if result: