# HG changeset patch # Parent cabd7261ae80ff2d52214697a3dbce04b2101d10 diff -r cabd7261ae80 Doc/library/http.client.rst --- a/Doc/library/http.client.rst Fri May 22 19:29:22 2015 -0700 +++ b/Doc/library/http.client.rst Mon Jun 22 12:20:32 2015 +0000 @@ -98,7 +98,7 @@ parameter. -.. class:: HTTPResponse(sock, debuglevel=0, method=None, url=None) +.. class:: HTTPResponse(sock, debuglevel=0, method=None, url=None, *, will_close=False) Class whose instances are returned upon successful connection. Not instantiated directly by user. @@ -107,6 +107,9 @@ The *strict* parameter was removed. HTTP 0.9 style "Simple Responses" are no longer supported. + .. versionadded:: 3.5 + The *will_close* parameter. + The following exceptions are raised as appropriate: @@ -220,7 +223,7 @@ :class:`HTTPConnection` instances have the following methods: -.. method:: HTTPConnection.request(method, url, body=None, headers={}) +.. method:: HTTPConnection.request(method, url, body=None, headers={}, *, close=False) This will send a request to the server using the HTTP request method *method* and the selector *url*. @@ -250,9 +253,17 @@ then the Content-Length header is set to the ``st_size`` reported by the ``fstat`` call. Otherwise no Content-Length header is added. + If *close* is true, a "Connection: close" header will be sent. In this + mode, if :meth:`getresponse` is successful, closing the response object + will close the connection to the server, and :meth:`HTTPConnection.close` + will have no effect. + .. versionadded:: 3.2 *body* can now be an iterable. + .. versionadded:: 3.5 + The *close* flag. + .. method:: HTTPConnection.getresponse() Should be called after a request is sent to get the response from the server. diff -r cabd7261ae80 Lib/http/client.py --- a/Lib/http/client.py Fri May 22 19:29:22 2015 -0700 +++ b/Lib/http/client.py Mon Jun 22 12:20:32 2015 +0000 @@ -209,7 +209,8 @@ # text following RFC 2047. The basic status line parsing only # accepts iso-8859-1. - def __init__(self, sock, debuglevel=0, method=None, url=None): + def __init__(self, sock, debuglevel=0, method=None, url=None, *, + will_close=False): # If the response includes a content-length header, we need to # make sure that the client doesn't read more than the # specified number of bytes. If it does, it will block until @@ -217,7 +218,10 @@ # happen if a self.fp.read() is done (without a size) whether # self.fp is buffered or not. So, no self.fp.read() by # clients unless they know what they are doing. - self.fp = sock.makefile("rb") + if hasattr(sock, "makefile"): + self.fp = sock.makefile("rb") # Backwards compatibility + else: + self.fp = sock self.debuglevel = debuglevel self._method = method @@ -237,7 +241,7 @@ self.chunked = _UNKNOWN # is "chunked" being used? self.chunk_left = _UNKNOWN # bytes left to read in current chunk self.length = _UNKNOWN # number of bytes left in response - self.will_close = _UNKNOWN # conn will close at end of response + self.will_close = will_close # conn will close at end of response def _read_status(self): line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") @@ -318,7 +322,7 @@ self.chunked = False # will the connection close at the end of the response? - self.will_close = self._check_close() + self.will_close = self.will_close or self._check_close() # do we have a Content-Length? # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" @@ -385,7 +389,8 @@ def _close_conn(self): fp = self.fp self.fp = None - fp.close() + if self.will_close: + fp.close() def close(self): try: @@ -399,11 +404,6 @@ # XXX This class should probably be revised to act more like # the "raw stream" that BufferedReader expects. - def flush(self): - super().flush() - if self.fp: - self.fp.flush() - def readable(self): return True @@ -729,7 +729,8 @@ self.source_address = source_address self.sock = None self._buffer = [] - self.__response = None + self._reader = None # All connection reads are via this file object + self.__response = None # Response object when connection is shared self.__state = _CS_IDLE self._method = None self._tunnel_host = None @@ -800,7 +801,8 @@ self.send(header_bytes) self.send(b'\r\n') - response = self.response_class(self.sock, method=self._method) + self._reader = self.sock.makefile("rb") + response = self.response_class(self._reader, method=self._method) (version, code, message) = response._read_status() if code != http.HTTPStatus.OK: @@ -832,11 +834,14 @@ def close(self): """Close the connection to the HTTP server.""" self.__state = _CS_IDLE + if self._reader: + self._reader.close() + self._reader = None try: sock = self.sock if sock: self.sock = None - sock.close() # close it manually... there may be other refs + sock.close() finally: response = self.__response if response: @@ -927,8 +932,8 @@ self.__response = None - # in certain cases, we cannot issue another request on this connection. - # this occurs when: + # In certain cases, we cannot issue another request on a previously- + # used connection. This occurs when: # 1) we are in the process of sending a request. (_CS_REQ_STARTED) # 2) a response to a previous request has signalled that it is going # to close the connection upon completion. @@ -1074,13 +1079,14 @@ """ if self.__state == _CS_REQ_STARTED: self.__state = _CS_REQ_SENT + self._will_close = False else: raise CannotSendHeader() self._send_output(message_body) - def request(self, method, url, body=None, headers={}): + def request(self, method, url, body=None, headers={}, *, close=False): """Send a complete request to the server.""" - self._send_request(method, url, body, headers) + self._send_request(method, url, body, headers, close=close) def _set_content_length(self, body, method): # Set the content-length based on the body. If the body is "empty", we @@ -1106,9 +1112,13 @@ if thelen is not None: self.putheader('Content-Length', thelen) - def _send_request(self, method, url, body, headers): + def _send_request(self, method, url, body, headers, *, close): + header_names = frozenset(k.lower() for k in headers) + if close and 'connection' in header_names: + msg = 'Cannot specify a Connection header with close=True' + raise ValueError(msg) + # Honor explicitly requested Host: and Accept-Encoding: headers. - header_names = dict.fromkeys([k.lower() for k in headers]) skips = {} if 'host' in header_names: skips['skip_host'] = 1 @@ -1117,6 +1127,8 @@ self.putrequest(method, url, **skips) + if close: + self.putheader('Connection', 'close') if 'content-length' not in header_names: self._set_content_length(body, method) for hdr, value in headers.items(): @@ -1126,6 +1138,7 @@ # default charset of iso-8859-1. body = body.encode('iso-8859-1') self.endheaders(body) + self._will_close = close # Remember for getrequest() def getresponse(self): """Get the response from the server. @@ -1137,8 +1150,8 @@ If a request has not been sent or if a previous response has not be handled, ResponseNotReady is raised. If the HTTP response indicates that the connection should be closed, then - it will be closed before the response is returned. When the - connection is closed, the underlying socket is closed. + the HTTPConnection object will detach itself from this connection, + and a subsequent request will trigger a new connection. """ # if a prior response has been completed, then forget about it. @@ -1163,32 +1176,35 @@ if self.__state != _CS_REQ_SENT or self.__response: raise ResponseNotReady(self.__state) + if self._reader is None: + self._reader = self.sock.makefile("rb") if self.debuglevel > 0: - response = self.response_class(self.sock, self.debuglevel, - method=self._method) + response = self.response_class(self._reader, self.debuglevel, + method=self._method, + will_close=self._will_close) else: - response = self.response_class(self.sock, method=self._method) + response = self.response_class(self._reader, method=self._method, + will_close=self._will_close) try: - try: - response.begin() - except ConnectionError: - self.close() - raise - assert response.will_close != _UNKNOWN - self.__state = _CS_IDLE + response.begin() + except ConnectionError: + self.close() + raise + self.__state = _CS_IDLE - if response.will_close: - # this effectively passes the connection to the response - self.close() - else: - # remember this, so we can tell when it is complete - self.__response = response + if response.will_close: + # Detach the socket reader from the HTTPConnection instance, + # and close the Python-level socket object. Thus when + # the response object closes the reader, this will also close + # the OS-level socket and shut the connection down. + self._reader = None + self.close() + else: + # remember this, so we can tell when it is complete + self.__response = response - return response - except: - response.close() - raise + return response try: import ssl diff -r cabd7261ae80 Lib/test/test_httplib.py --- a/Lib/test/test_httplib.py Fri May 22 19:29:22 2015 -0700 +++ b/Lib/test/test_httplib.py Mon Jun 22 12:20:32 2015 +0000 @@ -794,11 +794,12 @@ response = self # Avoid garbage collector closing the socket client.HTTPResponse.__init__(self, *pos, **kw) conn.response_class = Response - conn.sock = FakeSocket('Invalid status line') + sock = FakeSocket('Invalid status line') + conn.sock = sock conn.request('GET', '/') self.assertRaises(client.BadStatusLine, conn.getresponse) - self.assertTrue(response.closed) - self.assertTrue(conn.sock.file_closed) + conn.close() + self.assertTrue(sock.file_closed) def test_chunked_extension(self): extra = '3;foo=bar\r\n' + 'abc\r\n' @@ -854,6 +855,18 @@ self.assertEqual(sock.file.read(100), extradata.encode("ascii")) #we read to the end resp.close() + def test_request_close(self): + conn = client.HTTPConnection("example.com") + data = "HTTP/1.1 200 OK\r\nContent-Length: 8\r\n\r\nResponse" + sock = FakeSocket(data) + conn.sock = sock + conn.request("GET", "/", close=True) + self.assertIn(b"\r\nConnection: close\r\n", sock.data) + with conn.getresponse() as resp: + self.assertIsNone(conn.sock) + self.assertEqual(resp.read(), b"Response") + self.assertTrue(sock.file_closed) + class ExtendedReadTest(TestCase): """ Test peek(), read1(), readline() @@ -1145,12 +1158,12 @@ # for an ssl_wrapped connect() to actually return from. -class TimeoutTest(TestCase): - PORT = None +class ServerTest(TestCase): + """Tests that use a real HTTP server socket""" def setUp(self): self.serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - TimeoutTest.PORT = support.bind_port(self.serv) + self.PORT = support.bind_port(self.serv) self.serv.listen() def tearDown(self): @@ -1165,7 +1178,7 @@ self.assertIsNone(socket.getdefaulttimeout()) socket.setdefaulttimeout(30) try: - httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT) + httpConn = client.HTTPConnection(HOST, self.PORT) httpConn.connect() finally: socket.setdefaulttimeout(None) @@ -1176,7 +1189,7 @@ self.assertIsNone(socket.getdefaulttimeout()) socket.setdefaulttimeout(30) try: - httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT, + httpConn = client.HTTPConnection(HOST, self.PORT, timeout=None) httpConn.connect() finally: @@ -1185,11 +1198,66 @@ httpConn.close() # a value - httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT, timeout=30) + httpConn = client.HTTPConnection(HOST, self.PORT, timeout=30) httpConn.connect() self.assertEqual(httpConn.sock.gettimeout(), 30) httpConn.close() + def test_double_response(self): + # Test when a server's response is followed by another in the same + # send() call + threading = support.import_module('threading') + + http_conn = client.HTTPConnection(HOST, self.PORT) + background = None + try: + server_socket = None + server_reader = None + def start_first_response(): + nonlocal server_socket, server_reader + server_socket, _ = self.serv.accept() + server_reader = server_socket.makefile('rb') + while server_reader.readline().rstrip(b'\r\n'): + pass + server_socket.sendall( + b'HTTP/1.1 200 First response\r\n' + b'Content-Length: 5\r\n' + b'\r\n' + ) + background = threading.Thread(target=start_first_response) + background.start() + http_conn.request('GET', '/first') + with http_conn.getresponse() as response: + background.join() + self.assertEqual(response.reason, 'First response') + + def finish_both_responses(): + with server_socket: + with server_reader: + while server_reader.readline().rstrip(b'\r\n'): + pass + server_socket.sendall( + b'ABC\r\n' # First response's body + + b'HTTP/1.1 200 Second response\r\n' + b'Content-Length: 6\r\n' + b'\r\n' + b'Done\r\n' + ) + background = threading.Thread(target=finish_both_responses) + background.start() + # Pipeline second request before reading first body, so that + # server can immediately follow it with the second response + http_conn.request('GET', '/second') + self.assertEqual(response.read(), b'ABC\r\n') + with http_conn.getresponse() as response: + self.assertEqual(response.reason, 'Second response') + self.assertEqual(response.read(), b'Done\r\n') + finally: + http_conn.close() + if background: + background.join() + class PersistenceTest(TestCase): @@ -1275,7 +1343,7 @@ def test_attributes(self): # simple test to check it's storing the timeout - h = client.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30) + h = client.HTTPSConnection(HOST, 80, timeout=30) self.assertEqual(h.timeout, 30) def test_networked(self): @@ -1601,7 +1669,7 @@ @support.reap_threads def test_main(verbose=None): - support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest, + support.run_unittest(HeaderTests, OfflineTest, BasicTest, ServerTest, PersistenceTest, HTTPSTest, RequestBodyTest, SourceAddressTest, HTTPResponseTest, ExtendedReadTest, diff -r cabd7261ae80 Lib/test/test_urllib.py --- a/Lib/test/test_urllib.py Fri May 22 19:29:22 2015 -0700 +++ b/Lib/test/test_urllib.py Mon Jun 22 12:20:32 2015 +0000 @@ -53,30 +53,26 @@ def fakehttp(fakedata): - class FakeSocket(io.BytesIO): - io_refs = 1 + class FakeSocket: + def __init__(self, *args): + self.reader = io.BytesIO(*args) + self.socket_closed = False + self.reader_opened = False def sendall(self, data): FakeHTTPConnection.buf = data def makefile(self, *args, **kwds): - self.io_refs += 1 - return self - - def read(self, amt=None): - if self.closed: - return b"" - return io.BytesIO.read(self, amt) - - def readline(self, length=None): - if self.closed: - return b"" - return io.BytesIO.readline(self, length) + self.reader_opened = True + return self.reader def close(self): - self.io_refs -= 1 - if self.io_refs == 0: - io.BytesIO.close(self) + self.socket_closed = True + + @property + def closed(self): + return self.socket_closed and ( + not self.reader_opened or self.reader.closed) class FakeHTTPConnection(http.client.HTTPConnection): diff -r cabd7261ae80 Lib/test/test_urllib2.py --- a/Lib/test/test_urllib2.py Fri May 22 19:29:22 2015 -0700 +++ b/Lib/test/test_urllib2.py Mon Jun 22 12:20:32 2015 +0000 @@ -334,7 +334,7 @@ else: self._tunnel_headers.clear() - def request(self, method, url, body=None, headers=None): + def request(self, method, url, body=None, headers=None, *, close): self.method = method self.selector = url if headers is not None: @@ -867,8 +867,7 @@ self.assertEqual(http.method, method) self.assertEqual(http.selector, "/") self.assertEqual(http.req_headers, - [("Connection", "close"), - ("Foo", "bar"), ("Spam", "eggs")]) + [("Foo", "bar"), ("Spam", "eggs")]) self.assertEqual(http.data, data) # check OSError converted to URLError diff -r cabd7261ae80 Lib/urllib/request.py --- a/Lib/urllib/request.py Fri May 22 19:29:22 2015 -0700 +++ b/Lib/urllib/request.py Mon Jun 22 12:20:32 2015 +0000 @@ -1215,13 +1215,6 @@ # TODO(jhylton): Should this be redesigned to handle # persistent connections? - # We want to make an HTTP/1.1 request, but the addinfourl - # class isn't prepared to deal with a persistent connection. - # It will try to read all remaining data from the socket, - # which will block while the server waits for the next request. - # So make sure the connection gets closed after the (only) - # request. - headers["Connection"] = "close" headers = dict((name.title(), val) for name, val in headers.items()) if req._tunnel_host: @@ -1236,7 +1229,10 @@ try: try: - h.request(req.get_method(), req.selector, req.data, headers) + # We want to make an HTTP/1.1 request, so make sure the + # connection gets closed after the (only) request. + h.request(req.get_method(), req.selector, req.data, headers, + close=True) except OSError as err: # timeout error raise URLError(err) r = h.getresponse() @@ -1244,13 +1240,6 @@ h.close() raise - # If the server does not send us a 'Connection: close' header, - # HTTPConnection assumes the socket should be left open. Manually - # mark the socket to be closed when this response object goes away. - if h.sock: - h.sock.close() - h.sock = None - r.url = req.get_full_url() # This line replaces the .msg attribute of the HTTPResponse # with .headers, because urllib clients expect the response to