Index: Doc/library/httplib.rst =================================================================== --- Doc/library/httplib.rst (revision 62222) +++ Doc/library/httplib.rst (working copy) @@ -30,7 +30,7 @@ The module provides the following classes: -.. class:: HTTPConnection(host[, port[, strict[, timeout]]]) +.. class:: HTTPConnection(host[, port[, strict[, timeout[, sockbuf]]]]) An :class:`HTTPConnection` instance represents one transaction with an HTTP server. It should be instantiated passing it a host and optional port number. @@ -40,7 +40,12 @@ status line can't be parsed as a valid HTTP/1.0 or 1.1 status line. If the optional *timeout* parameter is given, connection attempts will timeout after that many seconds (if it is not given or ``None``, the global default timeout - setting is used). + setting is used). Setting the optional *sockbuf* parameter to an integer larger + than 0 enables socket buffering to the specified ammount (in bytes), which can + improve transfer speeds considerably. However, it's only possible to safely use + this buffering when the connection is expected to handle a single request (i.e., + a it's non-persistent). Setting this option causes the instance to close the + connection after the first response is received. For example, the following calls all create instances that connect to the server at the same host and port:: @@ -48,15 +53,17 @@ >>> h1 = httplib.HTTPConnection('www.cwi.nl') >>> h2 = httplib.HTTPConnection('www.cwi.nl:80') >>> h3 = httplib.HTTPConnection('www.cwi.nl', 80) - >>> h3 = httplib.HTTPConnection('www.cwi.nl', 80, timeout=10) + >>> h4 = httplib.HTTPConnection('www.cwi.nl', 80, timeout=10) + >>> h5 = httplib.HTTPConnection('www.cwi.nl', sockbuf=4096) .. versionadded:: 2.0 .. versionchanged:: 2.6 *timeout* was added. + *sockbuf* was added. -.. class:: HTTPSConnection(host[, port[, key_file[, cert_file[, strict[, timeout]]]]]) +.. class:: HTTPSConnection(host[, port[, key_file[, cert_file[, strict[, timeout[, sockbuf]]]]]]) A subclass of :class:`HTTPConnection` that uses SSL for communication with secure servers. Default port is ``443``. *key_file* is the name of a PEM @@ -71,14 +78,16 @@ .. versionchanged:: 2.6 *timeout* was added. + *sockbuf* was added. +.. class:: HTTPResponse(sock[, debuglevel=0][, strict=0][, sockbuf=0]) -.. class:: HTTPResponse(sock[, debuglevel=0][, strict=0]) - Class whose instances are returned upon successful connection. Not instantiated directly by user. .. versionadded:: 2.0 + .. versionchanged:: 2.6 + *sockbuf* was added. The following exceptions are raised as appropriate: Index: Lib/httplib.py =================================================================== --- Lib/httplib.py (revision 62222) +++ Lib/httplib.py (working copy) @@ -317,11 +317,21 @@ # false because it prevents clients from talking to HTTP/0.9 # servers. Note that a response with a sufficiently corrupted # status line will look like an HTTP/0.9 response. + # + # sockbuf: Buffering the socket is not recommended if more requests + # might be issued on the same connection, as it will corrupt the + # content of responses by over-reading. It is only safe if the + # connection is guarranteed to close after the first response. + # Buffering can improve transfer speeds considerably, but HTTPResponse + # defaults to non-buffered sockets to keep data integrity in a + # Keep-Alive/pipelining scenario. This is further enforced by + # closing the connection after the response if a buffered socket + # is used. # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. - def __init__(self, sock, debuglevel=0, strict=0, method=None): - self.fp = sock.makefile('rb', 0) + def __init__(self, sock, debuglevel=0, strict=0, method=None, sockbuf=0): + self.fp = sock.makefile('rb', sockbuf) self.debuglevel = debuglevel self.strict = strict self._method = method @@ -336,7 +346,12 @@ self.chunked = _UNKNOWN # is "chunked" being used? self.chunk_left = _UNKNOWN # bytes left to read in current chunk self.length = _UNKNOWN # number of bytes left in response - self.will_close = _UNKNOWN # conn will close at end of response + self.will_close = 0 # will conn close at end of response? + if sockbuf != 0: + self.must_close = True # conn must close at end of response + self.will_close = 1 + else: + self.must_close = False # conn may close at end of response def _read_status(self): # Initialize with Simple-Response defaults @@ -428,7 +443,7 @@ self.chunked = 0 # will the connection close at the end of the response? - self.will_close = self._check_close() + self.will_close = self._check_close() or self.must_close # do we have a Content-Length? # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" @@ -568,6 +583,8 @@ value += self._safe_read(amt) self._safe_read(2) # toss the CRLF at the end of the chunk self.chunk_left = None + if self.must_close: + self.close() return value else: value += self._safe_read(chunk_left) @@ -611,6 +628,8 @@ while amt > 0: chunk = self.fp.read(min(amt, MAXAMOUNT)) if not chunk: + if self.must_close: + self.close() raise IncompleteRead(s) s.append(chunk) amt -= len(chunk) @@ -639,14 +658,23 @@ debuglevel = 0 strict = 0 - def __init__(self, host, port=None, strict=None, timeout=None): + def __init__(self, host, port=None, strict=None, timeout=None, sockbuf=0): self.timeout = timeout self.sock = None self._buffer = [] self.__response = None self.__state = _CS_IDLE self._method = None + self._sockbuf = 0 + try: + if sockbuf == int(sockbuf): + self._sockbuf = sockbuf + except TypeError: + pass + except ValueError: + pass + self._set_hostport(host, port) if strict is not None: self.strict = strict @@ -936,11 +964,12 @@ if self.debuglevel > 0: response = self.response_class(self.sock, self.debuglevel, strict=self.strict, - method=self._method) + method=self._method, + sockbuf=self._sockbuf) else: response = self.response_class(self.sock, strict=self.strict, - method=self._method) - + method=self._method, + sockbuf=self._sockbuf) response.begin() assert response.will_close != _UNKNOWN self.__state = _CS_IDLE @@ -1055,8 +1084,8 @@ default_port = HTTPS_PORT def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=None): - HTTPConnection.__init__(self, host, port, strict, timeout) + strict=None, timeout=None, sockbuf=0): + HTTPConnection.__init__(self, host, port, strict, timeout, sockbuf) self.key_file = key_file self.cert_file = cert_file Index: Lib/test/test_httplib.py =================================================================== --- Lib/test/test_httplib.py (revision 62222) +++ Lib/test/test_httplib.py (working copy) @@ -19,6 +19,8 @@ if mode != 'r' and mode != 'rb': raise httplib.UnimplementedFileMode() return self.fileclass(self.text) + def close(self): + pass class NoEOFStringIO(StringIO.StringIO): """Like StringIO, but raises AssertionError on EOF. @@ -88,7 +90,7 @@ self.assertRaises(httplib.BadStatusLine, resp.begin) def test_partial_reads(self): - # if we have a lenght, the system knows when to close itself + # if we have a length, the system knows when to close itself # same behaviour than when we read the whole thing with read() body = "HTTP/1.1 200 Ok\r\nContent-Length: 4\r\n\r\nText" sock = FakeSocket(body) @@ -191,7 +193,60 @@ self.assertEquals(resp.read(), 'Hello\r\n') resp.close() + def test_buffer_close(self): + """ Tests closing the connection when socket buffer size != 0. + """ + # Test that using a buffered socket (sockbuf > 0) causes the + # HTTPResponse to close after one request, avoiding pipelining + # problems + # See issues #508157, #2576 + print + buffer_sizes = [0, "Invalid", [1,2], "1", 1, 2, 4096, 16384] + s_hello = 'a\r\nHel\r\n1\r\nlo\r\n0\r\n' + m_hello = 'Hello ' * 200 + '\r\n' + l_hello = 'Hello There - ' * 500 + '\r\n' + for bsize in buffer_sizes: + # Check that connections are not persistent when using + # HTTPResponse via HTTPConnection GET with a socket buffer + # For bufsize == 0, allow persistent connection + conn = httplib.HTTPConnection('example.com', sockbuf=bsize) + conn.auto_open = 0 # Avoid re-connecting + conn.sock = FakeSocket('HTTP/1.1 200 OK\r\nConnection: keep-alive' + '\r\nTransfer-Encoding: chunked' + '\r\nKeep-Alive: 300\r\n\r\nHello\r\n') + conn.request('GET', '/') + t = conn.getresponse() + # XXX Is it possible to suffer from pipelining problems if the + # connection is closed then auto-opened? + try: + conn.request('GET', '/') + print conn._sockbuf, + except httplib.NotConnected: + print "Closed ", + + # XXX Is it possible to make a persistent connection from + # HTTPResponse alone? If not, this is (at best, a bit) bogus: + for hello in (s_hello, m_hello, l_hello): + sock = FakeSocket('HTTP/1.1 200 OK\r\nConnection: keep-alive' + '\r\n Keep-Alive: 300\r\n\r\n' + hello) + resp = httplib.HTTPResponse(sock, method="GET", sockbuf=bsize) + resp.begin() + try: + resp.read() + except httplib.IncompleteRead: + self.fail('Unexpected incomplete read') + if resp.must_close: + self.assertTrue(resp.isclosed()) + elif resp.will_close: + self.assertTrue(resp.isclosed()) + # TODO Find a case that keeps the connection open without + # raising IncompleteRead and test it for being open. + else: + self.assertFalse(resp.isclosed()) + resp.close() + + class OfflineTest(TestCase): def test_responses(self): self.assertEquals(httplib.responses[httplib.NOT_FOUND], "Not Found")