diff --git a/Lib/http/client.py b/Lib/http/client.py --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -267,7 +267,7 @@ return email.parser.Parser(_class=_class).parsestr(hstring) -class HTTPResponse(io.RawIOBase): +class HTTPResponse(io.BufferedIOBase): # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. @@ -492,9 +492,10 @@ return b"" if amt is not None: - # Amount is given, so call base class version - # (which is implemented in terms of self.readinto) - return super(HTTPResponse, self).read(amt) + # Amount is given, implement using readinto + b = bytearray(amt) + n = self.readinto(b) + return memoryview(b)[:n].tobytes() else: # Amount is not given (unbounded read) so we must check self.length # and self.chunked @@ -574,71 +575,62 @@ if line in (b'\r\n', b'\n', b''): break + def _get_chunk_left(self): + chunk_left = self.chunk_left + if not chunk_left: + if chunk_left == 0: # i.e. not None + # We are at the end of chunk. dicard chunk end + self._safe_read(2) # toss the CRLF at the end of the chunk + try: + chunk_left = self._read_next_chunk_size() + except ValueError: + raise IncompleteRead(b'') + if chunk_left == 0: + # last chunk: 1*("0") [ chunk-extension ] CRLF + self._read_and_discard_trailer() + # we read everything; close the "file" + self._close_conn() + chunk_left = None + self.chunk_left = chunk_left + return chunk_left + def _readall_chunked(self): assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left value = [] - while True: - if chunk_left is None: - try: - chunk_left = self._read_next_chunk_size() - if chunk_left == 0: - break - except ValueError: - raise IncompleteRead(b''.join(value)) - value.append(self._safe_read(chunk_left)) - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - self._read_and_discard_trailer() - - # we read everything; close the "file" - self._close_conn() - - return b''.join(value) + try: + while True: + chunk_left = self._get_chunk_left() + if chunk_left is None: + break + value.append(self._safe_read(chunk_left)) + self.chunk_left = 0 + return b''.join(value) + except IncompleteRead: + raise IncompleteRead(b''.join(value)) def _readinto_chunked(self, b): assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - total_bytes = 0 mvb = memoryview(b) - while True: - if chunk_left is None: - try: - chunk_left = self._read_next_chunk_size() - if chunk_left == 0: - break - except ValueError: - raise IncompleteRead(bytes(b[0:total_bytes])) + try: + while True: + chunk_left = self._get_chunk_left() + if chunk_left is None: + return total_bytes - if len(mvb) < chunk_left: - n = self._safe_readinto(mvb) - self.chunk_left = chunk_left - n - return total_bytes + n - elif len(mvb) == chunk_left: - n = self._safe_readinto(mvb) - self._safe_read(2) # toss the CRLF at the end of the chunk - self.chunk_left = None - return total_bytes + n - else: - temp_mvb = mvb[0:chunk_left] - n = self._safe_readinto(temp_mvb) - mvb = mvb[n:] - total_bytes += n + if len(mvb) <= chunk_left: + n = self._safe_readinto(mvb) + self.chunk_left = chunk_left - n + return total_bytes + n + else: + temp_mvb = mvb[0:chunk_left] + n = self._safe_readinto(temp_mvb) + mvb = mvb[n:] + total_bytes += n + self.chunk_left = 0 - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - self._read_and_discard_trailer() - - # we read everything; close the "file" - self._close_conn() - - return total_bytes + except IncompleteRead: + raise IncompleteRead(bytes(b[0:total_bytes])) def _safe_read(self, amt): """Read the number of bytes requested, compensating for partial reads. @@ -679,6 +671,72 @@ total_bytes += n return total_bytes + def read1(self, n=-1): + """Read with at most one underlying system call. If at least one + byte is buffered, return that instead. + """ + if self.fp is None or self._method == "HEAD": + return b"" + if self.chunked: + return self._read1_chunked(n) + result = self.fp.read1(n) + if not result and n: + self._close_conn() + return result + + def peek(self, n=-1): + # Having this enables IOBase.readline() to read more than one + # byte at a time + if self.fp is None or self._method == "HEAD": + return b"" + if self.chunked: + try: + return self._peek_chunked(n) + except IncompleteRead: + return b"" + return self.fp.peek(n) + + def readline(self, limit=-1): + if self.fp is None or self._method == "HEAD": + return b"" + if self.chunked: + # Fallback to IOBase readline which uses peek() and read() + return super().readline(limit) + result = self.fp.readline(limit) + if not result and limit: + self._close_conn() + return result + + def _read1_chunked(self, n): + # Strictly speaking, _get_chunk_left() may cause more than one read, + # but that is ok, since that is to satisfy the chunked protocol. + chunk_left = self._get_chunk_left() + if chunk_left is None or n == 0: + return b'' + if n < 0: + n = chunk_left + else: + n = min(n, chunk_left) + + read = self.fp.read1(n) + self.chunk_left -= len(read) + if not read: + raise IncompleteRead(b"") + return read + + def _peek_chunked(self, n): + # Strictly speaking, _get_chunk_left() may cause more than one read, + # but that is ok, since that is to satisfy the chunked protocol. + chunk_left = self._get_chunk_left() + if chunk_left is None: + return b'' #eof + # peek is allowed to return more than requested. Just request the + # entire chunk, and truncate what we get. + peek = self.fp.peek(chunk_left)[:chunk_left] + if not peek: + raise IncompleteRead(b'') + return peek + def fileno(self): return self.fp.fileno() diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -18,6 +18,26 @@ # Root cert file (CA) for svn.python.org's cert CACERT_svn_python_org = os.path.join(here, 'https_svn_python_org_root.pem') +# constants for testing chunked encoding +chunked_start = ( + 'HTTP/1.1 200 OK\r\n' + 'Transfer-Encoding: chunked\r\n\r\n' + 'a\r\n' + 'hello worl\r\n' + '3\r\n' + 'd! \r\n' + '8\r\n' + 'and now \r\n' + '22\r\n' + 'for something completely different\r\n' +) +chunked_expected = b'hello world! and now for something completely different' +chunk_extension = ";foo=bar" +last_chunk = "0\r\n" +last_chunk_extended = "0" + chunk_extension + "\r\n" +trailers = "X-Dummy: foo\r\nX-Dumm2: bar\r\n" +chunked_end = "\r\n" + HOST = support.HOST class FakeSocket: @@ -36,7 +56,10 @@ def makefile(self, mode, bufsize=None): if mode != 'r' and mode != 'rb': raise client.UnimplementedFileMode() - return self.fileclass(self.text) + # keep the file around so we can check how much was read from it + self.file = self.fileclass(self.text) + self.file.close = lambda:None #nerf close () + return self.file class EPipeSocket(FakeSocket): @@ -418,20 +441,8 @@ conn.request('POST', 'test', conn) def test_chunked(self): - chunked_start = ( - 'HTTP/1.1 200 OK\r\n' - 'Transfer-Encoding: chunked\r\n\r\n' - 'a\r\n' - 'hello worl\r\n' - '3\r\n' - 'd! \r\n' - '8\r\n' - 'and now \r\n' - '22\r\n' - 'for something completely different\r\n' - ) - expected = b'hello world! and now for something completely different' - sock = FakeSocket(chunked_start + '0\r\n') + expected = chunked_expected + sock = FakeSocket(chunked_start + last_chunk + chunked_end) resp = client.HTTPResponse(sock, method="GET") resp.begin() self.assertEqual(resp.read(), expected) @@ -439,7 +450,7 @@ # Various read sizes for n in range(1, 12): - sock = FakeSocket(chunked_start + '0\r\n') + sock = FakeSocket(chunked_start + last_chunk + chunked_end) resp = client.HTTPResponse(sock, method="GET") resp.begin() self.assertEqual(resp.read(n) + resp.read(n) + resp.read(), expected) @@ -462,23 +473,12 @@ resp.close() def test_readinto_chunked(self): - chunked_start = ( - 'HTTP/1.1 200 OK\r\n' - 'Transfer-Encoding: chunked\r\n\r\n' - 'a\r\n' - 'hello worl\r\n' - '3\r\n' - 'd! \r\n' - '8\r\n' - 'and now \r\n' - '22\r\n' - 'for something completely different\r\n' - ) - expected = b'hello world! and now for something completely different' + + expected = chunked_expected nexpected = len(expected) b = bytearray(128) - sock = FakeSocket(chunked_start + '0\r\n') + sock = FakeSocket(chunked_start + last_chunk + chunked_end) resp = client.HTTPResponse(sock, method="GET") resp.begin() n = resp.readinto(b) @@ -488,7 +488,7 @@ # Various read sizes for n in range(1, 12): - sock = FakeSocket(chunked_start + '0\r\n') + sock = FakeSocket(chunked_start + last_chunk + chunked_end) resp = client.HTTPResponse(sock, method="GET") resp.begin() m = memoryview(b) @@ -524,7 +524,7 @@ '1\r\n' 'd\r\n' ) - sock = FakeSocket(chunked_start + '0\r\n') + sock = FakeSocket(chunked_start + last_chunk + chunked_end) resp = client.HTTPResponse(sock, method="HEAD") resp.begin() self.assertEqual(resp.read(), b'') @@ -544,7 +544,7 @@ '1\r\n' 'd\r\n' ) - sock = FakeSocket(chunked_start + '0\r\n') + sock = FakeSocket(chunked_start + last_chunk + chunked_end) resp = client.HTTPResponse(sock, method="HEAD") resp.begin() b = bytearray(5) @@ -619,6 +619,7 @@ + '0' * 65536 + 'a\r\n' 'hello world\r\n' '0\r\n' + '\r\n' ) resp = client.HTTPResponse(FakeSocket(body)) resp.begin() @@ -658,6 +659,222 @@ conn.request('POST', '/', body) self.assertGreater(sock.sendall_calls, 1) + def test_chunked_extension(self): + extra = '3;foo=bar\r\n' + 'abc\r\n' + expected = chunked_expected + b'abc' + + sock = FakeSocket(chunked_start + extra + last_chunk_extended + chunked_end) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + self.assertEqual(resp.read(), expected) + resp.close() + + def test_chunked_missing_end(self): + """some servers may serve up a short chunked encoding stream""" + expected = chunked_expected + sock = FakeSocket(chunked_start + last_chunk) #no terminating crlf + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + self.assertEqual(resp.read(), expected) + resp.close() + + def test_chunked_trailers(self): + """See that trailers are read and ignored""" + expected = chunked_expected + sock = FakeSocket(chunked_start + last_chunk + trailers + chunked_end) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + self.assertEqual(resp.read(), expected) + # we should have reached the end of the file + self.assertEqual(sock.file.read(100), b"") #we read to the end + resp.close() + + def test_chunked_sync(self): + """Check that we don't read past the end of the chunked-encoding stream""" + expected = chunked_expected + extradata = "extradata" + sock = FakeSocket(chunked_start + last_chunk + trailers + chunked_end + extradata) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + self.assertEqual(resp.read(), expected) + # the file should now have our extradata ready to be read + self.assertEqual(sock.file.read(100), extradata.encode("ascii")) #we read to the end + resp.close() + + def test_content_length_sync(self): + """Check that we don't read past the end of the Content-Length stream""" + extradata = "extradata" + expected = b"Hello123\r\n" + sock = FakeSocket('HTTP/1.1 200 OK\r\nContent-Length: 10\r\n\r\nHello123\r\n' + extradata) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + self.assertEqual(resp.read(), expected) + # the file should now have our extradata ready to be read + self.assertEqual(sock.file.read(100), extradata.encode("ascii")) #we read to the end + resp.close() + +class ExtendedReadTest(TestCase): + """ + Test peek(), read1(), readline() + """ + lines = ( + 'HTTP/1.1 200 OK\r\n' + '\r\n' + 'hello world!\n' + 'and now \n' + 'for something completely different\n' + 'foo' + ) + lines_expected = lines[lines.find('hello'):].encode("ascii") + lines_chunked = ( + 'HTTP/1.1 200 OK\r\n' + 'Transfer-Encoding: chunked\r\n\r\n' + 'a\r\n' + 'hello worl\r\n' + '3\r\n' + 'd!\n\r\n' + '9\r\n' + 'and now \n\r\n' + '23\r\n' + 'for something completely different\n\r\n' + '3\r\n' + 'foo\r\n' + '0\r\n' # terminating chunk + '\r\n' # end of trailers + ) + + def test_peek(self): + sock = FakeSocket(self.lines) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + resp.fp = io.BufferedReader(resp.fp) + # patch up the buffered peek so that it returns not too much stuff + oldpeek = resp.fp.peek + def mypeek(n=-1): + p = oldpeek(n) + if n >= 0: + return p[:n] + return p[:10] + self._verify_peek(resp, self.lines_expected) + + def test_peek_chunked(self): + expected = b'hello world! and now for something completely different' + sock = FakeSocket(self.lines_chunked) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + resp.fp = io.BufferedReader(resp.fp) + self._verify_peek(resp, self.lines_expected) + + def _verify_peek(self, resp, expected): + all = [] + while True: + # try a short peek + p = resp.peek(3) + if p: + self.assertGreater(len(p), 0) + # then unbounded peek + p2 = resp.peek() + self.assertGreaterEqual(len(p2), len(p)) + self.assertTrue(p2.startswith(p)) + next = resp.read(len(p2)) + self.assertEqual(next, p2) + else: + next = resp.read() + self.assertFalse(next) + all.append(next) + if not next: + break + self.assertEqual(b"".join(all), expected) + resp.close() + + def test_readline(self): + sock = FakeSocket(self.lines) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + resp.fp = io.BufferedReader(resp.fp) + self._verify_readline(resp.readline, self.lines_expected) + resp.close() + + def test_readline_chunked(self): + sock = FakeSocket(self.lines_chunked) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + resp.fp = io.BufferedReader(resp.fp) + self._verify_readline(resp.readline, self.lines_expected) + resp.close() + + def _verify_readline(self, readline, expected): + all = [] + while True: + # short readlines + line = readline(5) + if line and line != b"foo": + if len(line) < 5: + self.assertTrue(line.endswith(b"\n")) + all.append(line) + if not line: + break + self.assertEqual(b"".join(all), expected) + + def test_read1(self): + sock = FakeSocket(self.lines) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + resp.fp = io.BufferedReader(resp.fp) + def r(): + res = resp.read1(4) + self.assertLessEqual(len(res), 4) + return res + readliner = Readliner(r) + self._verify_readline(readliner.readline, self.lines_expected) + resp.close() + + def test_read1_chunked(self): + sock = FakeSocket(self.lines_chunked) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + resp.fp = io.BufferedReader(resp.fp) + def r(): + res = resp.read1(4) + self.assertLessEqual(len(res), 4) + return res + readliner = Readliner(r) + self._verify_readline(readliner.readline, self.lines_expected) + resp.close() + +class Readliner: + """ + a simple readline class that uses an arbitrary read function and buffering + """ + def __init__(self, readfunc): + self.readfunc = readfunc + self.remainder = b"" + + def readline(self, limit): + data = [] + datalen = 0 + read = self.remainder + try: + while True: + idx = read.find(b'\n') + if idx != -1: + break + if datalen + len(read) >= limit: + idx = limit - datalen - 1 + # read more data + data.append(read) + read = self.readfunc() + if not read: + idx = 0 #eof condition + break + idx += 1 + data.append(read[:idx]) + self.remainder = read[idx:] + return b"".join(data) + except: + self.remainder = b"".join(data) + raise + class OfflineTest(TestCase): def test_responses(self): self.assertEqual(client.responses[client.NOT_FOUND], "Not Found") @@ -961,7 +1178,7 @@ def test_main(verbose=None): support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest, HTTPSTest, RequestBodyTest, SourceAddressTest, - HTTPResponseTest) + HTTPResponseTest, ExtendedReadTest) if __name__ == '__main__': test_main()