diff -r 1d25edcbb477 Doc/library/http.client.rst
--- a/Doc/library/http.client.rst	Sat Feb 28 17:49:47 2015 -0800
+++ b/Doc/library/http.client.rst	Thu Mar 05 16:19:59 2015 -0800
@@ -207,26 +207,36 @@
 .. method:: HTTPConnection.request(method, url, body=None, headers={})
 
    This will send a request to the server using the HTTP request
-   method *method* and the selector *url*.  If the *body* argument is
-   present, it should be string or bytes object of data to send after
-   the headers are finished.  Strings are encoded as ISO-8859-1, the
-   default charset for HTTP.  To use other encodings, pass a bytes
-   object.  The Content-Length header is set to the length of the
-   string.
+   method *method* and the selector *url*.
 
-   The *body* may also be an open :term:`file object`, in which case the
-   contents of the file is sent; this file object should support ``fileno()``
-   and ``read()`` methods. The header Content-Length is automatically set to
-   the length of the file as reported by stat. The *body* argument may also be
-   an iterable and Content-Length header should be explicitly provided when the
-   body is an iterable.
+   If the *body* argument is present, it can be any of the following types:
+   *string*, *bytes*, iterables of either *string* or *bytes* objects (note
+   the special handling of generators detailed below), readables or objects
+   implementing the :ref:`buffer interface <bufferobjects>` such as
+   *array.array*.
+
+   Unencoded *string* objects are encoded as ISO-8859-1 (latin-1), the default
+   charset for HTTP. To use other encodings, *bytes* objects must be used.
 
    The *headers* argument should be a mapping of extra HTTP
-   headers to send with the request.
+   headers to send with the request. If *headers* contains Transfer-Encoding,
+   the message body will be encoded as detailed in :rfc:`7230`, Section 4.1. If
+   encodings other than chunked (e.g. gzip) are specified in *headers*, chunked
+   encoding will automatically be added as specified in :rfc:`7230`, Section
+   3.3.1.
+
+   If Content-Length is not explicitly set in *headers*, the client will
+   attempt to determine the size of the payload and update the Content-Length
+   header automatically. In cases where content is generated dynamically (i.e.
+   *body* is a generator) and the client is using HTTP/1.1, the body will be
+   chunk encoded and the Transfer-Encoding header will automatically be set.
 
    .. versionadded:: 3.2
       *body* can now be an iterable.
 
+   .. versionadded:: 3.5
+      A generator *body* will now be chunk encoded when using HTTP/1.1.
+
 .. method:: HTTPConnection.getresponse()
 
    Should be called after a request is sent to get the response from the server.
@@ -312,12 +322,29 @@
    packet as the message headers if it is string, otherwise it is sent in a
    separate packet.
 
-.. method:: HTTPConnection.send(data)
+.. method:: HTTPConnection.send(data, chunked=False)
 
    Send data to the server.  This should be used directly only after the
    :meth:`endheaders` method has been called and before :meth:`getresponse` is
    called.
 
+   If *chunked* is *True*, the data will be chunk encoded as specified in
+   :rfc:`7230`, Section 4.1.
+
+   .. versionadded:: 3.5
+      Support for chunked encoding.
+
+.. attribute:: HTTPConnection.on_chunked_data
+
+   If not *None*, *HTTPConnection.on_chunked_data* should reference a callable
+   taking a single argument. The purpose of this callable is to be able to
+   dynamically create Transfer-Encoding trailers as defined in :rfc:`7230`,
+   Section 4.1.2.
+
+   The value of the sole argument when called will be either a byte string or
+   None, the latter signifying EOF. When EOF has been reached,
+   *on_chunked_data* should return a *dict* representing the trailers to send
+   along with the request.
 
 .. 
_httpresponse-objects:
 
diff -r 1d25edcbb477 Lib/http/client.py
--- a/Lib/http/client.py	Sat Feb 28 17:49:47 2015 -0800
+++ b/Lib/http/client.py	Thu Mar 05 16:19:59 2015 -0800
@@ -68,11 +68,14 @@
 
 import email.parser
 import email.message
+import gzip
 import http
 import io
 import os
 import socket
 import collections
+import types
+import zlib
 from urllib.parse import urlsplit
 
 # HTTPMessage, parse_headers(), and the HTTP status code constants are
@@ -82,7 +85,8 @@
            "UnknownTransferEncoding", "UnimplementedFileMode",
            "IncompleteRead", "InvalidURL", "ImproperConnectionState",
            "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
-           "BadStatusLine", "LineTooLong", "error", "responses"]
+           "BadStatusLine", "LineTooLong", "error", "responses",
+           "HTTPEncodingError"]
 
 HTTP_PORT = 80
 HTTPS_PORT = 443
@@ -94,6 +98,7 @@
 _CS_REQ_STARTED = 'Request-started'
 _CS_REQ_SENT = 'Request-sent'
 
+_DEFAULT_ENCODING = 'latin-1'
 
 # hack to maintain backwards compatibility
 globals().update(http.HTTPStatus.__members__)
@@ -109,6 +114,18 @@
 _MAXLINE = 65536
 _MAXHEADERS = 100
 
+# http://tools.ietf.org/html/rfc7230#section-4.1.2
+_ILLEGAL_TRAILER_KEYS = {
+    b'transfer-encoding', b'content-length', b'host', b'cache-control',
+    b'expect', b'max-forwards', b'pragma', b'range', b'te', b'authorization',
+    b'age', b'expires', b'date', b'location', b'retry-after', b'vary',
+    b'warning', b'www-authenticate', b'proxy-authenticate',
+    b'proxy-authorization',}
+
+
+class HTTPEncodingError(ValueError):
+    pass
+
 
 class HTTPMessage(email.message.Message):
     # XXX The only usage of this method is in
@@ -674,6 +691,34 @@
     def getcode(self):
         return self.status
 
+
+def _get_content_length(body):
+    """Return the length of *body* in bytes, or None if it is unknown."""
+    if isinstance(body, (list, tuple)):
+        return sum(len(line) for line in body)
+
+    try:
+        # buffer objects: len() counts items, not bytes (e.g. array.array)
+        return memoryview(body).nbytes
+    except TypeError:
+        pass
+
+    if hasattr(body, '__len__'):
+        return len(body)
+
+    if hasattr(body, 'read'):
+        try:
+            return os.fstat(body.fileno()).st_size
+        except (AttributeError, OSError):
+            # no usable file descriptor (e.g. io.BytesIO); keep looking
+            pass
+
+    if hasattr(body, 'getvalue'):
+        return len(body.getvalue())
+
+    return None
+
+
 class HTTPConnection:
 
     _http_vsn = 11
@@ -689,6 +734,7 @@
         self.timeout = timeout
         self.source_address = source_address
         self.sock = None
+        self.on_chunked_data = None
         self._buffer = []
         self.__response = None
         self.__state = _CS_IDLE
@@ -800,7 +846,82 @@
             self.__response = None
         self.__state = _CS_IDLE
 
-    def send(self, data):
+    def _read_readable(self, readable):
+        """Yield the contents of *readable* in blocks, encoding text data."""
+        blocksize = 8192
+        if self.debuglevel > 0:
+            print("sendIng a read()able")
+        encode = False
+        try:
+            mode = readable.mode
+        except AttributeError:
+            # io.BytesIO and other file-like objects don't have a `mode`
+            # attribute.
+            pass
+        else:
+            if "b" not in mode:
+                encode = True
+                if self.debuglevel > 0:
+                    print("encoding file using iso-8859-1")
+        while 1:
+            datablock = readable.read(blocksize)
+            if not datablock:
+                break
+            if encode:
+                datablock = datablock.encode(_DEFAULT_ENCODING)
+            yield datablock
+
+    def _read_iterable(self, iterable):
+        """Yield the items of *iterable*, encoding str items as latin-1."""
+        for line in iterable:
+            if isinstance(line, str):
+                line = line.encode(_DEFAULT_ENCODING)
+            yield line
+
+    def _encode_chunked(self, chunk):
+        """Return *chunk* framed as specified in RFC 7230, Section 4.1.
+
+        A *chunk* of None yields the last-chunk marker followed by any
+        trailers returned by the ``on_chunked_data`` callback.
+        """
+        # chunk-size counts bytes, not items (think array.array)
+        size = 0 if chunk is None else memoryview(chunk).nbytes
+        if chunk is not None and not size:
+            # an empty chunk would be read as the end of the body
+            return b''
+
+        trailer = None
+
+        if self.on_chunked_data is not None:
+            # return value only matters on the end chunk
+            trailer = self.on_chunked_data(chunk)
+
+        # RFC 7230, Section 4.1: chunk-size is hexadecimal
+        buff = ['{:x}'.format(size).encode('ascii')]
+        if chunk is None:
+            if trailer is not None:
+                # 0-length data represents the end of chunking. at that point,
+                # the trailers are the only remaining things to return
+                for key, val in trailer.items():
+                    if hasattr(key, 'encode'):
+                        key = key.encode(_DEFAULT_ENCODING)
+                    if hasattr(val, 'encode'):
+                        val = val.encode(_DEFAULT_ENCODING)
+
+                    if key.lower() in _ILLEGAL_TRAILER_KEYS:
+                        raise HTTPEncodingError(
+                            'Illegal chunked trailer: {}'.format(
+                                key.decode(_DEFAULT_ENCODING)))
+
+                    buff.append(b': '.join((key, val)))
+            buff.append(b'')  # required \r\n
+        else:
+            buff.append(chunk)
+
+        buff.append(b'')  # required \r\n
+        return b'\r\n'.join(buff)
+
+    def send(self, data, chunked=False):
         """Send `data' to the server.
         ``data`` can be a string object, a bytes object, an array object, a
         file-like object that supports a .read() method, or an iterable object.
@@ -814,39 +935,36 @@
 
         if self.debuglevel > 0:
             print("send:", repr(data))
-        blocksize = 8192
-        if hasattr(data, "read") :
-            if self.debuglevel > 0:
-                print("sendIng a read()able")
-            encode = False
-            try:
-                mode = data.mode
-            except AttributeError:
-                # io.BytesIO and other file-like objects don't have a `mode`
-                # attribute.
-                pass
-            else:
-                if "b" not in mode:
-                    encode = True
-                    if self.debuglevel > 0:
-                        print("encoding file using iso-8859-1")
-            while 1:
-                datablock = data.read(blocksize)
-                if not datablock:
-                    break
-                if encode:
-                    datablock = datablock.encode("iso-8859-1")
-                self.sock.sendall(datablock)
-            return
+
+        # create a consistent interface to the data
         try:
-            self.sock.sendall(data)
+            # this is solely to check to see if data implements the buffer API.
+            # it /would/ be easier to capture if PyObject_CheckBuffer was
+            # exposed to Python
+            memoryview(data)
         except TypeError:
-            if isinstance(data, collections.Iterable):
-                for d in data:
-                    self.sock.sendall(d)
+            if isinstance(data, str):
+                read = lambda data: (data.encode(_DEFAULT_ENCODING),)
+            elif hasattr(data, 'read'):
+                read = self._read_readable
+            elif isinstance(data, collections.Iterable):
+                read = self._read_iterable
             else:
                 raise TypeError("data should be a bytes-like object "
                                 "or an iterable, got %r" % type(data))
+        else:
+            # the object implements the buffer interface and can be passed
+            # directly into socket.send() and compress() methods
+            read = lambda data: (data,)
+
+        for line in read(data):
+            # transfer-encoding is only legal in HTTP/1.1
+            if chunked and self._http_vsn == 11:
+                line = self._encode_chunked(line)
+            self.sock.sendall(line)
+
+        if chunked and self._http_vsn == 11:
+            self.sock.sendall(self._encode_chunked(None))
 
     def _output(self, s):
         """Add a line of output to the current request buffer.
@@ -855,7 +973,7 @@
         """
         self._buffer.append(s)
 
-    def _send_output(self, message_body=None):
+    def _send_output(self, message_body=None, chunked=False):
         """Send the currently buffered request and clear the buffer.
 
         Appends an extra \\r\\n to the buffer.
@@ -867,7 +985,7 @@
 
         self.send(msg)
         if message_body is not None:
-            self.send(message_body)
+            self.send(message_body, chunked=chunked)
 
     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
         """Send a request to the server.
@@ -1012,7 +1130,7 @@
             header = header + b': ' + value
         self._output(header)
 
-    def endheaders(self, message_body=None):
+    def endheaders(self, message_body=None, chunked=False):
         """Indicate that the last header line has been sent to the server.
 
         This method sends the request to the server.  The optional message_body
@@ -1025,32 +1143,15 @@
             self.__state = _CS_REQ_SENT
         else:
             raise CannotSendHeader()
-        self._send_output(message_body)
+        self._send_output(message_body, chunked=chunked)
 
     def request(self, method, url, body=None, headers={}):
         """Send a complete request to the server."""
         self._send_request(method, url, body, headers)
 
-    def _set_content_length(self, body):
-        # Set the content-length based on the body.
-        thelen = None
-        try:
-            thelen = str(len(body))
-        except TypeError as te:
-            # If this is a file-like object, try to
-            # fstat its file descriptor
-            try:
-                thelen = str(os.fstat(body.fileno()).st_size)
-            except (AttributeError, OSError):
-                # Don't send a length if this failed
-                if self.debuglevel > 0: print("Cannot stat!!")
-
-        if thelen is not None:
-            self.putheader('Content-Length', thelen)
-
     def _send_request(self, method, url, body, headers):
         # Honor explicitly requested Host: and Accept-Encoding: headers.
-        header_names = dict.fromkeys([k.lower() for k in headers])
+        header_names = {k.lower(): k for k in headers.keys()}
         skips = {}
         if 'host' in header_names:
             skips['skip_host'] = 1
@@ -1059,15 +1160,60 @@
 
         self.putrequest(method, url, **skips)
 
-        if body is not None and ('content-length' not in header_names):
-            self._set_content_length(body)
+        # chunked encoding will happen under the following conditions:
+        # 1. content-length has not been explicitly set
+        # 2. body is a generator or transfer-encoding was explicitly set
+        # 3. HTTP/1.1 is used
+
+        chunked = False
+        if body is not None and 'content-length' not in header_names:
+            if self._http_vsn == 11 and (
+                    'transfer-encoding' in header_names or
+                    isinstance(body, types.GeneratorType)):
+                # it's possible that other encodings have been applied
+                # before chunking.
+                key = header_names.get('transfer-encoding')
+                enc = []
+                if key is not None:
+                    # tolerate optional whitespace around the commas
+                    enc = [e.strip().lower()
+                           for e in headers[key].split(',') if e.strip()]
+                    # transfer-encoding will be re-written; drop it from a
+                    # copy so the caller's mapping is left untouched
+                    headers = {k: v for k, v in headers.items() if k != key}
+
+                # RFC 7230, Section 3.3.1
+                # If any transfer coding other than
+                # chunked is applied to a request payload body, the sender
+                # MUST apply chunked as the final transfer coding to ensure
+                # that the message is properly framed.
+                if 'chunked' not in enc:
+                    enc.append('chunked')
+                else:
+                    # RFC 7230, Section 3.3.1
+                    # A sender MUST NOT apply chunked more than once to a
+                    # message body (i.e., chunking an already chunked message
+                    # is not allowed).
+                    if enc.count('chunked') > 1:
+                        raise HTTPEncodingError(
+                            'Multiple chunked encodings found. Expected 1.')
+
+                    if enc[-1] != 'chunked':
+                        raise HTTPEncodingError(
+                            'Chunked encoding expected as the final encoding '
+                            'in Transfer-Encoding.')
+
+                self.putheader('Transfer-Encoding', ','.join(enc))
+                chunked = True
+            else:
+                content_length = _get_content_length(body)
+                if content_length is not None:
+                    self.putheader('Content-Length', str(content_length))
+
         for hdr, value in headers.items():
             self.putheader(hdr, value)
-        if isinstance(body, str):
-            # RFC 2616 Section 3.7.1 says that text default has a
-            # default charset of iso-8859-1.
-            body = body.encode('iso-8859-1')
-        self.endheaders(body)
+
+        self.endheaders(body, chunked)
 
     def getresponse(self):
         """Get the response from the server.
diff -r 1d25edcbb477 Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py	Sat Feb 28 17:49:47 2015 -0800
+++ b/Lib/test/test_httplib.py	Thu Mar 05 16:19:59 2015 -0800
@@ -1,5 +1,8 @@
 import errno
 from http import client
+import gzip
+import zlib
+import hashlib
 import io
 import os
 import array
@@ -201,6 +204,181 @@
         self.assertEqual(resp.getheader('Second'), 'val')
 
 
+class TransferEncodingTest(TestCase):
+    expected_body = b"It's just a flesh wound"
+
+    def test_chunked(self):
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(None)
+        conn.send(self._make_body(), chunked=True)
+
+        body, _ = self._parse_chunked(conn.sock.data)
+        self.assertEqual(body, self.expected_body)
+
+    def test_chunked_trailer(self):
+        chksum = hashlib.md5()
+        def _compute_checksum(data):
+            if data is None:
+                return {
+                    'X-Checksum': chksum.hexdigest(),
+                    'Foo': 'bar',
+                }
+            chksum.update(data)
+
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(None)
+        conn.on_chunked_data = _compute_checksum
+        conn.send(self._make_body(), chunked=True)
+
+        body, trailers = self._parse_chunked(conn.sock.data)
+        self.assertEqual(body, self.expected_body)
+        self.assertEqual(
+            trailers[b'X-Checksum'], chksum.hexdigest().encode('latin-1'))
+        self.assertEqual(trailers[b'Foo'], b'bar')
+
+    def test_explicit_headers(self):
+        # explicit gzip
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(None)
+        conn.request('POST', '/', b'', {'Transfer-Encoding': 'gzip'})
+
+        _, headers, _ = self._parse_request(conn.sock.data)
+        self.assertNotIn('content-length', [k.lower() for k in headers.keys()])
+        self.assertEqual(headers['Transfer-Encoding'], 'gzip,chunked')
+
+        # explicit chunked
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(None)
+        conn.request(
+            'POST', '/', self._make_body(), {'Transfer-Encoding': 'chunked'})
+
+        _, headers, body = self._parse_request(conn.sock.data)
+        self.assertNotIn('content-length', [k.lower() for k in headers.keys()])
+        self.assertEqual(headers['Transfer-Encoding'], 'chunked')
+        body, _ = self._parse_chunked(body)
+        self.assertEqual(body, self.expected_body)
+
+        # explicit chunked, string body
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(None)
+        conn.request(
+            'POST', '/', self.expected_body.decode('latin-1'),
+            {'Transfer-Encoding': 'chunked'})
+
+        _, headers, body = self._parse_request(conn.sock.data)
+        self.assertNotIn('content-length', [k.lower() for k in headers.keys()])
+        self.assertEqual(headers['Transfer-Encoding'], 'chunked')
+        body, _ = self._parse_chunked(body)
+        self.assertEqual(body, self.expected_body)
+
+    def test_gzip(self):
+        compressed_data = io.BytesIO(gzip.compress(self.expected_body))
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(None)
+        def _reader():
+            chunk = compressed_data.read(5)
+            while chunk:
+                yield chunk
+                chunk = compressed_data.read(5)
+
+        conn.request('POST', '/', _reader(), {'Transfer-Encoding': 'gzip'})
+
+        _, _, body = self._parse_request(conn.sock.data)
+        body, _ = self._parse_chunked(body)
+        self.assertEqual(gzip.decompress(body), self.expected_body)
+
+    def test_request(self):
+        chksum = hashlib.md5()
+        def _compute_checksum(data):
+            if data is None:
+                return {
+                    'X-Checksum': chksum.hexdigest(),
+                }
+            chksum.update(data)
+
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(None)
+        conn.on_chunked_data = _compute_checksum
+        conn.request('POST', '/', self._make_body())
+
+        _, headers, body = self._parse_request(conn.sock.data)
+        body, trailers = self._parse_chunked(body)
+
+        self.assertEqual(body, self.expected_body)
+        self.assertEqual(
+            trailers[b'X-Checksum'], chksum.hexdigest().encode('latin-1'))
+
+        # Content-Length and Transfer-Encoding SHOULD not be sent in the same
+        # request
+        self.assertNotIn('content-length', [h.lower() for h in headers.keys()])
+
+    def test_illegal_trailer(self):
+        for key in client._ILLEGAL_TRAILER_KEYS:
+            def _build_trailer(data, key=key):
+                if data is None:
+                    return {
+                        key: 'somedata',
+                    }
+
+            with self.subTest(key=key):
+                conn = client.HTTPConnection('example.com')
+                conn.sock = FakeSocket('example.com')
+                conn.on_chunked_data = _build_trailer
+                self.assertRaises(
+                    client.HTTPEncodingError, conn.request, 'POST', '/',
+                    self._make_body())
+
+    def _make_body(self):
+        lines = self.expected_body.split(b' ')
+        for idx, line in enumerate(lines):
+            if idx < len(lines) - 1:
+                yield line + b' '
+            else:
+                yield line
+
+    def _parse_request(self, data):
+        lines = data.split(b'\r\n')
+        request = lines[0]
+        headers = {}
+        n = 1
+        while n < len(lines) and len(lines[n]) > 0:
+            key, val = lines[n].split(b':', 1)
+            headers[key.decode('latin-1')] = val.decode('latin-1').strip()
+            n += 1
+
+        return request, headers, b'\r\n'.join(lines[n + 1:])
+
+    def _parse_chunked(self, data):
+        body = []
+        trailers = {}
+        pos = 0
+        # parse body; walk the data by chunk-size rather than splitting on
+        # CRLF, which may legitimately occur inside binary chunk data
+        while True:
+            eol = data.index(b'\r\n', pos)
+            # chunk-size is hexadecimal (RFC 7230, Section 4.1)
+            size = int(data[pos:eol], 16)
+            pos = eol + 2
+
+            if size == 0:
+                break
+
+            end = pos + size
+            body.append(data[pos:end])
+            # every chunk is terminated by CRLF
+            self.assertEqual(data[end:end + 2], b'\r\n')
+            pos = end + 2
+
+        # parse trailers
+        for trailer in data[pos:].split(b'\r\n'):
+            if not trailer:
+                break
+
+            key, value = trailer.split(b':', 1)
+            trailers[key] = value.strip()
+        return b''.join(body), trailers
+
+
 class BasicTest(TestCase):
     def test_status_lines(self):
         # Test HTTP status lines
@@ -929,7 +1107,7 @@
         # intentionally omitted for simplicity
         blacklist = {"HTTPMessage", "parse_headers"}
         for name in dir(client):
-            if name in blacklist:
+            if name in blacklist or name.startswith('_'):
                 continue
             module_object = getattr(client, name)
             if getattr(module_object, "__module__", None) == "http.client":
@@ -1248,6 +1426,25 @@
         message = client.parse_headers(f)
         return message, f
 
+    def test_list_body(self):
+        cases = (
+            ([b'foo', b'bar'], b'foobar'),
+            ((b'foo', b'bar'), b'foobar'),
+            ((b'foo', 'bar'), b'foobar'),
+            ([b'foo', 'bar'], b'foobar'),
+        )
+        for body, expected in cases:
+            with self.subTest(body):
+                self.conn = client.HTTPConnection('example.com')
+                self.conn.sock = self.sock = FakeSocket('')
+
+                self.conn.request('PUT', '/url', body)
+                msg, f = self.get_headers_and_fp()
+                self.assertNotIn('Content-Type', msg)
+                self.assertIsNone(msg.get_charset())
+                self.assertEqual(len(expected), int(msg.get('content-length')))
+                self.assertEqual(expected, f.read())
+
     def test_manual_content_length(self):
         # Set an incorrect content-length so that we can verify that
         # it will not be over-ridden by the library.
diff -r 1d25edcbb477 Misc/NEWS
--- a/Misc/NEWS	Sat Feb 28 17:49:47 2015 -0800
+++ b/Misc/NEWS	Thu Mar 05 16:19:59 2015 -0800
@@ -80,6 +80,9 @@
   argument which, if set to True, will pass messages to handlers taking handler
   levels into account.
 
+- Issue #12319: Chunked transfer encoding support added to
+  http.client.HTTPConnection requests.
+
 Build
 -----
 