diff -r d564695b67bb Doc/library/http.client.rst --- a/Doc/library/http.client.rst Fri Mar 20 00:27:28 2015 +0100 +++ b/Doc/library/http.client.rst Tue Mar 24 09:24:14 2015 -0700 @@ -207,26 +207,38 @@ .. method:: HTTPConnection.request(method, url, body=None, headers={}) This will send a request to the server using the HTTP request - method *method* and the selector *url*. If the *body* argument is - present, it should be string or bytes object of data to send after - the headers are finished. Strings are encoded as ISO-8859-1, the - default charset for HTTP. To use other encodings, pass a bytes - object. The Content-Length header is set to the length of the - string. + method *method* and the selector *url*. - The *body* may also be an open :term:`file object`, in which case the - contents of the file is sent; this file object should support ``fileno()`` - and ``read()`` methods. The header Content-Length is automatically set to - the length of the file as reported by stat. The *body* argument may also be - an iterable and Content-Length header should be explicitly provided when the - body is an iterable. + If the *body* argument is present, it can be any of the following types: + *string*, *bytes*, iterables comprised of either *string* or *bytes* (note + the special handling of generators detailed below), :term:`file object` + (the object should support ``fileno()`` and ``read()`` methods) or objects + implementing the :ref:`buffer interface <bufferobjects>` such as + :class:`array.array`. - The *headers* argument should be a mapping of extra HTTP - headers to send with the request. + Unencoded *string* objects are encoded as ISO-8859-1 (latin-1), the default + charset for HTTP. To use other encodings, *bytes* objects must be used. + + The *headers* argument should be a mapping of extra HTTP headers to send + with the request.
If Content-Length is not explicitly set in *headers*, + the client will attempt to determine the size of the payload and update the + Content-Length header automatically. In cases where content is generated + dynamically (i.e. *body* is an instance of :class:`types.GeneratorType`), + the body will be chunk encoded and the Transfer-Encoding header will + automatically be set. If Content-Length is supplied in *headers* and the + body is an instance of :class:`types.GeneratorType`, chunked encoding will + not be applied. + + If Transfer-Encoding is specified in *headers*, the client assumes that all + encoding is handled by the calling code. In this case, Content-Length will + not be automatically added to *headers*. .. versionadded:: 3.2 *body* can now be an iterable. + .. versionchanged:: 3.5 + A generator *body* will now be chunk encoded when using HTTP/1.1. + .. method:: HTTPConnection.getresponse() Should be called after a request is sent to get the response from the server. @@ -304,7 +316,7 @@ an argument. -.. method:: HTTPConnection.endheaders(message_body=None) +.. method:: HTTPConnection.endheaders(message_body=None, encode_chunked=False) Send a blank line to the server, signalling the end of the headers. The optional *message_body* argument can be used to pass a message body @@ -312,12 +324,37 @@ packet as the message headers if it is string, otherwise it is sent in a separate packet. -.. method:: HTTPConnection.send(data) + *encode_chunked* is passed directly into :meth:`send` (see :meth:`send` + documentation for details). + + +.. method:: HTTPConnection.send(data, encode_chunked=False) Send data to the server. This should be used directly only after the :meth:`endheaders` method has been called and before :meth:`getresponse` is called. + If *encode_chunked* is ``True``, the result of each iteration of *data* will + be chunk encoded as specified in :rfc:`7230`, Section 3.3.1. How the data is + encoded is dependent on the type of *data*.
If *data* implements the + :ref:`buffer interface <bufferobjects>`, is a :class:`str` or a + :term:`file object`, the encoding will result in a single chunk. + If *data* is a :class:`collections.Iterable`, each iteration of *data* will + result in a chunk. :meth:`send` automatically signals the end of the chunk + encoded data immediately after *data*. + + If data types that would otherwise be sent as a single chunk are to be + delivered as smaller chunks, it must be done at a higher level. The + calling code is responsible for doing this and should pass a + :class:`collections.Iterable` into :meth:`send`. + + .. note:: Due to the chunked encoding spec, it is important that no + iterations of data of type :class:`collections.Iterable` result in + zero-length data. A zero-length chunk marks the end of the body, so + servers may stop reading the request body prematurely. + + .. versionadded:: 3.5 + Support for chunked encoding. .. _httpresponse-objects: diff -r d564695b67bb Lib/http/client.py --- a/Lib/http/client.py Fri Mar 20 00:27:28 2015 +0100 +++ b/Lib/http/client.py Tue Mar 24 09:24:14 2015 -0700 @@ -74,6 +74,7 @@ import re import socket import collections +import types from urllib.parse import urlsplit # HTTPMessage, parse_headers(), and the HTTP status code constants are @@ -83,7 +84,8 @@ "UnknownTransferEncoding", "UnimplementedFileMode", "IncompleteRead", "InvalidURL", "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", - "BadStatusLine", "LineTooLong", "error", "responses"] + "BadStatusLine", "LineTooLong", "error", "responses", + "HTTPEncodingError"] HTTP_PORT = 80 HTTPS_PORT = 443 @@ -95,6 +97,7 @@ _CS_REQ_STARTED = 'Request-started' _CS_REQ_SENT = 'Request-sent' +_DEFAULT_ENCODING = 'latin-1' # hack to maintain backwards compatibility globals().update(http.HTTPStatus.__members__) @@ -138,6 +141,9 @@ _is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch _is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search +class
HTTPEncodingError(ValueError): + pass + class HTTPMessage(email.message.Message): # XXX The only usage of this method is in @@ -703,6 +709,24 @@ def getcode(self): return self.status + +def _get_content_length(body): + if isinstance(body, (list, tuple)): + return sum(len(line) for line in body) + + if hasattr(body, '__len__'): + return len(body) + + if hasattr(body, 'read'): + try: + return os.fstat(body.fileno()).st_size + except (AttributeError, OSError): + pass # no real file descriptor (e.g. io.BytesIO); try getvalue() + + if hasattr(body, 'getvalue'): + return len(body.getvalue()) + + class HTTPConnection: _http_vsn = 11 @@ -829,7 +853,37 @@ self.__response = None self.__state = _CS_IDLE - def send(self, data): + def _read_readable(self, readable): + blocksize = 8192 + if self.debuglevel > 0: + print("sendIng a read()able") + encode = False + try: + mode = readable.mode + except AttributeError: + # io.BytesIO and other file-like objects don't have a `mode` + # attribute. + pass + else: + if "b" not in mode: + encode = True + if self.debuglevel > 0: + print("encoding file using iso-8859-1") + while 1: + datablock = readable.read(blocksize) + if not datablock: + break + if encode: + datablock = datablock.encode(_DEFAULT_ENCODING) + yield datablock + + def _read_iterable(self, iterable): + for line in iterable: + if isinstance(line, str): + line = line.encode(_DEFAULT_ENCODING) + yield line + + def send(self, data, encode_chunked=False): """Send `data' to the server. ``data`` can be a string object, a bytes object, an array object, a file-like object that supports a .read() method, or an iterable object. @@ -843,39 +897,40 @@ if self.debuglevel > 0: print("send:", repr(data)) - blocksize = 8192 - if hasattr(data, "read") : - if self.debuglevel > 0: - print("sendIng a read()able") - encode = False - try: - mode = data.mode - except AttributeError: - # io.BytesIO and other file-like objects don't have a `mode` - # attribute.
- pass - else: - if "b" not in mode: - encode = True - if self.debuglevel > 0: - print("encoding file using iso-8859-1") - while 1: - datablock = data.read(blocksize) - if not datablock: - break - if encode: - datablock = datablock.encode("iso-8859-1") - self.sock.sendall(datablock) - return + + # create a consistent interface to the data try: - self.sock.sendall(data) + # this is solely to check to see if data implements the buffer API. + # it /would/ be easier to capture if PyObject_CheckBuffer was + # exposed to Python + memoryview(data) except TypeError: - if isinstance(data, collections.Iterable): - for d in data: - self.sock.sendall(d) + if isinstance(data, str): + read = lambda data: (data.encode(_DEFAULT_ENCODING),) + elif hasattr(data, 'read'): + read = self._read_readable + elif isinstance(data, collections.Iterable): + read = self._read_iterable else: raise TypeError("data should be a bytes-like object " "or an iterable, got %r" % type(data)) + else: + # the object implements the buffer interface and can be passed + # directly into socket methods + read = lambda data: (data,) + + for line in read(data): + if encode_chunked and self._http_vsn == 11: + # chunk size is the payload's byte count, not its item count + size = memoryview(line).nbytes + if not size: + continue # an empty chunk would end the body early + line = b'\r\n'.join((b'%X' % size, line, b'')) + self.sock.sendall(line) + + if encode_chunked and self._http_vsn == 11: + # end chunked transfer + self.sock.sendall(b'0\r\n\r\n') def _output(self, s): """Add a line of output to the current request buffer. @@ -884,7 +939,7 @@ """ self._buffer.append(s) - def _send_output(self, message_body=None): + def _send_output(self, message_body=None, encode_chunked=False): """Send the currently buffered request and clear the buffer. Appends an extra \\r\\n to the buffer.
@@ -896,7 +951,7 @@ self.send(msg) if message_body is not None: - self.send(message_body) + self.send(message_body, encode_chunked=encode_chunked) def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): """Send a request to the server. @@ -1049,7 +1104,7 @@ header = header + b': ' + value self._output(header) - def endheaders(self, message_body=None): + def endheaders(self, message_body=None, encode_chunked=False): """Indicate that the last header line has been sent to the server. This method sends the request to the server. The optional message_body @@ -1062,32 +1117,15 @@ self.__state = _CS_REQ_SENT else: raise CannotSendHeader() - self._send_output(message_body) + self._send_output(message_body, encode_chunked=encode_chunked) def request(self, method, url, body=None, headers={}): """Send a complete request to the server.""" self._send_request(method, url, body, headers) - def _set_content_length(self, body): - # Set the content-length based on the body. - thelen = None - try: - thelen = str(len(body)) - except TypeError as te: - # If this is a file-like object, try to - # fstat its file descriptor - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print("Cannot stat!!") - - if thelen is not None: - self.putheader('Content-Length', thelen) - def _send_request(self, method, url, body, headers): # Honor explicitly requested Host: and Accept-Encoding: headers. - header_names = dict.fromkeys([k.lower() for k in headers]) + header_names = {k.lower(): k for k in headers} skips = {} if 'host' in header_names: skips['skip_host'] = 1 @@ -1096,15 +1134,57 @@ self.putrequest(method, url, **skips) - if body is not None and ('content-length' not in header_names): - self._set_content_length(body) + # chunked encoding will happen under the following conditions: + # 1. content-length has not been explicitly set + # 2. body is a generator + # 3.
HTTP/1.1 is used + # 4. Transfer-Encoding has NOT been explicitly set by the caller + + encode_chunked = False + if body is not None and 'content-length' not in header_names: + # only chunk body if not explicitly set for backwards + # compatibility, assuming the client code is already handling the + # chunking + if 'transfer-encoding' not in header_names: + if isinstance( + body, types.GeneratorType) and self._http_vsn == 11: + encode_chunked = True + # send the header directly rather than mutating the + # caller's dict (or the shared default of request()) + self.putheader('Transfer-Encoding', 'chunked') + else: + content_length = _get_content_length(body) + if content_length is not None: + self.putheader('Content-Length', str(content_length)) + else: + # transfer-encoding is specified, do some validation + + # RFC 7230, Section 3.3.1 + # A sender MUST NOT apply chunked more than once to a + # message body (i.e., chunking an already chunked message + # is not allowed). + # coding names are case-insensitive and may be surrounded + # by optional whitespace, so normalise before comparing + enc = headers[header_names['transfer-encoding']] + enc = [e.strip().lower() for e in enc.split(',')] + if enc.count('chunked') > 1: + raise HTTPEncodingError( + 'Multiple chunked encodings found. Expected 1.') + + # RFC 7230, Section 3.3.1 + # If any transfer coding other than + # chunked is applied to a request payload body, the sender + # MUST apply chunked as the final transfer coding to ensure + # that the message is properly framed. + if enc[-1] != 'chunked': + raise HTTPEncodingError( + 'Chunked encoding expected as the final ' + 'Transfer-Encoding.') + for hdr, value in headers.items(): self.putheader(hdr, value) - if isinstance(body, str): - # RFC 2616 Section 3.7.1 says that text default has a - # default charset of iso-8859-1. - body = body.encode('iso-8859-1') - self.endheaders(body) + + self.endheaders(body, encode_chunked) def getresponse(self): """Get the response from the server.
diff -r d564695b67bb Lib/test/test_httplib.py --- a/Lib/test/test_httplib.py Fri Mar 20 00:27:28 2015 +0100 +++ b/Lib/test/test_httplib.py Tue Mar 24 09:24:14 2015 -0700 @@ -258,6 +258,120 @@ conn.putheader(name, value) +class TransferEncodingTest(TestCase): + expected_body = b"It's just a flesh wound" + + def test_chunked(self): + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn.send(self._make_body(), encode_chunked=True) + + body = self._parse_chunked(conn.sock.data) + self.assertEqual(body, self.expected_body) + + def test_explicit_headers(self): + # explicit chunked + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + # this shouldn't actually be automatically chunk encoded because the + # calling code has explicitly stated that it's taking care of it + conn.request( + 'POST', '/', self._make_body(), {'Transfer-Encoding': 'chunked'}) + + _, headers, body = self._parse_request(conn.sock.data) + self.assertNotIn('content-length', [k.lower() for k in headers.keys()]) + self.assertEqual(headers['Transfer-Encoding'], 'chunked') + self.assertEqual(body, self.expected_body) + + # explicit chunked, string body + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn.request( + 'POST', '/', self.expected_body.decode('latin-1'), + {'Transfer-Encoding': 'chunked'}) + + _, headers, body = self._parse_request(conn.sock.data) + self.assertNotIn('content-length', [k.lower() for k in headers.keys()]) + self.assertEqual(headers['Transfer-Encoding'], 'chunked') + self.assertEqual(body, self.expected_body) + + # invalid ordering + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + with self.assertRaises(client.HTTPEncodingError): + conn.request( + 'POST', '/', self._make_body(), + {'Transfer-Encoding': 'chunked,gzip'}) + + # multiple chunk encodings found + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + with
self.assertRaises(client.HTTPEncodingError): + conn.request( + 'POST', '/', self._make_body(), + {'Transfer-Encoding': 'chunked,gzip,chunked'}) + + def test_request(self): + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(None) + conn.request('POST', '/', self._make_body()) + + _, headers, body = self._parse_request(conn.sock.data) + body = self._parse_chunked(body) + + self.assertEqual(body, self.expected_body) + + # Content-Length and Transfer-Encoding SHOULD not be sent in the same + # request (header names were decoded to str by _parse_request) + self.assertNotIn('content-length', [h.lower() for h in headers]) + + def _make_body(self): + lines = self.expected_body.split(b' ') + for idx, line in enumerate(lines): + if idx < len(lines) - 1: + yield line + b' ' + else: + yield line + + def _parse_request(self, data): + lines = data.split(b'\r\n') + request = lines[0] + headers = {} + n = 1 + while n < len(lines) and len(lines[n]) > 0: + key, val = lines[n].split(b':', 1) + headers[key.decode('latin-1')] = val.decode('latin-1').strip() + n += 1 + + return request, headers, b'\r\n'.join(lines[n + 1:]) + + def _parse_chunked(self, data): + body = [] + trailers = {} + n = 0 + lines = data.split(b'\r\n') + # parse body + while True: + size, chunk = lines[n:n+2] + size = int(size, 16) + + if size == 0: + n += 1 + break + + self.assertEqual(size, len(chunk)) + body.append(chunk) + + n += 2 + # we /should/ hit the end chunk, but check against the size of + # lines so we're not stuck in an infinite loop should we get + # malformed data + if n + 2 > len(lines): + break + + return b''.join(body) + + class BasicTest(TestCase): def test_status_lines(self): # Test HTTP status lines @@ -986,7 +1100,7 @@ # intentionally omitted for simplicity blacklist = {"HTTPMessage", "parse_headers"} for name in dir(client): - if name in blacklist: + if name in blacklist or name.startswith('_'): continue module_object = getattr(client, name) if getattr(module_object, "__module__", None) == "http.client": @@ -1305,6 +1419,25
@@ message = client.parse_headers(f) return message, f + def test_list_body(self): + cases = ( + ([b'foo', b'bar'], b'foobar'), + ((b'foo', b'bar'), b'foobar'), + ((b'foo', 'bar'), b'foobar'), + ([b'foo', 'bar'], b'foobar'), + ) + for body, expected in cases: + with self.subTest(body): + self.conn = client.HTTPConnection('example.com') + self.conn.sock = self.sock = FakeSocket('') + + self.conn.request('PUT', '/url', body) + msg, f = self.get_headers_and_fp() + self.assertNotIn('Content-Type', msg) + self.assertIsNone(msg.get_charset()) + self.assertEqual(len(expected), int(msg.get('content-length'))) + self.assertEqual(expected, f.read()) + def test_manual_content_length(self): # Set an incorrect content-length so that we can verify that # it will not be over-ridden by the library. diff -r d564695b67bb Misc/NEWS --- a/Misc/NEWS Fri Mar 20 00:27:28 2015 +0100 +++ b/Misc/NEWS Tue Mar 24 09:24:14 2015 -0700 @@ -80,6 +80,9 @@ The usage of os.scandir() reduces the number of calls to os.stat(). Initial patch written by Ben Hoyt. +- Issue #12319: Chunked transfer encoding support added to + http.client.HTTPConnection requests. + Build -----