Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1200)

Side by Side Diff: Lib/http/client.py

Issue 12319: [http.client] HTTPConnection.putrequest does not support "chunked" Transfer-Encodings to send data
Patch Set: Created 2 years, 5 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 """HTTP/1.1 client library 1 """HTTP/1.1 client library
2 2
3 <intro stuff goes here> 3 <intro stuff goes here>
4 <other stuff, too> 4 <other stuff, too>
5 5
6 HTTPConnection goes through a number of "states", which define when a client 6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular 7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions: 8 request. This diagram details these state transitions:
9 9
10 (null) 10 (null)
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
79 from urllib.parse import urlsplit 79 from urllib.parse import urlsplit
80 80
81 # HTTPMessage, parse_headers(), and the HTTP status code constants are 81 # HTTPMessage, parse_headers(), and the HTTP status code constants are
82 # intentionally omitted for simplicity 82 # intentionally omitted for simplicity
83 __all__ = ["HTTPResponse", "HTTPConnection", 83 __all__ = ["HTTPResponse", "HTTPConnection",
84 "HTTPException", "NotConnected", "UnknownProtocol", 84 "HTTPException", "NotConnected", "UnknownProtocol",
85 "UnknownTransferEncoding", "UnimplementedFileMode", 85 "UnknownTransferEncoding", "UnimplementedFileMode",
86 "IncompleteRead", "InvalidURL", "ImproperConnectionState", 86 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
87 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", 87 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
88 "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error", 88 "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
89 "responses"] 89 "responses", "EncodingError"]
90 90
91 HTTP_PORT = 80 91 HTTP_PORT = 80
92 HTTPS_PORT = 443 92 HTTPS_PORT = 443
93 93
94 _UNKNOWN = 'UNKNOWN' 94 _UNKNOWN = 'UNKNOWN'
95 95
96 # connection states 96 # connection states
97 _CS_IDLE = 'Idle' 97 _CS_IDLE = 'Idle'
98 _CS_REQ_STARTED = 'Request-started' 98 _CS_REQ_STARTED = 'Request-started'
99 _CS_REQ_SENT = 'Request-sent' 99 _CS_REQ_SENT = 'Request-sent'
100 100
101 _DEFAULT_ENCODING = 'latin-1'
101 102
102 # hack to maintain backwards compatibility 103 # hack to maintain backwards compatibility
103 globals().update(http.HTTPStatus.__members__) 104 globals().update(http.HTTPStatus.__members__)
104 105
105 # another hack to maintain backwards compatibility 106 # another hack to maintain backwards compatibility
106 # Mapping status codes to official W3C names 107 # Mapping status codes to official W3C names
107 responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()} 108 responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}
108 109
109 # maximal amount of data to read at one time in _safe_read 110 # maximal amount of data to read at one time in _safe_read
110 MAXAMOUNT = 1048576 111 MAXAMOUNT = 1048576
(...skipping 594 matching lines...) Expand 10 before | Expand all | Expand 10 after
705 # For compatibility with old-style urllib responses. 706 # For compatibility with old-style urllib responses.
706 707
707 def info(self): 708 def info(self):
708 return self.headers 709 return self.headers
709 710
710 def geturl(self): 711 def geturl(self):
711 return self.url 712 return self.url
712 713
713 def getcode(self): 714 def getcode(self):
714 return self.status 715 return self.status
716
717
718 def _get_content_length(body, method):
719 # Get the content-length based on the body. If the body is "empty", we
720 # set Content-Length: 0 for methods that expect a body (RFC 7230,
721 # Section 3.3.2). If the body is set for other methods, we set the
722 # header provided we can figure out what the length is.
723 if not body:
724 # do an explicit check for not None here to distinguish between unset
725 # and set but empty
726 if method.upper() in _METHODS_EXPECTING_BODY or body is not None:
727 return 0
728 return
Martin Panter 2015/05/21 13:14:17 These should be an explicit “return None”. Also ag
729
730 if hasattr(body, 'read'):
731 try:
732 return os.fstat(body.fileno()).st_size
733 except AttributeError:
734 # is the object seekable?
735 try:
736 curpos = body.tell()
Martin Panter 2015/05/21 13:14:17 It seems silly to save a non-zero starting positio
737 sz = body.seek(0, io.SEEK_END)
738 except (TypeError, AttributeError):
739 if self.debuglevel > 0:
740 print('Unable to determine size of %r' % body)
741 return
Martin Panter 2015/05/21 13:14:17 return None
742 else:
743 body.seek(curpos)
744 return sz
745
746 if hasattr(body, '__iter__'):
747 try:
748 # is body a string or bytes type?
749 ord(body[0])
750 except TypeError:
751 # are we looking at an iterable of ints?
752 if isinstance(body[0], int):
Martin Panter 2015/05/21 13:14:17 It hurts my head to consider an iterator causing b
753 return len(body)
754 # nope, this is likely an iterable of iterables
755 return sum(len(line) for line in body)
Martin Panter 2015/05/21 13:14:17 From this and the new test case, it looks like you
756 else:
757 return len(body)
758
715 759
716 class HTTPConnection: 760 class HTTPConnection:
717 761
718 _http_vsn = 11 762 _http_vsn = 11
719 _http_vsn_str = 'HTTP/1.1' 763 _http_vsn_str = 'HTTP/1.1'
720 764
721 response_class = HTTPResponse 765 response_class = HTTPResponse
722 default_port = HTTP_PORT 766 default_port = HTTP_PORT
723 auto_open = 1 767 auto_open = 1
724 debuglevel = 0 768 debuglevel = 0
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
836 sock = self.sock 880 sock = self.sock
837 if sock: 881 if sock:
838 self.sock = None 882 self.sock = None
839 sock.close() # close it manually... there may be other refs 883 sock.close() # close it manually... there may be other refs
840 finally: 884 finally:
841 response = self.__response 885 response = self.__response
842 if response: 886 if response:
843 self.__response = None 887 self.__response = None
844 response.close() 888 response.close()
845 889
846 def send(self, data): 890 def _read_readable(self, readable):
891 blocksize = 8192
892 if self.debuglevel > 0:
893 print("sendIng a read()able")
894 encode = False
895 try:
896 mode = readable.mode
897 except AttributeError:
898 # io.BytesIO and other file-like objects don't have a `mode`
899 # attribute.
900 pass
901 else:
902 if "b" not in mode:
903 encode = True
904 if self.debuglevel > 0:
905 print("encoding file using iso-8859-1")
906 while True:
907 datablock = readable.read(blocksize)
908 if not datablock:
909 break
910 if encode:
911 datablock = datablock.encode(_DEFAULT_ENCODING)
912 yield datablock
913
914 def _read_iterable(self, iterable):
915 for line in iterable:
916 if isinstance(line, str):
917 line = line.encode(_DEFAULT_ENCODING)
918 yield line
919
920 def send(self, data, encode_chunked=False):
847 """Send `data' to the server. 921 """Send `data' to the server.
848 ``data`` can be a string object, a bytes object, an array object, a 922 ``data`` can be a string object, a bytes object, an array object, a
849 file-like object that supports a .read() method, or an iterable object. 923 file-like object that supports a .read() method, or an iterable object.
850 """ 924 """
851 925
852 if self.sock is None: 926 if self.sock is None:
853 if self.auto_open: 927 if self.auto_open:
854 self.connect() 928 self.connect()
855 else: 929 else:
856 raise NotConnected() 930 raise NotConnected()
857 931
858 if self.debuglevel > 0: 932 if self.debuglevel > 0:
859 print("send:", repr(data)) 933 print("send:", repr(data))
860 blocksize = 8192 934
861 if hasattr(data, "read") : 935 # create a consistent interface to the data
862 if self.debuglevel > 0:
863 print("sendIng a read()able")
864 encode = False
865 try:
866 mode = data.mode
867 except AttributeError:
868 # io.BytesIO and other file-like objects don't have a `mode`
869 # attribute.
870 pass
871 else:
872 if "b" not in mode:
873 encode = True
874 if self.debuglevel > 0:
875 print("encoding file using iso-8859-1")
876 while 1:
877 datablock = data.read(blocksize)
878 if not datablock:
879 break
880 if encode:
881 datablock = datablock.encode("iso-8859-1")
882 self.sock.sendall(datablock)
883 return
884 try: 936 try:
885 self.sock.sendall(data) 937 # this is solely to check to see if data implements the buffer API.
938 # it /would/ be easier to capture if PyObject_CheckBuffer was
939 # exposed to Python
940 memoryview(data)
886 except TypeError: 941 except TypeError:
887 if isinstance(data, collections.Iterable): 942 if isinstance(data, str):
888 for d in data: 943 read = lambda data: (data.encode(_DEFAULT_ENCODING),)
889 self.sock.sendall(d) 944 elif hasattr(data, 'read'):
945 read = self._read_readable
946 elif isinstance(data, collections.Iterable):
947 read = self._read_iterable
890 else: 948 else:
891 raise TypeError("data should be a bytes-like object " 949 raise TypeError("data should be a bytes-like object "
892 "or an iterable, got %r" % type(data)) 950 "or an iterable, got %r" % type(data))
951 else:
952 # the object implements the buffer interface and can be passed
953 # directly into socket methods
954 read = lambda data: (data,)
955
956 for line in read(data):
957 if not line:
958 if self.debuglevel > 0:
959 print('Zero length line ignored')
960 continue
961
962 if encode_chunked and self._http_vsn == 11:
963 # chunked encoding
964 line = b'\r\n'.join((
965 format(len(line), 'X').encode('ascii'),
966 line,
967 b''))
968 self.sock.sendall(line)
969
970 if encode_chunked and self._http_vsn == 11:
971 # end chunked transfer
972 self.sock.sendall(b'0\r\n\r\n')
893 973
894 def _output(self, s): 974 def _output(self, s):
895 """Add a line of output to the current request buffer. 975 """Add a line of output to the current request buffer.
896 976
897 Assumes that the line does *not* end with \\r\\n. 977 Assumes that the line does *not* end with \\r\\n.
898 """ 978 """
899 self._buffer.append(s) 979 self._buffer.append(s)
900 980
901 def _send_output(self, message_body=None): 981 def _send_output(self, message_body=None, encode_chunked=False):
902 """Send the currently buffered request and clear the buffer. 982 """Send the currently buffered request and clear the buffer.
903 983
904 Appends an extra \\r\\n to the buffer. 984 Appends an extra \\r\\n to the buffer.
905 A message_body may be specified, to be appended to the request. 985 A message_body may be specified, to be appended to the request.
906 """ 986 """
907 self._buffer.extend((b"", b"")) 987 self._buffer.extend((b"", b""))
908 msg = b"\r\n".join(self._buffer) 988 msg = b"\r\n".join(self._buffer)
909 del self._buffer[:] 989 del self._buffer[:]
910 990
911 self.send(msg) 991 self.send(msg)
912 if message_body is not None: 992 if message_body is not None:
913 self.send(message_body) 993 self.send(message_body, encode_chunked=encode_chunked)
914 994
915 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): 995 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
916 """Send a request to the server. 996 """Send a request to the server.
917 997
918 `method' specifies an HTTP request method, e.g. 'GET'. 998 `method' specifies an HTTP request method, e.g. 'GET'.
919 `url' specifies the object being requested, e.g. '/index.html'. 999 `url' specifies the object being requested, e.g. '/index.html'.
920 `skip_host' if True does not add automatically a 'Host:' header 1000 `skip_host' if True does not add automatically a 'Host:' header
921 `skip_accept_encoding' if True does not add automatically an 1001 `skip_accept_encoding' if True does not add automatically an
922 'Accept-Encoding:' header 1002 'Accept-Encoding:' header
923 """ 1003 """
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
1056 elif isinstance(one_value, int): 1136 elif isinstance(one_value, int):
1057 values[i] = str(one_value).encode('ascii') 1137 values[i] = str(one_value).encode('ascii')
1058 1138
1059 if _is_illegal_header_value(values[i]): 1139 if _is_illegal_header_value(values[i]):
1060 raise ValueError('Invalid header value %r' % (values[i],)) 1140 raise ValueError('Invalid header value %r' % (values[i],))
1061 1141
1062 value = b'\r\n\t'.join(values) 1142 value = b'\r\n\t'.join(values)
1063 header = header + b': ' + value 1143 header = header + b': ' + value
1064 self._output(header) 1144 self._output(header)
1065 1145
1066 def endheaders(self, message_body=None): 1146 def endheaders(self, message_body=None, encode_chunked=False):
1067 """Indicate that the last header line has been sent to the server. 1147 """Indicate that the last header line has been sent to the server.
1068 1148
1069 This method sends the request to the server. The optional message_body 1149 This method sends the request to the server. The optional message_body
1070 argument can be used to pass a message body associated with the 1150 argument can be used to pass a message body associated with the
1071 request. The message body will be sent in the same packet as the 1151 request. The message body will be sent in the same packet as the
1072 message headers if it is a string, otherwise it is sent as a separate 1152 message headers if it is a string, otherwise it is sent as a separate
1073 packet. 1153 packet.
1074 """ 1154 """
1075 if self.__state == _CS_REQ_STARTED: 1155 if self.__state == _CS_REQ_STARTED:
1076 self.__state = _CS_REQ_SENT 1156 self.__state = _CS_REQ_SENT
1077 else: 1157 else:
1078 raise CannotSendHeader() 1158 raise CannotSendHeader()
1079 self._send_output(message_body) 1159 self._send_output(message_body, encode_chunked=encode_chunked)
1080 1160
1081 def request(self, method, url, body=None, headers={}): 1161 def request(self, method, url, body=None, headers=None):
1082 """Send a complete request to the server.""" 1162 """Send a complete request to the server."""
1083 self._send_request(method, url, body, headers) 1163 self._send_request(method, url, body, headers or {})
1084
1085 def _set_content_length(self, body, method):
1086 # Set the content-length based on the body. If the body is "empty", we
1087 # set Content-Length: 0 for methods that expect a body (RFC 7230,
1088 # Section 3.3.2). If the body is set for other methods, we set the
1089 # header provided we can figure out what the length is.
1090 thelen = None
1091 method_expects_body = method.upper() in _METHODS_EXPECTING_BODY
1092 if body is None and method_expects_body:
1093 thelen = '0'
1094 elif body is not None:
1095 try:
1096 thelen = str(len(body))
1097 except TypeError:
1098 # If this is a file-like object, try to
1099 # fstat its file descriptor
1100 try:
1101 thelen = str(os.fstat(body.fileno()).st_size)
1102 except (AttributeError, OSError):
1103 # Don't send a length if this failed
1104 if self.debuglevel > 0: print("Cannot stat!!")
1105
1106 if thelen is not None:
1107 self.putheader('Content-Length', thelen)
1108 1164
1109 def _send_request(self, method, url, body, headers): 1165 def _send_request(self, method, url, body, headers):
1110 # Honor explicitly requested Host: and Accept-Encoding: headers. 1166 # Honor explicitly requested Host: and Accept-Encoding: headers.
1111 header_names = dict.fromkeys([k.lower() for k in headers]) 1167 header_names = {k.lower(): k for k in headers.keys()}
1112 skips = {} 1168 skips = {}
1113 if 'host' in header_names: 1169 if 'host' in header_names:
1114 skips['skip_host'] = 1 1170 skips['skip_host'] = 1
1115 if 'accept-encoding' in header_names: 1171 if 'accept-encoding' in header_names:
1116 skips['skip_accept_encoding'] = 1 1172 skips['skip_accept_encoding'] = 1
1117 1173
1118 self.putrequest(method, url, **skips) 1174 self.putrequest(method, url, **skips)
1119 1175
1176 # chunked encoding will happen under the following conditions:
1177 # 1. content-length has not been explicitly set
1178 # 2. body is a generator
1179 # 3. HTTP/1.1 is used
1180 # 4. Transfer-Encoding has NOT been explicitly set by the caller
1181
1182 encode_chunked = False
1120 if 'content-length' not in header_names: 1183 if 'content-length' not in header_names:
1121 self._set_content_length(body, method) 1184 # only chunk body if not explicitly set for backwards
1185 # compatibility, assuming the client code is already handling the
1186 # chunking
1187 if 'transfer-encoding' not in header_names:
1188 # if content-length cannot be automatically determined, fall
1189 # back to chunked encoding
1190 try:
1191 content_length = _get_content_length(body, method)
1192 if content_length is None:
1193 raise TypeError
1194 except TypeError:
1195 if body:
1196 # content length is applied to requests for which the
1197 # method expects a body (i.e. PUT, POST). such requests
1198 # should not have chunked encoding applied.
1199 encode_chunked = True
1200 self.putheader('Transfer-Encoding', 'chunked')
1201 else:
1202 self.putheader('Content-Length', str(content_length))
1203 else:
1204 # transfer-encoding is specified, do some validation
1205
1206 # RFC 7230, Section 3.3.1
1207 # A sender MUST NOT apply chunked more than once to a
1208 # message body (i.e., chunking an already chunked message
1209 # is not allowed).
1210 enc = headers[header_names['transfer-encoding']].split(',')
Martin Panter 2015/05/21 13:14:17 I think a client should not be in the business of
1211 if len([e for e in enc if e == 'chunked']) > 1:
Martin Panter 2015/05/21 13:14:17 enc.count('chunked')?
1212 raise EncodingError(
1213 'Multiple chunked encodings found. Expected 1.')
1214
1215 # RFC 7230, Section 3.3.1
1216 # If any transfer coding other than
1217 # chunked is applied to a request payload body, the sender
1218 # MUST apply chunked as the final transfer coding to ensure
1219 # that the message is properly framed.
1220 if enc[-1] != 'chunked':
Martin Panter 2015/05/21 13:14:17 I think this will fail the example straight from t
1221 raise EncodingError(
1222 'Chunked encoding expected as the final '
1223 'Transfer-Encoding.')
1224
1225
1122 for hdr, value in headers.items(): 1226 for hdr, value in headers.items():
1123 self.putheader(hdr, value) 1227 self.putheader(hdr, value)
1124 if isinstance(body, str): 1228 self.endheaders(body, encode_chunked)
1125 # RFC 2616 Section 3.7.1 says that text default has a
1126 # default charset of iso-8859-1.
1127 body = body.encode('iso-8859-1')
1128 self.endheaders(body)
1129 1229
1130 def getresponse(self): 1230 def getresponse(self):
1131 """Get the response from the server. 1231 """Get the response from the server.
1132 1232
1133 If the HTTPConnection is in the correct state, returns an 1233 If the HTTPConnection is in the correct state, returns an
1134 instance of HTTPResponse or of whatever object is returned by 1234 instance of HTTPResponse or of whatever object is returned by
1135 class the response_class variable. 1235 class the response_class variable.
1136 1236
1137 If a request has not been sent or if a previous response has 1237 If a request has not been sent or if a previous response has
1138 not be handled, ResponseNotReady is raised. If the HTTP 1238 not be handled, ResponseNotReady is raised. If the HTTP
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
1304 class LineTooLong(HTTPException): 1404 class LineTooLong(HTTPException):
1305 def __init__(self, line_type): 1405 def __init__(self, line_type):
1306 HTTPException.__init__(self, "got more than %d bytes when reading %s" 1406 HTTPException.__init__(self, "got more than %d bytes when reading %s"
1307 % (_MAXLINE, line_type)) 1407 % (_MAXLINE, line_type))
1308 1408
1309 class RemoteDisconnected(ConnectionResetError, BadStatusLine): 1409 class RemoteDisconnected(ConnectionResetError, BadStatusLine):
1310 def __init__(self, *pos, **kw): 1410 def __init__(self, *pos, **kw):
1311 BadStatusLine.__init__(self, "") 1411 BadStatusLine.__init__(self, "")
1312 ConnectionResetError.__init__(self, *pos, **kw) 1412 ConnectionResetError.__init__(self, *pos, **kw)
1313 1413
1414
1415 class EncodingError(HTTPException):
1416 pass
1417
1418
1314 # for backwards compatibility 1419 # for backwards compatibility
1315 error = HTTPException 1420 error = HTTPException
OLDNEW

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7