Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(16)

Side by Side Diff: Lib/http/client.py

Issue 7776: http.client.HTTPConnection tunneling is broken
Patch Set: Created 6 years ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | Lib/test/test_httplib.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 """HTTP/1.1 client library 1 """HTTP/1.1 client library
2 2
3 <intro stuff goes here> 3 <intro stuff goes here>
4 <other stuff, too> 4 <other stuff, too>
5 5
6 HTTPConnection goes through a number of "states", which define when a client 6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular 7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions: 8 request. This diagram details these state transitions:
9 9
10 (null) 10 (null)
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
67 """ 67 """
68 68
69 import email.parser 69 import email.parser
70 import email.message 70 import email.message
71 import io 71 import io
72 import os 72 import os
73 import socket 73 import socket
74 import collections 74 import collections
75 from urllib.parse import urlsplit 75 from urllib.parse import urlsplit
76 import warnings 76 import warnings
77 from collections import namedtuple
77 78
78 __all__ = ["HTTPResponse", "HTTPConnection", 79 __all__ = ["HTTPResponse", "HTTPConnection",
79 "HTTPException", "NotConnected", "UnknownProtocol", 80 "HTTPException", "NotConnected", "UnknownProtocol",
80 "UnknownTransferEncoding", "UnimplementedFileMode", 81 "UnknownTransferEncoding", "UnimplementedFileMode",
81 "IncompleteRead", "InvalidURL", "ImproperConnectionState", 82 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
82 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", 83 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
83 "BadStatusLine", "error", "responses"] 84 "BadStatusLine", "error", "responses"]
84 85
85 HTTP_PORT = 80 86 HTTP_PORT = 80
86 HTTPS_PORT = 443 87 HTTPS_PORT = 443
(...skipping 622 matching lines...) Expand 10 before | Expand all | Expand 10 after
709 # For compatibility with old-style urllib responses. 710 # For compatibility with old-style urllib responses.
710 711
711 def info(self): 712 def info(self):
712 return self.headers 713 return self.headers
713 714
714 def geturl(self): 715 def geturl(self):
715 return self.url 716 return self.url
716 717
717 def getcode(self): 718 def getcode(self):
718 return self.status 719 return self.status
720
721
722 # To store tunneling information
723 TunnelInfo = namedtuple('TunnelInfo', ('host', 'port', 'headers'))
719 724
720 class HTTPConnection: 725 class HTTPConnection:
721 726
722 _http_vsn = 11 727 _http_vsn = 11
723 _http_vsn_str = 'HTTP/1.1' 728 _http_vsn_str = 'HTTP/1.1'
724 729
725 response_class = HTTPResponse 730 response_class = HTTPResponse
726 default_port = HTTP_PORT 731 default_port = HTTP_PORT
727 auto_open = 1 732 auto_open = 1
728 debuglevel = 0 733 debuglevel = 0
729 # TCP Maximum Segment Size (MSS) is determined by the TCP stack on 734 # TCP Maximum Segment Size (MSS) is determined by the TCP stack on
730 # a per-connection basis. There is no simple and efficient 735 # a per-connection basis. There is no simple and efficient
731 # platform independent mechanism for determining the MSS, so 736 # platform independent mechanism for determining the MSS, so
732 # instead a reasonable estimate is chosen. The getsockopt() 737 # instead a reasonable estimate is chosen. The getsockopt()
733 # interface using the TCP_MAXSEG parameter may be a suitable 738 # interface using the TCP_MAXSEG parameter may be a suitable
734 # approach on some operating systems. A value of 16KiB is chosen 739 # approach on some operating systems. A value of 16KiB is chosen
735 # as a reasonable estimate of the maximum MSS. 740 # as a reasonable estimate of the maximum MSS.
736 mss = 16384 741 mss = 16384
737 742
738 def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 743 def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
739 source_address=None): 744 source_address=None):
740 self.timeout = timeout 745 self.timeout = timeout
741 self.source_address = source_address 746 self.source_address = source_address
742 self.sock = None 747 self.sock = None
743 self._buffer = [] 748 self._buffer = []
744 self.__response = None 749 self.__response = None
745 self.__state = _CS_IDLE 750 self.__state = _CS_IDLE
746 self._method = None 751 self._method = None
747 self._tunnel_host = None
748 self._tunnel_port = None
749 self._tunnel_headers = {}
750 752
751 self._set_hostport(host, port) 753 # Will hold TunnelInfo tuples
754 self._tunnel_info = []
755
756 (self.host, self.port) = self._get_hostport(host, port)
757
758 # This is stored as an instance variable to allow unit
759 # tests to replace it with a suitable mockup
760 self._create_connection = socket.create_connection
752 761
753 def set_tunnel(self, host, port=None, headers=None): 762 def set_tunnel(self, host, port=None, headers=None):
754 """ Sets up the host and the port for the HTTP CONNECT Tunnelling. 763 """Prepare connection for HTTP CONNECT tunneling
755 764
756 The headers argument should be a mapping of extra HTTP headers 765 This method must be called before the connection has been established,
757 to send with the CONNECT request. 766 and may be called repeatedly to set up a chain of tunnels.
767
768 When establishing a connection that uses tunnels, HTTP CONNECT requests
769 are used to tunnel through all intermediate hosts (starting from the
770 host passed to the constructor). Once the connection is established, all
771 further communication is exchanged with the last host corresponding to
772 the most recent call to set_tunnel(), with the remaining hosts acting as
773 invisible gateways.
774
775 The headers argument should be a mapping of extra HTTP headers to send
776 with the CONNECT request.
758 """ 777 """
759 self._tunnel_host = host 778
760 self._tunnel_port = port 779 if self.sock:
780 raise RuntimeError("Can't set up tunnel for established connection")
781
761 if headers: 782 if headers:
762 self._tunnel_headers = headers 783 self._tunnel_info.append(TunnelInfo(host, port, headers))
763 else: 784 else:
764 self._tunnel_headers.clear() 785 self._tunnel_info.append(TunnelInfo(host, port, {}))
765 786
766 def _set_hostport(self, host, port): 787 def _get_hostport(self, host, port):
767 if port is None: 788 if port is None:
768 i = host.rfind(':') 789 i = host.rfind(':')
769 j = host.rfind(']') # ipv6 addresses have [...] 790 j = host.rfind(']') # ipv6 addresses have [...]
770 if i > j: 791 if i > j:
771 try: 792 try:
772 port = int(host[i+1:]) 793 port = int(host[i+1:])
773 except ValueError: 794 except ValueError:
774 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ 795 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
775 port = self.default_port 796 port = self.default_port
776 else: 797 else:
777 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) 798 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
778 host = host[:i] 799 host = host[:i]
779 else: 800 else:
780 port = self.default_port 801 port = self.default_port
781 if host and host[0] == '[' and host[-1] == ']': 802 if host and host[0] == '[' and host[-1] == ']':
782 host = host[1:-1] 803 host = host[1:-1]
783 self.host = host 804
784 self.port = port 805 return (host, port)
785 806
786 def set_debuglevel(self, level): 807 def set_debuglevel(self, level):
787 self.debuglevel = level 808 self.debuglevel = level
788 809
789 def _tunnel(self): 810 def _tunnel(self, host, port, headers):
790 self._set_hostport(self._tunnel_host, self._tunnel_port) 811 (host, port) = self._get_hostport(host, port)
791 connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port) 812 connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (host, port)
792 connect_bytes = connect_str.encode("ascii") 813 connect_bytes = connect_str.encode("ascii")
793 self.send(connect_bytes) 814 self.send(connect_bytes)
794 for header, value in self._tunnel_headers.items(): 815 for header, value in headers.items():
795 header_str = "%s: %s\r\n" % (header, value) 816 header_str = "%s: %s\r\n" % (header, value)
796 header_bytes = header_str.encode("latin-1") 817 header_bytes = header_str.encode("latin-1")
797 self.send(header_bytes) 818 self.send(header_bytes)
798 self.send(b'\r\n') 819 self.send(b'\r\n')
799 820
800 response = self.response_class(self.sock, method=self._method) 821 response = self.response_class(self.sock, method=self._method)
801 (version, code, message) = response._read_status() 822 (version, code, message) = response._read_status()
802 823
803 if code != 200: 824 if code != 200:
804 self.close() 825 self.close()
805 raise OSError("Tunnel connection failed: %d %s" % (code, 826 raise OSError("Tunnel connection failed: %d %s" % (code,
806 message.strip())) 827 message.strip()))
807 while True: 828 while True:
808 line = response.fp.readline(_MAXLINE + 1) 829 line = response.fp.readline(_MAXLINE + 1)
809 if len(line) > _MAXLINE: 830 if len(line) > _MAXLINE:
810 raise LineTooLong("header line") 831 raise LineTooLong("header line")
811 if not line: 832 if not line:
812 # for sites which EOF without sending a trailer 833 # for sites which EOF without sending a trailer
813 break 834 break
814 if line in (b'\r\n', b'\n', b''): 835 if line in (b'\r\n', b'\n', b''):
815 break 836 break
816 837
817 def connect(self): 838 def connect(self):
818 """Connect to the host and port specified in __init__.""" 839 """Connect to the host and port specified in __init__."""
819 self.sock = socket.create_connection((self.host,self.port), 840 self.sock = self._create_connection((self.host,self.port),
820 self.timeout, self.source_address) 841 self.timeout, self.source_address)
821 if self._tunnel_host: 842
822 self._tunnel() 843 for (host, port, headers) in self._tunnel_info:
844 self._tunnel(host, port, headers)
823 845
824 def close(self): 846 def close(self):
825 """Close the connection to the HTTP server.""" 847 """Close the connection to the HTTP server."""
826 if self.sock: 848 if self.sock:
827 self.sock.close() # close it manually... there may be other refs 849 self.sock.close() # close it manually... there may be other refs
828 self.sock = None 850 self.sock = None
829 if self.__response: 851 if self.__response:
830 self.__response.close() 852 self.__response.close()
831 self.__response = None 853 self.__response = None
832 self.__state = _CS_IDLE 854 self.__state = _CS_IDLE
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
979 if url.startswith('http'): 1001 if url.startswith('http'):
980 nil, netloc, nil, nil, nil = urlsplit(url) 1002 nil, netloc, nil, nil, nil = urlsplit(url)
981 1003
982 if netloc: 1004 if netloc:
983 try: 1005 try:
984 netloc_enc = netloc.encode("ascii") 1006 netloc_enc = netloc.encode("ascii")
985 except UnicodeEncodeError: 1007 except UnicodeEncodeError:
986 netloc_enc = netloc.encode("idna") 1008 netloc_enc = netloc.encode("idna")
987 self.putheader('Host', netloc_enc) 1009 self.putheader('Host', netloc_enc)
988 else: 1010 else:
1011 if self._tunnel_info:
1012 (host, port, _) = self._tunnel_info[-1]
1013 else:
1014 host = self.host
1015 port = self.port
1016
989 try: 1017 try:
990 host_enc = self.host.encode("ascii") 1018 host_enc = host.encode("ascii")
991 except UnicodeEncodeError: 1019 except UnicodeEncodeError:
992 host_enc = self.host.encode("idna") 1020 host_enc = host.encode("idna")
993 1021
994 # As per RFC 2732, IPv6 address should be wrapped with [] 1022 # As per RFC 2732, IPv6 address should be wrapped with []
995 # when used as Host header 1023 # when used as Host header
996 1024
997 if self.host.find(':') >= 0: 1025 if host.find(':') >= 0:
998 host_enc = b'[' + host_enc + b']' 1026 host_enc = b'[' + host_enc + b']'
999 1027
1000 if self.port == self.default_port: 1028 if port == self.default_port:
1001 self.putheader('Host', host_enc) 1029 self.putheader('Host', host_enc)
1002 else: 1030 else:
1003 host_enc = host_enc.decode("ascii") 1031 host_enc = host_enc.decode("ascii")
1004 self.putheader('Host', "%s:%s" % (host_enc, self.port)) 1032 self.putheader('Host', "%s:%s" % (host_enc, port))
1005 1033
1006 # note: we are assuming that clients will not attempt to set these 1034 # note: we are assuming that clients will not attempt to set these
1007 # headers since *this* library must deal with the 1035 # headers since *this* library must deal with the
1008 # consequences. this also means that when the supporting 1036 # consequences. this also means that when the supporting
1009 # libraries are updated to recognize other forms, then this 1037 # libraries are updated to recognize other forms, then this
1010 # code should be changed (removed or updated). 1038 # code should be changed (removed or updated).
1011 1039
1012 # we only want a Content-Encoding of "identity" since we don't 1040 # we only want a Content-Encoding of "identity" since we don't
1013 # support encodings such as x-gzip or x-deflate. 1041 # support encodings such as x-gzip or x-deflate.
1014 if not skip_accept_encoding: 1042 if not skip_accept_encoding:
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after
1187 raise ValueError("check_hostname needs a SSL context with " 1215 raise ValueError("check_hostname needs a SSL context with "
1188 "either CERT_OPTIONAL or CERT_REQUIRED") 1216 "either CERT_OPTIONAL or CERT_REQUIRED")
1189 if key_file or cert_file: 1217 if key_file or cert_file:
1190 context.load_cert_chain(cert_file, key_file) 1218 context.load_cert_chain(cert_file, key_file)
1191 self._context = context 1219 self._context = context
1192 self._check_hostname = check_hostname 1220 self._check_hostname = check_hostname
1193 1221
1194 def connect(self): 1222 def connect(self):
1195 "Connect to a host on a given (SSL) port." 1223 "Connect to a host on a given (SSL) port."
1196 1224
1197 sock = socket.create_connection((self.host, self.port), 1225 super().connect()
1198 self.timeout, self.source_address)
1199 1226
1200 if self._tunnel_host: 1227 if self._tunnel_info:
1201 self.sock = sock 1228 server_hostname = self._tunnel_info[-1].host
1202 self._tunnel() 1229 else:
1230 server_hostname = self.host
1231 sni_hostname = server_hostname if ssl.HAS_SNI else None
1203 1232
1204 server_hostname = self.host if ssl.HAS_SNI else None 1233 self.sock = self._context.wrap_socket(self.sock,
1205 self.sock = self._context.wrap_socket(sock, 1234 server_hostname=sni_hostname)
1206 server_hostname=server_hostname)
1207 if not self._context.check_hostname and self._check_hostname: 1235 if not self._context.check_hostname and self._check_hostname:
1208 try: 1236 try:
1209 ssl.match_hostname(self.sock.getpeercert(), self.host) 1237 ssl.match_hostname(self.sock.getpeercert(), server_hostname)
1210 except Exception: 1238 except Exception:
1211 self.sock.shutdown(socket.SHUT_RDWR) 1239 self.sock.shutdown(socket.SHUT_RDWR)
1212 self.sock.close() 1240 self.sock.close()
1213 raise 1241 raise
1214 1242
1215 __all__.append("HTTPSConnection") 1243 __all__.append("HTTPSConnection")
1216 1244
1217 class HTTPException(Exception): 1245 class HTTPException(Exception):
1218 # Subclasses that define an __init__ must call Exception.__init__ 1246 # Subclasses that define an __init__ must call Exception.__init__
1219 # or define self.args. Otherwise, str() will fail. 1247 # or define self.args. Otherwise, str() will fail.
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
1269 self.args = line, 1297 self.args = line,
1270 self.line = line 1298 self.line = line
1271 1299
1272 class LineTooLong(HTTPException): 1300 class LineTooLong(HTTPException):
1273 def __init__(self, line_type): 1301 def __init__(self, line_type):
1274 HTTPException.__init__(self, "got more than %d bytes when reading %s" 1302 HTTPException.__init__(self, "got more than %d bytes when reading %s"
1275 % (_MAXLINE, line_type)) 1303 % (_MAXLINE, line_type))
1276 1304
1277 # for backwards compatibility 1305 # for backwards compatibility
1278 error = HTTPException 1306 error = HTTPException
OLDNEW
« no previous file with comments | « no previous file | Lib/test/test_httplib.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+