Index: Lib/httplib.py =================================================================== --- Lib/httplib.py (revision 88454) +++ Lib/httplib.py (working copy) @@ -695,7 +695,8 @@ self.strict = strict def set_tunnel(self, host, port=None, headers=None): - """ Sets up the host and the port for the HTTP CONNECT Tunnelling. + """ Sets up the host and the port that the HTTP CONNECT will tunnel to. + self.host, self.port are the host and port of the proxy server. The headers argument should be a mapping of extra HTTP headers to send with the CONNECT request. @@ -729,25 +730,18 @@ def _tunnel(self): self._set_hostport(self._tunnel_host, self._tunnel_port) - self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)) + connect = ["CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)] for header, value in self._tunnel_headers.iteritems(): - self.send("%s: %s\r\n" % (header, value)) - self.send("\r\n") + connect.append("%s: %s\r\n" % (header, value)) + connect.append("\r\n") + self.send("".join(connect)) response = self.response_class(self.sock, strict = self.strict, method = self._method) - (version, code, message) = response._read_status() + response.begin() - if code != 200: - self.close() - raise socket.error("Tunnel connection failed: %d %s" % (code, - message.strip())) - while True: - line = response.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if line == '\r\n': break + if response.status != 200: + raise ProxyTunnelError(response) - def connect(self): """Connect to the host and port specified in __init__.""" self.sock = socket.create_connection((self.host,self.port), @@ -775,7 +769,8 @@ raise NotConnected() if self.debuglevel > 0: - print "send:", repr(data) + for line in data.split('\r\n'): + print "send:", repr(line) blocksize = 8192 if hasattr(data,'read') and not isinstance(data, array): if self.debuglevel > 0: print "sendIng a read()able" @@ -823,7 +818,6 @@ `skip_accept_encoding' if True does not add 
automatically an 'Accept-Encoding:' header """ - # if a prior response has been completed, then forget about it. if self.__response and self.__response.isclosed(): self.__response = None @@ -1077,7 +1071,6 @@ def connect(self, host=None, port=None): "Accept arguments to set the host/port, since the superclass doesn't." - if host is not None: self._conn._set_hostport(host, port) self._conn.connect() @@ -1149,7 +1142,6 @@ def connect(self): "Connect to a host on a given (SSL) port." - sock = socket.create_connection((self.host, self.port), self.timeout, self.source_address) if self._tunnel_host: @@ -1197,6 +1189,14 @@ # or define self.args. Otherwise, str() will fail. pass +class ProxyTunnelError(HTTPException): + def __init__(self, response): + self.response = response + + def __str__(self): + return "ProxyTunnelError(HTTPResponse(code=%d, reason=%s))" % ( + self.response.status, self.response.reason) + class NotConnected(HTTPException): pass Index: Lib/urllib2.py =================================================================== --- Lib/urllib2.py (revision 88454) +++ Lib/urllib2.py (working copy) @@ -185,6 +185,10 @@ return host.lower() class Request: + HTTP_DIRECT = 1 # HTTP connection direct to host + HTTPS_DIRECT = 2 # HTTPS connection direct to host + HTTP_PROXY = 3 # HTTP connect to proxy + HTTPS_PROXY = 4 # HTTP connect to proxy tunnelling HTTPS to host def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False): @@ -195,7 +199,7 @@ # self.__r_type is what's left after doing the splittype self.host = None self.port = None - self._tunnel_host = None + self.__tunnel_host = None self.data = data self.headers = {} for key, value in headers.items(): @@ -206,18 +210,73 @@ self.origin_req_host = origin_req_host self.unverifiable = unverifiable - def __getattr__(self, attr): - # XXX this is a fallback mechanism to guard against these - # methods getting called in a non-standard order. this may be - # too complicated and/or unnecessary. 
- # XXX should the __r_XXX attributes be public? - if attr[:12] == '_Request__r_': - name = attr[12:] - if hasattr(Request, 'get_' + name): - getattr(self, 'get_' + name)() - return getattr(self, attr) - raise AttributeError, attr + self.get_type() + self.get_host() + if self.type == 'http': + self.connection_type = self.HTTP_DIRECT + else: + self.connection_type = self.HTTPS_DIRECT + + def set_proxy(self, host, type): + assert type == 'http', 'HTTP Proxies are always http' + + if self.connection_type == self.HTTP_DIRECT: + self.connection_type = self.HTTP_PROXY + self.__r_host = self.__original + self.host = host + + elif self.connection_type == self.HTTPS_DIRECT: + self.connection_type = self.HTTPS_PROXY + self.__tunnel_host = self.host + self.host = host + + else: + assert self.host == host, 'Attempt to change proxy settings' + + def has_https_proxy( self ): + return self.connection_type ==self.HTTPS_PROXY + + def has_http_proxy( self ): + return self.connection_type == self.HTTP_PROXY + + def has_proxy(self): + return self.connection_type in (self.HTTP_PROXY, self.HTTPS_PROXY) + + def get_tunnel_host(self): + return self.__tunnel_host + + def get_origin_passwd_lookup_uri(self): + # always use the original url in full + return self.get_full_url() + + def get_proxy_passwd_lookup_uri(self): + # the proxy address is in host now + return self.get_host() + + def get_origin_auth_uri(self): + # called by get_authorization + # return the /X + if self.connection_type in (self.HTTP_PROXY, self.HTTPS_PROXY): + return self.__origin_selector + else: + # HTTP_DIRECT and HTTPS_DIRECT + return self.__r_host + + def get_proxy_auth_uri( self ): + # called by get_authorization + # return the uri that is used for digest auth + if self.connection_type in (self.HTTP_DIRECT, self.HTTPS_DIRECT): + return self.__r_host + + elif self.connection_type == self.HTTP_PROXY: + # HTTP_PROXY + return self.__original + + elif self.connection_type == self.HTTPS_PROXY: + # 
HTTPS_PROXY + return self.host + def get_method(self): if self.has_data(): return "POST" @@ -225,6 +284,8 @@ return "GET" # XXX these helper methods are lame + # XXX no these methods allow for debugging + # problems with the Request object def add_data(self, data): self.data = data @@ -248,6 +309,7 @@ def get_host(self): if self.host is None: self.host, self.__r_host = splithost(self.__r_type) + self.__origin_selector = self.__r_host if self.host: self.host = unquote(self.host) return self.host @@ -255,18 +317,9 @@ def get_selector(self): return self.__r_host - def set_proxy(self, host, type): - if self.type == 'https' and not self._tunnel_host: - self._tunnel_host = self.host - else: - self.type = type - self.__r_host = self.__original + def get_origin_selector(self): + return self.__origin_selector - self.host = host - - def has_proxy(self): - return self.__r_host == self.__original - def get_origin_req_host(self): return self.origin_req_host @@ -307,6 +360,21 @@ self.process_response = {} self.process_request = {} + self._debuglevel = 0 + + def set_debuglevel( self, debuglevel ): + # set the debuglevel of all of the handlers + # by calling their set_debuglevel or set_http_debuglevel + # methods. + self._debuglevel = debuglevel + print 'debug: OpenerDirector.set_debuglevel %r' % (self._debuglevel,) + + for handler in self.handlers: + if hasattr(handler, 'set_debuglevel'): + handler.set_debuglevel(debuglevel) + elif hasattr(handler, 'set_http_debuglevel'): + handler.set_http_debuglevel(debuglevel) + def add_handler(self, handler): if not hasattr(handler, "add_parent"): raise TypeError("expected BaseHandler instance, got %r" % @@ -362,14 +430,28 @@ # Handlers raise an exception if no one else should try to handle # the request, or return None if they can't but another handler # could. Otherwise, they return the response. 
+ if self._debuglevel >= 2: + print 'debug: _call_chain( kind %r, meth_name %r, args %r, chain %r )' % (kind, meth_name, args, chain) + handlers = chain.get(kind, ()) for handler in handlers: func = getattr(handler, meth_name) + if self._debuglevel >= 2: + print 'debug: _call_chain handler %r' % (handler,) + print 'debug: _call_chain func %r' % (func,) + result = func(*args) + + if self._debuglevel >= 2: + print 'debug: _call_chain result %r' % (result,) + if result is not None: return result + if self._debuglevel >= 2: + print 'debug: _call_chain no handler matched' + def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): # accept a URL or a Request object if isinstance(fullurl, basestring): @@ -414,6 +496,9 @@ 'unknown_open', req) def error(self, proto, *args): + if self._debuglevel >= 2: + print 'debug: OpenerDirector.error proto %r args %r' % (proto, args) + if proto in ('http', 'https'): # XXX http[s] protocols are special-cased dict = self.handle_error['http'] # https is not different than http @@ -921,11 +1006,17 @@ self.retried = 0 self.nonce_count = 0 self.last_nonce = None + self._debuglevel = 0 + def set_debuglevel(self, debuglevel): + self._debuglevel = debuglevel + def reset_retry_count(self): self.retried = 0 def http_error_auth_reqed(self, auth_header, host, req, headers): + proxy_auth_reqed = auth_header=='proxy-authenticate' + authreq = headers.get(auth_header, None) if self.retried > 5: # Don't fail endlessly - if we failed once, we'll probably @@ -933,19 +1024,24 @@ # prompting for the information. Crap. 
This isn't great # but it's better than the current 'repeat until recursion # depth exceeded' approach - raise HTTPError(req.get_full_url(), 401, "digest auth failed", - headers, None) + if proxy_auth_reqed: + raise HTTPError(req.get_full_url(), 407, "digest auth failed", + headers, None) + else: + raise HTTPError(req.get_full_url(), 401, "digest auth failed", + headers, None) + else: self.retried += 1 if authreq: scheme = authreq.split()[0] if scheme.lower() == 'digest': - return self.retry_http_digest_auth(req, authreq) + return self.retry_http_digest_auth(req, authreq, proxy_auth_reqed) - def retry_http_digest_auth(self, req, auth): + def retry_http_digest_auth(self, req, auth, proxy_auth_reqed): token, challenge = auth.split(' ', 1) chal = parse_keqv_list(parse_http_list(challenge)) - auth = self.get_authorization(req, chal) + auth = self.get_authorization(req, chal, proxy_auth_reqed) if auth: auth_val = 'Digest %s' % auth if req.headers.get(self.auth_header, None) == auth_val: @@ -964,7 +1060,20 @@ randombytes(8))).hexdigest() return dig[:16] - def get_authorization(self, req, chal): + def get_authorization(self, req, chal, proxy_auth_reqed): + # get_authorization is called in response for + # a 401 or a 407 error + # + # proxy_auth_reqed is true when called because of a 407 + # + # proxy_auth_reqed is used to select the correct fields + # in this algorithm + if self._debuglevel > 0: + print 'debug: AbstractDigestAuthHandler.get_authorization proxy %r %r' % (proxy_auth_reqed, chal) + + if proxy_auth_reqed and not req.has_proxy(): + return None + try: realm = chal['realm'] nonce = chal['nonce'] @@ -980,8 +1089,15 @@ if H is None: return None - user, pw = self.passwd.find_user_password(realm, req.get_full_url()) + if proxy_auth_reqed: + passwd_authuri = req.get_proxy_passwd_lookup_uri() + else: + passwd_authuri = req.get_origin_passwd_lookup_uri() + + user, pw = self.passwd.find_user_password(realm, passwd_authuri) if user is None: + if self._debuglevel > 0: + 
print 'debug: AbstractDigestAuthHandler.get_authorization no credentials for realm %r authuri %r' % (realm, passwd_authuri) return None # XXX not implemented yet @@ -991,9 +1107,28 @@ entdig = None A1 = "%s:%s:%s" % (user, realm, pw) - A2 = "%s:%s" % (req.get_method(), - # XXX selector: what about proxies and full urls - req.get_selector()) + + if proxy_auth_reqed and req.has_https_proxy(): + uri = req.get_tunnel_host() + + # need to default the port if it is missing to 443 + # to match the CONNECT messages + if ':' not in uri: + uri = '%s:443' % (uri,) + A2 = "CONNECT:%s" % uri + + elif proxy_auth_reqed: + uri = req.get_proxy_auth_uri() + if not uri: + uri = '/' + A2 = "%s:%s" % (req.get_method(), uri) + + else: + uri = req.get_origin_auth_uri() + if not uri: + uri = '/' + A2 = "%s:%s" % (req.get_method(), uri) + if qop == 'auth': if nonce == self.last_nonce: self.nonce_count += 1 @@ -1013,9 +1148,9 @@ # XXX should the partial digests be encoded too? - base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ - 'response="%s"' % (user, realm, nonce, req.get_selector(), - respdig) + base = ('username="%s", realm="%s", nonce="%s", uri="%s", ' + 'response="%s"' % + (user, realm, nonce, uri, respdig)) if opaque: base += ', opaque="%s"' % opaque if entdig: @@ -1023,6 +1158,10 @@ base += ', algorithm="%s"' % algorithm if qop: base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) + + if self._debuglevel > 0: + print 'debug: AbstractDigestAuthHandler.get_authorization base %r' % (base,) + return base def get_algorithm_impls(self, algorithm): @@ -1140,7 +1279,7 @@ headers = dict( (name.title(), val) for name, val in headers.items()) - if req._tunnel_host: + if req.has_https_proxy(): tunnel_headers = {} proxy_auth_hdr = "Proxy-Authorization" if proxy_auth_hdr in headers: @@ -1148,10 +1287,15 @@ # Proxy-Authorization should not be sent to origin # server. 
del headers[proxy_auth_hdr] - h.set_tunnel(req._tunnel_host, headers=tunnel_headers) + h.set_tunnel(req.get_tunnel_host(), headers=tunnel_headers) try: - h.request(req.get_method(), req.get_selector(), req.data, headers) + if req.has_http_proxy(): + uri = req.get_selector() + else: + uri = req.get_origin_selector() + + h.request(req.get_method(), uri, req.data, headers) try: r = h.getresponse(buffering=True) except TypeError: #buffering kw not supported @@ -1190,7 +1334,18 @@ class HTTPSHandler(AbstractHTTPHandler): def https_open(self, req): - return self.do_open(httplib.HTTPSConnection, req) + try: + return self.do_open(httplib.HTTPSConnection, req) + except httplib.ProxyTunnelError as e: + if e.response.status == 407: + fp = socket._fileobject(e.response, close=True) + resp = addinfourl(fp, e.response.msg, req.get_full_url()) + return self.parent.error('https', req, resp, + e.response.status, e.response.reason, resp.info()) + else: + raise HTTPError(req.get_full_url(), + e.response.status, e.response.reason, + e.response.msg, e.response.fp) https_request = AbstractHTTPHandler.do_request_