import os
import sys
import httplib
import urllib
import urllib2
import socket
from urllib2 import URLError
import logging

log = logging.getLogger('Patch')

# The patch is only applied when an HTTPS proxy is configured in the
# environment and the interpreter is Python 2.6.
if os.environ.get('https_proxy', None) and sys.version_info[:2] == (2, 6):
    log.info('Buggy version of httplib detected, dynamically patching.')

    # Everything that follows (except MyHTTPSHandler) is a monkey patch for
    # urllib2 and httplib for http://bugs.python.org/issue7291

    # Inherit from httplib.HTTPException for consistency with httplib
    # exceptions.
    class ProxyTunnelError(httplib.HTTPException):
        """Raised by the patched ``_tunnel`` when CONNECT is not answered
        with status 200.

        The proxy's response object is kept on ``self.response`` so that
        callers can inspect its status, reason and headers.
        """

        def __init__(self, response):
            self.response = response

        def __str__(self):
            return "ProxyTunnelError(HTTPResponse(code=%d, reason=%s))" % (
                self.response.status, self.response.reason)

    if hasattr(urllib2, 'HTTPSHandler'):
        class HTTPSHandler(urllib2.HTTPSHandler):
            """HTTPS handler that turns proxy tunnel failures into urllib2
            error handling."""

            def https_open(self, req):
                """Open ``req`` over HTTPS.

                A 407 answer from the proxy is dispatched to the opener's
                error handlers via ``self.parent.error``; any other tunnel
                failure is raised as ``urllib2.HTTPError``.
                """
                try:
                    return self.do_open(httplib.HTTPSConnection, req)
                except ProxyTunnelError as e:
                    if e.response.status == 407:
                        # socket._fileobject reads through recv(), which
                        # HTTPResponse does not provide; delegate it to
                        # read() exactly as do_open does, so the body of
                        # the 407 response can actually be read.
                        e.response.recv = e.response.read
                        fp = socket._fileobject(e.response, close=True)
                        resp = urllib.addinfourl(fp,
                                                 e.response.msg,
                                                 req.get_full_url())
                        return self.parent.error('https', req, resp,
                                                 e.response.status,
                                                 e.response.reason,
                                                 resp.info())
                    else:
                        raise urllib2.HTTPError(req.get_full_url(),
                                                e.response.status,
                                                e.response.reason,
                                                e.response.msg,
                                                e.response.fp)

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code

        Raises urllib2.URLError when the request has no host, or when a
        socket.error occurs while sending the request or getting the
        response.
        """
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        h = http_class(host, timeout=req.timeout)  # will parse host:port
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict(
            (name.title(), val) for name, val in headers.items())

        # Request objects may lack _tunnel_host, so look it up defensively.
        tunnel_host = getattr(req, '_tunnel_host', None)
        if tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h._set_tunnel(tunnel_host, headers=tunnel_headers)

        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error as err:  # XXX what error?
            # Do not leave the connection open once the request has failed.
            h.close()
            raise urllib2.URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        r.recv = r.read
        fp = socket._fileobject(r, close=True)

        resp = urllib.addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp

    urllib2.AbstractHTTPHandler.do_open = do_open

    def get_authorization(self, req, chal):
        """Build the value of a Digest authorization header for ``req``.

        ``chal`` is the parsed challenge mapping; ``realm`` and ``nonce``
        are required.  For tunnelled requests the digest is computed over
        ``CONNECT:<tunnel host>:443`` instead of the request selector.

        Returns the header value, or None when the challenge is
        incomplete, the algorithm is not supported, or no credentials are
        known for the realm.  Raises URLError for an unsupported ``qop``.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        # Same defensive lookup as do_open: the attribute may be missing.
        tunnel_host = getattr(req, '_tunnel_host', None)
        if tunnel_host:
            # All requests with _tunnel_host are https; hardwire 443
            uri = "%s:443" % tunnel_host
            A2 = "CONNECT:%s" % uri
        else:
            # XXX selector: what about full urls
            uri = req.get_selector()
            A2 = "%s:%s" % (req.get_method(), uri)

        if qop == 'auth':
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce

            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, uri, respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    urllib2.AbstractDigestAuthHandler.get_authorization = get_authorization

    _orig_init = httplib.HTTPConnection.__init__

    def __init__(self, *args, **kwargs):
        """Run the original HTTPConnection initialiser, then set the tunnel
        attributes to their empty defaults."""
        _orig_init(self, *args, **kwargs)
        self._tunnel_headers = {}
        self._tunnel_host = ''
        self._tunnel_port = ''

    def _set_tunnel(self, host, port=None, headers=None):
        """ Sets up the host and the port for the HTTP CONNECT Tunnelling.

        The headers argument should be a mapping of extra HTTP headers
        to send with the CONNECT request.
        """
        self._tunnel_host = host
        self._tunnel_port = port
        # Keep a private copy: holding the caller's mapping by reference
        # and clearing it on a later call would mutate the caller's data.
        if headers:
            self._tunnel_headers = dict(headers)
        else:
            self._tunnel_headers = {}

    def _tunnel(self):
        """Send the CONNECT request for the configured tunnel target.

        Raises ProxyTunnelError, carrying the proxy's response, when the
        proxy answers with any status other than 200.
        """
        self._set_hostport(self._tunnel_host, self._tunnel_port)
        connect = ["CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)]
        for header, value in self._tunnel_headers.items():
            connect.append("%s: %s\r\n" % (header, value))
        connect.append("\r\n")
        self.send("".join(connect))
        response = self.response_class(self.sock, strict=self.strict,
                                       method=self._method)
        response.begin()

        if response.status != 200:
            raise ProxyTunnelError(response)

    httplib.HTTPConnection.__init__ = __init__
    httplib.HTTPConnection._set_tunnel = _set_tunnel
    httplib.HTTPConnection._tunnel = _tunnel