# -*- coding: UTF-8 -*- """ HTTP handler with timeout and source address binding """ __author__ = "Pau Aliagas " __license__ = "GPL v2" import socket from httplib import HTTPConnection from urllib import addinfourl from urllib2 import HTTPHandler, URLError class BindingHTTPConnectionWithTimeout(HTTPConnection): """HTTPConnection subclass that supports timeouts and a binding address""" def __init__(self, host, port=None, strict=None, timeout=None, bind_to=None, proxy_info=None): HTTPConnection.__init__(self, host, port, strict) # attributes used to achieve the new functionality self.timeout = timeout self.bind_to = bind_to self.proxy_info = proxy_info def connect(self): """Connect to the host and port specified in __init__.""" # mostly verbatim from httplib.py and httplib2 msg = "getaddrinfo returns an empty list" for res in socket.getaddrinfo(self.host, self.port, 0, socket.SOCK_STREAM): af, socktype, proto, canonname, sa = res try: if self.proxy_info and self.proxy_info.isgood(): self.sock = socks.socksocket(af, socktype, proto) self.sock.setproxy(*self.proxy_info.astuple()) else: self.sock = socket.socket(af, socktype, proto) # difference from httplib # support timeout if self.timeout is not None: self.sock.settimeout(self.timeout) # support bind_to if self.bind_to is not None: self.sock.bind(self.bind_to) # end of difference from httplib if self.debuglevel > 0: print "connect: (%s, %s)" % (self.host, self.port) self.sock.connect(sa) except socket.error, msg: if self.debuglevel > 0: print 'connect fail:', (self.host, self.port) if self.sock: self.sock.close() self.sock = None continue break if not self.sock: raise socket.error, msg class BindingHTTPHandlerWithTimeout(HTTPHandler): """ HTTPHandler that suports timeout and source address binding It needs methods to add the extra params, which can be also provided when instancing the class. To use them we need to define some methods that are used to call us: - add_bind_to_ sets the source address to bind to. - add_timeout: sets the socket timeout. - http_open: only to call do_open with the new HTTTPConnection class that provides the extra features. - do_open: to call the http class with the given params """ def __init__(self, timeout=None, bind_to=None): # we slightly protect the new attributes, rather use the methods self._timeout = timeout self._bind_to = bind_to HTTPHandler.__init__(self) def add_bind_to(self, bind_to): self._bind_to = bind_to def add_timeout(self, timeout): self._timeout = timeout def http_open(self, req): """Call do_open with the new http class""" return self.do_open(BindingHTTPConnectionWithTimeout, req) def do_open(self, http_class, req): """Return an addinfourl object for the request, using http_class. http_class must implement the HTTPConnection API from httplib. The addinfourl return value is a file-like object. It also has methods and attributes including: - info(): return a mimetools.Message object for the headers - geturl(): return the original request URL - code: HTTP status code """ # mostly verbatim from urllib2.py host = req.get_host() if not host: raise URLError('no host given') # we call the class with all the params h = http_class(host, timeout=self._timeout, bind_to=self._bind_to) h.set_debuglevel(self._debuglevel) headers = dict(req.headers) headers.update(req.unredirected_hdrs) # We want to make an HTTP/1.1 request, but the addinfourl # class isn't prepared to deal with a persistent connection. # It will try to read all remaining data from the socket, # which will block while the server waits for the next request. # So make sure the connection gets closed after the (only) # request. headers["Connection"] = "close" headers = dict( (name.title(), val) for name, val in headers.items()) try: h.request(req.get_method(), req.get_selector(), req.data, headers) r = h.getresponse() except socket.error, err: # XXX what error? raise URLError(err) # Pick apart the HTTPResponse object to get the addinfourl # object initialized properly. # Wrap the HTTPResponse object in socket's file object adapter # for Windows. That adapter calls recv(), so delegate recv() # to read(). This weird wrapping allows the returned object to # have readline() and readlines() methods. # XXX It might be better to extract the read buffering code # out of socket._fileobject() and into a base class. r.recv = r.read fp = socket._fileobject(r, close=True) resp = addinfourl(fp, r.msg, req.get_full_url()) resp.code = r.status resp.msg = r.reason return resp