*** urllib2.py.orig Wed Mar 5 18:38:25 2003
--- urllib2.py Wed Mar 5 19:31:28 2003
***************
*** 5,18 ****
  below). It opens the URL and returns the results as file-like
  object; the returned object has some extra methods described below.
  
! The OpenerDirector manages a collection of Handler objects that do
! all the actual work. Each Handler implements a particular protocol or
  option. The OpenerDirector is a composite object that invokes the
  Handlers needed to open the requested URL. For example, the
  HTTPHandler performs HTTP GET and POST requests and deals with
  non-error returns. The HTTPRedirectHandler automatically deals with
! HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
! with digest authentication.
  
  urlopen(url, data=None) -- basic usage is that same as original
  urllib. pass the url and optionally data to post to an HTTP URL, and
--- 5,18 ----
  below). It opens the URL and returns the results as file-like
  object; the returned object has some extra methods described below.
  
! The OpenerDirector manages a collection of Handler objects that do all
! the actual work. Each Handler implements a particular protocol or
  option. The OpenerDirector is a composite object that invokes the
  Handlers needed to open the requested URL. For example, the
  HTTPHandler performs HTTP GET and POST requests and deals with
  non-error returns. The HTTPRedirectHandler automatically deals with
! HTTP 301, 302, 303 and 307 redirect errors, and the
! HTTPDigestAuthHandler deals with digest authentication.
  
  urlopen(url, data=None) -- basic usage is that same as original
  urllib. pass the url and optionally data to post to an HTTP URL, and
***************
*** 207,212 ****
--- 207,216 ----
                  return getattr(self, attr)
          raise AttributeError, attr
  
+     def get_method(self):
+         if self.has_data(): return "POST"
+         else: return "GET"
+ 
      def add_data(self, data):
          self.data = data
  
***************
*** 402,407 ****
--- 406,431 ----
          raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
  
  class HTTPRedirectHandler(BaseHandler):
+     def redirect_request(self, req, fp, code, msg, headers, newurl):
+         """Return a Request or None in response to a redirect.
+ 
+         This is called by the http_error_30x methods when a redirection
+         response is received; newurl is the URL to redirect to. If the
+         redirect should be followed, return a new Request for http_error_30x
+         to open. Otherwise, raise HTTPError if no-one else should try to
+         handle this url, or return None if another Handler might.
+ 
+         """
+         m = req.get_method()
+         if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
+             or code in (302, 303) and m == "POST"):
+             # Strictly (RFC 2616), a 302 in response to a POST MUST NOT be
+             # followed without confirmation from the user (of urllib2, here).
+             # In practice essentially all clients do redirect, so we do too.
+             return Request(newurl, headers=req.headers)
+         else:
+             raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+ 
      # Implementation note: To avoid the server sending us into an
      # infinite loop, the request object needs to track what URLs we
      # have already seen. Do this by adding a handler-specific
***************
*** 418,424 ****
          # XXX Probably want to forget about the state of the current
          # request, although that might interact poorly with other
          # handlers that also use handler-specific request attributes
!         new = Request(newurl, req.get_data(), req.headers)
          new.error_302_dict = {}
          if hasattr(req, 'error_302_dict'):
              if len(req.error_302_dict)>10 or \
--- 442,452 ----
          # XXX Probably want to forget about the state of the current
          # request, although that might interact poorly with other
          # handlers that also use handler-specific request attributes
!         new = self.redirect_request(req, fp, code, msg, headers, newurl)
!         if new is None:
!             return
! 
!         # loop detection
          new.error_302_dict = {}
          if hasattr(req, 'error_302_dict'):
              if len(req.error_302_dict)>10 or \
***************
*** 435,441 ****
  
          return self.parent.open(new)
  
!     http_error_301 = http_error_302
  
      inf_msg = "The HTTP server returned a redirect error that would" \
                "lead to an infinite loop.\n" \
--- 463,469 ----
  
          return self.parent.open(new)
  
!     http_error_301 = http_error_303 = http_error_307 = http_error_302
  
      inf_msg = "The HTTP server returned a redirect error that would" \
                "lead to an infinite loop.\n" \