diff -r d9c98730e2e8 Lib/test/test_urllib2.py --- a/Lib/test/test_urllib2.py Sat Jul 07 13:34:50 2012 +1000 +++ b/Lib/test/test_urllib2.py Mon Jul 09 23:22:40 2012 +0100 @@ -1300,6 +1300,7 @@ self.recorded = [] def record(self, info): self.recorded.append(info) + class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler): def http_error_401(self, *args, **kwds): self.parent.record("digest") @@ -1332,6 +1333,113 @@ # _test_basic_auth called .open() twice) self.assertEqual(opener.recorded, ["digest", "basic"]*2) + def _test_digest_auth_handler_is_matched(self, realm, http_handler): + recorded = [] + class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler): + def http_error_401(self, *args, **kwds): + recorded.append("try digest") + return urllib.request.HTTPDigestAuthHandler.http_error_401( + self, *args, **kwds) + def retry_http_digest_auth(self, *args, **kwds): + recorded.append("matched digest") + return urllib.request.HTTPDigestAuthHandler.retry_http_digest_auth( + self, *args, **kwds) + + class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler): + def http_error_401(self, *args, **kwds): + recorded.append("try basic") + urllib.request.HTTPBasicAuthHandler.http_error_401( + self, *args, **kwds) + + opener = OpenerDirector() + password_manager = MockPasswordManager() + digest_handler = TestDigestAuthHandler(password_manager) + basic_handler = TestBasicAuthHandler(password_manager) + opener.add_handler(basic_handler) + opener.add_handler(digest_handler) + opener.add_handler(http_handler) + request_url = "http://www.example.com/" + user, password = "wile", "coyote" + + password_manager.add_password(realm, request_url, user, password) + + opener.open(request_url) + # digest should have been found, tried and matched, basic + # shouldn't have got a look in + self.assertEqual(recorded, ["try digest","matched digest"]) + + def test_basic_and_digest_auth_headers_present(self): + # It is valid HTTP for there to be more than one + # WWW-Authenticate header. Even if the Basic header is + # supplied first we should still use Digest if possible. + # + # See RFC 2617 section 1.2 and 4.6 + realm = "ACME Networks" + nonce = "acmeguaranteedrandomnonce" + basic_header = 'WWW-Authenticate: Basic realm="%s"' % realm + digest_header = 'WWW-Authenticate: Digest realm="%s", nonce="%s"' \ + % (realm, nonce) + http_handler = MockHTTPHandler( + 401, "\r\n".join([basic_header, digest_header, ""])) + self._test_digest_auth_handler_is_matched(realm, http_handler) + + def test_basic_and_digest_auth_in_same_header(self): + # It is valid HTTP for one WWW-Authenticate header to give + # multiple authentication options. Should always choose the + # strongest. + # + # See RFC 2617 section 1.2 and 4.6 + realm = "ACME Networks" + nonce = "acmeguaranteedrandomnonce" + basic = 'Basic realm="%s"' % realm + digest = 'Digest realm="%s", nonce="%s"' % (realm, nonce) + auth_header = "WWW-Authenticate: %s, %s" % (basic, digest) + http_handler = MockHTTPHandler( + 401, auth_header + "\r\n\r\n") + self._test_digest_auth_handler_is_matched(realm, http_handler) + # Test with digest first as well, we want to show up bugs that + # make it blindly pick the first or the last + auth_header = "WWW-Authenticate: %s, %s" % (digest, basic) + http_handler = MockHTTPHandler( + 401, auth_header + "\r\n\r\n") + self._test_digest_auth_handler_is_matched(realm, http_handler) + + def test_unknown_auth_scheme_is_ok_if_there_is_also_digest(self): + realm = "ACME Networks" + nonce = "acmeguaranteedrandomnonce" + acme = 'Acme realm="%s",foo=bar' % realm + digest = 'Digest realm="%s", nonce="%s"' % (realm, nonce) + + auth_header = "WWW-Authenticate: %s, %s" % (acme, digest) + http_handler = MockHTTPHandler( + 401, auth_header + "\r\n\r\n") + self._test_digest_auth_handler_is_matched(realm, http_handler) + + def test_unknown_auth_scheme_is_ok_if_there_is_also_basic(self): + realm = "ACME Networks" + nonce = "acmeguaranteedrandomnonce" + basic = 'Basic realm="%s"' % realm + acme = 'Acme realm="%s",foo=bar' % realm + auth_header = "WWW-Authenticate: %s, %s" % (acme, basic) + http_handler = MockHTTPHandler( + 401, auth_header + "\r\n\r\n") + + opener = urllib.request.OpenerDirector() + password_manager = MockPasswordManager() + digest_handler = urllib.request.HTTPDigestAuthHandler(password_manager) + basic_handler = urllib.request.HTTPBasicAuthHandler(password_manager) + opener.add_handler(basic_handler) + opener.add_handler(digest_handler) + opener.add_handler(http_handler) + + # check basic auth isn't blocked by digest handler failing + self._test_basic_auth(opener, basic_handler, "Authorization", + realm, http_handler, password_manager, + "http://acme.example.com/protected", + "http://acme.example.com/protected", + ) + + def test_unsupported_auth_digest_handler(self): opener = OpenerDirector() # While using DigestAuthHandler diff -r d9c98730e2e8 Lib/urllib/request.py --- a/Lib/urllib/request.py Sat Jul 07 13:34:50 2012 +1000 +++ b/Lib/urllib/request.py Mon Jul 09 23:22:40 2012 +0100 @@ -892,14 +892,6 @@ class AbstractBasicAuthHandler: - # XXX this allows for multiple auth-schemes, but will stupidly pick - # the last one with a realm specified. - - # allow for double- and single-quoted realm values - # (single quotes are a violation of the RFC, but appear in the wild) - rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+' - 'realm=(["\']?)([^"\']*)\\2', re.I) - # XXX could pre-emptively send auth info already accepted (RFC 2617, # end of section 2, and section 1.2 immediately after "credentials" # production). @@ -914,12 +906,9 @@ def reset_retry_count(self): self.retried = 0 - def http_error_auth_reqed(self, authreq, host, req, headers): + def http_error_auth_reqed(self, auth_header, host, req, headers): # host may be an authority (without userinfo) or a URL with an # authority - # XXX could be multiple headers - authreq = headers.get(authreq, None) - if self.retried > 5: # retry sending the username:password 5 times before failing. raise HTTPError(req.get_full_url(), 401, "basic auth failed", @@ -927,24 +916,28 @@ else: self.retried += 1 - if authreq: - scheme = authreq.split()[0] - if scheme.lower() != 'basic': + challenges = {} + for authreq in headers.get_all(auth_header, []): + challenges.update(_parse_www_authenticate_header(authreq, remove_quotes=False)) + + if challenges: + if 'basic' not in challenges: raise ValueError("AbstractBasicAuthHandler does not" - " support the following scheme: '%s'" % - scheme) + " support the following schemes: %s" % + ", ".join(challenges.keys())) else: - mo = AbstractBasicAuthHandler.rx.search(authreq) - if mo: - scheme, quote, realm = mo.groups() - if quote not in ['"',"'"]: - warnings.warn("Basic Auth Realm was unquoted", - UserWarning, 2) - if scheme.lower() == 'basic': - response = self.retry_http_basic_auth(host, req, realm) - if response and response.code != 401: - self.retried = 0 - return response + if "realm" not in challenges["basic"]: + raise ValueError("Basic Auth request supplied without a realm") + realm = challenges["basic"]["realm"] + if realm[0] not in "'\"": + warnings.warn("Basic Auth Realm was unquoted", + UserWarning, 2) + else: + realm = realm[1:-1] + response = self.retry_http_basic_auth(host, req, realm) + if response and response.code != 401: + self.retried = 0 + return response def retry_http_basic_auth(self, host, req, realm): user, pw = self.passwd.find_user_password(realm, host) @@ -990,6 +983,41 @@ # Return n random bytes. _randombytes = os.urandom +_re_challenge_start = re.compile(r"[a-zA-Z]+ +[^=]") +def _parse_www_authenticate_header(header, remove_quotes=True): + """Parse a WWW-Authenticate header which may contain 1 or more + challenges. Return the challenges as a dictionary indexed by the + scheme in lower case (eg "digest" or "basic"). The value will be + another disctionary containing the options (eg "realm"). + + If remove_quotes is False then the quotes will be left on option + values if they are present. This allows the calling code to check + that values are properly quoted (needed so a warning can be issued + if the realm value in unquoted in basic auth). + + """ + # WWW-Authenticate headers are crazy! They allow multiple challenges + # seperate by commas, but each of those challenges has parameters + # which are also seperated by commas. WHY??? + + challenges = {} + cur = None + # Single quotes are an RFC violation but (at least for realm + # values in WWW-Authenticate), they do appear in the wild. + for item in parse_http_list(header, "'\""): + item = item.strip() + if _re_challenge_start.match(item): + scheme, item = item.split(" ", 1) + challenges[scheme.lower()] = cur = {} + item = item.strip() + if cur is None: + raise ValueError("Failed to parse www-authenticate header") + if item: + k, v = item.split('=', 1) + if remove_quotes and v and v[0] in "'\"" and v[-1] == v[0]: + v = v[1:-1] + cur[k.strip().lower()] = v.strip() + return challenges class AbstractDigestAuthHandler: # Digest authentication is specified in RFC 2617. @@ -1015,7 +1043,6 @@ self.retried = 0 def http_error_auth_reqed(self, auth_header, host, req, headers): - authreq = headers.get(auth_header, None) if self.retried > 5: # Don't fail endlessly - if we failed once, we'll probably # fail a second time. Hm. Unless the Password Manager is @@ -1026,17 +1053,17 @@ headers, None) else: self.retried += 1 - if authreq: - scheme = authreq.split()[0] - if scheme.lower() == 'digest': - return self.retry_http_digest_auth(req, authreq) - elif scheme.lower() != 'basic': + challenges = {} + for authreq in headers.get_all(auth_header, []): + challenges.update(_parse_www_authenticate_header(authreq)) + if challenges: + if "digest" in challenges: + return self.retry_http_digest_auth(req, challenges["digest"]) + elif "basic" not in challenges: raise ValueError("AbstractDigestAuthHandler does not support" - " the following scheme: '%s'" % scheme) - - def retry_http_digest_auth(self, req, auth): - token, challenge = auth.split(' ', 1) - chal = parse_keqv_list(filter(None, parse_http_list(challenge))) + " the following schemes: %s" % ", ".join(challenges.keys())) + + def retry_http_digest_auth(self, req, chal): auth = self.get_authorization(req, chal) if auth: auth_val = 'Digest %s' % auth @@ -1318,12 +1345,12 @@ parsed = {} for elt in l: k, v = elt.split('=', 1) - if v[0] == '"' and v[-1] == '"': + if v[0] in ['"',"'"] and v[-1] == v[0]: v = v[1:-1] parsed[k] = v return parsed -def parse_http_list(s): +def parse_http_list(s, allowed_quotes='"'): """Parse lists as described by RFC 2068 Section 2. In particular, parse comma-separated lists where the elements of @@ -1345,7 +1372,7 @@ if cur == '\\': escape = True continue - elif cur == '"': + elif cur == quote: quote = False part += cur continue @@ -1355,8 +1382,8 @@ part = '' continue - if cur == '"': - quote = True + if cur in allowed_quotes: + quote = cur part += cur