Index: Doc/lib/libcookielib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcookielib.tex,v retrieving revision 1.2 diff -u -3 -p -u -r1.2 libcookielib.tex --- Doc/lib/libcookielib.tex 10 Jul 2004 18:41:28 -0000 1.2 +++ Doc/lib/libcookielib.tex 16 Sep 2004 00:15:07 -0000 @@ -28,6 +28,9 @@ various named parameters found in \mailh To distinguish them from Python attributes, the documentation for this module uses the term \dfn{cookie-attribute} instead}. +\warning{This module is NOT yet known to be thread safe in any +sense.} + The module defines the following exception: @@ -59,8 +62,8 @@ corresponding attributes. A \class{CookieJar} which can load cookies from, and perhaps save cookies to, a file on disk. Cookies are \strong{NOT} loaded from the named file until either the \method{load()} or \method{revert()} -method is called. Subclasses of this class are documented in section -\ref{file-cookie-jar-classes}. +method is called. Concrete subclasses of this abstract class are +documented in section \ref{file-cookie-jar-classes}. \end{classdesc} \begin{classdesc}{CookiePolicy}{} @@ -84,8 +87,8 @@ accepted from / returned to the server. Constructor arguments should be passed as keyword arguments only. \var{blocked_domains} is a sequence of domain names that we never accept cookies from, nor return cookies to. \var{allowed_domains} if -not \constant{None}, this is a sequence of the only domains for which -we accept and return cookies. For all other arguments, see the +not \constant{None}, is a sequence of the only domains for which we +accept and return cookies. For all other arguments, see the documentation for \class{CookiePolicy} and \class{DefaultCookiePolicy} objects. @@ -239,8 +242,8 @@ anyway, unless you ask otherwise by pass ignore_discard=\constant{False}, ignore_expires=\constant{False}} Save cookies to a file. -This base class raises \class{NotImplementedError}. Subclasses may -leave this method unimplemented. +This abstract class raises \class{NotImplementedError}. Concrete +subclasses may leave this method unimplemented. \var{filename} is the name of file in which to save cookies. If \var{filename} is not specified, \member{self.filename} is used (whose @@ -298,9 +301,9 @@ loads cookies. \label{file-cookie-jar-classes}} The following \class{CookieJar} subclasses are provided for reading -and writing . Further \class{CookieJar} subclasses, including one -that reads Microsoft Internet Explorer cookies, are available at -\url{http://wwwsearch.sf.net/ClientCookie/}. +and writing cookies to files on disk. Further \class{CookieJar} +subclasses, including one that reads Microsoft Internet Explorer +cookies, are available at \url{http://wwwsearch.sf.net/ClientCookie/}. \begin{classdesc}{MozillaCookieJar}{filename, delayload=\constant{None}, policy=\constant{None}} @@ -494,10 +497,10 @@ name, and which may all be assigned to. General strictness switches: \begin{memberdesc}{strict_domain} -Don't allow sites to set two-component domains with country-code -top-level domains like \code{.co.uk}, \code{.gov.uk}, -\code{.co.nz}.etc. This is far from perfect and isn't guaranteed to -work! +Attempt to prevent sites setting cookies with two-component +country-code "pseudo top-level" domains like \code{.co.uk}, +\code{.gov.uk}, \code{.co.nz}, etc. This is not perfect and is not +guaranteed to work! \end{memberdesc} RFC 2965 protocol strictness switches: @@ -506,14 +509,14 @@ RFC 2965 protocol strictness switches: Follow RFC 2965 rules on unverifiable transactions (usually, an unverifiable transaction is one resulting from a redirect or a request for an image hosted on another site). If this is false, cookies are -\emph{never} blocked on the basis of verifiability +\emph{never} blocked on the basis of verifiability. \end{memberdesc} Netscape protocol strictness switches: \begin{memberdesc}{strict_ns_unverifiable} -apply RFC 2965 rules on unverifiable transactions even to Netscape -cookies +Apply RFC 2965 rules on unverifiable transactions even to Netscape +cookies. \end{memberdesc} \begin{memberdesc}{strict_ns_domain} Flags indicating how strict to be with domain-matching rules for @@ -614,6 +617,9 @@ this cookie, or \constant{None}. True if a port or set of ports was explicitly specified by the server (in the \mailheader{Set-Cookie} / \mailheader{Set-Cookie2} header). \end{memberdesc} +\begin{memberdesc}[Cookie]{path_specified} +True if a path was explicitly specified by the server. +\end{memberdesc} \begin{memberdesc}[Cookie]{domain_specified} True if a domain was explicitly specified by the server. \end{memberdesc} @@ -626,14 +632,18 @@ Cookies may have additional non-standard be accessed using the following methods: \begin{methoddesc}[Cookie]{has_nonstandard_attr}{name} -Return true if cookie has the named cookie-attribute. +Return true if cookie has the named nonstandard cookie-attribute. \end{methoddesc} \begin{methoddesc}[Cookie]{get_nonstandard_attr}{name, default=\constant{None}} -If cookie has the named cookie-attribute, return its value. -Otherwise, return \var{default}. +If cookie has the named nonstandard cookie-attribute, return its +value. Otherwise, return \var{default}. \end{methoddesc} \begin{methoddesc}[Cookie]{set_nonstandard_attr}{name, value} -Set the value of the named cookie-attribute. +Set the value of the named nonstandard cookie-attribute. +\end{methoddesc} +\begin{methoddesc}[Cookie]{nonstandard_attr_keys}{} +Return a list of the names of all nonstandard cookie-attributes of +this cookie. \end{methoddesc} The \class{Cookie} class also defines the following method: @@ -683,3 +693,26 @@ cj = CookieJar(policy) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) r = opener.open("http://example.com/") \end{verbatim} + +The last (but not least) example shows how to display cookie and HTTP +debugging information, which can be very useful when writing code that +uses this module: + +\begin{verbatim} +import cookielib, urllib2, logging + +cj = cookielib.CookieJar() +# turn on printing of cookie debugging information +logger = logging.getLogger('cookielib') +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.DEBUG) + +hh = urllib2.HTTPHandler() +hsh = urllib2.HTTPSHandler() +# turn on printing of HTTP debugging information +for handler in hh, hsh: + handler.set_http_debuglevel(1) + +opener = urllib2.build_opener(cj, hh, hsh) +r = opener.open("http://example.com/") +\end{verbatim} Index: Lib/_LWPCookieJar.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/_LWPCookieJar.py,v retrieving revision 1.2 diff -u -3 -p -u -r1.2 _LWPCookieJar.py --- Lib/_LWPCookieJar.py 31 Aug 2004 11:38:12 -0000 1.2 +++ Lib/_LWPCookieJar.py 16 Sep 2004 00:15:16 -0000 @@ -36,10 +36,10 @@ def lwp_cookie_str(cookie): if cookie.comment: h.append(("comment", cookie.comment)) if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) - keys = cookie._rest.keys() + keys = cookie.nonstandard_attr_keys() keys.sort() for k in keys: - h.append((k, str(cookie._rest[k]))) + h.append((k, str(cookie.get_nonstandard_attr(k)))) h.append(("version", str(cookie.version))) @@ -58,6 +58,8 @@ class LWPCookieJar(FileCookieJar): """ + magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" + def as_lwp_str(self, ignore_discard=True, ignore_expires=True): """Return cookies as a string of "\n"-separated "Set-Cookie3" headers. Index: Lib/cookielib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/cookielib.py,v retrieving revision 1.3 diff -u -3 -p -u -r1.3 cookielib.py --- Lib/cookielib.py 10 Jul 2004 18:32:12 -0000 1.3 +++ Lib/cookielib.py 16 Sep 2004 00:15:32 -0000 @@ -3,6 +3,9 @@ This module has (now fairly distant) origins in Gisle Aas' Perl module HTTP::Cookies, from the libwww-perl library. +WARNING: despite the presence of thread locks, this code is NOT known to be +thread safe in any sense! Patches welcome. + Docstrings, comments and debug strings in this code refer to the attributes of the HTTP cookie system as cookie-attributes, to distinguish them clearly from Python attributes. @@ -27,6 +30,7 @@ are available from http://wwwsearch.sf.n import sys, re, urlparse, copy, time, struct, urllib, types, logging from types import StringTypes +# WARNING: this module is NOT known to be thread safe! try: import threading as _threading except ImportError: @@ -493,13 +497,9 @@ def is_HDN(text): # the purposes of RFC 2965)? # For the current implementation, what about IPv6? Remember to look # at other uses of IPV4_RE also, if change this. - if IPV4_RE.search(text): - return False - if text == "": - return False - if text[0] == "." or text[-1] == ".": - return False - return True + return not (IPV4_RE.search(text) or + text == "" or + text[0] == "." or text[-1] == ".") def domain_match(A, B): """Return True if domain A domain-matches domain B, according to RFC 2965. @@ -531,14 +531,12 @@ def domain_match(A, B): if not is_HDN(A): return False i = A.rfind(B) - if i == -1 or i == 0: - # A does not have form NB, or N is the empty string - return False - if not B.startswith("."): - return False - if not is_HDN(B[1:]): - return False - return True + has_form_nb = not (i == -1 or i == 0) + return ( + has_form_nb and + B.startswith(".") and + is_HDN(B[1:]) + ) def liberal_is_HDN(text): """Return True if text is a sort-of-like a host domain name. @@ -546,9 +544,7 @@ def liberal_is_HDN(text): For accepting/blocking domains. """ - if IPV4_RE.search(text): - return False - return True + return not IPV4_RE.search(text) def user_domain_match(A, B): """For blocking/accepting domains. @@ -559,16 +555,10 @@ def user_domain_match(A, B): A = A.lower() B = B.lower() if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False + return A == B initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False + return (initial_dot and A.endswith(B) or + not initial_dot and A == B) cut_port_re = re.compile(r":\d+$") def request_host(request): @@ -695,10 +685,7 @@ def is_third_party(request): """ req_host = request_host(request) - if not domain_match(req_host, reach(request.get_origin_req_host())): - return True - else: - return False + return not domain_match(req_host, reach(request.get_origin_req_host())) class Cookie: @@ -764,12 +751,12 @@ class Cookie: return self._rest.get(name, default) def set_nonstandard_attr(self, name, value): self._rest[name] = value + def nonstandard_attr_keys(self): + return self._rest.keys() def is_expired(self, now=None): if now is None: now = time.time() - if (self.expires is not None) and (self.expires <= now): - return True - return False + return (self.expires is not None) and (self.expires <= now) def __str__(self): if self.port is None: p = "" @@ -965,6 +952,30 @@ class DefaultCookiePolicy(CookiePolicy): return False return True + def set_ok_countrycode_domain(self, cookie, request): + """Return False if explicit cookie domain is not acceptable. + + Called by set_ok_domain, for convenience of overriding by + subclasses. + + """ + if cookie.domain_specified and self.strict_domain: + domain = cookie.domain + # since domain was specified, we know that: + assert domain.startswith(".") + if domain.count(".") == 2: + # domain like .foo.bar + i = domain.rfind(".") + tld = domain[i+1:] + sld = domain[1:i] + if (sld.lower() in [ + "co", "ac", + "com", "edu", "org", "net", "gov", "mil", "int"] and + len(tld) == 2): + # domain like .co.uk + return False + return True + def set_ok_domain(self, cookie, request): if self.is_blocked(cookie.domain): debug(" domain %s is in user block-list", cookie.domain) @@ -972,22 +983,12 @@ class DefaultCookiePolicy(CookiePolicy): if self.is_not_allowed(cookie.domain): debug(" domain %s is not in user allow-list", cookie.domain) return False + if not self.set_ok_countrycode_domain(cookie, request): + debug(" country-code second level domain %s", cookie.domain) + return False if cookie.domain_specified: req_host, erhn = eff_request_host(request) domain = cookie.domain - if self.strict_domain and (domain.count(".") >= 2): - i = domain.rfind(".") - j = domain.rfind(".", 0, i) - if j == 0: # domain like .foo.bar - tld = domain[i+1:] - sld = domain[j+1:i] - if (sld.lower() in [ - "co", "ac", - "com", "edu", "org", "net", "gov", "mil", "int"] and - len(tld) == 2): - # domain like .co.uk - debug(" country-code second level domain %s", domain) - return False if domain.startswith("."): undotted_domain = domain[1:] else: @@ -1130,7 +1131,7 @@ class DefaultCookiePolicy(CookiePolicy): return True def domain_return_ok(self, domain, request): - # Liberal check of. This is here as an optimization to avoid + # Liberal domain check. This is here as an optimization to avoid # having to load lots of MSIE cookie files unless necessary. req_host, erhn = eff_request_host(request) if not req_host.startswith("."): @@ -1197,11 +1198,6 @@ class CookieJar: non_word_re = re.compile(r"\W") quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" def __init__(self, policy=None): if policy is None: Index: Lib/urllib2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/urllib2.py,v retrieving revision 1.76 diff -u -3 -p -u -r1.76 urllib2.py --- Lib/urllib2.py 31 Aug 2004 11:38:12 -0000 1.76 +++ Lib/urllib2.py 16 Sep 2004 00:15:55 -0000 @@ -305,7 +305,7 @@ class OpenerDirector: self.handle_error[protocol] = lookup elif condition == "open": kind = protocol - lookup = getattr(self, "handle_"+condition) + lookup = self.handle_open elif condition in ["response", "request"]: kind = protocol lookup = getattr(self, "process_"+condition) Index: Lib/test/test_cookielib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_cookielib.py,v retrieving revision 1.1 diff -u -3 -p -u -r1.1 test_cookielib.py --- Lib/test/test_cookielib.py 31 May 2004 18:22:40 -0000 1.1 +++ Lib/test/test_cookielib.py 16 Sep 2004 00:16:28 -0000 @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Tests for cookielib.py.""" import re, os, time @@ -596,6 +595,24 @@ class CookieTests(TestCase): 'foo=bar; domain=friendly.org; Version="1"') self.assertEquals(len(c), 0) + def test_strict_domain(self): + # Cookies whose domain is a country-code tld like .co.uk should + # not be set if CookiePolicy.strict_domain is true. + from cookielib import CookieJar, DefaultCookiePolicy + + cp = DefaultCookiePolicy(strict_domain=True) + cj = CookieJar(policy=cp) + interact_netscape(cj, "http://example.co.uk/", 'no=problemo') + interact_netscape(cj, "http://example.co.uk/", + 'okey=dokey; Domain=.example.co.uk') + self.assertEquals(len(cj), 2) + for pseudo_tld in [".co.uk", ".org.za", ".tx.us"]: + interact_netscape(cj, "http://example.%s/" % pseudo_tld, + 'spam=eggs; Domain=.co.uk') + self.assertEquals(len(cj), 2) + # XXXX what about blah.tv, .name, etc etc? See Konqueror + # (kcookiejar.cpp) and Mozilla implementations. + def test_two_component_domain_ns(self): # Netscape: .www.bar.com, www.bar.com, .bar.com, bar.com, no domain # should all get accepted, as should .acme.com, acme.com and no domain @@ -622,7 +639,6 @@ class CookieTests(TestCase): # because .foo.net matches foo.net interact_netscape(c, "http://foo.net/foo/", 'spam1=eggs; domain=foo.net') - # even if starts with a dot -- in NS rules, .foo.net matches foo.net! interact_netscape(c, "http://foo.net/foo/bar/", 'spam2=eggs; domain=.foo.net') self.assertEquals(len(c), 3) @@ -1415,13 +1431,13 @@ class LWPCookieTests(TestCase): "foo = bar; version = 1") cookie = interact_2965( - c, "http://www.acme.com/foo%2f%25/<<%0anewå/æøå", + c, "http://www.acme.com/foo%2f%25/<<%0anew\345/\346\370\345", 'bar=baz; path="/foo/"; version=1'); version_re = re.compile(r'^\$version=\"?1\"?', re.I) self.assert_("foo=bar" in cookie and version_re.search(cookie)) cookie = interact_2965( - c, "http://www.acme.com/foo/%25/<<%0anewå/æøå") + c, "http://www.acme.com/foo/%25/<<%0anew\345/\346\370\345") self.assert_(not cookie) # unicode URL doesn't raise exception