diff -r bd97eab25c70 Lib/http/cookies.py
--- a/Lib/http/cookies.py	Fri Nov 28 15:22:15 2014 -0800
+++ b/Lib/http/cookies.py	Mon Dec 01 18:20:37 2014 -0800
@@ -422,32 +422,49 @@
 #
 # Pattern for finding cookie
 #
-# This used to be strict parsing based on the RFC2109 and RFC2068
-# specifications.  I have since discovered that MSIE 3.0x doesn't
-# follow the character rules outlined in those specs.  As a
-# result, the parsing rules here are less strict.
+# Cookie values adhere to RFC 6265, which defines the grammar as:
 #
+#     cookie-pair       = cookie-name "=" cookie-value
+#     cookie-name       = token
+#     cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
+#     cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
+#                           ; US-ASCII characters excluding CTLs,
+#                           ; whitespace DQUOTE, comma, semicolon,
+#                           ; and backslash
+#     token             = <token, defined in [RFC2616], Section 2.2>
 
-_LegalCharsPatt  = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"
+# RFC 2616, Section 2.2 defines a token as:
+#
+#     token          = 1*<any CHAR except CTLs or separators>
+#     separators     = "(" | ")" | "<" | ">" | "@"
+#                    | "," | ";" | ":" | "\" | <">
+#                    | "/" | "[" | "]" | "?" | "="
+#                    | "{" | "}" | SP | HT
+
+# Note: ":" has been added to tokens to preserve backwards compatibility
+
+_LEGAL_TOKENS_PATT = r"[\w\d\*\+\-\.\$\|#%&'^_`~!:]"
+_LEGAL_VALUES_PATT = r"[\w\d\(\)\*\+\-\.\/\$\=\?\[\]\^\{\|\}!#%&':<>@_`~]"
+
 _CookiePattern = re.compile(r"""
-    (?x)                           # This is a verbose pattern
-    \s*                            # Optional whitespace at start of cookie
-    (?P<key>                       # Start of group 'key'
-    """ + _LegalCharsPatt + r"""+?   # Any word of at least one letter
-    )                              # End of group 'key'
-    (                              # Optional group: there may not be a value.
-    \s*=\s*                          # Equal Sign
-    (?P<val>                         # Start of group 'val'
-    "(?:[^\\"]|\\.)*"                  # Any doublequoted string
-    |                                  # or
+    (?x)                                # This is a verbose pattern
+    \s*                                 # Optional whitespace at start of cookie
+    (?P<key>                            # Start of group 'key'
+    """ + _LEGAL_TOKENS_PATT + r"""+?   # Any word of at least one letter
+    )                                   # End of group 'key'
+    (                                   # Optional group: there may not be a value.
+    \s*=\s*                               # Equal Sign
+    (?P<val>                              # Start of group 'val'
+    "(?:[^\\"]|\\.)*"                       # Any doublequoted string
+    |                                       # or
     \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT  # Special case for "expires" attr
-    |                                  # or
-    """ + _LegalCharsPatt + r"""*      # Any word or empty string
-    )                                # End of group 'val'
-    )?                             # End of optional value group
-    \s*                            # Any number of spaces.
-    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
-    """, re.ASCII)                 # May be removed if safe.
+    |                                       # or
+    """ + _LEGAL_VALUES_PATT + r"""*        # Any word or empty string
+    )                                     # End of group 'val'
+    )?                                  # End of optional value group
+    \s*                                 # Any number of spaces.
+    (\s+|;|$)                           # Ending either at space, semicolon, or EOS.
+    """, re.ASCII)                      # May be removed if safe.
 
 
 # At long last, here is the cookie class.  Using this class is almost just like
diff -r bd97eab25c70 Lib/test/test_http_cookies.py
--- a/Lib/test/test_http_cookies.py	Fri Nov 28 15:22:15 2014 -0800
+++ b/Lib/test/test_http_cookies.py	Mon Dec 01 18:20:37 2014 -0800
@@ -35,6 +35,12 @@
              'repr': "<SimpleCookie: keebler='E=mc2'>",
              'output': 'Set-Cookie: keebler=E=mc2'},
 
+            # issue22931
+            {'data': 'a=b; c=[; d=r; f=h',
+             'dict': {'a':'b', 'c':'[', 'd':'r', 'f':'h'},
+             'repr': "<SimpleCookie: a='b' c='[' d='r' f='h'>",
+             'output': 'Set-Cookie: a=b\nSet-Cookie: c=[\nSet-Cookie: d=r\nSet-Cookie: f=h'},
+
             # Cookies with ':' character in their name. Though not mentioned in
             # RFC, servers / browsers allow it.