diff -r b3d4fd17e96d Lib/http/client.py
--- a/Lib/http/client.py	Fri Mar 06 09:10:45 2015 -0500
+++ b/Lib/http/client.py	Sun Mar 08 22:19:25 2015 -0700
@@ -71,6 +71,7 @@
 import http
 import io
 import os
+import re
 import socket
 import collections
 from urllib.parse import urlsplit
@@ -109,6 +110,40 @@
 _MAXLINE = 65536
 _MAXHEADERS = 100
 
+# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
+#
+# VCHAR          = %x21-7E
+# obs-text       = %x80-FF
+# header-field   = field-name ":" OWS field-value OWS
+# field-name     = token
+# field-value    = *( field-content / obs-fold )
+# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar    = VCHAR / obs-text
+#
+# obs-fold       = CRLF 1*( SP / HTAB )
+#                ; obsolete line folding
+#                ; see Section 3.2.4
+
+# token          = 1*tchar
+#
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+#                ; any VCHAR, except delimiters
+#
+# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
+
+# The patterns for both name and value are much more lenient than the RFC
+# definitions to allow for backwards compatibility.
+#
+# A name must be non-empty, must not start with whitespace, and must not
+# contain ':' or a line terminator (CR or LF) anywhere.
+_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
+# A value may contain CR or LF only as part of a fold: CRLF immediately
+# followed by a space or a tab.
+_is_legal_header_value = re.compile(
+    rb'(?:[^\r\n]|\r\n[ \t])*').fullmatch
+
 
 class HTTPMessage(email.message.Message):
     # XXX The only usage of this method is in
@@ -1002,12 +1037,21 @@
 
         if hasattr(header, 'encode'):
             header = header.encode('ascii')
+
+        if not _is_legal_header_name(header):
+            raise ValueError('Invalid header name {!r}'.format(header))
+
         values = list(values)
         for i, one_value in enumerate(values):
             if hasattr(one_value, 'encode'):
                 values[i] = one_value.encode('latin-1')
             elif isinstance(one_value, int):
                 values[i] = str(one_value).encode('ascii')
+
+            if not _is_legal_header_value(values[i]):
+                raise ValueError(
+                    'Invalid header value {!r}'.format(values[i]))
+
         value = b'\r\n\t'.join(values)
         header = header + b': ' + value
         self._output(header)
diff -r b3d4fd17e96d Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py	Fri Mar 06 09:10:45 2015 -0500
+++ b/Lib/test/test_httplib.py	Sun Mar 08 22:19:25 2015 -0700
@@ -171,6 +171,20 @@
         conn.putheader('Content-length', 42)
         self.assertIn(b'Content-length: 42', conn._buffer)
 
+        conn.putheader('Foo', ' bar ')
+        self.assertIn(b'Foo:  bar ', conn._buffer)
+        conn.putheader('Bar', '\tbaz\t')
+        self.assertIn(b'Bar: \tbaz\t', conn._buffer)
+        conn.putheader('Authorization', 'Bearer mytoken')
+        self.assertIn(b'Authorization: Bearer mytoken', conn._buffer)
+        conn.putheader('IterHeader', 'IterA', 'IterB')
+        self.assertIn(b'IterHeader: IterA\r\n\tIterB', conn._buffer)
+        conn.putheader('LatinHeader', b'\xFF')
+        self.assertIn(b'LatinHeader: \xFF', conn._buffer)
+        # a fold (CRLF + tab) embedded in a single value is accepted
+        conn.putheader('FoldedHeader', 'FoldA\r\n\tFoldB')
+        self.assertIn(b'FoldedHeader: FoldA\r\n\tFoldB', conn._buffer)
+
     def test_ipv6host_header(self):
         # Default host header on IPv6 transaction should wrapped by [] if
         # its actual IPv6 address
@@ -200,6 +214,31 @@
         self.assertEqual(resp.getheader('First'), 'val')
         self.assertEqual(resp.getheader('Second'), 'val')
 
+    def test_invalid_headers(self):
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket('')
+        conn.putrequest('GET', '/')
+
+        # http://tools.ietf.org/html/rfc7230#section-3.2.4, whitespace is no
+        # longer allowed in header names
+        cases = (
+            (b'Invalid\r\nName', b'ValidValue'),
+            (b'Invalid\rName', b'ValidValue'),
+            (b'Invalid\nName', b'ValidValue'),
+            # a lone CR at the start, a colon, or an empty name
+            (b'\rInvalidName', b'ValidValue'),
+            (b'Invalid:Name', b'ValidValue'),
+            (b'', b'ValidValue'),
+            (b'ValidName', b'Invalid\r\nValue'),
+            (b'ValidName', b'Invalid\rValue'),
+            (b'ValidName', b'Invalid\nValue'),
+            # a lone CR at the start of the value
+            (b'ValidName', b'\rInvalidValue'),
+        )
+        for name, value in cases:
+            with self.subTest((name, value)):
+                self.assertRaises(ValueError, conn.putheader, name, value)
+
 
 class BasicTest(TestCase):
     def test_status_lines(self):