Validate header names and values in HTTPConnection.putheader().

Header names must be RFC 7230 tokens; header values may contain only HTAB,
SP, VCHAR and obs-text.  Both patterns are anchored with \Z (not $) so that
a trailing newline cannot slip through, and multiple values are joined with
a single space instead of an obsolete line fold.

diff -r e548ab4ce71d Lib/http/client.py
--- a/Lib/http/client.py	Mon Feb 09 19:49:00 2015 +0000
+++ b/Lib/http/client.py	Fri Feb 13 17:00:20 2015 -0800
@@ -71,6 +71,7 @@
 import http
 import io
 import os
+import re
 import socket
 import collections
 from urllib.parse import urlsplit
@@ -87,6 +88,7 @@
 
 _UNKNOWN = 'UNKNOWN'
 
+
 # connection states
 _CS_IDLE = 'Idle'
 _CS_REQ_STARTED = 'Request-started'
@@ -107,6 +109,32 @@
 _MAXLINE = 65536
 _MAXHEADERS = 100
 
+# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
+#
+# VCHAR          = %x21-7E
+# obs-text       = %x80-FF
+# header-field   = field-name ":" OWS field-value OWS
+# field-name     = token
+# field-value    = *( field-content / obs-fold )
+# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar    = VCHAR / obs-text
+#
+# obs-fold       = CRLF 1*( SP / HTAB )
+#                ; obsolete line folding
+#                ; see Section 3.2.4
+
+# token          = 1*tchar
+#
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+#                ; any VCHAR, except delimiters
+#
+# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
+
+_HEADER_LEGAL_NAME = re.compile(rb"[!#$%&'*+\-.^_`|~a-zA-Z0-9]+\Z")
+_HEADER_LEGAL_VALUE = re.compile(rb'[\t\x20-\x7E\x80-\xFF]*\Z')
+
 
 class HTTPMessage(email.message.Message):
     # XXX The only usage of this method is in
@@ -1000,13 +1028,32 @@
 
         if hasattr(header, 'encode'):
             header = header.encode('ascii')
+
+        if not _HEADER_LEGAL_NAME.match(header):
+            raise ValueError('Invalid header name {}'.format(header))
+
         values = list(values)
         for i, one_value in enumerate(values):
             if hasattr(one_value, 'encode'):
-                values[i] = one_value.encode('latin-1')
+                encoded_value = one_value.encode('latin-1')
             elif isinstance(one_value, int):
-                values[i] = str(one_value).encode('ascii')
-        value = b'\r\n\t'.join(values)
+                encoded_value = str(one_value).encode('ascii')
+            else:
+                encoded_value = one_value
+
+            if not _HEADER_LEGAL_VALUE.match(encoded_value):
+                raise ValueError(
+                    'Invalid header value {}'.format(encoded_value))
+
+            values[i] = encoded_value
+
+        # http://tools.ietf.org/html/rfc7230#section-3.2.4 states that line
+        # folding is obsolete, unless message/http MIME type is used and rules
+        # are conformed to. Otherwise, spaces should be used. It might be a
+        # good idea to put validation for this rule in sometime in the future.
+        # As it currently stands, there's no way to determine the MIME type of
+        # the message at this point.
+        value = b' '.join(values)
         header = header + b': ' + value
         self._output(header)
 
diff -r e548ab4ce71d Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py	Mon Feb 09 19:49:00 2015 +0000
+++ b/Lib/test/test_httplib.py	Fri Feb 13 17:00:20 2015 -0800
@@ -171,6 +171,11 @@
         conn.putheader('Content-length', 42)
         self.assertIn(b'Content-length: 42', conn._buffer)
 
+        conn.putheader('Foo', ' bar ')
+        self.assertIn(b'Foo:  bar ', conn._buffer)
+        conn.putheader('Bar', '\tbaz\t')
+        self.assertIn(b'Bar: \tbaz\t', conn._buffer)
+
     def test_ipv6host_header(self):
         # Default host header on IPv6 transaction should wrapped by [] if
         # its actual IPv6 address
@@ -200,6 +205,22 @@
         self.assertEqual(resp.getheader('First'), 'val')
         self.assertEqual(resp.getheader('Second'), 'val')
 
+    def test_invalid_control_characters(self):
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket('')
+        conn.putrequest('GET', '/')
+
+        # http://tools.ietf.org/html/rfc7230#section-3.2.4, whitespace is no
+        # longer allowed in header names
+        self.assertRaises(
+            ValueError, conn.putheader, b' InvalidName ', b'foo')
+        self.assertRaises(
+            ValueError, conn.putheader, b'Invalid\x80Name', b'foo')
+        # issue22928
+        self.assertRaises(
+            ValueError, conn.putheader, b'User-agent',
+            'Mozilla/5.0' + chr(0x0A) + 'Location: header injection')
+
 
 class BasicTest(TestCase):
     def test_status_lines(self):