diff -r 46bfddb14cbe Doc/library/http.client.rst --- a/Doc/library/http.client.rst Fri Feb 20 10:34:20 2015 -0500 +++ b/Doc/library/http.client.rst Fri Feb 20 07:58:18 2015 -0800 @@ -298,10 +298,10 @@ .. method:: HTTPConnection.putheader(header, argument[, ...]) - Send an :rfc:`822`\ -style header to the server. It sends a line to the server - consisting of the header, a colon and a space, and the first argument. If more - arguments are given, continuation lines are sent, each consisting of a tab and - an argument. + Send an :rfc:`7230`\ -style header to the server. It sends a line to the server + consisting of the header, a colon and a space, and the first argument. If + more arguments are given, they are appended to the header value, each + prepended with a single space. .. method:: HTTPConnection.endheaders(message_body=None) diff -r 46bfddb14cbe Lib/http/client.py --- a/Lib/http/client.py Fri Feb 20 10:34:20 2015 -0500 +++ b/Lib/http/client.py Fri Feb 20 07:58:18 2015 -0800 @@ -71,6 +71,7 @@ import http import io import os +import re import socket import collections from urllib.parse import urlsplit @@ -109,6 +110,36 @@ _MAXLINE = 65536 _MAXHEADERS = 100 +# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) +# +# VCHAR = %x21-7E +# obs-text = %x80-FF +# header-field = field-name ":" OWS field-value OWS +# field-name = token +# field-value = *( field-content / obs-fold ) +# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] +# field-vchar = VCHAR / obs-text +# +# obs-fold = CRLF 1*( SP / HTAB ) +# ; obsolete line folding +# ; see Section 3.2.4 + +# token = 1*tchar +# +# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" +# / "+" / "-" / "." 
/ "^" / "_" / "`" / "|" / "~" +# / DIGIT / ALPHA +# ; any VCHAR, except delimiters +# +# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1 + +_HEADER_LEGAL_NAME = re.compile(b'^[!#$%&\'*+.^_`|~a-zA-Z0-9-]+\\Z') +# 0x20 (sp) is included in the valid character range for backwards +# compatibility, where header values with spaces (i.e. auth headers) are passed +# through to putheader as single values. latin-1 charset is also legal as ASCII +# is only suggested in RFC 7230 +_HEADER_LEGAL_VALUE = re.compile(b'^[ \t]*[\x20-\x7E\xA0-\xFF]*[ \t]*\\Z') + class HTTPMessage(email.message.Message): # XXX The only usage of this method is in @@ -1002,13 +1033,35 @@ if hasattr(header, 'encode'): header = header.encode('ascii') + + if not _HEADER_LEGAL_NAME.match(header): + raise ValueError('Invalid header name {!r}'.format(header)) + values = list(values) for i, one_value in enumerate(values): if hasattr(one_value, 'encode'): - values[i] = one_value.encode('latin-1') + encoded_value = one_value.encode('latin-1') elif isinstance(one_value, int): - values[i] = str(one_value).encode('ascii') - value = b'\r\n\t'.join(values) + encoded_value = str(one_value).encode('ascii') + else: + encoded_value = one_value + + # Newly defined header fields SHOULD limit their field values to + # US-ASCII octets. A recipient SHOULD treat other octets in field + # content (obs-text) as opaque data. + if not _HEADER_LEGAL_VALUE.match(encoded_value): + raise ValueError( + 'Invalid header value {!r}'.format(encoded_value)) + + values[i] = encoded_value + + # http://tools.ietf.org/html/rfc7230#section-3.2.4 states that line + # folding is obsolete, unless message/http MIME type is used and rules + # are conformed to. otherwise, spaces should be used. it might be a + # good idea to put validation for this rule in sometime in the future. + # as it currently stands, there's no way to determine the MIME type of + # the message at this point. 
+ value = b' '.join(values) header = header + b': ' + value self._output(header) diff -r 46bfddb14cbe Lib/test/test_httplib.py --- a/Lib/test/test_httplib.py Fri Feb 20 10:34:20 2015 -0500 +++ b/Lib/test/test_httplib.py Fri Feb 20 07:58:18 2015 -0800 @@ -171,6 +171,17 @@ conn.putheader('Content-length', 42) self.assertIn(b'Content-length: 42', conn._buffer) + conn.putheader('Foo', ' bar ') + self.assertIn(b'Foo:  bar ', conn._buffer) + conn.putheader('Bar', '\tbaz\t') + self.assertIn(b'Bar: \tbaz\t', conn._buffer) + conn.putheader('Authorization', 'Bearer mytoken') + self.assertIn(b'Authorization: Bearer mytoken', conn._buffer) + conn.putheader('IterHeader', 'IterA', 'IterB') + self.assertIn(b'IterHeader: IterA IterB', conn._buffer) + conn.putheader('LatinHeader', b'\xFF') + self.assertIn(b'LatinHeader: \xFF', conn._buffer) + def test_ipv6host_header(self): # Default host header on IPv6 transaction should wrapped by [] if # its actual IPv6 address @@ -200,6 +211,22 @@ self.assertEqual(resp.getheader('First'), 'val') self.assertEqual(resp.getheader('Second'), 'val') + def test_invalid_control_characters(self): + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket('') + conn.putrequest('GET', '/') + + # http://tools.ietf.org/html/rfc7230#section-3.2.4, whitespace is no + # longer allowed in header names + self.assertRaises( + ValueError, conn.putheader, b' InvalidName ', b'foo') + self.assertRaises( + ValueError, conn.putheader, b'Invalid\x80Name', b'foo') + # issue22928 + self.assertRaises( + ValueError, conn.putheader, b'User-agent', + 'Mozilla/5.0' + chr(0x0A) + 'Location: header injection') + class BasicTest(TestCase): def test_status_lines(self): diff -r 46bfddb14cbe Misc/NEWS --- a/Misc/NEWS Fri Feb 20 10:34:20 2015 -0500 +++ b/Misc/NEWS Fri Feb 20 07:58:18 2015 -0800 @@ -49,6 +49,9 @@ argument which, if set to True, will pass messages to handlers taking handler levels into account. 
+- Issue #22928: HTTPConnection.putheader has been updated to conform to RFC + 7230. + Build -----