# HG changeset patch # Parent 8160786d029509a7563a1321068fc330ec2ff0c6 Issue #24291: Fix partial writes to StreamRequestHandler.wfile As far as I can tell, many servers in Python 3 suffer from this flaw, including those defined in the “http.server”, “wsgiref”, and “xmlrpc.server” modules. If the underlying send() method indicates a partial write, such as when the call is interrupted to handle a signal, these servers would silently drop the remaining data. Also document that shutil.copyfile() does not anticipate partial writes. diff -r 8160786d0295 Doc/library/http.server.rst --- a/Doc/library/http.server.rst Sat Apr 09 11:33:53 2016 +0200 +++ b/Doc/library/http.server.rst Mon Apr 11 03:42:42 2016 +0000 @@ -100,13 +100,14 @@ .. attribute:: rfile - Contains an input stream, positioned at the start of the optional input - data. + An :class:`io.BufferedIOBase` input stream, ready to read from + the start of the optional input data. .. attribute:: wfile - Contains the output stream for writing a response back to the - client. Proper adherence to the HTTP protocol must be used when writing to + An :class:`io.RawIOBase` output stream for writing a response back to + the client. Beware that :meth:`~io.RawIOBase.write` may do partial + writes. Proper adherence to the HTTP protocol must be used when writing to this stream. :class:`BaseHTTPRequestHandler` has the following class variables: diff -r 8160786d0295 Doc/library/shutil.rst --- a/Doc/library/shutil.rst Sat Apr 09 11:33:53 2016 +0200 +++ b/Doc/library/shutil.rst Mon Apr 11 03:42:42 2016 +0000 @@ -44,7 +44,8 @@ chunks; by default the data is read in chunks to avoid uncontrolled memory consumption. Note that if the current file position of the *fsrc* object is not 0, only the contents from the current file position to the end of the file will - be copied. + be copied. The *fdst* object should implement + :class:`io.BufferedIOBase.write`, which must write each chunk in full. .. function:: copyfile(src, dst, *, follow_symlinks=True) diff -r 8160786d0295 Doc/library/socketserver.rst --- a/Doc/library/socketserver.rst Sat Apr 09 11:33:53 2016 +0200 +++ b/Doc/library/socketserver.rst Mon Apr 11 03:42:42 2016 +0000 @@ -401,6 +401,15 @@ read or written, respectively, to get the request data or return data to the client. + The :attr:`rfile` attributes of both classes support the + :class:`io.BufferedIOBase` readable interface, and + :attr:`DatagramRequestHandler.wfile` supports the + :class:`io.BufferedIOBase` writable interface. However, + :attr:`StreamRequestHandler.wfile` supports the :class:`io.RawIOBase` + writable interface instead. As a consequence, with + :class:`StreamRequestHandler`, a single :meth:`~io.RawIOBase.write` call + may do a partial write. + Examples -------- @@ -442,6 +451,8 @@ An alternative request handler class that makes use of streams (file-like objects that simplify communication by providing the standard file interface):: + import io + class MyTCPHandler(socketserver.StreamRequestHandler): def handle(self): @@ -450,9 +461,11 @@ self.data = self.rfile.readline().strip() print("{} wrote:".format(self.client_address[0])) print(self.data) - # Likewise, self.wfile is a file-like object used to write back - # to the client - self.wfile.write(self.data.upper()) + # The wfile attribute is a raw file-like object used to write back + # to the client. Use BufferedWriter to guarantee a complete + # write. + with io.BufferedWriter(self.wfile) as writer: + writer.write(self.data.upper()) The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the diff -r 8160786d0295 Doc/library/wsgiref.rst --- a/Doc/library/wsgiref.rst Sat Apr 09 11:33:53 2016 +0200 +++ b/Doc/library/wsgiref.rst Mon Apr 11 03:42:42 2016 +0000 @@ -515,6 +515,9 @@ streams are stored in the :attr:`stdin`, :attr:`stdout`, :attr:`stderr`, and :attr:`environ` attributes. + The *stdout* argument should implement :meth:`io.BufferedIOBase.write`, + which must write each chunk in full. + .. class:: BaseHandler() diff -r 8160786d0295 Lib/http/server.py --- a/Lib/http/server.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/http/server.py Mon Apr 11 03:42:42 2016 +0000 @@ -463,7 +463,7 @@ code >= 200 and code not in ( HTTPStatus.NO_CONTENT, HTTPStatus.NOT_MODIFIED)): - self.wfile.write(body) + self.__write_exactly(body) def send_response(self, code, message=None): """Add the response header to the headers buffer and log the @@ -514,9 +514,17 @@ def flush_headers(self): if hasattr(self, '_headers_buffer'): - self.wfile.write(b"".join(self._headers_buffer)) + self.__write_exactly(b"".join(self._headers_buffer)) self._headers_buffer = [] + def __write_exactly(self, data): + """Work around wfile being a raw file, which may do partial writes""" + writer = io.BufferedWriter(self.wfile) + try: + writer.write(data) + finally: + writer.detach() + def log_request(self, code='-', size='-'): """Log an accepted request. @@ -629,7 +637,11 @@ f = self.send_head() if f: try: - self.copyfile(f, self.wfile) + writer = io.BufferedWriter(self.wfile) + try: + self.copyfile(f, writer) + finally: + writer.detach() finally: f.close() @@ -781,8 +793,8 @@ The SOURCE argument is a file object open for reading (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). + argument is a buffered file object open for writing, or + anything else implementing io.BufferedIOBase.write(). The only reason for overriding this would be to change the block size or perhaps to replace newlines by CRLF @@ -1153,7 +1165,7 @@ if not self.rfile._sock.recv(1): break stdout, stderr = p.communicate(data) - self.wfile.write(stdout) + self._BaseHTTPRequestHandler__write_exactly(stdout) if stderr: self.log_error('%s', stderr) p.stderr.close() diff -r 8160786d0295 Lib/pydoc.py --- a/Lib/pydoc.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/pydoc.py Mon Apr 11 03:42:42 2016 +0000 @@ -2205,7 +2205,7 @@ self.send_response(200) self.send_header('Content-Type', '%s; charset=UTF-8' % content_type) self.end_headers() - self.wfile.write(self.urlhandler( + self._BaseHTTPRequestHandler__write_exactly(self.urlhandler( self.path, content_type).encode('utf-8')) def log_message(self, *args): diff -r 8160786d0295 Lib/test/ssl_servers.py --- a/Lib/test/ssl_servers.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/ssl_servers.py Mon Apr 11 03:42:42 2016 +0000 @@ -106,7 +106,7 @@ self.send_header("Content-Length", str(len(body))) self.end_headers() if send_body: - self.wfile.write(body) + self._BaseHTTPRequestHandler__write_exactly(body) def do_HEAD(self): """Serve a HEAD request.""" diff -r 8160786d0295 Lib/test/test_httpservers.py --- a/Lib/test/test_httpservers.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_httpservers.py Mon Apr 11 03:42:42 2016 +0000 @@ -18,7 +18,7 @@ import http.client import tempfile import time -from io import BytesIO +from io import BytesIO, RawIOBase import unittest from test import support @@ -29,9 +29,6 @@ # don't write log messages to stderr pass - def read(self, n=None): - return '' - class TestServerThread(threading.Thread): def __init__(self, test_object, request_handler): @@ -113,7 +110,7 @@ self.send_header('Connection', 'close') self.end_headers() body = self.headers['x-special-incoming'].encode('utf-8') - self.wfile.write(body) + self._BaseHTTPRequestHandler__write_exactly(body) def setUp(self): BaseTestCase.setUp(self) @@ -660,13 +657,19 @@ return False -class AuditableBytesIO: +class AuditableBytesIO(RawIOBase): def __init__(self): self.datas = [] + def writable(self): + return True + def write(self, data): + # Work around limited memoryview lifetime (Issue 26720) + data = bytes(data) self.datas.append(data) + return len(data) def getData(self): return b''.join(self.datas) diff -r 8160786d0295 Lib/test/test_imaplib.py --- a/Lib/test/test_imaplib.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_imaplib.py Mon Apr 11 03:42:42 2016 +0000 @@ -6,6 +6,7 @@ from contextlib import contextmanager import imaplib +from io import BufferedWriter import os.path import socketserver import time @@ -100,11 +101,17 @@ def setup(self): super().setup() self.server.logged = None + self.writer = BufferedWriter(self.wfile) + + def finish(self): + self.writer.detach() + super().finish() def _send(self, message): if verbose: print("SENT: %r" % message.strip()) - self.wfile.write(message) + self.writer.write(message) + self.writer.flush() def _send_line(self, message): self._send(message + b'\r\n') @@ -297,7 +304,8 @@ class EOFHandler(socketserver.StreamRequestHandler): def handle(self): # EOF without sending a complete welcome message. - self.wfile.write(b'* OK') + with BufferedWriter(self.wfile) as writer: + writer.write(b'* OK') with self.reaped_server(EOFHandler) as server: self.assertRaises(imaplib.IMAP4.abort, @@ -494,7 +502,8 @@ class TooLongHandler(SimpleIMAPHandler): def handle(self): # Send a very long response line - self.wfile.write(b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n') + line = b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n' + self.writer.write(line) with self.reaped_server(TooLongHandler) as server: self.assertRaises(imaplib.IMAP4.error, diff -r 8160786d0295 Lib/test/test_socketserver.py --- a/Lib/test/test_socketserver.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_socketserver.py Mon Apr 11 03:42:42 2016 +0000 @@ -3,6 +3,7 @@ """ import contextlib +import io import os import select import signal @@ -110,7 +111,9 @@ class MyHandler(hdlrbase): def handle(self): line = self.rfile.readline() - self.wfile.write(line) + writer = io.BufferedWriter(self.wfile) + writer.write(line) + writer.detach() if verbose: print("creating server") server = MyServer(addr, MyHandler) diff -r 8160786d0295 Lib/test/test_urllib2_localnet.py --- a/Lib/test/test_urllib2_localnet.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_urllib2_localnet.py Mon Apr 11 03:42:42 2016 +0000 @@ -151,7 +151,8 @@ # not. #request_handler.send_header('Connection', 'close') request_handler.end_headers() - request_handler.wfile.write(b"Proxy Authentication Required.") + body = b"Proxy Authentication Required." + request_handler._BaseHTTPRequestHandler__write_exactly(body) return False def handle_request(self, request_handler): @@ -229,12 +230,13 @@ def do_GET(self): if not self.headers.get("Authorization", ""): self.do_AUTHHEAD() - self.wfile.write(b"No Auth header received") + body = b"No Auth header received" + self._BaseHTTPRequestHandler__write_exactly(body) elif self.headers.get( "Authorization", "") == "Basic " + self.ENCODED_AUTH: self.send_response(200) self.end_headers() - self.wfile.write(b"It works") + self._BaseHTTPRequestHandler__write_exactly(b"It works") else: # Request Unauthorized self.do_AUTHHEAD() @@ -269,10 +271,10 @@ self.send_response(200, "OK") self.send_header("Content-Type", "text/html") self.end_headers() - self.wfile.write(bytes("You've reached %s!
" % self.path, - "ascii")) - self.wfile.write(b"Our apologies, but our server is down due to " - b"a sudden zombie invasion.") + body = ("You've reached %s!
" + "Our apologies, but our server is down due to " + "a sudden zombie invasion.") % self.path + self._BaseHTTPRequestHandler__write_exactly(bytes(body, "ascii")) # Test cases diff -r 8160786d0295 Lib/test/test_wsgiref.py --- a/Lib/test/test_wsgiref.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_wsgiref.py Mon Apr 11 03:42:42 2016 +0000 @@ -1,3 +1,5 @@ +from test import support +from test.test_httpservers import NoLogRequestHandler from unittest import TestCase from wsgiref.util import setup_testing_defaults from wsgiref.headers import Headers @@ -6,12 +8,15 @@ from wsgiref.validate import validator from wsgiref.simple_server import WSGIServer, WSGIRequestHandler from wsgiref.simple_server import make_server +from http.client import HTTPConnection from io import StringIO, BytesIO, BufferedReader from socketserver import BaseServer from platform import python_implementation import os import re +import signal +import socket import sys import unittest @@ -221,6 +226,47 @@ b"data", out) + def test_interrupted_write(self): + # BaseHandler._write() and _flush() have to write all data, even if + # it takes multiple send() calls + threading = support.import_module("threading") + SIGUSR1 = support.get_attribute(signal, "SIGUSR1") + + def app(environ, start_response): + start_response("200 OK", []) + return [bytes(support.SOCK_MAX_SIZE)] + + class WsgiHandler(NoLogRequestHandler, WSGIRequestHandler): + pass + + server = make_server(support.HOST, 0, app, handler_class=WsgiHandler) + self.addCleanup(server.server_close) + interrupted = threading.Event() + + def signal_handler(signum, frame): + interrupted.set() + + original = signal.signal(SIGUSR1, signal_handler) + self.addCleanup(signal.signal, SIGUSR1, original) + received = None + + def run_client(): + http = HTTPConnection(*server.server_address) + http.request("GET", "/") + with http.getresponse() as response: + response.read(100) + os.kill(os.getpid(), SIGUSR1) + interrupted.wait() + nonlocal received + received = len(response.read()) + http.close() + + background = threading.Thread(target=run_client) + background.start() + server.handle_request() + background.join() + self.assertEqual(received, support.SOCK_MAX_SIZE - 100) + class UtilityTests(TestCase): diff -r 8160786d0295 Lib/test/test_xmlrpc.py --- a/Lib/test/test_xmlrpc.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_xmlrpc.py Mon Apr 11 03:42:42 2016 +0000 @@ -262,7 +262,7 @@ self.send_response(http.HTTPStatus.OK) self.send_header("Content-Length", len(response)) self.end_headers() - self.wfile.write(response) + self._BaseHTTPRequestHandler__write_exactly(response) self.handled = True self.close_connection = False diff -r 8160786d0295 Lib/wsgiref/simple_server.py --- a/Lib/wsgiref/simple_server.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/wsgiref/simple_server.py Mon Apr 11 03:42:42 2016 +0000 @@ -11,6 +11,7 @@ """ from http.server import BaseHTTPRequestHandler, HTTPServer +from io import BufferedWriter import sys import urllib.parse from wsgiref.handlers import SimpleHandler @@ -126,11 +127,15 @@ if not self.parse_request(): # An error code has been sent, just exit return - handler = ServerHandler( - self.rfile, self.wfile, self.get_stderr(), self.get_environ() - ) - handler.request_handler = self # backpointer for logging - handler.run(self.server.get_app()) + stdout = BufferedWriter(self.wfile) + try: + handler = ServerHandler( + self.rfile, stdout, self.get_stderr(), self.get_environ() + ) + handler.request_handler = self # backpointer for logging + handler.run(self.server.get_app()) + finally: + stdout.detach() diff -r 8160786d0295 Lib/xmlrpc/server.py --- a/Lib/xmlrpc/server.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/xmlrpc/server.py Mon Apr 11 03:42:42 2016 +0000 @@ -523,7 +523,7 @@ pass self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + self._BaseHTTPRequestHandler__write_exactly(response) def decode_request_content(self, data): #support gzip encoding of request @@ -549,7 +549,7 @@ self.send_header("Content-type", "text/plain") self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + self._BaseHTTPRequestHandler__write_exactly(response) def log_request(self, code='-', size='-'): """Selectively log an accepted request.""" @@ -915,7 +915,7 @@ self.send_header("Content-type", "text/html") self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + self._BaseHTTPRequestHandler__write_exactly(response) class DocXMLRPCServer( SimpleXMLRPCServer, XMLRPCDocGenerator): diff -r 8160786d0295 Misc/NEWS --- a/Misc/NEWS Sat Apr 09 11:33:53 2016 +0200 +++ b/Misc/NEWS Mon Apr 11 03:42:42 2016 +0000 @@ -237,6 +237,11 @@ Library ------- +- Issue #24291: Fix various servers based on + socketserver.StreamRequestHandler, which could do partial writes and + truncate data. This affectes the servers defined in the "http.server", + "wsgiref", and "xmlrpc.server" modules. + - Issue #16329: Add .webm to mimetypes.types_map. Patch by Giampaolo Rodola'. - Issue #13952: Add .csv to mimetypes.types_map. Patch by Geoff Wilson.