# HG changeset patch # Parent 8160786d029509a7563a1321068fc330ec2ff0c6 Issue #24291: Fix partial writes to StreamRequestHandler.wfile As far as I can tell, many servers in Python 3 suffer from this flaw, including those defined in the “http.server”, “wsgiref”, and “xmlrpc.server” modules. If the underlying send() method indicates a partial write, such as when the call is interrupted to handle a signal, these servers would silently drop the remaining data. Also document that shutil.copyfile() does not anticipate partial writes. diff -r 8160786d0295 Doc/library/http.server.rst --- a/Doc/library/http.server.rst Sat Apr 09 11:33:53 2016 +0200 +++ b/Doc/library/http.server.rst Mon May 16 10:53:59 2016 +0000 @@ -100,13 +100,14 @@ .. attribute:: rfile - Contains an input stream, positioned at the start of the optional input - data. + An :class:`io.BufferedIOBase` input stream, ready to read from + the start of the optional input data. .. attribute:: wfile - Contains the output stream for writing a response back to the - client. Proper adherence to the HTTP protocol must be used when writing to + An :class:`io.RawIOBase` output stream for writing a response back to + the client. Beware that :meth:`~io.RawIOBase.write` may do partial + writes. Proper adherence to the HTTP protocol must be used when writing to this stream. :class:`BaseHTTPRequestHandler` has the following class variables: diff -r 8160786d0295 Doc/library/shutil.rst --- a/Doc/library/shutil.rst Sat Apr 09 11:33:53 2016 +0200 +++ b/Doc/library/shutil.rst Mon May 16 10:53:59 2016 +0000 @@ -44,7 +44,8 @@ chunks; by default the data is read in chunks to avoid uncontrolled memory consumption. Note that if the current file position of the *fsrc* object is not 0, only the contents from the current file position to the end of the file will - be copied. + be copied. The :meth:`~io.BufferedIOBase.write` method of *fdst* should + write each chunk in full, like :class:`io.BufferedIOBase`. .. function:: copyfile(src, dst, *, follow_symlinks=True) diff -r 8160786d0295 Doc/library/socketserver.rst --- a/Doc/library/socketserver.rst Sat Apr 09 11:33:53 2016 +0200 +++ b/Doc/library/socketserver.rst Mon May 16 10:53:59 2016 +0000 @@ -401,6 +401,15 @@ read or written, respectively, to get the request data or return data to the client. + The :attr:`rfile` attributes of both classes support the + :class:`io.BufferedIOBase` readable interface, and + :attr:`DatagramRequestHandler.wfile` supports the + :class:`io.BufferedIOBase` writable interface. However, + :attr:`StreamRequestHandler.wfile` supports the :class:`io.RawIOBase` + writable interface instead. As a consequence, with + :class:`StreamRequestHandler`, a single :meth:`~io.RawIOBase.write` call + may do a partial write. + Examples -------- @@ -442,6 +451,8 @@ An alternative request handler class that makes use of streams (file-like objects that simplify communication by providing the standard file interface):: + import io + class MyTCPHandler(socketserver.StreamRequestHandler): def handle(self): @@ -450,9 +461,11 @@ self.data = self.rfile.readline().strip() print("{} wrote:".format(self.client_address[0])) print(self.data) - # Likewise, self.wfile is a file-like object used to write back - # to the client - self.wfile.write(self.data.upper()) + # The wfile attribute is a raw file-like object used to write back + # to the client. Use BufferedWriter to guarantee a complete + # write. + with io.BufferedWriter(self.wfile) as writer: + writer.write(self.data.upper()) The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the diff -r 8160786d0295 Doc/library/wsgiref.rst --- a/Doc/library/wsgiref.rst Sat Apr 09 11:33:53 2016 +0200 +++ b/Doc/library/wsgiref.rst Mon May 16 10:53:59 2016 +0000 @@ -515,6 +515,9 @@ streams are stored in the :attr:`stdin`, :attr:`stdout`, :attr:`stderr`, and :attr:`environ` attributes. + The *stdout* argument should implement :meth:`io.BufferedIOBase.write`, + which must write each chunk in full. + .. class:: BaseHandler() diff -r 8160786d0295 Lib/http/server.py --- a/Lib/http/server.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/http/server.py Mon May 16 10:53:59 2016 +0000 @@ -463,7 +463,7 @@ code >= 200 and code not in ( HTTPStatus.NO_CONTENT, HTTPStatus.NOT_MODIFIED)): - self.wfile.write(body) + socketserver._write_exactly(self.wfile, body) def send_response(self, code, message=None): """Add the response header to the headers buffer and log the @@ -514,7 +514,8 @@ def flush_headers(self): if hasattr(self, '_headers_buffer'): - self.wfile.write(b"".join(self._headers_buffer)) + headers = b"".join(self._headers_buffer) + socketserver._write_exactly(self.wfile, headers) self._headers_buffer = [] def log_request(self, code='-', size='-'): @@ -629,7 +630,13 @@ f = self.send_head() if f: try: - self.copyfile(f, self.wfile) + # Avoid passing a raw file object that may do partial writes + # (Issue 24291) + writer = io.BufferedWriter(self.wfile) + try: + self.copyfile(f, writer) + finally: + writer.detach() finally: f.close() @@ -781,8 +788,8 @@ The SOURCE argument is a file object open for reading (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). + argument is a buffered file object open for writing, or + anything else implementing io.BufferedIOBase.write(). The only reason for overriding this would be to change the block size or perhaps to replace newlines by CRLF @@ -1153,7 +1160,7 @@ if not self.rfile._sock.recv(1): break stdout, stderr = p.communicate(data) - self.wfile.write(stdout) + socketserver._write_exactly(self.wfile, stdout) if stderr: self.log_error('%s', stderr) p.stderr.close() diff -r 8160786d0295 Lib/pydoc.py --- a/Lib/pydoc.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/pydoc.py Mon May 16 10:53:59 2016 +0000 @@ -62,6 +62,7 @@ import pkgutil import platform import re +import socketserver import sys import time import tokenize @@ -2205,7 +2206,7 @@ self.send_response(200) self.send_header('Content-Type', '%s; charset=UTF-8' % content_type) self.end_headers() - self.wfile.write(self.urlhandler( + socketserver._write_exactly(self.wfile, self.urlhandler( self.path, content_type).encode('utf-8')) def log_message(self, *args): diff -r 8160786d0295 Lib/socketserver.py --- a/Lib/socketserver.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/socketserver.py Mon May 16 10:53:59 2016 +0000 @@ -132,6 +132,7 @@ import threading except ImportError: import dummy_threading as threading +from io import BytesIO, BufferedWriter from time import monotonic as time __all__ = ["BaseServer", "TCPServer", "UDPServer", "ForkingUDPServer", @@ -747,13 +748,23 @@ self.wfile.close() self.rfile.close() +def _write_exactly(wfile, data): + """Write all of data to wfile, which might otherwise do partial writes + + This is an internal helper and will probably be removed in a future + version. + """ + writer = BufferedWriter(wfile) + try: + writer.write(data) + finally: + writer.detach() class DatagramRequestHandler(BaseRequestHandler): """Define self.rfile and self.wfile for datagram sockets.""" def setup(self): - from io import BytesIO self.packet, self.socket = self.request self.rfile = BytesIO(self.packet) self.wfile = BytesIO() diff -r 8160786d0295 Lib/test/ssl_servers.py --- a/Lib/test/ssl_servers.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/ssl_servers.py Mon May 16 10:53:59 2016 +0000 @@ -3,6 +3,7 @@ import ssl import pprint import socket +import socketserver import urllib.parse # Rename HTTPServer to _HTTPServer so as to avoid confusion with HTTPSServer. from http.server import (HTTPServer as _HTTPServer, @@ -16,7 +17,7 @@ HOST = support.HOST CERTFILE = os.path.join(here, 'keycert.pem') -# This one's based on HTTPServer, which is based on SocketServer +# This one's based on HTTPServer, which is based on socketserver class HTTPSServer(_HTTPServer): @@ -106,7 +107,7 @@ self.send_header("Content-Length", str(len(body))) self.end_headers() if send_body: - self.wfile.write(body) + socketserver._write_exactly(self.wfile, body) def do_HEAD(self): """Serve a HEAD request.""" diff -r 8160786d0295 Lib/test/test_httpservers.py --- a/Lib/test/test_httpservers.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_httpservers.py Mon May 16 10:53:59 2016 +0000 @@ -16,9 +16,10 @@ import urllib.parse import html import http.client +import socketserver import tempfile import time -from io import BytesIO +from io import BytesIO, RawIOBase import unittest from test import support @@ -29,9 +30,6 @@ # don't write log messages to stderr pass - def read(self, n=None): - return '' - class TestServerThread(threading.Thread): def __init__(self, test_object, request_handler): @@ -113,7 +111,7 @@ self.send_header('Connection', 'close') self.end_headers() body = self.headers['x-special-incoming'].encode('utf-8') - self.wfile.write(body) + socketserver._write_exactly(self.wfile, body) def setUp(self): BaseTestCase.setUp(self) @@ -660,13 +658,19 @@ return False -class AuditableBytesIO: +class AuditableBytesIO(RawIOBase): def __init__(self): self.datas = [] + def writable(self): + return True + def write(self, data): + # Cannot rely on data being alive after write() returns (Issue 26720) + data = bytes(data) self.datas.append(data) + return len(data) def getData(self): return b''.join(self.datas) diff -r 8160786d0295 Lib/test/test_imaplib.py --- a/Lib/test/test_imaplib.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_imaplib.py Mon May 16 10:53:59 2016 +0000 @@ -104,7 +104,7 @@ def _send(self, message): if verbose: print("SENT: %r" % message.strip()) - self.wfile.write(message) + socketserver._write_exactly(self.wfile, message) def _send_line(self, message): self._send(message + b'\r\n') @@ -297,7 +297,7 @@ class EOFHandler(socketserver.StreamRequestHandler): def handle(self): # EOF without sending a complete welcome message. - self.wfile.write(b'* OK') + socketserver._write_exactly(self.wfile, b'* OK') with self.reaped_server(EOFHandler) as server: self.assertRaises(imaplib.IMAP4.abort, @@ -494,7 +494,8 @@ class TooLongHandler(SimpleIMAPHandler): def handle(self): # Send a very long response line - self.wfile.write(b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n') + line = b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n' + socketserver._write_exactly(self.wfile, line) with self.reaped_server(TooLongHandler) as server: self.assertRaises(imaplib.IMAP4.error, diff -r 8160786d0295 Lib/test/test_socketserver.py --- a/Lib/test/test_socketserver.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_socketserver.py Mon May 16 10:53:59 2016 +0000 @@ -110,7 +110,7 @@ class MyHandler(hdlrbase): def handle(self): line = self.rfile.readline() - self.wfile.write(line) + socketserver._write_exactly(self.wfile, line) if verbose: print("creating server") server = MyServer(addr, MyHandler) diff -r 8160786d0295 Lib/test/test_urllib2_localnet.py --- a/Lib/test/test_urllib2_localnet.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_urllib2_localnet.py Mon May 16 10:53:59 2016 +0000 @@ -1,6 +1,7 @@ import base64 import os import email +import socketserver import urllib.parse import urllib.request import http.server @@ -151,7 +152,8 @@ # not. #request_handler.send_header('Connection', 'close') request_handler.end_headers() - request_handler.wfile.write(b"Proxy Authentication Required.") + body = b"Proxy Authentication Required." + socketserver._write_exactly(request_handler.wfile, body) return False def handle_request(self, request_handler): @@ -229,12 +231,13 @@ def do_GET(self): if not self.headers.get("Authorization", ""): self.do_AUTHHEAD() - self.wfile.write(b"No Auth header received") + body = b"No Auth header received" + socketserver._write_exactly(self.wfile, body) elif self.headers.get( "Authorization", "") == "Basic " + self.ENCODED_AUTH: self.send_response(200) self.end_headers() - self.wfile.write(b"It works") + socketserver._write_exactly(self.wfile, b"It works") else: # Request Unauthorized self.do_AUTHHEAD() @@ -269,10 +272,10 @@ self.send_response(200, "OK") self.send_header("Content-Type", "text/html") self.end_headers() - self.wfile.write(bytes("You've reached %s!
" % self.path, - "ascii")) - self.wfile.write(b"Our apologies, but our server is down due to " - b"a sudden zombie invasion.") + body = ("You've reached %s!
" + "Our apologies, but our server is down due to " + "a sudden zombie invasion.") % self.path + socketserver._write_exactly(self.wfile, bytes(body, "ascii")) # Test cases diff -r 8160786d0295 Lib/test/test_wsgiref.py --- a/Lib/test/test_wsgiref.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_wsgiref.py Mon May 16 10:53:59 2016 +0000 @@ -1,3 +1,5 @@ +from test import support +from test.test_httpservers import NoLogRequestHandler from unittest import TestCase from wsgiref.util import setup_testing_defaults from wsgiref.headers import Headers @@ -6,12 +8,14 @@ from wsgiref.validate import validator from wsgiref.simple_server import WSGIServer, WSGIRequestHandler from wsgiref.simple_server import make_server +from http.client import HTTPConnection from io import StringIO, BytesIO, BufferedReader from socketserver import BaseServer from platform import python_implementation import os import re +import signal import sys import unittest @@ -221,6 +225,47 @@ b"data", out) + def test_interrupted_write(self): + # BaseHandler._write() and _flush() have to write all data, even if + # it takes multiple send() calls + threading = support.import_module("threading") + SIGUSR1 = support.get_attribute(signal, "SIGUSR1") + + def app(environ, start_response): + start_response("200 OK", []) + return [bytes(support.SOCK_MAX_SIZE)] + + class WsgiHandler(NoLogRequestHandler, WSGIRequestHandler): + pass + + server = make_server(support.HOST, 0, app, handler_class=WsgiHandler) + self.addCleanup(server.server_close) + interrupted = threading.Event() + + def signal_handler(signum, frame): + interrupted.set() + + original = signal.signal(SIGUSR1, signal_handler) + self.addCleanup(signal.signal, SIGUSR1, original) + received = None + + def run_client(): + http = HTTPConnection(*server.server_address) + http.request("GET", "/") + with http.getresponse() as response: + response.read(100) + os.kill(os.getpid(), SIGUSR1) + interrupted.wait() + nonlocal received + received = len(response.read()) + http.close() + + background = threading.Thread(target=run_client) + background.start() + server.handle_request() + background.join() + self.assertEqual(received, support.SOCK_MAX_SIZE - 100) + class UtilityTests(TestCase): diff -r 8160786d0295 Lib/test/test_xmlrpc.py --- a/Lib/test/test_xmlrpc.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/test/test_xmlrpc.py Mon May 16 10:53:59 2016 +0000 @@ -21,6 +21,7 @@ gzip = None try: import threading + import socketserver except ImportError: threading = None @@ -262,7 +263,7 @@ self.send_response(http.HTTPStatus.OK) self.send_header("Content-Length", len(response)) self.end_headers() - self.wfile.write(response) + socketserver._write_exactly(self.wfile, response) self.handled = True self.close_connection = False diff -r 8160786d0295 Lib/wsgiref/simple_server.py --- a/Lib/wsgiref/simple_server.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/wsgiref/simple_server.py Mon May 16 10:53:59 2016 +0000 @@ -11,6 +11,7 @@ """ from http.server import BaseHTTPRequestHandler, HTTPServer +from io import BufferedWriter import sys import urllib.parse from wsgiref.handlers import SimpleHandler @@ -126,11 +127,17 @@ if not self.parse_request(): # An error code has been sent, just exit return - handler = ServerHandler( - self.rfile, self.wfile, self.get_stderr(), self.get_environ() - ) - handler.request_handler = self # backpointer for logging - handler.run(self.server.get_app()) + # Avoid passing the raw file object wfile, which can do partial + # writes (Issue 24291) + stdout = BufferedWriter(self.wfile) + try: + handler = ServerHandler( + self.rfile, stdout, self.get_stderr(), self.get_environ() + ) + handler.request_handler = self # backpointer for logging + handler.run(self.server.get_app()) + finally: + stdout.detach() diff -r 8160786d0295 Lib/xmlrpc/server.py --- a/Lib/xmlrpc/server.py Sat Apr 09 11:33:53 2016 +0200 +++ b/Lib/xmlrpc/server.py Mon May 16 10:53:59 2016 +0000 @@ -523,7 +523,7 @@ pass self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + socketserver._write_exactly(self.wfile, response) def decode_request_content(self, data): #support gzip encoding of request @@ -549,7 +549,7 @@ self.send_header("Content-type", "text/plain") self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + socketserver._write_exactly(self.wfile, response) def log_request(self, code='-', size='-'): """Selectively log an accepted request.""" @@ -915,7 +915,7 @@ self.send_header("Content-type", "text/html") self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + socketserver._write_exactly(self.wfile, response) class DocXMLRPCServer( SimpleXMLRPCServer, XMLRPCDocGenerator): diff -r 8160786d0295 Misc/NEWS --- a/Misc/NEWS Sat Apr 09 11:33:53 2016 +0200 +++ b/Misc/NEWS Mon May 16 10:53:59 2016 +0000 @@ -237,6 +237,11 @@ Library ------- +- Issue #24291: Fix various servers based on + socketserver.StreamRequestHandler, which could do partial writes and + truncate data. This affectes the servers defined in the "http.server", + "wsgiref", and "xmlrpc.server" modules. + - Issue #16329: Add .webm to mimetypes.types_map. Patch by Giampaolo Rodola'. - Issue #13952: Add .csv to mimetypes.types_map. Patch by Geoff Wilson.