# HG changeset patch # Parent 31ad7885e2e50daf1f58c320b7c8b2b048db8949 Issue #24291: Fix partial writes to StreamRequestHandler.wfile Many servers based on socketserver.StreamRequestHandler in Python 3 suffer from this flaw. The wsgiref.simple_server.WSGIRequestHandler class already has a workaround. Also, document that shutil.copyfileobj() should be passed a BufferedIOBase writer. diff -r 31ad7885e2e5 Doc/library/http.server.rst --- a/Doc/library/http.server.rst Sun Jun 05 10:50:16 2016 +0300 +++ b/Doc/library/http.server.rst Sun Jun 05 10:46:03 2016 +0000 @@ -99,13 +99,14 @@ .. attribute:: rfile - Contains an input stream, positioned at the start of the optional input - data. + An :class:`io.BufferedIOBase` input stream, ready to read from + the start of the optional input data. .. attribute:: wfile - Contains the output stream for writing a response back to the - client. Proper adherence to the HTTP protocol must be used when writing to + An :class:`io.RawIOBase` output stream for writing a response back to + the client. Beware that :meth:`~io.RawIOBase.write` may do partial + writes. Proper adherence to the HTTP protocol must be used when writing to this stream. :class:`BaseHTTPRequestHandler` has the following attributes: diff -r 31ad7885e2e5 Doc/library/shutil.rst --- a/Doc/library/shutil.rst Sun Jun 05 10:50:16 2016 +0300 +++ b/Doc/library/shutil.rst Sun Jun 05 10:46:03 2016 +0000 @@ -44,7 +44,8 @@ chunks; by default the data is read in chunks to avoid uncontrolled memory consumption. Note that if the current file position of the *fsrc* object is not 0, only the contents from the current file position to the end of the file will - be copied. + be copied. The :meth:`~io.BufferedIOBase.write` method of *fdst* should + write each chunk in full, like :class:`io.BufferedIOBase`. .. function:: copyfile(src, dst, *, follow_symlinks=True) diff -r 31ad7885e2e5 Doc/library/socketserver.rst --- a/Doc/library/socketserver.rst Sun Jun 05 10:50:16 2016 +0300 +++ b/Doc/library/socketserver.rst Sun Jun 05 10:46:03 2016 +0000 @@ -409,6 +409,15 @@ read or written, respectively, to get the request data or return data to the client. + The :attr:`rfile` attributes of both classes support the + :class:`io.BufferedIOBase` readable interface, and + :attr:`DatagramRequestHandler.wfile` supports the + :class:`io.BufferedIOBase` writable interface. However, + :attr:`StreamRequestHandler.wfile` supports the :class:`io.RawIOBase` + writable interface instead. As a consequence, with + :class:`StreamRequestHandler`, a single :meth:`~io.RawIOBase.write` call + may do a partial write. + Examples -------- @@ -449,6 +458,8 @@ An alternative request handler class that makes use of streams (file-like objects that simplify communication by providing the standard file interface):: + import io + class MyTCPHandler(socketserver.StreamRequestHandler): def handle(self): @@ -457,9 +468,11 @@ self.data = self.rfile.readline().strip() print("{} wrote:".format(self.client_address[0])) print(self.data) - # Likewise, self.wfile is a file-like object used to write back - # to the client - self.wfile.write(self.data.upper()) + # The wfile attribute is a raw file-like object used to write back + # to the client. Use BufferedWriter to guarantee a complete + # write. + with io.BufferedWriter(self.wfile) as writer: + writer.write(self.data.upper()) The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the diff -r 31ad7885e2e5 Lib/http/server.py --- a/Lib/http/server.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/http/server.py Sun Jun 05 10:46:03 2016 +0000 @@ -463,7 +463,7 @@ code >= 200 and code not in ( HTTPStatus.NO_CONTENT, HTTPStatus.NOT_MODIFIED)): - self.wfile.write(body) + socketserver._write_exactly(self.wfile, body) def send_response(self, code, message=None): """Add the response header to the headers buffer and log the @@ -514,7 +514,8 @@ def flush_headers(self): if hasattr(self, '_headers_buffer'): - self.wfile.write(b"".join(self._headers_buffer)) + headers = b"".join(self._headers_buffer) + socketserver._write_exactly(self.wfile, headers) self._headers_buffer = [] def log_request(self, code='-', size='-'): @@ -629,7 +630,13 @@ f = self.send_head() if f: try: - self.copyfile(f, self.wfile) + # Avoid passing a raw file object that may do partial writes + # (Issue 24291) + writer = io.BufferedWriter(self.wfile) + try: + self.copyfile(f, writer) + finally: + writer.detach() finally: f.close() @@ -781,8 +788,8 @@ The SOURCE argument is a file object open for reading (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). + argument is a buffered file object open for writing, or + anything else implementing io.BufferedIOBase.write(). The only reason for overriding this would be to change the block size or perhaps to replace newlines by CRLF @@ -1153,7 +1160,7 @@ if not self.rfile._sock.recv(1): break stdout, stderr = p.communicate(data) - self.wfile.write(stdout) + socketserver._write_exactly(self.wfile, stdout) if stderr: self.log_error('%s', stderr) p.stderr.close() diff -r 31ad7885e2e5 Lib/pydoc.py --- a/Lib/pydoc.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/pydoc.py Sun Jun 05 10:46:03 2016 +0000 @@ -62,6 +62,7 @@ import pkgutil import platform import re +import socketserver import sys import time import tokenize @@ -2205,7 +2206,7 @@ self.send_response(200) self.send_header('Content-Type', '%s; charset=UTF-8' % content_type) self.end_headers() - self.wfile.write(self.urlhandler( + socketserver._write_exactly(self.wfile, self.urlhandler( self.path, content_type).encode('utf-8')) def log_message(self, *args): diff -r 31ad7885e2e5 Lib/shutil.py --- a/Lib/shutil.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/shutil.py Sun Jun 05 10:46:03 2016 +0000 @@ -68,8 +68,8 @@ def copyfileobj(fsrc, fdst, length=16*1024): - """copy data from file-like object fsrc to file-like object fdst""" - while 1: + """copy data from file-like object fsrc to buffered file object fdst""" + while True: buf = fsrc.read(length) if not buf: break diff -r 31ad7885e2e5 Lib/socketserver.py --- a/Lib/socketserver.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/socketserver.py Sun Jun 05 10:46:03 2016 +0000 @@ -132,6 +132,7 @@ import threading except ImportError: import dummy_threading as threading +from io import BytesIO, BufferedWriter from time import monotonic as time __all__ = ["BaseServer", "TCPServer", "UDPServer", @@ -756,13 +757,24 @@ self.wfile.close() self.rfile.close() +def _write_exactly(wfile, data): + """Write all of data to wfile, which might otherwise do partial writes + + This is an internal helper and will probably be removed in a future + version. + """ + writer = BufferedWriter(wfile) + try: + writer.write(data) + finally: + writer.detach() + class DatagramRequestHandler(BaseRequestHandler): """Define self.rfile and self.wfile for datagram sockets.""" def setup(self): - from io import BytesIO self.packet, self.socket = self.request self.rfile = BytesIO(self.packet) self.wfile = BytesIO() diff -r 31ad7885e2e5 Lib/test/ssl_servers.py --- a/Lib/test/ssl_servers.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/test/ssl_servers.py Sun Jun 05 10:46:03 2016 +0000 @@ -3,6 +3,7 @@ import ssl import pprint import socket +import socketserver import urllib.parse # Rename HTTPServer to _HTTPServer so as to avoid confusion with HTTPSServer. from http.server import (HTTPServer as _HTTPServer, @@ -16,7 +17,7 @@ HOST = support.HOST CERTFILE = os.path.join(here, 'keycert.pem') -# This one's based on HTTPServer, which is based on SocketServer +# This one's based on HTTPServer, which is based on socketserver class HTTPSServer(_HTTPServer): @@ -106,7 +107,7 @@ self.send_header("Content-Length", str(len(body))) self.end_headers() if send_body: - self.wfile.write(body) + socketserver._write_exactly(self.wfile, body) def do_HEAD(self): """Serve a HEAD request.""" diff -r 31ad7885e2e5 Lib/test/test_httpservers.py --- a/Lib/test/test_httpservers.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/test/test_httpservers.py Sun Jun 05 10:46:03 2016 +0000 @@ -17,9 +17,10 @@ import urllib.parse import html import http.client +import socketserver import tempfile import time -from io import BytesIO +from io import BytesIO, RawIOBase import unittest from test import support @@ -30,9 +31,6 @@ # don't write log messages to stderr pass - def read(self, n=None): - return '' - class TestServerThread(threading.Thread): def __init__(self, test_object, request_handler): @@ -114,7 +112,7 @@ self.send_header('Connection', 'close') self.end_headers() body = self.headers['x-special-incoming'].encode('utf-8') - self.wfile.write(body) + socketserver._write_exactly(self.wfile, body) def setUp(self): BaseTestCase.setUp(self) @@ -699,13 +697,19 @@ return False -class AuditableBytesIO: +class AuditableBytesIO(RawIOBase): def __init__(self): self.datas = [] + def writable(self): + return True + def write(self, data): + # Cannot rely on data being alive after write() returns (Issue 26720) + data = bytes(data) self.datas.append(data) + return len(data) def getData(self): return b''.join(self.datas) diff -r 31ad7885e2e5 Lib/test/test_imaplib.py --- a/Lib/test/test_imaplib.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/test/test_imaplib.py Sun Jun 05 10:46:03 2016 +0000 @@ -104,7 +104,7 @@ def _send(self, message): if verbose: print("SENT: %r" % message.strip()) - self.wfile.write(message) + socketserver._write_exactly(self.wfile, message) def _send_line(self, message): self._send(message + b'\r\n') @@ -297,7 +297,7 @@ class EOFHandler(socketserver.StreamRequestHandler): def handle(self): # EOF without sending a complete welcome message. - self.wfile.write(b'* OK') + socketserver._write_exactly(self.wfile, b'* OK') with self.reaped_server(EOFHandler) as server: self.assertRaises(imaplib.IMAP4.abort, @@ -494,7 +494,8 @@ class TooLongHandler(SimpleIMAPHandler): def handle(self): # Send a very long response line - self.wfile.write(b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n') + line = b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n' + socketserver._write_exactly(self.wfile, line) with self.reaped_server(TooLongHandler) as server: self.assertRaises(imaplib.IMAP4.error, diff -r 31ad7885e2e5 Lib/test/test_socketserver.py --- a/Lib/test/test_socketserver.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/test/test_socketserver.py Sun Jun 05 10:46:03 2016 +0000 @@ -107,7 +107,7 @@ class MyHandler(hdlrbase): def handle(self): line = self.rfile.readline() - self.wfile.write(line) + socketserver._write_exactly(self.wfile, line) if verbose: print("creating server") server = MyServer(addr, MyHandler) diff -r 31ad7885e2e5 Lib/test/test_urllib2_localnet.py --- a/Lib/test/test_urllib2_localnet.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/test/test_urllib2_localnet.py Sun Jun 05 10:46:03 2016 +0000 @@ -1,6 +1,7 @@ import base64 import os import email +import socketserver import urllib.parse import urllib.request import http.server @@ -151,7 +152,8 @@ # not. #request_handler.send_header('Connection', 'close') request_handler.end_headers() - request_handler.wfile.write(b"Proxy Authentication Required.") + body = b"Proxy Authentication Required." + socketserver._write_exactly(request_handler.wfile, body) return False def handle_request(self, request_handler): @@ -229,12 +231,13 @@ def do_GET(self): if not self.headers.get("Authorization", ""): self.do_AUTHHEAD() - self.wfile.write(b"No Auth header received") + body = b"No Auth header received" + socketserver._write_exactly(self.wfile, body) elif self.headers.get( "Authorization", "") == "Basic " + self.ENCODED_AUTH: self.send_response(200) self.end_headers() - self.wfile.write(b"It works") + socketserver._write_exactly(self.wfile, b"It works") else: # Request Unauthorized self.do_AUTHHEAD() @@ -269,10 +272,10 @@ self.send_response(200, "OK") self.send_header("Content-Type", "text/html") self.end_headers() - self.wfile.write(bytes("You've reached %s!
" % self.path, - "ascii")) - self.wfile.write(b"Our apologies, but our server is down due to " - b"a sudden zombie invasion.") + body = ("You've reached %s!
" + "Our apologies, but our server is down due to " + "a sudden zombie invasion.") % self.path + socketserver._write_exactly(self.wfile, bytes(body, "ascii")) # Test cases diff -r 31ad7885e2e5 Lib/test/test_xmlrpc.py --- a/Lib/test/test_xmlrpc.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/test/test_xmlrpc.py Sun Jun 05 10:46:03 2016 +0000 @@ -20,6 +20,7 @@ gzip = None try: import threading + import socketserver except ImportError: threading = None @@ -275,7 +276,7 @@ self.send_response(http.HTTPStatus.OK) self.send_header("Content-Length", len(response)) self.end_headers() - self.wfile.write(response) + socketserver._write_exactly(self.wfile, response) self.handled = True self.close_connection = False diff -r 31ad7885e2e5 Lib/xmlrpc/server.py --- a/Lib/xmlrpc/server.py Sun Jun 05 10:50:16 2016 +0300 +++ b/Lib/xmlrpc/server.py Sun Jun 05 10:46:03 2016 +0000 @@ -523,7 +523,7 @@ pass self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + socketserver._write_exactly(self.wfile, response) def decode_request_content(self, data): #support gzip encoding of request @@ -549,7 +549,7 @@ self.send_header("Content-type", "text/plain") self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + socketserver._write_exactly(self.wfile, response) def log_request(self, code='-', size='-'): """Selectively log an accepted request.""" @@ -915,7 +915,7 @@ self.send_header("Content-type", "text/html") self.send_header("Content-length", str(len(response))) self.end_headers() - self.wfile.write(response) + socketserver._write_exactly(self.wfile, response) class DocXMLRPCServer( SimpleXMLRPCServer, XMLRPCDocGenerator): diff -r 31ad7885e2e5 Misc/NEWS --- a/Misc/NEWS Sun Jun 05 10:50:16 2016 +0300 +++ b/Misc/NEWS Sun Jun 05 10:46:03 2016 +0000 @@ -27,10 +27,12 @@ Library ------- -- Issue #24291: Fix wsgiref.simple_server.WSGIRequestHandler to completely - write data to the client. Previously it could do partial writes and - truncate data. Also, wsgiref.handler.ServerHandler can now handle stdout - doing partial writes, but this is deprecated. +- Issue #24291: Fix various servers based on Python 3's + socketserver.StreamRequestHandler to completely write data to the client. + This includes the servers defined in the "http.server", "wsgiref" and + "xmlrpc.server" modules. Previously they could do partial writes and + truncate data. Also, wsgiref.handler.ServerHandler now handles stdout + doing partial writes, which produces a DeprecationWarning. - Issue #21272: Use _sysconfigdata.py to initialize distutils.sysconfig.