# HG changeset patch
# Parent 8160786d029509a7563a1321068fc330ec2ff0c6
Issue #24291: Fix partial writes to StreamRequestHandler.wfile
As far as I can tell, many servers in Python 3 suffer from this flaw,
including those defined in the “http.server”, “wsgiref”, and “xmlrpc.server”
modules. If the underlying send() method indicates a partial write, such as
when the call is interrupted to handle a signal, these servers would silently
drop the remaining data.
Also document that shutil.copyfile() does not anticipate partial writes.
diff -r 8160786d0295 Doc/library/http.server.rst
--- a/Doc/library/http.server.rst Sat Apr 09 11:33:53 2016 +0200
+++ b/Doc/library/http.server.rst Mon May 16 10:53:59 2016 +0000
@@ -100,13 +100,14 @@
.. attribute:: rfile
- Contains an input stream, positioned at the start of the optional input
- data.
+ An :class:`io.BufferedIOBase` input stream, ready to read from
+ the start of the optional input data.
.. attribute:: wfile
- Contains the output stream for writing a response back to the
- client. Proper adherence to the HTTP protocol must be used when writing to
+ An :class:`io.RawIOBase` output stream for writing a response back to
+ the client. Beware that :meth:`~io.RawIOBase.write` may do partial
+ writes. Proper adherence to the HTTP protocol must be used when writing to
this stream.
:class:`BaseHTTPRequestHandler` has the following class variables:
diff -r 8160786d0295 Doc/library/shutil.rst
--- a/Doc/library/shutil.rst Sat Apr 09 11:33:53 2016 +0200
+++ b/Doc/library/shutil.rst Mon May 16 10:53:59 2016 +0000
@@ -44,7 +44,8 @@
chunks; by default the data is read in chunks to avoid uncontrolled memory
consumption. Note that if the current file position of the *fsrc* object is not
0, only the contents from the current file position to the end of the file will
- be copied.
+ be copied. The :meth:`~io.BufferedIOBase.write` method of *fdst* should
+ write each chunk in full, like :class:`io.BufferedIOBase`.
.. function:: copyfile(src, dst, *, follow_symlinks=True)
diff -r 8160786d0295 Doc/library/socketserver.rst
--- a/Doc/library/socketserver.rst Sat Apr 09 11:33:53 2016 +0200
+++ b/Doc/library/socketserver.rst Mon May 16 10:53:59 2016 +0000
@@ -401,6 +401,15 @@
read or written, respectively, to get the request data or return data
to the client.
+ The :attr:`rfile` attributes of both classes support the
+ :class:`io.BufferedIOBase` readable interface, and
+ :attr:`DatagramRequestHandler.wfile` supports the
+ :class:`io.BufferedIOBase` writable interface. However,
+ :attr:`StreamRequestHandler.wfile` supports the :class:`io.RawIOBase`
+ writable interface instead. As a consequence, with
+ :class:`StreamRequestHandler`, a single :meth:`~io.RawIOBase.write` call
+ may do a partial write.
+
Examples
--------
@@ -442,6 +451,8 @@
An alternative request handler class that makes use of streams (file-like
objects that simplify communication by providing the standard file interface)::
+ import io
+
class MyTCPHandler(socketserver.StreamRequestHandler):
def handle(self):
@@ -450,9 +461,11 @@
self.data = self.rfile.readline().strip()
print("{} wrote:".format(self.client_address[0]))
print(self.data)
- # Likewise, self.wfile is a file-like object used to write back
- # to the client
- self.wfile.write(self.data.upper())
+ # The wfile attribute is a raw file-like object used to write back
+ # to the client. Use BufferedWriter to guarantee a complete
+ # write.
+ with io.BufferedWriter(self.wfile) as writer:
+ writer.write(self.data.upper())
The difference is that the ``readline()`` call in the second handler will call
``recv()`` multiple times until it encounters a newline character, while the
diff -r 8160786d0295 Doc/library/wsgiref.rst
--- a/Doc/library/wsgiref.rst Sat Apr 09 11:33:53 2016 +0200
+++ b/Doc/library/wsgiref.rst Mon May 16 10:53:59 2016 +0000
@@ -515,6 +515,9 @@
streams are stored in the :attr:`stdin`, :attr:`stdout`, :attr:`stderr`, and
:attr:`environ` attributes.
+ The *stdout* argument should implement :meth:`io.BufferedIOBase.write`,
+ which must write each chunk in full.
+
.. class:: BaseHandler()
diff -r 8160786d0295 Lib/http/server.py
--- a/Lib/http/server.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/http/server.py Mon May 16 10:53:59 2016 +0000
@@ -463,7 +463,7 @@
code >= 200 and
code not in (
HTTPStatus.NO_CONTENT, HTTPStatus.NOT_MODIFIED)):
- self.wfile.write(body)
+ socketserver._write_exactly(self.wfile, body)
def send_response(self, code, message=None):
"""Add the response header to the headers buffer and log the
@@ -514,7 +514,8 @@
def flush_headers(self):
if hasattr(self, '_headers_buffer'):
- self.wfile.write(b"".join(self._headers_buffer))
+ headers = b"".join(self._headers_buffer)
+ socketserver._write_exactly(self.wfile, headers)
self._headers_buffer = []
def log_request(self, code='-', size='-'):
@@ -629,7 +630,13 @@
f = self.send_head()
if f:
try:
- self.copyfile(f, self.wfile)
+ # Avoid passing a raw file object that may do partial writes
+ # (Issue 24291)
+ writer = io.BufferedWriter(self.wfile)
+ try:
+ self.copyfile(f, writer)
+ finally:
+ writer.detach()
finally:
f.close()
@@ -781,8 +788,8 @@
The SOURCE argument is a file object open for reading
(or anything with a read() method) and the DESTINATION
- argument is a file object open for writing (or
- anything with a write() method).
+ argument is a buffered file object open for writing, or
+ anything else implementing io.BufferedIOBase.write().
The only reason for overriding this would be to change
the block size or perhaps to replace newlines by CRLF
@@ -1153,7 +1160,7 @@
if not self.rfile._sock.recv(1):
break
stdout, stderr = p.communicate(data)
- self.wfile.write(stdout)
+ socketserver._write_exactly(self.wfile, stdout)
if stderr:
self.log_error('%s', stderr)
p.stderr.close()
diff -r 8160786d0295 Lib/pydoc.py
--- a/Lib/pydoc.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/pydoc.py Mon May 16 10:53:59 2016 +0000
@@ -62,6 +62,7 @@
import pkgutil
import platform
import re
+import socketserver
import sys
import time
import tokenize
@@ -2205,7 +2206,7 @@
self.send_response(200)
self.send_header('Content-Type', '%s; charset=UTF-8' % content_type)
self.end_headers()
- self.wfile.write(self.urlhandler(
+ socketserver._write_exactly(self.wfile, self.urlhandler(
self.path, content_type).encode('utf-8'))
def log_message(self, *args):
diff -r 8160786d0295 Lib/socketserver.py
--- a/Lib/socketserver.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/socketserver.py Mon May 16 10:53:59 2016 +0000
@@ -132,6 +132,7 @@
import threading
except ImportError:
import dummy_threading as threading
+from io import BytesIO, BufferedWriter
from time import monotonic as time
__all__ = ["BaseServer", "TCPServer", "UDPServer", "ForkingUDPServer",
@@ -747,13 +748,23 @@
self.wfile.close()
self.rfile.close()
+def _write_exactly(wfile, data):
+ """Write all of data to wfile, which might otherwise do partial writes
+
+ This is an internal helper and will probably be removed in a future
+ version.
+ """
+ writer = BufferedWriter(wfile)
+ try:
+ writer.write(data)
+ finally:
+ writer.detach()
class DatagramRequestHandler(BaseRequestHandler):
"""Define self.rfile and self.wfile for datagram sockets."""
def setup(self):
- from io import BytesIO
self.packet, self.socket = self.request
self.rfile = BytesIO(self.packet)
self.wfile = BytesIO()
diff -r 8160786d0295 Lib/test/ssl_servers.py
--- a/Lib/test/ssl_servers.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/ssl_servers.py Mon May 16 10:53:59 2016 +0000
@@ -3,6 +3,7 @@
import ssl
import pprint
import socket
+import socketserver
import urllib.parse
# Rename HTTPServer to _HTTPServer so as to avoid confusion with HTTPSServer.
from http.server import (HTTPServer as _HTTPServer,
@@ -16,7 +17,7 @@
HOST = support.HOST
CERTFILE = os.path.join(here, 'keycert.pem')
-# This one's based on HTTPServer, which is based on SocketServer
+# This one's based on HTTPServer, which is based on socketserver
class HTTPSServer(_HTTPServer):
@@ -106,7 +107,7 @@
self.send_header("Content-Length", str(len(body)))
self.end_headers()
if send_body:
- self.wfile.write(body)
+ socketserver._write_exactly(self.wfile, body)
def do_HEAD(self):
"""Serve a HEAD request."""
diff -r 8160786d0295 Lib/test/test_httpservers.py
--- a/Lib/test/test_httpservers.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_httpservers.py Mon May 16 10:53:59 2016 +0000
@@ -16,9 +16,10 @@
import urllib.parse
import html
import http.client
+import socketserver
import tempfile
import time
-from io import BytesIO
+from io import BytesIO, RawIOBase
import unittest
from test import support
@@ -29,9 +30,6 @@
# don't write log messages to stderr
pass
- def read(self, n=None):
- return ''
-
class TestServerThread(threading.Thread):
def __init__(self, test_object, request_handler):
@@ -113,7 +111,7 @@
self.send_header('Connection', 'close')
self.end_headers()
body = self.headers['x-special-incoming'].encode('utf-8')
- self.wfile.write(body)
+ socketserver._write_exactly(self.wfile, body)
def setUp(self):
BaseTestCase.setUp(self)
@@ -660,13 +658,19 @@
return False
-class AuditableBytesIO:
+class AuditableBytesIO(RawIOBase):
def __init__(self):
self.datas = []
+ def writable(self):
+ return True
+
def write(self, data):
+ # Cannot rely on data being alive after write() returns (Issue 26720)
+ data = bytes(data)
self.datas.append(data)
+ return len(data)
def getData(self):
return b''.join(self.datas)
diff -r 8160786d0295 Lib/test/test_imaplib.py
--- a/Lib/test/test_imaplib.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_imaplib.py Mon May 16 10:53:59 2016 +0000
@@ -104,7 +104,7 @@
def _send(self, message):
if verbose:
print("SENT: %r" % message.strip())
- self.wfile.write(message)
+ socketserver._write_exactly(self.wfile, message)
def _send_line(self, message):
self._send(message + b'\r\n')
@@ -297,7 +297,7 @@
class EOFHandler(socketserver.StreamRequestHandler):
def handle(self):
# EOF without sending a complete welcome message.
- self.wfile.write(b'* OK')
+ socketserver._write_exactly(self.wfile, b'* OK')
with self.reaped_server(EOFHandler) as server:
self.assertRaises(imaplib.IMAP4.abort,
@@ -494,7 +494,8 @@
class TooLongHandler(SimpleIMAPHandler):
def handle(self):
# Send a very long response line
- self.wfile.write(b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n')
+ line = b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n'
+ socketserver._write_exactly(self.wfile, line)
with self.reaped_server(TooLongHandler) as server:
self.assertRaises(imaplib.IMAP4.error,
diff -r 8160786d0295 Lib/test/test_socketserver.py
--- a/Lib/test/test_socketserver.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_socketserver.py Mon May 16 10:53:59 2016 +0000
@@ -110,7 +110,7 @@
class MyHandler(hdlrbase):
def handle(self):
line = self.rfile.readline()
- self.wfile.write(line)
+ socketserver._write_exactly(self.wfile, line)
if verbose: print("creating server")
server = MyServer(addr, MyHandler)
diff -r 8160786d0295 Lib/test/test_urllib2_localnet.py
--- a/Lib/test/test_urllib2_localnet.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_urllib2_localnet.py Mon May 16 10:53:59 2016 +0000
@@ -1,6 +1,7 @@
import base64
import os
import email
+import socketserver
import urllib.parse
import urllib.request
import http.server
@@ -151,7 +152,8 @@
# not.
#request_handler.send_header('Connection', 'close')
request_handler.end_headers()
- request_handler.wfile.write(b"Proxy Authentication Required.")
+ body = b"Proxy Authentication Required."
+ socketserver._write_exactly(request_handler.wfile, body)
return False
def handle_request(self, request_handler):
@@ -229,12 +231,13 @@
def do_GET(self):
if not self.headers.get("Authorization", ""):
self.do_AUTHHEAD()
- self.wfile.write(b"No Auth header received")
+ body = b"No Auth header received"
+ socketserver._write_exactly(self.wfile, body)
elif self.headers.get(
"Authorization", "") == "Basic " + self.ENCODED_AUTH:
self.send_response(200)
self.end_headers()
- self.wfile.write(b"It works")
+ socketserver._write_exactly(self.wfile, b"It works")
else:
# Request Unauthorized
self.do_AUTHHEAD()
@@ -269,10 +272,10 @@
self.send_response(200, "OK")
self.send_header("Content-Type", "text/html")
self.end_headers()
- self.wfile.write(bytes("You've reached %s!
" % self.path,
- "ascii"))
- self.wfile.write(b"Our apologies, but our server is down due to "
- b"a sudden zombie invasion.")
+ body = ("You've reached %s!
"
+ "Our apologies, but our server is down due to "
+ "a sudden zombie invasion.") % self.path
+ socketserver._write_exactly(self.wfile, bytes(body, "ascii"))
# Test cases
diff -r 8160786d0295 Lib/test/test_wsgiref.py
--- a/Lib/test/test_wsgiref.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_wsgiref.py Mon May 16 10:53:59 2016 +0000
@@ -1,3 +1,5 @@
+from test import support
+from test.test_httpservers import NoLogRequestHandler
from unittest import TestCase
from wsgiref.util import setup_testing_defaults
from wsgiref.headers import Headers
@@ -6,12 +8,14 @@
from wsgiref.validate import validator
from wsgiref.simple_server import WSGIServer, WSGIRequestHandler
from wsgiref.simple_server import make_server
+from http.client import HTTPConnection
from io import StringIO, BytesIO, BufferedReader
from socketserver import BaseServer
from platform import python_implementation
import os
import re
+import signal
import sys
import unittest
@@ -221,6 +225,47 @@
b"data",
out)
+ def test_interrupted_write(self):
+ # BaseHandler._write() and _flush() have to write all data, even if
+ # it takes multiple send() calls
+ threading = support.import_module("threading")
+ SIGUSR1 = support.get_attribute(signal, "SIGUSR1")
+
+ def app(environ, start_response):
+ start_response("200 OK", [])
+ return [bytes(support.SOCK_MAX_SIZE)]
+
+ class WsgiHandler(NoLogRequestHandler, WSGIRequestHandler):
+ pass
+
+ server = make_server(support.HOST, 0, app, handler_class=WsgiHandler)
+ self.addCleanup(server.server_close)
+ interrupted = threading.Event()
+
+ def signal_handler(signum, frame):
+ interrupted.set()
+
+ original = signal.signal(SIGUSR1, signal_handler)
+ self.addCleanup(signal.signal, SIGUSR1, original)
+ received = None
+
+ def run_client():
+ http = HTTPConnection(*server.server_address)
+ http.request("GET", "/")
+ with http.getresponse() as response:
+ response.read(100)
+ os.kill(os.getpid(), SIGUSR1)
+ interrupted.wait()
+ nonlocal received
+ received = len(response.read())
+ http.close()
+
+ background = threading.Thread(target=run_client)
+ background.start()
+ server.handle_request()
+ background.join()
+ self.assertEqual(received, support.SOCK_MAX_SIZE - 100)
+
class UtilityTests(TestCase):
diff -r 8160786d0295 Lib/test/test_xmlrpc.py
--- a/Lib/test/test_xmlrpc.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_xmlrpc.py Mon May 16 10:53:59 2016 +0000
@@ -21,6 +21,7 @@
gzip = None
try:
import threading
+ import socketserver
except ImportError:
threading = None
@@ -262,7 +263,7 @@
self.send_response(http.HTTPStatus.OK)
self.send_header("Content-Length", len(response))
self.end_headers()
- self.wfile.write(response)
+ socketserver._write_exactly(self.wfile, response)
self.handled = True
self.close_connection = False
diff -r 8160786d0295 Lib/wsgiref/simple_server.py
--- a/Lib/wsgiref/simple_server.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/wsgiref/simple_server.py Mon May 16 10:53:59 2016 +0000
@@ -11,6 +11,7 @@
"""
from http.server import BaseHTTPRequestHandler, HTTPServer
+from io import BufferedWriter
import sys
import urllib.parse
from wsgiref.handlers import SimpleHandler
@@ -126,11 +127,17 @@
if not self.parse_request(): # An error code has been sent, just exit
return
- handler = ServerHandler(
- self.rfile, self.wfile, self.get_stderr(), self.get_environ()
- )
- handler.request_handler = self # backpointer for logging
- handler.run(self.server.get_app())
+ # Avoid passing the raw file object wfile, which can do partial
+ # writes (Issue 24291)
+ stdout = BufferedWriter(self.wfile)
+ try:
+ handler = ServerHandler(
+ self.rfile, stdout, self.get_stderr(), self.get_environ()
+ )
+ handler.request_handler = self # backpointer for logging
+ handler.run(self.server.get_app())
+ finally:
+ stdout.detach()
diff -r 8160786d0295 Lib/xmlrpc/server.py
--- a/Lib/xmlrpc/server.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/xmlrpc/server.py Mon May 16 10:53:59 2016 +0000
@@ -523,7 +523,7 @@
pass
self.send_header("Content-length", str(len(response)))
self.end_headers()
- self.wfile.write(response)
+ socketserver._write_exactly(self.wfile, response)
def decode_request_content(self, data):
#support gzip encoding of request
@@ -549,7 +549,7 @@
self.send_header("Content-type", "text/plain")
self.send_header("Content-length", str(len(response)))
self.end_headers()
- self.wfile.write(response)
+ socketserver._write_exactly(self.wfile, response)
def log_request(self, code='-', size='-'):
"""Selectively log an accepted request."""
@@ -915,7 +915,7 @@
self.send_header("Content-type", "text/html")
self.send_header("Content-length", str(len(response)))
self.end_headers()
- self.wfile.write(response)
+ socketserver._write_exactly(self.wfile, response)
class DocXMLRPCServer( SimpleXMLRPCServer,
XMLRPCDocGenerator):
diff -r 8160786d0295 Misc/NEWS
--- a/Misc/NEWS Sat Apr 09 11:33:53 2016 +0200
+++ b/Misc/NEWS Mon May 16 10:53:59 2016 +0000
@@ -237,6 +237,11 @@
Library
-------
+- Issue #24291: Fix various servers based on
+ socketserver.StreamRequestHandler, which could do partial writes and
+ truncate data. This affectes the servers defined in the "http.server",
+ "wsgiref", and "xmlrpc.server" modules.
+
- Issue #16329: Add .webm to mimetypes.types_map. Patch by Giampaolo Rodola'.
- Issue #13952: Add .csv to mimetypes.types_map. Patch by Geoff Wilson.