# HG changeset patch
# Parent 8160786d029509a7563a1321068fc330ec2ff0c6
Issue #24291: Fix partial writes to StreamRequestHandler.wfile
As far as I can tell, many servers in Python 3 suffer from this flaw,
including those defined in the “http.server”, “wsgiref”, and “xmlrpc.server”
modules. If the underlying send() method indicates a partial write, such as
when the call is interrupted to handle a signal, these servers would silently
drop the remaining data.
Also document that shutil.copyfile() does not anticipate partial writes.
diff -r 8160786d0295 Doc/library/http.server.rst
--- a/Doc/library/http.server.rst Sat Apr 09 11:33:53 2016 +0200
+++ b/Doc/library/http.server.rst Mon Apr 11 03:42:42 2016 +0000
@@ -100,13 +100,14 @@
.. attribute:: rfile
- Contains an input stream, positioned at the start of the optional input
- data.
+ An :class:`io.BufferedIOBase` input stream, ready to read from
+ the start of the optional input data.
.. attribute:: wfile
- Contains the output stream for writing a response back to the
- client. Proper adherence to the HTTP protocol must be used when writing to
+ An :class:`io.RawIOBase` output stream for writing a response back to
+ the client. Beware that :meth:`~io.RawIOBase.write` may do partial
+ writes. Proper adherence to the HTTP protocol must be used when writing to
this stream.
:class:`BaseHTTPRequestHandler` has the following class variables:
diff -r 8160786d0295 Doc/library/shutil.rst
--- a/Doc/library/shutil.rst Sat Apr 09 11:33:53 2016 +0200
+++ b/Doc/library/shutil.rst Mon Apr 11 03:42:42 2016 +0000
@@ -44,7 +44,8 @@
chunks; by default the data is read in chunks to avoid uncontrolled memory
consumption. Note that if the current file position of the *fsrc* object is not
0, only the contents from the current file position to the end of the file will
- be copied.
+ be copied. The *fdst* object should implement
+ :class:`io.BufferedIOBase.write`, which must write each chunk in full.
.. function:: copyfile(src, dst, *, follow_symlinks=True)
diff -r 8160786d0295 Doc/library/socketserver.rst
--- a/Doc/library/socketserver.rst Sat Apr 09 11:33:53 2016 +0200
+++ b/Doc/library/socketserver.rst Mon Apr 11 03:42:42 2016 +0000
@@ -401,6 +401,15 @@
read or written, respectively, to get the request data or return data
to the client.
+ The :attr:`rfile` attributes of both classes support the
+ :class:`io.BufferedIOBase` readable interface, and
+ :attr:`DatagramRequestHandler.wfile` supports the
+ :class:`io.BufferedIOBase` writable interface. However,
+ :attr:`StreamRequestHandler.wfile` supports the :class:`io.RawIOBase`
+ writable interface instead. As a consequence, with
+ :class:`StreamRequestHandler`, a single :meth:`~io.RawIOBase.write` call
+ may do a partial write.
+
Examples
--------
@@ -442,6 +451,8 @@
An alternative request handler class that makes use of streams (file-like
objects that simplify communication by providing the standard file interface)::
+ import io
+
class MyTCPHandler(socketserver.StreamRequestHandler):
def handle(self):
@@ -450,9 +461,11 @@
self.data = self.rfile.readline().strip()
print("{} wrote:".format(self.client_address[0]))
print(self.data)
- # Likewise, self.wfile is a file-like object used to write back
- # to the client
- self.wfile.write(self.data.upper())
+ # The wfile attribute is a raw file-like object used to write back
+ # to the client. Use BufferedWriter to guarantee a complete
+ # write.
+ with io.BufferedWriter(self.wfile) as writer:
+ writer.write(self.data.upper())
The difference is that the ``readline()`` call in the second handler will call
``recv()`` multiple times until it encounters a newline character, while the
diff -r 8160786d0295 Doc/library/wsgiref.rst
--- a/Doc/library/wsgiref.rst Sat Apr 09 11:33:53 2016 +0200
+++ b/Doc/library/wsgiref.rst Mon Apr 11 03:42:42 2016 +0000
@@ -515,6 +515,9 @@
streams are stored in the :attr:`stdin`, :attr:`stdout`, :attr:`stderr`, and
:attr:`environ` attributes.
+ The *stdout* argument should implement :meth:`io.BufferedIOBase.write`,
+ which must write each chunk in full.
+
.. class:: BaseHandler()
diff -r 8160786d0295 Lib/http/server.py
--- a/Lib/http/server.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/http/server.py Mon Apr 11 03:42:42 2016 +0000
@@ -463,7 +463,7 @@
code >= 200 and
code not in (
HTTPStatus.NO_CONTENT, HTTPStatus.NOT_MODIFIED)):
- self.wfile.write(body)
+ self.__write_exactly(body)
def send_response(self, code, message=None):
"""Add the response header to the headers buffer and log the
@@ -514,9 +514,17 @@
def flush_headers(self):
if hasattr(self, '_headers_buffer'):
- self.wfile.write(b"".join(self._headers_buffer))
+ self.__write_exactly(b"".join(self._headers_buffer))
self._headers_buffer = []
+ def __write_exactly(self, data):
+ """Work around wfile being a raw file, which may do partial writes"""
+ writer = io.BufferedWriter(self.wfile)
+ try:
+ writer.write(data)
+ finally:
+ writer.detach()
+
def log_request(self, code='-', size='-'):
"""Log an accepted request.
@@ -629,7 +637,11 @@
f = self.send_head()
if f:
try:
- self.copyfile(f, self.wfile)
+ writer = io.BufferedWriter(self.wfile)
+ try:
+ self.copyfile(f, writer)
+ finally:
+ writer.detach()
finally:
f.close()
@@ -781,8 +793,8 @@
The SOURCE argument is a file object open for reading
(or anything with a read() method) and the DESTINATION
- argument is a file object open for writing (or
- anything with a write() method).
+ argument is a buffered file object open for writing, or
+ anything else implementing io.BufferedIOBase.write().
The only reason for overriding this would be to change
the block size or perhaps to replace newlines by CRLF
@@ -1153,7 +1165,7 @@
if not self.rfile._sock.recv(1):
break
stdout, stderr = p.communicate(data)
- self.wfile.write(stdout)
+ self._BaseHTTPRequestHandler__write_exactly(stdout)
if stderr:
self.log_error('%s', stderr)
p.stderr.close()
diff -r 8160786d0295 Lib/pydoc.py
--- a/Lib/pydoc.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/pydoc.py Mon Apr 11 03:42:42 2016 +0000
@@ -2205,7 +2205,7 @@
self.send_response(200)
self.send_header('Content-Type', '%s; charset=UTF-8' % content_type)
self.end_headers()
- self.wfile.write(self.urlhandler(
+ self._BaseHTTPRequestHandler__write_exactly(self.urlhandler(
self.path, content_type).encode('utf-8'))
def log_message(self, *args):
diff -r 8160786d0295 Lib/test/ssl_servers.py
--- a/Lib/test/ssl_servers.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/ssl_servers.py Mon Apr 11 03:42:42 2016 +0000
@@ -106,7 +106,7 @@
self.send_header("Content-Length", str(len(body)))
self.end_headers()
if send_body:
- self.wfile.write(body)
+ self._BaseHTTPRequestHandler__write_exactly(body)
def do_HEAD(self):
"""Serve a HEAD request."""
diff -r 8160786d0295 Lib/test/test_httpservers.py
--- a/Lib/test/test_httpservers.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_httpservers.py Mon Apr 11 03:42:42 2016 +0000
@@ -18,7 +18,7 @@
import http.client
import tempfile
import time
-from io import BytesIO
+from io import BytesIO, RawIOBase
import unittest
from test import support
@@ -29,9 +29,6 @@
# don't write log messages to stderr
pass
- def read(self, n=None):
- return ''
-
class TestServerThread(threading.Thread):
def __init__(self, test_object, request_handler):
@@ -113,7 +110,7 @@
self.send_header('Connection', 'close')
self.end_headers()
body = self.headers['x-special-incoming'].encode('utf-8')
- self.wfile.write(body)
+ self._BaseHTTPRequestHandler__write_exactly(body)
def setUp(self):
BaseTestCase.setUp(self)
@@ -660,13 +657,19 @@
return False
-class AuditableBytesIO:
+class AuditableBytesIO(RawIOBase):
def __init__(self):
self.datas = []
+ def writable(self):
+ return True
+
def write(self, data):
+ # Work around limited memoryview lifetime (Issue 26720)
+ data = bytes(data)
self.datas.append(data)
+ return len(data)
def getData(self):
return b''.join(self.datas)
diff -r 8160786d0295 Lib/test/test_imaplib.py
--- a/Lib/test/test_imaplib.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_imaplib.py Mon Apr 11 03:42:42 2016 +0000
@@ -6,6 +6,7 @@
from contextlib import contextmanager
import imaplib
+from io import BufferedWriter
import os.path
import socketserver
import time
@@ -100,11 +101,17 @@
def setup(self):
super().setup()
self.server.logged = None
+ self.writer = BufferedWriter(self.wfile)
+
+ def finish(self):
+ self.writer.detach()
+ super().finish()
def _send(self, message):
if verbose:
print("SENT: %r" % message.strip())
- self.wfile.write(message)
+ self.writer.write(message)
+ self.writer.flush()
def _send_line(self, message):
self._send(message + b'\r\n')
@@ -297,7 +304,8 @@
class EOFHandler(socketserver.StreamRequestHandler):
def handle(self):
# EOF without sending a complete welcome message.
- self.wfile.write(b'* OK')
+ with BufferedWriter(self.wfile) as writer:
+ writer.write(b'* OK')
with self.reaped_server(EOFHandler) as server:
self.assertRaises(imaplib.IMAP4.abort,
@@ -494,7 +502,8 @@
class TooLongHandler(SimpleIMAPHandler):
def handle(self):
# Send a very long response line
- self.wfile.write(b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n')
+ line = b'* OK ' + imaplib._MAXLINE * b'x' + b'\r\n'
+ self.writer.write(line)
with self.reaped_server(TooLongHandler) as server:
self.assertRaises(imaplib.IMAP4.error,
diff -r 8160786d0295 Lib/test/test_socketserver.py
--- a/Lib/test/test_socketserver.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_socketserver.py Mon Apr 11 03:42:42 2016 +0000
@@ -3,6 +3,7 @@
"""
import contextlib
+import io
import os
import select
import signal
@@ -110,7 +111,9 @@
class MyHandler(hdlrbase):
def handle(self):
line = self.rfile.readline()
- self.wfile.write(line)
+ writer = io.BufferedWriter(self.wfile)
+ writer.write(line)
+ writer.detach()
if verbose: print("creating server")
server = MyServer(addr, MyHandler)
diff -r 8160786d0295 Lib/test/test_urllib2_localnet.py
--- a/Lib/test/test_urllib2_localnet.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_urllib2_localnet.py Mon Apr 11 03:42:42 2016 +0000
@@ -151,7 +151,8 @@
# not.
#request_handler.send_header('Connection', 'close')
request_handler.end_headers()
- request_handler.wfile.write(b"Proxy Authentication Required.")
+ body = b"Proxy Authentication Required."
+ request_handler._BaseHTTPRequestHandler__write_exactly(body)
return False
def handle_request(self, request_handler):
@@ -229,12 +230,13 @@
def do_GET(self):
if not self.headers.get("Authorization", ""):
self.do_AUTHHEAD()
- self.wfile.write(b"No Auth header received")
+ body = b"No Auth header received"
+ self._BaseHTTPRequestHandler__write_exactly(body)
elif self.headers.get(
"Authorization", "") == "Basic " + self.ENCODED_AUTH:
self.send_response(200)
self.end_headers()
- self.wfile.write(b"It works")
+ self._BaseHTTPRequestHandler__write_exactly(b"It works")
else:
# Request Unauthorized
self.do_AUTHHEAD()
@@ -269,10 +271,10 @@
self.send_response(200, "OK")
self.send_header("Content-Type", "text/html")
self.end_headers()
- self.wfile.write(bytes("You've reached %s!
" % self.path,
- "ascii"))
- self.wfile.write(b"Our apologies, but our server is down due to "
- b"a sudden zombie invasion.")
+ body = ("You've reached %s!
"
+ "Our apologies, but our server is down due to "
+ "a sudden zombie invasion.") % self.path
+ self._BaseHTTPRequestHandler__write_exactly(bytes(body, "ascii"))
# Test cases
diff -r 8160786d0295 Lib/test/test_wsgiref.py
--- a/Lib/test/test_wsgiref.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_wsgiref.py Mon Apr 11 03:42:42 2016 +0000
@@ -1,3 +1,5 @@
+from test import support
+from test.test_httpservers import NoLogRequestHandler
from unittest import TestCase
from wsgiref.util import setup_testing_defaults
from wsgiref.headers import Headers
@@ -6,12 +8,15 @@
from wsgiref.validate import validator
from wsgiref.simple_server import WSGIServer, WSGIRequestHandler
from wsgiref.simple_server import make_server
+from http.client import HTTPConnection
from io import StringIO, BytesIO, BufferedReader
from socketserver import BaseServer
from platform import python_implementation
import os
import re
+import signal
+import socket
import sys
import unittest
@@ -221,6 +226,47 @@
b"data",
out)
+ def test_interrupted_write(self):
+ # BaseHandler._write() and _flush() have to write all data, even if
+ # it takes multiple send() calls
+ threading = support.import_module("threading")
+ SIGUSR1 = support.get_attribute(signal, "SIGUSR1")
+
+ def app(environ, start_response):
+ start_response("200 OK", [])
+ return [bytes(support.SOCK_MAX_SIZE)]
+
+ class WsgiHandler(NoLogRequestHandler, WSGIRequestHandler):
+ pass
+
+ server = make_server(support.HOST, 0, app, handler_class=WsgiHandler)
+ self.addCleanup(server.server_close)
+ interrupted = threading.Event()
+
+ def signal_handler(signum, frame):
+ interrupted.set()
+
+ original = signal.signal(SIGUSR1, signal_handler)
+ self.addCleanup(signal.signal, SIGUSR1, original)
+ received = None
+
+ def run_client():
+ http = HTTPConnection(*server.server_address)
+ http.request("GET", "/")
+ with http.getresponse() as response:
+ response.read(100)
+ os.kill(os.getpid(), SIGUSR1)
+ interrupted.wait()
+ nonlocal received
+ received = len(response.read())
+ http.close()
+
+ background = threading.Thread(target=run_client)
+ background.start()
+ server.handle_request()
+ background.join()
+ self.assertEqual(received, support.SOCK_MAX_SIZE - 100)
+
class UtilityTests(TestCase):
diff -r 8160786d0295 Lib/test/test_xmlrpc.py
--- a/Lib/test/test_xmlrpc.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/test/test_xmlrpc.py Mon Apr 11 03:42:42 2016 +0000
@@ -262,7 +262,7 @@
self.send_response(http.HTTPStatus.OK)
self.send_header("Content-Length", len(response))
self.end_headers()
- self.wfile.write(response)
+ self._BaseHTTPRequestHandler__write_exactly(response)
self.handled = True
self.close_connection = False
diff -r 8160786d0295 Lib/wsgiref/simple_server.py
--- a/Lib/wsgiref/simple_server.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/wsgiref/simple_server.py Mon Apr 11 03:42:42 2016 +0000
@@ -11,6 +11,7 @@
"""
from http.server import BaseHTTPRequestHandler, HTTPServer
+from io import BufferedWriter
import sys
import urllib.parse
from wsgiref.handlers import SimpleHandler
@@ -126,11 +127,15 @@
if not self.parse_request(): # An error code has been sent, just exit
return
- handler = ServerHandler(
- self.rfile, self.wfile, self.get_stderr(), self.get_environ()
- )
- handler.request_handler = self # backpointer for logging
- handler.run(self.server.get_app())
+ stdout = BufferedWriter(self.wfile)
+ try:
+ handler = ServerHandler(
+ self.rfile, stdout, self.get_stderr(), self.get_environ()
+ )
+ handler.request_handler = self # backpointer for logging
+ handler.run(self.server.get_app())
+ finally:
+ stdout.detach()
diff -r 8160786d0295 Lib/xmlrpc/server.py
--- a/Lib/xmlrpc/server.py Sat Apr 09 11:33:53 2016 +0200
+++ b/Lib/xmlrpc/server.py Mon Apr 11 03:42:42 2016 +0000
@@ -523,7 +523,7 @@
pass
self.send_header("Content-length", str(len(response)))
self.end_headers()
- self.wfile.write(response)
+ self._BaseHTTPRequestHandler__write_exactly(response)
def decode_request_content(self, data):
#support gzip encoding of request
@@ -549,7 +549,7 @@
self.send_header("Content-type", "text/plain")
self.send_header("Content-length", str(len(response)))
self.end_headers()
- self.wfile.write(response)
+ self._BaseHTTPRequestHandler__write_exactly(response)
def log_request(self, code='-', size='-'):
"""Selectively log an accepted request."""
@@ -915,7 +915,7 @@
self.send_header("Content-type", "text/html")
self.send_header("Content-length", str(len(response)))
self.end_headers()
- self.wfile.write(response)
+ self._BaseHTTPRequestHandler__write_exactly(response)
class DocXMLRPCServer( SimpleXMLRPCServer,
XMLRPCDocGenerator):
diff -r 8160786d0295 Misc/NEWS
--- a/Misc/NEWS Sat Apr 09 11:33:53 2016 +0200
+++ b/Misc/NEWS Mon Apr 11 03:42:42 2016 +0000
@@ -237,6 +237,11 @@
Library
-------
+- Issue #24291: Fix various servers based on
+ socketserver.StreamRequestHandler, which could do partial writes and
+ truncate data. This affectes the servers defined in the "http.server",
+ "wsgiref", and "xmlrpc.server" modules.
+
- Issue #16329: Add .webm to mimetypes.types_map. Patch by Giampaolo Rodola'.
- Issue #13952: Add .csv to mimetypes.types_map. Patch by Geoff Wilson.