# HG changeset patch # User Steve Dower # Date 1473200341 25200 # Tue Sep 06 15:19:01 2016 -0700 # Node ID 2f004ab31d4fd27066e35c6f2ba0710f84c96575 # Parent f824ccaecd8ba7c407a27df4648fa698d1e9a79e Issue #6135: Adds encoding and errors parameters to subprocess diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -38,7 +38,8 @@ .. function:: run(args, *, stdin=None, input=None, stdout=None, stderr=None,\ - shell=False, timeout=None, check=False) + shell=False, timeout=None, check=False, \ + encoding=None, errors=None) Run the command described by *args*. Wait for command to complete, then return a :class:`CompletedProcess` instance. @@ -60,15 +61,20 @@ The *input* argument is passed to :meth:`Popen.communicate` and thus to the subprocess's stdin. If used it must be a byte sequence, or a string if - ``universal_newlines=True``. When used, the internal :class:`Popen` object - is automatically created with ``stdin=PIPE``, and the *stdin* argument may - not be used as well. + *encoding* is specified or *universal_newlines* is True. When used, the + internal :class:`Popen` object is automatically created with ``stdin=PIPE``, + and the *stdin* argument may not be used as well. If *check* is True, and the process exits with a non-zero exit code, a :exc:`CalledProcessError` exception will be raised. Attributes of that exception hold the arguments, the exit code, and stdout and stderr if they were captured. + If *encoding* is specified, file objects for stdin, stdout and stderr are + opened in text mode using the specified encoding and *errors*. If omitted + and *universal_newlines* is not specified, file objects are opened in + binary mode. + Examples:: >>> subprocess.run(["ls", "-l"]) # doesn't capture output @@ -85,6 +91,10 @@ .. versionadded:: 3.5 + .. versionchanged:: 3.6 + + Added *encoding* and *errors* parameters + .. 
class:: CompletedProcess The return value from :func:`run`, representing a process that has finished. @@ -104,8 +114,7 @@ .. attribute:: stdout Captured stdout from the child process. A bytes sequence, or a string if - :func:`run` was called with ``universal_newlines=True``. None if stdout - was not captured. + :func:`run` was called with an encoding or ``universal_newlines=True``. None if stdout was not captured. If you ran the process with ``stderr=subprocess.STDOUT``, stdout and stderr will be combined in this attribute, and :attr:`stderr` will be @@ -114,8 +123,7 @@ .. attribute:: stderr Captured stderr from the child process. A bytes sequence, or a string if - :func:`run` was called with ``universal_newlines=True``. None if stderr - was not captured. + :func:`run` was called with an encoding or ``universal_newlines=True``. None if stderr was not captured. .. method:: check_returncode() @@ -246,22 +254,27 @@ :data:`STDOUT`, which indicates that the stderr data from the child process should be captured into the same file handle as for *stdout*. + If *encoding* is ``None`` the file objects *stdin*, *stdout* and *stderr* + will be opened as binary streams. No encoding or line ending conversion is + done. + + If *encoding* is specified, these file objects will be opened as text + streams in :term:`universal newlines` mode using the *encoding* and *errors* + specified in the call. For *stdin*, line ending characters ``'\n'`` in the + input will be converted to the default line separator :data:`os.linesep`. + For *stdout* and *stderr*, all line endings in the output will be converted + to ``'\n'``. For more information see the documentation of the + :class:`io.TextIOWrapper` class when the *newline* argument to its + constructor is ``None``. + + .. versionadded:: 3.6 + *encoding* and *errors* added + .. index:: single: universal newlines; subprocess module - If *universal_newlines* is ``False`` the file objects *stdin*, *stdout* and - *stderr* will be opened as binary streams, and no line ending conversion is - done.
- - If *universal_newlines* is ``True``, these file objects - will be opened as text streams in :term:`universal newlines` mode - using the encoding returned by :func:`locale.getpreferredencoding(False) - <locale.getpreferredencoding>`. For *stdin*, line ending characters - ``'\n'`` in the input will be converted to the default line separator - :data:`os.linesep`. For *stdout* and *stderr*, all line endings in the - output will be converted to ``'\n'``. For more information see the - documentation of the :class:`io.TextIOWrapper` class when the *newline* - argument to its constructor is ``None``. + Specifying *universal_newlines* as ``True`` is equivalent to passing + ``locale.getpreferredencoding(False)`` for *encoding*. .. note:: @@ -306,7 +319,8 @@ stderr=None, preexec_fn=None, close_fds=True, shell=False, \ cwd=None, env=None, universal_newlines=False, \ startupinfo=None, creationflags=0, restore_signals=True, \ - start_new_session=False, pass_fds=()) + start_new_session=False, pass_fds=(), *, \ + encoding=None, errors=None) Execute a child program in a new process. On POSIX, the class uses :meth:`os.execvp`-like behavior to execute the child program. On Windows, @@ -482,10 +496,18 @@ .. _side-by-side assembly: https://en.wikipedia.org/wiki/Side-by-Side_Assembly + If *encoding* is specified, the file objects *stdin*, *stdout* and *stderr* + are opened as text streams with the specified encoding and *errors*, as + described above in :ref:`frequently-used-arguments`. Otherwise, they are + opened as binary streams. + + .. versionadded:: 3.6 + *encoding* and *errors* arguments + If *universal_newlines* is ``True``, the file objects *stdin*, *stdout* - and *stderr* are opened as text streams in universal newlines mode, as - described above in :ref:`frequently-used-arguments`, otherwise they are - opened as binary streams. + and *stderr* are opened as text streams in universal newlines mode with + *encoding* or the default locale encoding.
This flag has no effect if + *encoding* is already specified. If given, *startupinfo* will be a :class:`STARTUPINFO` object, which is passed to the underlying ``CreateProcess`` function. @@ -602,10 +624,12 @@ until end-of-file is reached. Wait for process to terminate. The optional *input* argument should be data to be sent to the child process, or ``None``, if no data should be sent to the child. The type of *input* - must be bytes or, if *universal_newlines* was ``True``, a string. + must be bytes or, if *encoding* was specified or *universal_newlines* was + ``True``, a string. :meth:`communicate` returns a tuple ``(stdout_data, stderr_data)``. - The data will be bytes or, if *universal_newlines* was ``True``, strings. + The data will be bytes or, if *encoding* was specified or + *universal_newlines* was ``True``, strings. Note that if you want to send data to the process's stdin, you need to create the Popen object with ``stdin=PIPE``. Similarly, to get anything other than @@ -672,28 +696,30 @@ .. attribute:: Popen.stdin If the *stdin* argument was :data:`PIPE`, this attribute is a writeable - stream object as returned by :func:`open`. If the *universal_newlines* - argument was ``True``, the stream is a text stream, otherwise it is a byte - stream. If the *stdin* argument was not :data:`PIPE`, this attribute is - ``None``. + stream object as returned by :func:`open`. If the *encoding* argument was + specified or the *universal_newlines* argument was ``True``, the stream is + a text stream, otherwise it is a byte stream. If the *stdin* argument was + not :data:`PIPE`, this attribute is ``None``. .. attribute:: Popen.stdout If the *stdout* argument was :data:`PIPE`, this attribute is a readable stream object as returned by :func:`open`. Reading from the stream provides - output from the child process. If the *universal_newlines* argument was - ``True``, the stream is a text stream, otherwise it is a byte stream. 
If the - *stdout* argument was not :data:`PIPE`, this attribute is ``None``. + output from the child process. If the *encoding* argument was specified or + the *universal_newlines* argument was ``True``, the stream is a text stream, + otherwise it is a byte stream. If the *stdout* argument was not :data:`PIPE`, + this attribute is ``None``. .. attribute:: Popen.stderr If the *stderr* argument was :data:`PIPE`, this attribute is a readable stream object as returned by :func:`open`. Reading from the stream provides - error output from the child process. If the *universal_newlines* argument was - ``True``, the stream is a text stream, otherwise it is a byte stream. If the - *stderr* argument was not :data:`PIPE`, this attribute is ``None``. + error output from the child process. If the *encoding* argument was specified + or the *universal_newlines* argument was ``True``, the stream is a text + stream, otherwise it is a byte stream. If the *stderr* argument was not + :data:`PIPE`, this attribute is ``None``. .. warning:: @@ -886,7 +912,9 @@ *timeout* was added. -.. function:: check_output(args, *, stdin=None, stderr=None, shell=False, universal_newlines=False, timeout=None) +.. function:: check_output(args, *, stdin=None, stderr=None, shell=False, \ + encoding=None, errors=None, \ + universal_newlines=False, timeout=None) Run command with arguments and return its output. @@ -1142,7 +1170,7 @@ Return ``(status, output)`` of executing *cmd* in a shell. Execute the string *cmd* in a shell with :meth:`Popen.check_output` and - return a 2-tuple ``(status, output)``. Universal newlines mode is used; + return a 2-tuple ``(status, output)``. The locale encoding is used; see the notes on :ref:`frequently-used-arguments` for more details. A trailing newline is stripped from the output. 
diff --git a/Lib/subprocess.py b/Lib/subprocess.py --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -30,7 +30,8 @@ preexec_fn=None, close_fds=True, shell=False, cwd=None, env=None, universal_newlines=False, startupinfo=None, creationflags=0, - restore_signals=True, start_new_session=False, pass_fds=()): + restore_signals=True, start_new_session=False, pass_fds=(), + *, encoding=None, errors=None): Arguments are: @@ -104,20 +105,18 @@ If env is not None, it defines the environment variables for the new process. -If universal_newlines is False, the file objects stdin, stdout and stderr -are opened as binary files, and no line ending conversion is done. +If encoding is specified or universal_newlines is True, the file objects +stdout and stderr are opened as a text file. Lines may be terminated by +any of '\n', the Unix end-of-line convention, '\r', the old Macintosh +convention or '\r\n', the Windows convention. All of these external +representations are seen as '\n' by the Python program. Also, the +newlines attribute of the file objects stdout, stdin and stderr are not +updated by the communicate() method. If encoding or errors are not +specified, the default encoding for io.TextIOWrapper is used. -If universal_newlines is True, the file objects stdout and stderr are -opened as a text file, but lines may be terminated by any of '\n', -the Unix end-of-line convention, '\r', the old Macintosh convention or -'\r\n', the Windows convention. All of these external representations -are seen as '\n' by the Python program. Also, the newlines attribute -of the file objects stdout, stdin and stderr are not updated by the -communicate() method. - -In either case, the process being communicated with should start up -expecting to receive bytes on its standard input and decode them with -the same encoding they are sent in. 
+If no encoding is specified and universal_newlines is False, the file +objects stdin, stdout and stderr are opened as binary files, and no +line ending conversion is done. The startupinfo and creationflags, if given, will be passed to the underlying CreateProcess() function. They can specify things such as @@ -358,6 +357,7 @@ import sys _mswindows = (sys.platform == "win32") +import _bootlocale import io import os import time @@ -808,8 +808,8 @@ """ Return (status, output) of executing cmd in a shell. Execute the string 'cmd' in a shell with 'check_output' and - return a 2-tuple (status, output). Universal newlines mode is used, - meaning that the result with be decoded to a string. + return a 2-tuple (status, output). The locale encoding is used + to decode the output and process newlines. A trailing newline is stripped from the output. The exit status for the command can be interpreted @@ -859,7 +859,7 @@ shell=False, cwd=None, env=None, universal_newlines=False, startupinfo=None, creationflags=0, restore_signals=True, start_new_session=False, - pass_fds=()): + pass_fds=(), *, encoding=None, errors=None): """Create new Popen instance.""" _cleanup() # Held while anything is calling waitpid before returncode has been @@ -944,22 +944,29 @@ if errread != -1: errread = msvcrt.open_osfhandle(errread.Detach(), 0) - if p2cwrite != -1: - self.stdin = io.open(p2cwrite, 'wb', bufsize) - if universal_newlines: - self.stdin = io.TextIOWrapper(self.stdin, write_through=True, - line_buffering=(bufsize == 1)) - if c2pread != -1: - self.stdout = io.open(c2pread, 'rb', bufsize) - if universal_newlines: - self.stdout = io.TextIOWrapper(self.stdout) - if errread != -1: - self.stderr = io.open(errread, 'rb', bufsize) - if universal_newlines: - self.stderr = io.TextIOWrapper(self.stderr) + if not encoding and universal_newlines: + encoding = _bootlocale.getpreferredencoding(False) self._closed_child_pipe_fds = False + try: + if p2cwrite != -1: + self.stdin = io.open(p2cwrite, 'wb', 
bufsize) + if encoding: + self.stdin = io.TextIOWrapper(self.stdin, write_through=True, + line_buffering=(bufsize == 1), + encoding=encoding, errors=errors) + if c2pread != -1: + self.stdout = io.open(c2pread, 'rb', bufsize) + if encoding: + self.stdout = io.TextIOWrapper(self.stdout, + encoding=encoding, errors=errors) + if errread != -1: + self.stderr = io.open(errread, 'rb', bufsize) + if encoding: + self.stderr = io.TextIOWrapper(self.stderr, + encoding=encoding, errors=errors) + self._execute_child(args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -891,29 +891,19 @@ # # UTF-16 and UTF-32-BE are sufficient to check both with BOM and # without, and UTF-16 and UTF-32. - import _bootlocale for encoding in ['utf-16', 'utf-32-be']: - old_getpreferredencoding = _bootlocale.getpreferredencoding - # Indirectly via io.TextIOWrapper, Popen() defaults to - # locale.getpreferredencoding(False) and earlier in Python 3.2 to - # locale.getpreferredencoding(). - def getpreferredencoding(do_setlocale=True): - return encoding code = ("import sys; " r"sys.stdout.buffer.write('1\r\n2\r3\n4'.encode('%s'))" % encoding) args = [sys.executable, '-c', code] - try: - _bootlocale.getpreferredencoding = getpreferredencoding - # We set stdin to be non-None because, as of this writing, - # a different code path is used when the number of pipes is - # zero or one. - popen = subprocess.Popen(args, universal_newlines=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE) - stdout, stderr = popen.communicate(input='') - finally: - _bootlocale.getpreferredencoding = old_getpreferredencoding + # We set stdin to be non-None because, as of this writing, + # a different code path is used when the number of pipes is + # zero or one. 
+ popen = subprocess.Popen(args, universal_newlines=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + encoding=encoding) + stdout, stderr = popen.communicate(input='') self.assertEqual(stdout, '1\n2\n3\n4') def test_no_leaking(self):