diff --git a/Doc/library/io.rst b/Doc/library/io.rst --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -831,23 +831,31 @@ .. index:: single: universal newlines; io.TextIOWrapper class - *newline* controls how line endings are handled. It can be ``None``, - ``''``, ``'\n'``, ``'\r'``, and ``'\r\n'``. It works as follows: + *newline* controls how line endings are handled. - * When reading input from the stream, if *newline* is ``None``, - :term:`universal newlines` mode is enabled. Lines in the input can end in - ``'\n'``, ``'\r'``, or ``'\r\n'``, and these are translated into ``'\n'`` - before being returned to the caller. If it is ``''``, universal newlines - mode is enabled, but line endings are returned to the caller untranslated. - If it has any of the other legal values, input lines are only terminated - by the given string, and the line ending is returned to the caller - untranslated. + When reading input from the stream: - * When writing output to the stream, if *newline* is ``None``, any ``'\n'`` - characters written are translated to the system default line separator, - :data:`os.linesep`. If *newline* is ``''`` or ``'\n'``, no translation - takes place. If *newline* is any of the other legal values, any ``'\n'`` - characters written are translated to the given string. + - if *newline* is ``None``, :term:`universal newlines` mode is + enabled: lines in the input can end in ``'\n'``, ``'\r'``, or + ``'\r\n'``, and these are translated into ``'\n'`` before being + returned to the caller + - if *newline* is ``''``, :term:`universal newlines` mode is + enabled, but line endings are returned to the caller + untranslated + - if *newline* is any other value, input lines are only terminated + by the given string, and the line ending is returned to the + caller untranslated. 
+ + When writing output to the stream: + + - if *newline* is ``None``, any ``'\n'`` characters written are + translated to the system default line separator, :data:`os.linesep` + - if *newline* is ``'\r'`` or ``'\r\n'``, any ``'\n'`` characters + written are translated to the given string + - no translation takes place for any other *newline* value. + + .. versionchanged:: 3.5 + *newline* accepts ``None`` and any :class:`str` object now. If *line_buffering* is ``True``, :meth:`flush` is implied when a call to write contains a newline character. @@ -976,4 +984,3 @@ will wrap a buffered object inside a :class:`TextIOWrapper`. This includes standard streams and therefore affects the built-in function :func:`print()` as well. - diff --git a/Lib/_pyio.py b/Lib/_pyio.py --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -112,22 +112,26 @@ encoding error strings. newline is a string controlling how universal newlines works (it only - applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works - as follows: + applies to text mode). - * On input, if newline is None, universal newlines mode is - enabled. Lines in the input can end in '\n', '\r', or '\r\n', and - these are translated into '\n' before being returned to the - caller. If it is '', universal newline mode is enabled, but line - endings are returned to the caller untranslated. If it has any of - the other legal values, input lines are only terminated by the given - string, and the line ending is returned to the caller untranslated. + When reading input from the stream: - * On output, if newline is None, any '\n' characters written are - translated to the system default line separator, os.linesep. If - newline is '', no translation takes place. If newline is any of the - other legal values, any '\n' characters written are translated to - the given string. 
+ - if newline is None, universal newlines mode is enabled: lines in the + input can end in '\n', '\r', or '\r\n', and these are translated + into '\n' before being returned to the caller + - if newline is '', universal newlines mode is enabled, but line + endings are returned to the caller untranslated + - if newline is any other value, input lines are only terminated by + the given string, and the line ending is returned to the caller + untranslated. + + When writing output to the stream: + + - if newline is None, any '\n' characters written are translated to + the system default line separator, os.linesep + - if newline is '\r' or '\r\n', any '\n' characters written are + translated to the given string + - no translation takes place for any other newline value. closedfd is a bool. If closefd is False, the underlying file descriptor will be kept open when the file is closed. This does not work when a file name is @@ -1545,15 +1549,24 @@ errors determines the strictness of encoding and decoding (see the codecs.register) and defaults to "strict". - newline can be None, '', '\n', '\r', or '\r\n'. It controls the - handling of line endings. If it is None, universal newlines is - enabled. With this enabled, on input, the lines endings '\n', '\r', - or '\r\n' are translated to '\n' before being returned to the - caller. Conversely, on output, '\n' is translated to the system - default line separator, os.linesep. If newline is any other of its - legal values, that newline becomes the newline when the file is read - and it is returned untranslated. On output, '\n' is converted to the - newline. 
+ When reading input from the stream: + + - if newline is None, universal newlines mode is enabled: lines in the + input can end in '\n', '\r', or '\r\n', and these are translated + into '\n' before being returned to the caller + - if newline is '', universal newlines mode is enabled, but line + endings are returned to the caller untranslated + - if newline is any other value, input lines are only terminated by + the given string, and the line ending is returned to the caller + untranslated. + + When writing output to the stream: + + - if newline is None, any '\n' characters written are translated to + the system default line separator, os.linesep + - if newline is '\r' or '\r\n', any '\n' characters written are + translated to the given string + - no translation takes place for any other newline value. If line_buffering is True, a call to flush is implied when a call to write contains a newline character. @@ -1568,8 +1581,6 @@ line_buffering=False, write_through=False): if newline is not None and not isinstance(newline, str): raise TypeError("illegal newline type: %r" % (type(newline),)) - if newline not in (None, "", "\n", "\r", "\r\n"): - raise ValueError("illegal newline value: %r" % (newline,)) if encoding is None: try: encoding = os.device_encoding(buffer.fileno()) @@ -1605,8 +1616,8 @@ self._readuniversal = not newline self._readtranslate = newline is None self._readnl = newline - self._writetranslate = newline != '' - self._writenl = newline or os.linesep + self._writetranslate = newline in (None, '\r', '\r\n') + self._writenl = newline if newline is not None else os.linesep self._encoder = None self._decoder = None self._decoded_chars = '' # buffer for text returned from decoder @@ -1709,16 +1720,28 @@ if not isinstance(s, str): raise TypeError("can't write %s to text stream" % s.__class__.__name__) + + # translate newlines length = len(s) - haslf = (self._writetranslate or self._line_buffering) and "\n" in s - if haslf and self._writetranslate and 
self._writenl != "\n": + if self._writetranslate: + assert self._writenl in ('\r', '\r\n', os.linesep) s = s.replace("\n", self._writenl) + + # write data encoder = self._encoder or self._get_encoder() # XXX What if we were just reading? b = encoder.encode(s) - self.buffer.write(b) - if self._line_buffering and (haslf or "\r" in s): - self.flush() + self.buffer.write(b) #XXX what if an incomplete write? + + # flush() if newline is detected + if self._line_buffering: + if self._writenl == '': # universal but no newline translation + assert self._readuniversal and not self._writetranslate + has_newline = '\n' in s or '\r' in s + else: # only _writenl is a newline + has_newline = self._writenl in s + if has_newline: + self.flush() self._snapshot = None if self._decoder: self._decoder.reset() diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -40,6 +40,7 @@ import codecs import io # C implementation of io import _pyio as pyio # Python implementation of io + try: import threading except ImportError: @@ -662,8 +663,8 @@ def test_invalid_newline(self): with warnings.catch_warnings(record=True) as recorded: - with self.assertRaises(ValueError): - self.open(support.TESTFN, 'w', newline='invalid') + with self.assertRaises(TypeError): + self.open(support.TESTFN, 'w', newline=b'invalid') support.gc_collect() self.assertEqual(recorded, []) @@ -2056,12 +2057,15 @@ t.__init__(b, encoding="latin-1", newline="\r\n") self.assertEqual(t.encoding, "latin-1") self.assertEqual(t.line_buffering, False) + t.__init__(b, encoding="latin-1", newline="\0") + self.assertEqual(t.encoding, "latin-1") + self.assertEqual(t.line_buffering, False) t.__init__(b, encoding="utf-8", line_buffering=True) self.assertEqual(t.encoding, "utf-8") self.assertEqual(t.line_buffering, True) self.assertEqual("\xe9\n", t.readline()) self.assertRaises(TypeError, t.__init__, b, newline=42) - self.assertRaises(ValueError, t.__init__, b, newline='xyzzy') + 
self.assertRaises(TypeError, t.__init__, b, newline=b'xyzzy') def test_non_text_encoding_codecs_are_rejected(self): # Ensure the constructor complains if passed a codec that isn't @@ -2110,7 +2114,9 @@ self.assertEqual(r.getvalue(), b"") # No flush happened t.write("Y\nZ") self.assertEqual(r.getvalue(), b"XY\nZ") # All got flushed - t.write("A\rB") + t.write("A\rB") # no newline and no buffer overflow + self.assertEqual(r.getvalue(), b"XY\nZ") # nothing is flushed + t.flush() self.assertEqual(r.getvalue(), b"XY\nZA\rB") def test_default_encoding(self): @@ -2234,6 +2240,78 @@ self.assertEqual(got_line, exp_line) self.assertEqual(len(got_lines), len(exp_lines)) + def test_extended_newlines(self): + input_lines = [ + "a\n", "\n", "bb\n", "\r", "\r\n", "\0\N{NEL}\N{SNAKE}$"] + testdata = [ + [None, ['a\n', '\n', 'bb\n', '\n', '\n', '\0\N{NEL}\N{SNAKE}$']], + ['', input_lines], + ['\n\n', ["a\n\n", "bb\n\r\r\n\0\N{NEL}\N{SNAKE}$"]], + ['\n\r', ["a\n\nbb\n\r", "\r\n\0\N{NEL}\N{SNAKE}$"]], + ['\r\r', ["a\n\nbb\n\r\r", "\n\0\N{NEL}\N{SNAKE}$"]], + ['\0', ["a\n\nbb\n\r\r\n\0", "\N{NEL}\N{SNAKE}$"]], + ['\N{NEL}', ["a\n\nbb\n\r\r\n\0\N{NEL}", "\N{SNAKE}$"]], + ['\N{SNAKE}', ["a\n\nbb\n\r\r\n\0\N{NEL}\N{SNAKE}", '$']], + ] + encodings = ( + 'utf-8', 'latin-1', 'hz', + 'utf-16', 'utf-16-le', 'utf-16-be', + 'utf-32', 'utf-32-le', 'utf-32-be', + ) + + for encoding in encodings: + data = ''.join(input_lines).encode(encoding, 'replace') + for newline, expected in testdata: + if encoding in ('latin-1', 'hz'): + if newline in ('\N{NEL}', '\N{SNAKE}'): + continue # skip Unicode newlines for non-Unicode enc. 
+ # replace undecodable chars + expected = [s.encode(encoding, 'replace').decode(encoding) + for s in expected] + # read data + textio = self.TextIOWrapper(self.BytesIO(data), + newline=newline, + encoding=encoding) + # write data, line buffering -- off + output_rawio = self.BytesIO() + output_bufio = self.BufferedWriter(output_rawio, 1000) + output_textio = self.TextIOWrapper(output_bufio, + newline=newline, + encoding=encoding, + line_buffering=False) + # write data, line buffering -- on + linebuf_rawio = self.BytesIO() + linebuf_bufio = self.BufferedWriter(linebuf_rawio, 1000) + linebuf_textio = self.TextIOWrapper(linebuf_bufio, + newline=newline, + encoding=encoding, + line_buffering=True) + def value(rawio): + s = rawio.getvalue().decode(encoding) + if newline is None: # universal newline mode + s = s.replace(os.linesep, '\n') # undo it + return s + + lines = [] + for line, expected_line in zip(textio, expected): + lines.append(line) + self.assertEqual(line, expected_line) # reading + output_textio.write(line) + linebuf_textio.write(line) + # no buffer overflow, no flush() on newline + self.assertEqual(output_rawio.getvalue(), b'') + if line.endswith('$'): # last (no newline) + linebuf_textio.flush() + output_textio.flush() + # flush() on newline + self.assertEqual(value(linebuf_rawio), ''.join(lines)) + self.assertEqual(value(output_rawio), ''.join(expected)) + self.assertEqual(lines, expected) # `for line in file` works + textio.seek(0) + self.assertEqual(textio.readlines(), expected) + textio.seek(0) + self.assertEqual(textio.read(), "".join(expected)) + def test_newlines_input(self): testdata = b"AAA\nBB\x00B\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n") @@ -3567,6 +3645,21 @@ for name, obj in py_io_ns.items(): setattr(test, name, obj) + #XXX mark expected failures due to lack of C implementation + # for the extended newline support + + # Create dummy wrapper to avoid enabling expectedFailure for Py*Test 
+ import functools + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + return wrapper + for name in ['constructor', 'extended_newlines', 'line_buffering']: + name = 'test_' + name + func = decorator(getattr(TextIOWrapperTest, name)) + setattr(CTextIOWrapperTest, name, unittest.expectedFailure(func)) + suite = unittest.TestSuite([unittest.makeSuite(test) for test in tests]) return suite diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -640,7 +640,7 @@ def test_newline_argument(self): self.assertRaises(TypeError, self.ioclass, newline=b"\n") - self.assertRaises(ValueError, self.ioclass, newline="error") + self.assertRaises(TypeError, self.ioclass, newline=b"error") # These should not raise an error for newline in (None, "", "\n", "\r", "\r\n"): self.ioclass(newline=newline)