# HG changeset patch
# Parent 6502cccf47fc9356ec99ee482737c3532b768bb9
Issue 13881: StreamWriter from incremental encoder for zlib and others

By default, the stream writer is now generated automatically by the
CodecInfo constructor. Also fixes StreamWriter.writelines() for all
bytes-to-bytes codecs.

diff -r 6502cccf47fc Doc/library/codecs.rst
--- a/Doc/library/codecs.rst	Sun Jan 25 03:11:40 2015 +0000
+++ b/Doc/library/codecs.rst	Sun Jan 25 03:21:35 2015 +0000
@@ -100,6 +100,11 @@
    :class:`StreamWriter` and :class:`StreamReader`, respectively. Stream codecs
    can maintain state.
 
+   .. versionadded:: 3.5
+      If no stream writer is specified to the constructor, a
+      stream writer factory function is automatically generated using
+      the incremental encoder, if provided.
+
    To simplify access to the various codec components, the module provides these
    additional functions which use :func:`lookup` for the codec lookup:
 
@@ -670,8 +675,7 @@
    .. method:: writelines(list)
 
       Writes the concatenated list of strings to the stream (possibly by reusing
-      the :meth:`write` method). The standard bytes-to-bytes codecs
-      do not support this method.
+      the :meth:`write` method).
 
 
    .. method:: reset()
diff -r 6502cccf47fc Lib/codecs.py
--- a/Lib/codecs.py	Sun Jan 25 03:11:40 2015 +0000
+++ b/Lib/codecs.py	Sun Jan 25 03:21:35 2015 +0000
@@ -93,6 +93,9 @@
     def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
         incrementalencoder=None, incrementaldecoder=None, name=None,
         *, _is_text_encoding=None):
+        if not streamwriter and incrementalencoder:
+            class streamwriter(_IncrementalStreamWriter):
+                _Encoder = incrementalencoder
         self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
         self.name = name
         self.encode = encode
@@ -379,7 +382,13 @@
         """ Writes the concatenated list of strings to the stream
            using .write().
         """
-        self.write(''.join(list))
+        if not list:
+            return
+        if isinstance(list[0], str):
+            join = ''.join
+        else:
+            join = b''.join
+        self.write(join(list))
 
     def reset(self):
 
@@ -411,6 +420,28 @@
     def __exit__(self, type, value, tb):
         self.stream.close()
 
+class _IncrementalStreamWriter(StreamWriter):
+    """Adapts an IncrementalEncoder to the StreamWriter API.
+
+    This class uses a codec's IncrementalEncoder to create a stateful
+    implementation of the StreamWriter API. All that is needed is to make
+    a subclass of this and set the _Encoder class attribute to the
+    particular IncrementalEncoder class to use.
+ """ + + def __init__(self, *pos, **kw): + super().__init__(*pos, **kw) + self._encoder = self._Encoder(self.errors) + + def write(self, object): + """Implements StreamWriter.write().""" + self.stream.write(self._encoder.encode(object)) + + def reset(self): + """Implements StreamWriter.reset().""" + self.stream.write(self._encoder.encode(final=True)) + self._encoder.reset() + ### class StreamReader(Codec): diff -r 6502cccf47fc Lib/encodings/base64_codec.py --- a/Lib/encodings/base64_codec.py Sun Jan 25 03:11:40 2015 +0000 +++ b/Lib/encodings/base64_codec.py Sun Jan 25 03:21:35 2015 +0000 @@ -34,9 +34,6 @@ assert self.errors == 'strict' return base64.decodebytes(input) -class StreamWriter(Codec, codecs.StreamWriter): - charbuffertype = bytes - class StreamReader(Codec, codecs.StreamReader): charbuffertype = bytes @@ -49,7 +46,6 @@ decode=base64_decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, streamreader=StreamReader, _is_text_encoding=False, ) diff -r 6502cccf47fc Lib/encodings/bz2_codec.py --- a/Lib/encodings/bz2_codec.py Sun Jan 25 03:11:40 2015 +0000 +++ b/Lib/encodings/bz2_codec.py Sun Jan 25 03:21:35 2015 +0000 @@ -57,9 +57,6 @@ def reset(self): self.decompressobj = bz2.BZ2Decompressor() -class StreamWriter(Codec, codecs.StreamWriter): - charbuffertype = bytes - class StreamReader(Codec, codecs.StreamReader): charbuffertype = bytes @@ -72,7 +69,6 @@ decode=bz2_decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, streamreader=StreamReader, _is_text_encoding=False, ) diff -r 6502cccf47fc Lib/encodings/quopri_codec.py --- a/Lib/encodings/quopri_codec.py Sun Jan 25 03:11:40 2015 +0000 +++ b/Lib/encodings/quopri_codec.py Sun Jan 25 03:21:35 2015 +0000 @@ -35,9 +35,6 @@ def decode(self, input=b'', final=False): return quopri_decode(input, self.errors)[0] -class StreamWriter(Codec, codecs.StreamWriter): - charbuffertype = bytes - class StreamReader(Codec, codecs.StreamReader): charbuffertype = bytes @@ -50,7 +47,6 @@ decode=quopri_decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, streamreader=StreamReader, _is_text_encoding=False, ) diff -r 6502cccf47fc Lib/encodings/zlib_codec.py --- a/Lib/encodings/zlib_codec.py Sun Jan 25 03:11:40 2015 +0000 +++ b/Lib/encodings/zlib_codec.py Sun Jan 25 03:21:35 2015 +0000 @@ -56,9 +56,6 @@ def reset(self): self.decompressobj = zlib.decompressobj() -class StreamWriter(Codec, codecs.StreamWriter): - charbuffertype = bytes - class StreamReader(Codec, codecs.StreamReader): charbuffertype = bytes @@ -72,6 +69,5 @@ incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, - streamwriter=StreamWriter, _is_text_encoding=False, ) diff -r 6502cccf47fc Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Sun Jan 25 03:11:40 2015 +0000 +++ b/Lib/test/test_codecs.py Sun Jan 25 03:21:35 2015 +0000 @@ -19,9 +19,23 @@ except ImportError: ctypes = None SIZEOF_WCHAR_T = -1 + + import array + def byteslike(*pos, **kw): + return array.array("b", bytes(*pos, **kw)) else: SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar) + def byteslike(*pos, **kw): + """Create a bytes-like object having no string or sequence methods""" + data = bytes(*pos, **kw) + obj = EmptyStruct() + ctypes.resize(obj, len(data)) + memoryview(obj).cast("B")[:] = data + return obj + class EmptyStruct(ctypes.Structure): + pass + def coding_checker(self, coder): def 
        self.assertEqual(coder(input), (expect, len(input)))
@@ -1677,6 +1691,33 @@
         self.assertRaises(TypeError, codecs.getwriter)
         self.assertRaises(LookupError, codecs.getwriter, "__spam__")
 
+    def test_writer_reuse(self):
+        """StreamWriter should be reusable after reset"""
+        with support.check_warnings(
+                ("unicode_internal", DeprecationWarning)):
+            for encoding in all_unicode_encodings:
+                if encoding in broken_stream_codecs:
+                    continue  # Calling reset() crashes; see Issue 23247
+                with self.subTest(encoding=encoding):
+                    writer = codecs.getwriter(encoding)(io.BytesIO())
+                    writer.reset()
+                    writer.write("abc")
+                    writer.reset()
+                    writer.write("def")
+        for encoding in bytes_transform_encodings:
+            with self.subTest(encoding=encoding):
+                writer = codecs.getwriter(encoding)(io.BytesIO())
+                writer.reset()
+                writer.write(b"abc")
+                writer.reset()
+                writer.write(b"def")
+        with self.subTest(encoding="rot-13"):
+            writer = codecs.getwriter("rot-13")(io.StringIO())
+            writer.reset()
+            writer.write("abc")
+            writer.reset()
+            writer.write("def")
+
     def test_lookup_issue1813(self):
         # Issue #1813: under Turkish locales, lookup of some codecs failed
         # because 'I' is lowercased as "ı" (dotless i)
@@ -1876,6 +1917,16 @@
     "unicode_internal"
 ]
 
+# These encodings have broken stream readers and writers; see Issue 23247
+broken_stream_codecs = {
+    "big5", "big5hkscs", "cp932", "cp949", "cp950",
+    "euc_jp", "euc_jis_2004", "euc_jisx0213", "euc_kr",
+    "gb2312", "gbk", "gb18030", "hz",
+    "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
+    "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr",
+    "johab", "shift_jis", "shift_jis_2004", "shift_jisx0213",
+}
+
 class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
     def test_basics(self):
         s = "abc123"  # all codecs should be able to encode these
@@ -2542,6 +2593,39 @@
         self.assertEqual(size, len(o))
         self.assertEqual(i, binput)
 
+    def test_writelines(self):
+        data = b"12345678"
+        for encoding in bytes_transform_encodings:
+            Writer = codecs.getwriter(encoding)
+            with self.subTest(encoding=encoding):
+                writer = Writer(io.BytesIO())
+                writer.reset()
+                expected = writer.getvalue()
+                writer = Writer(io.BytesIO())
+                writer.writelines([])
+                writer.reset()
+                self.assertEqual(writer.getvalue(), expected)
+
+                expected = codecs.encode(data * 3, encoding)
+                writer = Writer(io.BytesIO())
+                writer.writelines([byteslike(data)] * 3)
+                writer.reset()
+                self.assertEqual(writer.getvalue(), expected)
+
+    def test_multi_write(self):
+        data = bytes(200)  # Long enough to span a base64/quopri/uu line
+        broken = {"base64_codec", "quopri_codec", "uu_codec"}
+        for encoding in bytes_transform_encodings:
+            if encoding in broken:  # See Issue 20132
+                continue
+            with self.subTest(encoding=encoding):
+                expected = codecs.encode(data, encoding)
+                writer = codecs.getwriter(encoding)(io.BytesIO())
+                for b in data:
+                    writer.write(bytes((b,)))
+                writer.reset()
+                self.assertEqual(writer.getvalue(), expected)
+
     def test_read(self):
         for encoding in bytes_transform_encodings:
             with self.subTest(encoding=encoding):
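For reviewers, a rough usage sketch of what the change enables. It is not
part of the patch itself; it simply mirrors what the new test_writelines and
test_multi_write tests assert, and it assumes the patch is applied on top of a
base revision whose incremental encoders accept encode(final=True) with no
data argument:

    import codecs
    import io
    import zlib

    # zlib_codec no longer registers its own StreamWriter; the writer returned
    # by codecs.getwriter() is the one CodecInfo now generates from the codec's
    # IncrementalEncoder via the internal _IncrementalStreamWriter adapter.
    buffer = io.BytesIO()
    writer = codecs.getwriter("zlib_codec")(buffer)

    # StreamWriter.writelines() now joins bytes input with b"" instead of "",
    # so the bytes-to-bytes codecs accept it.
    writer.writelines([b"spam", b"eggs"])
    writer.reset()  # asks the incremental encoder to flush its buffered data

    # After reset(), the buffer should hold a complete zlib stream.
    assert zlib.decompress(buffer.getvalue()) == b"spameggs"

The same pattern applies to base64_codec, bz2_codec and quopri_codec, whose
hand-written StreamWriter classes are removed above.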