# HG changeset patch # Parent 41717421b134fb082374f8dbbcfc4804277c2685 Issue #25190: Allow StringIO seeking to arbitrary code point offsets * No longer restricted to zero offset with SEEK_CUR and SEEK_END * _pyio implementation stores in UTF-32 instead of UTF-8 for random access * C implementation already uses UCS-4 diff -r 41717421b134 Lib/_pyio.py --- a/Lib/_pyio.py Sun Dec 13 20:15:26 2015 -0800 +++ b/Lib/_pyio.py Tue Dec 15 06:50:44 2015 +0000 @@ -2264,7 +2264,7 @@ def truncate(self, pos=None): self.flush() if pos is None: - pos = self.tell() + pos = TextIOWrapper.tell(self) return self.buffer.truncate(pos) def detach(self): @@ -2299,7 +2299,7 @@ # Seeking to the current position should attempt to # sync the underlying buffer with the current position. whence = 0 - cookie = self.tell() + cookie = TextIOWrapper.tell(self) if whence == 2: # seek relative to end of file if cookie != 0: raise UnsupportedOperation("can't do nonzero end-relative seeks") @@ -2477,6 +2477,8 @@ return self._decoder.newlines if self._decoder else None +_utf32_encoding = "utf-32-{}e".format(sys.byteorder[0]) # le or be + class StringIO(TextIOWrapper): """Text I/O implementation using an in-memory buffer. @@ -2486,7 +2488,7 @@ def __init__(self, initial_value="", newline="\n"): super(StringIO, self).__init__(BytesIO(), - encoding="utf-8", + encoding=_utf32_encoding, errors="surrogatepass", newline=newline) # Issue #5645: make universal newlines semantics the same as in the @@ -2526,3 +2528,22 @@ def detach(self): # This doesn't make sense on StringIO. 
self._unsupported("detach") + + def tell(self): + return super().tell() // 4 + + def seek(self, offset, whence=SEEK_SET): + offset *= 4 + if offset == 0 or whence == SEEK_SET: + return super().seek(offset, whence) // 4 + if whence == SEEK_CUR: + return super().seek(super().tell() + offset) // 4 + else: + return super().seek(super().seek(0, whence) + offset) // 4 + + def truncate(self, size=None): + if size is not None: + size *= 4 + size = super().truncate(size) + super().seek(0, SEEK_CUR) # Discards the internal read buffer + return size // 4 diff -r 41717421b134 Lib/test/test_memoryio.py --- a/Lib/test/test_memoryio.py Sun Dec 13 20:15:26 2015 -0800 +++ b/Lib/test/test_memoryio.py Tue Dec 15 06:50:44 2015 +0000 @@ -45,6 +45,21 @@ self.assertEqual(buf[3:], bytesIo.read()) self.assertRaises(TypeError, bytesIo.seek, 0.0) + def test_relative_seek(self): + buf = self.buftype("1234567890") + memio = self.ioclass(buf) + + self.assertEqual(memio.seek(3, 1), 3) + self.assertEqual(memio.seek(-3, 1), 0) + self.assertEqual(memio.seek(-1, 2), 9) + self.assertEqual(memio.seek(1, 1), 10) + self.assertEqual(memio.seek(1, 2), 11) + memio.seek(-3, 2) + self.assertEqual(memio.read(), buf[-3:]) + memio.seek(0) + memio.seek(1, 1) + self.assertEqual(memio.read(), buf[1:]) + def testTell(self): buf = self.buftype("1234567890") bytesIo = self.ioclass(buf) @@ -478,21 +493,13 @@ memio.readinto(b) self.assertEqual(b, b"") - def test_relative_seek(self): + def test_seek_before_start(self): buf = self.buftype("1234567890") memio = self.ioclass(buf) self.assertEqual(memio.seek(-1, 1), 0) self.assertEqual(memio.seek(3, 1), 3) self.assertEqual(memio.seek(-4, 1), 0) - self.assertEqual(memio.seek(-1, 2), 9) - self.assertEqual(memio.seek(1, 1), 10) - self.assertEqual(memio.seek(1, 2), 11) - memio.seek(-3, 2) - self.assertEqual(memio.read(), buf[-3:]) - memio.seek(0) - memio.seek(1, 1) - self.assertEqual(memio.read(), buf[1:]) def test_unicode(self): memio = self.ioclass() @@ -537,16 +544,6 @@ 
force_decode() self.assertEqual(memio.newlines, ("\r", "\n", "\r\n")) - def test_relative_seek(self): - memio = self.ioclass() - - self.assertRaises(OSError, memio.seek, -1, 1) - self.assertRaises(OSError, memio.seek, 3, 1) - self.assertRaises(OSError, memio.seek, -3, 1) - self.assertRaises(OSError, memio.seek, -1, 2) - self.assertRaises(OSError, memio.seek, 1, 1) - self.assertRaises(OSError, memio.seek, 1, 2) - def test_textio_properties(self): memio = self.ioclass() @@ -682,6 +679,20 @@ memio.write('\ud800') self.assertEqual(memio.getvalue(), '\ud800') + def test_random_access(self): + s = self.ioclass("abcde") + self.assertEqual(s.seek(2), 2) + s.write("\U0001F600") + self.assertEqual(s.tell(), 3) + self.assertEqual(s.getvalue(), "ab\U0001F600de") + self.assertEqual(s.seek(-2, io.SEEK_CUR), 1) + self.assertEqual(s.read(2), "b\U0001F600") + self.assertEqual(s.truncate(4), 4) + self.assertEqual(s.read(), "d") + self.assertEqual(s.seek(-1, io.SEEK_END), 3) + self.assertEqual(s.truncate(), 3) + self.assertEqual(s.getvalue(), "ab\U0001F600") + class PyStringIOPickleTest(TextIOTestMixin, unittest.TestCase): """Test if pickle restores properly the internal state of StringIO. diff -r 41717421b134 Modules/_io/stringio.c --- a/Modules/_io/stringio.c Sun Dec 13 20:15:26 2015 -0800 +++ b/Modules/_io/stringio.c Tue Dec 15 06:50:44 2015 +0000 @@ -519,8 +519,8 @@ Seek to character offset pos relative to position indicated by whence: 0 Start of stream (the default). pos should be >= 0; - 1 Current position - pos must be 0; - 2 End of stream - pos must be 0. + 1 Current position - pos may be positive or negative; + 2 End of stream - pos is usually negative or zero. Returns the new absolute position. 
[clinic start generated code]*/ @@ -528,34 +528,36 @@ _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence) /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/ { + Py_ssize_t base; + CHECK_INITIALIZED(self); CHECK_CLOSED(self); - if (whence != 0 && whence != 1 && whence != 2) { + switch (whence) { + case 0: + base = 0; + break; + case 1: + base = self->pos; + break; + case 2: + base = self->string_size; + break; + default: PyErr_Format(PyExc_ValueError, "Invalid whence (%i, should be 0, 1 or 2)", whence); return NULL; } - else if (pos < 0 && whence == 0) { + if (pos > PY_SSIZE_T_MAX - base) { + PyErr_SetString(PyExc_OverflowError, "Seek position too large"); + return NULL; + } + pos += base; + if (pos < 0) { PyErr_Format(PyExc_ValueError, "Negative seek position %zd", pos); return NULL; } - else if (whence != 0 && pos != 0) { - PyErr_SetString(PyExc_IOError, - "Can't do nonzero cur-relative seeks"); - return NULL; - } - - /* whence = 0: offset relative to beginning of the string. - whence = 1: no change to current position. - whence = 2: change position to end of file. */ - if (whence == 1) { - pos = self->pos; - } - else if (whence == 2) { - pos = self->string_size; - } self->pos = pos;