Do not write a second BOM when a text stream is opened on a buffer that is
not positioned at its start (for example a file opened in append mode).

Summary of the change:

* Modules/_textio.c
  - add fast-path encode functions for "utf-32", "utf-32-be" and "utf-32-le"
    and register them in encodefuncs[];
  - at initialisation, if the buffer is seekable and tell() is non-zero,
    call encoder.setstate(0) and replace the BOM-writing utf16/utf32 fast
    path with the native-endian variant.
* Lib/_pyio.py
  - same check in the pure-Python implementation: when the buffer position
    is non-zero, create the encoder and call setstate(0) on it.
* Lib/test/test_io.py
  - new test_append_bom: write, append, and read back with 'utf-8-sig',
    'utf-16' and 'utf-32'.

NOTE(review): this patch was recovered from a whitespace-collapsed copy, so
the indentation of context lines is reconstructed.  If a hunk fails to match,
apply with whitespace ignored (e.g. `patch -l`) and confirm against the
target revision.

Index: Modules/_textio.c
===================================================================
--- Modules/_textio.c	(révision 70589)
+++ Modules/_textio.c	(copie de travail)
@@ -707,6 +707,39 @@
     return res;
 }
 
+static PyObject *
+utf32be_encode(PyTextIOWrapperObject *self, PyObject *text)
+{
+    return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
+                                 PyUnicode_GET_SIZE(text),
+                                 PyBytes_AS_STRING(self->errors), 1);
+}
+
+static PyObject *
+utf32le_encode(PyTextIOWrapperObject *self, PyObject *text)
+{
+    return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
+                                 PyUnicode_GET_SIZE(text),
+                                 PyBytes_AS_STRING(self->errors), -1);
+}
+
+static PyObject *
+utf32_encode(PyTextIOWrapperObject *self, PyObject *text)
+{
+    PyObject *res;
+    res = PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
+                                PyUnicode_GET_SIZE(text),
+                                PyBytes_AS_STRING(self->errors), 0);
+    if (res == NULL)
+        return NULL;
+    /* Next writes will skip the BOM and use native byte ordering */
+#if defined(WORDS_BIGENDIAN)
+    self->encodefunc = (encodefunc_t) utf32be_encode;
+#else
+    self->encodefunc = (encodefunc_t) utf32le_encode;
+#endif
+    return res;
+}
 
 static PyObject *
 utf8_encode(PyTextIOWrapperObject *self, PyObject *text)
@@ -734,10 +767,13 @@
 encodefuncentry encodefuncs[] = {
     {"ascii", (encodefunc_t) ascii_encode},
     {"iso8859-1", (encodefunc_t) latin1_encode},
+    {"utf-8", (encodefunc_t) utf8_encode},
     {"utf-16-be", (encodefunc_t) utf16be_encode},
     {"utf-16-le", (encodefunc_t) utf16le_encode},
     {"utf-16", (encodefunc_t) utf16_encode},
-    {"utf-8", (encodefunc_t) utf8_encode},
+    {"utf-32-be", (encodefunc_t) utf32be_encode},
+    {"utf-32-le", (encodefunc_t) utf32le_encode},
+    {"utf-32", (encodefunc_t) utf32_encode},
     {NULL, NULL}
 };
 
@@ -754,6 +790,7 @@
     char *newline = NULL;
     int line_buffering = 0;
     _PyIO_State *state = IO_STATE;
+    static PyObject *zero = NULL;
     PyObject *res;
     int r;
 
@@ -962,6 +999,52 @@
     self->seekable = self->telling = PyObject_IsTrue(res);
     Py_DECREF(res);
 
+    /* If the buffer is seekable and tell() reports a non-zero position
+       (cmp == 0 means "not equal to zero"), reset the encoder with
+       setstate(0) and switch the UTF-16/UTF-32 fast paths to their
+       fixed-endian variants so the BOM is not written again. */
+    if (self->seekable && self->encoder) {
+        PyObject *cookieObj;
+        int cmp;
+
+        if (zero == NULL) {
+            zero = PyLong_FromLong(0L);
+            if (zero == NULL)
+                goto error;
+        }
+
+        cookieObj = PyObject_CallMethod(buffer, "tell", NULL);
+        if (cookieObj == NULL)
+            goto error;
+
+        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
+        Py_DECREF(cookieObj);
+        if (cmp < 0) {
+            goto error;
+        }
+
+        if (cmp == 0) {
+            res = PyObject_CallMethod(self->encoder, "setstate", "O", zero);
+            if (res == NULL)
+                goto error;
+            Py_DECREF(res);
+
+            if (self->encodefunc == (encodefunc_t)utf16_encode) {
+#if defined(WORDS_BIGENDIAN)
+                self->encodefunc = (encodefunc_t)utf16be_encode;
+#else
+                self->encodefunc = (encodefunc_t)utf16le_encode;
+#endif
+            } else if (self->encodefunc == (encodefunc_t)utf32_encode) {
+#if defined(WORDS_BIGENDIAN)
+                self->encodefunc = (encodefunc_t)utf32be_encode;
+#else
+                self->encodefunc = (encodefunc_t)utf32le_encode;
+#endif
+            }
+        }
+    }
+
     self->ok = 1;
     return 0;
 
Index: Lib/_pyio.py
===================================================================
--- Lib/_pyio.py	(révision 70589)
+++ Lib/_pyio.py	(copie de travail)
@@ -1390,6 +1390,14 @@
         self._snapshot = None  # info for reconstructing decoder state
         self._seekable = self._telling = self.buffer.seekable()
 
+        # When the underlying buffer is not at position 0 (e.g. append
+        # mode), put the encoder in state 0 so the BOM is not written again.
+        if self._seekable and self.writable():
+            position = self.buffer.tell()
+            if position != 0:
+                self._encoder = self._get_encoder()
+                self._encoder.setstate(0)
+
         # self._snapshot is either None, or a tuple (dec_flags, next_input)
         # where dec_flags is the second (integer) item of the decoder state
         # and next_input is the chunk of input bytes that comes next after the
Index: Lib/test/test_io.py
===================================================================
--- Lib/test/test_io.py	(révision 70589)
+++ Lib/test/test_io.py	(copie de travail)
@@ -1797,6 +1797,21 @@
         self.assertEqual(buffer.seekable(), txt.seekable())
 
 
+    def test_append_bom(self):
+        # Text appended to an existing file must read back without a stray BOM.
+        filename = support.TESTFN
+        for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+            with self.open(filename, 'w', encoding=charset) as f:
+                f.write('aaa')
+            with self.open(filename, 'r', encoding=charset) as f:
+                self.assertEqual(f.read(), 'aaa')
+
+            with self.open(filename, 'a', encoding=charset) as f:
+                f.write('xxx')
+            with self.open(filename, 'r', encoding=charset) as f:
+                self.assertEqual(f.read(), 'aaaxxx')
+
+
 class CTextIOWrapperTest(TextIOWrapperTest):
 
     def test_initialization(self):