Index: Doc/lib/libstdtypes.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libstdtypes.tex,v retrieving revision 1.180 diff -u -p -r1.180 libstdtypes.tex --- Doc/lib/libstdtypes.tex 31 May 2005 11:03:59 -0000 1.180 +++ Doc/lib/libstdtypes.tex 5 Jun 2005 16:31:04 -0000 @@ -1628,12 +1628,14 @@ to a file, they will be converted to byt In addition, when the file is connected to a terminal, the attribute gives the encoding that the terminal is likely to use (that information might be incorrect if the user has misconfigured the -terminal). The attribute is read-only and may not be present on -all file-like objects. It may also be \code{None}, in which case -the file uses the system default encoding for converting Unicode -strings. +terminal). The attribute may not be present on all file-like objects. +It may also be \code{None}, in which case the file uses the system +default encoding for converting Unicode strings. \versionadded{2.3} +\versionchanged[The encoding attribute is now writable and is used +for encoding Unicode strings given to \method{write()} and +\method{writelines()}]{2.5} \end{memberdesc} \begin{memberdesc}[file]{mode} Index: Lib/test/test_file.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_file.py,v retrieving revision 1.17 diff -u -p -r1.17 test_file.py --- Lib/test/test_file.py 20 May 2005 03:07:04 -0000 1.17 +++ Lib/test/test_file.py 5 Jun 2005 16:31:04 -0000 @@ -175,6 +175,62 @@ else: os.unlink(TESTFN) +# verify write/writelines with unicode objects +try: + unicode +except NameError: + pass +else: + testuni = 'test\xec\xa1\xb0\xec\x95\x84\xeb\x9d\xbc'.decode('utf-8') + + # file.write(unicode) + f = file(TESTFN, 'w') + f.encoding = 'utf-8' + f.write(testuni) + f.close() + + if file(TESTFN).read() != testuni.encode('utf-8'): + raise TestFailed, ('file.write() wrote wrongly encoded string for ' + 
'unicode') + + # file.writelines([unicode, ..]) + f = file(TESTFN, 'w') + f.encoding = 'utf-8' + f.writelines([testuni[:5], testuni[5], testuni[6]]) + f.close() + + if file(TESTFN).read() != testuni.encode('utf-8'): + raise TestFailed, ('file.writelines() wrote wrongly encoded string ' + 'for unicode') + + # test whether file.write(unencodable unicode) raises exceptions + f = file(TESTFN, 'w') + f.encoding = 'ascii' + try: + try: + f.write(testuni) + except UnicodeEncodeError: + pass + else: + raise TestFailed, ("file.write() doesn't pass UnicodeEncodeError " + "correctly") + finally: + f.close() + + # test whether file.writelines(unencodable unicode) raises exceptions + f = file(TESTFN, 'w') + f.encoding = 'ascii' + try: + try: + f.writelines(['1', '2', testuni, '3']) + except UnicodeEncodeError: + pass + else: + raise TestFailed, ("file.writelines() doesn't pass " + "UnicodeEncodeError correctly") + finally: + f.close() + def bug801631(): # SF bug # "file.truncate fault on windows" Index: Objects/fileobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v retrieving revision 2.194 diff -u -p -r2.194 fileobject.c --- Objects/fileobject.c 20 May 2005 03:07:05 -0000 2.194 +++ Objects/fileobject.c 5 Jun 2005 16:31:05 -0000 @@ -323,7 +323,7 @@ PyFile_SetBufSize(PyObject *f, int bufsi } /* Set the encoding used to output Unicode strings. - Returh 1 on success, 0 on failure. */ + Return 1 on success, 0 on failure. */ int PyFile_SetEncoding(PyObject *f, const char *enc) @@ -1430,11 +1430,20 @@ file_readlines(PyFileObject *f, PyObject static PyObject * file_write(PyFileObject *f, PyObject *args) { - char *s; + char *s = NULL; + char *enc; int n, n2; + if (f->f_fp == NULL) return err_closed(); - if (!PyArg_ParseTuple(args, f->f_binary ? 
"s#" : "t#", &s, &n)) + + assert(f->f_encoding == Py_None || PyString_Check(f->f_encoding)); + if (f->f_encoding == Py_None) + enc = NULL; + else + enc = PyString_AS_STRING(f->f_encoding); + + if (!PyArg_ParseTuple(args, "et#", enc, &s, &n)) return NULL; f->f_softspace = 0; Py_BEGIN_ALLOW_THREADS @@ -1458,11 +1467,20 @@ file_writelines(PyFileObject *f, PyObjec PyObject *it; /* iter(seq) */ PyObject *result; int i, j, index, len, nwritten, islist; + char *encoding; assert(seq != NULL); if (f->f_fp == NULL) return err_closed(); + /* Get current encoding name for encoding Unicode strings in + the list. */ + assert(f->f_encoding == Py_None || PyString_Check(f->f_encoding)); + if (f->f_encoding == Py_None) + encoding = NULL; + else + encoding = PyString_AS_STRING(f->f_encoding); + result = NULL; list = NULL; islist = PyList_Check(seq); @@ -1513,7 +1531,15 @@ file_writelines(PyFileObject *f, PyObjec could potentially execute Python code. */ for (i = 0; i < j; i++) { PyObject *v = PyList_GET_ITEM(list, i); - if (!PyString_Check(v)) { + if (PyUnicode_Check(v)) { + line = PyUnicode_AsEncodedString(v, encoding, + NULL); + if (line == NULL) + goto error; + Py_DECREF(v); + PyList_SET_ITEM(list, i, line); + } + else if (!PyString_Check(v)) { const char *buffer; int len; if (((f->f_binary && @@ -1690,9 +1716,7 @@ static PyMemberDef file_memberlist[] = { "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"}, {"name", T_OBJECT, OFF(f_name), RO, "file name"}, - {"encoding", T_OBJECT, OFF(f_encoding), RO, - "file encoding"}, - /* getattr(f, "closed") is implemented without this table */ + /* "closed" and "encoding" are implemented in file_getsetlist */ {NULL} /* Sentinel */ }; @@ -1730,10 +1754,35 @@ get_newlines(PyFileObject *f, void *clos } } +static PyObject * +get_encoding(PyFileObject *f, void *closure) +{ + Py_INCREF(f->f_encoding); + return f->f_encoding; +} + +static int +set_encoding(PyFileObject *f, PyObject *value) +{ + char *val; + if (value == Py_None) { + 
Py_DECREF(f->f_encoding); + Py_INCREF(Py_None); + f->f_encoding = Py_None; + return 0; + } + val = PyString_AsString(value); + if (val == NULL || PyFile_SetEncoding((PyObject*)f, val) == 0) + return -1; + return 0; +} + static PyGetSetDef file_getsetlist[] = { {"closed", (getter)get_closed, NULL, "True if the file is closed"}, {"newlines", (getter)get_newlines, NULL, "end-of-line convention used in this file"}, + {"encoding", (getter)get_encoding, (setter)set_encoding, + "file encoding"}, {0}, };