Index: Modules/_csv.c =================================================================== --- Modules/_csv.c (revision 56719) +++ Modules/_csv.c (working copy) @@ -95,9 +95,9 @@ PyObject_HEAD int doublequote; /* is " represented by ""? */ - char delimiter; /* field separator */ - char quotechar; /* quote character */ - char escapechar; /* escape character */ + Py_UNICODE delimiter; /* field separator */ + Py_UNICODE quotechar; /* quote character */ + Py_UNICODE escapechar; /* escape character */ int skipinitialspace; /* ignore spaces following delimiter? */ PyObject *lineterminator; /* string to write between records */ int quoting; /* style of quoting to write */ @@ -116,9 +116,9 @@ PyObject *fields; /* field list for current record */ ParserState state; /* current CSV parse state */ - char *field; /* build current field in here */ + Py_UNICODE *field; /* build current field in here */ int field_size; /* size of allocated buffer */ - int field_len; /* length of current field */ + Py_ssize_t field_len; /* length of current field */ int numeric_field; /* treat field as numeric */ unsigned long line_num; /* Source-file line number */ } ReaderObj; @@ -134,9 +134,9 @@ DialectObj *dialect; /* parsing dialect */ - char *rec; /* buffer for parser.join */ + Py_UNICODE *rec; /* buffer for parser.join */ int rec_size; /* size of allocated record */ - int rec_len; /* length of record */ + Py_ssize_t rec_len; /* length of record */ int num_fields; /* number of fields in record */ } WriterObj; @@ -176,7 +176,7 @@ return Py_None; } else - return PyString_FromStringAndSize((char*)&c, 1); + return PyUnicode_DecodeASCII((char*)&c, 1, NULL); } static PyObject * @@ -230,20 +230,21 @@ } static int -_set_char(const char *name, char *target, PyObject *src, char dflt) +_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt) { if (src == NULL) *target = dflt; else { *target = '\0'; if (src != Py_None) { - const char *buf; - Py_ssize_t len; - if (PyObject_AsCharBuffer(src, &buf, &len) < 0 || - len > 1) { + Py_UNICODE *buf; + Py_ssize_t len; + buf = PyUnicode_AsUnicode(src); + len = PyUnicode_GetSize(src); + if (buf == NULL || len > 1) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be an 1-character string", - name); + "\"%s\" must be an 1-character string", + name); return -1; } if (len > 0) @@ -256,8 +257,8 @@ static int _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) { - if (src == NULL) - *target = PyString_FromString(dflt); + if (src == NULL) + *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); else { if (src == Py_None) *target = NULL; @@ -528,7 +529,7 @@ { PyObject *field; - field = PyString_FromStringAndSize(self->field, self->field_len); + field = PyUnicode_FromUnicode(self->field, self->field_len); if (field == NULL) return -1; self->field_len = 0; @@ -553,14 +554,15 @@ parse_grow_buff(ReaderObj *self) { if (self->field_size == 0) { - self->field_size = 4096; + self->field_size = 4096; if (self->field != NULL) PyMem_Free(self->field); - self->field = PyMem_Malloc(self->field_size); + self->field = PyMem_New(Py_UNICODE, self->field_size); } else { self->field_size *= 2; - self->field = PyMem_Realloc(self->field, self->field_size); + self->field = PyMem_Resize(self->field, Py_UNICODE, + self->field_size); } if (self->field == NULL) { PyErr_NoMemory(); @@ -570,7 +572,7 @@ } static int -parse_add_char(ReaderObj *self, char c) +parse_add_char(ReaderObj *self, Py_UNICODE c) { if (self->field_len >= field_limit) { PyErr_Format(error_obj, "field larger than field limit (%ld)", @@ -584,7 +586,7 @@ } static int -parse_process_char(ReaderObj *self, char c) +parse_process_char(ReaderObj *self, Py_UNICODE c) { DialectObj *dialect = self->dialect; @@ -771,8 +773,8 @@ { PyObject *lineobj; PyObject *fields = NULL; - char *line, c; - int linelen; + Py_UNICODE *line, c; + Py_ssize_t linelen; if (parse_reset(self) < 0) return NULL; @@ -785,11 +787,9 @@ "newline inside string"); return NULL; } - ++self->line_num; - - line = PyString_AsString(lineobj); - linelen = PyString_Size(lineobj); - + ++self->line_num; + line = PyUnicode_AsUnicode(lineobj); + linelen = PyUnicode_GetSize(lineobj); if (line == NULL || linelen < 0) { Py_DECREF(lineobj); return NULL; @@ -958,16 +958,18 @@ #define MEM_INCR 32768 + /* Calculate new record length or append field to record. Return new * record length. */ static int -join_append_data(WriterObj *self, char *field, int quote_empty, - int *quoted, int copy_phase) +join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty, + int *quoted, int copy_phase) { DialectObj *dialect = self->dialect; - int i, rec_len; - char *lineterm; + int i; + int rec_len; + Py_UNICODE *lineterm; #define ADDCH(c) \ do {\ @@ -976,7 +978,7 @@ rec_len++;\ } while(0) - lineterm = PyString_AsString(dialect->lineterminator); + lineterm = PyUnicode_AsUnicode(dialect->lineterminator); if (lineterm == NULL) return -1; @@ -991,8 +993,9 @@ ADDCH(dialect->quotechar); /* Copy/count field data */ - for (i = 0;; i++) { - char c = field[i]; + /* If field is null just pass over */ + for (i = 0; field; i++) { + Py_UNICODE c = field[i]; int want_escape = 0; if (c == '\0') @@ -1000,8 +1003,8 @@ if (c == dialect->delimiter || c == dialect->escapechar || - c == dialect->quotechar || - strchr(lineterm, c)) { + c == dialect->quotechar || + Py_UNICODE_strchr(lineterm, c)) { if (dialect->quoting == QUOTE_NONE) want_escape = 1; else { @@ -1033,7 +1036,7 @@ if (i == 0 && quote_empty) { if (dialect->quoting == QUOTE_NONE) { PyErr_Format(error_obj, - "single empty field record must be quoted"); + "single empty field record must be quoted"); return -1; } else @@ -1058,13 +1061,14 @@ self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; if (self->rec != NULL) PyMem_Free(self->rec); - self->rec = PyMem_Malloc(self->rec_size); + self->rec = PyMem_New(Py_UNICODE, self->rec_size); } else { - char *old_rec = self->rec; + Py_UNICODE* old_rec = self->rec; self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; - self->rec = PyMem_Realloc(self->rec, self->rec_size); + self->rec = PyMem_Resize(self->rec, Py_UNICODE, + self->rec_size); if (self->rec == NULL) PyMem_Free(old_rec); } @@ -1077,7 +1081,7 @@ } static int -join_append(WriterObj *self, char *field, int *quoted, int quote_empty) +join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty) { int rec_len; @@ -1099,9 +1103,9 @@ join_append_lineterminator(WriterObj *self) { int terminator_len; - char *terminator; + Py_UNICODE *terminator; - terminator_len = PyString_Size(self->dialect->lineterminator); + terminator_len = PyUnicode_GetSize(self->dialect->lineterminator); if (terminator_len == -1) return 0; @@ -1109,10 +1113,11 @@ if (!join_check_rec_size(self, self->rec_len + terminator_len)) return 0; - terminator = PyString_AsString(self->dialect->lineterminator); + terminator = PyUnicode_AsUnicode(self->dialect->lineterminator); if (terminator == NULL) return 0; - memmove(self->rec + self->rec_len, terminator, terminator_len); + memmove(self->rec + self->rec_len, terminator, + sizeof(Py_UNICODE)*terminator_len); self->rec_len += terminator_len; return 1; @@ -1129,7 +1134,6 @@ { DialectObj *dialect = self->dialect; int len, i; - if (!PySequence_Check(seq)) return PyErr_Format(error_obj, "sequence expected"); @@ -1144,7 +1148,7 @@ PyObject *field; int append_ok; int quoted; - + field = PySequence_GetItem(seq, i); if (field == NULL) return NULL; @@ -1161,26 +1165,28 @@ break; } - if (PyString_Check(field)) { + if (PyUnicode_Check(field)) { append_ok = join_append(self, - PyString_AS_STRING(field), - "ed, len == 1); + PyUnicode_AS_UNICODE(field), + "ed, len == 1); Py_DECREF(field); } else if (field == Py_None) { - append_ok = join_append(self, "", "ed, len == 1); + + append_ok = join_append(self, NULL, + "ed, len == 1); Py_DECREF(field); } else { PyObject *str; - str = PyObject_Str(field); - Py_DECREF(field); + str = PyObject_Unicode(field); + Py_DECREF(field); if (str == NULL) return NULL; - - append_ok = join_append(self, PyString_AS_STRING(str), - "ed, len == 1); + append_ok = join_append(self, + PyUnicode_AS_UNICODE(str), + "ed, len == 1); Py_DECREF(str); } if (!append_ok) @@ -1193,7 +1199,9 @@ return 0; return PyObject_CallFunction(self->writeline, - "(s#)", self->rec, self->rec_len); + "(u#)", self->rec, + self->rec_len); + } PyDoc_STRVAR(csv_writerows_doc, Index: Lib/test/test_csv.py =================================================================== --- Lib/test/test_csv.py (revision 56719) +++ Lib/test/test_csv.py (working copy) @@ -6,7 +6,7 @@ import os import unittest from StringIO import StringIO -import tempfile +from tempfile import TemporaryFile import csv import gc from test import test_support @@ -34,7 +34,7 @@ quoting=csv.QUOTE_ALL, quotechar='') self.assertRaises(TypeError, ctor, arg, quoting=csv.QUOTE_ALL, quotechar=None) - + def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) @@ -117,17 +117,12 @@ def _write_test(self, fields, expect, **kwargs): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+b") as fileobj: writer = csv.writer(fileobj, **kwargs) writer.writerow(fields) fileobj.seek(0) - self.assertEqual(fileobj.read(), + self.assertEqual(str(fileobj.read()), expect + writer.dialect.lineterminator) - finally: - fileobj.close() - os.unlink(name) def test_write_arg_valid(self): self.assertRaises(csv.Error, self._write_test, None, '') @@ -192,17 +187,13 @@ raise IOError writer = csv.writer(BrokenFile()) self.assertRaises(IOError, writer.writerows, [['a']]) - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + + with TemporaryFile("w+b") as fileobj: writer = csv.writer(fileobj) self.assertRaises(TypeError, writer.writerows, None) writer.writerows([['a','b'],['c','d']]) fileobj.seek(0) - self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n") - finally: - fileobj.close() - os.unlink(name) + self.assertEqual(fileobj.read(), b"a,b\r\nc,d\r\n") def _read_test(self, input, expect, **kwargs): reader = csv.reader(input, **kwargs) @@ -333,18 +324,20 @@ quoting = csv.QUOTE_NONE escapechar = "\\" - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+") as fileobj: fileobj.write("abc def\nc1ccccc1 benzene\n") fileobj.seek(0) reader = csv.reader(fileobj, dialect=space()) self.assertEqual(next(reader), ["abc", "def"]) self.assertEqual(next(reader), ["c1ccccc1", "benzene"]) - finally: - fileobj.close() - os.unlink(name) + def compare_dialect_123(self, expected, *writeargs, **kwwriteargs): + with TemporaryFile("w+b") as fileobj: + writer = csv.writer(fileobj, *writeargs, **kwwriteargs) + writer.writerow([1,2,3]) + fileobj.seek(0) + self.assertEqual(str(fileobj.read()), expected) + def test_dialect_apply(self): class testA(csv.excel): delimiter = "\t" @@ -352,64 +345,20 @@ delimiter = ":" class testC(csv.excel): delimiter = "|" + class testUni(csv.excel): + delimiter = "\u039B" csv.register_dialect('testC', testC) try: - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: - writer = csv.writer(fileobj) - writer.writerow([1,2,3]) - fileobj.seek(0) - self.assertEqual(fileobj.read(), "1,2,3\r\n") - finally: - fileobj.close() - os.unlink(name) + self.compare_dialect_123("1,2,3\r\n") + self.compare_dialect_123("1\t2\t3\r\n", testA) + self.compare_dialect_123("1:2:3\r\n", dialect=testB()) + self.compare_dialect_123("1|2|3\r\n", dialect='testC') + self.compare_dialect_123("1;2;3\r\n", dialect=testA, + delimiter=';') + self.compare_dialect_123("1\u039B2\u039B3\r\n", + dialect=testUni) - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: - writer = csv.writer(fileobj, testA) - writer.writerow([1,2,3]) - fileobj.seek(0) - self.assertEqual(fileobj.read(), "1\t2\t3\r\n") - finally: - fileobj.close() - os.unlink(name) - - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: - writer = csv.writer(fileobj, dialect=testB()) - writer.writerow([1,2,3]) - fileobj.seek(0) - self.assertEqual(fileobj.read(), "1:2:3\r\n") - finally: - fileobj.close() - os.unlink(name) - - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: - writer = csv.writer(fileobj, dialect='testC') - writer.writerow([1,2,3]) - fileobj.seek(0) - self.assertEqual(fileobj.read(), "1|2|3\r\n") - finally: - fileobj.close() - os.unlink(name) - - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: - writer = csv.writer(fileobj, dialect=testA, delimiter=';') - writer.writerow([1,2,3]) - fileobj.seek(0) - self.assertEqual(fileobj.read(), "1;2;3\r\n") - finally: - fileobj.close() - os.unlink(name) - finally: csv.unregister_dialect('testC') @@ -423,29 +372,19 @@ class TestCsvBase(unittest.TestCase): def readerAssertEqual(self, input, expected_result): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+") as fileobj: fileobj.write(input) fileobj.seek(0) reader = csv.reader(fileobj, dialect = self.dialect) fields = list(reader) self.assertEqual(fields, expected_result) - finally: - fileobj.close() - os.unlink(name) def writerAssertEqual(self, input, expected_result): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+b") as fileobj: writer = csv.writer(fileobj, dialect = self.dialect) writer.writerows(input) fileobj.seek(0) - self.assertEqual(fileobj.read(), expected_result) - finally: - fileobj.close() - os.unlink(name) + self.assertEqual(str(fileobj.read()), expected_result) class TestDialectExcel(TestCsvBase): dialect = 'excel' @@ -574,91 +513,59 @@ ### "long" means the row is longer than the number of fieldnames ### "short" means there are fewer elements in the row than fieldnames def test_write_simple_dict(self): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+b") as fileobj: writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"]) writer.writerow({"f1": 10, "f3": "abc"}) fileobj.seek(0) - self.assertEqual(fileobj.read(), "10,,abc\r\n") - finally: - fileobj.close() - os.unlink(name) + self.assertEqual(str(fileobj.read()), "10,,abc\r\n") def test_write_no_fields(self): fileobj = StringIO() self.assertRaises(TypeError, csv.DictWriter, fileobj) def test_read_dict_fields(self): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+") as fileobj: fileobj.write("1,2,abc\r\n") fileobj.seek(0) reader = csv.DictReader(fileobj, fieldnames=["f1", "f2", "f3"]) self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'}) - finally: - fileobj.close() - os.unlink(name) def test_read_dict_no_fieldnames(self): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+") as fileobj: fileobj.write("f1,f2,f3\r\n1,2,abc\r\n") fileobj.seek(0) reader = csv.DictReader(fileobj) self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'}) - finally: - fileobj.close() - os.unlink(name) def test_read_long(self): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+") as fileobj: fileobj.write("1,2,abc,4,5,6\r\n") fileobj.seek(0) reader = csv.DictReader(fileobj, fieldnames=["f1", "f2"]) self.assertEqual(next(reader), {"f1": '1', "f2": '2', None: ["abc", "4", "5", "6"]}) - finally: - fileobj.close() - os.unlink(name) def test_read_long_with_rest(self): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+") as fileobj: fileobj.write("1,2,abc,4,5,6\r\n") fileobj.seek(0) reader = csv.DictReader(fileobj, fieldnames=["f1", "f2"], restkey="_rest") self.assertEqual(next(reader), {"f1": '1', "f2": '2', "_rest": ["abc", "4", "5", "6"]}) - finally: - fileobj.close() - os.unlink(name) def test_read_long_with_rest_no_fieldnames(self): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+") as fileobj: fileobj.write("f1,f2\r\n1,2,abc,4,5,6\r\n") fileobj.seek(0) reader = csv.DictReader(fileobj, restkey="_rest") self.assertEqual(next(reader), {"f1": '1', "f2": '2', "_rest": ["abc", "4", "5", "6"]}) - finally: - fileobj.close() - os.unlink(name) def test_read_short(self): - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+") as fileobj: fileobj.write("1,2,abc,4,5,6\r\n1,2,abc\r\n") fileobj.seek(0) reader = csv.DictReader(fileobj, @@ -669,9 +576,6 @@ self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc', "4": 'DEFAULT', "5": 'DEFAULT', "6": 'DEFAULT'}) - finally: - fileobj.close() - os.unlink(name) def test_read_multi(self): sample = [ @@ -710,64 +614,45 @@ contents = [(20-i) for i in range(20)] a = array.array('i', contents) - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+b") as fileobj: writer = csv.writer(fileobj, dialect="excel") writer.writerow(a) expected = ",".join([str(i) for i in a])+"\r\n" fileobj.seek(0) - self.assertEqual(fileobj.read(), expected) - finally: - fileobj.close() - os.unlink(name) + self.assertEqual(str(fileobj.read()), expected) def test_double_write(self): import array contents = [(20-i)*0.1 for i in range(20)] a = array.array('d', contents) - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+b") as fileobj: writer = csv.writer(fileobj, dialect="excel") writer.writerow(a) expected = ",".join([str(i) for i in a])+"\r\n" fileobj.seek(0) - self.assertEqual(fileobj.read(), expected) - finally: - fileobj.close() - os.unlink(name) + self.assertEqual(str(fileobj.read()), expected) def test_float_write(self): import array contents = [(20-i)*0.1 for i in range(20)] a = array.array('f', contents) - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + with TemporaryFile("w+b") as fileobj: writer = csv.writer(fileobj, dialect="excel") writer.writerow(a) expected = ",".join([str(i) for i in a])+"\r\n" fileobj.seek(0) - self.assertEqual(fileobj.read(), expected) - finally: - fileobj.close() - os.unlink(name) + self.assertEqual(str(fileobj.read()), expected) def test_char_write(self): import array, string - a = array.array('c', string.letters) - fd, name = tempfile.mkstemp() - fileobj = os.fdopen(fd, "w+b") - try: + a = array.array('u', string.letters) + + with TemporaryFile("w+b") as fileobj: writer = csv.writer(fileobj, dialect="excel") writer.writerow(a) expected = ",".join(a)+"\r\n" fileobj.seek(0) - self.assertEqual(fileobj.read(), expected) - finally: - fileobj.close() - os.unlink(name) + self.assertEqual(str(fileobj.read()), expected) class TestDialectValidity(unittest.TestCase): def test_quoting(self): @@ -970,21 +855,37 @@ # if writer leaks during write, last delta should be 5 or more self.assertEqual(delta < 5, True) -# commented out for now - csv module doesn't yet support Unicode -## class TestUnicode(unittest.TestCase): -## def test_unicode_read(self): -## import codecs -## f = codecs.EncodedFile(StringIO("Martin von Löwis," -## "Marc André Lemburg," -## "Guido van Rossum," -## "François Pinard\r\n"), -## data_encoding='iso-8859-1') -## reader = csv.reader(f) -## self.assertEqual(list(reader), [["Martin von Löwis", -## "Marc André Lemburg", -## "Guido van Rossum", -## "François Pinardn"]]) +class TestUnicode(unittest.TestCase): + names = ["Martin von Löwis", + "Marc André Lemburg", + "Guido van Rossum", + "François Pinard"] + + def test_unicode_read(self): + import io + fileobj = io.TextIOWrapper(TemporaryFile("w+b"), encoding="utf-16") + with fileobj as fileobj: + fileobj.write(",".join(self.names) + "\r\n") + + fileobj.seek(0) + reader = csv.reader(fileobj) + self.assertEqual(list(reader), [self.names]) + + + def test_unicode_write(self): + import io + with TemporaryFile("w+b") as fileobj: + encwriter = io.TextIOWrapper(fileobj, encoding="utf-8") + writer = csv.writer(encwriter) + writer.writerow(self.names) + expected = ",".join(self.names)+"\r\n" + fileobj.seek(0) + self.assertEqual(str(fileobj.read()), expected) + + + + def test_main(): mod = sys.modules[__name__] test_support.run_unittest(