diff -r 6d5336a193cc Doc/library/csv.rst --- a/Doc/library/csv.rst Sun Apr 12 13:52:49 2015 +0300 +++ b/Doc/library/csv.rst Sun Apr 19 23:57:24 2015 -0400 @@ -30,7 +30,8 @@ The :mod:`csv` module's :class:`reader` and :class:`writer` objects read and write sequences. Programmers can also read and write data in dictionary form -using the :class:`DictReader` and :class:`DictWriter` classes. +using the :class:`DictReader` and :class:`DictWriter` classes or in namedtuple +form using the :class:`NamedTupleReader` and :class:`NamedTupleWriter`. .. seealso:: @@ -205,6 +206,45 @@ writer.writerow({'first_name': 'Lovely', 'last_name': 'Spam'}) writer.writerow({'first_name': 'Wonderful', 'last_name': 'Spam'}) +.. class:: NamedTupleReader(csvfile, fieldnames=None[, restkey=None[, restval=None[, dialect='excel'[, rename=False[, *args, **kwds]]]]]) + + Create an object which operates like a regular reader but maps the information + read into a :func:`namedtuple` whose fields can be accessed using attribute lookup. + The contents of *fieldnames* can be passed directly to be used as the + namedtuple fieldnames or can be a namedtuple class. If *fieldnames* is + a namedtuple class, it will be used to create the returned rows. + If *fieldnames* is ``None``, the values in the first row of the *csvfile* will + be used as the fieldnames. + If the row read has fewer fields than the fieldnames sequence, the value of + *restval* will be used as the default value. If the row read has more fields + than the fieldnames sequence, then the extra fields will be clipped unless + *restkey* has been specified. If *restkey* has been defined, then the extra + fields are stored as a single list in the last field, named as *restkey*. + The returned row will be a new namedtuple with fields specified as in + *fieldnames* but with the addition of a field *restkey*; the namedtuple + stored in the fieldnames attribute is not used to create this returned row. 
+ The value of *rename* is passed to the namedtuple factory function + as a keyword argument. + If *rename* is true, invalid fieldnames are automatically replaced with + positional names (implemented by the :func:`namedtuple` factory function). + Any other optional or keyword arguments are passed to the underlying + :class:`reader` instance. + + +.. class:: NamedTupleWriter(csvfile[, fieldnames=None[, restval=''[, extrasaction='raise'[, dialect='excel'[, *args, **kwds]]]]]) + + Create an object which operates like a regular writer but maps namedtuples onto + output rows. The *fieldnames* parameter identifies the valid fieldnames that will + be written from a *namedtuple* passed to the :meth:`writerow` method to the *csvfile*. + The optional *restval* parameter specifies the value to be written if the + *namedtuple* is missing a field listed in *fieldnames*. If the *namedtuple* + passed to the :meth:`writerow` method contains a field not found in *fieldnames*, + the optional *extrasaction* parameter indicates what action to take. + If it is set to ``'raise'``, a :exc:`ValueError` is raised. If it is set to + ``'ignore'``, extra fields in the *namedtuple* are ignored. + Any other optional or keyword arguments are passed to the underlying + :class:`writer` instance. + .. class:: Dialect @@ -403,7 +443,8 @@ number of records returned, as records can span multiple lines. -DictReader objects have the following public attribute: +:class:`DictReader` and :class:`NamedTupleReader` objects have the following +public attribute: .. attribute:: csvreader.fieldnames @@ -416,13 +457,17 @@ Writer Objects -------------- -:class:`Writer` objects (:class:`DictWriter` instances and objects returned by -the :func:`writer` function) have the following public methods. A *row* must be -a sequence of strings or numbers for :class:`Writer` objects and a dictionary -mapping fieldnames to strings or numbers (by passing them through :func:`str` -first) for :class:`DictWriter` objects. 
Note that complex numbers are written -out surrounded by parens. This may cause some problems for other programs which -read CSV files (assuming they support complex numbers at all). +:class:`Writer` objects (:class:`DictWriter` instances, :class:`NamedTupleWriter` +instances and objects returned by the :func:`writer` function) have the following +public methods. A *row* must be a sequence of strings or numbers for +:class:`Writer` objects, a dictionary mapping fieldnames to strings or numbers +(by passing them through :func:`str` first) for :class:`DictWriter` objects or +a namedtuple for :class:`NamedTupleWriter` objects that define the fieldnames +attribute. :class:`NamedTupleWriter` objects that do not define the fieldnames +attribute behave in accordance with a :class:`Writer` object. +Note that complex numbers are written out surrounded by parens. +This may cause some problems for other programs which read CSV files +(assuming they support complex numbers at all). .. method:: csvwriter.writerow(row) diff -r 6d5336a193cc Lib/csv.py --- a/Lib/csv.py Sun Apr 12 13:52:49 2015 +0300 +++ b/Lib/csv.py Sun Apr 19 23:57:24 2015 -0400 @@ -12,6 +12,7 @@ from _csv import Dialect as _Dialect from io import StringIO +from collections import namedtuple as _namedtuple __all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", "Error", "Dialect", "__doc__", "excel", "excel_tab", @@ -158,6 +159,138 @@ rows.append(self._dict_to_list(rowdict)) return self.writer.writerows(rows) + +class NamedTupleReader: + def __init__(self, f, fieldnames=None, restkey=None, restval=None, + dialect="excel", rename=False, *args, **kwds): + + # list of fieldnames for the namedtuple or a namedtuple. + self._fieldnames = fieldnames + + try: + # namedtuple subclass name. 
+ self._name = self._fieldnames.__name__ + except AttributeError: + self._name = 'Fields' + + self.restkey = restkey # key to catch long rows + self.restval = restval # default value for short rows + # Unlike the basic reader we prefer not to return blank lines. + self.lines = (x for x in reader(f, dialect, *args, **kwds) if x) + # self.filtered_reader = (x for x in self.reader if x) + self.dialect = dialect + self.line_num = 0 + + # Allow namedtuple module to automatically replace invalid fieldnames. + self.rename = rename + + @property + def fieldnames(self): + """Fetch field names from the stored namedtuple subclass, create one if + one is not yet stored.""" + # attempt to short circuit if we already have a namedtuple stored. + try: + return list(self._fieldnames._fields) + except AttributeError: + pass + + # if no fieldnames were passed, attempt to read from first row. + if self._fieldnames is None: + try: + self._fieldnames = next(self.lines) + except StopIteration: + return + finally: + self.line_num += 1 + + # update the fieldnames attr and its length + self._fieldnames = _namedtuple(self._name, + self._fieldnames, + rename=self.rename) + return list(self._fieldnames._fields) + + @fieldnames.setter + def fieldnames(self, value): + """Set the fieldnames to a new namedtuple. + Can be a sequence of fieldnames or a namedtuple subclass.""" + if hasattr(value, '_fields'): + # attempt to keep self._name attribute up to date. 
+ self._name = value.__name__ + + self._fieldnames = value + + def __iter__(self): + return self + + def __next__(self): + # this does the double duty of setting up the number of fields + # while filling in the fieldnames attr if that's not already set + fields_len = len(self.fieldnames) + + row = next(self.lines) + self.line_num += 1 + + row_len = len(row) + if row_len < fields_len: + # pad missing fields with restval if row is shorter than fields + row += [self.restval] * (fields_len - row_len) + + elif row_len > fields_len: + # if row is longer than fiedset, either clip or assign to restkey. + if self.restkey is None: + row = row[:fields_len] + else: + # need to make a new nt with restkey fieldname. + fieldnames = self._fieldnames._fields + (self.restkey,) + rest_nt = _namedtuple(self._name, fieldnames, rename=self.rename) + # Combine all extra members of the row into one in order to associate + # this one member with restkey. + row[fields_len:] = [row[fields_len:]] + + return rest_nt._make(row) + + return self._fieldnames._make(row) + + +class NamedTupleWriter: + + def __init__(self, f, fieldnames=None, restval="", extrasaction="raise", + dialect="excel", *args, **kwds): + + self.fieldnames = fieldnames # list of fieldnames for the namedtuple. + self.restval = restval # for writing short namedtuples. 
+ + if extrasaction.lower() not in ("raise", "ignore"): + raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'" + % extrasaction) + + self.extrasaction = extrasaction + self.writer = writer(f, dialect, *args, **kwds) + + def writeheader(self): + self.writerow(self._fieldnames) + + def _nt_to_list(self, row_nt): + if self.fieldnames is None: + return row_nt + + if self.extrasaction == "raise": + wrong_fields = [n for n in row_nt._fields if n not in self.fieldnames] + if wrong_fields: + raise ValueError("namedtuple contains fields not in fieldnames: " + ", ".join(wrong_fields)) + + return [getattr(row_nt, name, self.restval) for name in self.fieldnames] + + def writerow(self, row_nt): + return self.writer.writerow(self._nt_to_list(row_nt)) + + def writerows(self, row_nts): + rows = [] + for row_nt in row_nts: + rows.append(self._nt_to_list(row_nt)) + return self.writer.writerows(rows) + # Guard Sniffer's type checking against builds that exclude complex() try: complex diff -r 6d5336a193cc Lib/test/test_csv.py --- a/Lib/test/test_csv.py Sun Apr 12 13:52:49 2015 +0300 +++ b/Lib/test/test_csv.py Sun Apr 19 23:57:24 2015 -0400 @@ -8,6 +8,7 @@ from io import StringIO from tempfile import TemporaryFile import csv +import collections import gc from test import support @@ -564,6 +565,207 @@ def test_read_escape_fieldsep(self): self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']]) +#--------------------------------------------------------------------------------- + +class TestNamedTupleFields(unittest.TestCase): + ### "long" means the row is longer than the number of fieldnames + ### "short" means there are fewer elements in the row than fieldnames + def test_rename_fields(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("1one,class,_private\r\n1,2,abc\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj, rename=True) + self.assertEqual(reader.fieldnames, ["_0", "_1", "_2"]) + + Fields = collections.namedtuple('fieldnames', '_0 _1 _2', 
rename=True) + values = Fields(_0='1', _1='2', _2='abc') + + self.assertEqual(next(reader), values) + + def test_no_rename_fields(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("valid,2invalid,alsovalid\r\n1,2,abc\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj) + # accessing fieldnames causes the namedtuple factory function to be + # called. It will be passed (invalid) fieldnames from the first row in the + # fileobj. + self.assertRaises(ValueError, getattr, reader, 'fieldnames') + + def test_write_simple_nt(self): + with TemporaryFile('w+', newline='') as fileobj: + writer = csv.NamedTupleWriter(fileobj, fieldnames=["f1", "f2", "f3"]) + + Fields = collections.namedtuple('mynt', 'f1 f3') + fieldnames = Fields(f1=10, f3='abc') + + writer.writerow(fieldnames) + fileobj.seek(0) + self.assertEqual(fileobj.read(), "10,,abc\r\n") + + def test_write_nt_no_fields(self): + with TemporaryFile('w+', newline='') as fileobj: + writer = csv.NamedTupleWriter(fileobj) + + Fields = collections.namedtuple('mynt', 'f1 f2 f3') + fieldnames = Fields(f1=10, f2=None, f3='abc') + + writer.writerow(fieldnames) + fileobj.seek(0) + self.assertEqual(fileobj.read(), "10,,abc\r\n") + + def test_read_namedtuple_fields(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("1,2,abc\r\n") + fileobj.seek(0) + fieldnames = collections.namedtuple('fieldnames', 'f1 f2 f3') + reader = csv.NamedTupleReader(fileobj, fieldnames=fieldnames) + + Fields = collections.namedtuple('fieldnames', 'f1 f2 f3') + fieldnames = Fields(f1='1', f2='2', f3='abc') + + self.assertEqual(next(reader), fieldnames) + + def test_read_nt_no_fieldnames(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("f1,f2,f3\r\n1,2,abc\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj) + self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"]) + + Fields = collections.namedtuple('fieldnames', 'f1 f2 f3') + fieldnames = Fields(f1='1', f2='2', f3='abc') + + 
self.assertEqual(next(reader), fieldnames) + + # Two test cases to make sure existing ways of implicitly setting + # fieldnames continue to work. Both arise from discussion in issue3436. + def test_read_nt_fieldnames_from_file(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("f1,f2,f3\r\n1,2,abc\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj, + fieldnames=next(csv.reader(fileobj))) + self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"]) + + Fields = collections.namedtuple('fieldnames', 'f1 f2 f3') + fieldnames = Fields(f1='1', f2='2', f3='abc') + + self.assertEqual(next(reader), fieldnames) + + def test_read_nt_fieldnames_chain(self): + import itertools + with TemporaryFile('w+') as fileobj: + fileobj.write("f1,f2,f3\r\n1,2,abc\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj) + first = next(reader) + for row in itertools.chain([first], reader): + self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"]) + + Fields = collections.namedtuple('fieldnames', 'f1 f2 f3') + fieldnames = Fields(f1='1', f2='2', f3='abc') + + self.assertEqual(row, fieldnames) + + def test_read_long_clipped(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("1,2,abc,4,5,6\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj, + fieldnames=["f1", "f2"]) + + Fields = collections.namedtuple('Fields', 'f1 f2') + fieldnames = Fields(f1='1', f2='2') + + self.assertEqual(next(reader), fieldnames) + + def test_read_long_rest(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("1,2,abc,4,5,6\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj, + fieldnames=["f1", "f2"], restkey='DEFAULT') + + Fields = collections.namedtuple('fieldnames', 'f1 f2 DEFAULT') + fieldnames = Fields(f1='1', f2='2', DEFAULT=['abc', '4', '5', '6']) + + self.assertEqual(next(reader), fieldnames) + + def test_read_long_with_rest(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("1,2,abc,4,5,6\r\n") + fileobj.seek(0) + reader = 
csv.NamedTupleReader(fileobj, + fieldnames=["f1", "f2"], restkey="rest") + + Fields = collections.namedtuple('fieldnames', 'f1 f2 rest') + fieldnames = Fields(f1='1', f2='2', rest=['abc', '4', '5', '6']) + + self.assertEqual(next(reader), fieldnames) + + def test_read_long_with_rest_no_fieldnames(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("f1,f2\r\n1,2,abc,4,5,6\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj, restkey="rest") + self.assertEqual(reader.fieldnames, ["f1", "f2"]) + self.assertEqual(next(reader)._asdict(), {"f1": '1', "f2": '2', + "rest": ["abc", "4", "5", "6"]}) + + def test_read_short(self): + with TemporaryFile('w+') as fileobj: + fileobj.write("1,2,abc,4,5,6\r\n1,2,abc\r\n") + fileobj.seek(0) + reader = csv.NamedTupleReader(fileobj, + fieldnames="one two three four five six", + restval="DEFAULT") + + self.assertEqual(next(reader)._asdict(), + dict(one='1', two='2', three='abc', + four='4', five='5', six='6')) + + self.assertEqual(next(reader)._asdict(), + dict(one='1', two='2', three='abc', + four='DEFAULT', five='DEFAULT', + six='DEFAULT')) + + def test_read_multi(self): + sample = [ + '2147483648,43.0e12,17,abc,def\r\n', + '147483648,43.0e2,17,abc,def\r\n', + '47483648,43.0,170,abc,def\r\n' + ] + + reader = csv.NamedTupleReader(sample, + fieldnames="i1 float i2 s1 s2".split()) + self.assertEqual(next(reader)._asdict(), {"i1": '2147483648', + "float": '43.0e12', + "i2": '17', + "s1": 'abc', + "s2": 'def'}) + + def test_read_with_blanks(self): + reader = csv.NamedTupleReader(["1,2,abc,4,5,6\r\n","\r\n", + "1,2,abc,4,5,6\r\n"], + fieldnames="one two three four five six") + + Fields = collections.namedtuple('Fields', 'one two three four five six') + fieldnames = Fields(one='1', two='2', three='abc', four='4', five='5', + six='6') + + self.assertEqual(next(reader), fieldnames) + self.assertEqual(next(reader), fieldnames) + + def test_read_semi_sep(self): + reader = csv.NamedTupleReader(["1;2;abc;4;5;6\r\n"], + 
fieldnames="one two three four five six", + delimiter=';') + self.assertEqual(next(reader)._asdict(), dict(one='1', two='2', + three='abc', four='4', five='5', six='6')) + +#--------------------------------------------------------------------------------- + class TestDictFields(unittest.TestCase): ### "long" means the row is longer than the number of fieldnames ### "short" means there are fewer elements in the row than fieldnames