Index: Objects/dictobject.c =================================================================== --- Objects/dictobject.c (revision 42548) +++ Objects/dictobject.c (working copy) @@ -8,6 +8,7 @@ */ #include "Python.h" +#include "ceval.h" typedef PyDictEntry dictentry; typedef PyDictObject dictobject; @@ -882,8 +883,16 @@ return NULL; } v = (mp->ma_lookup)(mp, key, hash) -> me_value; - if (v == NULL) - PyErr_SetObject(PyExc_KeyError, key); + if (v == NULL) { + if (PyDict_CheckExact(mp)) { + /* Avoid infinite recursion via interning. + It's also faster this way. */ + PyErr_SetObject(PyExc_KeyError, key); + return NULL; + } + return PyEval_CallMethod((PyObject *)mp, + "on_missing", "(O)", key); + } else Py_INCREF(v); return v; @@ -1756,6 +1765,12 @@ return dictiter_new(dict, &PyDictIterItem_Type); } +static PyObject * +dict_on_missing(dictobject *dict, PyObject *key) +{ + PyErr_SetObject(PyExc_KeyError, key); + return NULL; +} PyDoc_STRVAR(has_key__doc__, "D.has_key(k) -> True if D has a key k, else False"); @@ -1811,6 +1826,10 @@ PyDoc_STRVAR(iteritems__doc__, "D.iteritems() -> an iterator over the (key, value) items of D"); +PyDoc_STRVAR(on_missing__doc__, +"D.on_missing(key) raises KeyError(key)\n\ +This is a hook called by __getitem__() when the key is not found."); + static PyMethodDef mapp_methods[] = { {"__contains__",(PyCFunction)dict_has_key, METH_O | METH_COEXIST, contains__doc__}, @@ -1846,6 +1865,8 @@ itervalues__doc__}, {"iteritems", (PyCFunction)dict_iteritems, METH_NOARGS, iteritems__doc__}, + {"on_missing", (PyCFunction)dict_on_missing, METH_O, + on_missing__doc__}, {NULL, NULL} /* sentinel */ }; Index: Doc/lib/libcollections.tex =================================================================== --- Doc/lib/libcollections.tex (revision 42548) +++ Doc/lib/libcollections.tex (working copy) @@ -8,9 +8,10 @@ \versionadded{2.4} -This module implements high-performance container datatypes. Currently, the -only datatype is a deque. Future additions may include B-trees -and Fibonacci heaps. +This module implements high-performance container datatypes. Currently, +there are two datatypes, deque and defaultdict. +Future additions may include B-trees and Fibonacci heaps. +\versionchanged[Added defaultdict]{2.5} \begin{funcdesc}{deque}{\optional{iterable}} Returns a new deque objected initialized left-to-right (using @@ -211,3 +212,47 @@ [[[['a', 'b'], ['c', 'd']], [['e', 'f'], ['g', 'h']]]] \end{verbatim} + + + +\begin{funcdesc}{defaultdict}{\optional{default_factory\optional{, ...}}} + Returns a new dictionary-like object. \class{defaultdict} is a subclass + of the builtin \class{dict} class. It overrides one method and adds one + writable instance variable. The remaining functionality is the same as + for the \class{dict} class and is not documented here. + + The first argument provides the initial value for the + \member{default_factory} attribute; it defaults to \code{None}. + All remaining arguments are treated the same as if they were + passed to the \class{dict} constructor, including keyword arguments. + + \versionadded{2.5} +\end{funcdesc} + +\class{defaultdict} objects support the following method in addition to +the standard \class{dict} operations: + +\begin{methoddesc}{on_missing}{key} + If the \member{default_factory} attribute is \code{None}, this is the same + as the \method{on_missing()} method of the \class{dict} class: it raises + an \exception{KeyError} exception with the \var{key} as argument. + + If \member{default_factory} is not \code{None}, it is called without + arguments to provide a default value for the given \var{key}, this + value is inserted in the dictionary for the \var{key}, and returned. + + If calling \member{default_factory} raises an exception this exception + is propagated unchanged. + + This method is called by the \method{__getitem__} method of the + \class{dict} class when the requested key is not found; whatever it + returns or raises is then returned or raised by \method{__getitem__}. +\end{methoddesc} + +\class{defaultdict} objects support the following instance variable: + +\begin{datadesc}{default_factory} + This attribute is used by the \method{on_missing} method; it is initialized + from the first argument to the constructor, if present, or to \code{None}, + if absent. +\end{datadesc} Index: Doc/lib/libcopy.tex =================================================================== --- Doc/lib/libcopy.tex (revision 42548) +++ Doc/lib/libcopy.tex (working copy) @@ -67,9 +67,12 @@ \end{itemize} -This version does not copy types like module, class, function, method, +This module does not copy types like module, method, stack trace, stack frame, file, socket, window, array, or any similar -types. +types. It does ``copy'' functions and classes (shallow and deeply), +by returning the original object unchanged; this is compatible with +the way these are treated by the \module{pickle} module. +\versionchanged[Added copying functions]{2.5} Classes can use the same interfaces to control copying that they use to control pickling. See the description of module Index: Doc/lib/libstdtypes.tex =================================================================== --- Doc/lib/libstdtypes.tex (revision 42548) +++ Doc/lib/libstdtypes.tex (working copy) @@ -1346,11 +1346,12 @@ \ttindex{popitem()} \ttindex{iteritems()} \ttindex{iterkeys()} - \ttindex{itervalues()}} + \ttindex{itervalues()} + \\ttindex{on_missing()}} \begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes} \lineiii{len(\var{a})}{the number of items in \var{a}}{} - \lineiii{\var{a}[\var{k}]}{the item of \var{a} with key \var{k}}{(1)} + \lineiii{\var{a}[\var{k}]}{the item of \var{a} with key \var{k}}{(1), (10)} \lineiii{\var{a}[\var{k}] = \var{v}} {set \code{\var{a}[\var{k}]} to \var{v}} {} @@ -1403,6 +1404,9 @@ \lineiii{\var{a}.itervalues()} {return an iterator over the mapping's values} {(2), (3)} + \lineiii{\var{a}.on_missing(\var{key})} + {raises \exception{KeyError}(\var{key})} + {(10)} \end{tableiii} \noindent @@ -1454,6 +1458,16 @@ \versionchanged[Allowed the argument to be an iterable of key/value pairs and allowed keyword arguments]{2.4} +\item[(10)] \function{on_missing(\var{k})} is called by the +\var{a}[\var{k}] operation in order to raise the \exception{KeyError} +exception if the key \var{k} is not present. A subclass may override +this method to implement different behavior; the \var{a}[\var{k}] +operation returns or raises whatever is returned or raised by the +\function{on_missing}(\var{k}) call if the key is not present. +No other operations or methods invoke \function{on_missing}(). +For an example, see \module{collections}.\class{defaultdict}. +\versionadded{2.5} + \end{description} \subsection{File Objects Index: Lib/copy.py =================================================================== --- Lib/copy.py (revision 42548) +++ Lib/copy.py (working copy) @@ -101,7 +101,8 @@ return x for t in (type(None), int, long, float, bool, str, tuple, frozenset, type, xrange, types.ClassType, - types.BuiltinFunctionType): + types.BuiltinFunctionType, + types.FunctionType): d[t] = _copy_immutable for name in ("ComplexType", "UnicodeType", "CodeType"): t = getattr(types, name, None) @@ -217,6 +218,7 @@ d[xrange] = _deepcopy_atomic d[types.ClassType] = _deepcopy_atomic d[types.BuiltinFunctionType] = _deepcopy_atomic +d[types.FunctionType] = _deepcopy_atomic def _deepcopy_list(x, memo): y = [] Index: Lib/UserDict.py =================================================================== --- Lib/UserDict.py (revision 42548) +++ Lib/UserDict.py (working copy) @@ -14,7 +14,12 @@ else: return cmp(self.data, dict) def __len__(self): return len(self.data) - def __getitem__(self, key): return self.data[key] + def __getitem__(self, key): + if key in self.data: + return self.data[key] + return self.on_missing(key) + def on_missing(self, key): + raise KeyError(key) def __setitem__(self, key, item): self.data[key] = item def __delitem__(self, key): del self.data[key] def clear(self): self.data.clear() @@ -168,3 +173,7 @@ return cmp(dict(self.iteritems()), other) def __len__(self): return len(self.keys()) + + # The __getitem__() implementation should call this if key not found + def on_missing(self, key): + raise KeyError(key) Index: Lib/test/test_dict.py =================================================================== --- Lib/test/test_dict.py (revision 42548) +++ Lib/test/test_dict.py (working copy) @@ -395,6 +395,46 @@ else: self.fail("< didn't raise Exc") + def test_on_missing(self): + # on_missing() must exist, and raise KeyError() + self.assert_(hasattr(dict, "on_missing")) + d = {} + self.assert_(hasattr(d, "on_missing")) + self.assertRaises(KeyError, d.on_missing, 42) + self.assertRaises(KeyError, dict.on_missing, d, 42) + + def test_on_missing_subclass(self): + class D(dict): + def on_missing(self, key): + return 42 + d = D({1: 2, 3: 4}) + self.assertEqual(d[1], 2) + self.assertEqual(d[3], 4) + self.assert_(2 not in d) + self.assert_(2 not in d.keys()) + self.assertEqual(d[2], 42) + class E(dict): + def on_missing(self, key): + raise RuntimeError(key) + e = E() + try: + e[42] + except RuntimeError, err: + self.assertEqual(err.args, (42,)) + else: + self.fail_("e[42] didn't raise RuntimeError") + class F(dict): + def on_missing(self, key): + return super(F, self).on_missing(key) + f = F() + try: + f[42] + except KeyError, err: + self.assertEqual(err.args, (42,)) + else: + self.fail_("e[42] didn't raise KeyError") + + import mapping_tests class GeneralMappingTests(mapping_tests.BasicTestMappingProtocol): Index: Lib/test/test_copy.py =================================================================== --- Lib/test/test_copy.py (revision 42548) +++ Lib/test/test_copy.py (working copy) @@ -568,6 +568,22 @@ raise ValueError, "ain't got no stickin' state" self.assertRaises(ValueError, copy.copy, EvilState()) + def test_copy_function(self): + self.assertEqual(copy.copy(global_foo), global_foo) + def foo(x, y): return x+y + self.assertEqual(copy.copy(foo), foo) + bar = lambda: None + self.assertEqual(copy.copy(bar), bar) + + def test_deepcopy_function(self): + self.assertEqual(copy.deepcopy(global_foo), global_foo) + def foo(x, y): return x+y + self.assertEqual(copy.deepcopy(foo), foo) + bar = lambda: None + self.assertEqual(copy.deepcopy(bar), bar) + +def global_foo(x, y): return x+y + def test_main(): test_support.run_unittest(TestCopy) Index: Modules/collectionsmodule.c =================================================================== --- Modules/collectionsmodule.c (revision 42548) +++ Modules/collectionsmodule.c (working copy) @@ -1065,10 +1065,269 @@ 0, }; +/* defaultdict type *********************************************************/ + +typedef struct { + PyDictObject dict; + PyObject *default_factory; +} defdictobject; + +static PyTypeObject defdict_type; /* Forward */ + +PyDoc_STRVAR(defdict_on_missing_doc, +"on_missing(key) # Called by __getitem__ for missing key; pseudo-code:\n\ + if self.default_factory is None: raise KeyError(key)\n\ + self[key] = value = self.default_factory()\n\ + return value\n\ +"); + +static PyObject * +defdict_on_missing(defdictobject *dd, PyObject *key) +{ + PyObject *factory = dd->default_factory; + PyObject *value; + if (factory == NULL || factory == Py_None) { + /* XXX Call dict.on_missing(key) */ + PyErr_SetObject(PyExc_KeyError, key); + return NULL; + } + value = PyEval_CallObject(factory, NULL); + if (value == NULL) + return value; + if (PyObject_SetItem((PyObject *)dd, key, value) < 0) { + Py_DECREF(value); + return NULL; + } + return value; +} + +PyDoc_STRVAR(defdict_copy_doc, "D.copy() -> a shallow copy of D."); + +static PyObject * +defdict_copy(defdictobject *dd) +{ + /* This calls the object's class. That only works for subclasses + whose class constructor has the same signature. Subclasses that + define a different constructor signature must override copy(). + */ + return PyObject_CallFunctionObjArgs((PyObject *)dd->dict.ob_type, + dd->default_factory, dd, NULL); +} + +static PyObject * +defdict_reduce(defdictobject *dd) +{ + /* __reduce__ must returns a 5-tuple as follows: + + - factory function + - tuple of args for the factory function + - additional state (here None) + - sequence iterator (here None) + - dictionary iterator (yielding successive (key, value) pairs + + This API is used by pickle.py and copy.py. + + For this to be useful with pickle.py, the default_factory + must be picklable; e.g., None, a built-in, or a global + function in a module or package. + + Both shallow and deep copying are supported, but for deep + copying, the default_factory must be deep-copyable; e.g. None, + or a built-in (functions are not copyable at this time). + + This only works for subclasses as long as their constructor + signature is compatible; the first argument must be the + optional default_factory, defaulting to None. + */ + PyObject *args; + PyObject *items; + PyObject *result; + if (dd->default_factory == NULL || dd->default_factory == Py_None) + args = PyTuple_New(0); + else + args = PyTuple_Pack(1, dd->default_factory); + if (args == NULL) + return NULL; + items = PyObject_CallMethod((PyObject *)dd, "iteritems", "()"); + if (items == NULL) { + Py_DECREF(args); + return NULL; + } + result = PyTuple_Pack(5, dd->dict.ob_type, args, + Py_None, Py_None, items); + Py_DECREF(args); + return result; +} + +static PyMethodDef defdict_methods[] = { + {"on_missing", (PyCFunction)defdict_on_missing, METH_O, + defdict_on_missing_doc}, + {"copy", (PyCFunction)defdict_copy, METH_NOARGS, + defdict_copy_doc}, + {"__copy__", (PyCFunction)defdict_copy, METH_NOARGS, + defdict_copy_doc}, + {"__reduce__", (PyCFunction)defdict_reduce, METH_NOARGS, + reduce_doc}, + {NULL} +}; + +static PyMemberDef defdict_members[] = { + {"default_factory", T_OBJECT, + offsetof(defdictobject, default_factory), 0, + PyDoc_STR("Factory for default value called by on_missing().")}, + {NULL} +}; + +static void +defdict_dealloc(defdictobject *dd) +{ + Py_CLEAR(dd->default_factory); + PyDict_Type.tp_dealloc((PyObject *)dd); +} + +static int +defdict_print(defdictobject *dd, FILE *fp, int flags) +{ + int sts; + fprintf(fp, "defaultdict("); + if (dd->default_factory == NULL) + fprintf(fp, "None"); + else { + PyObject_Print(dd->default_factory, fp, 0); + } + fprintf(fp, ", "); + sts = PyDict_Type.tp_print((PyObject *)dd, fp, 0); + fprintf(fp, ")"); + return sts; +} + +static PyObject * +defdict_repr(defdictobject *dd) +{ + PyObject *defrepr; + PyObject *baserepr; + PyObject *result; + baserepr = PyDict_Type.tp_repr((PyObject *)dd); + if (baserepr == NULL) + return NULL; + if (dd->default_factory == NULL) + defrepr = PyString_FromString("None"); + else + defrepr = PyObject_Repr(dd->default_factory); + if (defrepr == NULL) { + Py_DECREF(baserepr); + return NULL; + } + result = PyString_FromFormat("defaultdict(%s, %s)", + PyString_AS_STRING(defrepr), + PyString_AS_STRING(baserepr)); + Py_DECREF(defrepr); + Py_DECREF(baserepr); + return result; +} + +static int +defdict_traverse(PyObject *self, visitproc visit, void *arg) +{ + Py_VISIT(((defdictobject *)self)->default_factory); + return PyDict_Type.tp_traverse(self, visit, arg); +} + +static int +defdict_tp_clear(defdictobject *dd) +{ + if (dd->default_factory != NULL) { + Py_DECREF(dd->default_factory); + dd->default_factory = NULL; + } + return PyDict_Type.tp_clear((PyObject *)dd); +} + +static int +defdict_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + defdictobject *dd = (defdictobject *)self; + PyObject *olddefault = dd->default_factory; + PyObject *newdefault = NULL; + PyObject *newargs; + int result; + if (args == NULL || !PyTuple_Check(args)) + newargs = PyTuple_New(0); + else { + Py_ssize_t n = PyTuple_GET_SIZE(args); + if (n > 0) + newdefault = PyTuple_GET_ITEM(args, 0); + newargs = PySequence_GetSlice(args, 1, n); + } + if (newargs == NULL) + return -1; + Py_XINCREF(newdefault); + dd->default_factory = newdefault; + result = PyDict_Type.tp_init(self, newargs, kwds); + Py_DECREF(newargs); + Py_XDECREF(olddefault); + return result; +} + +PyDoc_STRVAR(defdict_doc, +"defaultdict(default_factory) --> dict with default factory\n\ +\n\ +The default factory is called without arguments to produce\n\ +a new value when a key is not present, in __getitem__ only.\n\ +A defaultdict compares equal to a dict with the same items.\n\ +"); + +static PyTypeObject defdict_type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "collections.defaultdict", /* tp_name */ + sizeof(defdictobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)defdict_dealloc, /* tp_dealloc */ + (printfunc)defdict_print, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)defdict_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */ + defdict_doc, /* tp_doc */ + (traverseproc)defdict_traverse, /* tp_traverse */ + (inquiry)defdict_tp_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset*/ + 0, /* tp_iter */ + 0, /* tp_iternext */ + defdict_methods, /* tp_methods */ + defdict_members, /* tp_members */ + 0, /* tp_getset */ + &PyDict_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)defdict_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + 0, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + /* module level code ********************************************************/ PyDoc_STRVAR(module_doc, -"High performance data structures\n\ +"High performance data structures.\n\ +- deque: ordered collection accessible from endpoints only\n\ +- defaultdict: dict subclass with a default value factory\n\ "); PyMODINIT_FUNC @@ -1085,6 +1344,11 @@ Py_INCREF(&deque_type); PyModule_AddObject(m, "deque", (PyObject *)&deque_type); + if (PyType_Ready(&defdict_type) < 0) + return; + Py_INCREF(&defdict_type); + PyModule_AddObject(m, "defaultdict", (PyObject *)&defdict_type); + if (PyType_Ready(&dequeiter_type) < 0) return;