Index: Doc/lib/libshelve.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libshelve.tex,v retrieving revision 1.19 diff -c -r1.19 libshelve.tex *** Doc/lib/libshelve.tex 21 Jan 2003 01:52:39 -0000 1.19 --- Doc/lib/libshelve.tex 19 Apr 2003 18:29:33 -0000 *************** *** 13,19 **** sub-objects. The keys are ordinary strings. \refstmodindex{pickle} ! \begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,binary=\code{False}}}} Open a persistent dictionary. The filename specified is the base filename for the underlying database. As a side-effect, an extension may be added to the filename and more than one file may be created. By default, the --- 13,19 ---- sub-objects. The keys are ordinary strings. \refstmodindex{pickle} ! \begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,binary=\code{False}\optional{,writeback=\code{False}}}}} Open a persistent dictionary. The filename specified is the base filename for the underlying database. As a side-effect, an extension may be added to the filename and more than one file may be created. By default, the *************** *** 21,27 **** {}\var{flag} pararameter has the same interpretation as the \var{flag} parameter of \function{anydbm.open}. By default, ASCII pickles are used to serialize values. If the optional \var{binary} parameter is set to ! {}\var{True}, binary pickles will be used instead. \end{funcdesc} Shelve objects support all methods supported by dictionaries. This eases --- 21,35 ---- {}\var{flag} pararameter has the same interpretation as the \var{flag} parameter of \function{anydbm.open}. By default, ASCII pickles are used to serialize values. If the optional \var{binary} parameter is set to ! {}\var{True}, binary pickles will be used instead. By default, mutations ! to persistent-dictionary mutable entries are not automatically written back. ! If the optional \var{writeback} parameter is set to {}\var{True}, all ! 
entries accessed are cached in memory, and written back at close time; this ! can make it handier to mutate mutable entries in the persistent dictionary, ! but, if many entries are accessed, it can consume vast amounts of memory ! for the cache, and it can make the close operation very slow since all ! accessed entries are written back (there is no way to determine which ! accessed entries are mutable, nor which ones were actually mutated). \end{funcdesc} Shelve objects support all methods supported by dictionaries. This eases *************** *** 61,93 **** \end{itemize} ! \begin{classdesc}{Shelf}{dict\optional{, binary=False}} A subclass of \class{UserDict.DictMixin} which stores pickled values in the \var{dict} object. If the \var{binary} parameter is \code{True}, binary pickles will be used. This can provide much more compact storage than plain text pickles, depending on the nature of the objects stored in the database. \end{classdesc} ! \begin{classdesc}{BsdDbShelf}{dict\optional{, binary=False}} A subclass of \class{Shelf} which exposes \method{first}, \method{next}, \method{previous}, \method{last} and \method{set_location} which are available in the \module{bsddb} module but not in other database modules. The \var{dict} object passed to the constructor must support those methods. This is generally accomplished by calling one of \function{bsddb.hashopen}, \function{bsddb.btopen} or \function{bsddb.rnopen}. The optional ! \var{binary} parameter has the same interpretation as for the \class{Shelf} ! class. \end{classdesc} ! \begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, binary=False}}} A subclass of \class{Shelf} which accepts a \var{filename} instead of a dict-like object. The underlying file will be opened using {}\function{anydbm.open}. By default, the file will be created and opened for both read and write. The optional \var{flag} parameter has the same interpretation as for the \function{open} function. The optional ! 
\var{binary} parameter has the same interpretation as for the ! {}\class{Shelf} class. \end{classdesc} \subsection{Example} --- 69,105 ---- \end{itemize} ! \begin{classdesc}{Shelf}{dict\optional{, binary=False\optional{, writeback=False}}} A subclass of \class{UserDict.DictMixin} which stores pickled values in the \var{dict} object. If the \var{binary} parameter is \code{True}, binary pickles will be used. This can provide much more compact storage than plain text pickles, depending on the nature of the objects stored in the database. + If the \var{writeback} parameter is \code{True}, the object will hold a + cache of all entries accessed and write them back to the \var{dict} at + sync and close times. This allows natural operations on mutable entries, + but can consume much more memory and make sync and close take a long time. \end{classdesc} ! \begin{classdesc}{BsdDbShelf}{dict\optional{, binary=False\optional{, writeback=False}}} A subclass of \class{Shelf} which exposes \method{first}, \method{next}, \method{previous}, \method{last} and \method{set_location} which are available in the \module{bsddb} module but not in other database modules. The \var{dict} object passed to the constructor must support those methods. This is generally accomplished by calling one of \function{bsddb.hashopen}, \function{bsddb.btopen} or \function{bsddb.rnopen}. The optional ! \var{binary} and \var{writeback} parameters have the same interpretation ! as for the \class{Shelf} class. \end{classdesc} ! \begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, binary=False\optional{, writeback=False}}}} A subclass of \class{Shelf} which accepts a \var{filename} instead of a dict-like object. The underlying file will be opened using {}\function{anydbm.open}. By default, the file will be created and opened for both read and write. The optional \var{flag} parameter has the same interpretation as for the \function{open} function. The optional ! 
\var{binary} and \var{writeback} parameters have the same interpretation ! as for the \class{Shelf} class. \end{classdesc} \subsection{Example} *************** *** 103,114 **** d[key] = data # store data at key (overwrites old data if # using an existing key) ! data = d[key] # retrieve data at key (raise KeyError if no # such key) del d[key] # delete data stored at key (raises KeyError # if no such key) flag = d.has_key(key) # true if the key exists list = d.keys() # a list of all existing keys (slow!) d.close() # close it \end{verbatim} --- 115,137 ---- d[key] = data # store data at key (overwrites old data if # using an existing key) ! data = d[key] # retrieve a COPY of data at key (raise KeyError if no # such key) del d[key] # delete data stored at key (raises KeyError # if no such key) flag = d.has_key(key) # true if the key exists list = d.keys() # a list of all existing keys (slow!) + + # as d was opened WITHOUT writeback=True, beware: + d['xx'] = range(4) # this works as expected, but... + d['xx'].append(5) # *this doesn't!* -- d['xx'] is STILL range(4)!!! + # having opened d without writeback=True, you need to code carefully: + temp = d['xx'] # extracts the copy + temp.append(5) # mutates the copy + d['xx'] = temp # stores the copy right back, to persist it + # or, d=shelve.open(filename,writeback=True) would let you just code + # d['xx'].append(5) and have it work as expected, BUT it would also + # consume more memory and make the d.close() operation slower. d.close() # close it \end{verbatim} Index: Lib/shelve.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/shelve.py,v retrieving revision 1.20 diff -c -r1.20 shelve.py *** Lib/shelve.py 21 Jan 2003 01:53:09 -0000 1.20 --- Lib/shelve.py 19 Apr 2003 18:29:34 -0000 *************** *** 15,22 **** d[key] = data # store data at key (overwrites old data if # using an existing key) ! data = d[key] # retrieve data at key (raise KeyError if no ! 
# such key) del d[key] # delete data stored at key (raises KeyError # if no such key) flag = d.has_key(key) # true if the key exists; same as "key in d" --- 15,23 ---- d[key] = data # store data at key (overwrites old data if # using an existing key) ! data = d[key] # retrieve a COPY of the data at key (raise ! # KeyError if no such key) -- NOTE that this ! # access returns a *copy* of the entry! del d[key] # delete data stored at key (raises KeyError # if no such key) flag = d.has_key(key) # true if the key exists; same as "key in d" *************** *** 26,31 **** --- 27,59 ---- Dependent on the implementation, closing a persistent dictionary may or may not be necessary to flush changes to disk. + + Normally, d[key] returns a COPY of the entry. This needs care when + mutable entries are mutated: for example, if d[key] is a list, + d[key].append(anitem) + does NOT modify the entry d[key] itself, as stored in the persistent + mapping -- it only modifies the copy, which is then immediately + discarded, so that the append has NO effect whatsoever. To append an + item to d[key] in a way that will affect the persistent mapping, use: + data = d[key] + data.append(anitem) + d[key] = data + + To avoid the problem with mutable entries, you may pass the keyword + argument writeback=True in the call to shelve.open. When you use: + d = shelve.open(filename, writeback=True) + then d keeps a cache of all entries you access, and writes them all back + to the persistent mapping when you call d.close(). This ensures that + such usage as d[key].append(anitem) works as intended. + + However, using keyword argument writeback=True may consume vast amounts + of memory for the cache, and it may make d.close() very slow, if you + access many of d's entries after opening it in this way: d has no way to + check which of the entries you access are mutable and/or which ones you + actually mutate, so it must cache, and write back at close, all of the + entries that you access. 
You can call d.sync() to write back all the + entries in the cache, and empty the cache (d.sync() also synchronizes + the persistent dictionary on disk, if feasible). """ # Try using cPickle and cStringIO if available. *************** *** 51,59 **** See the module's __doc__ string for an overview of the interface. """ ! def __init__(self, dict, binary=False): self.dict = dict self._binary = binary def keys(self): return self.dict.keys() --- 79,89 ---- See the module's __doc__ string for an overview of the interface. """ ! def __init__(self, dict, binary=False, writeback=False): self.dict = dict self._binary = binary + self.writeback = writeback + self.cache = {} def keys(self): return self.dict.keys() *************** *** 73,82 **** return default def __getitem__(self, key): ! f = StringIO(self.dict[key]) ! return Unpickler(f).load() def __setitem__(self, key, value): f = StringIO() p = Pickler(f, self._binary) p.dump(value) --- 103,120 ---- return default def __getitem__(self, key): ! try: ! value = self.cache[key] ! except KeyError: ! f = StringIO(self.dict[key]) ! value = Unpickler(f).load() ! if self.writeback: ! self.cache[key] = value ! return value def __setitem__(self, key, value): + if self.writeback: + self.cache[key] = value f = StringIO() p = Pickler(f, self._binary) p.dump(value) *************** *** 84,91 **** --- 122,134 ---- def __delitem__(self, key): del self.dict[key] + try: + del self.cache[key] + except KeyError: + pass def close(self): + self.sync() try: self.dict.close() except: *************** *** 96,101 **** --- 139,150 ---- self.close() def sync(self): + if self.writeback and self.cache: + self.writeback = False + for key, entry in self.cache.iteritems(): + self[key] = entry + self.writeback = True + self.cache = {} if hasattr(self.dict, 'sync'): self.dict.sync() *************** *** 113,120 **** See the module's __doc__ string for an overview of the interface. """ ! def __init__(self, dict, binary=False): ! 
Shelf.__init__(self, dict, binary) def set_location(self, key): (key, value) = self.dict.set_location(key) --- 162,169 ---- See the module's __doc__ string for an overview of the interface. """ ! def __init__(self, dict, binary=False, writeback=False): ! Shelf.__init__(self, dict, binary, writeback) def set_location(self, key): (key, value) = self.dict.set_location(key) *************** *** 149,160 **** See the module's __doc__ string for an overview of the interface. """ ! def __init__(self, filename, flag='c', binary=False): import anydbm ! Shelf.__init__(self, anydbm.open(filename, flag), binary) ! def open(filename, flag='c', binary=False): """Open a persistent dictionary for reading and writing. The filename parameter is the base filename for the underlying database. --- 198,209 ---- See the module's __doc__ string for an overview of the interface. """ ! def __init__(self, filename, flag='c', binary=False, writeback=False): import anydbm ! Shelf.__init__(self, anydbm.open(filename, flag), binary, writeback) ! def open(filename, flag='c', binary=False, writeback=False): """Open a persistent dictionary for reading and writing. The filename parameter is the base filename for the underlying database. *************** *** 167,170 **** See the module's __doc__ string for an overview of the interface. """ ! return DbfilenameShelf(filename, flag, binary) --- 216,219 ---- See the module's __doc__ string for an overview of the interface. """ ! 
return DbfilenameShelf(filename, flag, binary, writeback) Index: Lib/test/test_shelve.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_shelve.py,v retrieving revision 1.3 diff -c -r1.3 test_shelve.py *** Lib/test/test_shelve.py 9 Mar 2003 07:05:14 -0000 1.3 --- Lib/test/test_shelve.py 19 Apr 2003 18:29:37 -0000 *************** *** 43,48 **** --- 43,69 ---- self.assertEqual(len(d1), 1) self.assertNotEqual(d1, d2) + def test_mutable_entry(self): + d1 = {} + s = shelve.Shelf(d1, binary=True, writeback=False) + s['key1'] = [1,2,3,4] + self.assertEqual(s['key1'], [1,2,3,4]) + s['key1'].append(5) + self.assertEqual(s['key1'], [1,2,3,4]) + s.close() + + d2 = {} + s = shelve.Shelf(d2, binary=True, writeback=True) + s['key1'] = [1,2,3,4] + self.assertEqual(s['key1'], [1,2,3,4]) + s['key1'].append(5) + self.assertEqual(s['key1'], [1,2,3,4,5]) + s.close() + + self.assertEqual(len(d1), 1) + self.assertEqual(len(d2), 1) + + from test_userdict import TestMappingProtocol class TestShelveBase(TestMappingProtocol):