dbm.dumb: keep index keys as bytes and accept str keys.

Lib/dbm/dumb.py
    The in-memory index is keyed by bytes.  The index file stores each key
    as the repr of its latin-1 decoding; loading the file encodes it back
    with latin-1.  __getitem__, __setitem__, __delitem__ and __contains__
    encode str keys as UTF-8.  __setitem__ converts a bytearray key to
    bytes before the index lookup, because a bytearray is unhashable.
    keys() and items() return the stored bytes keys unchanged.

Lib/test/test_dbm_dumb.py
    The fixture dictionary uses bytes keys and gains one non-ASCII key.
    New tests cover reading and writing with a str key.  Both new tests
    close the database handle they open.

NOTE(review): runs of spaces inside context and removed lines were
reconstructed; if a hunk fails to match, apply with "patch -l".
NOTE(review): _addkey is not part of this diff; only the f.close() line
assumed to end it appears as context.  Confirm it writes keys to the index
file in the same form as _commit does.

Index: Lib/dbm/dumb.py
===================================================================
--- Lib/dbm/dumb.py (revision 67294)
+++ Lib/dbm/dumb.py (working copy)
@@ -84,6 +84,7 @@
             for line in f:
                 line = line.rstrip()
                 key, pos_and_siz_pair = eval(line)
+                key = key.encode('latin-1')
                 self._index[key] = pos_and_siz_pair
             f.close()
 
@@ -110,13 +111,14 @@
         f = self._io.open(self._dirfile, 'w')
         self._chmod(self._dirfile)
         for key, pos_and_siz_pair in self._index.items():
-            f.write("%r, %r\n" % (key, pos_and_siz_pair))
+            f.write("%r, %r\n" % (key.decode('latin-1'), pos_and_siz_pair))
         f.close()
 
     sync = _commit
 
     def __getitem__(self, key):
-        key = key.decode("latin-1")
+        if isinstance(key, str):
+            key = key.encode('utf-8')
         pos, siz = self._index[key]     # may raise KeyError
         f = _io.open(self._datfile, 'rb')
         f.seek(pos)
@@ -161,11 +163,14 @@
         f.close()
 
     def __setitem__(self, key, val):
-        if not isinstance(key, bytes):
-            raise TypeError("keys must be bytes")
-        key = key.decode("latin-1") # hashable bytes
+        if isinstance(key, str):
+            key = key.encode('utf-8')
+        elif isinstance(key, bytearray):
+            key = bytes(key)  # bytearray is unhashable
+        elif not isinstance(key, bytes):
+            raise TypeError("keys must be bytes or strings")
         if not isinstance(val, (bytes, bytearray)):
-            raise TypeError("values must be byte strings")
+            raise TypeError("values must be bytes")
         if key not in self._index:
             self._addkey(key, self._addval(val))
         else:
@@ -191,7 +196,8 @@
             # (so that _commit() never gets called).
 
     def __delitem__(self, key):
-        key = key.decode("latin-1")
+        if isinstance(key, str):
+            key = key.encode('utf-8')
         # The blocks used by the associated value are lost.
         del self._index[key]
         # XXX It's unclear why we do a _commit() here (the code always
@@ -201,14 +207,14 @@
         self._commit()
 
     def keys(self):
-        return [key.encode("latin-1") for key in self._index.keys()]
+        return list(self._index.keys())
 
     def items(self):
-        return [(key.encode("latin-1"), self[key.encode("latin-1")])
-                for key in self._index.keys()]
+        return [(key, self[key]) for key in self._index.keys()]
 
     def __contains__(self, key):
-        key = key.decode("latin-1")
+        if isinstance(key, str):
+            key = key.encode('utf-8')
         return key in self._index
 
     def iterkeys(self):
Index: Lib/test/test_dbm_dumb.py
===================================================================
--- Lib/test/test_dbm_dumb.py (revision 67294)
+++ Lib/test/test_dbm_dumb.py (working copy)
@@ -19,13 +19,14 @@
             pass
 
 class DumbDBMTestCase(unittest.TestCase):
-    _dict = {'0': b'',
-             'a': b'Python:',
-             'b': b'Programming',
-             'c': b'the',
-             'd': b'way',
-             'f': b'Guido',
-             'g': b'intended',
+    _dict = {b'0': b'',
+             b'a': b'Python:',
+             b'b': b'Programming',
+             b'c': b'the',
+             b'd': b'way',
+             b'f': b'Guido',
+             b'g': b'intended',
+             '\u00fc'.encode('utf-8') : b'!',
              }
 
     def __init__(self, *args):
@@ -35,7 +36,7 @@
         f = dumbdbm.open(_fname, 'c')
         self.assertEqual(list(f.keys()), [])
         for key in self._dict:
-            f[key.encode("ascii")] = self._dict[key]
+            f[key] = self._dict[key]
         self.read_helper(f)
         f.close()
 
@@ -73,7 +74,7 @@
     def test_dumbdbm_modification(self):
         self.init_db()
         f = dumbdbm.open(_fname, 'w')
-        self._dict['g'] = f[b'g'] = b"indented"
+        self._dict[b'g'] = f[b'g'] = b"indented"
         self.read_helper(f)
         f.close()
 
@@ -105,6 +106,23 @@
         self.assertEqual(f[b'1'], b'hello2')
         f.close()
 
+    def test_str_read(self):
+        self.init_db()
+        f = dumbdbm.open(_fname, 'r')
+        self.assertEqual(f['\u00fc'], self._dict['\u00fc'.encode('utf-8')])
+        f.close()
+
+    def test_str_write_contains(self):
+        self.init_db()
+        f = dumbdbm.open(_fname)
+        f['\u00fc'] = b'!'
+        f.close()
+        f = dumbdbm.open(_fname, 'r')
+        self.assert_('\u00fc' in f)
+        self.assertEqual(f['\u00fc'.encode('utf-8')],
+                         self._dict['\u00fc'.encode('utf-8')])
+        f.close()
+
     def test_line_endings(self):
         # test for bug #1172763: dumbdbm would die if the line endings
         # weren't what was expected.
@@ -129,16 +147,16 @@
     def read_helper(self, f):
         keys = self.keys_helper(f)
         for key in self._dict:
-            self.assertEqual(self._dict[key], f[key.encode("ascii")])
+            self.assertEqual(self._dict[key], f[key])
 
     def init_db(self):
         f = dumbdbm.open(_fname, 'w')
         for k in self._dict:
-            f[k.encode("ascii")] = self._dict[k]
+            f[k] = self._dict[k]
         f.close()
 
     def keys_helper(self, f):
-        keys = sorted(k.decode("ascii") for k in f.keys())
+        keys = sorted(f.keys())
         dkeys = sorted(self._dict.keys())
         self.assertEqual(keys, dkeys)
         return keys
@@ -155,12 +173,12 @@
                 if random.random() < 0.2:
                     if k in d:
                         del d[k]
-                        del f[k.encode("ascii")]
+                        del f[k]
                 else:
                     v = random.choice((b'a', b'b', b'c')) * random.randrange(10000)
                     d[k] = v
-                    f[k.encode("ascii")] = v
-                    self.assertEqual(f[k.encode("ascii")], v)
+                    f[k] = v
+                    self.assertEqual(f[k], v)
             f.close()
 
             f = dumbdbm.open(_fname)