diff -r b8acfbf5aa61 Lib/pickle.py
--- a/Lib/pickle.py	Thu Feb 12 22:49:45 2015 +0100
+++ b/Lib/pickle.py	Fri Feb 13 10:29:41 2015 +0200
@@ -533,7 +533,11 @@ class _Pickler:
             self.save(pid, save_persistent_id=False)
             self.write(BINPERSID)
         else:
-            self.write(PERSID + str(pid).encode("ascii") + b'\n')
+            try:
+                self.write(PERSID + str(pid).encode("ascii") + b'\n')
+            except UnicodeEncodeError:
+                raise PicklingError(
+                    "persistent IDs in protocol 0 must be ASCII strings")
 
     def save_reduce(self, func, args, state=None,
                     listitems=None, dictitems=None, obj=None):
@@ -1074,7 +1078,11 @@ class _Unpickler:
     dispatch[FRAME[0]] = load_frame
 
     def load_persid(self):
-        pid = self.readline()[:-1].decode("ascii")
+        try:
+            pid = self.readline()[:-1].decode("ascii")
+        except UnicodeDecodeError:
+            raise UnpicklingError(
+                "persistent IDs in protocol 0 must be ASCII strings")
         self.append(self.persistent_load(pid))
     dispatch[PERSID[0]] = load_persid
 
diff -r b8acfbf5aa61 Lib/test/pickletester.py
--- a/Lib/test/pickletester.py	Thu Feb 12 22:49:45 2015 +0100
+++ b/Lib/test/pickletester.py	Fri Feb 13 10:29:41 2015 +0200
@@ -2065,6 +2065,35 @@ class AbstractPersistentPicklerTests(uni
             self.assertEqual(self.load_false_count, 1)
 
 
+class AbstractIdentityPersistentPicklerTests(unittest.TestCase):
+
+    def persistent_id(self, obj):
+        return obj
+
+    def persistent_load(self, pid):
+        return pid
+
+    def _check_return_correct_type(self, obj, proto):
+        unpickled = self.loads(self.dumps(obj, proto))
+        self.assertIsInstance(unpickled, type(obj))
+        self.assertEqual(unpickled, obj)
+
+    def test_return_correct_type(self):
+        for proto in protocols:
+            # Protocol 0 supports only ASCII strings.
+            if proto == 0:
+                self._check_return_correct_type("abc", 0)
+            else:
+                for obj in [b"abc\n", "abc\n", -1, -1.1 * 0.1, str]:
+                    self._check_return_correct_type(obj, proto)
+
+    def test_protocol0_is_ascii_only(self):
+        non_ascii_str = "\N{EMPTY SET}"
+        self.assertRaises(pickle.PicklingError, self.dumps, non_ascii_str, 0)
+        pickled = pickle.PERSID + non_ascii_str.encode('utf-8') + b'\n.'
+        self.assertRaises(pickle.UnpicklingError, self.loads, pickled)
+
+
 class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):
 
     pickler_class = None
diff -r b8acfbf5aa61 Lib/test/test_pickle.py
--- a/Lib/test/test_pickle.py	Thu Feb 12 22:49:45 2015 +0100
+++ b/Lib/test/test_pickle.py	Fri Feb 13 10:29:41 2015 +0200
@@ -10,6 +10,7 @@ from test import support
 from test.pickletester import AbstractPickleTests
 from test.pickletester import AbstractPickleModuleTests
 from test.pickletester import AbstractPersistentPicklerTests
+from test.pickletester import AbstractIdentityPersistentPicklerTests
 from test.pickletester import AbstractPicklerUnpicklerObjectTests
 from test.pickletester import AbstractDispatchTableTests
 from test.pickletester import BigmemPickleTests
@@ -55,10 +56,7 @@ class InMemoryPickleTests(AbstractPickle
         return pickle.loads(buf, **kwds)
 
 
-class PyPersPicklerTests(AbstractPersistentPicklerTests):
-
-    pickler = pickle._Pickler
-    unpickler = pickle._Unpickler
+class PersistentPicklerUnpicklerMixin(object):
 
     def dumps(self, arg, proto=None):
         class PersPickler(self.pickler):
@@ -67,8 +65,7 @@ class PyPersPicklerTests(AbstractPersist
         f = io.BytesIO()
         p = PersPickler(f, proto)
         p.dump(arg)
-        f.seek(0)
-        return f.read()
+        return f.getvalue()
 
     def loads(self, buf, **kwds):
         class PersUnpickler(self.unpickler):
@@ -79,6 +76,20 @@ class PyPersPicklerTests(AbstractPersist
         return u.load()
 
 
+class PyPersPicklerTests(AbstractPersistentPicklerTests,
+                         PersistentPicklerUnpicklerMixin):
+
+    pickler = pickle._Pickler
+    unpickler = pickle._Unpickler
+
+
+class PyIdPersPicklerTests(AbstractIdentityPersistentPicklerTests,
+                           PersistentPicklerUnpicklerMixin):
+
+    pickler = pickle._Pickler
+    unpickler = pickle._Unpickler
+
+
 class PyPicklerUnpicklerObjectTests(AbstractPicklerUnpicklerObjectTests):
 
     pickler_class = pickle._Pickler
@@ -110,6 +121,10 @@ if has_c_implementation:
         pickler = _pickle.Pickler
         unpickler = _pickle.Unpickler
 
+    class CIdPersPicklerTests(PyIdPersPicklerTests):
+        pickler = _pickle.Pickler
+        unpickler = _pickle.Unpickler
+
     class CDumpPickle_LoadPickle(PyPicklerTests):
         pickler = _pickle.Pickler
         unpickler = pickle._Unpickler
@@ -208,10 +223,11 @@ if has_c_implementation:
 
 
 def test_main():
-    tests = [PickleTests, PyPicklerTests, PyPersPicklerTests,
+    tests = [PickleTests, PyPicklerTests,
+             PyPersPicklerTests, PyIdPersPicklerTests,
              PyDispatchTableTests, PyChainDispatchTableTests]
     if has_c_implementation:
-        tests.extend([CPicklerTests, CPersPicklerTests,
+        tests.extend([CPicklerTests, CPersPicklerTests, CIdPersPicklerTests,
                       CDumpPickle_LoadPickle, DumpPickle_CLoadPickle,
                       PyPicklerUnpicklerObjectTests,
                       CPicklerUnpicklerObjectTests,
diff -r b8acfbf5aa61 Modules/_pickle.c
--- a/Modules/_pickle.c	Thu Feb 12 22:49:45 2015 +0100
+++ b/Modules/_pickle.c	Fri Feb 13 10:29:41 2015 +0200
@@ -3382,26 +3382,30 @@ save_pers(PicklerObject *self, PyObject
                 goto error;
         }
         else {
-            PyObject *pid_str = NULL;
-            char *pid_ascii_bytes;
-            Py_ssize_t size;
+            PyObject *pid_str;
 
             pid_str = PyObject_Str(pid);
             if (pid_str == NULL)
                 goto error;
 
-            /* XXX: Should it check whether the persistent id only contains
-               ASCII characters? And what if the pid contains embedded
+            /* XXX: Should it check whether the pid contains embedded
                newlines? */
-            pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
+            if (!PyUnicode_IS_ASCII(pid_str)) {
+                PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
+                                "persistent IDs in protocol 0 must be "
+                                "ASCII strings");
+                Py_DECREF(pid_str);
+                goto error;
+            }
+
+            if (_Pickler_Write(self, &persid_op, 1) < 0 ||
+                _Pickler_Write(self, PyUnicode_DATA(pid_str),
+                               PyUnicode_GET_LENGTH(pid_str)) < 0 ||
+                _Pickler_Write(self, "\n", 1) < 0) {
+                Py_DECREF(pid_str);
+                goto error;
+            }
             Py_DECREF(pid_str);
-            if (pid_ascii_bytes == NULL)
-                goto error;
-
-            if (_Pickler_Write(self, &persid_op, 1) < 0 ||
-                _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
-                _Pickler_Write(self, "\n", 1) < 0)
-                goto error;
         }
         status = 1;
     }
@@ -5360,9 +5364,15 @@ load_persid(UnpicklerObject *self)
         if (len < 1)
             return bad_readline();
 
-        pid = PyBytes_FromStringAndSize(s, len - 1);
-        if (pid == NULL)
+        pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
+        if (pid == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+                PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
+                                "persistent IDs in protocol 0 must be "
+                                "ASCII strings");
+            }
             return -1;
+        }
 
         /* This does not leak since _Pickle_FastCall() steals the reference
            to pid first. */