pickle: reject non-ASCII persistent IDs in protocol 0

Protocol 0 writes a persistent ID as a text line after the PERSID opcode.
Both the Python and the C implementations now:

  * raise PicklingError when str(pid) cannot be encoded as ASCII;
  * raise UnpicklingError when the PERSID line cannot be decoded as ASCII;
  * (C only) hand persistent_load() a str instead of bytes, matching the
    Python implementation.

Tests: AbstractIdentityPersistentPicklerTests round-trips objects through
identity persistent_id()/persistent_load() hooks.  The dumps()/loads()
helpers are moved into PersistentPicklerUnpicklerMixin so the Python and C
variants can share them.

Note: CIdPersPicklerTests is registered inside the has_c_implementation
branch of test_main(), next to the other C-backed test classes, so that
test_main() does not reference it when the _pickle accelerator is missing.

diff --git a/Lib/pickle.py b/Lib/pickle.py
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -352,7 +352,11 @@
             self.save(pid, save_persistent_id=False)
             self.write(BINPERSID)
         else:
-            self.write(PERSID + str(pid).encode("ascii") + b'\n')
+            try:
+                self.write(PERSID + str(pid).encode("ascii") + b'\n')
+            except UnicodeEncodeError:
+                raise PicklingError(
+                    "persistent IDs in protocol 0 must be ASCII strings")
 
     def save_reduce(self, func, args, state=None,
                     listitems=None, dictitems=None, obj=None):
@@ -870,7 +874,11 @@
     dispatch[PROTO[0]] = load_proto
 
     def load_persid(self):
-        pid = self.readline()[:-1].decode("ascii")
+        try:
+            pid = self.readline()[:-1].decode("ascii")
+        except UnicodeDecodeError:
+            raise UnpicklingError(
+                "persistent IDs in protocol 0 must be ASCII strings")
         self.append(self.persistent_load(pid))
     dispatch[PERSID[0]] = load_persid
 
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1467,6 +1467,35 @@
         self.assertEqual(self.load_count, 5)
 
 
+class AbstractIdentityPersistentPicklerTests(unittest.TestCase):
+
+    def persistent_id(self, obj):
+        return obj
+
+    def persistent_load(self, pid):
+        return pid
+
+    def _check_return_correct_type(self, obj, proto):
+        unpickled = self.loads(self.dumps(obj, proto))
+        self.assertIsInstance(unpickled, type(obj))
+        self.assertEqual(unpickled, obj)
+
+    def test_return_correct_type(self):
+        for proto in protocols:
+            # Protocol 0 supports only ASCII strings.
+            if proto == 0:
+                self._check_return_correct_type("abc", 0)
+            else:
+                for obj in [b"abc\n", "abc\n", -1, -1.1 * 0.1, str]:
+                    self._check_return_correct_type(obj, proto)
+
+    def test_protocol0_is_ascii_only(self):
+        non_ascii_str = "\N{EMPTY SET}"
+        self.assertRaises(pickle.PicklingError, self.dumps, non_ascii_str, 0)
+        pickled = pickle.PERSID + non_ascii_str.encode('utf-8') + b'\n.'
+        self.assertRaises(pickle.UnpicklingError, self.loads, pickled)
+
+
 class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):
 
     pickler_class = None
diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py
@@ -7,6 +7,7 @@
 from test.pickletester import AbstractPickleTests
 from test.pickletester import AbstractPickleModuleTests
 from test.pickletester import AbstractPersistentPicklerTests
+from test.pickletester import AbstractIdentityPersistentPicklerTests
 from test.pickletester import AbstractPicklerUnpicklerObjectTests
 from test.pickletester import AbstractDispatchTableTests
 from test.pickletester import BigmemPickleTests
@@ -52,10 +53,7 @@
         return pickle.loads(buf, **kwds)
 
 
-class PyPersPicklerTests(AbstractPersistentPicklerTests):
-
-    pickler = pickle._Pickler
-    unpickler = pickle._Unpickler
+class PersistentPicklerUnpicklerMixin(object):
 
     def dumps(self, arg, proto=None):
         class PersPickler(self.pickler):
@@ -64,8 +62,7 @@
         f = io.BytesIO()
         p = PersPickler(f, proto)
         p.dump(arg)
-        f.seek(0)
-        return f.read()
+        return f.getvalue()
 
     def loads(self, buf, **kwds):
         class PersUnpickler(self.unpickler):
@@ -76,6 +73,20 @@
         return u.load()
 
 
+class PyPersPicklerTests(AbstractPersistentPicklerTests,
+                         PersistentPicklerUnpicklerMixin):
+
+    pickler = pickle._Pickler
+    unpickler = pickle._Unpickler
+
+
+class PyIdPersPicklerTests(AbstractIdentityPersistentPicklerTests,
+                           PersistentPicklerUnpicklerMixin):
+
+    pickler = pickle._Pickler
+    unpickler = pickle._Unpickler
+
+
 class PyPicklerUnpicklerObjectTests(AbstractPicklerUnpicklerObjectTests):
 
     pickler_class = pickle._Pickler
@@ -103,6 +114,10 @@
         pickler = _pickle.Pickler
         unpickler = _pickle.Unpickler
 
+    class CIdPersPicklerTests(PyIdPersPicklerTests):
+        pickler = _pickle.Pickler
+        unpickler = _pickle.Unpickler
+
     class CDumpPickle_LoadPickle(PyPicklerTests):
         pickler = _pickle.Pickler
         unpickler = pickle._Unpickler
@@ -128,7 +143,8 @@
 
 def test_main():
     tests = [PickleTests, PyPicklerTests, PyPersPicklerTests,
-             PyDispatchTableTests, PyChainDispatchTableTests]
+             PyIdPersPicklerTests, PyDispatchTableTests,
+             PyChainDispatchTableTests]
     if has_c_implementation:
-        tests.extend([CPicklerTests, CPersPicklerTests,
+        tests.extend([CPicklerTests, CPersPicklerTests, CIdPersPicklerTests,
                       CDumpPickle_LoadPickle, DumpPickle_CLoadPickle,
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -2856,26 +2856,33 @@
                 goto error;
         }
         else {
-            PyObject *pid_str = NULL;
-            char *pid_ascii_bytes;
-            Py_ssize_t size;
+            PyObject *pid_str;
+            PyObject *pid_ascii_bytes;
 
             pid_str = PyObject_Str(pid);
             if (pid_str == NULL)
                 goto error;
 
-            /* XXX: Should it check whether the persistent id only contains
-               ASCII characters? And what if the pid contains embedded
+            /* XXX: Should it check whether the pid contains embedded
                newlines? */
-            pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
+            pid_ascii_bytes = PyUnicode_AsASCIIString(pid_str);
             Py_DECREF(pid_str);
-            if (pid_ascii_bytes == NULL)
+            if (pid_ascii_bytes == NULL) {
+                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+                    PyErr_SetString(PicklingError, "persistent IDs in "
+                                    "protocol 0 must be ASCII strings");
+                }
                 goto error;
+            }
 
             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
-                _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
-                _Pickler_Write(self, "\n", 1) < 0)
+                _Pickler_Write(self, PyBytes_AS_STRING(pid_ascii_bytes),
+                               PyBytes_GET_SIZE(pid_ascii_bytes)) < 0 ||
+                _Pickler_Write(self, "\n", 1) < 0) {
+                Py_DECREF(pid_ascii_bytes);
                 goto error;
+            }
+            Py_DECREF(pid_ascii_bytes);
         }
         status = 1;
     }
@@ -4668,9 +4675,14 @@
         if (len < 2)
             return bad_readline();
 
-        pid = PyBytes_FromStringAndSize(s, len - 1);
-        if (pid == NULL)
+        pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
+        if (pid == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+                PyErr_SetString(UnpicklingError, "persistent IDs in "
+                                "protocol 0 must be ASCII strings");
+            }
             return -1;
+        }
 
         /* Ugh... this does not leak since _Unpickler_FastCall() steals the
            reference to pid first. */