diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -2,10 +2,14 @@ import io import unittest import pickle import pickletools +import sys import copyreg from http.cookies import SimpleCookie -from test.support import TestFailed, TESTFN, run_with_locale +from test.support import ( + TestFailed, TESTFN, run_with_locale, + _2G, _4G, precisionbigmemtest, + ) from pickle import bytes_types @@ -14,6 +18,8 @@ from pickle import bytes_types # kind of outer loop. protocols = range(pickle.HIGHEST_PROTOCOL + 1) +character_size = 4 if sys.maxunicode > 0xFFFF else 2 + # Return True if opcode code appears in the pickle, else False. def opcode_in_pickle(code, pickle): @@ -1098,6 +1104,100 @@ class AbstractPickleTests(unittest.TestC empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r') self.assertEqual(empty, '') + def check_negative_32b_binXXX(self, dumped): + if sys.maxsize > 2**32: + self.skipTest("test is only meaningful on 32-bit builds") + # XXX Pure Python pickle reads lengths as signed and passes + # them directly to read() (hence the EOFError) + with self.assertRaises((pickle.UnpicklingError, EOFError, + ValueError, OverflowError)): + self.loads(dumped) + + def test_negative_32b_binbytes(self): + # On 32-bit builds, a BINBYTES of 2**31 or more is refused + self.check_negative_32b_binXXX(b'\x80\x03B\xff\xff\xff\xffxyzq\x00.') + + def test_negative_32b_binunicode(self): + # On 32-bit builds, a BINUNICODE of 2**31 or more is refused + self.check_negative_32b_binXXX(b'\x80\x03X\xff\xff\xff\xffxyzq\x00.') + + +class BigmemPickleTests(unittest.TestCase): + + # Binary protocols can serialize longs of up to 2GB-1 + + @precisionbigmemtest(size=_2G, memuse=1 + 1, dry_run=False) + def test_huge_long_32b(self, size): + data = 1 << (8 * size) + try: + for proto in protocols: + if proto < 2: + continue + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + finally: + data = None + + # Protocol 3 can serialize up to 4GB-1 as a bytes object + # (older protocols don't have a dedicated opcode for bytes and are + # too inefficient) + + @precisionbigmemtest(size=_2G, memuse=1 + 1, dry_run=False) + def test_huge_bytes_32b(self, size): + data = b"abcd" * (size // 4) + try: + for proto in protocols: + if proto < 3: + continue + try: + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:15]) + self.assertTrue(b"abcd" in pickled[-15:]) + finally: + pickled = None + finally: + data = None + + @precisionbigmemtest(size=_4G, memuse=1 + 1, dry_run=False) + def test_huge_bytes_64b(self, size): + data = b"a" * size + try: + for proto in protocols: + if proto < 3: + continue + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + finally: + data = None + + # All protocols use 1-byte per printable ASCII character; we add another + # byte because the encoded form has to be copied into the internal buffer. + + @precisionbigmemtest(size=_2G, memuse=2 + character_size, dry_run=False) + def test_huge_str_32b(self, size): + data = "abcd" * (size // 4) + try: + for proto in protocols: + try: + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:15]) + self.assertTrue(b"abcd" in pickled[-15:]) + finally: + pickled = None + finally: + data = None + + @precisionbigmemtest(size=_4G, memuse=1 + character_size, dry_run=False) + def test_huge_str_64b(self, size): + data = "a" * size + try: + for proto in protocols: + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + finally: + data = None + + # Test classes for reduce_ex class REX_one(object): diff --git a/Lib/test/support.py b/Lib/test/support.py --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -1089,7 +1089,7 @@ def bigmemtest(minsize, memuse): return wrapper return decorator -def precisionbigmemtest(size, memuse): +def precisionbigmemtest(size, memuse, dry_run=True): def decorator(f): def wrapper(self): size = wrapper.size @@ -1099,10 +1099,11 @@ def precisionbigmemtest(size, memuse): else: maxsize = size - if real_max_memuse and real_max_memuse < maxsize * memuse: - raise unittest.SkipTest( - "not enough memory: %.1fG minimum needed" - % (size * memuse / (1024 ** 3))) + if ((real_max_memuse or not dry_run) + and real_max_memuse < maxsize * memuse): + raise unittest.SkipTest( + "not enough memory: %.1fG minimum needed" + % (size * memuse / (1024 ** 3))) return f(self, maxsize) wrapper.size = size diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -7,6 +7,7 @@ from test.pickletester import AbstractPi from test.pickletester import AbstractPickleModuleTests from test.pickletester import AbstractPersistentPicklerTests from test.pickletester import AbstractPicklerUnpicklerObjectTests +from test.pickletester import BigmemPickleTests try: import _pickle @@ -37,13 +38,13 @@ class PyPicklerTests(AbstractPickleTests return u.load() -class InMemoryPickleTests(AbstractPickleTests): +class InMemoryPickleTests(AbstractPickleTests, BigmemPickleTests): pickler = pickle._Pickler unpickler = pickle._Unpickler - def dumps(self, arg, proto=None): - return pickle.dumps(arg, proto) + def dumps(self, arg, protocol=None): + return pickle.dumps(arg, protocol) def loads(self, buf, **kwds): return pickle.loads(buf, **kwds) diff --git a/Modules/_pickle.c b/Modules/_pickle.c --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -153,7 +153,7 @@ typedef struct { static void Pdata_dealloc(Pdata *self) { - int i = Py_SIZE(self); + Py_ssize_t i = Py_SIZE(self); while (--i >= 0) { Py_DECREF(self->data[i]); } @@ -190,9 +190,9 @@ Pdata_New(void) * number of items, this is a (non-erroneous) NOP. */ static int -Pdata_clear(Pdata *self, int clearto) -{ - int i = Py_SIZE(self); +Pdata_clear(Pdata *self, Py_ssize_t clearto) +{ + Py_ssize_t i = Py_SIZE(self); if (clearto < 0) return stack_underflow(); @@ -303,7 +303,7 @@ Pdata_poplist(Pdata *self, Py_ssize_t st typedef struct { PyObject *me_key; - long me_value; + Py_ssize_t me_value; } PyMemoEntry; typedef struct { @@ -328,7 +328,7 @@ typedef struct PicklerObject { Py_ssize_t max_output_len; /* Allocation size of output_buffer. */ int proto; /* Pickle protocol number, >= 0 */ int bin; /* Boolean, true if proto > 0 */ - int buf_size; /* Size of the current buffered pickle data */ + Py_ssize_t buf_size; /* Size of the current buffered pickle data */ int fast; /* Enable fast mode if set to a true value. The fast mode disable the usage of memo, therefore speeding the pickling process by @@ -369,7 +369,7 @@ typedef struct UnpicklerObject { char *errors; /* Name of errors handling scheme to used when decoding strings. The default value is "strict". */ - int *marks; /* Mark stack, used for unpickling container + Py_ssize_t *marks; /* Mark stack, used for unpickling container objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ @@ -556,7 +556,7 @@ _PyMemoTable_ResizeTable(PyMemoTable *se } /* Returns NULL on failure, a pointer to the value otherwise. */ -static long * +static Py_ssize_t * PyMemoTable_Get(PyMemoTable *self, PyObject *key) { PyMemoEntry *entry = _PyMemoTable_Lookup(self, key); @@ -567,7 +567,7 @@ PyMemoTable_Get(PyMemoTable *self, PyObj /* Returns -1 on failure, 0 on success. */ static int -PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value) +PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value) { PyMemoEntry *entry; @@ -700,7 +700,7 @@ _Pickler_FlushToFile(PicklerObject *self return (result == NULL) ? -1 : 0; } -static int +static Py_ssize_t _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n) { Py_ssize_t i, required; @@ -735,7 +735,7 @@ _Pickler_Write(PicklerObject *self, cons PyErr_NoMemory(); return -1; } - self->max_output_len = (self->output_len + n) * 2; + self->max_output_len = (self->output_len + n) / 2 * 3; if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0) return -1; } @@ -1219,9 +1219,9 @@ _Unpickler_SetInputEncoding(UnpicklerObj static int memo_get(PicklerObject *self, PyObject *key) { - long *value; + Py_ssize_t *value; char pdata[30]; - int len; + Py_ssize_t len; value = PyMemoTable_Get(self->memo, key); if (value == NULL) { @@ -1231,8 +1231,9 @@ memo_get(PicklerObject *self, PyObject * if (!self->bin) { pdata[0] = GET; - PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", *value); - len = (int)strlen(pdata); + PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, + "%" PY_FORMAT_SIZE_T "d\n", *value); + len = strlen(pdata); } else { if (*value < 256) { @@ -1266,9 +1267,9 @@ memo_get(PicklerObject *self, PyObject * static int memo_put(PicklerObject *self, PyObject *obj) { - long x; + Py_ssize_t x; char pdata[30]; - int len; + Py_ssize_t len; int status = 0; if (self->fast) @@ -1280,7 +1281,8 @@ memo_put(PicklerObject *self, PyObject * if (!self->bin) { pdata[0] = PUT; - PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x); + PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, + "%" PY_FORMAT_SIZE_T "d\n", x); len = strlen(pdata); } else { @@ -1482,7 +1484,7 @@ static int save_int(PicklerObject *self, long x) { char pdata[32]; - int len = 0; + Py_ssize_t len = 0; if (!self->bin #if SIZEOF_LONG > 4 @@ -1609,7 +1611,7 @@ save_long(PicklerObject *self, PyObject } else { header[0] = LONG4; - size = (int)nbytes; + size = (Py_ssize_t) nbytes; for (i = 1; i < 5; i++) { header[i] = (unsigned char)(size & 0xff); size >>= 8; @@ -1723,7 +1725,7 @@ save_bytes(PicklerObject *self, PyObject else { Py_ssize_t size; char header[5]; - int len; + Py_ssize_t len; size = PyBytes_Size(obj); if (size < 0) @@ -1743,6 +1745,8 @@ save_bytes(PicklerObject *self, PyObject len = 5; } else { + PyErr_SetString(PyExc_OverflowError, + "cannot serialize a bytes object larger than 4GB"); return -1; /* string too large */ } @@ -1867,8 +1871,11 @@ save_unicode(PicklerObject *self, PyObje goto error; size = PyBytes_GET_SIZE(encoded); - if (size < 0 || size > 0xffffffffL) + if (size > 0xffffffffL) { + PyErr_SetString(PyExc_OverflowError, + "cannot serialize a string larger than 4GB"); goto error; /* string too large */ + } pdata[0] = BINUNICODE; pdata[1] = (unsigned char)(size & 0xff); @@ -1913,9 +1920,9 @@ save_unicode(PicklerObject *self, PyObje /* A helper for save_tuple. Push the len elements in tuple t on the stack. */ static int -store_tuple_elements(PicklerObject *self, PyObject *t, int len) -{ - int i; +store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len) +{ + Py_ssize_t i; assert(PyTuple_Size(t) == len); @@ -1940,7 +1947,7 @@ store_tuple_elements(PicklerObject *self static int save_tuple(PicklerObject *self, PyObject *obj) { - int len, i; + Py_ssize_t len, i; const char mark_op = MARK; const char tuple_op = TUPLE; @@ -2163,7 +2170,7 @@ static int batch_list_exact(PicklerObject *self, PyObject *obj) { PyObject *item = NULL; - int this_batch, total; + Py_ssize_t this_batch, total; const char append_op = APPEND; const char appends_op = APPENDS; @@ -2208,7 +2215,7 @@ static int save_list(PicklerObject *self, PyObject *obj) { char header[3]; - int len; + Py_ssize_t len; int status = 0; if (self->fast && !fast_save_enter(self, obj)) @@ -2468,7 +2475,7 @@ save_dict(PicklerObject *self, PyObject { PyObject *items, *iter; char header[3]; - int len; + Py_ssize_t len; int status = 0; if (self->fast && !fast_save_enter(self, obj)) @@ -2603,7 +2610,7 @@ save_global(PicklerObject *self, PyObjec PyObject *code_obj; /* extension code as Python object */ long code; /* extension code as C value */ char pdata[5]; - int n; + Py_ssize_t n; PyTuple_SET_ITEM(two_tuple, 0, module_name); PyTuple_SET_ITEM(two_tuple, 1, global_name); @@ -2626,9 +2633,10 @@ save_global(PicklerObject *self, PyObjec } code = PyLong_AS_LONG(code_obj); if (code <= 0 || code > 0x7fffffffL) { - PyErr_Format(PicklingError, - "Can't pickle %R: extension code %ld is out of range", - obj, code); + if (!PyErr_Occurred()) + PyErr_Format(PicklingError, + "Can't pickle %R: extension code %ld is out of range", + obj, code); goto error; } @@ -3477,7 +3485,7 @@ pmp_copy(PicklerMemoProxyObject *self) PyObject *key, *value; key = PyLong_FromVoidPtr(entry.me_key); - value = Py_BuildValue("lO", entry.me_value, entry.me_key); + value = Py_BuildValue("nO", entry.me_value, entry.me_key); if (key == NULL || value == NULL) { Py_XDECREF(key); @@ -3638,7 +3646,7 @@ Pickler_set_memo(PicklerObject *self, Py return -1; while (PyDict_Next(obj, &i, &key, &value)) { - long memo_id; + Py_ssize_t memo_id; PyObject *memo_obj; if (!PyTuple_Check(value) || Py_SIZE(value) != 2) { @@ -3646,7 +3654,7 @@ Pickler_set_memo(PicklerObject *self, Py "'memo' values must be 2-item tuples"); goto error; } - memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0)); + memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0)); if (memo_id == -1 && PyErr_Occurred()) goto error; memo_obj = PyTuple_GET_ITEM(value, 1); @@ -3777,7 +3785,7 @@ find_class(UnpicklerObject *self, PyObje module_name, global_name); } -static int +static Py_ssize_t marker(UnpicklerObject *self) { if (self->num_marks < 1) { @@ -3855,6 +3863,28 @@ load_bool(UnpicklerObject *self, PyObjec return 0; } +/* s contains x bytes of an unsigned little-endian integer. Return its value + * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX. + */ +static Py_ssize_t +calc_binsize(char *bytes, int size) +{ + unsigned char *s = (unsigned char *)bytes; + size_t x = 0; + + assert(size == 4); + + x = (size_t) s[0]; + x |= (size_t) s[1] << 8; + x |= (size_t) s[2] << 16; + x |= (size_t) s[3] << 24; + + if (x > PY_SSIZE_T_MAX) + return -1; + else + return (Py_ssize_t) x; +} + /* s contains x bytes of a little-endian integer. Return its value as a * C int. Obscure: when x is 1 or 2, this is an unsigned little-endian * int, but when x is 4 it's a signed one. This is an historical source @@ -4099,16 +4129,18 @@ static int load_binbytes(UnpicklerObject *self) { PyObject *bytes; - long x; + Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 4) < 0) return -1; - x = calc_binint(s, 4); + x = calc_binsize(s, 4); if (x < 0) { - PyErr_SetString(UnpicklingError, - "BINBYTES pickle has negative byte count"); + PyErr_Format(PyExc_OverflowError, + "BINBYTES exceeds system's maximum size of %zd bytes", + PY_SSIZE_T_MAX + ); return -1; } @@ -4126,7 +4158,7 @@ static int load_short_binbytes(UnpicklerObject *self) { PyObject *bytes; - unsigned char x; + Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 1) < 0) @@ -4149,7 +4181,7 @@ static int load_binstring(UnpicklerObject *self) { PyObject *str; - long x; + Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 4) < 0) @@ -4178,7 +4210,7 @@ static int load_short_binstring(UnpicklerObject *self) { PyObject *str; - unsigned char x; + Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 1) < 0) @@ -4222,19 +4254,22 @@ static int load_binunicode(UnpicklerObject *self) { PyObject *str; - long size; + Py_ssize_t size; char *s; if (_Unpickler_Read(self, &s, 4) < 0) return -1; - size = calc_binint(s, 4); + size = calc_binsize(s, 4); if (size < 0) { - PyErr_SetString(UnpicklingError, - "BINUNICODE pickle has negative byte count"); + PyErr_Format(PyExc_OverflowError, + "BINUNICODE exceeds system's maximum size of %zd bytes", + PY_SSIZE_T_MAX + ); return -1; } + if (_Unpickler_Read(self, &s, size) < 0) return -1; @@ -4250,7 +4285,7 @@ static int load_tuple(UnpicklerObject *self) { PyObject *tuple; - int i; + Py_ssize_t i; if ((i = marker(self)) < 0) return -1; @@ -4309,7 +4344,7 @@ static int load_list(UnpicklerObject *self) { PyObject *list; - int i; + Py_ssize_t i; if ((i = marker(self)) < 0) return -1; @@ -4325,7 +4360,7 @@ static int load_dict(UnpicklerObject *self) { PyObject *dict, *key, *value; - int i, j, k; + Py_ssize_t i, j, k; if ((i = marker(self)) < 0) return -1; @@ -4369,7 +4404,7 @@ static int load_obj(UnpicklerObject *self) { PyObject *cls, *args, *obj = NULL; - int i; + Py_ssize_t i; if ((i = marker(self)) < 0) return -1; @@ -4400,7 +4435,7 @@ load_inst(UnpicklerObject *self) PyObject *module_name; PyObject *class_name; Py_ssize_t len; - int i; + Py_ssize_t i; char *s; if ((i = marker(self)) < 0) @@ -4594,7 +4629,7 @@ load_binpersid(UnpicklerObject *self) static int load_pop(UnpicklerObject *self) { - int len = Py_SIZE(self->stack); + Py_ssize_t len = Py_SIZE(self->stack); /* Note that we split the (pickle.py) stack into two stacks, * an object stack and a mark stack. We have to be clever and @@ -4618,7 +4653,7 @@ load_pop(UnpicklerObject *self) static int load_pop_mark(UnpicklerObject *self) { - int i; + Py_ssize_t i; if ((i = marker(self)) < 0) return -1; @@ -4632,7 +4667,7 @@ static int load_dup(UnpicklerObject *self) { PyObject *last; - int len; + Py_ssize_t len; if ((len = Py_SIZE(self->stack)) <= 0) return stack_underflow(); @@ -4711,10 +4746,7 @@ load_long_binget(UnpicklerObject *self) if (_Unpickler_Read(self, &s, 4) < 0) return -1; - idx = (long)Py_CHARMASK(s[0]); - idx |= (long)Py_CHARMASK(s[1]) << 8; - idx |= (long)Py_CHARMASK(s[2]) << 16; - idx |= (long)Py_CHARMASK(s[3]) << 24; + idx = calc_binsize(s, 4); value = _Unpickler_MemoGet(self, idx); if (value == NULL) { @@ -4860,20 +4892,17 @@ load_long_binput(UnpicklerObject *self) return stack_underflow(); value = self->stack->data[Py_SIZE(self->stack) - 1]; - idx = (long)Py_CHARMASK(s[0]); - idx |= (long)Py_CHARMASK(s[1]) << 8; - idx |= (long)Py_CHARMASK(s[2]) << 16; - idx |= (long)Py_CHARMASK(s[3]) << 24; + idx = calc_binsize(s, 4); return _Unpickler_MemoPut(self, idx, value); } static int -do_append(UnpicklerObject *self, int x) +do_append(UnpicklerObject *self, Py_ssize_t x) { PyObject *value; PyObject *list; - int len, i; + Py_ssize_t len, i; len = Py_SIZE(self->stack); if (x > len || x <= 0) @@ -4886,14 +4915,15 @@ do_append(UnpicklerObject *self, int x) if (PyList_Check(list)) { PyObject *slice; Py_ssize_t list_len; + int ret; slice = Pdata_poplist(self->stack, x); if (!slice) return -1; list_len = PyList_GET_SIZE(list); - i = PyList_SetSlice(list, list_len, list_len, slice); + ret = PyList_SetSlice(list, list_len, list_len, slice); Py_DECREF(slice); - return i; + return ret; } else { PyObject *append_func; @@ -4932,11 +4962,11 @@ load_appends(UnpicklerObject *self) } static int -do_setitems(UnpicklerObject *self, int x) +do_setitems(UnpicklerObject *self, Py_ssize_t x) { PyObject *value, *key; PyObject *dict; - int len, i; + Py_ssize_t len, i; int status = 0; len = Py_SIZE(self->stack); @@ -5104,20 +5134,21 @@ load_mark(UnpicklerObject *self) if ((self->num_marks + 1) >= self->marks_size) { size_t alloc; - int *marks; + Py_ssize_t *marks; /* Use the size_t type to check for overflow. */ alloc = ((size_t)self->num_marks << 1) + 20; - if (alloc > PY_SSIZE_T_MAX || + if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) || alloc <= ((size_t)self->num_marks + 1)) { PyErr_NoMemory(); return -1; } if (self->marks == NULL) - marks = (int *)PyMem_Malloc(alloc * sizeof(int)); + marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t)); else - marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int)); + marks = (Py_ssize_t *) PyMem_Realloc(self->marks, + alloc * sizeof(Py_ssize_t)); if (marks == NULL) { PyErr_NoMemory(); return -1;