diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -2,11 +2,15 @@ import unittest import pickle import pickletools +import sys import copyreg import weakref from http.cookies import SimpleCookie -from test.support import TestFailed, TESTFN, run_with_locale, no_tracing +from test.support import ( + TestFailed, TESTFN, run_with_locale, no_tracing, + _2G, _4G, precisionbigmemtest, + ) from pickle import bytes_types @@ -15,6 +19,8 @@ # kind of outer loop. protocols = range(pickle.HIGHEST_PROTOCOL + 1) +character_size = 4 if sys.maxunicode > 0xFFFF else 2 + # Return True if opcode code appears in the pickle, else False. def opcode_in_pickle(code, pickle): @@ -1128,6 +1134,55 @@ self.assertLessEqual(sizes[-1], 14) + +class BigmemPickleTests(unittest.TestCase): + + # Protocol 3 can serialize up to 4GB-1 as a bytes object + # (older protocols don't have a dedicated opcode for bytes and are + # too inefficient) + + @precisionbigmemtest(size=_2G, memuse=1 + 1, dry_run=False) + def test_huge_bytes_32b(self, size): + data = b"abcd" * (size // 4) + try: + for proto in protocols: + if proto < 3: + continue + pickled = None + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:15]) + self.assertTrue(b"abcd" in pickled[-15:]) + finally: + data = None + + @unittest.skip("test needs to be checked by someone with enough RAM") + @precisionbigmemtest(size=_4G, memuse=1 + 1, dry_run=False) + def test_huge_bytes_64b(self, size): + data = b"a" * size + try: + for proto in protocols: + if proto < 3: + continue + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + finally: + data = None + + # All protocols use 1-byte per printable ASCII character; we add another + # byte because the encoded form has to be copied into the internal buffer. + + @unittest.skip("test needs to be checked by someone with enough RAM") + @precisionbigmemtest(size=_2G, memuse=2 + character_size, dry_run=False) + def test_huge_str(self, size): + data = "a" * size + try: + for proto in protocols: + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + finally: + data = None + + # Test classes for reduce_ex class REX_one(object): diff --git a/Lib/test/support.py b/Lib/test/support.py --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -1142,7 +1142,7 @@ return wrapper return decorator -def precisionbigmemtest(size, memuse): +def precisionbigmemtest(size, memuse, dry_run=True): """Decorator for bigmem tests that need exact sizes. Like bigmemtest, but without the size scaling upward to fill available @@ -1157,10 +1157,11 @@ else: maxsize = size - if real_max_memuse and real_max_memuse < maxsize * memuse: - raise unittest.SkipTest( - "not enough memory: %.1fG minimum needed" - % (size * memuse / (1024 ** 3))) + if ((real_max_memuse or not dry_run) + and real_max_memuse < maxsize * memuse): + raise unittest.SkipTest( + "not enough memory: %.1fG minimum needed" + % (size * memuse / (1024 ** 3))) return f(self, maxsize) wrapper.size = size diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -7,6 +7,7 @@ from test.pickletester import AbstractPickleModuleTests from test.pickletester import AbstractPersistentPicklerTests from test.pickletester import AbstractPicklerUnpicklerObjectTests +from test.pickletester import BigmemPickleTests try: import _pickle @@ -37,13 +38,13 @@ return u.load() -class InMemoryPickleTests(AbstractPickleTests): +class InMemoryPickleTests(AbstractPickleTests, BigmemPickleTests): pickler = pickle._Pickler unpickler = pickle._Unpickler - def dumps(self, arg, proto=None): - return pickle.dumps(arg, proto) + def dumps(self, arg, protocol=None): + return pickle.dumps(arg, protocol) def loads(self, buf, **kwds): return pickle.loads(buf, **kwds) diff --git a/Modules/_pickle.c b/Modules/_pickle.c --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -153,7 +153,7 @@ static void Pdata_dealloc(Pdata *self) { - int i = Py_SIZE(self); + Py_ssize_t i = Py_SIZE(self); while (--i >= 0) { Py_DECREF(self->data[i]); } @@ -190,9 +190,9 @@ * number of items, this is a (non-erroneous) NOP. */ static int -Pdata_clear(Pdata *self, int clearto) -{ - int i = Py_SIZE(self); +Pdata_clear(Pdata *self, Py_ssize_t clearto) +{ + Py_ssize_t i = Py_SIZE(self); if (clearto < 0) return stack_underflow(); @@ -303,7 +303,7 @@ typedef struct { PyObject *me_key; - long me_value; + Py_ssize_t me_value; } PyMemoEntry; typedef struct { @@ -328,7 +328,7 @@ Py_ssize_t max_output_len; /* Allocation size of output_buffer. */ int proto; /* Pickle protocol number, >= 0 */ int bin; /* Boolean, true if proto > 0 */ - int buf_size; /* Size of the current buffered pickle data */ + Py_ssize_t buf_size; /* Size of the current buffered pickle data */ int fast; /* Enable fast mode if set to a true value. The fast mode disable the usage of memo, therefore speeding the pickling process by @@ -369,7 +369,7 @@ char *errors; /* Name of errors handling scheme to used when decoding strings. The default value is "strict". */ - int *marks; /* Mark stack, used for unpickling container + Py_ssize_t *marks; /* Mark stack, used for unpickling container objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ @@ -556,7 +556,7 @@ } /* Returns NULL on failure, a pointer to the value otherwise. */ -static long * +static Py_ssize_t * PyMemoTable_Get(PyMemoTable *self, PyObject *key) { PyMemoEntry *entry = _PyMemoTable_Lookup(self, key); @@ -567,7 +567,7 @@ /* Returns -1 on failure, 0 on success. */ static int -PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value) +PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value) { PyMemoEntry *entry; @@ -700,7 +700,7 @@ return (result == NULL) ? -1 : 0; } -static int +static Py_ssize_t _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n) { Py_ssize_t i, required; @@ -735,7 +735,7 @@ PyErr_NoMemory(); return -1; } - self->max_output_len = (self->output_len + n) * 2; + self->max_output_len = (self->output_len + n) / 2 * 3; if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0) return -1; } @@ -1219,9 +1219,9 @@ static int memo_get(PicklerObject *self, PyObject *key) { - long *value; + Py_ssize_t *value; char pdata[30]; - int len; + Py_ssize_t len; value = PyMemoTable_Get(self->memo, key); if (value == NULL) { @@ -1232,7 +1232,7 @@ if (!self->bin) { pdata[0] = GET; PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", *value); - len = (int)strlen(pdata); + len = strlen(pdata); } else { if (*value < 256) { @@ -1266,9 +1266,9 @@ static int memo_put(PicklerObject *self, PyObject *obj) { - long x; + Py_ssize_t x; char pdata[30]; - int len; + Py_ssize_t len; int status = 0; if (self->fast) @@ -1482,7 +1482,7 @@ save_int(PicklerObject *self, long x) { char pdata[32]; - int len = 0; + Py_ssize_t len = 0; if (!self->bin #if SIZEOF_LONG > 4 @@ -1612,7 +1612,7 @@ } else { header[0] = LONG4; - size = (int)nbytes; + size = (Py_ssize_t) nbytes; for (i = 1; i < 5; i++) { header[i] = (unsigned char)(size & 0xff); size >>= 8; @@ -1726,7 +1726,7 @@ else { Py_ssize_t size; char header[5]; - int len; + Py_ssize_t len; size = PyBytes_Size(obj); if (size < 0) @@ -1746,6 +1746,8 @@ len = 5; } else { + PyErr_SetString(PyExc_OverflowError, + "cannot serialize a bytes object larger than 4GB"); return -1; /* string too large */ } @@ -1870,8 +1872,11 @@ goto error; size = PyBytes_GET_SIZE(encoded); - if (size < 0 || size > 0xffffffffL) + if (size > 0xffffffffL) { + PyErr_SetString(PyExc_OverflowError, + "cannot serialize a string larger than 4GB"); goto error; /* string too large */ + } pdata[0] = BINUNICODE; pdata[1] = (unsigned char)(size & 0xff); @@ -1916,9 +1921,9 @@ /* A helper for save_tuple. Push the len elements in tuple t on the stack. */ static int -store_tuple_elements(PicklerObject *self, PyObject *t, int len) -{ - int i; +store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len) +{ + Py_ssize_t i; assert(PyTuple_Size(t) == len); @@ -1943,7 +1948,7 @@ static int save_tuple(PicklerObject *self, PyObject *obj) { - int len, i; + Py_ssize_t len, i; const char mark_op = MARK; const char tuple_op = TUPLE; @@ -2166,7 +2171,7 @@ batch_list_exact(PicklerObject *self, PyObject *obj) { PyObject *item = NULL; - int this_batch, total; + Py_ssize_t this_batch, total; const char append_op = APPEND; const char appends_op = APPENDS; @@ -2211,7 +2216,7 @@ save_list(PicklerObject *self, PyObject *obj) { char header[3]; - int len; + Py_ssize_t len; int status = 0; if (self->fast && !fast_save_enter(self, obj)) @@ -2471,7 +2476,7 @@ { PyObject *items, *iter; char header[3]; - int len; + Py_ssize_t len; int status = 0; if (self->fast && !fast_save_enter(self, obj)) @@ -2606,7 +2611,7 @@ PyObject *code_obj; /* extension code as Python object */ long code; /* extension code as C value */ char pdata[5]; - int n; + Py_ssize_t n; PyTuple_SET_ITEM(two_tuple, 0, module_name); PyTuple_SET_ITEM(two_tuple, 1, global_name); @@ -2629,9 +2634,10 @@ } code = PyLong_AS_LONG(code_obj); if (code <= 0 || code > 0x7fffffffL) { - PyErr_Format(PicklingError, - "Can't pickle %R: extension code %ld is out of range", - obj, code); + if (!PyErr_Occurred()) + PyErr_Format(PicklingError, + "Can't pickle %R: extension code %ld is out of range", + obj, code); goto error; } @@ -3497,7 +3503,7 @@ PyObject *key, *value; key = PyLong_FromVoidPtr(entry.me_key); - value = Py_BuildValue("lO", entry.me_value, entry.me_key); + value = Py_BuildValue("nO", entry.me_value, entry.me_key); if (key == NULL || value == NULL) { Py_XDECREF(key); @@ -3658,7 +3664,7 @@ return -1; while (PyDict_Next(obj, &i, &key, &value)) { - long memo_id; + Py_ssize_t memo_id; PyObject *memo_obj; if (!PyTuple_Check(value) || Py_SIZE(value) != 2) { @@ -3666,7 +3672,7 @@ "'memo' values must be 2-item tuples"); goto error; } - memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0)); + memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0)); if (memo_id == -1 && PyErr_Occurred()) goto error; memo_obj = PyTuple_GET_ITEM(value, 1); @@ -3797,7 +3803,7 @@ module_name, global_name); } -static int +static Py_ssize_t marker(UnpicklerObject *self) { if (self->num_marks < 1) { @@ -4119,7 +4125,7 @@ load_binbytes(UnpicklerObject *self) { PyObject *bytes; - long x; + Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 4) < 0) @@ -4146,7 +4152,7 @@ load_short_binbytes(UnpicklerObject *self) { PyObject *bytes; - unsigned char x; + Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 1) < 0) @@ -4169,7 +4175,7 @@ load_binstring(UnpicklerObject *self) { PyObject *str; - long x; + Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 4) < 0) @@ -4198,7 +4204,7 @@ load_short_binstring(UnpicklerObject *self) { PyObject *str; - unsigned char x; + Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 1) < 0) @@ -4242,7 +4248,7 @@ load_binunicode(UnpicklerObject *self) { PyObject *str; - long size; + Py_ssize_t size; char *s; if (_Unpickler_Read(self, &s, 4) < 0) @@ -4270,7 +4276,7 @@ load_tuple(UnpicklerObject *self) { PyObject *tuple; - int i; + Py_ssize_t i; if ((i = marker(self)) < 0) return -1; @@ -4329,7 +4335,7 @@ load_list(UnpicklerObject *self) { PyObject *list; - int i; + Py_ssize_t i; if ((i = marker(self)) < 0) return -1; @@ -4345,7 +4351,7 @@ load_dict(UnpicklerObject *self) { PyObject *dict, *key, *value; - int i, j, k; + Py_ssize_t i, j, k; if ((i = marker(self)) < 0) return -1; @@ -4389,7 +4395,7 @@ load_obj(UnpicklerObject *self) { PyObject *cls, *args, *obj = NULL; - int i; + Py_ssize_t i; if ((i = marker(self)) < 0) return -1; @@ -4420,7 +4426,7 @@ PyObject *module_name; PyObject *class_name; Py_ssize_t len; - int i; + Py_ssize_t i; char *s; if ((i = marker(self)) < 0) @@ -4614,7 +4620,7 @@ static int load_pop(UnpicklerObject *self) { - int len = Py_SIZE(self->stack); + Py_ssize_t len = Py_SIZE(self->stack); /* Note that we split the (pickle.py) stack into two stacks, * an object stack and a mark stack. We have to be clever and @@ -4638,7 +4644,7 @@ static int load_pop_mark(UnpicklerObject *self) { - int i; + Py_ssize_t i; if ((i = marker(self)) < 0) return -1; @@ -4652,7 +4658,7 @@ load_dup(UnpicklerObject *self) { PyObject *last; - int len; + Py_ssize_t len; if ((len = Py_SIZE(self->stack)) <= 0) return stack_underflow(); @@ -4889,11 +4895,11 @@ } static int -do_append(UnpicklerObject *self, int x) +do_append(UnpicklerObject *self, Py_ssize_t x) { PyObject *value; PyObject *list; - int len, i; + Py_ssize_t len, i; len = Py_SIZE(self->stack); if (x > len || x <= 0) @@ -4906,14 +4912,15 @@ if (PyList_Check(list)) { PyObject *slice; Py_ssize_t list_len; + int ret; slice = Pdata_poplist(self->stack, x); if (!slice) return -1; list_len = PyList_GET_SIZE(list); - i = PyList_SetSlice(list, list_len, list_len, slice); + ret = PyList_SetSlice(list, list_len, list_len, slice); Py_DECREF(slice); - return i; + return ret; } else { PyObject *append_func; @@ -4952,11 +4959,11 @@ } static int -do_setitems(UnpicklerObject *self, int x) +do_setitems(UnpicklerObject *self, Py_ssize_t x) { PyObject *value, *key; PyObject *dict; - int len, i; + Py_ssize_t len, i; int status = 0; len = Py_SIZE(self->stack); @@ -5124,20 +5131,21 @@ if ((self->num_marks + 1) >= self->marks_size) { size_t alloc; - int *marks; + Py_ssize_t *marks; /* Use the size_t type to check for overflow. */ alloc = ((size_t)self->num_marks << 1) + 20; - if (alloc > PY_SSIZE_T_MAX || + if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) || alloc <= ((size_t)self->num_marks + 1)) { PyErr_NoMemory(); return -1; } if (self->marks == NULL) - marks = (int *)PyMem_Malloc(alloc * sizeof(int)); + marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t)); else - marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int)); + marks = (Py_ssize_t *) PyMem_Realloc(self->marks, + alloc * sizeof(Py_ssize_t)); if (marks == NULL) { PyErr_NoMemory(); return -1;