diff -r de982d8b7b15 Lib/copyreg.py --- a/Lib/copyreg.py Tue Oct 13 21:26:35 2015 +0300 +++ b/Lib/copyreg.py Wed Oct 14 13:06:22 2015 +0300 @@ -82,6 +82,18 @@ def _reduce_ex(self, proto): else: return _reconstructor, args +# Helpers for pickling large str and bytes objects with protocols <4. + +def _chunks(data, size): + for i in range(0, len(data), size): + yield data[i: i + size] + +def _reduce_large_str(data): + return ''.join, (list(_chunks(data, 0x7fffffff//4)),) + +def _reduce_large_bytes(data): + return b''.join, (list(_chunks(data, 0xffffffff)),) + # Helper for __reduce_ex__ protocol 2 def __newobj__(cls, *args): diff -r de982d8b7b15 Lib/pickle.py --- a/Lib/pickle.py Tue Oct 13 21:26:35 2015 +0300 +++ b/Lib/pickle.py Wed Oct 14 13:06:22 2015 +0300 @@ -25,7 +25,8 @@ Misc variables: from types import FunctionType from copyreg import dispatch_table -from copyreg import _extension_registry, _inverted_registry, _extension_cache +from copyreg import (_extension_registry, _inverted_registry, _extension_cache, + _reduce_large_str, _reduce_large_bytes) from itertools import islice from functools import partial import sys @@ -691,10 +692,13 @@ class _Pickler: n = len(obj) if n <= 0xff: self.write(SHORT_BINBYTES + pack(" 0xffffffff and self.proto >= 4: + elif n <= 0xffffffff: + self.write(BINBYTES + pack("= 4: self.write(BINBYTES8 + pack("= 4: self.write(SHORT_BINUNICODE + pack(" 0xffffffff and self.proto >= 4: + elif n <= 0x7fffffff or self.proto >= 3 and n <= 0xffffffff: + self.write(BINUNICODE + pack("= 4: self.write(BINUNICODE8 + pack("extension_cache)->tp_name); goto error; } + st->reduce_large_str = PyObject_GetAttrString(copyreg, + "_reduce_large_str"); + if (!st->reduce_large_str) + goto error; + st->reduce_large_bytes = PyObject_GetAttrString(copyreg, + "_reduce_large_bytes"); + if (!st->reduce_large_bytes) + goto error; Py_CLEAR(copyreg); /* Load the 2.x -> 3.x stdlib module mapping tables */ @@ -2087,9 +2101,19 @@ save_bytes(PicklerObject *self, PyObject len = 9; } else { - PyErr_SetString(PyExc_OverflowError, - "cannot serialize a bytes object larger than 4 GiB"); - return -1; /* string too large */ + PyObject *reduce_value; + int status; + PickleState *st = _Pickle_GetGlobalState(); + + reduce_value = PyObject_CallFunctionObjArgs( + st->reduce_large_bytes, obj, NULL); + if (reduce_value == NULL) + return -1; + + /* save_reduce() will memoize the object automatically. */ + status = save_reduce(self, reduce_value, obj); + Py_DECREF(reduce_value); + return status; } if (_Pickler_Write(self, header, len) < 0) @@ -2188,7 +2212,8 @@ write_utf8(PicklerObject *self, char *da header[1] = (unsigned char)(size & 0xff); len = 2; } - else if ((size_t)size <= 0xffffffffUL) { + else if ((size_t)size <= 0x7fffffffUL || + (self->proto >= 3 && (size_t)size <= 0xffffffffUL)) { header[0] = BINUNICODE; header[1] = (unsigned char)(size & 0xff); header[2] = (unsigned char)((size >> 8) & 0xff); @@ -2202,9 +2227,7 @@ write_utf8(PicklerObject *self, char *da len = 9; } else { - PyErr_SetString(PyExc_OverflowError, - "cannot serialize a string larger than 4GiB"); - return -1; + return -2; } if (_Pickler_Write(self, header, len) < 0) @@ -2247,7 +2270,22 @@ static int save_unicode(PicklerObject *self, PyObject *obj) { if (self->bin) { - if (write_unicode_binary(self, obj) < 0) + int r = write_unicode_binary(self, obj); + if (r == -2) { + PyObject *reduce_value; + PickleState *st = _Pickle_GetGlobalState(); + + reduce_value = PyObject_CallFunctionObjArgs( + st->reduce_large_str, obj, NULL); + if (reduce_value == NULL) + return -1; + + /* save_reduce() will memoize the object automatically. */ + r = save_reduce(self, reduce_value, obj); + Py_DECREF(reduce_value); + return r; + } + if (r < 0) return -1; } else {