diff -r 85266c6f9ae4 Modules/_pickle.c
--- a/Modules/_pickle.c	Wed Aug 08 22:37:26 2012 +0200
+++ b/Modules/_pickle.c	Thu Aug 09 00:34:47 2012 +0200
@@ -1807,37 +1807,71 @@ save_bytes(PicklerObject *self, PyObject
 
-/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
-   backslash and newline characters to \uXXXX escapes. */
-static PyObject *
-raw_unicode_escape(PyObject *obj)
-{
-    PyObject *repr, *result;
-    char *p;
-    Py_ssize_t i, size, expandsize;
+/* Write obj escaped like PyUnicode_EncodeRawUnicodeEscape(), but with
+   backslash and newline characters also translated to \uXXXX escapes.
+   Text that needs escaping goes through a buffer of at most 64 KB.
+   Return 0 on success, -1 on failure. */
+static int
+write_raw_unicode_escape(PicklerObject *self, PyObject *obj)
+{
+    PyObject *repr;
+    char *buffer, *p;
+    Py_ssize_t i, bufsize, size, expandsize;
     void *data;
     unsigned int kind;
+    int err;
 
     if (PyUnicode_READY(obj))
-        return NULL;
+        return -1;
 
     size = PyUnicode_GET_LENGTH(obj);
+    if (size == 0)
+        return 0;
+
     data = PyUnicode_DATA(obj);
     kind = PyUnicode_KIND(obj);
-    if (kind == PyUnicode_4BYTE_KIND)
+    if ((PyUnicode_IS_ASCII(obj) || kind == PyUnicode_1BYTE_KIND)
+        && memchr(data, '\\', size) == NULL
+        && memchr(data, '\n', size) == NULL)
+    {
+        /* ASCII and latin1 strings without "\" nor "\n" character
+           don't need to be escaped */
+        if (_Pickler_Write(self, data, size) < 0)
+            return -1;
+        else
+            return 0;
+    }
+
+    if (kind == PyUnicode_2BYTE_KIND)
+        expandsize = 6;
+    else
         expandsize = 10;
+
+    /* Limit buffer size to 64 KB */
+    if (64 * 1024 / expandsize < size)
+        bufsize = 64 * 1024;
     else
-        expandsize = 6;
-
-    if (size > PY_SSIZE_T_MAX / expandsize)
-        return PyErr_NoMemory();
-    repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
+        bufsize = expandsize * size;
+    repr = PyByteArray_FromStringAndSize(NULL, bufsize);
     if (repr == NULL)
-        return NULL;
-    if (size == 0)
-        goto done;
-
-    p = PyByteArray_AS_STRING(repr);
+        return -1;
+
+    buffer = PyByteArray_AS_STRING(repr);
+    p = buffer;
     for (i=0; i < size; i++) {
-        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+        Py_UCS4 ch;
+
+        if (bufsize - expandsize < (p - buffer)) {
+            /* Fewer than expandsize bytes are left: flush the buffer.
+               The byte count gets its own variable because size is the
+               loop bound and must keep the string length. */
+            Py_ssize_t used = p - buffer;
+            if (_Pickler_Write(self, buffer, used) < 0) {
+                Py_DECREF(repr);
+                return -1;
+            }
+            p = buffer;
+        }
+
+        ch = PyUnicode_READ(kind, data, i);
         /* Map 32-bit characters to '\Uxxxxxxxx' */
         if (ch >= 0x10000) {
             *p++ = '\\';
@@ -1864,72 +1898,118 @@ raw_unicode_escape(PyObject *obj)
         else
             *p++ = (char) ch;
     }
-    size = p - PyByteArray_AS_STRING(repr);
-
-done:
-    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
+    size = p - buffer;
+    err = 0;
+    if (size) {
+        if (_Pickler_Write(self, buffer, size) < 0)
+            err = -1;
+    }
     Py_DECREF(repr);
-    return result;
-}
+    return err;
+}
+
+/* Write a BINUNICODE opcode, the length size as 4 little-endian bytes,
+   then the size bytes of UTF-8 encoded text found at data.
+   Return 0 on success, -1 on failure (including size > 0xffffffff). */
+static int
+write_utf8(PicklerObject *self, const char *data, Py_ssize_t size)
+{
+    char pdata[5];
+
+    if (size > 0xffffffffUL) {
+        /* string too large */
+        PyErr_SetString(PyExc_OverflowError,
+                        "cannot serialize a string larger than 4GB");
+        return -1;
+    }
+
+    pdata[0] = BINUNICODE;
+    pdata[1] = (unsigned char)(size & 0xff);
+    pdata[2] = (unsigned char)((size >> 8) & 0xff);
+    pdata[3] = (unsigned char)((size >> 16) & 0xff);
+    pdata[4] = (unsigned char)((size >> 24) & 0xff);
+
+    if (_Pickler_Write(self, pdata, 5) < 0)
+        return -1;
+
+    if (_Pickler_Write(self, data, size) < 0)
+        return -1;
+
+    return 0;
+}
+
+/* Write the string obj through write_utf8(), reusing the ASCII data or
+   the cached UTF-8 representation when there is one, and encoding to
+   UTF-8 with the "surrogatepass" error handler otherwise.
+   Return 0 on success, -1 on failure. */
+static int
+write_unicode_binary(PicklerObject *self, PyObject *obj)
+{
+    PyObject *encoded = NULL;
+    PyCompactUnicodeObject *compact;
+    Py_ssize_t size;
+
+    if (PyUnicode_READY(obj))
+        return -1;
+
+    if (PyUnicode_IS_ASCII(obj)) {
+        /* ASCII is compatible with UTF-8 */
+        void *data = PyUnicode_DATA(obj);
+        size = PyUnicode_GET_LENGTH(obj);
+        return write_utf8(self, data, size);
+    }
+
+    compact = (PyCompactUnicodeObject*)obj;
+    if (compact->utf8) {
+        /* string already available encoded as UTF-8 */
+        return write_utf8(self, compact->utf8, compact->utf8_length);
+    }
+
+    /* don't try to encode a string if we know that it will not fit
+       (the encoded string must be smaller than 4 GB) */
+    if (PyUnicode_GET_LENGTH(obj) > 0xffffffffUL) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "cannot serialize a string larger than 4GB");
+        return -1;
+    }
+
+    encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
+    if (encoded == NULL)
+        return -1;
+
+    size = PyBytes_GET_SIZE(encoded);
+    if (write_utf8(self, PyBytes_AS_STRING(encoded), size) < 0) {
+        Py_DECREF(encoded);
+        return -1;
+    }
+    Py_DECREF(encoded);
+    return 0;
+}
+
 
 static int
 save_unicode(PicklerObject *self, PyObject *obj)
 {
-    Py_ssize_t size;
-    PyObject *encoded = NULL;
-
     if (self->bin) {
-        char pdata[5];
-
-        encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
-        if (encoded == NULL)
-            goto error;
-
-        size = PyBytes_GET_SIZE(encoded);
-        if (size > 0xffffffffL) {
-            PyErr_SetString(PyExc_OverflowError,
-                            "cannot serialize a string larger than 4GB");
-            goto error;          /* string too large */
-        }
-
-        pdata[0] = BINUNICODE;
-        pdata[1] = (unsigned char)(size & 0xff);
-        pdata[2] = (unsigned char)((size >> 8) & 0xff);
-        pdata[3] = (unsigned char)((size >> 16) & 0xff);
-        pdata[4] = (unsigned char)((size >> 24) & 0xff);
-
-        if (_Pickler_Write(self, pdata, 5) < 0)
-            goto error;
-
-        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
-            goto error;
+        if (write_unicode_binary(self, obj) < 0)
+            return -1;
     }
     else {
         const char unicode_op = UNICODE;
 
-        encoded = raw_unicode_escape(obj);
-        if (encoded == NULL)
-            goto error;
-
         if (_Pickler_Write(self, &unicode_op, 1) < 0)
-            goto error;
-
-        size = PyBytes_GET_SIZE(encoded);
-        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
-            goto error;
+            return -1;
+
+        if (write_raw_unicode_escape(self, obj) < 0)
+            return -1;
 
         if (_Pickler_Write(self, "\n", 1) < 0)
-            goto error;
+            return -1;
     }
     if (memo_put(self, obj) < 0)
-        goto error;
-
-    Py_DECREF(encoded);
+        return -1;
+
     return 0;
-
-  error:
-    Py_XDECREF(encoded);
-    return -1;
 }
 
 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */