diff -r 39ddcc5c7fb9 Lib/pickle.py --- a/Lib/pickle.py Sat Feb 25 19:26:39 2012 +0200 +++ b/Lib/pickle.py Sat Mar 17 17:03:09 2012 +0100 @@ -798,7 +798,8 @@ map the old Python 2.x names to the new names used in Python 3.x. The *encoding* and *errors* tell pickle how to decode 8-bit string instances pickled by Python 2.x; these default to 'ASCII' and - 'strict', respectively. + 'strict', respectively. *encoding* can be 'bytes' to read 8-bit string + instances as bytes objects. """ self.readline = file.readline self.read = file.read @@ -935,6 +936,12 @@ self.append(unpack('>d', self.read(8))[0]) dispatch[BINFLOAT[0]] = load_binfloat + def decode_string(self, value): + if self.encoding == "bytes": + return value + else: + return value.decode(self.encoding, self.errors) + def load_string(self): orig = self.readline() rep = orig[:-1] @@ -946,15 +953,13 @@ break else: raise ValueError("insecure string pickle: %r" % orig) - self.append(codecs.escape_decode(rep)[0] - .decode(self.encoding, self.errors)) + self.append(self.decode_string(codecs.escape_decode(rep)[0])) dispatch[STRING[0]] = load_string def load_binstring(self): len = mloads(b'i' + self.read(4)) data = self.read(len) - value = str(data, self.encoding, self.errors) - self.append(value) + self.append(self.decode_string(data)) dispatch[BINSTRING[0]] = load_binstring def load_binbytes(self): @@ -973,9 +978,8 @@ def load_short_binstring(self): len = ord(self.read(1)) - data = bytes(self.read(len)) - value = str(data, self.encoding, self.errors) - self.append(value) + data = self.read(len) + self.append(self.decode_string(data)) dispatch[SHORT_BINSTRING[0]] = load_short_binstring def load_short_binbytes(self): diff -r 39ddcc5c7fb9 Modules/_pickle.c --- a/Modules/_pickle.c Sat Feb 25 19:26:39 2012 +0200 +++ b/Modules/_pickle.c Sat Mar 17 17:03:09 2012 +0100 @@ -4108,6 +4108,18 @@ return 0; } +/* Returns a new reference */ +static PyObject * +decode_string(UnpicklerObject *self, PyObject *value) +{ + if (strcmp(self->encoding, "bytes") == 0) { + Py_INCREF(value); + return value; + } else { + return PyUnicode_FromEncodedObject(value, self->encoding, self->errors); + } +} + static int load_string(UnpicklerObject *self) { @@ -4150,7 +4162,8 @@ free(s); if (bytes == NULL) return -1; - str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors); + + str = decode_string(self, bytes); Py_DECREF(bytes); if (str == NULL) return -1; @@ -4214,7 +4227,7 @@ static int load_binstring(UnpicklerObject *self) { - PyObject *str; + PyObject *bytes, *str; Py_ssize_t x; char *s; @@ -4231,8 +4244,12 @@ if (_Unpickler_Read(self, &s, x) < 0) return -1; - /* Convert Python 2.x strings to unicode. */ - str = PyUnicode_Decode(s, x, self->encoding, self->errors); + bytes = PyBytes_FromStringAndSize(s, x); + if (bytes == NULL) + return -1; + + str = decode_string(self, bytes); + Py_DECREF(bytes); if (str == NULL) return -1; @@ -4243,7 +4260,7 @@ static int load_short_binstring(UnpicklerObject *self) { - PyObject *str; + PyObject *bytes, *str; Py_ssize_t x; char *s; @@ -4255,8 +4272,12 @@ if (_Unpickler_Read(self, &s, x) < 0) return -1; - /* Convert Python 2.x strings to unicode. */ - str = PyUnicode_Decode(s, x, self->encoding, self->errors); + bytes = PyBytes_FromStringAndSize(s, x); + if (bytes == NULL) + return -1; + + str = decode_string(self, bytes); + Py_DECREF(bytes); if (str == NULL) return -1; @@ -5580,7 +5601,8 @@ "map the old Python 2.x names to the new names used in Python 3.x. The\n" "*encoding* and *errors* tell pickle how to decode 8-bit string\n" "instances pickled by Python 2.x; these default to 'ASCII' and\n" -"'strict', respectively.\n"); +"'strict', respectively. *encoding* can be 'bytes' to read 8-bit string\n" +"instances as byte objects.\n"); static int Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)