diff -r 288953a787ce Lib/datetime.py --- a/Lib/datetime.py Mon Oct 12 14:38:24 2015 +0200 +++ b/Lib/datetime.py Mon Oct 12 21:00:38 2015 +0300 @@ -676,9 +676,12 @@ class date: year, month, day (required, base 1) """ - if month is None and isinstance(year, bytes) and len(year) == 4 and \ - 1 <= year[2] <= 12: + if (month is None and + isinstance(year, (bytes, str)) and len(year) == 4 and + 1 <= ord(year[2:3]) <= 12): # Pickle support + if isinstance(year, str): + year = year.encode('ascii', 'surrogateescape') self = object.__new__(cls) self.__setstate(year) self._hashcode = -1 @@ -1036,8 +1039,11 @@ class time: second, microsecond (default to zero) tzinfo (default to None) """ - if isinstance(hour, bytes) and len(hour) == 6 and hour[0] < 24: + if (isinstance(hour, (bytes, str)) and len(hour) == 6 and + ord(hour[0:1]) < 24): # Pickle support + if isinstance(hour, str): + hour = hour.encode('ascii', 'surrogateescape') self = object.__new__(cls) self.__setstate(hour, minute or None) self._hashcode = -1 @@ -1318,8 +1324,11 @@ class datetime(date): def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, microsecond=0, tzinfo=None): - if isinstance(year, bytes) and len(year) == 10 and 1 <= year[2] <= 12: + if (isinstance(year, (bytes, str)) and len(year) == 10 and + 1 <= ord(year[2:3]) <= 12): # Pickle support + if isinstance(year, str): + year = year.encode('ascii', 'surrogateescape') self = object.__new__(cls) self.__setstate(year, month) self._hashcode = -1 diff -r 288953a787ce Lib/pickle.py --- a/Lib/pickle.py Mon Oct 12 14:38:24 2015 +0200 +++ b/Lib/pickle.py Mon Oct 12 21:00:38 2015 +0300 @@ -982,7 +982,7 @@ class _Pickler: class _Unpickler: def __init__(self, file, *, fix_imports=True, - encoding="ASCII", errors="strict"): + encoding="ASCII", errors="surrogateescape"): """This takes a binary file for reading a pickle data stream. 
The protocol version of the pickle is detected automatically, so @@ -1007,8 +1007,8 @@ class _Unpickler: pickle will try to map the old Python 2 names to the new names used in Python 3. The *encoding* and *errors* tell pickle how to decode 8-bit string instances pickled by Python 2; these - default to 'ASCII' and 'strict', respectively. *encoding* can be - 'bytes' to read theses 8-bit string instances as bytes objects. + default to 'ASCII' and 'surrogateescape', respectively. *encoding* + can be 'bytes' to read theses 8-bit string instances as bytes objects. """ self._file_readline = file.readline self._file_read = file.read @@ -1560,11 +1560,11 @@ def _dumps(obj, protocol=None, *, fix_im assert isinstance(res, bytes_types) return res -def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): +def _load(file, *, fix_imports=True, encoding="ASCII", errors="surrogateescape"): return _Unpickler(file, fix_imports=fix_imports, encoding=encoding, errors=errors).load() -def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): +def _loads(s, *, fix_imports=True, encoding="ASCII", errors="surrogateescape"): if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) diff -r 288953a787ce Modules/_datetimemodule.c --- a/Modules/_datetimemodule.c Mon Oct 12 14:38:24 2015 +0200 +++ b/Modules/_datetimemodule.c Mon Oct 12 21:00:38 2015 +0300 @@ -2434,18 +2434,30 @@ date_new(PyTypeObject *type, PyObject *a /* Check for invocation from pickle with __getstate__ state */ if (PyTuple_GET_SIZE(args) == 1 && - PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) && - PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE && - MONTH_IS_SANE(PyBytes_AS_STRING(state)[2])) + (state = PyTuple_GET_ITEM(args, 0)) && + ((PyBytes_Check(state) && + PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE && + MONTH_IS_SANE(PyBytes_AS_STRING(state)[2])) || + (PyUnicode_Check(state) && + PyUnicode_GET_LENGTH(state) == 
_PyDateTime_DATE_DATASIZE && + MONTH_IS_SANE(PyUnicode_READ_CHAR(state, 2))))) { PyDateTime_Date *me; - + PyObject *encoded = NULL; + + if (PyUnicode_Check(state)) { + encoded = PyUnicode_AsEncodedString(state, "ascii", "surrogateescape"); + if (encoded == NULL) + return NULL; + state = encoded; + } me = (PyDateTime_Date *) (type->tp_alloc(type, 0)); if (me != NULL) { char *pdata = PyBytes_AS_STRING(state); memcpy(me->data, pdata, _PyDateTime_DATE_DATASIZE); me->hashcode = -1; } + Py_XDECREF(encoded); return (PyObject *)me; } @@ -3501,12 +3513,18 @@ time_new(PyTypeObject *type, PyObject *a /* Check for invocation from pickle with __getstate__ state */ if (PyTuple_GET_SIZE(args) >= 1 && PyTuple_GET_SIZE(args) <= 2 && - PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) && - PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE && - ((unsigned char) (PyBytes_AS_STRING(state)[0])) < 24) + (state = PyTuple_GET_ITEM(args, 0)) && + ((PyBytes_Check(state) && + PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE && + ((unsigned char) (PyBytes_AS_STRING(state)[0])) < 24) || + (PyUnicode_Check(state) && + PyUnicode_GET_LENGTH(state) == _PyDateTime_TIME_DATASIZE && + PyUnicode_READ_CHAR(state, 0) < 24))) { PyDateTime_Time *me; char aware; + PyObject *encoded = NULL; + if (PyTuple_GET_SIZE(args) == 2) { tzinfo = PyTuple_GET_ITEM(args, 1); @@ -3516,6 +3534,12 @@ time_new(PyTypeObject *type, PyObject *a return NULL; } } + if (PyUnicode_Check(state)) { + encoded = PyUnicode_AsEncodedString(state, "ascii", "surrogateescape"); + if (encoded == NULL) + return NULL; + state = encoded; + } aware = (char)(tzinfo != Py_None); me = (PyDateTime_Time *) (type->tp_alloc(type, aware)); if (me != NULL) { @@ -3529,6 +3553,7 @@ time_new(PyTypeObject *type, PyObject *a me->tzinfo = tzinfo; } } + Py_XDECREF(encoded); return (PyObject *)me; } @@ -4000,12 +4025,17 @@ datetime_new(PyTypeObject *type, PyObjec /* Check for invocation from pickle with __getstate__ state */ if
(PyTuple_GET_SIZE(args) >= 1 && PyTuple_GET_SIZE(args) <= 2 && - PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) && - PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE && - MONTH_IS_SANE(PyBytes_AS_STRING(state)[2])) + (state = PyTuple_GET_ITEM(args, 0)) && + ((PyBytes_Check(state) && + PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE && + MONTH_IS_SANE(PyBytes_AS_STRING(state)[2])) || + (PyUnicode_Check(state) && + PyUnicode_GET_LENGTH(state) == _PyDateTime_DATETIME_DATASIZE && + MONTH_IS_SANE(PyUnicode_READ_CHAR(state, 2))))) { PyDateTime_DateTime *me; char aware; + PyObject *encoded = NULL; if (PyTuple_GET_SIZE(args) == 2) { tzinfo = PyTuple_GET_ITEM(args, 1); @@ -4015,6 +4045,12 @@ datetime_new(PyTypeObject *type, PyObjec return NULL; } } + if (PyUnicode_Check(state)) { + encoded = PyUnicode_AsEncodedString(state, "ascii", "surrogateescape"); + if (encoded == NULL) + return NULL; + state = encoded; + } aware = (char)(tzinfo != Py_None); me = (PyDateTime_DateTime *) (type->tp_alloc(type , aware)); if (me != NULL) { @@ -4028,6 +4064,7 @@ datetime_new(PyTypeObject *type, PyObjec me->tzinfo = tzinfo; } } + Py_XDECREF(encoded); return (PyObject *)me; } diff -r 288953a787ce Modules/_pickle.c --- a/Modules/_pickle.c Mon Oct 12 14:38:24 2015 +0200 +++ b/Modules/_pickle.c Mon Oct 12 21:00:38 2015 +0300 @@ -607,7 +607,7 @@ typedef struct UnpicklerObject { 2.x. The default value is "ASCII" */ char *errors; /* Name of errors handling scheme to used when decoding strings. The default value is - "strict". */ + "surrogateescape". */ Py_ssize_t *marks; /* Mark stack, used for unpickling container objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. 
*/ @@ -1446,7 +1446,7 @@ static int if (encoding == NULL) encoding = "ASCII"; if (errors == NULL) - errors = "strict"; + errors = "surrogateescape"; self->encoding = _PyMem_Strdup(encoding); self->errors = _PyMem_Strdup(errors); @@ -6561,7 +6561,7 @@ Unpickler_clear(UnpicklerObject *self) * fix_imports: bool = True encoding: str = 'ASCII' - errors: str = 'strict' + errors: str = 'surrogateescape' This takes a binary file for reading a pickle data stream. @@ -6580,7 +6580,7 @@ which are used to control compatiblity s generated by Python 2. If *fix_imports* is True, pickle will try to map the old Python 2 names to the new names used in Python 3. The *encoding* and *errors* tell pickle how to decode 8-bit string -instances pickled by Python 2; these default to 'ASCII' and 'strict', +instances pickled by Python 2; these default to 'ASCII' and 'surrogateescape', respectively. The *encoding* can be 'bytes' to read these 8-bit string instances as bytes objects. [clinic start generated code]*/ @@ -6589,7 +6589,7 @@ static int _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file, int fix_imports, const char *encoding, const char *errors) -/*[clinic end generated code: output=e2c8ce748edc57b0 input=30b4dc9e976b890c]*/ +/*[clinic end generated code: output=e2c8ce748edc57b0 input=e7ae267f1cba1ef4]*/ { _Py_IDENTIFIER(persistent_load); @@ -7104,7 +7104,7 @@ static PyObject * * fix_imports: bool = True encoding: str = 'ASCII' - errors: str = 'strict' + errors: str = 'surrogateescape' Read and return an object from the pickle data stored in a file. @@ -7126,7 +7126,7 @@ which are used to control compatiblity s generated by Python 2. If *fix_imports* is True, pickle will try to map the old Python 2 names to the new names used in Python 3. 
The *encoding* and *errors* tell pickle how to decode 8-bit string -instances pickled by Python 2; these default to 'ASCII' and 'strict', +instances pickled by Python 2; these default to 'ASCII' and 'surrogateescape', respectively. The *encoding* can be 'bytes' to read these 8-bit string instances as bytes objects. [clinic start generated code]*/ @@ -7134,7 +7134,7 @@ string instances as bytes objects. static PyObject * _pickle_load_impl(PyModuleDef *module, PyObject *file, int fix_imports, const char *encoding, const char *errors) -/*[clinic end generated code: output=798f1c57cb2b4eb1 input=da97372e38e510a6]*/ +/*[clinic end generated code: output=798f1c57cb2b4eb1 input=a7a583666a1461b7]*/ { PyObject *result; UnpicklerObject *unpickler = _Unpickler_New(); @@ -7167,7 +7167,7 @@ static PyObject * * fix_imports: bool = True encoding: str = 'ASCII' - errors: str = 'strict' + errors: str = 'surrogateescape' Read and return an object from the given pickle data. @@ -7180,7 +7180,7 @@ which are used to control compatiblity s generated by Python 2. If *fix_imports* is True, pickle will try to map the old Python 2 names to the new names used in Python 3. The *encoding* and *errors* tell pickle how to decode 8-bit string -instances pickled by Python 2; these default to 'ASCII' and 'strict', +instances pickled by Python 2; these default to 'ASCII' and 'surrogateescape', respectively. The *encoding* can be 'bytes' to read these 8-bit string instances as bytes objects. [clinic start generated code]*/ @@ -7188,7 +7188,7 @@ string instances as bytes objects. 
static PyObject * _pickle_loads_impl(PyModuleDef *module, PyObject *data, int fix_imports, const char *encoding, const char *errors) -/*[clinic end generated code: output=61e9cdb01e36a736 input=f57f0fdaa2b4cb8b]*/ +/*[clinic end generated code: output=61e9cdb01e36a736 input=e49ccb8169359900]*/ { PyObject *result; UnpicklerObject *unpickler = _Unpickler_New(); diff -r 288953a787ce Modules/clinic/_pickle.c.h --- a/Modules/clinic/_pickle.c.h Mon Oct 12 14:38:24 2015 +0200 +++ b/Modules/clinic/_pickle.c.h Mon Oct 12 21:00:38 2015 +0300 @@ -248,7 +248,8 @@ exit: } PyDoc_STRVAR(_pickle_Unpickler___init____doc__, -"Unpickler(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n" +"Unpickler(file, *, fix_imports=True, encoding=\'ASCII\',\n" +" errors=\'surrogateescape\')\n" "--\n" "\n" "This takes a binary file for reading a pickle data stream.\n" @@ -268,7 +269,7 @@ PyDoc_STRVAR(_pickle_Unpickler___init___ "generated by Python 2. If *fix_imports* is True, pickle will try to\n" "map the old Python 2 names to the new names used in Python 3. The\n" "*encoding* and *errors* tell pickle how to decode 8-bit string\n" -"instances pickled by Python 2; these default to \'ASCII\' and \'strict\',\n" +"instances pickled by Python 2; these default to \'ASCII\' and \'surrogateescape\',\n" "respectively. 
The *encoding* can be \'bytes\' to read these 8-bit\n" "string instances as bytes objects."); @@ -285,7 +286,7 @@ static int PyObject *file; int fix_imports = 1; const char *encoding = "ASCII"; - const char *errors = "strict"; + const char *errors = "surrogateescape"; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|$pss:Unpickler", _keywords, &file, &fix_imports, &encoding, &errors)) @@ -447,7 +448,7 @@ exit: PyDoc_STRVAR(_pickle_load__doc__, "load($module, /, file, *, fix_imports=True, encoding=\'ASCII\',\n" -" errors=\'strict\')\n" +" errors=\'surrogateescape\')\n" "--\n" "\n" "Read and return an object from the pickle data stored in a file.\n" @@ -470,7 +471,7 @@ PyDoc_STRVAR(_pickle_load__doc__, "generated by Python 2. If *fix_imports* is True, pickle will try to\n" "map the old Python 2 names to the new names used in Python 3. The\n" "*encoding* and *errors* tell pickle how to decode 8-bit string\n" -"instances pickled by Python 2; these default to \'ASCII\' and \'strict\',\n" +"instances pickled by Python 2; these default to \'ASCII\' and \'surrogateescape\',\n" "respectively. The *encoding* can be \'bytes\' to read these 8-bit\n" "string instances as bytes objects."); @@ -489,7 +490,7 @@ static PyObject * PyObject *file; int fix_imports = 1; const char *encoding = "ASCII"; - const char *errors = "strict"; + const char *errors = "surrogateescape"; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|$pss:load", _keywords, &file, &fix_imports, &encoding, &errors)) @@ -502,7 +503,7 @@ exit: PyDoc_STRVAR(_pickle_loads__doc__, "loads($module, /, data, *, fix_imports=True, encoding=\'ASCII\',\n" -" errors=\'strict\')\n" +" errors=\'surrogateescape\')\n" "--\n" "\n" "Read and return an object from the given pickle data.\n" @@ -516,7 +517,7 @@ PyDoc_STRVAR(_pickle_loads__doc__, "generated by Python 2. If *fix_imports* is True, pickle will try to\n" "map the old Python 2 names to the new names used in Python 3. 
The\n" "*encoding* and *errors* tell pickle how to decode 8-bit string\n" -"instances pickled by Python 2; these default to \'ASCII\' and \'strict\',\n" +"instances pickled by Python 2; these default to \'ASCII\' and \'surrogateescape\',\n" "respectively. The *encoding* can be \'bytes\' to read these 8-bit\n" "string instances as bytes objects."); @@ -535,7 +536,7 @@ static PyObject * PyObject *data; int fix_imports = 1; const char *encoding = "ASCII"; - const char *errors = "strict"; + const char *errors = "surrogateescape"; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|$pss:loads", _keywords, &data, &fix_imports, &encoding, &errors)) @@ -545,4 +546,4 @@ static PyObject * exit: return return_value; } -/*[clinic end generated code: output=06f3a5233298448e input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4482ed8d0039d4c0 input=a9049054013a1b77]*/