diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 5ab8f92..e3f8263 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -155,7 +155,11 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, PyObject *temp; if (string_type == NPY_STRING) { +#if defined(NPY_PY3K) + if ((temp = PyObject_Bytes(obj)) == NULL) { +#else if ((temp = PyObject_Str(obj)) == NULL) { +#endif return -1; } itemsize = PyString_GET_SIZE(temp); @@ -199,7 +203,11 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, PyObject *temp; if (string_type == NPY_STRING) { +#if defined(NPY_PY3K) + if ((temp = PyObject_Bytes(obj)) == NULL) { +#else if ((temp = PyObject_Str(obj)) == NULL) { +#endif return -1; } itemsize = PyString_GET_SIZE(temp); @@ -255,10 +263,14 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, /* Check if it's a Unicode string */ if (PyUnicode_Check(obj)) { +#if PY_VERSION_HEX >= 0x03030000 + int itemsize = PyUnicode_GET_LENGTH(obj) * sizeof(Py_UNICODE); +#else int itemsize = PyUnicode_GET_DATA_SIZE(obj); #ifndef Py_UNICODE_WIDE itemsize <<= 1; #endif +#endif /* else clause of PY_VERSION_HEX >= 0x03030000 */ /* * If it's already a big enough unicode object, diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index 00c71f9..cdc616f 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -641,6 +641,30 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) itemsize = (((itemsize - 1) >> 2) + 1) << 2; } } +#if 0 // PY_VERSION_HEX >= 0x03030000 + if (type_num == NPY_UNICODE) { + void *buffer; + assert(itemsize % 4 == 0); + if (swap) { + buffer = malloc(itemsize); + if (buffer == NULL) { + PyErr_NoMemory(); + return NULL; + } + memcpy(buffer, data, itemsize); + byte_swap_vector(buffer, itemsize / 4, 4); + } else { + buffer = data; + } + obj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, + 
itemsize / 4); + /* Free the buffer if we allocated it earlier: */ + if (swap) { + free(buffer); + } + return obj; + } +#endif if (type->tp_itemsize != 0) { /* String type */ obj = type->tp_alloc(type, itemsize); @@ -675,6 +699,63 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) else if (type_num == NPY_UNICODE) { /* tp_alloc inherited from Python PyBaseObject_Type */ PyUnicodeObject *uni = (PyUnicodeObject*)obj; +#if PY_VERSION_HEX >= 0x03030000 + +/* Macros taken from CPython 3.3's unicodeobject.c: */ +#define _PyUnicode_UTF8(op) \ + (((PyCompactUnicodeObject*)(op))->utf8) +#define _PyUnicode_UTF8_LENGTH(op) \ + (((PyCompactUnicodeObject*)(op))->utf8_length) +#define PyUnicode_UTF8_LENGTH(op) \ + (assert(_PyUnicode_CHECK(op)), \ + assert(PyUnicode_IS_READY(op)), \ + PyUnicode_IS_COMPACT_ASCII(op) ? \ + ((PyASCIIObject*)(op))->length : \ + _PyUnicode_UTF8_LENGTH(op)) +#define _PyUnicode_WSTR(op) \ + (((PyASCIIObject*)(op))->wstr) +#define _PyUnicode_WSTR_LENGTH(op) \ + (((PyCompactUnicodeObject*)(op))->wstr_length) +#define _PyUnicode_LENGTH(op) \ + (((PyASCIIObject *)(op))->length) +#define _PyUnicode_STATE(op) \ + (((PyASCIIObject *)(op))->state) +#define _PyUnicode_HASH(op) \ + (((PyASCIIObject *)(op))->hash) +#define _PyUnicode_DATA_ANY(op) \ + (((PyUnicodeObject*)(op))->data.any) + + /* Manually construct a legacy unicode object: itemsize data bytes plus one Py_UNICODE slot for the NUL */ + Py_ssize_t new_size = itemsize + sizeof(Py_UNICODE); + Py_ssize_t length = itemsize / sizeof(Py_UNICODE); + _PyUnicode_WSTR(uni) = (Py_UNICODE*) PyObject_MALLOC(new_size); + if (!_PyUnicode_WSTR(uni)) { + PyErr_NoMemory(); + Py_DECREF(obj); + return NULL; + } + memcpy(_PyUnicode_WSTR(uni), data, itemsize); + if (swap) { + byte_swap_vector(_PyUnicode_WSTR(uni), itemsize / 4, 4); + } + + _PyUnicode_WSTR(uni)[length] = 0; + _PyUnicode_WSTR_LENGTH(uni) = length; + _PyUnicode_HASH(uni) = -1; + _PyUnicode_STATE(uni).interned = 0; + _PyUnicode_STATE(uni).kind = 0; + _PyUnicode_STATE(uni).compact = 0; + 
_PyUnicode_STATE(uni).ready = 0; + _PyUnicode_STATE(uni).ascii = 0; + _PyUnicode_DATA_ANY(uni) = NULL; + _PyUnicode_LENGTH(uni) = 0; + _PyUnicode_UTF8(uni) = NULL; + _PyUnicode_UTF8_LENGTH(uni) = 0; + if (_PyUnicode_Ready(obj) < 0) { + Py_DECREF(obj); + return NULL; + } +#else /* PY_VERSION_HEX >= 0x03030000 */ size_t length = itemsize >> 2; Py_UNICODE *dst; #ifndef Py_UNICODE_WIDE @@ -683,7 +764,7 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) int alloc = 0; length *= 2; -#endif +#endif /* #ifndef Py_UNICODE_WIDE */ /* Set uni->str so that object can be deallocated on failure */ uni->str = NULL; uni->defenc = NULL; @@ -702,7 +783,7 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) uni->str = dst; uni->str[length] = 0; uni->length = length; -#else +#else /* #ifdef Py_UNICODE_WIDE */ /* need aligned data buffer */ if ((swap) || ((((npy_intp)data) % descr->alignment) != 0)) { buffer = malloc(itemsize); @@ -740,7 +821,8 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) uni->str = tmp; uni->str[length] = 0; uni->length = length; -#endif +#endif /* #else clause of #ifdef Py_UNICODE_WIDE */ +#endif /* else clause of PY_VERSION_HEX >= 0x03030000 */ return obj; } else { diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index e547071..2af7b9a 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -2592,7 +2592,16 @@ finish: *((npy_@name@ *)dest) = *((npy_@name@ *)src); #elif @default@ == 1 /* unicode and strings */ if (itemsize == 0) { /* unicode */ +#if PY_VERSION_HEX >= 0x03030000 + /* The call to scalar_value(robj) above will have called + * PyUnicode_AS_DATA() + * For Python 3.3 onwards this will have created a wchar_t/Py_UNICODE + * on demand */ + itemsize = PyUnicode_GET_LENGTH(robj) * sizeof(Py_UNICODE); +#else itemsize = ((PyUnicodeObject *)robj)->length * sizeof(Py_UNICODE); +#endif + } memcpy(dest, src, itemsize); /* @default@ == 2 won't get here 
*/ diff --git a/numpy/core/src/multiarray/ucsnarrow.c b/numpy/core/src/multiarray/ucsnarrow.c index b0afdc6..3087e79 100644 --- a/numpy/core/src/multiarray/ucsnarrow.c +++ b/numpy/core/src/multiarray/ucsnarrow.c @@ -20,6 +20,7 @@ * Python Unicode scalar (2-bytes on a narrow build). */ +#ifndef Py_UNICODE_WIDE /* * The ucs2 buffer must be large enough to hold 2*ucs4length characters * due to the use of surrogate pairs. @@ -85,6 +86,7 @@ PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len) } return numchars; } +#endif /* ifndef Py_UNICODE_WIDE */ /* * Returns a PyUnicodeObject initialized from a buffer containing diff --git a/numpy/core/src/multiarray/ucsnarrow.h b/numpy/core/src/multiarray/ucsnarrow.h index fe31a5e..ba2385c 100644 --- a/numpy/core/src/multiarray/ucsnarrow.h +++ b/numpy/core/src/multiarray/ucsnarrow.h @@ -1,11 +1,13 @@ #ifndef _NPY_UCSNARROW_H_ #define _NPY_UCSNARROW_H_ +#ifndef Py_UNICODE_WIDE NPY_NO_EXPORT int PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs4length); NPY_NO_EXPORT int PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len); +#endif NPY_NO_EXPORT PyUnicodeObject * PyUnicode_FromUCS4(char *src, Py_ssize_t size, int swap, int align);