From 04593f2068a8338c4867d8dd962098f4731d2b36 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 1 Nov 2017 02:04:00 +0100 Subject: [PATCH] bpo-18835: Add PyMem_AlignedAlloc() * Add aligned_alloc and aligned_free fields to PyMemAllocatorEx * Rename PyMemAllocatorEx structure to PyMemAllocatorEx2 to make sure that C extensions are upgraded to fill the new aligned_alloc and aligned_free fields * Add 6 new functions: * PyMem_RawAlignedAlloc() * PyMem_RawAlignedFree() * PyMem_AlignedAlloc() * PyMem_AlignedFree() * PyObject_AlignedAlloc() * PyObject_AlignedFree() --- Doc/c-api/memory.rst | 137 +++++++-- Doc/whatsnew/3.7.rst | 4 + Include/internal/mem.h | 6 +- Include/objimpl.h | 2 + Include/pymem.h | 36 ++- Lib/test/test_capi.py | 17 +- .../C API/2017-10-23-19-03-38.bpo-18835.8XEjtG.rst | 9 + Modules/_testcapimodule.c | 215 ++++++++++---- Modules/_tracemalloc.c | 131 +++++++-- Objects/obmalloc.c | 320 +++++++++++++++++++-- 10 files changed, 753 insertions(+), 124 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2017-10-23-19-03-38.bpo-18835.8XEjtG.rst diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index 0a5b0ef44b4..ceb2c125bca 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -156,6 +156,37 @@ The default raw memory block allocator uses the following functions: If *p* is *NULL*, no operation is performed. +.. c:function:: void* PyMem_RawAlignedAlloc(size_t alignment, size_t n) + + Allocates *n* bytes aligned to *alignment* bytes and returns a pointer of + type :c:type:`void\*` to the allocated memory, or *NULL* if the request + fails. + + *alignment* must be a power of 2, multiple of ``sizeof(void*)`` and greater + than 0. If the *alignment* is invalid, the function fails with an assertion + error in debug mode, or returns *NULL* in release mode. + + Requesting zero bytes returns a distinct non-*NULL* pointer if possible, as + if ``PyMem_RawAlignedAlloc(alignment, 1)`` had been called instead. 
The + memory will not have been initialized in any way. + + The allocated memory block must be released by :c:func:`PyMem_RawAlignedFree`. + + .. versionadded:: 3.7 + + +.. c:function:: void PyMem_RawAlignedFree(void *p) + + Frees the memory block pointed to by *p*, which must have been returned by a + previous call to :c:func:`PyMem_RawAlignedAlloc`. Otherwise, or if + ``PyMem_RawAlignedFree(p)`` has been called before, undefined behavior + occurs. + + If *p* is *NULL*, no operation is performed. + + .. versionadded:: 3.7 + + .. _memoryinterface: Memory Interface @@ -224,6 +255,37 @@ By default, these functions use :ref:`pymalloc memory allocator `. If *p* is *NULL*, no operation is performed. +.. c:function:: void* PyMem_AlignedAlloc(size_t alignment, size_t n) + + Allocates *n* bytes aligned to *alignment* bytes and returns a pointer of + type :c:type:`void\*` to the allocated memory, or *NULL* if the request + fails. + + *alignment* must be a power of 2, multiple of ``sizeof(void*)`` and greater + than 0. If the *alignment* is invalid, the function fails with an assertion + error in debug mode, or returns *NULL* in release mode. + + Requesting zero bytes returns a distinct non-*NULL* pointer if possible, as + if ``PyMem_AlignedAlloc(alignment, 1)`` had been called instead. The + memory will not have been initialized in any way. + + The allocated memory block must be released by :c:func:`PyMem_AlignedFree`. + + .. versionadded:: 3.7 + + +.. c:function:: void PyMem_AlignedFree(void *p) + + Frees the memory block pointed to by *p*, which must have been returned by a + previous call to :c:func:`PyMem_AlignedAlloc`. Otherwise, or if + ``PyMem_AlignedFree(p)`` has been called before, undefined behavior + occurs. + + If *p* is *NULL*, no operation is performed. + + .. versionadded:: 3.7 + + The following type-oriented macros are provided for convenience. Note that *TYPE* refers to any C type. 
@@ -326,30 +388,71 @@ By default, these functions use :ref:`pymalloc memory allocator `. If *p* is *NULL*, no operation is performed. +.. c:function:: void* PyObject_AlignedAlloc(size_t alignment, size_t n) + + Allocates *n* bytes aligned to *alignment* bytes and returns a pointer of + type :c:type:`void\*` to the allocated memory, or *NULL* if the request + fails. + + *alignment* must be a power of 2, multiple of ``sizeof(void*)`` and greater + than 0. If the *alignment* is invalid, the function fails with an assertion + error in debug mode, or returns *NULL* in release mode. + + Requesting zero bytes returns a distinct non-*NULL* pointer if possible, as + if ``PyObject_AlignedAlloc(alignment, 1)`` had been called instead. The + memory will not have been initialized in any way. + + The allocated memory block must be released by + :c:func:`PyObject_AlignedFree`. + + .. versionadded:: 3.7 + + +.. c:function:: void PyObject_AlignedFree(void *p) + + Frees the memory block pointed to by *p*, which must have been returned by a + previous call to :c:func:`PyObject_AlignedAlloc`. Otherwise, or if + ``PyObject_AlignedFree(p)`` has been called before, undefined behavior + occurs. + + If *p* is *NULL*, no operation is performed. + + .. versionadded:: 3.7 + + Customize Memory Allocators =========================== .. versionadded:: 3.4 -.. c:type:: PyMemAllocatorEx +.. c:type:: PyMemAllocatorEx2 Structure used to describe a memory block allocator. 
The structure has four fields: - +----------------------------------------------------------+---------------------------------------+ - | Field | Meaning | - +==========================================================+=======================================+ - | ``void *ctx`` | user context passed as first argument | - +----------------------------------------------------------+---------------------------------------+ - | ``void* malloc(void *ctx, size_t size)`` | allocate a memory block | - +----------------------------------------------------------+---------------------------------------+ - | ``void* calloc(void *ctx, size_t nelem, size_t elsize)`` | allocate a memory block initialized | - | | with zeros | - +----------------------------------------------------------+---------------------------------------+ - | ``void* realloc(void *ctx, void *ptr, size_t new_size)`` | allocate or resize a memory block | - +----------------------------------------------------------+---------------------------------------+ - | ``void free(void *ctx, void *ptr)`` | free a memory block | - +----------------------------------------------------------+---------------------------------------+ + +-------------------------------------------------------------------+---------------------------------------+ + | Field | Meaning | + +===================================================================+=======================================+ + | ``void *ctx`` | user context passed as first argument | + +-------------------------------------------------------------------+---------------------------------------+ + | ``void* malloc(void *ctx, size_t size)`` | allocate a memory block | + +-------------------------------------------------------------------+---------------------------------------+ + | ``void* calloc(void *ctx, size_t nelem, size_t elsize)`` | allocate a memory block initialized | + | | with zeros | + 
+-------------------------------------------------------------------+---------------------------------------+ + | ``void* realloc(void *ctx, void *ptr, size_t new_size)`` | allocate or resize a memory block | + +-------------------------------------------------------------------+---------------------------------------+ + | ``void free(void *ctx, void *ptr)`` | free a memory block | + +-------------------------------------------------------------------+---------------------------------------+ + | ``void* aligned_alloc(void *ctx, size_t alignment, size_t size)`` | allocate an aligned memory block | + +-------------------------------------------------------------------+---------------------------------------+ + | ``void aligned_free(void *ctx, void *ptr)`` | free an aligned memory block | + +-------------------------------------------------------------------+---------------------------------------+ + + .. versionchanged:: 3.7 + The :c:type:`PyMemAllocatorEx` structure was renamed to + :c:type:`PyMemAllocatorEx2` and new ``aligned_alloc`` and + ``aligned_free`` fields were added. .. versionchanged:: 3.5 The :c:type:`PyMemAllocator` structure was renamed to @@ -387,12 +490,12 @@ Customize Memory Allocators * :c:func:`PyObject_Calloc` * :c:func:`PyObject_Free` -.. c:function:: void PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) +.. c:function:: void PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx2 *allocator) Get the memory block allocator of the specified domain. -.. c:function:: void PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) +.. c:function:: void PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx2 *allocator) Set the memory block allocator of the specified domain. 
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index d3c3c1f7537..9ad48d3dc94 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -493,6 +493,10 @@ Deprecated Changes in the C API -------------------- +- The :c:type:`PyMemAllocatorEx` structure was renamed to + :c:type:`PyMemAllocatorEx2` and new ``aligned_alloc`` and ``aligned_free`` + fields were added. + - The type of results of :c:func:`PyThread_start_new_thread` and :c:func:`PyThread_get_thread_ident`, and the *id* parameter of :c:func:`PyThreadState_SetAsyncExc` changed from :c:type:`long` to diff --git a/Include/internal/mem.h b/Include/internal/mem.h index 471cdf45df2..3983677a485 100644 --- a/Include/internal/mem.h +++ b/Include/internal/mem.h @@ -15,9 +15,9 @@ extern "C" { struct _pymem_runtime_state { struct _allocator_runtime_state { - PyMemAllocatorEx mem; - PyMemAllocatorEx obj; - PyMemAllocatorEx raw; + PyMemAllocatorEx2 mem; + PyMemAllocatorEx2 obj; + PyMemAllocatorEx2 raw; } allocators; #ifdef WITH_PYMALLOC /* Array of objects used to track chunks of memory (arenas). 
*/ diff --git a/Include/objimpl.h b/Include/objimpl.h index 746f9c92134..da061e1c8b1 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -100,6 +100,8 @@ PyAPI_FUNC(void *) PyObject_Calloc(size_t nelem, size_t elsize); #endif PyAPI_FUNC(void *) PyObject_Realloc(void *ptr, size_t new_size); PyAPI_FUNC(void) PyObject_Free(void *ptr); +PyAPI_FUNC(void*) PyObject_AlignedAlloc(size_t alignment, size_t size); +PyAPI_FUNC(void) PyObject_AlignedFree(void *ptr); #ifndef Py_LIMITED_API /* This function returns the number of allocated memory blocks, regardless of size */ diff --git a/Include/pymem.h b/Include/pymem.h index 2170e0fc8b0..fe9871e46ab 100644 --- a/Include/pymem.h +++ b/Include/pymem.h @@ -16,6 +16,8 @@ PyAPI_FUNC(void *) PyMem_RawMalloc(size_t size); PyAPI_FUNC(void *) PyMem_RawCalloc(size_t nelem, size_t elsize); PyAPI_FUNC(void *) PyMem_RawRealloc(void *ptr, size_t new_size); PyAPI_FUNC(void) PyMem_RawFree(void *ptr); +PyAPI_FUNC(void*) PyMem_RawAlignedAlloc(size_t alignment, size_t size); +PyAPI_FUNC(void) PyMem_RawAlignedFree(void *ptr); /* Configure the Python memory allocators. Pass NULL to use default allocators. */ @@ -103,6 +105,8 @@ PyAPI_FUNC(void *) PyMem_Calloc(size_t nelem, size_t elsize); #endif PyAPI_FUNC(void *) PyMem_Realloc(void *ptr, size_t new_size); PyAPI_FUNC(void) PyMem_Free(void *ptr); +PyAPI_FUNC(void*) PyMem_AlignedAlloc(size_t alignment, size_t size); +PyAPI_FUNC(void) PyMem_AlignedFree(void *ptr); #ifndef Py_LIMITED_API PyAPI_FUNC(char *) _PyMem_RawStrdup(const char *str); @@ -132,11 +136,11 @@ PyAPI_FUNC(char *) _PyMem_Strdup(const char *str); */ #define PyMem_New(type, n) \ - ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ - ( (type *) PyMem_Malloc((n) * sizeof(type)) ) ) + ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ + ( (type *) PyMem_Malloc((n) * sizeof(type)) ) ) #define PyMem_NEW(type, n) \ - ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? 
NULL : \ - ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) ) + ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ + ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) ) /* * The value of (p) is always clobbered by this macro regardless of success. @@ -145,17 +149,17 @@ PyAPI_FUNC(char *) _PyMem_Strdup(const char *str); * caller's memory error handler to not lose track of it. */ #define PyMem_Resize(p, type, n) \ - ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ - (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) + ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ + (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) #define PyMem_RESIZE(p, type, n) \ - ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ - (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) + ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ + (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) /* PyMem{Del,DEL} are left over from ancient days, and shouldn't be used * anymore. They're just confusing aliases for PyMem_{Free,FREE} now. */ -#define PyMem_Del PyMem_Free -#define PyMem_DEL PyMem_FREE +#define PyMem_Del PyMem_Free +#define PyMem_DEL PyMem_FREE #ifndef Py_LIMITED_API typedef enum { @@ -184,11 +188,17 @@ typedef struct { /* release a memory block */ void (*free) (void *ctx, void *ptr); -} PyMemAllocatorEx; + + /* allocate an aligned memory block */ + void* (*aligned_alloc) (void *ctx, size_t alignment, size_t size); + + /* free an aligned memory block */ + void (*aligned_free) (void *ctx, void *ptr); +} PyMemAllocatorEx2; /* Get the memory block allocator of the specified domain. */ PyAPI_FUNC(void) PyMem_GetAllocator(PyMemAllocatorDomain domain, - PyMemAllocatorEx *allocator); + PyMemAllocatorEx2 *allocator); /* Set the memory block allocator of the specified domain. 
@@ -202,7 +212,7 @@ PyAPI_FUNC(void) PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMem_SetupDebugHooks() function must be called to reinstall the debug hooks on top on the new allocator. */ PyAPI_FUNC(void) PyMem_SetAllocator(PyMemAllocatorDomain domain, - PyMemAllocatorEx *allocator); + PyMemAllocatorEx2 *allocator); /* Setup hooks to detect bugs in the following Python memory allocator functions: diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py index bb5b2a3b9f0..371c451154b 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py @@ -793,19 +793,26 @@ def test_buffer_overflow(self): regex = re.compile(regex, flags=re.DOTALL) self.assertRegex(out, regex) - def test_api_misuse(self): - out = self.check('import _testcapi; _testcapi.pymem_api_misuse()') - regex = (r"Debug memory block at address p={ptr}: API 'm'\n" + def check_api_misuse(self, func, alloc_api, free_api): + out = self.check(f'import _testcapi; _testcapi.{func}()') + regex = (r"Debug memory block at address p={ptr}: API '{alloc_api}'\n" r" 16 bytes originally requested\n" r" The [0-9] pad bytes at p-[0-9] are FORBIDDENBYTE, as expected.\n" r" The [0-9] pad bytes at tail={ptr} are FORBIDDENBYTE, as expected.\n" r" The block was made by call #[0-9]+ to debug malloc/realloc.\n" r" Data at p: cb cb cb .*\n" r"\n" - r"Fatal Python error: bad ID: Allocated using API 'm', verified using API 'r'\n") - regex = regex.format(ptr=self.PTR_REGEX) + r"Fatal Python error: bad ID: Allocated using API '{alloc_api}'," + r" verified using API '{free_api}'\n") + regex = regex.format(ptr=self.PTR_REGEX, alloc_api=alloc_api, free_api=free_api) self.assertRegex(out, regex) + def test_pymem_api_misuse(self): + self.check_api_misuse('pymem_api_misuse', 'm', 'r') + + def test_pymem_aligned_api_misuse(self): + self.check_api_misuse('pymem_aligned_api_misuse', 'M', 'm') + def check_malloc_without_gil(self, code): out = self.check(code) expected = ('Fatal Python error: Python memory allocator called ' diff 
--git a/Misc/NEWS.d/next/C API/2017-10-23-19-03-38.bpo-18835.8XEjtG.rst b/Misc/NEWS.d/next/C API/2017-10-23-19-03-38.bpo-18835.8XEjtG.rst new file mode 100644 index 00000000000..cfb7a28bc08 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2017-10-23-19-03-38.bpo-18835.8XEjtG.rst @@ -0,0 +1,9 @@ +Add PyMem_AlignedAlloc(): + +* Add aligned_alloc and aligned_free fields to PyMemAllocatorEx +* Rename PyMemAllocatorEx structure to PyMemAllocatorEx2 to make sure + that C extensions are upgraded to fill the new aligned_alloc and + aligned_free fields +* Add 6 new functions: PyMem_RawAlignedAlloc(), PyMem_RawAlignedFree(), + PyMem_AlignedAlloc(), PyMem_AlignedFree(), PyObject_AlignedAlloc() and + PyObject_AlignedFree(). diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 1f71a09974e..787dc6731cc 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3217,63 +3217,48 @@ test_incref_decref_API(PyObject *ob) static PyObject * test_pymem_alloc0(PyObject *self) { - void *ptr; - - ptr = PyMem_RawMalloc(0); - if (ptr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "PyMem_RawMalloc(0) returns NULL"); - return NULL; - } - PyMem_RawFree(ptr); - - ptr = PyMem_RawCalloc(0, 0); - if (ptr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "PyMem_RawCalloc(0, 0) returns NULL"); - return NULL; - } - PyMem_RawFree(ptr); - - ptr = PyMem_Malloc(0); - if (ptr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "PyMem_Malloc(0) returns NULL"); - return NULL; - } - PyMem_Free(ptr); +#define TEST(ALLOC, FREE) \ + do { \ + void *ptr = ALLOC; \ + if (ptr == NULL) { \ + PyErr_SetString(PyExc_RuntimeError, #ALLOC " returns NULL"); \ + return NULL; \ + } \ + FREE(ptr); \ + } while (0) - ptr = PyMem_Calloc(0, 0); - if (ptr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "PyMem_Calloc(0, 0) returns NULL"); - return NULL; - } - PyMem_Free(ptr); + TEST(PyMem_RawMalloc(0), PyMem_RawFree); + TEST(PyMem_RawCalloc(0, 0), PyMem_RawFree); + TEST(PyMem_RawRealloc(NULL, 0), 
PyMem_RawFree); + TEST(PyMem_RawAlignedAlloc(16, 0), PyMem_RawAlignedFree); - ptr = PyObject_Malloc(0); - if (ptr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "PyObject_Malloc(0) returns NULL"); - return NULL; - } - PyObject_Free(ptr); + TEST(PyMem_Malloc(0), PyMem_Free); + TEST(PyMem_Calloc(0, 0), PyMem_Free); + TEST(PyMem_Realloc(NULL, 0), PyMem_Free); + TEST(PyMem_AlignedAlloc(16, 0), PyMem_AlignedFree); - ptr = PyObject_Calloc(0, 0); - if (ptr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "PyObject_Calloc(0, 0) returns NULL"); - return NULL; - } - PyObject_Free(ptr); + TEST(PyObject_Malloc(0), PyObject_Free); + TEST(PyObject_Calloc(0, 0), PyObject_Free); + TEST(PyObject_Realloc(NULL, 0), PyObject_Free); + TEST(PyObject_AlignedAlloc(16, 0), PyObject_AlignedFree); Py_RETURN_NONE; + +#undef TEST } typedef struct { - PyMemAllocatorEx alloc; - + PyMemAllocatorEx2 alloc; + void *ctx; size_t malloc_size; size_t calloc_nelem; size_t calloc_elsize; void *realloc_ptr; size_t realloc_new_size; void *free_ptr; - void *ctx; + size_t aligned_alloc_alignment; + size_t aligned_alloc_size; + void *aligned_free_ptr; } alloc_hook_t; static void* hook_malloc(void* ctx, size_t size) @@ -3310,15 +3295,36 @@ static void hook_free(void *ctx, void *ptr) hook->alloc.free(hook->alloc.ctx, ptr); } + +static void* hook_aligned_alloc(void* ctx, size_t alignment, size_t size) +{ + alloc_hook_t *hook = (alloc_hook_t *)ctx; + hook->ctx = ctx; + hook->aligned_alloc_alignment = alignment; + hook->aligned_alloc_size = size; + return hook->alloc.aligned_alloc(hook->alloc.ctx, alignment, size); +} + + +static void hook_aligned_free(void *ctx, void *ptr) +{ + alloc_hook_t *hook = (alloc_hook_t *)ctx; + hook->ctx = ctx; + hook->free_ptr = ptr; + hook->alloc.aligned_free(hook->alloc.ctx, ptr); +} + + static PyObject * test_setallocators(PyMemAllocatorDomain domain) { PyObject *res = NULL; const char *error_msg; alloc_hook_t hook; - PyMemAllocatorEx alloc; + PyMemAllocatorEx2 alloc; size_t size, 
size2, nelem, elsize; void *ptr, *ptr2; + size_t alignment; memset(&hook, 0, sizeof(hook)); @@ -3327,6 +3333,8 @@ test_setallocators(PyMemAllocatorDomain domain) alloc.calloc = &hook_calloc; alloc.realloc = &hook_realloc; alloc.free = &hook_free; + alloc.aligned_alloc = &hook_aligned_alloc; + alloc.aligned_free = &hook_aligned_free; PyMem_GetAllocator(domain, &hook.alloc); PyMem_SetAllocator(domain, &alloc); @@ -3420,12 +3428,49 @@ test_setallocators(PyMemAllocatorDomain domain) case PYMEM_DOMAIN_OBJ: PyObject_Free(ptr); break; } + CHECK_CTX("calloc free"); if (hook.free_ptr != ptr) { error_msg = "calloc free invalid pointer"; goto fail; } + /* aligned_alloc, aligned_free */ + alignment = 8; + size = 66; + switch(domain) + { + case PYMEM_DOMAIN_RAW: ptr = PyMem_RawAlignedAlloc(alignment, size); break; + case PYMEM_DOMAIN_MEM: ptr = PyMem_AlignedAlloc(alignment, size); break; + case PYMEM_DOMAIN_OBJ: ptr = PyObject_AlignedAlloc(alignment, size); break; + default: ptr = NULL; break; + } + + if (ptr == NULL) { + error_msg = "aligned_alloc failed"; + goto fail; + } + CHECK_CTX("aligned_alloc"); + if (hook.aligned_alloc_alignment != alignment + || hook.aligned_alloc_size != size) { + error_msg = "aligned_alloc invalid alignment or size"; + goto fail; + } + + switch(domain) + { + case PYMEM_DOMAIN_RAW: PyMem_RawAlignedFree(ptr); break; + case PYMEM_DOMAIN_MEM: PyMem_AlignedFree(ptr); break; + case PYMEM_DOMAIN_OBJ: PyObject_AlignedFree(ptr); break; + default: hook.free_ptr = (void*)((Py_uintptr_t)ptr + 1); + } + + CHECK_CTX("aligned_free"); + if (hook.free_ptr != ptr) { + error_msg = "aligned_free invalid pointer"; + goto fail; + } + Py_INCREF(Py_None); res = Py_None; goto finally; @@ -3462,9 +3507,9 @@ test_pyobject_setallocators(PyObject *self) * written by Victor Stinner. 
*/ static struct { int installed; - PyMemAllocatorEx raw; - PyMemAllocatorEx mem; - PyMemAllocatorEx obj; + PyMemAllocatorEx2 raw; + PyMemAllocatorEx2 mem; + PyMemAllocatorEx2 obj; } FmHook; static struct { @@ -3487,7 +3532,7 @@ fm_nomemory(void) static void * hook_fmalloc(void *ctx, size_t size) { - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; if (fm_nomemory()) { return NULL; } @@ -3497,7 +3542,7 @@ hook_fmalloc(void *ctx, size_t size) static void * hook_fcalloc(void *ctx, size_t nelem, size_t elsize) { - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; if (fm_nomemory()) { return NULL; } @@ -3507,7 +3552,7 @@ hook_fcalloc(void *ctx, size_t nelem, size_t elsize) static void * hook_frealloc(void *ctx, void *ptr, size_t new_size) { - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; if (fm_nomemory()) { return NULL; } @@ -3517,14 +3562,14 @@ hook_frealloc(void *ctx, void *ptr, size_t new_size) static void hook_ffree(void *ctx, void *ptr) { - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; alloc->free(alloc->ctx, ptr); } static void fm_setup_hooks(void) { - PyMemAllocatorEx alloc; + PyMemAllocatorEx2 alloc; if (FmHook.installed) { return; @@ -4069,6 +4114,19 @@ pymem_api_misuse(PyObject *self, PyObject *args) Py_RETURN_NONE; } +static PyObject* +pymem_aligned_api_misuse(PyObject *self, PyObject *args) +{ + char *buffer; + + /* Deliberate misuse of Python allocators: allocate with + PyMem_AlignedAlloc() but don't release with PyMem_AlignedFree(). 
*/ + buffer = PyMem_AlignedAlloc(8, 16); + PyMem_Free(buffer); + + Py_RETURN_NONE; +} + static PyObject* pymem_malloc_without_gil(PyObject *self, PyObject *args) { @@ -4085,6 +4143,57 @@ pymem_malloc_without_gil(PyObject *self, PyObject *args) Py_RETURN_NONE; } +static PyObject* +test_pymem_alignedalloc(PyObject *self, PyObject *Py_UNUSED(args)) +{ + size_t alignments[] = { + sizeof(void*), sizeof(void*) * 2, sizeof(void*) * 4, + 512, 1024, 2048, 4096, + 1024 * 1024}; + size_t sizes[] = { + 0, 1, 3, 8, + 127, 255, 511, 1023, 2047, + 128, 256, 512, 1024, 2048, + 129, 257, 513, 1025, 2049, + 1024 * 1024}; + size_t i, j; + void *ptr; + + for (i=0; i < Py_ARRAY_LENGTH(alignments); i++) { + for (j=0; j < Py_ARRAY_LENGTH(sizes); j++) { + size_t alignment = alignments[i]; + size_t size = sizes[j]; + +#define TEST_ALLOC(ALLOC, FREE) \ + do { \ + ptr = ALLOC(alignment, size); \ + if (ptr == NULL) { \ + PyErr_NoMemory(); \ + return NULL; \ + } \ + if (((uintptr_t)ptr & (alignment - 1)) != 0) { \ + FREE(ptr); \ + PyErr_Format(PyExc_RuntimeError, \ + #ALLOC ": alignment of %zu bytes with size %zu is not respected: " \ + "%p, remainder: %zu", \ + alignment, size, ptr, \ + ((uintptr_t)ptr & (alignment - 1))); \ + return NULL; \ + } \ + FREE(ptr); \ + } while (0) + + TEST_ALLOC(PyMem_RawAlignedAlloc, PyMem_RawAlignedFree); + TEST_ALLOC(PyMem_AlignedAlloc, PyMem_AlignedFree); + TEST_ALLOC(PyObject_AlignedAlloc, PyObject_AlignedFree); + +#undef TEST_ALLOC + } + } + + Py_RETURN_NONE; +} + static PyObject* pyobject_malloc_without_gil(PyObject *self, PyObject *args) { @@ -4604,7 +4713,9 @@ static PyMethodDef TestMethods[] = { {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, {"pymem_buffer_overflow", pymem_buffer_overflow, METH_NOARGS}, {"pymem_api_misuse", pymem_api_misuse, METH_NOARGS}, + {"pymem_aligned_api_misuse", pymem_aligned_api_misuse, METH_NOARGS}, {"pymem_malloc_without_gil", pymem_malloc_without_gil, METH_NOARGS}, + {"test_pymem_alignedalloc", 
test_pymem_alignedalloc, METH_NOARGS}, {"pyobject_malloc_without_gil", pyobject_malloc_without_gil, METH_NOARGS}, {"tracemalloc_track", tracemalloc_track, METH_VARARGS}, {"tracemalloc_untrack", tracemalloc_untrack, METH_VARARGS}, diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c index af2a2fa4d52..5cefbd12693 100644 --- a/Modules/_tracemalloc.c +++ b/Modules/_tracemalloc.c @@ -24,9 +24,9 @@ static void raw_free(void *ptr); /* Protected by the GIL */ static struct { - PyMemAllocatorEx mem; - PyMemAllocatorEx raw; - PyMemAllocatorEx obj; + PyMemAllocatorEx2 mem; + PyMemAllocatorEx2 raw; + PyMemAllocatorEx2 obj; } allocators; static struct { @@ -664,26 +664,31 @@ tracemalloc_add_trace(unsigned int domain, uintptr_t ptr, static void* tracemalloc_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) { - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; void *ptr; + int added; assert(elsize == 0 || nelem <= SIZE_MAX / elsize); - if (use_calloc) + if (use_calloc) { ptr = alloc->calloc(alloc->ctx, nelem, elsize); - else + } + else { ptr = alloc->malloc(alloc->ctx, nelem * elsize); - if (ptr == NULL) + } + if (ptr == NULL) { return NULL; + } TABLES_LOCK(); - if (ADD_TRACE(ptr, nelem * elsize) < 0) { + added = ADD_TRACE(ptr, nelem * elsize); + TABLES_UNLOCK(); + + if (added < 0) { /* Failed to allocate a trace for the new memory block */ - TABLES_UNLOCK(); alloc->free(alloc->ctx, ptr); return NULL; } - TABLES_UNLOCK(); return ptr; } @@ -691,7 +696,7 @@ tracemalloc_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) static void* tracemalloc_realloc(void *ctx, void *ptr, size_t new_size) { - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; void *ptr2; ptr2 = alloc->realloc(alloc->ctx, ptr, new_size); @@ -742,7 +747,7 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size) static void tracemalloc_free(void *ctx, void *ptr) { - 
PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; if (ptr == NULL) return; @@ -764,7 +769,7 @@ tracemalloc_alloc_gil(int use_calloc, void *ctx, size_t nelem, size_t elsize) void *ptr; if (get_reentrant()) { - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; if (use_calloc) return alloc->calloc(alloc->ctx, nelem, elsize); else @@ -807,7 +812,7 @@ tracemalloc_realloc_gil(void *ctx, void *ptr, size_t new_size) Example: PyMem_RawRealloc() is called internally by pymalloc (_PyObject_Malloc() and _PyObject_Realloc()) to allocate a new arena (new_arena()). */ - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; ptr2 = alloc->realloc(alloc->ctx, ptr, new_size); if (ptr2 != NULL && ptr != NULL) { @@ -830,6 +835,80 @@ tracemalloc_realloc_gil(void *ctx, void *ptr, size_t new_size) } +static void* +tracemalloc_aligned_alloc(void *ctx, size_t alignment, size_t size) +{ + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; + void *ptr; + int added; + + ptr = alloc->aligned_alloc(alloc->ctx, alignment, size); + if (ptr == NULL) { + return NULL; + } + + TABLES_LOCK(); + added = ADD_TRACE(ptr, size); + TABLES_UNLOCK(); + + if (added < 0) { + /* Failed to allocate a trace for the new memory block */ + alloc->aligned_free(alloc->ctx, ptr); + return NULL; + } + return ptr; +} + + +static void* +tracemalloc_aligned_alloc_gil(void *ctx, size_t alignment, size_t size) +{ + void *ptr; + + if (get_reentrant()) { + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; + return alloc->aligned_alloc(alloc->ctx, alignment, size); + } + + /* Ignore reentrant call. PyObject_Malloc() calls PyMem_Malloc() for + allocations larger than 512 bytes, don't trace the same memory + allocation twice. 
*/ + set_reentrant(1); + + ptr = tracemalloc_aligned_alloc(ctx, alignment, size); + + set_reentrant(0); + return ptr; +} + + +static void +tracemalloc_aligned_free(void *ctx, void *ptr) +{ + if (ptr == NULL) { + return; + } + + PyGILState_STATE gil_state; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; + + gil_state = PyGILState_Ensure(); + alloc->aligned_free(alloc->ctx, ptr); + PyGILState_Release(gil_state); + + TABLES_LOCK(); + REMOVE_TRACE(ptr); + TABLES_UNLOCK(); +} + + +static void +tracemalloc_aligned_free_gil(void *ctx, void *ptr) +{ + tracemalloc_aligned_free(ctx, ptr); +} + + #ifdef TRACE_RAW_MALLOC static void* tracemalloc_raw_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) @@ -838,7 +917,7 @@ tracemalloc_raw_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) void *ptr; if (get_reentrant()) { - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; if (use_calloc) return alloc->calloc(alloc->ctx, nelem, elsize); else @@ -881,7 +960,7 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size) if (get_reentrant()) { /* Reentrant call to PyMem_RawRealloc(). 
*/ - PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; + PyMemAllocatorEx2 *alloc = (PyMemAllocatorEx2 *)ctx; ptr2 = alloc->realloc(alloc->ctx, ptr, new_size); @@ -905,6 +984,20 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size) set_reentrant(0); return ptr2; } + + +static void* +tracemalloc_raw_aligned_alloc(void *ctx, size_t alignment, size_t size) +{ + return tracemalloc_aligned_alloc(ctx, alignment, size); +} + + +static void +tracemalloc_raw_aligned_free(void *ctx, void *ptr) +{ + tracemalloc_aligned_free(ctx, ptr); +} #endif /* TRACE_RAW_MALLOC */ @@ -1063,7 +1156,7 @@ tracemalloc_deinit(void) static int tracemalloc_start(int max_nframe) { - PyMemAllocatorEx alloc; + PyMemAllocatorEx2 alloc; size_t size; if (tracemalloc_init() < 0) @@ -1091,6 +1184,8 @@ tracemalloc_start(int max_nframe) alloc.calloc = tracemalloc_raw_calloc; alloc.realloc = tracemalloc_raw_realloc; alloc.free = tracemalloc_free; + alloc.aligned_alloc = tracemalloc_raw_aligned_alloc; + alloc.aligned_free = tracemalloc_raw_aligned_free; alloc.ctx = &allocators.raw; PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw); @@ -1100,6 +1195,8 @@ tracemalloc_start(int max_nframe) alloc.malloc = tracemalloc_malloc_gil; alloc.calloc = tracemalloc_calloc_gil; alloc.realloc = tracemalloc_realloc_gil; + alloc.aligned_alloc = tracemalloc_aligned_alloc_gil; + alloc.aligned_free = tracemalloc_aligned_free_gil; alloc.free = tracemalloc_free; alloc.ctx = &allocators.mem; diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index b92116cd554..6672b0d6b5e 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -19,11 +19,15 @@ static void* _PyMem_DebugRawMalloc(void *ctx, size_t size); static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize); static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size); static void _PyMem_DebugRawFree(void *ctx, void *ptr); +static void* _PyMem_DebugRawAlignedAlloc(void *ctx, size_t alignment, size_t size); +static void 
_PyMem_DebugRawAlignedFree(void *ctx, void *ptr); static void* _PyMem_DebugMalloc(void *ctx, size_t size); static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize); static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size); static void _PyMem_DebugFree(void *ctx, void *p); +static void* _PyMem_DebugAlignedAlloc(void *ctx, size_t alignment, size_t size); +static void _PyMem_DebugAlignedFree(void *ctx, void *ptr); static void _PyObject_DebugDumpAddress(const void *p); static void _PyMem_DebugCheckAddress(char api_id, const void *p); @@ -60,6 +64,8 @@ static void* _PyObject_Malloc(void *ctx, size_t size); static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize); static void _PyObject_Free(void *ctx, void *p); static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size); +static void * _PyObject_AlignedAlloc(void *ctx, size_t alignment, size_t size); +static void _PyObject_AlignedFree(void *ctx, void *ptr); #endif @@ -104,6 +110,71 @@ _PyMem_RawFree(void *ctx, void *ptr) } +static int +check_alignment(size_t alignment) +{ + /* alignment must be a power of 2, multiple of sizeof(void*) + and greater than 0 */ + return (alignment > 0 + && (alignment % sizeof(void *)) == 0 + && (alignment & (alignment - 1)) == 0); +} + + +static void * +_PyMem_RawAlignedAlloc(void *ctx, size_t alignment, size_t size) +{ + /* Even if posix_memalign() fails with EINVAL if the alignment is invalid, + use an assertion in debug mode to be able to distinguish this case + from a memory allocation failure */ + assert(check_alignment(alignment)); + + if (size == 0) { + size = 1; + } + +#ifdef MS_WINDOWS + /* check the alignment to return NULL, as posix_memalign() */ + if (!check_alignment(alignment)) { + return NULL; + } + + return _aligned_malloc(size, alignment); +#else + /* Initialize ptr to NULL to detect bugs: see the comment below */ + void *ptr = NULL; + int res; + + res = posix_memalign(&ptr, alignment, size); + if (res) { + /* alignment is invalid 
(res == EINVAL) or the memory allocation + failed (res == ENOMEM) */ + return NULL; + } + /* posix_memalign() must not allocate memory at 0 (aka "NULL") for non-zero + size. If it does, it's a bug in the C library. Use an assertion to + detect the bug early rather than crashing the application, or leaking + memory if the caller handles NULL as an allocation failure and doesn't + release the newly allocated memory (allocated at NULL). + + In the Python API, NULL means that the allocation failed, and + PyMem_RawAlignedFree(NULL) must do nothing. */ + assert(ptr != NULL); + return ptr; +#endif +} + +static void +_PyMem_RawAlignedFree(void *ctx, void *ptr) +{ +#ifdef MS_WINDOWS + _aligned_free(ptr); +#else + free(ptr); +#endif +} + + #ifdef MS_WINDOWS static void * _PyObject_ArenaVirtualAlloc(void *ctx, size_t size) @@ -152,18 +223,24 @@ _PyObject_ArenaFree(void *ctx, void *ptr, size_t size) #endif -#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree +#define PYRAW_FUNCS \ + _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree, \ + _PyMem_RawAlignedAlloc, _PyMem_RawAlignedFree #ifdef WITH_PYMALLOC -# define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free +# define PYOBJ_FUNCS \ + _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free, \ + _PyObject_AlignedAlloc, _PyObject_AlignedFree #else # define PYOBJ_FUNCS PYRAW_FUNCS #endif #define PYMEM_FUNCS PYOBJ_FUNCS typedef struct { - /* We tag each block with an API ID in order to tag API violations */ + /* We tag each block with an API ID in order to tag API violations: + lowercase letter. AlignedAlloc() converts the letter to uppercase + to distinguish aligned blocks from non-aligned allocations. 
*/ char api_id; - PyMemAllocatorEx alloc; + PyMemAllocatorEx2 alloc; } debug_alloc_api_t; static struct { debug_alloc_api_t raw; @@ -176,13 +253,15 @@ static struct { }; #define PYRAWDBG_FUNCS \ - _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree + _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree, \ + _PyMem_DebugRawAlignedAlloc, _PyMem_DebugRawAlignedFree #define PYDBG_FUNCS \ - _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree + _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree, \ + _PyMem_DebugAlignedAlloc, _PyMem_DebugAlignedFree #define _PyMem_Raw _PyRuntime.mem.allocators.raw -static const PyMemAllocatorEx _pymem_raw = { +static const PyMemAllocatorEx2 _pymem_raw = { #ifdef Py_DEBUG &_PyMem_Debug.raw, PYRAWDBG_FUNCS #else @@ -191,7 +270,7 @@ static const PyMemAllocatorEx _pymem_raw = { }; #define _PyMem _PyRuntime.mem.allocators.mem -static const PyMemAllocatorEx _pymem = { +static const PyMemAllocatorEx2 _pymem = { #ifdef Py_DEBUG &_PyMem_Debug.mem, PYDBG_FUNCS #else @@ -200,7 +279,7 @@ static const PyMemAllocatorEx _pymem = { }; #define _PyObject _PyRuntime.mem.allocators.obj -static const PyMemAllocatorEx _pyobject = { +static const PyMemAllocatorEx2 _pyobject = { #ifdef Py_DEBUG &_PyMem_Debug.obj, PYDBG_FUNCS #else @@ -235,7 +314,7 @@ _PyMem_SetupAllocators(const char *opt) } else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0) { - PyMemAllocatorEx alloc = {NULL, PYRAW_FUNCS}; + PyMemAllocatorEx2 alloc = {NULL, PYRAW_FUNCS}; PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc); PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc); @@ -248,9 +327,9 @@ _PyMem_SetupAllocators(const char *opt) else if (strcmp(opt, "pymalloc") == 0 || strcmp(opt, "pymalloc_debug") == 0) { - PyMemAllocatorEx raw_alloc = {NULL, PYRAW_FUNCS}; - PyMemAllocatorEx mem_alloc = {NULL, PYMEM_FUNCS}; - PyMemAllocatorEx obj_alloc = {NULL, 
PYOBJ_FUNCS}; + PyMemAllocatorEx2 raw_alloc = {NULL, PYRAW_FUNCS}; + PyMemAllocatorEx2 mem_alloc = {NULL, PYMEM_FUNCS}; + PyMemAllocatorEx2 obj_alloc = {NULL, PYOBJ_FUNCS}; PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &raw_alloc); PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &mem_alloc); @@ -333,12 +412,14 @@ _PyMem_PymallocEnabled(void) void PyMem_SetupDebugHooks(void) { - PyMemAllocatorEx alloc; + PyMemAllocatorEx2 alloc; alloc.malloc = _PyMem_DebugRawMalloc; alloc.calloc = _PyMem_DebugRawCalloc; alloc.realloc = _PyMem_DebugRawRealloc; alloc.free = _PyMem_DebugRawFree; + alloc.aligned_alloc = _PyMem_DebugRawAlignedAlloc; + alloc.aligned_free = _PyMem_DebugRawAlignedFree; if (_PyMem_Raw.malloc != _PyMem_DebugRawMalloc) { alloc.ctx = &_PyMem_Debug.raw; @@ -350,6 +431,8 @@ PyMem_SetupDebugHooks(void) alloc.calloc = _PyMem_DebugCalloc; alloc.realloc = _PyMem_DebugRealloc; alloc.free = _PyMem_DebugFree; + alloc.aligned_alloc = _PyMem_DebugAlignedAlloc; + alloc.aligned_free = _PyMem_DebugAlignedFree; if (_PyMem.malloc != _PyMem_DebugMalloc) { alloc.ctx = &_PyMem_Debug.mem; @@ -365,7 +448,7 @@ PyMem_SetupDebugHooks(void) } void -PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) +PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx2 *allocator) { switch(domain) { @@ -379,11 +462,13 @@ PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) allocator->calloc = NULL; allocator->realloc = NULL; allocator->free = NULL; + allocator->aligned_alloc = NULL; + allocator->aligned_free = NULL; } } void -PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) +PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx2 *allocator) { switch(domain) { @@ -444,6 +529,22 @@ PyMem_RawFree(void *ptr) _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); } +void* +PyMem_RawAlignedAlloc(size_t alignment, size_t size) +{ + /* see PyMem_RawMalloc() */ + if (size > (size_t)PY_SSIZE_T_MAX) + return NULL; + return 
_PyMem_Raw.aligned_alloc(_PyMem_Raw.ctx, alignment, size); +} + + +void +PyMem_RawAlignedFree(void *ptr) +{ + _PyMem_Raw.aligned_free(_PyMem_Raw.ctx, ptr); +} + void * PyMem_Malloc(size_t size) @@ -479,6 +580,24 @@ PyMem_Free(void *ptr) } +void* +PyMem_AlignedAlloc(size_t alignment, size_t size) +{ + /* see PyMem_RawMalloc() */ + if (size > (size_t)PY_SSIZE_T_MAX) { + return NULL; + } + return _PyMem.aligned_alloc(_PyMem.ctx, alignment, size); +} + + +void +PyMem_AlignedFree(void *ptr) +{ + _PyMem.aligned_free(_PyMem.ctx, ptr); +} + + char * _PyMem_RawStrdup(const char *str) { @@ -541,6 +660,24 @@ PyObject_Free(void *ptr) } +void* +PyObject_AlignedAlloc(size_t alignment, size_t size) +{ + /* see PyMem_RawMalloc() */ + if (size > (size_t)PY_SSIZE_T_MAX) { + return NULL; + } + return _PyObject.aligned_alloc(_PyObject.ctx, alignment, size); +} + + +void +PyObject_AlignedFree(void *ptr) +{ + _PyObject.aligned_free(_PyObject.ctx, ptr); +} + + #ifdef WITH_PYMALLOC #ifdef WITH_VALGRIND @@ -1318,6 +1455,48 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes) return PyMem_RawRealloc(ptr, nbytes); } + +static void * +_PyObject_AlignedAlloc(void *ctx, size_t alignment, size_t size) +{ + assert(check_alignment(alignment)); + + void *ptr; + + if (alignment <= ALIGNMENT) { + if (!check_alignment(alignment)) { + return NULL; + } + + if (pymalloc_alloc(ctx, &ptr, size)) { + _PyRuntime.mem.num_allocated_blocks++; + return ptr; + } + } + + ptr = PyMem_RawAlignedAlloc(alignment, size); + if (ptr != NULL) { + _PyRuntime.mem.num_allocated_blocks++; + } + return ptr; +} + + +static void +_PyObject_AlignedFree(void *ctx, void *ptr) +{ + /* PyObject_AlignedFree(NULL) has no effect */ + if (ptr == NULL) { + return; + } + + _PyRuntime.mem.num_allocated_blocks--; + if (!pymalloc_free(ctx, ptr)) { + /* pymalloc didn't allocate this address */ + PyMem_RawAlignedFree(ptr); + } +} + #else /* ! 
WITH_PYMALLOC */ /*==========================================================================*/ @@ -1430,7 +1609,8 @@ _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) total = nbytes + 4 * SST; /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN] - * ^--- p ^--- data ^--- tail + ^--- p ^--- data ^--- tail + S: nbytes stored as size_t I: API identifier (1 byte) F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after) @@ -1483,6 +1663,71 @@ _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize) } +#define ALIGN_PAD(size, alignment) \ + (_Py_SIZE_ROUND_UP(size, alignment) - size) + +static void * +_PyMem_DebugRawAlignedAlloc(void *ctx, size_t alignment, size_t nbytes) +{ + debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; + uint8_t *p; /* base address of malloc'ed block */ + uint8_t *head; + uint8_t *data; /* pointer to data bytes */ + uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */ + size_t align_pad; + size_t total; /* nbytes + align_pad + 5*SST */ + + align_pad = ALIGN_PAD(3 * SST, alignment); + if (nbytes > (size_t)PY_SSIZE_T_MAX - 5 * SST - align_pad) { + /* overflow: can't represent total as a Py_ssize_t */ + return NULL; + } + /* align_pad + 3 * SST + nbytes + 2 * SST */ + total = nbytes + align_pad + 5 * SST; + + /* Layout: [PPPPPP AAAA SSSS IFFF CCCC...CCCC FFFF NNNN] + ^-- p ^-- head ^-- data ^-- tail + + P: alignment pad (align_pad bytes) + A: alignment stored as size_t + S: nbytes stored as size_t + I: API identifier (1 byte) + F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after) + C: Clean bytes used later to store actual data + N: Serial number stored as size_t */ + + p = (uint8_t *)api->alloc.aligned_alloc(api->alloc.ctx, alignment, total); + if (p == NULL) { + return NULL; + } + + bumpserialno(); + + if (align_pad) { + memset(p, FORBIDDENBYTE, align_pad); + } + head = p + align_pad; + + /* at head, write alignment (SST bytes), write size (SST bytes), + id (1 byte), pad (SST-1 bytes) */ + write_size_t(head, 
alignment); + write_size_t(head + SST, nbytes); + head[2 * SST] = (uint8_t)Py_TOUPPER(api->api_id); + memset(head + 2 * SST + 1, FORBIDDENBYTE, SST-1); + data = head + 3*SST; + + /* data */ + memset(data, CLEANBYTE, nbytes); + tail = data + nbytes; + + /* at tail, write pad (SST bytes) and serialno (SST bytes) */ + memset(tail, FORBIDDENBYTE, SST); + write_size_t(tail + SST, _PyRuntime.mem.serialno); + + return data; +} + + /* The debug free first checks the 2*SST bytes on each end for sanity (in particular, that the FORBIDDENBYTEs with the api ID are still intact). Then fills the original bytes with DEADBYTE. @@ -1508,6 +1753,31 @@ _PyMem_DebugRawFree(void *ctx, void *p) } +static void +_PyMem_DebugRawAlignedFree(void *ctx, void *p) +{ + debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; + uint8_t *q = (uint8_t *)p - 3*SST; /* address returned from malloc */ + size_t alignment; + size_t nbytes; + size_t align_pad; + + if (p == NULL) { + return; + } + + _PyMem_DebugCheckAddress(Py_TOUPPER(api->api_id), p); + alignment = read_size_t(q); + align_pad = ALIGN_PAD(3 * SST, alignment); + nbytes = read_size_t(q + SST); + nbytes += align_pad + 5 * SST; + + q -= align_pad; + memset(q, DEADBYTE, nbytes); + api->alloc.aligned_free(api->alloc.ctx, q); +} + + static void * _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) { @@ -1583,6 +1853,14 @@ _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize) } +static void * +_PyMem_DebugAlignedAlloc(void *ctx, size_t alignment, size_t size) +{ + _PyMem_DebugCheckGIL(); + return _PyMem_DebugRawAlignedAlloc(ctx, alignment, size); +} + + static void _PyMem_DebugFree(void *ctx, void *ptr) { @@ -1591,6 +1869,14 @@ _PyMem_DebugFree(void *ctx, void *ptr) } +static void +_PyMem_DebugAlignedFree(void *ctx, void *ptr) +{ + _PyMem_DebugCheckGIL(); + _PyMem_DebugRawAlignedFree(ctx, ptr); +} + + static void * _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes) {