Note that the total runtime is 200,000 samples, so 1,000 samples is about 0.5% of the total runtime.

Samples│
       │    Disassembly of section .text:
       │
       │    000000000012ef10 <_Py_Dealloc>:
       │    _PyObject_IS_GC():
       │
       │    // Fast inlined version of PyObject_IS_GC()
       │    static inline int
       │    _PyObject_IS_GC(PyObject *obj)
       │    {
       │    return (PyType_IS_GC(Py_TYPE(obj))
   1057│    mov rax,QWORD PTR [rdi+0x8]
       │    && (Py_TYPE(obj)->tp_is_gc == NULL
    757│    ┌──test BYTE PTR [rax+0xa9],0x40
   1799│    ├──jne 10
       │    │_Py_Dealloc():
       │    │_PyTrash_dealloc(op);
       │    │}
       │    │else {
       │    │PyTypeObject *type = Py_TYPE(op);
       │    │destructor dealloc = type->tp_dealloc;
       │    │(*dealloc)(op);
   2071│    │→ jmp QWORD PTR [rax+0x30]
       │    │{
       │ 10:└─→push rbx
      6│    sub rsp,0x10
       │    _PyObject_IS_GC():
     49│    mov rbx,QWORD PTR [rax+0x148]
      6│    test rbx,rbx
       │    ↓ jne aa
       │    _PyTrash_dealloc():
       │    if (_PyObject_GC_IS_TRACKED(op)) {
     10│ 25: mov rsi,QWORD PTR [rdi-0x10]
      3│    test rsi,rsi
       │    ↓ je 55
       │    _PyObject_GC_UNTRACK():
       │    PyGC_Head *prev = _PyGCHead_PREV(gc);
      7│    mov r8,QWORD PTR [rdi-0x8]
      7│    and r8,0xfffffffffffffffc
       │    _PyGCHead_SET_NEXT(prev, next);
     36│    mov QWORD PTR [r8],rsi
       │    _PyGCHead_SET_PREV(next, prev);
      8│    mov r9,QWORD PTR [rsi+0x8]
     12│    and r9d,0x3
      9│    or r9,r8
     10│    mov QWORD PTR [rsi+0x8],r9
       │    gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
     13│    and QWORD PTR [rdi-0x8],0x1
       │    gc->_gc_next = 0;
      1│    mov QWORD PTR [rdi-0x10],0x0
       │    gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
      8│ 55: mov QWORD PTR [rsp+0x8],rdi
       │    _PyTrash_dealloc():
       │    PyThreadState *tstate = PyThreadState_Get();
     19│    → call PyThreadState_Get
       │    if (tstate->trash_delete_nesting >= _PyTrash_UNWIND_LEVEL) {