This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in the Python's Developer Guide.

Author vstinner
Recipients terry.reedy, vstinner
Date 2021-09-17.21:23:48
SpamBayes Score -1.0
Marked as misclassified Yes
Message-id <1631913829.05.0.399515268437.issue45200@roundup.psfhosted.org>
In-reply-to
Content
When the test hangs, a background Python process starts to use 100% of my CPU.

This process is stuck in _thread.start_new_thread(). I opened the process in gdb.

(gdb) thread 1
[Switching to thread 1 (Thread 0x7fece33615c0 (LWP 132301))]
#0  __interceptor_pthread_create (thread=thread@entry=0x7ffe79cd6190, attr=attr@entry=0x7ffe79cd61b0, start_routine=start_routine@entry=0x6fed30 <pythread_wrapper>, arg=arg@entry=0x60200019aed0)
    at ../../../../libsanitizer/asan/asan_interceptors.cpp:245
245	    while (atomic_load(&param.is_registered, memory_order_acquire) == 0)
(gdb) where
#0  __interceptor_pthread_create (thread=thread@entry=0x7ffe79cd6190, attr=attr@entry=0x7ffe79cd61b0, start_routine=start_routine@entry=0x6fed30 <pythread_wrapper>, arg=arg@entry=0x60200019aed0)
    at ../../../../libsanitizer/asan/asan_interceptors.cpp:245
#1  0x00000000006ff242 in PyThread_start_new_thread (func=func@entry=0x7f45d0 <thread_run>, arg=arg@entry=0x604000e83c50) at Python/thread_pthread.h:287
(...)

(gdb) thread 2
[Switching to thread 2 (Thread 0x7fecd151c640 (LWP 132302))]
#0  __sanitizer::atomic_exchange<__sanitizer::atomic_uint32_t> (mo=__sanitizer::memory_order_acquire, v=2, a=0x640000001b00) at ../../../../libsanitizer/sanitizer_common/sanitizer_atomic_clang.h:62
62	inline typename T::Type atomic_exchange(volatile T *a,
(gdb) where
#0  __sanitizer::atomic_exchange<__sanitizer::atomic_uint32_t> (mo=__sanitizer::memory_order_acquire, v=2, a=0x640000001b00) at ../../../../libsanitizer/sanitizer_common/sanitizer_atomic_clang.h:62
#1  __sanitizer::BlockingMutex::Lock (this=this@entry=0x640000001b00) at ../../../../libsanitizer/sanitizer_common/sanitizer_linux.cpp:649
#2  0x00007fece3914e15 in __sanitizer::GenericScopedLock<__sanitizer::BlockingMutex>::GenericScopedLock (mu=0x640000001b00, this=<synthetic pointer>) at ../../../../libsanitizer/sanitizer_common/sanitizer_mutex.h:183
#3  __sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView> >::GetFromAllocator (this=this@entry=0x7fece3a13f00 <__asan::instance>, stat=stat@entry=0x7feccfb46c40, class_id=class_id@entry=36, 
    chunks=chunks@entry=0x7feccfb42330, n_chunks=n_chunks@entry=8) at ../../../../libsanitizer/sanitizer_common/sanitizer_allocator_primary64.h:146
#4  0x00007fece3914f41 in __sanitizer::SizeClassAllocator64LocalCache<__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView> > >::Refill (this=this@entry=0x7feccfb390e0, c=c@entry=0x7feccfb42320, 
    allocator=allocator@entry=0x7fece3a13f00 <__asan::instance>, class_id=class_id@entry=36) at ../../../../libsanitizer/sanitizer_common/sanitizer_allocator_local_cache.h:102
#5  0x00007fece3915209 in __sanitizer::SizeClassAllocator64LocalCache<__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView> > >::Allocate (class_id=36, allocator=0x7fece3a13f00 <__asan::instance>, 
    this=0x7feccfb390e0) at ../../../../libsanitizer/sanitizer_common/sanitizer_allocator_local_cache.h:38
#6  __sanitizer::CombinedAllocator<__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView> >, __sanitizer::LargeMmapAllocatorPtrArrayDynamic>::Allocate (this=this@entry=0x7fece3a13f00 <__asan::instance>, 
    cache=0x7feccfb390e0, size=<optimized out>, size@entry=8192, alignment=alignment@entry=1) at ../../../../libsanitizer/sanitizer_common/sanitizer_allocator_combined.h:69
#7  0x00007fece39118f6 in __asan::QuarantineCallback::Allocate (size=8192, this=<synthetic pointer>) at ../../../../libsanitizer/asan/asan_allocator.cpp:869
#8  __sanitizer::QuarantineCache<__asan::QuarantineCallback>::Enqueue (size=32, ptr=0x6030005477d0, cb=..., this=0x7feccfb39060) at ../../../../libsanitizer/sanitizer_common/sanitizer_quarantine.h:223
#9  __sanitizer::Quarantine<__asan::QuarantineCallback, __asan::AsanChunk>::Put (size=32, ptr=0x6030005477d0, cb=..., c=0x7feccfb39060, this=<optimized out>) at ../../../../libsanitizer/sanitizer_common/sanitizer_quarantine.h:105
#10 __asan::Allocator::QuarantineChunk (this=this@entry=0x7fece3a13f00 <__asan::instance>, m=m@entry=0x6030005477d0, stack=stack@entry=0x7fecd151b360, ptr=0x6030005477e0) at ../../../../libsanitizer/asan/asan_allocator.cpp:666
#11 0x00007fece3911ba5 in __asan::Allocator::Deallocate (alloc_type=__asan::FROM_MALLOC, stack=0x7fecd151b360, delete_alignment=0, delete_size=0, ptr=0x6030005477e0, this=0x7fece3a13f00 <__asan::instance>)
    at ../../../../libsanitizer/asan/asan_allocator.cpp:714
#12 0x00007fece3997618 in __interceptor_free (ptr=0x6030005477e0) at ../../../../libsanitizer/asan/asan_malloc_linux.cpp:128
#13 0x00007fece362f7a4 in pthread_getattr_np@GLIBC_2.2.5 () from /lib64/libc.so.6
#14 0x00007fece39b142e in __sanitizer::GetThreadStackTopAndBottom (at_initialization=at_initialization@entry=false, stack_top=stack_top@entry=0x7fecd151bd10, stack_bottom=stack_bottom@entry=0x7fecd151bd18)
    at ../../../../libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:143
#15 0x00007fece39b1778 in __sanitizer::GetThreadStackAndTls (main=<optimized out>, stk_addr=stk_addr@entry=0x7feccfb39020, stk_size=stk_size@entry=0x7fecd151bd68, tls_addr=tls_addr@entry=0x7feccfb39040, 
    tls_size=tls_size@entry=0x7fecd151bd60) at ../../../../libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:543
#16 0x00007fece39a3c93 in __asan::AsanThread::SetThreadStackAndTls (this=this@entry=0x7feccfb39000, options=<optimized out>) at ../../../../libsanitizer/asan/asan_thread.h:81
#17 0x00007fece39a3fd6 in __asan::AsanThread::Init (this=this@entry=0x7feccfb39000, options=options@entry=0x0) at ../../../../libsanitizer/asan/asan_thread.cpp:226
#18 0x00007fece39a446e in __asan::AsanThread::ThreadStart (this=0x7feccfb39000, os_id=132302, signal_thread_is_registered=0x7ffe79cd58f8) at ../../../../libsanitizer/asan/asan_thread.cpp:258
#19 0x00007fece3781299 in start_thread () from /lib64/libpthread.so.0
#20 0x00007fece36a9353 in clone () from /lib64/libc.so.6


The thread 1 is stuck at the loop at the end of pthread_create(), line 245 an 246: wait until &param.is_registered == 0.

210	INTERCEPTOR(int, pthread_create, void *thread,
211	    void *attr, void *(*start_routine)(void*), void *arg) {
212	  EnsureMainThreadIDIsCorrect();
213	  // Strict init-order checking is thread-hostile.
214	  if (flags()->strict_init_order)
215	    StopInitOrderChecking();
216	  GET_STACK_TRACE_THREAD;
217	  int detached = 0;
218	  if (attr)
219	    REAL(pthread_attr_getdetachstate)(attr, &detached);
220	  ThreadStartParam param;
221	  atomic_store(&param.t, 0, memory_order_relaxed);
222	  atomic_store(&param.is_registered, 0, memory_order_relaxed);
223	  int result;
224	  {
225	    // Ignore all allocations made by pthread_create: thread stack/TLS may be
226	    // stored by pthread for future reuse even after thread destruction, and
227	    // the linked list it's stored in doesn't even hold valid pointers to the
228	    // objects, the latter are calculated by obscure pointer arithmetic.
229	#if CAN_SANITIZE_LEAKS
230	    __lsan::ScopedInterceptorDisabler disabler;
231	#endif
232	    result = REAL(pthread_create)(thread, attr, asan_thread_start, &param);
233	  }
234	  if (result == 0) {
235	    u32 current_tid = GetCurrentTidOrInvalid();
236	    AsanThread *t =
237	        AsanThread::Create(start_routine, arg, current_tid, &stack, detached);
238	    atomic_store(&param.t, reinterpret_cast<uptr>(t), memory_order_release);
239	    // Wait until the AsanThread object is initialized and the ThreadRegistry
240	    // entry is in "started" state. One reason for this is that after this
241	    // interceptor exits, the child thread's stack may be the only thing holding
242	    // the |arg| pointer. This may cause LSan to report a leak if leak checking
243	    // happens at a point when the interceptor has already exited, but the stack
244	    // range for the child thread is not yet known.
245	    while (atomic_load(&param.is_registered, memory_order_acquire) == 0)
246	      internal_sched_yield();
247	  }
248	  return result;
249	}



The thread 2 seems to be stuck in a syscall done by __sanitizer::atomic_exchange<__sanitizer::atomic_uint32_t>, called by __sanitizer::BlockingMutex::Lock:

(gdb) disassemble 
Dump of assembler code for function _ZN11__sanitizer13BlockingMutex4LockEv:
   0x00007fece39af850 <+0>:	endbr64 
   0x00007fece39af854 <+4>:	push   r12
   0x00007fece39af856 <+6>:	push   rbp
   0x00007fece39af857 <+7>:	push   rbx
   0x00007fece39af858 <+8>:	mov    rcx,QWORD PTR [rdi+0x50]
   0x00007fece39af85c <+12>:	test   rcx,rcx
   0x00007fece39af85f <+15>:	jne    0x7fece39af8a8 <_ZN11__sanitizer13BlockingMutex4LockEv+88>
   0x00007fece39af861 <+17>:	mov    eax,0x1
   0x00007fece39af866 <+22>:	xchg   DWORD PTR [rdi],eax
   0x00007fece39af868 <+24>:	test   eax,eax
   0x00007fece39af86a <+26>:	je     0x7fece39af89e <_ZN11__sanitizer13BlockingMutex4LockEv+78>
   0x00007fece39af86c <+28>:	mov    ebp,0x2
   0x00007fece39af871 <+33>:	mov    r12d,0xca
   0x00007fece39af877 <+39>:	mov    esi,0x80
   0x00007fece39af87c <+44>:	xor    ebx,ebx
   0x00007fece39af87e <+46>:	mov    edx,0x2
   0x00007fece39af883 <+51>:	jmp    0x7fece39af896 <_ZN11__sanitizer13BlockingMutex4LockEv+70>
   0x00007fece39af885 <+53>:	nop    DWORD PTR [rax]
   0x00007fece39af888 <+56>:	mov    rax,r12
   0x00007fece39af88b <+59>:	mov    r10,rbx
   0x00007fece39af88e <+62>:	mov    r8,rbx
   0x00007fece39af891 <+65>:	mov    r9,rbx
   0x00007fece39af894 <+68>:	syscall 
=> 0x00007fece39af896 <+70>:	mov    eax,ebp
   0x00007fece39af898 <+72>:	xchg   DWORD PTR [rdi],eax
   0x00007fece39af89a <+74>:	test   eax,eax
   0x00007fece39af89c <+76>:	jne    0x7fece39af888 <_ZN11__sanitizer13BlockingMutex4LockEv+56>
   0x00007fece39af89e <+78>:	pop    rbx
   0x00007fece39af89f <+79>:	pop    rbp
   0x00007fece39af8a0 <+80>:	pop    r12
   0x00007fece39af8a2 <+82>:	ret    
   0x00007fece39af8a3 <+83>:	nop    DWORD PTR [rax+rax*1+0x0]
   0x00007fece39af8a8 <+88>:	xor    r8d,r8d
   0x00007fece39af8ab <+91>:	lea    rdx,[rip+0x37d7c]        # 0x7fece39e762e
   0x00007fece39af8b2 <+98>:	mov    esi,0x285
   0x00007fece39af8b7 <+103>:	lea    rdi,[rip+0x37dfa]        # 0x7fece39e76b8
   0x00007fece39af8be <+110>:	call   0x7fece39be990 <_ZN11__sanitizer11CheckFailedEPKciS1_yy>
End of assembler dump.
History
Date User Action Args
2021-09-17 21:23:49vstinnersetrecipients: + vstinner, terry.reedy
2021-09-17 21:23:49vstinnersetmessageid: <1631913829.05.0.399515268437.issue45200@roundup.psfhosted.org>
2021-09-17 21:23:49vstinnerlinkissue45200 messages
2021-09-17 21:23:48vstinnercreate