Message288510
Following crash is sporadically observed in RHEL7 anaconda:
(gdb) f 0
#0 PyThread_ReInitTLS () at /usr/src/debug/Python-2.7.5/Python/thread.c:411
411 if (p->id != id) {
(gdb) list
406 keymutex = PyThread_allocate_lock();
407
408 /* Delete all keys which do not match the current thread id */
409 q = &keyhead;
410 while ((p = *q) != NULL) {
411 if (p->id != id) {
412 *q = p->next;
413 free((void *)p);
414 /* NB This does *not* free p->value! */
415 }
(gdb) p p
$1 = (struct key *) 0x3333333333333333
(gdb) p keyhead
$2 = (struct key *) 0x3333333333333333
The key list is protected by keymutex (except in PyThread_ReInitTLS), but there doesn't seem to be any protection against a concurrent fork(). What seems to happen is that fork() is called at a moment when the key list is not consistent.
For example, if I fill each newly allocated key with 0xfe bytes:
static struct key *find_key(int key, void *value)
// find_key with extra memset()
...
p = (struct key *)malloc(sizeof(struct key));
memset(p, 0xfe, sizeof(struct key));
if (p != NULL) {
p->id = id;
p->key = key;
p->value = value;
p->next = keyhead;
keyhead = p;
}
...
Looking at the disassembly, the compiler reordered the last two writes:
0x00000000004fcb50 <+272>: callq 0x413d10 <malloc@plt>
0x00000000004fcb55 <+277>: movabs $0xfefefefefefefefe,%rcx
0x00000000004fcb5f <+287>: test %rax,%rax
0x00000000004fcb62 <+290>: mov %rcx,(%rax)
0x00000000004fcb65 <+293>: mov %rcx,0x8(%rax)
0x00000000004fcb69 <+297>: mov %rcx,0x10(%rax)
0x00000000004fcb6d <+301>: mov %rcx,0x18(%rax)
0x00000000004fcb71 <+305>: je 0x4fcaff <PyThread_set_key_value+191>
0x00000000004fcb73 <+307>: mov 0x2f1e26(%rip),%rdx # 0x7ee9a0 <keyhead>
0x00000000004fcb7a <+314>: mov 0x2f1dff(%rip),%rdi # 0x7ee980 <keymutex>
0x00000000004fcb81 <+321>: xor %r14d,%r14d
0x00000000004fcb84 <+324>: mov %rbp,0x8(%rax)
0x00000000004fcb88 <+328>: mov %r12d,0x10(%rax)
0x00000000004fcb8c <+332>: mov %r13,0x18(%rax)
0x00000000004fcb90 <+336>: mov %rax,0x2f1e09(%rip) # 0x7ee9a0 <keyhead>
0x00000000004fcb97 <+343>: mov %rdx,(%rax)
Now consider what happens when a different thread calls fork() in between these two writes: keyhead has already been updated, but keyhead->next has not been written yet.
Now when anaconda hangs, I get:
(gdb) list
407 keymutex = PyThread_allocate_lock();
408
409 /* Delete all keys which do not match the current thread id */
410 q = &keyhead;
411 while ((p = *q) != NULL) {
412 if (p->id != id) {
413 *q = p->next;
414 free((void *)p);
415 /* NB This does *not* free p->value! */
416 }
(gdb) p p
$1 = (struct key *) 0xfefefefefefefefe
(gdb) p keyhead
$2 = (struct key *) 0xfefefefefefefefe
Here's how I think we get into this state:
-------------------------> thread 1
# has GIL
Thread.start
_start_new_thread(self.__bootstrap, ())
PyThread_start_new_thread(t_bootstrap)
# spawns thread 3
-------------------------> thread 2
...
# waiting for GIL
-------------------------> thread 3
t_bootstrap
_PyThreadState_Init # does not have GIL yet at this point
_PyGILState_NoteThreadState
PyThread_set_key_value(autoTLSkey, (void *)tstate)
find_key
# key list is temporarily not consistent
# due to the compiler reordering a couple of writes in find_key
-------------------------> thread 1
continuing Thread.start
self.__started.wait()
Event.wait()
self.__cond.wait
Condition.wait()
waiter = _allocate_lock()
waiter.acquire()
lock_PyThread_acquire_lock
Py_BEGIN_ALLOW_THREADS
PyEval_SaveThread
PyThread_release_lock(interpreter_lock);
-------------------------> thread 2
...
# acquired GIL
os.fork() # forks inconsistent list
-------------------------> child
PyOS_AfterFork()
PyThread_ReInitTLS()
SIGSEGV
The attached patch for Python makes this easier to reproduce by adding delays in a couple of places, which widens the window during which the key list is inconsistent. |
|
Date |
User |
Action |
Args |
2017-02-24 09:03:58 | Ján Stanček | set | recipients:
+ Ján Stanček |
2017-02-24 09:03:58 | Ján Stanček | set | messageid: <1487927038.37.0.851881550844.issue29640@psf.upfronthosting.co.za> |
2017-02-24 09:03:58 | Ján Stanček | link | issue29640 messages |
2017-02-24 09:03:56 | Ján Stanček | create | |
|