/*
  File: os_pipe_write_close_bug.c
  Date: 1-Dec-2009
  Author: Hugh Secker-Walker
  Description: Demonstrate a POSIX multithreading process hang on Mac OS X
*/

char const usage[] =
  "Demonstrate a POSIX hang on Mac OS X 10.5 and 10.6; all logging to stderr\n"
  "\n"
  "usage: os_pipe_write_close_bug size logging [count]\n"
  " size power-of-two exponent for nbytes buffer used to demonstrate the problem;\n"
  " no hang seen with 16 or smaller, hangs with 17 or larger\n"
  " logging 0 for no call logging; 1 for call logging\n"
  " count optional, non-negative number of cycles to try; default is no count limit\n"
  "\n"
  "For example: os_pipe_write_close_bug 17 0\n"
  ;

/*
  Demonstrate a bug on Mac OS X whereby a POSIX process will hang
  while simultaneously from separate threads trying write() with a
  large buffer and close() on the write and read fds respectively of a
  new pipe.

  The problem has only been seen with size of 17 or larger (buffer
  128K or larger).  So, a workaround is to limit the size of the write
  buffer to 64K.

  Problem has been seen on Mac OS X 10.5 and 10.6.  The problem has
  not been seen on 32-bit Ubuntu Linux or 64-bit Red Hat Linux.

  The script has a cycle whereby it creates a pipe and starts a thread
  to try to write a buffer of 2^size bytes to the pipe and then close
  the write end of the pipe.  The main thread closes the read end of
  the pipe without ever reading anything.

  By default, this script cycles forever, or until the hang, printing
  out the cycle count and, optionally, line-by-line records of the
  code that's been executed.  Optional count argument limits the
  number of cycles to try.  All output is to stderr.

  As with all such race condition-type bugs, the logging code, etc can
  have an effect on how likely the problem is to occur.

  To reproduce:

  $ gcc -Wall -Werror -lpthread os_pipe_write_close_bug.c -o os_pipe_write_close_bug

  $ ./os_pipe_write_close_bug 17 0
  ... writes ongoing cycle counts to stderr, eventually hangs

  $ ./os_pipe_write_close_bug 17 1 2> os_pipe_write_close_bug.log
  ... verbose logging writes ongoing cycle count and call details to
  file; it hangs in a second or so for me on Mac OS X 10.5

  When the process hangs, according to 'ps -l' it's in an
  uninterruptible wait 'U' or 'U+', so it's slightly tough to kill.  If
  run from bash, Ctrl-Z followed by 'kill %1 %1' followed by two
  newlines does the trick.

  The file os_pipe_write_close_bug.log will show you where the two
  threads had got to.  The tail of the log file usually looks
  something like the following, e.g for cycle 394, suggesting that
  both the write() and close() on the pipe are active in the two
  threads:

  ...
  cycle: 393
   go: enter
   writer_thread_func: enter
   go: pthread_create
   writer_thread_func: write
   writer_thread_func: close
   writer_thread_func: exit
   go: close
   go: pthread_join
   go: exit
  cycle: 394
   go: enter
   writer_thread_func: enter
   go: pthread_create
*/

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
  Largest accepted size exponent.  nbytes is an int, and 1 << size is
  undefined once the shift reaches the sign bit, so 30 is the largest
  exponent that is guaranteed to be representable when int is 32 bits.
*/
enum { MAX_SIZE_EXPONENT = 30 };

/* Print the usage text to stderr and terminate with exit status 1. */
void die_usage(void)
{
  fprintf(stderr, "%s", usage);
  exit(1);
}

/* shared global state */

/* the buffer for writing; allocated and filled once by main() */
char * bytes = NULL;
int nbytes = 0;

/* the pipe: fds[0] is the read end, fds[1] is the write end */
int fds[2];

/* non-zero to log each step of each thread to stderr */
int logging;

/*
  Thread entry point: write the whole shared buffer to the write end of
  the pipe, then close the write end.

  arg is unused; it is present so that the function has the
  void *(*)(void *) type required by pthread_create().

  Always returns NULL.  Failures are reported via assert(); a write
  that fails with EPIPE is tolerated, because the main thread closes
  the read end concurrently.
*/
void * writer_thread_func(void *arg)
{
  (void)arg;

  if( logging ) fprintf(stderr, " writer_thread_func: enter\n");

  assert( bytes );
  assert( nbytes > 0 );

  /* write to the pipe; save errno before the logging call can clobber it */
  ssize_t const written = write(fds[1], bytes, nbytes);
  int const write_errno = errno;
  if( logging ) fprintf(stderr, " writer_thread_func: write\n");
  assert( written >= 0 || write_errno == EPIPE );
  (void)written;
  (void)write_errno;

  /* close the write-end of the pipe */
  int const close_err = close(fds[1]);
  if( logging ) fprintf(stderr, " writer_thread_func: close\n");
  assert( close_err >= 0 );
  (void)close_err;

  if( logging ) fprintf(stderr, " writer_thread_func: exit\n");

  return NULL;
}

/*
  Run one cycle of the experiment: create a fresh pipe, start the
  writer thread, close the read end without reading anything, then
  join the writer thread.  The close() here and the write() in the
  writer thread are intentionally unsynchronized.
*/
void go(void)
{
  int err;

  if( logging ) fprintf(stderr, " go: enter\n");

  /* create the pipe */
  err = pipe(fds);
  assert( err == 0 );

  /* start the writer thread; pthread_t is opaque, so success is
     judged solely by pthread_create()'s return value */
  pthread_t writer_thread;
  err = pthread_create(&writer_thread, NULL, &writer_thread_func, NULL);
  if( logging ) fprintf(stderr, " go: pthread_create\n");
  assert( err == 0 );

  /* close the read-end of the pipe */
  err = close(fds[0]);
  if( logging ) fprintf(stderr, " go: close\n");
  assert( err == 0 );

  /* wait for the writer thread */
  err = pthread_join(writer_thread, NULL);
  if( logging ) fprintf(stderr, " go: pthread_join\n");
  assert( err == 0 );
  (void)err;

  if( logging ) fprintf(stderr, " go: exit\n");
}

/*
  Parse "size logging [count]" from the command line, build the write
  buffer of 2^size bytes, and call go() either count times or, when
  count is omitted, forever.  Any malformed, negative, or out-of-range
  argument prints the usage text and exits with status 1.
*/
int main(int argc, char *argv[])
{
  int i;

  /* ignore SIGPIPE so that write() on the broken pipe returns EPIPE
     instead of killing the process */
  void (* const previous_handler)(int) = signal(SIGPIPE, SIG_IGN);
  assert( previous_handler != SIG_ERR );
  (void)previous_handler;

  /* parse command line integers; -1 marks "not given" */
  long argls[3] = { -1, -1, -1 };
  if( argc != 3 && argc != 4 ) die_usage();
  for( i = 1; i < argc; ++i ) {
    char *end;
    errno = 0;
    long const value = strtol(argv[i], &end, 10);
    if( end == argv[i] || *end != '\0' ) die_usage();
    /* reject negatives and anything that would not survive narrowing to int */
    if( errno == ERANGE || value < 0 || value > INT_MAX ) die_usage();
    argls[i-1] = value;
  }

  /* a larger exponent would make the shift below undefined */
  if( argls[0] > MAX_SIZE_EXPONENT ) die_usage();

  int const size = (int)argls[0];
  logging = (int)argls[1];
  int const loop = (int)argls[2];
  fprintf(stderr, "args: %s %d %d [%d]\n", argv[0], size, logging, loop);

  /* make the buffer and fill it with a repeating byte pattern */
  nbytes = 1 << size;
  bytes = malloc(nbytes);
  assert( bytes );
  for( i = 0; i < nbytes; ++i ) {
    bytes[i] = (char)i;
  }

  /* do the work; a negative loop means no cycle limit */
  for( i = 0; loop < 0 || i < loop; ++i ) {
    fprintf(stderr, "cycle: %d\n", i);
    go();
  }

  return 0;
}