# The code below results in a null-pointer dereference in the
# s_unpack_internal() function of the _struct module:
#
# ASAN:SIGSEGV
# =================================================================
# ==20245==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x7facd2cea83a bp 0x000000000000 sp 0x7ffd0250f860 T0)
#     #0 0x7facd2cea839 in s_unpack_internal /home/artem/projects/python/src/cpython-asan/Modules/_struct.c:1515
#     #1 0x7facd2ceab69 in Struct_unpack_impl /home/artem/projects/python/src/cpython-asan/Modules/_struct.c:1570
#     #2 0x7facd2ceab69 in unpack_impl /home/artem/projects/python/src/cpython-asan/Modules/_struct.c:2192
#     #3 0x7facd2ceab69 in unpack /home/artem/projects/python/src/cpython-asan/Modules/clinic/_struct.c.h:215
#     #4 0x474397 in _PyMethodDef_RawFastCallKeywords Objects/call.c:618
#     #5 0x474397 in _PyCFunction_FastCallKeywords Objects/call.c:690
#     #6 0x42685f in call_function Python/ceval.c:4817
#     #7 0x42685f in _PyEval_EvalFrameDefault Python/ceval.c:3298
#     #8 0x54b164 in PyEval_EvalFrameEx Python/ceval.c:663
#     #9 0x54b164 in _PyEval_EvalCodeWithName Python/ceval.c:4173
#     #10 0x54b252 in PyEval_EvalCodeEx Python/ceval.c:4200
#     #11 0x54b252 in PyEval_EvalCode Python/ceval.c:640
#     #12 0x431e0e in run_mod Python/pythonrun.c:976
#     #13 0x431e0e in PyRun_FileExFlags Python/pythonrun.c:929
#     #14 0x43203b in PyRun_SimpleFileExFlags Python/pythonrun.c:392
#     #15 0x446354 in run_file Modules/main.c:338
#     #16 0x446354 in Py_Main Modules/main.c:809
#     #17 0x41df71 in main Programs/python.c:69
#     #18 0x7facd58ac82f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2082f)
#     #19 0x428728 in _start (/home/artem/projects/python/build/cpython-asan/bin/python3.7+0x428728)
#
# AddressSanitizer can not provide additional info.
# SUMMARY: AddressSanitizer: SEGV /home/artem/projects/python/src/cpython-asan/Modules/_struct.c:1515 s_unpack_internal
# ==20245==ABORTING
#
#
# It looks like the _struct implementation assumes that
# PyStructObject->s_codes cannot be null, but it may happen if a bytearray
# was passed to unpack().
# PyStructObject->s_codes becomes null in a couple of places in _struct.c,
# but those are not what happens here.
# unpack() calls _PyArg_ParseStack() with cache_struct_converter(), which
# maintains a cache.
# Even if unpack() was called incorrectly with a string as the second
# parameter (see below), this value is going to be cached anyway.
# Next time, if the same format string is used, the value is going to be
# retrieved from the cache.
# But PyStructObject->s_codes is still not null in the
# cache_struct_converter() function.
# If you watch "s_object" under gdb, you can see that "s_codes" becomes null
# here:
#
# PyBuffer_FillInfo (view=0x7fffffffd700, obj=obj@entry=0x7ffff7e50730,
#     buf=0x8df478 <_PyByteArray_empty_string>, len=0, readonly=readonly@entry=0,
#     flags=0) at Objects/abstract.c:647
# 647         view->format = NULL;
# (gdb) bt
# #0 PyBuffer_FillInfo (view=0x7fffffffd700, obj=obj@entry=0x7ffff7e50730,
#     buf=0x8df478 <_PyByteArray_empty_string>, len=0, readonly=readonly@entry=0,
#     flags=0) at Objects/abstract.c:647
# #1 0x000000000046020c in bytearray_getbuffer (obj=0x7ffff7e50730,
#     view=, flags=) at Objects/bytearrayobject.c:72
# #2 0x0000000000560b0a in getbuffer (errmsg=,
#     view=0x7fffffffd700, arg=0x7ffff7e50730) at Python/getargs.c:1380
# #3 convertsimple (freelist=0x7fffffffd3b0, bufsize=256,
#     msgbuf=0x7fffffffd4c0 "must be bytes-like object, not str", flags=2,
#     p_va=0x0, p_format=, arg=0x7ffff7e50730)
#     at Python/getargs.c:938
# #4 convertitem (arg=0x7ffff7e50730, p_format=p_format@entry=0x7fffffffd3a8,
#     p_va=p_va@entry=0x7fffffffd610, flags=flags@entry=2,
#     levels=levels@entry=0x7fffffffd3c0,
#     msgbuf=msgbuf@entry=0x7fffffffd4c0 "must be bytes-like object, not str",
#     bufsize=256,
#     freelist=0x7fffffffd3b0) at Python/getargs.c:596
# #5 0x0000000000561d6f in vgetargs1_impl (compat_args=compat_args@entry=0x0,
#     stack=stack@entry=0x61600004b520, nargs=2,
#     format=format@entry=0x7ffff35d5c88 "O&y*:unpack",
#     p_va=p_va@entry=0x7fffffffd610, flags=flags@entry=2) at Python/getargs.c:388
# #6 0x00000000005639b0 in _PyArg_ParseStack_SizeT (
#     args=args@entry=0x61600004b520, nargs=,
#     format=format@entry=0x7ffff35d5c88 "O&y*:unpack") at Python/getargs.c:163
# #7 0x00007ffff35d2df8 in unpack (module=module@entry=0x7ffff7e523b8,
#     args=args@entry=0x61600004b520, nargs=,
#     kwnames=kwnames@entry=0x0)
#     at /home/artem/projects/python/src/cpython-asan/Modules/clinic/_struct.c.h:207
# #8 0x0000000000474398 in _PyMethodDef_RawFastCallKeywords (kwnames=0x0,
#     nargs=140737352377272, args=0x61600004b520, self=0x7ffff7e523b8,
#     method=0x7ffff37d94e0 ) at Objects/call.c:618
# #9 _PyCFunction_FastCallKeywords (func=func@entry=0x7ffff7e53828,
#     args=args@entry=0x61600004b520, nargs=nargs@entry=2,
#     kwnames=kwnames@entry=0x0) at Objects/call.c:690
# #10 0x0000000000426860 in call_function (kwnames=0x0, oparg=2,
#     pp_stack=) at Python/ceval.c:4817
# #11 _PyEval_EvalFrameDefault (f=, throwflag=)
#     at Python/ceval.c:3298
# #12 0x000000000054b165 in PyEval_EvalFrameEx (throwflag=0, f=0x61600004b398)
#     at Python/ceval.c:663
# #13 _PyEval_EvalCodeWithName (_co=_co@entry=0x7ffff7ed3ae0,
#     globals=globals@entry=0x7ffff7f2f150, locals=locals@entry=0x7ffff7ed3ae0,
#     args=args@entry=0x0, argcount=argcount@entry=0, kwnames=kwnames@entry=0x0,
#     kwargs=0x8, kwcount=0, kwstep=2, defs=0x0, defcount=0, kwdefs=0x0,
#     closure=0x0, name=0x0, qualname=0x0) at Python/ceval.c:4173
# #14 0x000000000054b253 in PyEval_EvalCodeEx (closure=0x0, kwdefs=0x0,
#     defcount=0, defs=0x0, kwcount=0, kws=0x0, argcount=0, args=0x0,
#     locals=locals@entry=0x7ffff7ed3ae0, globals=globals@entry=0x7ffff7f16288,
#     _co=_co@entry=0x7ffff7ed3ae0) at Python/ceval.c:4200
# #15 PyEval_EvalCode
#     (co=co@entry=0x7ffff7ed3ae0,
#     globals=globals@entry=0x7ffff7f16288, locals=locals@entry=0x7ffff7f16288)
#     at Python/ceval.c:640
# #16 0x0000000000431e0f in run_mod (arena=0x7ffff7f2f150, flags=0x7fffffffdb60,
#     locals=0x7ffff7f16288, globals=0x7ffff7f16288, filename=0x7ffff7e534b0,
#     mod=0x625000021078) at Python/pythonrun.c:976
# #17 PyRun_FileExFlags (fp=0x61600003cc80, filename_str=,
#     start=, globals=0x7ffff7f16288, locals=0x7ffff7f16288,
#     closeit=1, flags=0x7fffffffdb60) at Python/pythonrun.c:929
# #18 0x000000000043203c in PyRun_SimpleFileExFlags (fp=0x61600003cc80,
#     filename=, closeit=1, flags=0x7fffffffdb60)
#     at Python/pythonrun.c:392
# #19 0x0000000000446355 in run_file (p_cf=0x7fffffffdb60,
#     filename=0x60800000bf20 L"struct_unpack_crash.py", fp=0x61600003cc80)
#     at Modules/main.c:338
# #20 Py_Main (argc=argc@entry=2, argv=argv@entry=0x60300000efe0)
#     at Modules/main.c:809
# #21 0x000000000041df72 in main (argc=2, argv=)
#     at ./Programs/python.c:69
#
#
# I am not sure if it should cache an object if an error occurred.
# But clearing the cache in case of error seems to fix this null-pointer
# dereference.
#
# Here is a patch (untested):
#
# diff --git a/Modules/clinic/_struct.c.h b/Modules/clinic/_struct.c.h
# index 71ac290..9573769 100644
# --- a/Modules/clinic/_struct.c.h
# +++ b/Modules/clinic/_struct.c.h
# @@ -206,6 +206,7 @@ unpack(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
#
#      if (!_PyArg_ParseStack(args, nargs, "O&y*:unpack",
#          cache_struct_converter, &s_object, &buffer)) {
# +        _clearcache_impl(NULL);
#          goto exit;
#      }
#
#
# If this solution is okay, then _clearcache_impl() should probably be called
# in a couple of other unpack functions.
#

# Reproducer: call struct.unpack() twice with the same format, first with an
# invalid (str) buffer and then with an empty bytearray.

import struct

# Step 1: pass a str where a bytes-like object is required.  The call is
# rejected, but the report above states that the Struct object built for
# b'hhl' is cached anyway.
p1 = b'hhl'
p2 = 'test'
try:
    struct.unpack(p1, p2)
except Exception as err:
    # Deliberately broad: the script only reports whatever error is raised.
    print(err)

# Step 2: reuse the same format with an empty bytearray.  On an affected
# build this is the call that ends in the SEGV in s_unpack_internal(), so
# the handler below is never reached there; otherwise the error is printed.
p1 = b'hhl'
p2 = bytearray()
try:
    struct.unpack(p1, p2)
except Exception as err:
    print(err)