# unicode_repr(PyObject *unicode) # { # ... # 1 isize = PyUnicode_GET_LENGTH(unicode); # idata = PyUnicode_DATA(unicode); # # /* Compute length of output, quote characters, and # maximum character */ # osize = 0; # ... # for (i = 0; i < isize; i++) { # Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); # switch (ch) { # ... # default: # /* Fast-path ASCII */ # if (ch < ' ' || ch == 0x7f) # 2 osize += 4; /* \xHH */ # ... # } # } # # ... # 3 repr = PyUnicode_New(osize, max); # ... # for (i = 0, o = 1; i < isize; i++) { # Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); # ... # else { # 4 PyUnicode_WRITE(okind, odata, o++, ch); # } # } # } # } # /* Closing quote already added at the beginning */ # 5 assert(_PyUnicode_CheckConsistency(repr, 1)); # return repr; # } # # 1. isize = 2^30+1 # 2. osize = isize*4 = 2^32+4, which overflows a 32-bit size and wraps around to 4 # 3. the buffer allocated by PyUnicode_New(osize, max) is therefore far too small # 4. heap overflow: the second loop writes the full-length output past the end of that buffer # 5. this assert will likely fail, since there is a good chance the small output # buffer is allocated just before the huge input string, so the overflow overwrites the very string being read. # # Repro: s = '\x00'*(2**30+1); repr(s)