# Objects\bytesobject.c # # PyObject * # PyBytes_Repr(PyObject *obj, int smartquotes) # { # PyBytesObject* op = (PyBytesObject*) obj; # 1 Py_ssize_t i, length = Py_SIZE(op); # size_t newsize, squotes, dquotes; # ... # # /* Compute size of output string */ # newsize = 3; /* b'' */ # s = (unsigned char*)op->ob_sval; # for (i = 0; i < length; i++) { # ... # default: # if (s[i] < ' ' || s[i] >= 0x7f) # 2 newsize += 4; /* \xHH */ # else # newsize++; # } # } # ... # 3 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) { # PyErr_SetString(PyExc_OverflowError, # "bytes object is too large to make repr"); # return NULL; # } # 4 v = PyUnicode_New(newsize, 127); # ... # *p++ = 'b', *p++ = quote; # for (i = 0; i < length; i++) { # ... # 5 *p++ = c; # } # *p++ = quote; # 6 assert(_PyUnicode_CheckConsistency(v, 1)); # return v; # } # # 1. length=2^30+1=1073741825 # 2. newsize=length*4+3=2^32+7, which wraps around to 7 when size_t is 32 bits wide (integer overflow) # 3. the check is ineffective, because newsize has already wrapped around to 7 # 4. the allocated buffer is therefore far too small # 5. the copy loop writes past the end of the buffer (buffer overflow) # 6. this assert will likely fail: there is a good chance the allocated buffer is located just before the huge one, in which case the huge one will overwrite itself. # # Two statements are enough to reproduce: s=b'\x00'*(2**30+1); repr(s)