# Bug
# ---
#
# Excerpt of the affected function; the numbers in the left margin are
# referenced by the explanation below.
#
#     Py_UNICODE *
#     PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
#     {
#         ...
#     #endif
#         wchar_t *w;
#         wchar_t *wchar_end;
#
#         ...
# 1       _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) *
#                                           (_PyUnicode_LENGTH(unicode) + 1));
#         ...
#         w = _PyUnicode_WSTR(unicode);
# 2       wchar_end = w + _PyUnicode_LENGTH(unicode);
#
#         if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
#             one_byte = PyUnicode_1BYTE_DATA(unicode);
# 3           for (; w < wchar_end; ++one_byte, ++w)
#                 *w = *one_byte;
#             /* null-terminate the wstr */
# 4           *w = 0;
#         }
#
# 1. If length(unicode) == 2**30 - 1, the size passed to the allocator is
#    4 * (2**30 - 1 + 1) = 2**32, which is 0 modulo 2**32, so the malloc'ed
#    buffer has size 0.
# 2. wchar_end is equal to w - 4 bytes because pointer arithmetic implicitly
#    multiplies the length by 4: 4 * (2**30 - 1) = 2**32 - 4, which wraps
#    around to -4.
# 3. w > wchar_end, so the copy loop is never entered.
# 4. The terminator store is a 4-byte write into a 0-size buffer.
#
# GDB output
# ----------
#
# 3860 _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) *
# ...
# (gdb) print sizeof(wchar_t)*(((PyASCIIObject*)(unicode))->length+1)
# $21 = 0
# ...
# (gdb) n
# 3868 w = _PyUnicode_WSTR(unicode);
# (gdb) n
# 3869 wchar_end = w + _PyUnicode_LENGTH(unicode);
# (gdb) n
# 3871 if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
# (gdb) print w
# $22 = 0x805fc028 L"\xfbfbfbfb\xced00000"
# (gdb) print wchar_end
# $23 = 0x805fc024 L"\xfbfbfb6f\xfbfbfbfb\xced00000"
# ...
# 3876 *w = 0;
#
# OS info
# -------
#
# % ./python -V
# Python 3.4.1
#
# % uname -a
# Linux ubuntu 3.8.0-29-generic #42~precise1-Ubuntu SMP Wed Aug 14 15:31:16 UTC 2013 i686 i686 i386 GNU/Linux

import locale

# Reproducer. The string length is chosen so that (length + 1) * 4 equals
# 2**32, the wrap-around case described in step 1 above.
length = 2**30 - 1
s = 'a' * length

# Per the report above, this call is what reaches the affected conversion on
# the 32-bit build listed under "OS info".
locale.strxfrm(s)