This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in Python's Developer Guide.

Author pkt
Recipients pkt
Date 2015-02-20.12:06:49
SpamBayes Score -1.0
Marked as misclassified Yes
Message-id <1424434009.7.0.146767591235.issue23490@psf.upfronthosting.co.za>
In-reply-to
Content
# Bug
# ---
# 
# Py_UNICODE *
# PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
# {
#     ...
# #endif
#     wchar_t *w;
#     wchar_t *wchar_end;
# 
#     ...
# 1           _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) *
#                                                   (_PyUnicode_LENGTH(unicode) + 1));
#             ...
#             w = _PyUnicode_WSTR(unicode);
# 2           wchar_end = w + _PyUnicode_LENGTH(unicode);
# 
#             if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
#                 one_byte = PyUnicode_1BYTE_DATA(unicode);
# 3               for (; w < wchar_end; ++one_byte, ++w)
#                     *w = *one_byte;
#                 /* null-terminate the wstr */
# 4               *w = 0;
#             }
# 
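# 1. If the length of unicode is 2**30-1, the requested buffer size is
#    4 * (2**30-1+1) = 2**32, which wraps to 0 (modulo 2**32), so a
#    0-byte buffer is allocated.
# 2. wchar_end ends up 4 bytes below w: pointer arithmetic implicitly
#    multiplies the length by 4, and 4 * (2**30-1) = 2**32 - 4 wraps
#    around modulo 2**32.
# 3. w > wchar_end, so the loop body is never entered.
# 4. The null terminator is written as a 4-byte store into the 0-size buffer.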
# 
# GDB output
# ----------
# 
# 3860                _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) *
# ...
# (gdb) print sizeof(wchar_t)*(((PyASCIIObject*)(unicode))->length+1)
# $21 = 0
# ...
# (gdb) n
# 3868                w = _PyUnicode_WSTR(unicode);
# (gdb) n
# 3869                wchar_end = w + _PyUnicode_LENGTH(unicode);
# (gdb) n
# 3871                if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
# (gdb) print w
# $22 = 0x805fc028 L"\xfbfbfbfb\xced00000"
# (gdb) print wchar_end
# $23 = 0x805fc024 L"\xfbfbfb6f\xfbfbfbfb\xced00000"
# ...
# 3876                    *w = 0;
#  
# )
# OS info
# -------
# 
# % ./python -V
# Python 3.4.1
#  
# % uname -a
# Linux ubuntu 3.8.0-29-generic #42~precise1-Ubuntu SMP Wed Aug 14 15:31:16 UTC 2013 i686 i686 i386 GNU/Linux
 
import locale
s='a'*(2**30-1)
locale.strxfrm(s)
History
Date User Action Args
2015-02-20 12:06:49 pkt set recipients: + pkt
2015-02-20 12:06:49 pkt set messageid: <1424434009.7.0.146767591235.issue23490@psf.upfronthosting.co.za>
2015-02-20 12:06:49 pkt link issue23490 messages
2015-02-20 12:06:49 pkt create