int and float constructing from non NUL-terminated buffer #68990

JohnLeitch · 2015-08-06T03:15:20Z

BPO	24802
Nosy	@mdickinson, @ericvsmith, @vadmium, @serhiy-storchaka
Files	PyFloat_FromString_Buffer_Over-read.patch: patch PyFloat_FromString_Buffer_Over-read.py: repro int_float_from_buffer.patch unterminated-buffer.patch: Also fix compile() etc

^{Note: these values reflect the state of the issue at the time it was migrated and might not reflect the current state.}

Show more details

GitHub fields:

assignee = 'https://github.com/vadmium'
closed_at = <Date 2015-11-08.11:34:47.949>
created_at = <Date 2015-08-06.03:15:19.959>
labels = ['interpreter-core', 'type-crash']
title = 'int and float constructing from non NUL-terminated buffer'
updated_at = <Date 2015-11-20.19:56:50.438>
user = 'https://bugs.python.org/JohnLeitch'

bugs.python.org fields:

activity = <Date 2015-11-20.19:56:50.438>
actor = 'python-dev'
assignee = 'martin.panter'
closed = True
closed_date = <Date 2015-11-08.11:34:47.949>
closer = 'martin.panter'
components = ['Interpreter Core']
creation = <Date 2015-08-06.03:15:19.959>
creator = 'JohnLeitch'
dependencies = []
files = ['40132', '40133', '40940', '40943']
hgrepos = []
issue_num = 24802
keywords = ['patch']
message_count = 14.0
messages = ['248099', '248100', '254052', '254076', '254077', '254079', '254137', '254254', '254255', '254260', '254266', '254335', '254336', '255015']
nosy_count = 6.0
nosy_names = ['mark.dickinson', 'eric.smith', 'python-dev', 'martin.panter', 'serhiy.storchaka', 'JohnLeitch']
pr_nums = []
priority = 'normal'
resolution = 'fixed'
stage = 'resolved'
status = 'closed'
superseder = None
type = 'crash'
url = 'https://bugs.python.org/issue24802'
versions = ['Python 3.4', 'Python 3.5', 'Python 3.6']

JohnLeitch · 2015-08-06T03:15:15Z

Python suffers from a buffer over-read in PyFloat_FromString() that is caused by the incorrect assumption that buffers returned by PyObject_GetBuffer() are null-terminated. This could potentially result in the disclosure of adjacent memory.

PyObject *
PyFloat_FromString(PyObject *v)
{
    const char *s, *last, *end;
    double x;
    PyObject *s_buffer = NULL;
    Py_ssize_t len;
    Py_buffer view = {NULL, NULL};
    PyObject *result = NULL;

    if (PyUnicode_Check(v)) {
        s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
        if (s_buffer == NULL)
            return NULL;
        s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
        if (s == NULL) {
            Py_DECREF(s_buffer);
            return NULL;
        }
    }
    else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) {
        s = (const char *)view.buf; <<<< The unterminated buffer is retrieved here.
        len = view.len;
    }
    else {
        PyErr_Format(PyExc_TypeError,
            "float() argument must be a string or a number, not '%.200s'",
            Py_TYPE(v)->tp_name);
        return NULL;
    }
    last = s + len;
    /* strip space */
    while (s < last && Py_ISSPACE(*s))
        s++;
    while (s < last - 1 && Py_ISSPACE(last[-1]))
        last--;
    /* We don't care about overflow or underflow.  If the platform
     * supports them, infinities and signed zeroes (on underflow) are
     * fine. */
    x = PyOS_string_to_double(s, (char **)&end, NULL); <<<< The buffer is then passed
                                                            along here.
    if (end != last) {
        PyErr_Format(PyExc_ValueError,
                     "could not convert string to float: "
                     "%R", v);
        result = NULL;
    }
    else if (x == -1.0 && PyErr_Occurred())
        result = NULL;
    else
        result = PyFloat_FromDouble(x);

    PyBuffer_Release(&view);
    Py_XDECREF(s_buffer);
    return result;
}

double
PyOS_string_to_double(const char *s,
                      char **endptr,
                      PyObject *overflow_exception)
{
    double x, result=-1.0;
    char *fail_pos;

    errno = 0;
    PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
    x = _PyOS_ascii_strtod(s, &fail_pos);
    PyFPE_END_PROTECT(x)

    if (errno == ENOMEM) {
        PyErr_NoMemory();
        fail_pos = (char *)s;
    }
    else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
        PyErr_Format(PyExc_ValueError, <<<< If any of these error paths are taken, the
                                            unterminated buffer is passed along without
                                            its length, ultimately resulting in a call
                                            to unicode_fromformat_write_cstr().
                      "could not convert string to float: "
                      "%.200s", s);
    else if (fail_pos == s)
        PyErr_Format(PyExc_ValueError,
                      "could not convert string to float: "
                      "%.200s", s);
    else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
        PyErr_Format(overflow_exception,
                      "value too large to convert to float: "
                      "%.200s", s);
    else
        result = x;

    if (endptr != NULL)
        *endptr = fail_pos;
    return result;
}

static int
unicode_fromformat_write_cstr(_PyUnicodeWriter *writer, const char *str,
                              Py_ssize_t width, Py_ssize_t precision)
{
    /* UTF-8 */
    Py_ssize_t length;
    PyObject *unicode;
    int res;

    length = strlen(str); <<<< str points to the unterminated buffer, which means 
                               strlen() my read off the end, depending on the contents
                               of adjacent memory.
    if (precision != -1)
        length = Py_MIN(length, precision);
    unicode = PyUnicode_DecodeUTF8Stateful(str, length, "replace", NULL); <<<< The new,
                                                                    incorrect length is
                                                                    passed along.
																		 
    if (unicode == NULL)
        return -1;

    res = unicode_fromformat_write_str(writer, unicode, width, -1);
    Py_DECREF(unicode);
    return res;
}

A script that reproduces the issue is as follows:

import array
float(array.array("B",b"A"*0x10))

And it produces the following exception:

0:000> gu
eax=00000000 ebx=06116b00 ecx=00000000 edx=00000000 esi=06116b00 edi=00000000
eip=6516be1b esp=0080f440 ebp=0080f4f4 iopl=0 nv up ei pl zr na pe nc
cs=0023 ss=002b ds=002b es=002b fs=0053 gs=002b efl=00000246
python35!PyFloat_FromString+0xab:
6516be1b 8b4c2454 mov ecx,dword ptr [esp+54h] ss:002b:0080f494=090a2fe8
0:000> db @@(view.buf)
090a2fe8 41 41 41 41 41 41 41 41-41 41 41 41 41 41 41 41 AAAAAAAAAAAAAAAA
090a2ff8 c0 c0 c0 c0 d0 d0 d0 d0-?? ?? ?? ?? ?? ?? ?? ?? ........????????
090a3008 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3018 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a302 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3038 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3048 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3058 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
0:000> g
(828.bec): Access violation - code c0000005 (first chance)
First chance exceptions are reported before any exception handling.
This exception may be expected and handled.
eax=0080f4d0 ebx=0080f3a0 ecx=0080f3a0 edx=090a2fe8 esi=090a3000 edi=090a2fe9
eip=651ac280 esp=0080f31c ebp=0080f328 iopl=0 nv up ei ng nz na po nc
cs=0023 ss=002b ds=002b es=002b fs=0053 gs=002b efl=00010282
python35!unicode_fromformat_write_cstr+0x10:
651ac280 8a06 mov al,byte ptr [esi] ds:002b:090a3000=??
0:000> db esi-0x10
090a2ff0 41 41 41 41 41 41 41 41-c0 c0 c0 c0 d0 d0 d0 d0 AAAAAAAA........
090a3000 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3010 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3020 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3030 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3040 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3050 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
090a3060 ?? ?? ?? ?? ?? ?? ?? ??-?? ?? ?? ?? ?? ?? ?? ?? ????????????????
0:000> !analyze -v -nodb

                   Exception Analysis                                   *

FAULTING_IP:
python35!unicode_fromformat_write_cstr+10 [c:\build\cpython\objects\unicodeobject.c @ 2368]
651ac280 8a06 mov al,byte ptr [esi]

EXCEPTION_RECORD: ffffffff -- (.exr 0xffffffffffffffff)
ExceptionAddress: 651ac280 (python35!unicode_fromformat_write_cstr+0x00000010)
ExceptionCode: c0000005 (Access violation)
ExceptionFlags: 0000000
NumberParameters: 2
Parameter[0]: 0000000
Parameter[1]: 090a3000
Attempt to read from address 090a3000

CONTEXT: 0000000 -- (.cxr 0x0;r)
eax=0080f4d0 ebx=0080f3a0 ecx=0080f3a0 edx=090a2fe8 esi=090a3000 edi=090a2fe9
eip=651ac280 esp=0080f31c ebp=0080f328 iopl=0 nv up ei ng nz na po nc
cs=0023 ss=002b ds=002b es=002b fs=0053 gs=002b efl=00010282
python35!unicode_fromformat_write_cstr+0x10:
651ac280 8a06 mov al,byte ptr [esi] ds:002b:090a3000=??

FAULTING_THREAD: 00000bec

DEFAULT_BUCKET_ID: INVALID_POINTER_READ

PROCESS_NAME: python.exe

ERROR_CODE: (NTSTATUS) 0xc0000005 - The instruction at 0x%p referenced memory at 0x%p. The memory could not be %s.

EXCEPTION_CODE: (NTSTATUS) 0xc0000005 - The instruction at 0x%p referenced memory at 0x%p. The memory could not be %s.

EXCEPTION_PARAMETER1: 0000000

EXCEPTION_PARAMETER2: 090a3000

READ_ADDRESS: 090a3000

FOLLOWUP_IP:
python35!unicode_fromformat_write_cstr+10 [c:\build\cpython\objects\unicodeobject.c @ 2368]
651ac280 8a06 mov al,byte ptr [esi]

NTGLOBALFLAG: 2000000

APPLICATION_VERIFIER_FLAGS: 0

FAULTING_LOCAL_VARIABLE_NAME: length

APP: python.exe

ANALYSIS_VERSION: 6.3.9600.17029 (debuggers(dbg).140219-1702) x86fre

PRIMARY_PROBLEM_CLASS: INVALID_POINTER_READ

BUGCHECK_STR: APPLICATION_FAULT_INVALID_POINTER_READ

LAST_CONTROL_TRANSFER: from 651ac6e9 to 651ac280

STACK_TEXT:
0080f328 651ac6e9 ffffffff 000000c8 0000000 python35!unicode_fromformat_write_cstr+0x10
0080f384 651ac955 0080f39c 090a2fe8 65321778 python35!unicode_fromformat_arg+0x409
0080f3d8 651f1a1a 65321778 0080f404 090a2fe8 python35!PyUnicode_FromFormatV+0x65
0080f3f4 652070a9 6536bd38 65321778 090a2fe8 python35!PyErr_Format+0x1a
0080f42c 6516be70 090a2fe8 0080f484 0000000 python35!PyOS_string_to_double+0xa9
0080f4f4 6514808b 06116b0 6536d658 6536d658 python35!PyFloat_FromString+0x100
0080f554 6516e6e2 06116b0 06116b0 06116b0 python35!PyNumber_Float+0xcb
0080f568 65194e08 6536d658 0610e630 0000000 python35!float_new+0x72
0080f588 6514947d 6536d658 0610e630 0000000 python35!type_call+0x38
0080f5a4 651e49cc 6536d658 0610e630 0000000 python35!PyObject_Call+0x6d
0080f5d0 651e449c 00000001 0610e630 00000083 python35!do_call+0x11c
0080f600 651e18d 060ceab0 0000000 00000040 python35!call_function+0x36c
0080f678 651e339f 060ceab0 0000000 08c73ff0 python35!PyEval_EvalFrameEx+0x2318
0080f6c4 6521a142 060eff58 0000000 0000000 python35!_PyEval_EvalCodeWithName+0x82f
0080f700 65219fd5 060eff58 060eff58 0080f7c python35!run_mod+0x42
0080f72c 6521904a 06d40fc8 061571d0 00000101 python35!PyRun_FileExFlags+0x85
0080f770 650ef037 06d40fc8 061571d0 00000001 python35!PyRun_SimpleFileExFlags+0x20a
0080f79c 650ef973 0080f7c 65492100 65492108 python35!run_file+0xe7
0080f840 1c4b143f 00000002 05e06f10 05e0cf48 python35!Py_Main+0x913
0080f88c 76323744 7ee8e000 76323720 ddf3f75b python!__scrt_common_main_seh+0xff
0080f8a0 7789a064 7ee8e000 16227053 0000000 KERNEL32!BaseThreadInitThunk+0x24
0080f8e8 7789a02f ffffffff 778bd7c3 0000000 ntdll!__RtlUserThreadStart+0x2f
0080f8f8 0000000 1c4b14f7 7ee8e000 0000000 ntdll!_RtlUserThreadStart+0x1b

STACK_COMMAND: .cxr 0x0 ; kb

FAULTING_SOURCE_LINE: c:\build\cpython\objects\unicodeobject.c

FAULTING_SOURCE_FILE: c:\build\cpython\objects\unicodeobject.c

FAULTING_SOURCE_LINE_NUMBER: 2368

FAULTING_SOURCE_CODE:
2364: Py_ssize_t length;
2365: PyObject *unicode;
2366: int res;
2367:

2368: length = strlen(str);
2369: if (precision != -1)
2370: length = Py_MIN(length, precision);
2371: unicode = PyUnicode_DecodeUTF8Stateful(str, length, "replace", NULL);
2372: if (unicode == NULL)
2373: return -1;

SYMBOL_STACK_INDEX: 0

SYMBOL_NAME: python35!unicode_fromformat_write_cstr+10

FOLLOWUP_NAME: MachineOwner

MODULE_NAME: python35

IMAGE_NAME: python35.dll

DEBUG_FLR_IMAGE_TIMESTAMP: 5598ccc2

FAILURE_BUCKET_ID: INVALID_POINTER_READ_c0000005_python35.dll!unicode_fromformat_write_cstr

BUCKET_ID: APPLICATION_FAULT_INVALID_POINTER_READ_python35!unicode_fromformat_write_cstr+10

ANALYSIS_SOURCE: UM

FAILURE_ID_HASH_STRING: um:invalid_pointer_read_c0000005_python35.dll!unicode_fromformat_write_cstr

FAILURE_ID_HASH: {1d85cbc9-3259-9a7e-3da2-8540573292b2}

Followup: MachineOwner
---------

To fix the issue, it is recommended that PyFloat_FromString() check the type of argument v after a successful PyObject_GetBuffer() call to determine if the buffer is null-terminated or otherwise. A proposed patch is attached.

JohnLeitch · 2015-08-06T03:16:26Z

Attaching repro

serhiy-storchaka · 2015-11-04T13:55:10Z

I prefer to merge bpo-24802 and bpo-24803 and discuss them at one place.

Here is merged and revised patch.

The patch is changed. There is a very rare corner case: when the type of argument is a subclass of bytes or bytearray with overloaded tp_as_buffer->bf_getbuffer, returned view can be not NUL terminated. To avoid ambiguity and for unifying with int constructor, I have added special cases for bytes and bytearray (this is also restores pre-issue22896 behavior for int(bytes, base)).

Tests are moved and added additional tests for memoryview slices.

>>> int(memoryview(b'123')[1:3])
23
>>> int(memoryview(b'123\x00')[1:3])
23
>>> int(memoryview(b'123 ')[1:3])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: invalid literal for int() with base 10: b'23'
>>> int(memoryview(b'123A')[1:3])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: invalid literal for int() with base 10: b'23'
>>> int(memoryview(b'1234')[1:3])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: invalid literal for int() with base 10: b'23'
>>> float(memoryview(b'12.3')[1:4])
2.3
>>> float(memoryview(b'12.3\x00')[1:4])
2.3
>>> float(memoryview(b'12.3 ')[1:4])
2.3
>>> float(memoryview(b'12.3A')[1:4])
2.3
>>> float(memoryview(b'12.34')[1:4])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: could not convert string to float: <memory at 0xb6fee02c>

There is similar dangerously looking code for complex. But it is never executed, because complex accepts only one non-numeric type: str. The patch removes this misleading dead code.

vadmium · 2015-11-04T23:37:44Z

The patch looks good to me.

However I don’t understand what you meant about restoring int(bytes, base) behaviour. None of the patches here, nor in bpo-22896, touch long_new() in /Objects/longobject.c.

vadmium · 2015-11-05T00:01:02Z

Going through the commits for bpo-22896, I noticed compile(), eval() and exec() also suffer from a similar flaw. They check strlen(buffer) but the buffer may not be null-terminated:

>>> eval(memoryview(b"1234")[1:3])
TypeError: source code string cannot contain null bytes

vadmium · 2015-11-05T03:48:01Z

This patch builds on Serhiy’s, and also fixes compile(), eval() and exec().

Also, I took the liberty of converting your assertRaises() calls to assertRaisesRegex() with context managers. I find this is a good general practice, to avoid hiding an accidental exception.

serhiy-storchaka · 2015-11-05T20:18:13Z

Great! Besides few nitpicks the patch LGTM.

However I don’t understand what you meant about restoring int(bytes, base) behaviour. None of the patches here, nor in bpo-22896, touch long_new() in /Objects/longobject.c.

long_new() uses PyNumber_Long(). I was wrong, the contrary case is affected, when the base is not specified.

python-dev · 2015-11-07T03:33:17Z

New changeset a75336ac40e0 by Martin Panter in branch '3.4':
Issue bpo-24802: Copy bytes-like objects to null-terminated buffers if necessary
https://hg.python.org/cpython/rev/a75336ac40e0

New changeset 95b9c07b27f7 by Martin Panter in branch '3.5':
Issue bpo-24802: Merge null termination fixes from 3.4 into 3.5
https://hg.python.org/cpython/rev/95b9c07b27f7

New changeset 4df1eaecb506 by Martin Panter in branch 'default':
Issue bpo-24802: Merge null termination fixes from 3.5
https://hg.python.org/cpython/rev/4df1eaecb506

vadmium · 2015-11-07T05:35:35Z

I have committed changes to Python 3. The compile() test now exists as test_compile.TestSpecifics.test_null_terminated.

Serhiy: You set these bugs for Python 2 as well. Is there a way to produce a non-null-terminated buffer in Python 2 to help verify this? Both John’s original array() tests and the memoryview() tests don’t seem to be supported by Python 2’s idea of buffer objects:

>>> float(array.array("B",b"A"*0x10))
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: float() argument must be a string or a number
>>> float(memoryview(b"1234")[1:-1])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: float() argument must be a string or a number

I have a half-ported patch for Python 2, but it would be nice to know (a) if it is actually needed, and (b) what to do about unit tests.

serhiy-storchaka · 2015-11-07T07:33:43Z

Yes, looks as this issue is not related to 2.7.

serhiy-storchaka · 2015-11-07T09:50:56Z

One post-commit comment: I would use ValueError instead of Exceptions in tests for 3.5+. This will help to avoid regression and can catch incompatibilities in other implementations.

python-dev · 2015-11-08T11:30:45Z

New changeset a13d9656f954 by Martin Panter in branch '3.5':
Issue bpo-24802: Update test case for ValueError in 3.5
https://hg.python.org/cpython/rev/a13d9656f954

New changeset 96cdd2532034 by Martin Panter in branch 'default':
Issue bpo-24802: Merge ValueError test case from 3.5
https://hg.python.org/cpython/rev/96cdd2532034

vadmium · 2015-11-08T11:34:48Z

Thanks for picking that up Serhiy

python-dev · 2015-11-20T19:56:50Z

New changeset 3ef7d1af5195 by Serhiy Storchaka in branch '2.7':
Issue bpo-25678: Copy buffer objects to null-terminated strings.
https://hg.python.org/cpython/rev/3ef7d1af5195

JohnLeitch mannequin added the type-security A security issue label Aug 6, 2015

serhiy-storchaka added interpreter-core (Objects, Python, Grammar, and Parser dirs) type-bug An unexpected behavior, bug, or error and removed type-security A security issue labels Aug 6, 2015

serhiy-storchaka self-assigned this Aug 6, 2015

serhiy-storchaka added type-crash A hard crash of the interpreter, possibly with a core dump and removed type-bug An unexpected behavior, bug, or error labels Aug 6, 2015

serhiy-storchaka changed the title ~~PyFloat_FromString Buffer Over-read~~ int and float constructing from non NUL-terminated buffer Nov 4, 2015

serhiy-storchaka assigned vadmium and unassigned serhiy-storchaka Nov 7, 2015

vadmium closed this as completed Nov 8, 2015

ezio-melotti transferred this issue from another repository Apr 10, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

int and float constructing from non NUL-terminated buffer #68990

int and float constructing from non NUL-terminated buffer #68990

JohnLeitch mannequin commented Aug 6, 2015

JohnLeitch mannequin commented Aug 6, 2015

JohnLeitch mannequin commented Aug 6, 2015

serhiy-storchaka commented Nov 4, 2015

vadmium commented Nov 4, 2015

vadmium commented Nov 5, 2015

vadmium commented Nov 5, 2015

serhiy-storchaka commented Nov 5, 2015

python-dev mannequin commented Nov 7, 2015

vadmium commented Nov 7, 2015

serhiy-storchaka commented Nov 7, 2015

serhiy-storchaka commented Nov 7, 2015

python-dev mannequin commented Nov 8, 2015

vadmium commented Nov 8, 2015

python-dev mannequin commented Nov 20, 2015

int and float constructing from non NUL-terminated buffer #68990

int and float constructing from non NUL-terminated buffer #68990

Comments

JohnLeitch mannequin commented Aug 6, 2015

JohnLeitch mannequin commented Aug 6, 2015

JohnLeitch mannequin commented Aug 6, 2015

serhiy-storchaka commented Nov 4, 2015

vadmium commented Nov 4, 2015

vadmium commented Nov 5, 2015

vadmium commented Nov 5, 2015

serhiy-storchaka commented Nov 5, 2015

python-dev mannequin commented Nov 7, 2015

vadmium commented Nov 7, 2015

serhiy-storchaka commented Nov 7, 2015

serhiy-storchaka commented Nov 7, 2015

python-dev mannequin commented Nov 8, 2015

vadmium commented Nov 8, 2015

python-dev mannequin commented Nov 20, 2015