classification
Title: Ctypes segfault or TypeError tested for python2.7 and 3
Type: Stage: resolved
Components: ctypes Versions: Python 3.5, Python 2.7
process
Status: closed Resolution: not a bug
Dependencies: Superseder:
Assigned To: Nosy List: eryksun, execve, josh.r
Priority: normal Keywords:

Created on 2018-11-06 16:47 by execve, last changed 2018-11-07 17:27 by eryksun. This issue is now closed.

Messages (5)
msg329367 - (view) Author: Bob (execve) Date: 2018-11-06 16:47
~Description of the problem:

I was using ctypes to get a directory file descriptor, and while looking for a way to do so I found this mailing list thread (https://lists.gt.net/python/dev/696028) from 2008, where a user posted a snippet that does what both the original asker and I were looking for.
What concerns me is how widely this code has been reused: when I looked through GitHub and Google, I came across exact copies of it.

The code provided looks like this:

from ctypes import CDLL, c_char_p, c_int, Structure, POINTER
from ctypes.util import find_library

class c_dir(Structure):
    """Opaque type for directory entries, corresponds to struct DIR"""
c_dir_p = POINTER(c_dir)

# Load the C library and declare the prototypes of the three directory
# functions used below.
c_lib = CDLL(find_library("c"))
opendir = c_lib.opendir
opendir.argtypes = [c_char_p]
opendir.restype = c_dir_p
dirfd = c_lib.dirfd  # <-- IT FAILS HERE // STACK TRACE PROVIDED
dirfd.argtypes = [c_dir_p]
dirfd.restype = c_int
closedir = c_lib.closedir
closedir.argtypes = [c_dir_p]
closedir.restype = c_int

# Open the current directory, print its descriptor, then close it again.
# NOTE(review): the pointer returned by opendir() is used without a check.
dir_p = opendir(".")
print("dir_p = %r" % dir_p)
dir_fd = dirfd(dir_p)
print("dir_fd = %r" % dir_fd)
print("closed (rc %r)" % closedir(dir_p))

When I implemented it on my machine, I changed it a bit so that "opendir()" gets its argument from an input value, and the final program looks like this:

from ctypes import *
import sys
import ctypes
from ctypes.util import find_library

class c_dir(Structure):
    """Opaque ctypes structure corresponding to C's ``struct DIR``.

    No fields are declared; the type is only used through POINTER(c_dir).
    """

def get_directory_file_descriptor(directory):
    """Open *directory* with libc's opendir(), print its descriptor, close it.

    Prints the pointer returned by opendir(), the descriptor reported by
    dirfd() and the return code of closedir(). Returns None.

    NOTE(review): the pointer returned by opendir() is handed to dirfd()
    without being checked, and the path is passed as a str built with
    "%s" formatting rather than as bytes.
    """
    c_dir_p = POINTER(c_dir)
    c_lib = CDLL(find_library("c"))
    opendir = c_lib.opendir
    opendir.argtypes = [c_char_p]
    opendir.restype = c_dir_p
    dirfd = c_lib.dirfd  # <-- SAME. FAILS HERE.
    dirfd.argtypes = [c_dir_p]
    dirfd.restype = c_int
    closedir = c_lib.closedir
    closedir.argtypes = [c_dir_p]
    closedir.restype = c_int

    dir_p = opendir("%s" % directory)
    print ("dir_p = %s:%r" % (directory, dir_p))
    dir_fd = dirfd(dir_p)
    print("dir_fd = %r" % dir_fd)
    print ("closed (rc %r)" % closedir(dir_p))

get_directory_file_descriptor(sys.argv[1])

When I run it *with python 2.7*, the program runs normally if I enter the expected value, like "/home/". But if I don't, the program exits with a segmentation fault.
In python 3, it fails no matter what with a TypeError.

INPUT when NOT giving the error (in python 2.7): /home/
INPUT when giving the error: aaa

~Stack trace from python 2.7:

Program received signal SIGSEGV, Segmentation fault.
dirfd (dirp=0x0) at ../sysdeps/posix/dirfd.c:27
27	../sysdeps/posix/dirfd.c: No such file or directory.
(gdb) bt
#0  dirfd (dirp=0x0) at ../sysdeps/posix/dirfd.c:27
#1  0x00007ffff6698e40 in ffi_call_unix64 () from /usr/lib/x86_64-linux-gnu/libffi.so.6
#2  0x00007ffff66988ab in ffi_call () from /usr/lib/x86_64-linux-gnu/libffi.so.6
#3  0x00007ffff68a83df in _call_function_pointer (argcount=1, resmem=0x7fffffffd630, 
    restype=<optimized out>, atypes=<optimized out>, avalues=0x7fffffffd610, 
    pProc=0x7ffff78b8960 <dirfd>, flags=4353)
    at /build/python2.7-dPs3Rr/python2.7-2.7.12/Modules/_ctypes/callproc.c:837
#4  _ctypes_callproc (pProc=0x7ffff78b8960 <dirfd>, argtuple=<optimized out>, flags=4353, 
    argtypes=(<built-in method from_param of _ctypes.PyCPointerType object at remote 0xa2d370>,), 
    restype=<_ctypes.PyCSimpleType at remote 0xa38ce0>, checker=0x0)
    at /build/python2.7-dPs3Rr/python2.7-2.7.12/Modules/_ctypes/callproc.c:1180
#5  0x00007ffff68acd82 in PyCFuncPtr_call.lto_priv.107 (self=self@entry=0x7ffff7e322c0, 
    inargs=inargs@entry=(<LP_c_dir at remote 0x7ffff7ed45f0>,), kwds=kwds@entry=0x0)
    at /build/python2.7-dPs3Rr/python2.7-2.7.12/Modules/_ctypes/_ctypes.c:3954
#6  0x00000000004c15bf in PyObject_Call (kw=0x0, arg=(<LP_c_dir at remote 0x7ffff7ed45f0>,), 
    func=<_FuncPtr(__name__='dirfd') at remote 0x7ffff7e322c0>) at ../Objects/abstract.c:2546
#7  do_call (nk=<optimized out>, na=<optimized out>, pp_stack=0x7fffffffd890, 
    func=<_FuncPtr(__name__='dirfd') at remote 0x7ffff7e322c0>) at ../Python/ceval.c:4567
#8  call_function (oparg=<optimized out>, pp_stack=0x7fffffffd890) at ../Python/ceval.c:4372
#9  PyEval_EvalFrameEx () at ../Python/ceval.c:2987
#10 0x00000000004c136f in fast_function (nk=<optimized out>, na=<optimized out>, n=1, 
    pp_stack=0x7fffffffd9b0, func=<function at remote 0x7ffff7e8f5f0>) at ../Python/ceval.c:4435
#11 call_function (oparg=<optimized out>, pp_stack=0x7fffffffd9b0) at ../Python/ceval.c:4370
#12 PyEval_EvalFrameEx () at ../Python/ceval.c:2987
#13 0x00000000004b9ab6 in PyEval_EvalCodeEx () at ../Python/ceval.c:3582
#14 0x00000000004eb30f in PyEval_EvalCode (
    locals={'c_void_p': <_ctypes.PyCSimpleType at remote 0xa3df50>, 'c_int64': <_ctypes.PyCSimpleType at remote 0xa1d7b0>, 'c_ssize_t': <_ctypes.PyCSimpleType at remote 0xa1d7b0>, 'c_longdouble': <_ctypes.PyCSimpleType at remote 0xa3c360>, 'Union': <_ctypes.UnionType at remote 0x7ffff6abc400>, 'cdll': <LibraryLoader(_dlltype=<type at remote 0xa3f780>) at remote 0x7ffff7e2c450>, 'c_wchar': <_ctypes.PyCSimpleType at remote 0xa3f0b0>, 'memset': <CFunctionType at remote 0x7ffff7fc6e20>, 'c_bool': <_ctypes.PyCSimpleType at remote 0xa3e620>, 'CFUNCTYPE': <function at remote 0x7ffff7e8f938>, 'DEFAULT_MODE': 0, 'string_at': <function at remote 0x7ffff7e30230>, 'c_voidp': <_ctypes.PyCSimpleType at re---Type <return> to continue, or q <return> to quit---
mote 0xa3df50>, '__name__': '__main__', 'c_uint64': <_ctypes.PyCSimpleType at remote 0xa367b0>, 'sizeof': <built-in function sizeof>, 'byref': <built-in function byref>, 'pointer': <built-in function pointer>, 'alignment': <built-in function alignment>, 'pydll': <LibraryLoader(_dlltype=<type at remote 0xa3fe50>) at remote 0x7ffff7e2c...(truncated), 
    globals={'c_void_p': <_ctypes.PyCSimpleType at remote 0xa3df50>, 'c_int64': <_ctypes.PyCSimpleType at remote 0xa1d7b0>, 'c_ssize_t': <_ctypes.PyCSimpleType at remote 0xa1d7b0>, 'c_longdouble': <_ctypes.PyCSimpleType at remote 0xa3c360>, 'Union': <_ctypes.UnionType at remote 0x7ffff6abc400>, 'cdll': <LibraryLoader(_dlltype=<type at remote 0xa3f780>) at remote 0x7ffff7e2c450>, 'c_wchar': <_ctypes.PyCSimpleType at remote 0xa3f0b0>, 'memset': <CFunctionType at remote 0x7ffff7fc6e20>, 'c_bool': <_ctypes.PyCSimpleType at remote 0xa3e620>, 'CFUNCTYPE': <function at remote 0x7ffff7e8f938>, 'DEFAULT_MODE': 0, 'string_at': <function at remote 0x7ffff7e30230>, 'c_voidp': <_ctypes.PyCSimpleType at remote 0xa3df50>, '__name__': '__main__', 'c_uint64': <_ctypes.PyCSimpleType at remote 0xa367b0>, 'sizeof': <built-in function sizeof>, 'byref': <built-in function byref>, 'pointer': <built-in function pointer>, 'alignment': <built-in function alignment>, 'pydll': <LibraryLoader(_dlltype=<type at remote 0xa3fe50>) at remote 0x7ffff7e2c...(truncated), co=0x7ffff7ed2d30) at ../Python/ceval.c:669
#15 run_mod.lto_priv () at ../Python/pythonrun.c:1376
#16 0x00000000004e5422 in PyRun_FileExFlags () at ../Python/pythonrun.c:1362
#17 0x00000000004e3cd6 in PyRun_SimpleFileExFlags () at ../Python/pythonrun.c:948
#18 0x0000000000493ae2 in Py_Main () at ../Modules/main.c:640
#19 0x00007ffff7810830 in __libc_start_main (main=0x4934c0 <main>, argc=3, argv=0x7fffffffddf8, 
    init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, 
    stack_end=0x7fffffffdde8) at ../csu/libc-start.c:291
#20 0x00000000004933e9 in _start ()

~The reason I thought that this may be a bug is because while researching the problem, I came across this other bug (https://bugzilla.redhat.com/show_bug.cgi?id=674206) where the user specifies a similar issue for RedHat.

The difference, though, is that in that case it could be fixed by specifying the argtypes, while in my case it's already specified.

I tested this on an Ubuntu 16.04 and ArchLinux machine and got the same results.
msg329368 - (view) Author: Josh Rosenberg (josh.r) * (Python triager) Date: 2018-11-06 17:07
The TypeError on Py3 would be because functions taking c_char_p need bytes-like objects, not str, on Python 3. '%s' % directory is pointless when directory is a str; instead you need to encode it to a bytes-like object, e.g. opendir(os.fsencode(directory)) (os.fsencode is Python 3 specific; plain str works fine on Py 2).

Your segfault isn't occurring when you load dirfd, it occurs when you call it on the result of opendir, when opendir returned NULL on failure (due to the non-existent directory you call it with). You didn't check the return value, and end up doing flagrantly illegal things with it.

In neither case is this a bug in Python; ctypes lets you do evil things that break the rules, and if you break the rules the wrong way, segfaults are to be expected. Fix your argument types (for Py3), check your return values (for Py2).
msg329369 - (view) Author: Bob (execve) Date: 2018-11-06 17:16
Hi Josh, thanks for answering me so quickly.
So, if I understood correctly, passing an unexpected and unchecked value could lead to a potential vulnerability in the program? Or just to a plain failure (which could also be a denial of service)?

Thanks again.

Sent with ProtonMail Secure Email.

‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
On Tuesday, November 6, 2018 5:07 PM, Josh Rosenberg <report@bugs.python.org> wrote:

> Josh Rosenberg shadowranger+python@gmail.com added the comment:
>
> The TypeError on Py3 would be because functions taking c_char_p need bytes-like objects, not str, on Python 3. '%s' % directory is pointless when directory is a str; instead you need to encode it to a bytes-like object, e.g. opendir(os.fsencode(directory)) (os.fsencode is Python 3 specific; plain str works fine on Py 2).
>
> Your segfault isn't occurring when you load dirfd, it occurs when you call it on the result of opendir, when opendir returned NULL on failure (due to the non-existent directory you call it with). You didn't check the return value, and end up doing flagrantly illegal things with it.
>
> In neither case is this a bug in Python; ctypes lets you do evil things that break the rules, and if you break the rules the wrong way, segfaults are to be expected. Fix your argument types (for Py3), check your return values (for Py2).
>
> -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> nosy: +josh.r
> resolution: -> not a bug
> stage: -> resolved
> status: open -> closed
>
> Python tracker report@bugs.python.org
> https://bugs.python.org/issue35180
msg329401 - (view) Author: Josh Rosenberg (josh.r) * (Python triager) Date: 2018-11-07 02:46
As soon as you use ctypes, you sign up for all the security vulnerabilities, including denial of service, buffer overrun, use-after-free, etc. that plain old C programs are subject to. In this case, it's just a NULL pointer dereference (read: segfault in most normal cases), but in general, if you don't use ctypes with the same discipline as you would actual C code (at best it provides a little in the way of automatic memory management), you're subject to all the same problems.

Side-note: When replying to e-mails, don't include the quotes from the e-mail you're replying to; it just clutters the tracker.
msg329427 - (view) Author: Eryk Sun (eryksun) * (Python triager) Date: 2018-11-07 17:27
Assign an errcheck function to check the return value and raise an OSError exception on failure. Load the C library with use_errno=True to get the value of C errno. Make sure to properly handle encoding Unicode strings for the file-system encoding, including conversion of __fspath__ paths (e.g. 3.x pathlib). Finally, don't load libraries and define prototypes in API functions. It's redundant and inefficient. You're just needlessly increasing the loader's reference count on the shared library and needlessly redefining prototypes that never change. Load it once at module or class level.

For example:

    import os
    import sys
    import ctypes
    import ctypes.util
    import contextlib

    # Load the C library once; use_errno=True lets ctypes.get_errno()
    # report the C errno after a foreign function call.
    libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)

    class DIR(ctypes.Structure):
        """Opaque type corresponding to C's DIR; no fields are declared."""

    # Pointer-to-DIR type used in the function prototypes.
    PDIR = ctypes.POINTER(DIR)

    class c_fschar_p(ctypes.c_char_p):
        """c_char_p variant that accepts file-system paths as arguments.

        On Python 3, str, bytes and os.PathLike values are passed through
        os.fsencode(); on Python 2, unicode values are encoded with the
        file-system encoding. Anything else is handed to c_char_p unchanged.
        """

        @classmethod
        def from_param(cls, param):
            """Convert *param* to bytes where applicable, then defer to c_char_p."""
            if sys.version_info[0] >= 3:
                if isinstance(param, (str, bytes, os.PathLike)):
                    param = os.fsencode(param)
            elif isinstance(param, unicode):
                fs_encoding = sys.getfilesystemencoding()
                param = param.encode(fs_encoding)
            return super(c_fschar_p, cls).from_param(param)

    def check_bool(result, func, args):
        """ctypes errcheck hook: raise OSError from errno when *result* is falsy.

        Returns *args* unchanged when *result* is truthy.
        """
        if result:
            return args
        errno_value = ctypes.get_errno()
        raise OSError(errno_value, os.strerror(errno_value))

    def check_int(result, func, args):
        """ctypes errcheck hook: raise OSError from errno when *result* is -1.

        Returns *args* unchanged for any other result.
        """
        if result != -1:
            return args
        errno_value = ctypes.get_errno()
        raise OSError(errno_value, os.strerror(errno_value))

    # opendir: takes a file-system path, returns a DIR pointer; a false
    # (NULL) result is turned into OSError by check_bool.
    libc.opendir.argtypes = (c_fschar_p,)
    libc.opendir.restype = PDIR
    libc.opendir.errcheck = check_bool
    # dirfd: takes a DIR pointer; a result of -1 is turned into OSError.
    libc.dirfd.argtypes = (PDIR,)
    libc.dirfd.errcheck = check_int
    # closedir: takes a DIR pointer; a result of -1 is turned into OSError.
    libc.closedir.argtypes = (PDIR,)
    libc.closedir.errcheck = check_int

    @contextlib.contextmanager
    def get_directory_file_descriptor(directory):
        """Context manager yielding the file descriptor of an opened directory.

        Opens *directory* with libc.opendir, yields the value returned by
        libc.dirfd for it, and calls libc.closedir when the with-block
        exits, whether or not an exception was raised. Progress messages
        are printed only when __debug__ is true.

        NOTE(review): per POSIX, closedir() also closes the underlying
        descriptor, so the yielded value is presumably only usable inside
        the with-block.
        """
        dir_p = libc.opendir(directory)
        try:
            if __debug__:
                print("dir_p = %s:%r" % (directory, dir_p))
            dir_fd = libc.dirfd(dir_p)
            if __debug__:
                print("dir_fd = %r" % dir_fd)
            yield dir_fd
        finally:
            # Always close dir_p, even if dirfd or the caller's block raised.
            libc.closedir(dir_p)
            if __debug__:
                print("closed %r" % dir_p)
History
Date User Action Args
2018-11-07 17:27:01 eryksun set nosy: + eryksun
messages: + msg329427
2018-11-07 02:46:10 josh.r set messages: + msg329401
2018-11-06 17:16:31 execve set messages: + msg329369
2018-11-06 17:07:49 josh.r set status: open -> closed

nosy: + josh.r
messages: + msg329368

resolution: not a bug
stage: resolved
2018-11-06 16:47:10 execve create