diff -r 7191b14ca312 Doc/library/os.rst --- a/Doc/library/os.rst Sun Sep 14 21:18:31 2014 +0300 +++ b/Doc/library/os.rst Mon Oct 06 21:06:51 2014 -0400 @@ -1601,6 +1601,11 @@ Availability: Unix, Windows. + .. seealso:: + + :func:`scandir`, another function that returns directory entries but + gives better performance for many common use cases. + .. versionchanged:: 3.2 The *path* parameter became optional. @@ -1893,6 +1898,142 @@ The *dir_fd* parameter. +.. function:: scandir(path='.') + + Return an iterator of :class:`DirEntry` objects corresponding to the files + and subdirectories in the directory given by *path*. Like :func:`listdir`, + the entries are yielded in arbitrary order, and the special entries + ``'.'`` and ``'..'`` are not included. + + Using :func:`scandir` instead of :func:`listdir` can significantly + increase the performance of code that also needs file type or file + attribute (stat) information, because :class:`DirEntry` objects + expose the file attribute information the operating system provides + when scanning a directory. All :class:`DirEntry` methods may perform a + system call, but :func:`DirEntry.is_dir` and :func:`DirEntry.is_file` + usually only require a system call for symbolic links (on both Unix and + Windows), and :func:`DirEntry.stat` always requires a system call on Unix + but only requires one for symbolic links on Windows. + + If *path* is of type ``str`` (recommended, and also the default when *path* + is not specified), the ``name`` and ``path`` attributes of the + :class:`DirEntry` objects will also be of type ``str``. If *path* is of + type ``bytes``, the ``name`` and ``path`` attributes will be ``bytes``. + + The following example shows a simple use of :func:`scandir` to + display all the files (not directories) in the given *path* that don't + start with ``'.'``. 
Note that the ``entry.is_file()`` call will + generally not make an additional operating system call:: + + for entry in os.scandir(path): + if not entry.name.startswith('.') and entry.is_file(): + print(entry.name) + + .. note:: + + On Unix-based systems, :func:`scandir` uses the system's + `opendir() <https://pubs.opengroup.org/onlinepubs/9699919799/functions/opendir.html>`_ + and + `readdir() <https://pubs.opengroup.org/onlinepubs/9699919799/functions/readdir.html>`_ + functions, and on Windows it uses the Win32 + `FindFirstFile <https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-findfirstfilew>`_ + and + `FindNextFile <https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-findnextfilew>`_ + functions. + + Availability: Unix, Windows. + + .. versionadded:: 3.5 + + +.. class:: DirEntry + + Object yielded by :func:`scandir` to expose the file path and other file + attributes of a directory entry. + + :func:`scandir` will provide as much of this information as possible + without making additional system calls. When a system call *is* made (it + will be a ``stat`` or ``lstat`` system call), the ``DirEntry`` object will + cache the result on the entry object. ``DirEntry`` instances are not + intended to be stored in long-lived data structures; if you know the file + metadata has changed or if a long time has elapsed since calling + :func:`scandir`, call ``os.stat(entry.path)`` or similar to fetch + up-to-date information. + + Because the ``DirEntry`` methods *may* make operating system calls, they + may also raise :exc:`OSError` in certain cases, for example, if a file + is deleted between calling :func:`scandir` and calling + :func:`DirEntry.stat`. If you need very fine-grained control over + errors, you can catch :exc:`OSError` when calling one of the ``DirEntry`` + methods and handle as appropriate. + + Attributes and methods on a ``DirEntry`` instance are as follows: + + .. attribute:: name + + The entry's base filename, relative to the :func:`scandir` *path* + argument; this field corresponds to the names returned by + :func:`listdir`. Will be of type ``str`` if the original + :func:`scandir` *path* argument was a ``str`` (recommended), otherwise + ``bytes``. + + .. 
attribute:: path + + The entry's full path name (an absolute path only if the original + :func:`scandir` *path* argument was absolute); this field is + equivalent to ``os.path.join(scandir_path, entry.name)``. Will + be of type ``str`` if the original :func:`scandir` *path* argument + was a ``str`` (recommended), otherwise ``bytes``. + + .. method:: is_dir(*, follow_symlinks=True) + + If *follow_symlinks* is ``True`` (the default), return ``True`` if the + entry is a directory or a symbolic link pointing to a directory, + ``False`` if it points to another kind of file. + + If *follow_symlinks* is ``False``, return ``True`` only if this entry + is a directory, ``False`` if it points to a symbolic link or another + kind of file. + + ``False`` is also returned if the path doesn't exist anymore or is + a broken symbolic link; other errors (such as permission errors) are + propagated as :exc:`OSError`. + + .. method:: is_file(*, follow_symlinks=True) + + If *follow_symlinks* is ``True`` (the default), return ``True`` if the + entry is a regular file or a symbolic link pointing to a regular file, + ``False`` if it points to another kind of file. + + If *follow_symlinks* is ``False``, return ``True`` only if this entry + is a regular file, ``False`` if it points to a symbolic link or another + kind of file. + + ``False`` is also returned if the path doesn't exist anymore or is + a broken symbolic link; other errors (such as permission errors) are + propagated as :exc:`OSError`. + + .. method:: is_symlink() + + Return ``True`` if this entry is a symbolic link, ``False`` if it + points to another kind of file. + + A broken symbolic link is still reported as a symbolic link. ``False`` is + returned if the path doesn't exist anymore; other errors (such as + permission errors) are propagated as :exc:`OSError`. + + .. method:: stat(*, follow_symlinks=True) + + Return a :class:`stat_result` object for this entry. 
This function + normally follows symbolic links; to stat a symbolic link add the + argument ``follow_symlinks=False``. + + On Windows, this method does not generally require a system call; + however, for implementation and performance reasons, the return value's + ``st_ino``, ``st_dev`` and ``st_nlink`` attributes will always be set + to zero. Call :func:`os.stat` if these fields are required. + + .. function:: stat(path, \*, dir_fd=None, follow_symlinks=True) Get the status of a file or a file descriptor. Perform the equivalent of a diff -r 7191b14ca312 Lib/test/test_scandir.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_scandir.py Mon Oct 06 21:06:51 2014 -0400 @@ -0,0 +1,279 @@ +"""Tests for scandir.scandir().""" + +import os +import shutil +import sys +import time +import unittest + +try: + import scandir + has_scandir = True +except ImportError: + has_scandir = False + +FILE_ATTRIBUTE_DIRECTORY = 16 + +TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'testdir')) + +IS_PY3 = sys.version_info >= (3, 0) + +if IS_PY3: + int_types = int +else: + int_types = (int, long) + str = unicode + + +if hasattr(os, 'symlink'): + try: + link_name = os.path.join(os.path.dirname(__file__), '_testlink') + os.symlink(__file__, link_name) + os.remove(link_name) + symlinks_supported = True + except NotImplementedError: + # Windows versions before Vista don't support symbolic links + symlinks_supported = False +else: + symlinks_supported = False + + +def create_file(path, contents='1234'): + with open(path, 'w') as f: + f.write(contents) + + +def setup_main(): + join = os.path.join + + try: + os.mkdir(TEST_PATH) + except Exception as e: + print(repr(e), e.filename) + import time + time.sleep(500) + raise + os.mkdir(join(TEST_PATH, 'subdir')) + create_file(join(TEST_PATH, 'file1.txt')) + create_file(join(TEST_PATH, 'file2.txt'), contents='12345678') + + os.mkdir(join(TEST_PATH, 'subdir', u'unidir\u018F')) + create_file(join(TEST_PATH, 'subdir', 
'file1.txt')) + create_file(join(TEST_PATH, 'subdir', u'unicod\u018F.txt')) + + create_file(join(TEST_PATH, 'subdir', u'unidir\u018F', 'file1.txt')) + + os.mkdir(join(TEST_PATH, 'linkdir')) + + +def setup_symlinks(): + join = os.path.join + + os.mkdir(join(TEST_PATH, 'linkdir', 'linksubdir')) + create_file(join(TEST_PATH, 'linkdir', 'file1.txt')) + + os.symlink(os.path.abspath(join(TEST_PATH, 'linkdir', 'file1.txt')), + join(TEST_PATH, 'linkdir', 'link_to_file')) + + dir_name = os.path.abspath(join(TEST_PATH, 'linkdir', 'linksubdir')) + dir_link = join(TEST_PATH, 'linkdir', 'link_to_dir') + if sys.version_info >= (3, 3): + # "target_is_directory" was only added in Python 3.3 + os.symlink(dir_name, dir_link, target_is_directory=True) + else: + os.symlink(dir_name, dir_link) + + +def teardown(): + try: + shutil.rmtree(TEST_PATH) + except OSError: + # why does the above fail sometimes? + time.sleep(0.1) + shutil.rmtree(TEST_PATH) + + +class TestMixin(object): + def setUp(self): + if not os.path.exists(TEST_PATH): + setup_main() + if symlinks_supported and not os.path.exists( + os.path.join(TEST_PATH, 'linkdir', 'linksubdir')): + setup_symlinks() + + if not hasattr(unittest.TestCase, 'skipTest'): + def skipTest(self, reason): + sys.stdout.write('skipped {0!r} '.format(reason)) + + def test_basic(self): + entries = sorted(self.scandir_func(TEST_PATH), key=lambda e: e.name) + self.assertEqual([(e.name, e.is_dir()) for e in entries], + [('file1.txt', False), ('file2.txt', False), + ('linkdir', True), ('subdir', True)]) + self.assertEqual([e.path for e in entries], + [os.path.join(TEST_PATH, e.name) for e in entries]) + + def test_dir_entry(self): + entries = dict((e.name, e) for e in self.scandir_func(TEST_PATH)) + e = entries['file1.txt'] + self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [False, True, False]) + e = entries['file2.txt'] + self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [False, True, False]) + e = entries['subdir'] + 
self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [True, False, False]) + + self.assertEqual(entries['file1.txt'].stat().st_size, 4) + self.assertEqual(entries['file2.txt'].stat().st_size, 8) + + def test_stat(self): + entries = list(self.scandir_func(TEST_PATH)) + for entry in entries: + os_stat = os.stat(os.path.join(TEST_PATH, entry.name)) + scandir_stat = entry.stat() + self.assertEqual(os_stat.st_mode, scandir_stat.st_mode) + self.assertEqual(int(os_stat.st_mtime), int(scandir_stat.st_mtime)) + self.assertEqual(int(os_stat.st_ctime), int(scandir_stat.st_ctime)) + if entry.is_file(): + self.assertEqual(os_stat.st_size, scandir_stat.st_size) + + def test_returns_iter(self): + it = self.scandir_func(TEST_PATH) + entry = next(it) + assert hasattr(entry, 'name') + + def check_file_attributes(self, result): + self.assertTrue(hasattr(result, 'st_file_attributes')) + self.assertTrue(isinstance(result.st_file_attributes, int_types)) + self.assertTrue(0 <= result.st_file_attributes <= 0xFFFFFFFF) + + def test_file_attributes(self): + if sys.platform != 'win32' or not self.has_file_attributes: + # st_file_attributes is Win32 specific (but can't use + # unittest.skipUnless on Python 2.6) + return self.skipTest('st_file_attributes not supported') + + entries = dict((e.name, e) for e in self.scandir_func(TEST_PATH)) + + # test st_file_attributes on a file (FILE_ATTRIBUTE_DIRECTORY not set) + result = entries['file1.txt'].stat() + self.check_file_attributes(result) + self.assertEqual(result.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY, 0) + + # test st_file_attributes on a directory (FILE_ATTRIBUTE_DIRECTORY set) + result = entries['subdir'].stat() + self.check_file_attributes(result) + self.assertEqual(result.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY, + FILE_ATTRIBUTE_DIRECTORY) + + def test_path(self): + entries = sorted(self.scandir_func(TEST_PATH), key=lambda e: e.name) + self.assertEqual([os.path.basename(e.name) for e in entries], + ['file1.txt', 
'file2.txt', 'linkdir', 'subdir']) + self.assertEqual([os.path.normpath(os.path.join(TEST_PATH, e.name)) for e in entries], + [os.path.normpath(e.path) for e in entries]) + + def test_symlink(self): + if not symlinks_supported: + return self.skipTest('symbolic links not supported') + + entries = sorted(self.scandir_func(os.path.join(TEST_PATH, 'linkdir')), + key=lambda e: e.name) + + self.assertEqual([(e.name, e.is_symlink()) for e in entries], + [('file1.txt', False), + ('link_to_dir', True), + ('link_to_file', True), + ('linksubdir', False)]) + + self.assertEqual([(e.name, e.is_file(), e.is_file(follow_symlinks=False)) + for e in entries], + [('file1.txt', True, True), + ('link_to_dir', False, False), + ('link_to_file', True, False), + ('linksubdir', False, False)]) + + self.assertEqual([(e.name, e.is_dir(), e.is_dir(follow_symlinks=False)) + for e in entries], + [('file1.txt', False, False), + ('link_to_dir', True, False), + ('link_to_file', False, False), + ('linksubdir', True, True)]) + + def test_bytes(self): + # Check that unicode filenames are returned correctly as bytes in output + path = os.path.join(TEST_PATH, 'subdir').encode(sys.getfilesystemencoding(), 'replace') + self.assertTrue(isinstance(path, bytes)) + entries = [e for e in self.scandir_func(path) if e.name.startswith(b'unicod')] + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, bytes)) + self.assertTrue(isinstance(entry.path, bytes)) + + # b'unicod?.txt' on Windows, b'unicod\xc6\x8f.txt' (UTF-8) or similar on POSIX + entry_name = u'unicod\u018f.txt'.encode(sys.getfilesystemencoding(), 'replace') + self.assertEqual(entry.name, entry_name) + self.assertEqual(entry.path, os.path.join(path, entry_name)) + + def test_unicode(self): + # Check that unicode filenames are returned correctly as (unicode) str in output + path = os.path.join(TEST_PATH, 'subdir') + if not IS_PY3: + path = path.decode(sys.getfilesystemencoding(), 'replace') + 
self.assertTrue(isinstance(path, str)) + entries = [e for e in self.scandir_func(path) if e.name.startswith('unicod')] + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, str)) + self.assertTrue(isinstance(entry.path, str)) + + entry_name = u'unicod\u018f.txt' + self.assertEqual(entry.name, entry_name) + self.assertEqual(entry.path, os.path.join(path, u'unicod\u018f.txt')) + + # Check that it handles unicode input properly + path = os.path.join(TEST_PATH, 'subdir', u'unidir\u018f') + self.assertTrue(isinstance(path, str)) + entries = list(self.scandir_func(path)) + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, str)) + self.assertTrue(isinstance(entry.path, str)) + self.assertEqual(entry.name, 'file1.txt') + self.assertEqual(entry.path, os.path.join(path, 'file1.txt')) + + # TODO ben: add tests for file not found is_dir/is_file/stat + + +if has_scandir: + class TestScandirGeneric(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_generic + self.has_file_attributes = False + TestMixin.setUp(self) + + + if hasattr(scandir, 'scandir_python'): + class TestScandirPython(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_python + self.has_file_attributes = True + TestMixin.setUp(self) + + + if hasattr(scandir, 'scandir_c'): + class TestScandirC(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_c + self.has_file_attributes = True + TestMixin.setUp(self) + + +if hasattr(os, 'scandir'): + class TestScandirOS(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = os.scandir + self.has_file_attributes = True + TestMixin.setUp(self) diff -r 7191b14ca312 Modules/posixmodule.c --- a/Modules/posixmodule.c Sun Sep 14 21:18:31 2014 +0300 +++ b/Modules/posixmodule.c Mon Oct 06 21:06:51 2014 -0400 @@ -25,6 +25,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" 
+#include "structmember.h" #ifndef MS_WINDOWS #include "posixmodule.h" #else @@ -16356,6 +16357,778 @@ #endif /* !MS_WINDOWS */ +/* Begin implementation of scandir and DirEntry */ + +PyDoc_STRVAR(posix_scandir__doc__, +"scandir(path='.') -> iterator of DirEntry objects for given path"); + +static char *_follow_symlinks_keywords[] = {"follow_symlinks", NULL}; + +typedef struct { + PyObject_HEAD + PyObject *name; + PyObject *path; + PyObject *stat; + PyObject *lstat; +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + struct win32_stat win32_lstat; +#else + unsigned char d_type; +#endif +} DirEntry; + +static void +DirEntry_dealloc(DirEntry *entry) +{ + Py_XDECREF(entry->name); + Py_XDECREF(entry->path); + Py_XDECREF(entry->stat); + Py_XDECREF(entry->lstat); + Py_TYPE(entry)->tp_free((PyObject *)entry); +} + +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + +typedef unsigned short mode_t; + +static PyObject * +DirEntry_is_symlink(DirEntry *self) +{ + return PyBool_FromLong((self->win32_lstat.st_mode & S_IFMT) == S_IFLNK); +} + +static PyObject * +DirEntry_do_stat(DirEntry *self, int follow_symlinks) +{ + if (follow_symlinks) { + if (!self->stat) { + if ((self->win32_lstat.st_mode & S_IFMT) == S_IFLNK) { + path_t path = PATH_T_INITIALIZE("DirEntry.stat", NULL, 0, 0); + + if (!path_converter(self->path, &path)) { + return NULL; + } + self->stat = posix_do_stat("DirEntry.stat", &path, + DEFAULT_DIR_FD, follow_symlinks); + path_cleanup(&path); + } + else { + if (!self->lstat) { + self->lstat = _pystat_fromstructstat(&self->win32_lstat); + } + Py_XINCREF(self->lstat); + self->stat = self->lstat; + } + } + Py_XINCREF(self->stat); + return self->stat; + } + else { + if (!self->lstat) { + self->lstat = _pystat_fromstructstat(&self->win32_lstat); + } + Py_XINCREF(self->lstat); + return self->lstat; + } +} + +#else /* POSIX || HAVE_OPENDIR */ + +/* Forward reference */ +static PyObject * +DirEntry_is_dir_file(DirEntry *self, int follow_symlinks, mode_t mode_bits); + 
+static PyObject * +DirEntry_is_symlink(DirEntry *self) +{ + if (self->d_type != DT_UNKNOWN) { + return PyBool_FromLong(self->d_type == DT_LNK); + } + else { + return DirEntry_is_dir_file(self, 0, S_IFLNK); + } +} + +static PyObject * +DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) +{ + PyObject *result; + path_t path = PATH_T_INITIALIZE("DirEntry.stat", NULL, 0, 0); + + if (!path_converter(self->path, &path)) { + return NULL; + } + result = posix_do_stat("DirEntry.stat", &path, DEFAULT_DIR_FD, follow_symlinks); + path_cleanup(&path); + return result; +} + +static PyObject * +DirEntry_do_stat(DirEntry *self, int follow_symlinks) +{ + if (follow_symlinks) { + if (!self->stat) { + int is_symlink; + PyObject *po_is_symlink = DirEntry_is_symlink(self); + if (!po_is_symlink) { + return NULL; + } + is_symlink = PyObject_IsTrue(po_is_symlink); + Py_DECREF(po_is_symlink); + + if (is_symlink) { + self->stat = DirEntry_fetch_stat(self, 1); + } + else { + if (!self->lstat) { + self->lstat = DirEntry_fetch_stat(self, 0); + } + Py_XINCREF(self->lstat); + self->stat = self->lstat; + } + } + Py_XINCREF(self->stat); + return self->stat; + } + else { + if (!self->lstat) { + self->lstat = DirEntry_fetch_stat(self, 0); + } + Py_XINCREF(self->lstat); + return self->lstat; + } +} + +#endif + +static PyObject * +DirEntry_stat(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.stat", + _follow_symlinks_keywords, + &follow_symlinks)) { + return NULL; + } + + return DirEntry_do_stat(self, follow_symlinks); +} + +static PyObject * +DirEntry_is_dir_file(DirEntry *self, int follow_symlinks, mode_t mode_bits) +{ + PyObject *stat = NULL; + PyObject *st_mode = NULL; + int mode; + int result = 0; + int is_symlink; + +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + is_symlink = (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK; + if (follow_symlinks && is_symlink) { +#else + is_symlink = 
self->d_type == DT_LNK; + if (self->d_type == DT_UNKNOWN || (follow_symlinks && is_symlink)) { +#endif + stat = DirEntry_do_stat(self, follow_symlinks); + if (!stat) { + if (PyErr_ExceptionMatches(PyExc_OSError) && errno == ENOENT) { + /* If file doesn't exist (anymore), then return False + (say it's not a directory) */ + PyErr_Clear(); + Py_RETURN_FALSE; + } + goto error; + } + st_mode = PyObject_GetAttrString(stat, "st_mode"); + if (!st_mode) { + goto error; + } + + mode = PyLong_AsLong(st_mode); + Py_DECREF(st_mode); + Py_DECREF(stat); + result = (mode & S_IFMT) == mode_bits; + } + else if (is_symlink) { + result = 0; + } + else { +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + unsigned long dir_bits = self->win32_lstat.st_file_attributes & + FILE_ATTRIBUTE_DIRECTORY; + result = (mode_bits == S_IFDIR) ? dir_bits != 0 : + dir_bits == 0; +#else + result = (mode_bits == S_IFDIR) ? self->d_type == DT_DIR : + self->d_type == DT_REG; +#endif + } + + return PyBool_FromLong(result); + +error: + Py_XDECREF(st_mode); + Py_XDECREF(stat); + return NULL; +} + +static PyObject * +DirEntry_is_dir(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.is_dir", + _follow_symlinks_keywords, + &follow_symlinks)) { + return NULL; + } + + return DirEntry_is_dir_file(self, follow_symlinks, (mode_t)S_IFDIR); +} + +static PyObject * +DirEntry_is_file(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.is_file", + _follow_symlinks_keywords, + &follow_symlinks)) { + return NULL; + } + + return DirEntry_is_dir_file(self, follow_symlinks, (mode_t)S_IFREG); +} + +static PyMemberDef DirEntry_members[] = { + {"name", T_OBJECT_EX, offsetof(DirEntry, name), READONLY, + "this entry's filename, relative to scandir()'s \"path\" argument"}, + {"path", T_OBJECT_EX, offsetof(DirEntry, path), READONLY, + "this 
entry's full path name, equivalent of os.path.join(scandir_path, entry.name)"}, + {NULL} +}; + +static PyMethodDef DirEntry_methods[] = { + {"is_dir", (PyCFunction)DirEntry_is_dir, METH_VARARGS | METH_KEYWORDS, + "return True if this entry is a directory; cached per entry" + }, + {"is_file", (PyCFunction)DirEntry_is_file, METH_VARARGS | METH_KEYWORDS, + "return True if this entry is a file; cached per entry" + }, + {"is_symlink", (PyCFunction)DirEntry_is_symlink, METH_NOARGS, + "return True if this entry is a symbolic link; cached per entry" + }, + {"stat", (PyCFunction)DirEntry_stat, METH_VARARGS | METH_KEYWORDS, + "return stat_result object for this entry; cached per entry" + }, + {NULL} +}; + +PyTypeObject DirEntryType = { + PyVarObject_HEAD_INIT(NULL, 0) + "DirEntry", /* tp_name */ + sizeof(DirEntry), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)DirEntry_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + DirEntry_methods, /* tp_methods */ + DirEntry_members, /* tp_members */ +}; + +static char * +_join_path_filenameA(char *path_narrow, char* filename, Py_ssize_t filename_len) +{ + Py_ssize_t path_len; + char *result; + + if (!path_narrow) { /* Default arg: "." 
*/ + path_narrow = "."; + path_len = 1; + } + else { + path_len = strlen(path_narrow); + } + path_len = strlen(path_narrow); + + if (filename_len == -1) { + filename_len = strlen(filename); + } + + /* The +2 is for the path separator and the NUL */ + result = PyMem_Malloc(path_len + filename_len + 2); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + strcpy(result, path_narrow); + if (path_len > 0) { + char ch = result[path_len - 1]; +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + if (ch != '\\' && ch != '/' && ch != ':') + result[path_len++] = '\\'; +#else + if (ch != '/') + result[path_len++] = '/'; +#endif + strcpy(result + path_len, filename); + } + return result; +} + +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + +static void +find_data_to_stat(WIN32_FIND_DATAW *data, struct win32_stat *result) +{ + /* Note: data argument can point to a WIN32_FIND_DATAW or a + WIN32_FIND_DATAA struct, as the first members are in the same + position, and cFileName is not used here + */ + memset(result, 0, sizeof(*result)); + + result->st_mode = attributes_to_mode(data->dwFileAttributes); + if ((data->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0 && + (data->dwReserved0 == IO_REPARSE_TAG_SYMLINK)) { + /* first clear the S_IFMT bits */ + result->st_mode ^= (result->st_mode & S_IFMT); + /* now set the bits that make this a symlink */ + result->st_mode |= S_IFLNK; + } + + result->st_size = (((__int64)data->nFileSizeHigh)<<32) + data->nFileSizeLow; + + FILE_TIME_to_time_t_nsec(&data->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); + FILE_TIME_to_time_t_nsec(&data->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + FILE_TIME_to_time_t_nsec(&data->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); + + result->st_file_attributes = data->dwFileAttributes; +} + +static wchar_t * +_join_path_filenameW(wchar_t *path_wide, wchar_t* filename) +{ + Py_ssize_t path_len; + wchar_t *result; + + if (!path_wide) { /* Default arg: "." 
*/ + path_wide = L"."; + path_len = 1; + } + else { + path_len = wcslen(path_wide); + } + /* The +2 is for the path separator and the NUL */ + result = PyMem_Malloc((path_len + wcslen(filename) + 2) * sizeof(wchar_t)); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + wcscpy(result, path_wide); + if (path_len > 0) { + wchar_t ch = result[path_len - 1]; + if (ch != SEP && ch != ALTSEP && ch != L':') + result[path_len++] = SEP; + wcscpy(result + path_len, filename); + } + return result; +} + +static PyObject * +make_DirEntry(path_t *path, void *data) +{ + DirEntry *entry; + + entry = PyObject_New(DirEntry, &DirEntryType); + if (!entry) { + return NULL; + } + entry->name = NULL; + entry->path = NULL; + entry->stat = NULL; + entry->lstat = NULL; + + if (!path->narrow) { + WIN32_FIND_DATAW *dataW = (WIN32_FIND_DATAW *)data; + wchar_t *path_strW; + + entry->name = PyUnicode_FromWideChar(dataW->cFileName, wcslen(dataW->cFileName)); + if (!entry->name) { + goto error; + } + + path_strW = _join_path_filenameW(path->wide, dataW->cFileName); + if (!path_strW) { + goto error; + } + entry->path = PyUnicode_FromWideChar(path_strW, wcslen(path_strW)); + PyMem_Free(path_strW); + if (!entry->path) { + goto error; + } + } + else { + WIN32_FIND_DATAA *dataA = (WIN32_FIND_DATAA *)data; + char *path_strA; + + entry->name = PyBytes_FromString(dataA->cFileName); + if (!entry->name) { + goto error; + } + + path_strA = _join_path_filenameA(path->narrow, dataA->cFileName, -1); + if (!path_strA) { + goto error; + } + entry->path = PyBytes_FromString(path_strA); + PyMem_Free(path_strA); + if (!entry->path) { + goto error; + } + } + find_data_to_stat((WIN32_FIND_DATAW *)data, &entry->win32_lstat); + + return (PyObject *)entry; + +error: + Py_XDECREF(entry); + return NULL; +} + +#else /* POSIX || HAVE_OPENDIR */ + +static PyObject * +make_DirEntry(path_t *path, char *name, Py_ssize_t name_len, unsigned char d_type) +{ + DirEntry *entry; + char *joined_path; + + entry = 
PyObject_New(DirEntry, &DirEntryType); + if (!entry) { + return NULL; + } + entry->name = NULL; + entry->path = NULL; + entry->stat = NULL; + entry->lstat = NULL; + + joined_path = _join_path_filenameA(path->narrow, name, name_len); + if (!joined_path) { + goto error; + } + + if (!path->narrow || !PyBytes_Check(path->object)) { + entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len); + entry->path = PyUnicode_DecodeFSDefault(joined_path); + } + else { + entry->name = PyBytes_FromStringAndSize(name, name_len); + entry->path = PyBytes_FromString(joined_path); + } + PyMem_Free(joined_path); + if (!entry->name || !entry->path) { + goto error; + } + + entry->d_type = d_type; + + return (PyObject *)entry; + +error: + Py_XDECREF(entry); + return NULL; +} +#endif + +typedef struct { + PyObject_HEAD + path_t path; + int yield_name; /* for when listdir() is implemented using scandir() */ +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + HANDLE handle; +#else + DIR *dirp; +#endif +} ScandirIterator; + +static void +ScandirIterator_dealloc(ScandirIterator *iterator) +{ + Py_XDECREF(iterator->path.object); + path_cleanup(&iterator->path); + Py_TYPE(iterator)->tp_free((PyObject *)iterator); +} + +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + +static PyObject * +ScandirIterator_iternext(ScandirIterator *iterator) +{ + union { /* We only use one at a time, so save space */ + WIN32_FIND_DATAW W; + WIN32_FIND_DATAA A; + } FileData; + + int is_unicode = !iterator->path.narrow; + + while (1) { + if (iterator->handle == INVALID_HANDLE_VALUE) { + /* First time around, prepare path and call FindFirstFile */ + if (is_unicode) { + wchar_t *path_strW; + + path_strW = _join_path_filenameW(iterator->path.wide, L"*.*"); + if (!path_strW) { + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + iterator->handle = FindFirstFileW(path_strW, &FileData.W); + Py_END_ALLOW_THREADS + + PyMem_Free(path_strW); /* We're done with path_strW now */ + } + else { + char *path_strA; + + path_strA = 
_join_path_filenameA(iterator->path.narrow, "*.*", -1); + if (!path_strA) { + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + iterator->handle = FindFirstFileA(path_strA, &FileData.A); + Py_END_ALLOW_THREADS + + PyMem_Free(path_strA); /* We're done with path_strA now */ + } + + if (iterator->handle == INVALID_HANDLE_VALUE) { + if (GetLastError() != ERROR_FILE_NOT_FOUND) { + return path_error(&iterator->path); + } + /* No files found, stop iterating */ + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + else { + BOOL success; + + Py_BEGIN_ALLOW_THREADS + success = is_unicode ? FindNextFileW(iterator->handle, &FileData.W) : + FindNextFileA(iterator->handle, &FileData.A); + Py_END_ALLOW_THREADS + + if (!success) { + if (GetLastError() != ERROR_NO_MORE_FILES) { + return path_error(&iterator->path); + } + /* No more files found in directory, stop iterating */ + Py_BEGIN_ALLOW_THREADS + success = FindClose(iterator->handle); + Py_END_ALLOW_THREADS + if (!success) { + return path_error(&iterator->path); + } + iterator->handle = INVALID_HANDLE_VALUE; + + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* Skip over . and .. 
*/ + if (is_unicode) { + if (wcscmp(FileData.W.cFileName, L".") != 0 && + wcscmp(FileData.W.cFileName, L"..") != 0) { + if (iterator->yield_name) { + return PyUnicode_FromWideChar(FileData.W.cFileName, wcslen(FileData.W.cFileName)); + } + else { + return make_DirEntry(&iterator->path, &FileData.W); + } + } + } + else { + if (strcmp(FileData.A.cFileName, ".") != 0 && + strcmp(FileData.A.cFileName, "..") != 0) { + if (iterator->yield_name) { + return PyBytes_FromString(FileData.A.cFileName); + } + else { + return make_DirEntry(&iterator->path, &FileData.A); + } + } + } + + /* Loop till we get a non-dot directory or finish iterating */ + } +} + +#else /* POSIX || HAVE_OPENDIR */ + +static PyObject * +ScandirIterator_iternext(ScandirIterator *iterator) +{ + struct dirent *direntp; + Py_ssize_t name_len; + int is_dot; + + if (!iterator->dirp) { + /* First time iterating, prepare path and call opendir */ + errno = 0; + Py_BEGIN_ALLOW_THREADS + iterator->dirp = opendir(iterator->path.narrow ? iterator->path.narrow : "."); + Py_END_ALLOW_THREADS + + if (!iterator->dirp) { + return path_error(&iterator->path); + } + } + + while (1) { + errno = 0; + Py_BEGIN_ALLOW_THREADS + direntp = readdir(iterator->dirp); + Py_END_ALLOW_THREADS + + if (!direntp) { + int result; + + if (errno != 0) { + return path_error(&iterator->path); + } + + /* No more files found in directory, stop iterating */ + Py_BEGIN_ALLOW_THREADS + result = closedir(iterator->dirp); + Py_END_ALLOW_THREADS + if (result != 0) { + return path_error(&iterator->path); + } + iterator->dirp = NULL; + + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* Skip over . and .. */ + name_len = NAMLEN(direntp); + is_dot = direntp->d_name[0] == '.' && + (name_len == 1 || (direntp->d_name[1] == '.' 
&& name_len == 2)); + if (!is_dot) { + if (!iterator->yield_name) { + return make_DirEntry(&iterator->path, direntp->d_name, name_len, + direntp->d_type); + } + if (!iterator->path.narrow || !PyBytes_Check(iterator->path.object)) { + return PyUnicode_DecodeFSDefaultAndSize(direntp->d_name, name_len); + } + else { + return PyBytes_FromStringAndSize(direntp->d_name, name_len); + } + } + + /* Loop till we get a non-dot directory or finish iterating */ + } +} + +#endif + +PyTypeObject ScandirIteratorType = { + PyVarObject_HEAD_INIT(NULL, 0) + "ScandirIterator", /* tp_name */ + sizeof(ScandirIterator), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)ScandirIterator_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)ScandirIterator_iternext, /* tp_iternext */ +}; + +static PyObject * +posix_scandir(PyObject *self, PyObject *args, PyObject *kwargs) +{ + ScandirIterator *iterator; + static char *keywords[] = {"path", NULL}; + + iterator = PyObject_New(ScandirIterator, &ScandirIteratorType); + if (!iterator) { + return NULL; + } + iterator->yield_name = 0; + memset(&iterator->path, 0, sizeof(path_t)); + iterator->path.function_name = "scandir"; + iterator->path.nullable = 1; + +#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) + iterator->handle = INVALID_HANDLE_VALUE; +#else + iterator->dirp = NULL; +#endif + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O&:scandir", keywords, + path_converter, &iterator->path)) { + Py_DECREF(iterator); + return NULL; + } + + /* 
path_converter doesn't keep path.object around, so do it + manually for the lifetime of the iterator here (the refcount + is decremented in ScandirIterator_dealloc) + */ + Py_XINCREF(iterator->path.object); + + return (PyObject *)iterator; +} + +/* End implementation of scandir and DirEntry */ + + /*[clinic input] dump buffer [clinic start generated code]*/ @@ -17028,6 +17801,8 @@ {"get_blocking", posix_get_blocking, METH_VARARGS, get_blocking__doc__}, {"set_blocking", posix_set_blocking, METH_VARARGS, set_blocking__doc__}, #endif + {"scandir", (PyCFunction)posix_scandir, METH_VARARGS | METH_KEYWORDS, + posix_scandir__doc__}, {NULL, NULL} /* Sentinel */ }; @@ -17699,6 +18474,12 @@ if (PyStructSequence_InitType2(&TerminalSizeType, &TerminalSize_desc) < 0) return NULL; + + /* initialize scandir types */ + if (PyType_Ready(&ScandirIteratorType) < 0) + return NULL; + if (PyType_Ready(&DirEntryType) < 0) + return NULL; } #if defined(HAVE_WAITID) && !defined(__APPLE__) Py_INCREF((PyObject*) &WaitidResultType);