diff -r d33b9fd46cef Doc/library/os.rst --- a/Doc/library/os.rst Sun Nov 06 18:47:35 2016 +0200 +++ b/Doc/library/os.rst Sun Nov 06 23:52:46 2016 +0200 @@ -2016,13 +2016,14 @@ features: always requires a system call on Unix but only requires one for symbolic links on Windows. - On Unix, *path* can be of type :class:`str` or :class:`bytes` (either - directly or indirectly through the :class:`PathLike` interface; use - :func:`~os.fsencode` and :func:`~os.fsdecode` to encode and decode - :class:`bytes` paths). On Windows, *path* must be of type :class:`str`. - On both systems, the type of the :attr:`~os.DirEntry.name` and - :attr:`~os.DirEntry.path` attributes of each :class:`os.DirEntry` will be of - the same type as *path*. + *path* may be a :term:`path-like object`. If *path* is of type ``bytes`` + (directly or indirectly through the :class:`PathLike` interface), + the type of the :attr:`~os.DirEntry.name` and :attr:`~os.DirEntry.path` + attributes of each :class:`os.DirEntry` will be ``bytes``; in all other + circumstances, they will be of type ``str``. + + This function can also support :ref:`specifying a file descriptor + <path_fd>`; the file descriptor must refer to a directory. The :func:`scandir` iterator supports the :term:`context manager` protocol and has the following method: @@ -2100,8 +2101,8 @@ features: The entry's base filename, relative to the :func:`scandir` *path* argument. - The :attr:`name` attribute will be of the same type (``str`` or - ``bytes``) as the :func:`scandir` *path* argument. Use + The :attr:`name` attribute will be ``bytes`` if the :func:`scandir` + *path* argument is of type ``bytes`` and ``str`` otherwise. Use :func:`~os.fsdecode` to decode byte filenames. .. attribute:: path @@ -2109,10 +2110,12 @@ features: The entry's full path name: equivalent to ``os.path.join(scandir_path, entry.name)`` where *scandir_path* is the :func:`scandir` *path* argument. The path is only absolute if the :func:`scandir` *path* - argument was absolute. 
- - The :attr:`path` attribute will be of the same type (``str`` or - ``bytes``) as the :func:`scandir` *path* argument. Use + argument was absolute. If the :func:`scandir` *path* + argument was a :ref:`file descriptor <path_fd>`, the :attr:`path` is + the same as the :attr:`name` attribute. + + The :attr:`path` attribute will be ``bytes`` if the :func:`scandir` + *path* argument is of type ``bytes`` and ``str`` otherwise. Use :func:`~os.fsdecode` to decode byte filenames. .. method:: inode() @@ -2209,6 +2212,9 @@ features: .. versionchanged:: 3.6 Added support for the :class:`~os.PathLike` interface. + .. versionchanged:: 3.7 + Added support for :ref:`file descriptors <path_fd>`. + .. function:: stat(path, \*, dir_fd=None, follow_symlinks=True) diff -r d33b9fd46cef Doc/whatsnew/3.7.rst --- a/Doc/whatsnew/3.7.rst Sun Nov 06 18:47:35 2016 +0200 +++ b/Doc/whatsnew/3.7.rst Sun Nov 06 23:52:46 2016 +0200 @@ -86,10 +86,20 @@ New Modules Improved Modules ================ +os +-- + +Added support for :ref:`file descriptors <path_fd>` in :func:`~os.scandir`. +(Contributed by Serhiy Storchaka in :issue:`25996`.) + Optimizations ============= +The :func:`os.fwalk` function has been sped up by 2 times. This was done +using the :func:`os.scandir` function. +(Contributed by Serhiy Storchaka in :issue:`25996`.) 
+ Build and C API Changes ======================= diff -r d33b9fd46cef Lib/os.py --- a/Lib/os.py Sun Nov 06 18:47:35 2016 +0200 +++ b/Lib/os.py Sun Nov 06 23:52:46 2016 +0200 @@ -129,6 +129,7 @@ if _exists("_have_functions"): _add("HAVE_FCHMOD", "chmod") _add("HAVE_FCHOWN", "chown") _add("HAVE_FDOPENDIR", "listdir") + _add("HAVE_FDOPENDIR", "scandir") _add("HAVE_FEXECVE", "execve") _set.add(stat) # fstat always works _add("HAVE_FTRUNCATE", "truncate") @@ -416,7 +417,7 @@ def walk(top, topdown=True, onerror=None __all__.append("walk") -if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd: +if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd: def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None): """Directory tree generator. @@ -455,7 +456,8 @@ if {open, stat} <= supports_dir_fd and { top = fspath(top) # Note: To guard against symlink races, we use the standard # lstat()/open()/fstat() trick. - orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd) + if not follow_symlinks: + orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd) topfd = open(top, O_RDONLY, dir_fd=dir_fd) try: if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and @@ -469,33 +471,48 @@ if {open, stat} <= supports_dir_fd and { # necessary, it can be adapted to only require O(1) FDs, see issue # #13734. - names = listdir(topfd) + scandir_it = scandir(topfd) dirs, nondirs = [], [] - for name in names: - try: - # Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with - # walk() which reports symlinks to directories as directories. - # We do however check for symlinks before recursing into - # a subdirectory. 
- if st.S_ISDIR(stat(name, dir_fd=topfd).st_mode): - dirs.append(name) - else: - nondirs.append(name) - except OSError: + dir_entries = [] + with scandir_it: + while True: try: - # Add dangling symlinks, ignore disappeared files - if st.S_ISLNK(stat(name, dir_fd=topfd, follow_symlinks=False) - .st_mode): - nondirs.append(name) + entry = next(scandir_it) + except StopIteration: + break + try: + if entry.is_dir(): + dirs.append(entry.name) + if not topdown: + dir_entries.append(entry) + else: + nondirs.append(entry.name) except OSError: - continue + try: + # Add dangling symlinks, ignore disappeared files + if entry.is_symlink(): + nondirs.append(entry.name) + except OSError: + continue if topdown: yield toppath, dirs, nondirs, topfd - for name in dirs: + for entry in (dirs if topdown or follow_symlinks else dir_entries): + if topdown or follow_symlinks: + name = entry + else: + name = entry.name try: - orig_st = stat(name, dir_fd=topfd, follow_symlinks=follow_symlinks) + if topdown or follow_symlinks: + orig_st = stat(name, dir_fd=topfd, follow_symlinks=False) + else: + orig_st = entry.stat(follow_symlinks=False) + #if not follow_symlinks: + #if topdown: + #orig_st = stat(name, dir_fd=topfd, follow_symlinks=False) + #else: + #orig_st = entry.stat(follow_symlinks=False) dirfd = open(name, O_RDONLY, dir_fd=topfd) except OSError as err: if onerror is not None: diff -r d33b9fd46cef Lib/test/test_os.py --- a/Lib/test/test_os.py Sun Nov 06 18:47:35 2016 +0200 +++ b/Lib/test/test_os.py Sun Nov 06 23:52:46 2016 +0200 @@ -3260,6 +3260,35 @@ class TestScandir(unittest.TestCase): self.assertEqual(entry.path, os.fsencode(os.path.join(self.path, 'file.txt'))) + @unittest.skipUnless(os.listdir in os.supports_fd, + 'fd support for listdir required for this test.') + def test_fd(self): + self.assertIn(os.scandir, os.supports_fd) + self.create_file('file.txt') + expected_names = ['file.txt'] + if support.can_symlink(): + os.symlink('file.txt', os.path.join(self.path, 'link')) + 
expected_names.append('link') + + fd = os.open(self.path, os.O_RDONLY) + try: + with os.scandir(fd) as it: + entries = list(it) + names = [entry.name for entry in entries] + self.assertEqual(sorted(names), expected_names) + self.assertEqual(names, os.listdir(fd)) + for entry in entries: + self.assertEqual(entry.path, entry.name) + self.assertEqual(os.fspath(entry), entry.name) + self.assertEqual(entry.is_symlink(), entry.name == 'link') + if os.stat in os.supports_dir_fd: + st = os.stat(entry.name, dir_fd=fd) + self.assertEqual(entry.stat(), st) + st = os.stat(entry.name, dir_fd=fd, follow_symlinks=False) + self.assertEqual(entry.stat(follow_symlinks=False), st) + finally: + os.close(fd) + def test_empty_path(self): self.assertRaises(FileNotFoundError, os.scandir, '') diff -r d33b9fd46cef Modules/clinic/posixmodule.c.h --- a/Modules/clinic/posixmodule.c.h Sun Nov 06 18:47:35 2016 +0200 +++ b/Modules/clinic/posixmodule.c.h Sun Nov 06 23:52:46 2016 +0200 @@ -5784,7 +5784,7 @@ os_scandir(PyObject *module, PyObject ** PyObject *return_value = NULL; static const char * const _keywords[] = {"path", NULL}; static _PyArg_Parser _parser = {"|O&:scandir", _keywords, 0}; - path_t path = PATH_T_INITIALIZE("scandir", "path", 1, 0); + path_t path = PATH_T_INITIALIZE("scandir", "path", 1, PATH_HAVE_FDOPENDIR); if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser, path_converter, &path)) { @@ -6351,4 +6351,4 @@ exit: #ifndef OS_GETRANDOM_METHODDEF #define OS_GETRANDOM_METHODDEF #endif /* !defined(OS_GETRANDOM_METHODDEF) */ -/*[clinic end generated code: output=e4a3bd36c7bb8356 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ea7cd2fd8f123294 input=a9049054013a1b77]*/ diff -r d33b9fd46cef Modules/posixmodule.c --- a/Modules/posixmodule.c Sun Nov 06 18:47:35 2016 +0200 +++ b/Modules/posixmodule.c Sun Nov 06 23:52:46 2016 +0200 @@ -11112,6 +11112,7 @@ typedef struct { unsigned char d_type; #endif ino_t d_ino; + int dir_fd; #endif } DirEntry; @@ -11161,19 +11162,31 @@ 
DirEntry_fetch_stat(DirEntry *self, int PyObject *ub; #ifdef MS_WINDOWS - if (PyUnicode_FSDecoder(self->path, &ub)) { - const wchar_t *path = PyUnicode_AsUnicode(ub); + if (!PyUnicode_FSDecoder(self->path, &ub)) + return NULL; + const wchar_t *path = PyUnicode_AsUnicode(ub); #else /* POSIX */ - if (PyUnicode_FSConverter(self->path, &ub)) { - const char *path = PyBytes_AS_STRING(ub); -#endif + if (!PyUnicode_FSConverter(self->path, &ub)) + return NULL; + const char *path = PyBytes_AS_STRING(ub); + if (self->dir_fd != DEFAULT_DIR_FD) { +#ifdef HAVE_FSTATAT + result = fstatat(self->dir_fd, path, &st, + follow_symlinks ? 0 : AT_SYMLINK_NOFOLLOW); +#else + PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat"); + return NULL; +#endif /* HAVE_FSTATAT */ + } + else +#endif + { if (follow_symlinks) result = STAT(path, &st); else result = LSTAT(path, &st); - Py_DECREF(ub); - } else - return NULL; + } + Py_DECREF(ub); if (result != 0) return path_object_error(self->path); @@ -11583,20 +11596,36 @@ DirEntry_from_posix_info(path_t *path, c entry->stat = NULL; entry->lstat = NULL; - joined_path = join_path_filename(path->narrow, name, name_len); - if (!joined_path) - goto error; + if (path->fd != -1) { + entry->dir_fd = path->fd; + joined_path = NULL; + } + else { + entry->dir_fd = DEFAULT_DIR_FD; + joined_path = join_path_filename(path->narrow, name, name_len); + if (!joined_path) + goto error; + } if (!path->narrow || !PyBytes_Check(path->object)) { entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len); - entry->path = PyUnicode_DecodeFSDefault(joined_path); + if (joined_path) + entry->path = PyUnicode_DecodeFSDefault(joined_path); } else { entry->name = PyBytes_FromStringAndSize(name, name_len); - entry->path = PyBytes_FromString(joined_path); + if (joined_path) + entry->path = PyBytes_FromString(joined_path); } PyMem_Free(joined_path); - if (!entry->name || !entry->path) + if (!entry->name) + goto error; + + if (path->fd != -1) { + entry->path = 
entry->name; + Py_INCREF(entry->path); + } + else if (!entry->path) goto error; #ifdef HAVE_DIRENT_D_TYPE @@ -11624,6 +11653,9 @@ typedef struct { #else /* POSIX */ DIR *dirp; #endif +#ifdef HAVE_FDOPENDIR + int fd; +#endif } ScandirIterator; #ifdef MS_WINDOWS @@ -11708,6 +11740,10 @@ ScandirIterator_closedir(ScandirIterator iterator->dirp = NULL; Py_BEGIN_ALLOW_THREADS +#ifdef HAVE_FDOPENDIR + if (iterator->path.fd != -1) + rewinddir(dirp); +#endif closedir(dirp); Py_END_ALLOW_THREADS return; @@ -11884,7 +11920,7 @@ static PyTypeObject ScandirIteratorType /*[clinic input] os.scandir - path : path_t(nullable=True) = None + path : path_t(nullable=True, allow_fd='PATH_HAVE_FDOPENDIR') = None Return an iterator of DirEntry objects for given path. @@ -11897,13 +11933,16 @@ If path is None, uses the path='.'. static PyObject * os_scandir_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=6eb2668b675ca89e input=e62b08b3cd41f604]*/ +/*[clinic end generated code: output=6eb2668b675ca89e input=b139dc1c57f60846]*/ { ScandirIterator *iterator; #ifdef MS_WINDOWS wchar_t *path_strW; #else const char *path_str; +#ifdef HAVE_FDOPENDIR + int fd = -1; +#endif #endif iterator = PyObject_New(ScandirIterator, &ScandirIteratorType); @@ -11942,18 +11981,40 @@ os_scandir_impl(PyObject *module, path_t goto error; } #else /* POSIX */ - if (iterator->path.narrow) - path_str = iterator->path.narrow; + errno = 0; +#ifdef HAVE_FDOPENDIR + if (path->fd != -1) { + /* closedir() closes the FD, so we duplicate it */ + fd = _Py_dup(path->fd); + if (fd == -1) + goto error; + + Py_BEGIN_ALLOW_THREADS + iterator->dirp = fdopendir(fd); + Py_END_ALLOW_THREADS + } else - path_str = "."; - - errno = 0; - Py_BEGIN_ALLOW_THREADS - iterator->dirp = opendir(path_str); - Py_END_ALLOW_THREADS +#endif + { + if (iterator->path.narrow) + path_str = iterator->path.narrow; + else + path_str = "."; + + Py_BEGIN_ALLOW_THREADS + iterator->dirp = opendir(path_str); + Py_END_ALLOW_THREADS + } if 
(!iterator->dirp) { path_error(&iterator->path); +#ifdef HAVE_FDOPENDIR + if (fd != -1) { + Py_BEGIN_ALLOW_THREADS + close(fd); + Py_END_ALLOW_THREADS + } +#endif goto error; } #endif