diff -r 0b576ab589c5 Doc/library/os.rst --- a/Doc/library/os.rst Sat Nov 12 14:37:11 2016 +0200 +++ b/Doc/library/os.rst Sat Nov 12 19:27:04 2016 +0200 @@ -2016,13 +2016,14 @@ features: always requires a system call on Unix but only requires one for symbolic links on Windows. - On Unix, *path* can be of type :class:`str` or :class:`bytes` (either - directly or indirectly through the :class:`PathLike` interface; use - :func:`~os.fsencode` and :func:`~os.fsdecode` to encode and decode - :class:`bytes` paths). On Windows, *path* must be of type :class:`str`. - On both systems, the type of the :attr:`~os.DirEntry.name` and - :attr:`~os.DirEntry.path` attributes of each :class:`os.DirEntry` will be of - the same type as *path*. + *path* may be a :term:`path-like object`. If *path* is of type ``bytes`` + (directly or indirectly through the :class:`PathLike` interface), + the type of the :attr:`~os.DirEntry.name` and :attr:`~os.DirEntry.path` + attributes of each :class:`os.DirEntry` will be ``bytes``; in all other + circumstances, they will be of type ``str``. + + This function can also support :ref:`specifying a file descriptor + <path_fd>`; the file descriptor must refer to a directory. The :func:`scandir` iterator supports the :term:`context manager` protocol and has the following method: @@ -2100,8 +2101,8 @@ features: The entry's base filename, relative to the :func:`scandir` *path* argument. - The :attr:`name` attribute will be of the same type (``str`` or - ``bytes``) as the :func:`scandir` *path* argument. Use + The :attr:`name` attribute will be ``bytes`` if the :func:`scandir` + *path* argument is of type ``bytes`` and ``str`` otherwise. Use :func:`~os.fsdecode` to decode byte filenames. .. attribute:: path @@ -2109,10 +2110,12 @@ features: The entry's full path name: equivalent to ``os.path.join(scandir_path, entry.name)`` where *scandir_path* is the :func:`scandir` *path* argument. The path is only absolute if the :func:`scandir` *path* - argument was absolute. 
- - The :attr:`path` attribute will be of the same type (``str`` or - ``bytes``) as the :func:`scandir` *path* argument. Use + argument was absolute. If the :func:`scandir` *path* + argument was a :ref:`file descriptor <path_fd>`, the :attr:`path` is + the same as the :attr:`name` attribute. + + The :attr:`path` attribute will be ``bytes`` if the :func:`scandir` + *path* argument is of type ``bytes`` and ``str`` otherwise. Use :func:`~os.fsdecode` to decode byte filenames. .. method:: inode() @@ -2209,6 +2212,9 @@ features: .. versionchanged:: 3.6 Added support for the :class:`~os.PathLike` interface. + .. versionchanged:: 3.7 + Added support for :ref:`file descriptors <path_fd>`. + .. function:: stat(path, \*, dir_fd=None, follow_symlinks=True) diff -r 0b576ab589c5 Doc/whatsnew/3.7.rst --- a/Doc/whatsnew/3.7.rst Sat Nov 12 14:37:11 2016 +0200 +++ b/Doc/whatsnew/3.7.rst Sat Nov 12 19:27:04 2016 +0200 @@ -86,10 +86,20 @@ New Modules Improved Modules ================ +os +-- + +Added support for :ref:`file descriptors <path_fd>` in :func:`~os.scandir`. +(Contributed by Serhiy Storchaka in :issue:`25996`.) + Optimizations ============= +The :func:`os.fwalk` function has been sped up by 2 times. This was done +using the :func:`os.scandir` function. +(Contributed by Serhiy Storchaka in :issue:`25996`.) 
+ Build and C API Changes ======================= diff -r 0b576ab589c5 Lib/os.py --- a/Lib/os.py Sat Nov 12 14:37:11 2016 +0200 +++ b/Lib/os.py Sat Nov 12 19:27:04 2016 +0200 @@ -129,6 +129,7 @@ if _exists("_have_functions"): _add("HAVE_FCHMOD", "chmod") _add("HAVE_FCHOWN", "chown") _add("HAVE_FDOPENDIR", "listdir") + _add("HAVE_FDOPENDIR", "scandir") _add("HAVE_FEXECVE", "execve") _set.add(stat) # fstat always works _add("HAVE_FTRUNCATE", "truncate") @@ -416,7 +417,7 @@ def walk(top, topdown=True, onerror=None __all__.append("walk") -if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd: +if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd: def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None): """Directory tree generator. @@ -455,7 +456,8 @@ if {open, stat} <= supports_dir_fd and { top = fspath(top) # Note: To guard against symlink races, we use the standard # lstat()/open()/fstat() trick. - orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd) + if not follow_symlinks: + orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd) topfd = open(top, O_RDONLY, dir_fd=dir_fd) try: if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and @@ -469,33 +471,40 @@ if {open, stat} <= supports_dir_fd and { # necessary, it can be adapted to only require O(1) FDs, see issue # #13734. - names = listdir(topfd) - dirs, nondirs = [], [] - for name in names: + scandir_it = scandir(topfd) + walk_dirs = dirs = [] + nondirs = [] + if not (topdown or follow_symlinks): + walk_dirs = [] # list of entries + for entry in scandir_it: + name = entry.name try: - # Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with - # walk() which reports symlinks to directories as directories. - # We do however check for symlinks before recursing into - # a subdirectory. 
- if st.S_ISDIR(stat(name, dir_fd=topfd).st_mode): + if entry.is_dir(): dirs.append(name) + if walk_dirs is not dirs: + walk_dirs.append(entry) else: nondirs.append(name) except OSError: try: # Add dangling symlinks, ignore disappeared files - if st.S_ISLNK(stat(name, dir_fd=topfd, follow_symlinks=False) - .st_mode): + if entry.is_symlink(): nondirs.append(name) except OSError: - continue + pass if topdown: yield toppath, dirs, nondirs, topfd - for name in dirs: + for entry in walk_dirs: + name = entry try: - orig_st = stat(name, dir_fd=topfd, follow_symlinks=follow_symlinks) + if not follow_symlinks: + if topdown: + orig_st = stat(name, dir_fd=topfd, follow_symlinks=False) + else: + name = entry.name + orig_st = entry.stat(follow_symlinks=False) dirfd = open(name, O_RDONLY, dir_fd=topfd) except OSError as err: if onerror is not None: diff -r 0b576ab589c5 Lib/test/test_os.py --- a/Lib/test/test_os.py Sat Nov 12 14:37:11 2016 +0200 +++ b/Lib/test/test_os.py Sat Nov 12 19:27:04 2016 +0200 @@ -1011,9 +1011,12 @@ class FwalkTests(WalkTests): """Tests for os.fwalk().""" def walk(self, top, **kwargs): - for root, dirs, files, root_fd in os.fwalk(top, **kwargs): + for root, dirs, files, root_fd in self.fwalk(top, **kwargs): yield (root, dirs, files) + def fwalk(self, *args, **kwargs): + return os.fwalk(*args, **kwargs) + def _compare_to_walk(self, walk_kwargs, fwalk_kwargs): """ compare with walk() results. 
@@ -1028,7 +1031,7 @@ class FwalkTests(WalkTests): for root, dirs, files in os.walk(**walk_kwargs): expected[root] = (set(dirs), set(files)) - for root, dirs, files, rootfd in os.fwalk(**fwalk_kwargs): + for root, dirs, files, rootfd in self.fwalk(**fwalk_kwargs): self.assertIn(root, expected) self.assertEqual(expected[root], (set(dirs), set(files))) @@ -1050,7 +1053,7 @@ class FwalkTests(WalkTests): # check returned file descriptors for topdown, follow_symlinks in itertools.product((True, False), repeat=2): args = support.TESTFN, topdown, None - for root, dirs, files, rootfd in os.fwalk(*args, follow_symlinks=follow_symlinks): + for root, dirs, files, rootfd in self.fwalk(*args, follow_symlinks=follow_symlinks): # check that the FD is valid os.fstat(rootfd) # redundant check @@ -1065,7 +1068,7 @@ class FwalkTests(WalkTests): minfd = os.dup(1) os.close(minfd) for i in range(256): - for x in os.fwalk(support.TESTFN): + for x in self.fwalk(support.TESTFN): pass newfd = os.dup(1) self.addCleanup(os.close, newfd) @@ -3255,6 +3258,35 @@ class TestScandir(unittest.TestCase): self.assertEqual(entry.path, os.fsencode(os.path.join(self.path, 'file.txt'))) + @unittest.skipUnless(os.listdir in os.supports_fd, + 'fd support for listdir required for this test.') + def test_fd(self): + self.assertIn(os.scandir, os.supports_fd) + self.create_file('file.txt') + expected_names = ['file.txt'] + if support.can_symlink(): + os.symlink('file.txt', os.path.join(self.path, 'link')) + expected_names.append('link') + + fd = os.open(self.path, os.O_RDONLY) + try: + with os.scandir(fd) as it: + entries = list(it) + names = [entry.name for entry in entries] + self.assertEqual(sorted(names), expected_names) + self.assertEqual(names, os.listdir(fd)) + for entry in entries: + self.assertEqual(entry.path, entry.name) + self.assertEqual(os.fspath(entry), entry.name) + self.assertEqual(entry.is_symlink(), entry.name == 'link') + if os.stat in os.supports_dir_fd: + st = os.stat(entry.name, 
dir_fd=fd) + self.assertEqual(entry.stat(), st) + st = os.stat(entry.name, dir_fd=fd, follow_symlinks=False) + self.assertEqual(entry.stat(follow_symlinks=False), st) + finally: + os.close(fd) + def test_empty_path(self): self.assertRaises(FileNotFoundError, os.scandir, '') @@ -3270,7 +3302,7 @@ class TestScandir(unittest.TestCase): self.assertEqual(len(entries2), 0, entries2) def test_bad_path_type(self): - for obj in [1234, 1.234, {}, []]: + for obj in [1.234, {}, []]: self.assertRaises(TypeError, os.scandir, obj) def test_close(self): diff -r 0b576ab589c5 Modules/clinic/posixmodule.c.h --- a/Modules/clinic/posixmodule.c.h Sat Nov 12 14:37:11 2016 +0200 +++ b/Modules/clinic/posixmodule.c.h Sat Nov 12 19:27:04 2016 +0200 @@ -5785,7 +5785,7 @@ os_scandir(PyObject *module, PyObject ** PyObject *return_value = NULL; static const char * const _keywords[] = {"path", NULL}; static _PyArg_Parser _parser = {"|O&:scandir", _keywords, 0}; - path_t path = PATH_T_INITIALIZE("scandir", "path", 1, 0); + path_t path = PATH_T_INITIALIZE("scandir", "path", 1, PATH_HAVE_FDOPENDIR); if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser, path_converter, &path)) { @@ -6352,4 +6352,4 @@ exit: #ifndef OS_GETRANDOM_METHODDEF #define OS_GETRANDOM_METHODDEF #endif /* !defined(OS_GETRANDOM_METHODDEF) */ -/*[clinic end generated code: output=61abf6df195aa5f1 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5646cd83b3efdf47 input=a9049054013a1b77]*/ diff -r 0b576ab589c5 Modules/posixmodule.c --- a/Modules/posixmodule.c Sat Nov 12 14:37:11 2016 +0200 +++ b/Modules/posixmodule.c Sat Nov 12 19:27:04 2016 +0200 @@ -11110,6 +11110,7 @@ typedef struct { unsigned char d_type; #endif ino_t d_ino; + int dir_fd; #endif } DirEntry; @@ -11159,19 +11160,31 @@ DirEntry_fetch_stat(DirEntry *self, int PyObject *ub; #ifdef MS_WINDOWS - if (PyUnicode_FSDecoder(self->path, &ub)) { - const wchar_t *path = PyUnicode_AsUnicode(ub); + if (!PyUnicode_FSDecoder(self->path, &ub)) + return NULL; + const 
wchar_t *path = PyUnicode_AsUnicode(ub); #else /* POSIX */ - if (PyUnicode_FSConverter(self->path, &ub)) { - const char *path = PyBytes_AS_STRING(ub); -#endif + if (!PyUnicode_FSConverter(self->path, &ub)) + return NULL; + const char *path = PyBytes_AS_STRING(ub); + if (self->dir_fd != DEFAULT_DIR_FD) { +#ifdef HAVE_FSTATAT + result = fstatat(self->dir_fd, path, &st, + follow_symlinks ? 0 : AT_SYMLINK_NOFOLLOW); +#else + PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat"); + return NULL; +#endif /* HAVE_FSTATAT */ + } + else +#endif + { if (follow_symlinks) result = STAT(path, &st); else result = LSTAT(path, &st); - Py_DECREF(ub); - } else - return NULL; + } + Py_DECREF(ub); if (result != 0) return path_object_error(self->path); @@ -11581,20 +11594,36 @@ DirEntry_from_posix_info(path_t *path, c entry->stat = NULL; entry->lstat = NULL; - joined_path = join_path_filename(path->narrow, name, name_len); - if (!joined_path) - goto error; + if (path->fd != -1) { + entry->dir_fd = path->fd; + joined_path = NULL; + } + else { + entry->dir_fd = DEFAULT_DIR_FD; + joined_path = join_path_filename(path->narrow, name, name_len); + if (!joined_path) + goto error; + } if (!path->narrow || !PyBytes_Check(path->object)) { entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len); - entry->path = PyUnicode_DecodeFSDefault(joined_path); + if (joined_path) + entry->path = PyUnicode_DecodeFSDefault(joined_path); } else { entry->name = PyBytes_FromStringAndSize(name, name_len); - entry->path = PyBytes_FromString(joined_path); + if (joined_path) + entry->path = PyBytes_FromString(joined_path); } PyMem_Free(joined_path); - if (!entry->name || !entry->path) + if (!entry->name) + goto error; + + if (path->fd != -1) { + entry->path = entry->name; + Py_INCREF(entry->path); + } + else if (!entry->path) goto error; #ifdef HAVE_DIRENT_D_TYPE @@ -11622,6 +11651,9 @@ typedef struct { #else /* POSIX */ DIR *dirp; #endif +#ifdef HAVE_FDOPENDIR + int fd; +#endif } ScandirIterator; 
#ifdef MS_WINDOWS @@ -11706,6 +11738,10 @@ ScandirIterator_closedir(ScandirIterator iterator->dirp = NULL; Py_BEGIN_ALLOW_THREADS +#ifdef HAVE_FDOPENDIR + if (iterator->path.fd != -1) + rewinddir(dirp); +#endif closedir(dirp); Py_END_ALLOW_THREADS return; @@ -11882,7 +11918,7 @@ static PyTypeObject ScandirIteratorType /*[clinic input] os.scandir - path : path_t(nullable=True) = None + path : path_t(nullable=True, allow_fd='PATH_HAVE_FDOPENDIR') = None Return an iterator of DirEntry objects for given path. @@ -11895,13 +11931,16 @@ If path is None, uses the path='.'. static PyObject * os_scandir_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=6eb2668b675ca89e input=e62b08b3cd41f604]*/ +/*[clinic end generated code: output=6eb2668b675ca89e input=b139dc1c57f60846]*/ { ScandirIterator *iterator; #ifdef MS_WINDOWS wchar_t *path_strW; #else const char *path_str; +#ifdef HAVE_FDOPENDIR + int fd = -1; +#endif #endif iterator = PyObject_New(ScandirIterator, &ScandirIteratorType); @@ -11940,18 +11979,40 @@ os_scandir_impl(PyObject *module, path_t goto error; } #else /* POSIX */ - if (iterator->path.narrow) - path_str = iterator->path.narrow; + errno = 0; +#ifdef HAVE_FDOPENDIR + if (path->fd != -1) { + /* closedir() closes the FD, so we duplicate it */ + fd = _Py_dup(path->fd); + if (fd == -1) + goto error; + + Py_BEGIN_ALLOW_THREADS + iterator->dirp = fdopendir(fd); + Py_END_ALLOW_THREADS + } else - path_str = "."; - - errno = 0; - Py_BEGIN_ALLOW_THREADS - iterator->dirp = opendir(path_str); - Py_END_ALLOW_THREADS +#endif + { + if (iterator->path.narrow) + path_str = iterator->path.narrow; + else + path_str = "."; + + Py_BEGIN_ALLOW_THREADS + iterator->dirp = opendir(path_str); + Py_END_ALLOW_THREADS + } if (!iterator->dirp) { path_error(&iterator->path); +#ifdef HAVE_FDOPENDIR + if (fd != -1) { + Py_BEGIN_ALLOW_THREADS + close(fd); + Py_END_ALLOW_THREADS + } +#endif goto error; } #endif