changeset: 94595:51b8b815fe43 tag: tip user: Victor Stinner date: Thu Feb 12 18:12:58 2015 +0100 files: Doc/library/os.rst Lib/os.py Lib/test/test_os.py Modules/posixmodule.c description: test diff -r 9e10c4255277 -r 51b8b815fe43 Doc/library/os.rst --- a/Doc/library/os.rst Thu Feb 12 16:34:54 2015 +0100 +++ b/Doc/library/os.rst Thu Feb 12 18:12:58 2015 +0100 @@ -1601,6 +1601,11 @@ features: Availability: Unix, Windows. + .. seealso:: + + The :func:`scandir` function returns the directory entries with more + information than just the name. + .. versionchanged:: 3.2 The *path* parameter became optional. @@ -1893,6 +1898,177 @@ features: The *dir_fd* parameter. +.. function:: scandir(path='.') + + Return an iterator of :class:`DirEntry` objects corresponding to the entries + in the directory given by *path*. The entries are yielded in arbitrary + order, and the special entries ``'.'`` and ``'..'`` are not included. + + On Windows, *path* must be of type :class:`str`. On POSIX, *path* can be of + type :class:`str` or :class:`bytes`. If *path* is of type :class:`bytes`, + the :attr:`~DirEntry.name` and :attr:`~DirEntry.path` attributes of + :class:`DirEntry` are also of type ``bytes``. Use :func:`~os.fsencode` and + :func:`~os.fsdecode` to encode and decode paths. + + The :func:`scandir` function is recommended, instead of :func:`listdir`, + when the file type of entries is used. In most cases, the file type of a + :class:`DirEntry` is retrieved directly by :func:`scandir`, no system call + is required. + + The following example shows a simple use of :func:`scandir` to display all + the files (excluding directories) in the given *path* that don't start with + ``'.'``:: + + for entry in os.scandir(path): + if not entry.name.startswith('.') and entry.is_file(): + print(entry.name) + + .. note:: + + On Unix-based systems, :func:`scandir` uses the system's + ``opendir()`` + and + ``readdir()`` + functions.
On Windows, it uses the Win32 + ``FindFirstFileW`` + and + ``FindNextFileW`` + functions. + + .. seealso:: + + The :func:`listdir` function returns the names of the directory entries. + + .. versionadded:: 3.5 + + +.. class:: DirEntry + + Object yielded by :func:`scandir` to expose the file path and other file + attributes of a directory entry. + + :func:`scandir` will provide as much of this information as possible without + making additional system calls. When a ``stat()`` or ``lstat()`` system call + is made, the ``DirEntry`` object caches the result. + + ``DirEntry`` instances are not intended to be stored in long-lived data + structures; if you know the file metadata has changed or if a long time has + elapsed since calling :func:`scandir`, call ``os.stat(entry.path)`` to fetch + up-to-date information. + + Because the ``DirEntry`` methods can make operating system calls, they may + also raise :exc:`OSError`. For example, if a file is deleted between calling + :func:`scandir` and calling :func:`DirEntry.stat`, a + :exc:`FileNotFoundError` exception can be raised. Unfortunately, the + behaviour on errors depends on the platform. If you need very fine-grained + control over errors, you can catch :exc:`OSError` when calling one of the + ``DirEntry`` methods and handle as appropriate. + + Attributes and methods on a ``DirEntry`` instance are as follows: + + .. attribute:: name + + The entry's base filename, relative to the :func:`scandir` *path* + argument. + + The :attr:`name` type is :class:`str`. On POSIX, it can be of type + :class:`bytes` if the type of the :func:`scandir` *path* argument is also + :class:`bytes`. Use :func:`~os.fsdecode` to decode the name. + + .. attribute:: path + + The entry's full path name: equivalent to ``os.path.join(scandir_path, + entry.name)`` where *scandir_path* is the :func:`scandir` *path* + argument. The path is only absolute if the :func:`scandir` *path* + argument is absolute. + + The :attr:`path` type is :class:`str`.
On POSIX, it can be of type + :class:`bytes` if the type of the :func:`scandir` *path* argument is also + :class:`bytes`. Use :func:`~os.fsdecode` to decode the path. + + .. method:: inode() + + Return the inode number of the entry. + + The result is cached in the object; use ``os.stat(entry.path, + follow_symlinks=False).st_ino`` to fetch up-to-date information. + + On POSIX, no system call is required. + + .. method:: is_dir(\*, follow_symlinks=True) + + If *follow_symlinks* is ``True`` (the default), return ``True`` if the + entry is a directory or a symbolic link pointing to a directory, + return ``False`` if it points to another kind of file, if it doesn't + exist anymore or if it is a broken symbolic link. + + If *follow_symlinks* is ``False``, return ``True`` only if this entry + is a directory, return ``False`` if it points to a symbolic link or + another kind of file, if the entry doesn't exist anymore or if it is a + broken symbolic link. + + The result is cached in the object. Call :func:`stat.S_ISDIR` with + :func:`os.stat` to fetch up-to-date information. + + The method can raise :exc:`OSError`, such as :exc:`PermissionError`, + but :exc:`FileNotFoundError` is caught. + + In most cases, no system call is required. + + .. method:: is_file(\*, follow_symlinks=True) + + If *follow_symlinks* is ``True`` (the default), return ``True`` if the + entry is a regular file or a symbolic link pointing to a regular file, + return ``False`` if it points to another kind of file, if it doesn't + exist anymore or if it is a broken symbolic link. + + If *follow_symlinks* is ``False``, return ``True`` only if this entry + is a regular file, return ``False`` if it points to a symbolic link or + another kind of file, if it doesn't exist anymore or if it is a broken + symbolic link. + + The result is cached in the object. Call :func:`stat.S_ISREG` with + :func:`os.stat` to fetch up-to-date information.
+ + The method can raise :exc:`OSError`, such as :exc:`PermissionError`, + but :exc:`FileNotFoundError` is caught. + + In most cases, no system call is required. + + .. method:: is_symlink() + + Return ``True`` if this entry is a symbolic link or a broken symbolic + link, return ``False`` if it points to another kind of file or if the + entry doesn't exist anymore. + + The result is cached in the object. Call :func:`os.path.islink` to fetch + up-to-date information. + + The method can raise :exc:`OSError`, such as :exc:`PermissionError`, + but :exc:`FileNotFoundError` is caught. + + In most cases, no system call is required. + + .. method:: stat(\*, follow_symlinks=True) + + Return a :class:`stat_result` object for this entry. This function + normally follows symbolic links; to stat a symbolic link add the + argument ``follow_symlinks=False``. + + On Windows, the ``st_ino``, ``st_dev`` and ``st_nlink`` attributes of the + :class:`stat_result` are always set to zero. Call :func:`os.stat` to + get these attributes. + + The result is cached in the object. Call :func:`os.stat` to fetch + up-to-date information. + + On Windows, ``DirEntry.stat(follow_symlinks=False)`` doesn't require a + system call. ``DirEntry.stat()`` requires a system call if the entry is a + symbolic link. + + .. versionadded:: 3.5 + + .. function:: stat(path, \*, dir_fd=None, follow_symlinks=True) Get the status of a file or a file descriptor.
class DirEntry:
    """One entry of a directory, as produced by scandir().

    Exposes the entry name, its path (directory joined with the name) and
    cached file-type / stat information.  The constructor signature depends
    on the platform: on 'nt' it receives an lstat result, elsewhere it
    receives the inode number and the d_type value (or None).
    """

    __slots__ = ('_directory', 'name', '_inode', '_path', '_lstat', '_stat')
    if name != 'nt':
        __slots__ += ('_d_type',)

    if name == 'nt':
        def __init__(self, directory, entry_name, lstat):
            self._directory = directory
            self.name = entry_name
            # The inode is not known yet; inode() fetches it on demand.
            self._inode = None
            self._lstat = lstat
            self._path = None
            self._stat = None
    else:
        def __init__(self, directory, entry_name, inode, d_type):
            self._directory = directory
            self.name = entry_name
            self._inode = inode
            self._d_type = d_type
            # Both stat results are fetched lazily and then cached.
            self._lstat = None
            self._path = None
            self._stat = None

    def inode(self):
        """Return the (cached) inode number of the entry."""
        if name == 'nt' and self._inode is None:
            # Only st_ino is kept: the lstat result is deliberately not
            # stored in self._lstat, so that the st_ino, st_dev and st_nlink
            # fields returned by stat() do not depend on whether inode() was
            # called before.
            self._inode = lstat(self.path).st_ino
        return self._inode

    def stat(self, follow_symlinks=True):
        """Return a cached stat result for the entry.

        With follow_symlinks false the lstat result is returned.  Otherwise
        symbolic links are resolved with stat(); for any other entry the
        lstat result doubles as the stat result.
        """
        if not follow_symlinks:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat

        if self._stat is None:
            if self.is_symlink():
                self._stat = stat(self.path)
            else:
                if self._lstat is None:
                    self._lstat = lstat(self.path)
                self._stat = self._lstat
        return self._stat

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is a directory.

        FileNotFoundError raised while fetching stat information is turned
        into a False result.
        """
        if name == 'nt':
            if not (follow_symlinks and self.is_symlink()):
                return st.S_ISDIR(self._lstat.st_mode)
            try:
                target_info = self.stat()
            except FileNotFoundError:
                return False
            return st.S_ISDIR(target_info.st_mode)

        # d_type alone answers the question unless it is missing or a
        # symbolic link has to be followed.
        if self._d_type is not None:
            if not (follow_symlinks and self.is_symlink()):
                return self._d_type == DT_DIR
        try:
            info = self.stat(follow_symlinks=follow_symlinks)
        except FileNotFoundError:
            return False
        return st.S_ISDIR(info.st_mode)

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is a regular file.

        FileNotFoundError raised while fetching stat information is turned
        into a False result.
        """
        if name == 'nt':
            if not (follow_symlinks and self.is_symlink()):
                return st.S_ISREG(self._lstat.st_mode)
            try:
                target_info = self.stat()
            except FileNotFoundError:
                return False
            return st.S_ISREG(target_info.st_mode)

        # Same strategy as is_dir(): prefer d_type when it is sufficient.
        if self._d_type is not None:
            if not (follow_symlinks and self.is_symlink()):
                return self._d_type == DT_REG
        try:
            info = self.stat(follow_symlinks=follow_symlinks)
        except FileNotFoundError:
            return False
        return st.S_ISREG(info.st_mode)

    def is_symlink(self):
        """Return True if the entry is a symbolic link.

        Returns False if the entry no longer exists when lstat information
        has to be fetched.
        """
        if name == 'nt':
            return st.S_ISLNK(self._lstat.st_mode)
        if self._d_type is not None:
            return self._d_type == DT_LNK
        try:
            link_info = self.stat(follow_symlinks=False)
        except FileNotFoundError:
            return False
        return st.S_ISLNK(link_info.st_mode)

    @property
    def path(self):
        """Directory passed to the constructor joined with the entry name."""
        joined = self._path
        if joined is None:
            joined = path.join(self._directory, self.name)
            self._path = joined
        return joined

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s %r>' % (class_name, self.name)
def scandir(path='.'):
    """Yield a DirEntry object for each entry of the directory *path*.

    On 'nt', _scandir() is called with *path* unchanged and yields
    (name, lstat) pairs.  On other platforms _scandir() is called with the
    fsencode()d path and yields (name, inode, d_type) triples whose names
    are bytes; they are decoded with the filesystem encoding (using the
    'surrogateescape' error handler) when *path* is a str.
    """
    if name == 'nt':
        for filename, find_stat in _scandir(path):
            yield DirEntry(path, filename, find_stat)
        return

    decode_names = isinstance(path, str)
    if decode_names:
        fs_encoding = sys.getfilesystemencoding()
    for raw_name, ino, file_type in _scandir(fsencode(path)):
        if decode_names:
            filename = raw_name.decode(fs_encoding, 'surrogateescape')
        else:
            filename = raw_name
        yield DirEntry(path, filename, ino, file_type)
class TestScandir(unittest.TestCase):
    # Tests for os.scandir() and the DirEntry objects it yields.  Every test
    # works inside a fresh directory (self.path) removed on cleanup.

    def setUp(self):
        self.path = os.path.realpath(support.TESTFN)
        self.addCleanup(support.rmtree, self.path)
        os.mkdir(self.path)

    def create_file(self, name="file.txt"):
        # Create a 6-byte file in the test directory, return its full path.
        filename = os.path.join(self.path, name)
        with open(filename, "wb") as fp:
            fp.write(b'python')
        return filename

    def get_entries(self, names):
        # Scan the test directory, check the sorted entry names against
        # *names* and return a {name: entry} mapping.
        entries = {entry.name: entry for entry in os.scandir(self.path)}
        self.assertEqual(sorted(entries.keys()), names)
        return entries

    def assert_stat_equal(self, stat1, stat2, skip_fields):
        # When skip_fields is true, compare every st_* attribute except
        # st_dev, st_ino and st_nlink; otherwise compare the whole objects.
        if skip_fields:
            for attr in dir(stat1):
                if not attr.startswith("st_"):
                    continue
                if attr in ("st_dev", "st_ino", "st_nlink"):
                    continue
                self.assertEqual(getattr(stat1, attr),
                                 getattr(stat2, attr),
                                 (stat1, stat2, attr))
        else:
            self.assertEqual(stat1, stat2)

    def check_entry(self, entry, name, is_dir, is_file, is_symlink):
        self.assertEqual(entry.name, name)
        self.assertEqual(entry.path, os.path.join(self.path, name))
        self.assertEqual(entry.inode(),
                         os.stat(entry.path, follow_symlinks=False).st_ino)

        # The expected values passed by the caller must be checked too,
        # not only the consistency with os.stat().
        self.assertEqual(entry.is_dir(), is_dir)
        self.assertEqual(entry.is_file(), is_file)
        self.assertEqual(entry.is_symlink(), is_symlink)

        entry_stat = os.stat(entry.path)
        self.assertEqual(entry.is_dir(),
                         stat.S_ISDIR(entry_stat.st_mode))
        self.assertEqual(entry.is_file(),
                         stat.S_ISREG(entry_stat.st_mode))
        self.assertEqual(entry.is_symlink(),
                         os.path.islink(entry.path))

        entry_lstat = os.stat(entry.path, follow_symlinks=False)
        self.assertEqual(entry.is_dir(follow_symlinks=False),
                         stat.S_ISDIR(entry_lstat.st_mode))
        self.assertEqual(entry.is_file(follow_symlinks=False),
                         stat.S_ISREG(entry_lstat.st_mode))

        self.assert_stat_equal(entry.stat(),
                               entry_stat,
                               os.name == 'nt' and not is_symlink)
        self.assert_stat_equal(entry.stat(follow_symlinks=False),
                               entry_lstat,
                               os.name == 'nt')

    def test_attributes(self):
        link = hasattr(os, 'link')
        symlink = support.can_symlink()

        dirname = os.path.join(self.path, "dir")
        os.mkdir(dirname)
        filename = self.create_file("file.txt")
        if link:
            os.link(filename, os.path.join(self.path, "link_file.txt"))
        if symlink:
            os.symlink(dirname, os.path.join(self.path, "symlink_dir"),
                       target_is_directory=True)
            os.symlink(filename, os.path.join(self.path, "symlink_file.txt"))

        names = ['dir', 'file.txt']
        if link:
            names.append('link_file.txt')
        if symlink:
            names.extend(('symlink_dir', 'symlink_file.txt'))
        entries = self.get_entries(names)

        entry = entries['dir']
        self.check_entry(entry, 'dir', True, False, False)

        entry = entries['file.txt']
        self.check_entry(entry, 'file.txt', False, True, False)

        if link:
            entry = entries['link_file.txt']
            self.check_entry(entry, 'link_file.txt', False, True, False)

        if symlink:
            entry = entries['symlink_dir']
            self.check_entry(entry, 'symlink_dir', True, False, True)

            entry = entries['symlink_file.txt']
            self.check_entry(entry, 'symlink_file.txt', False, True, True)

    def get_entry(self, name):
        # The test directory must contain exactly one entry called *name*.
        entries = list(os.scandir(self.path))
        self.assertEqual(len(entries), 1)

        entry = entries[0]
        self.assertEqual(entry.name, name)
        return entry

    def create_file_entry(self):
        filename = self.create_file()
        return self.get_entry(os.path.basename(filename))

    def test_current_directory(self):
        filename = self.create_file()
        old_dir = os.getcwd()
        try:
            os.chdir(self.path)

            # call scandir() without parameter: it must list the content
            # of the current directory
            entries = {entry.name: entry for entry in os.scandir()}
            self.assertEqual(sorted(entries.keys()),
                             [os.path.basename(filename)])
        finally:
            os.chdir(old_dir)

    def test_repr(self):
        entry = self.create_file_entry()
        # DirEntry.__repr__() formats '<%s %r>' with the class name and the
        # entry name; create_file() uses "file.txt" by default.
        self.assertEqual(repr(entry), "<DirEntry 'file.txt'>")

    def test_removed_dir(self):
        path = os.path.join(self.path, 'dir')

        os.mkdir(path)
        entry = self.get_entry('dir')
        os.rmdir(path)

        # On POSIX, is_dir() result depends if scandir() filled d_type or not
        if os.name == 'nt':
            self.assertTrue(entry.is_dir())
        self.assertFalse(entry.is_file())
        self.assertFalse(entry.is_symlink())
        if os.name == 'nt':
            self.assertRaises(FileNotFoundError, entry.inode)
            # don't fail
            entry.stat()
            entry.stat(follow_symlinks=False)
        else:
            self.assertGreater(entry.inode(), 0)
            self.assertRaises(FileNotFoundError, entry.stat)
            self.assertRaises(FileNotFoundError, entry.stat,
                              follow_symlinks=False)

    def test_removed_file(self):
        entry = self.create_file_entry()
        os.unlink(entry.path)

        self.assertFalse(entry.is_dir())
        # On POSIX, is_file() result depends if scandir() filled d_type or not
        if os.name == 'nt':
            self.assertTrue(entry.is_file())
        self.assertFalse(entry.is_symlink())
        if os.name == 'nt':
            self.assertRaises(FileNotFoundError, entry.inode)
            # don't fail
            entry.stat()
            entry.stat(follow_symlinks=False)
        else:
            self.assertGreater(entry.inode(), 0)
            self.assertRaises(FileNotFoundError, entry.stat)
            self.assertRaises(FileNotFoundError, entry.stat,
                              follow_symlinks=False)

    def test_broken_symlink(self):
        if not support.can_symlink():
            # skipTest() raises SkipTest: nothing to return
            self.skipTest('cannot create symbolic link')

        filename = self.create_file("file.txt")
        os.symlink(filename,
                   os.path.join(self.path, "symlink.txt"))
        entries = self.get_entries(['file.txt', 'symlink.txt'])
        entry = entries['symlink.txt']
        os.unlink(filename)

        self.assertGreater(entry.inode(), 0)
        self.assertFalse(entry.is_dir())
        self.assertFalse(entry.is_file())  # broken symlink returns False
        self.assertFalse(entry.is_dir(follow_symlinks=False))
        self.assertFalse(entry.is_file(follow_symlinks=False))
        self.assertTrue(entry.is_symlink())
        self.assertRaises(FileNotFoundError, entry.stat)
        # don't fail
        entry.stat(follow_symlinks=False)

    def test_bytes(self):
        if os.name == "nt":
            # On Windows, os.scandir(bytes) must raise an exception
            self.assertRaises(TypeError, list, os.scandir(b'.'))
            return

        self.create_file("file.txt")

        path_bytes = os.fsencode(self.path)
        entries = list(os.scandir(path_bytes))
        self.assertEqual(len(entries), 1, entries)
        entry = entries[0]

        self.assertEqual(entry.name, b'file.txt')
        self.assertEqual(entry.path,
                         os.fsencode(os.path.join(self.path, 'file.txt')))

    def test_path_null_char(self):
        self.assertRaises(ValueError, list, os.scandir("a\0b"))

    def test_empty_path(self):
        self.assertRaises(FileNotFoundError, list, os.scandir(''))

    @support.cpython_only
    def test_consume_scandir_twice(self):
        self.create_file("file.txt")
        path = self.path
        if os.name != 'nt':
            path = os.fsencode(path)
        scandir = os._scandir(path)

        entries = list(scandir)
        self.assertEqual(len(entries), 1, entries)

        # check that consuming the iterator twice doesn't crash
        entries2 = list(scandir)
        self.assertEqual(len(entries2), 0, entries2)
/* Fill *info and *reparse_tag from the WIN32_FIND_DATAW record *pFileData.
 *
 * Only the attributes, the three timestamps and the file size are copied;
 * every other field of *info is left at zero by the memset() below.
 * *reparse_tag receives dwReserved0 when the entry has the
 * FILE_ATTRIBUTE_REPARSE_POINT attribute, and 0 otherwise.
 */
static void
attributes_from_find_dataw(WIN32_FIND_DATAW *pFileData,
                           BY_HANDLE_FILE_INFORMATION *info,
                           ULONG *reparse_tag)
{
    /* start from an all-zero structure: fields not set below stay zero */
    memset(info, 0, sizeof(*info));
    info->dwFileAttributes = pFileData->dwFileAttributes;
    info->ftCreationTime   = pFileData->ftCreationTime;
    info->ftLastAccessTime = pFileData->ftLastAccessTime;
    info->ftLastWriteTime  = pFileData->ftLastWriteTime;
    info->nFileSizeHigh    = pFileData->nFileSizeHigh;
    info->nFileSizeLow     = pFileData->nFileSizeLow;
/*  info->nNumberOfLinks   = 1; */
    if (pFileData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
        *reparse_tag = pFileData->dwReserved0;
    else
        *reparse_tag = 0;
}
info->nFileSizeLow = FileData.nFileSizeLow; -/* info->nNumberOfLinks = 1; */ - if (FileData.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) - *reparse_tag = FileData.dwReserved0; + attributes_from_find_dataw(&FileData, info, reparse_tag); return TRUE; } @@ -16544,6 +16562,334 @@ posix_set_blocking(PyObject *self, PyObj #endif /* !MS_WINDOWS */ +/* Forward declaration */ +static PyTypeObject Scandir_Type; + +typedef struct { + PyObject_HEAD + /* on Windows, handle is INVALID_HANDLE_VALUE at creation and when + * the object is closed. A different attribute is required to indicate if + * the object is closed. + */ + int closed; +#ifdef MS_WINDOWS + wchar_t *path; + HANDLE handle; +#else + char *path; + DIR *dirp; +#endif +} ScandirObject; + +static void +scandir_close(ScandirObject* scandir) +{ +#ifdef MS_WINDOWS + HANDLE handle; + + if (scandir->closed) + return; + scandir->closed = 1; + + handle = scandir->handle; + if (handle == INVALID_HANDLE_VALUE) + return; + scandir->handle = INVALID_HANDLE_VALUE; + + Py_BEGIN_ALLOW_THREADS + FindClose(handle); + Py_END_ALLOW_THREADS +#else + DIR *dirp; + + if (scandir->closed) + return; + scandir->closed = 1; + + dirp = scandir->dirp; + if (dirp == NULL) + return; + scandir->dirp = NULL; + + Py_BEGIN_ALLOW_THREADS + closedir(dirp); + Py_END_ALLOW_THREADS +#endif +} + +static void +scandir_dealloc(PyObject *op) +{ + ScandirObject *scandir = (ScandirObject *)op; + + scandir_close(scandir); + PyMem_Free(scandir->path); + PyObject_Del(scandir); +} + +PyDoc_STRVAR(scandir__doc__, + "_scandir(path) -> ScandirIterator."); + +static PyObject * +posix_scandir(PyObject *self, PyObject *args) +{ + Py_ssize_t path_len; + ScandirObject *iterator = NULL; + +#ifdef MS_WINDOWS + PyObject *pathobj; + wchar_t *path; + wchar_t *filepath = NULL; + + if (!PyArg_ParseTuple(args, "U:_scandir", &pathobj)) + return NULL; + + path = PyUnicode_AsWideCharString(pathobj, &path_len); + if (path == NULL) + goto error; + + if (wcslen(path) != 
(size_t)path_len) { + PyErr_SetString(PyExc_ValueError, "embedded null byte"); + goto error; + } + + /* +5: len("\\*.*") + null character */ + filepath = (wchar_t *)PyMem_Malloc(sizeof(wchar_t) * (path_len + 5)); + if (filepath == NULL) { + PyErr_NoMemory(); + goto error; + } + wcscpy(filepath, path); + if (path_len > 0) { + if (filepath[path_len-1] != L'\\' && filepath[path_len-1] != L'/') + filepath[path_len++] = L'\\'; + wcscpy(filepath + path_len, L"*.*"); + } +#else + Py_buffer buffer; + char *path; + char *filepath = NULL; + DIR *dirp = NULL; + + if (!PyArg_ParseTuple(args, "y*:_scandir", &buffer)) + return NULL; + + path = buffer.buf; + path_len = buffer.len; + + if (strlen(path) != (size_t)path_len) { + PyErr_SetString(PyExc_ValueError, "embedded null byte"); + goto error; + } + + /* +5: len("/") + null byte */ + filepath = (char *)PyMem_Malloc(sizeof(char) * (path_len + 2)); + if (filepath == NULL) { + PyErr_NoMemory(); + goto error; + } + strcpy(filepath, path); + if (path_len > 0 && filepath[path_len-1] != '/') { + filepath[path_len] = '/'; + filepath[path_len+1] = '\0'; + } + + Py_BEGIN_ALLOW_THREADS + dirp = opendir(filepath); + Py_END_ALLOW_THREADS + + if (dirp == NULL) { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, path); + goto error; + } +#endif + + iterator = PyObject_New(ScandirObject, &Scandir_Type); + if (iterator == NULL) { + goto error; + } + iterator->closed = 0; +#ifdef MS_WINDOWS + iterator->handle = INVALID_HANDLE_VALUE; +#else + iterator->dirp = dirp; + dirp = NULL; +#endif + iterator->path = filepath; + filepath = NULL; + +error: +#ifdef MS_WINDOWS + PyMem_Free(path); +#else + if (dirp != NULL) + closedir(dirp); + PyBuffer_Release(&buffer); +#endif + PyMem_Free(filepath); + return (PyObject *)iterator; +} + +static PyObject * +scandir_iternext(PyObject *op) +{ + ScandirObject *scandir = (ScandirObject *)op; + PyObject *filename; +#ifdef MS_WINDOWS + PyObject *stat_result; + BOOL is_finished; + WIN32_FIND_DATAW find_data; + 
BY_HANDLE_FILE_INFORMATION info; + ULONG reparse_tag; + struct win32_stat st; + + if (scandir->closed) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + is_finished = 0; + while (1) { + if (scandir->handle == INVALID_HANDLE_VALUE) { + Py_BEGIN_ALLOW_THREADS + scandir->handle = FindFirstFileW(scandir->path, &find_data); + Py_END_ALLOW_THREADS + + if (scandir->handle == INVALID_HANDLE_VALUE) { + if (GetLastError() != ERROR_FILE_NOT_FOUND) + return PyErr_SetFromWindowsErr(0); + is_finished = 1; + } + } + else { + Py_BEGIN_ALLOW_THREADS + is_finished = !FindNextFileW(scandir->handle, &find_data); + Py_END_ALLOW_THREADS + + if (is_finished) { + if (GetLastError() != ERROR_NO_MORE_FILES) + return PyErr_SetFromWindowsErr(0); + break; + } + } + + /* Only continue if we have a useful filename or we've run out of + * files. A useful filename is one which isn't the "." and ".." + * pseudo-directories. + */ + if ((wcscmp(find_data.cFileName, L".") != 0 && + wcscmp(find_data.cFileName, L"..") != 0)) { + break; + } + } + + if (is_finished) { + scandir_close(scandir); + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + filename = PyUnicode_FromWideChar(find_data.cFileName, -1); + if (filename == NULL) + return NULL; + + attributes_from_find_dataw(&find_data, &info, &reparse_tag); + attribute_data_to_stat(&info, reparse_tag, &st); + stat_result = _pystat_fromstructstat(&st); + if (stat_result == NULL) { + Py_DECREF(filename); + return NULL; + } + + return Py_BuildValue("NN", filename, stat_result); +#else /* MS_WINDOWS */ + struct dirent *ep; + PyObject *d_type; + + if (scandir->closed) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + while (1) { + errno = 0; + Py_BEGIN_ALLOW_THREADS + ep = readdir(scandir->dirp); + Py_END_ALLOW_THREADS + + if (ep == NULL) { + if (errno == 0) { + /* end of directory */ + scandir_close(scandir); + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + return PyErr_SetFromErrnoWithFilename(PyExc_OSError, + 
/* Type of the iterator returned by _scandir().  Only deallocation and the
 * iterator protocol are implemented: iterating the object returns the
 * object itself (PyObject_SelfIter) and each step is produced by
 * scandir_iternext().  The slots after tp_methods are not listed in the
 * initializer.
 */
static PyTypeObject Scandir_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    MODNAME ".Scandir",         /* tp_name */
    sizeof(ScandirObject),      /* tp_basicsize */
    0,                          /* tp_itemsize */
    /* methods */
    (destructor)scandir_dealloc,/* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    PyObject_GenericGetAttr,    /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    0,                          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    PyObject_SelfIter,          /* tp_iter */
    (iternextfunc)scandir_iternext,/* tp_iternext */
    0,                          /* tp_methods */
};
DT_DIR)) return -1; +#endif +#ifdef DT_REG + if (PyModule_AddIntMacro(m, DT_REG)) return -1; +#endif +#ifdef DT_LNK + if (PyModule_AddIntMacro(m, DT_LNK)) return -1; +#endif + return 0; } -#ifdef MS_WINDOWS -#define INITFUNC PyInit_nt -#define MODNAME "nt" - -#else -#define INITFUNC PyInit_posix -#define MODNAME "posix" -#endif - static struct PyModuleDef posixmodule = { PyModuleDef_HEAD_INIT, MODNAME, @@ -17985,6 +18333,9 @@ INITFUNC(void) } PyModule_AddObject(m, "_have_functions", list); + if (PyType_Ready(&Scandir_Type) < 0) + return NULL; + initialized = 1; return m;