diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -122,6 +122,8 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, __import__, 1, 2, 3, 4) self.assertRaises(ValueError, __import__, '') self.assertRaises(TypeError, __import__, 'sys', name='sys') + # embedded null character + self.assertRaises(TypeError, __import__, 'a\x00b') def test_abs(self): # int @@ -892,7 +894,11 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(fp.read(1000), 'YYY'*100) finally: fp.close() - unlink(TESTFN) + unlink(TESTFN) + + # embedded null character + self.assertRaises(TypeError, open, b'a\x00b') + self.assertRaises(TypeError, open, 'a\x00b') def test_ord(self): self.assertEqual(ord(' '), 32) diff --git a/Lib/test/test_grp.py b/Lib/test/test_grp.py --- a/Lib/test/test_grp.py +++ b/Lib/test/test_grp.py @@ -48,6 +48,9 @@ class GroupDatabaseTestCase(unittest.Tes self.assertRaises(TypeError, grp.getgrgid) self.assertRaises(TypeError, grp.getgrnam) self.assertRaises(TypeError, grp.getgrall, 42) + # reject embedded null bytes and characters + self.assertRaises(TypeError, grp.getgrnam, b'a\x00b') + self.assertRaises(TypeError, grp.getgrnam, 'a\x00b') # try to get some errors bynames = {} diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -181,6 +181,10 @@ class ImportTests(unittest.TestCase): self.assertRaises(SyntaxError, imp.find_module, "badsyntax_pep3120", [path]) + def test_load_source(self): + self.assertRaisesRegex(TypeError, 'embedded NUL character', + imp.load_source, __name__, __file__ + "\0") + class ReloadTests(unittest.TestCase): diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -361,10 +361,18 @@ PyCurses_ConvertToString(PyCursesWindowO { if (PyUnicode_Check(obj)) { #ifdef HAVE_NCURSESW + Py_ssize_t wlen; assert (wstr != NULL); - *wstr = 
PyUnicode_AsWideCharString(obj, NULL); + + *wstr = PyUnicode_AsWideCharString(obj, &wlen); if (*wstr == NULL) return 0; + if (wlen != wcslen(*wstr)) { + PyErr_SetString(PyExc_TypeError, + "embedded null character"); + PyMem_Free(*wstr); /* don't leak the converted buffer on the error path */ + return 0; + } return 2; #else assert (wstr == NULL); diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -256,15 +256,23 @@ fileio_init(PyObject *oself, PyObject *a #ifdef MS_WINDOWS if (PyUnicode_Check(nameobj)) { - widename = PyUnicode_AsUnicode(nameobj); + Py_ssize_t namelen; + + widename = PyUnicode_AsUnicodeAndSize(nameobj, &namelen); if (widename == NULL) return -1; + + if (wcslen(widename) != namelen) { + PyErr_SetString(PyExc_TypeError, "embedded null character"); + return -1; + } } else #endif if (fd < 0) { + Py_ssize_t namelen; + if (PyBytes_Check(nameobj) || PyByteArray_Check(nameobj)) { - Py_ssize_t namelen; if (PyObject_AsCharBuffer(nameobj, &name, &namelen) < 0) return -1; } @@ -284,6 +292,12 @@ fileio_init(PyObject *oself, PyObject *a goto error; } name = PyBytes_AS_STRING(stringobj); + namelen = PyBytes_GET_SIZE(stringobj); + } + + if (strlen(name) != namelen) { + PyErr_SetString(PyExc_TypeError, "embedded null character"); + goto error; } } diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -205,16 +205,27 @@ PyLocale_strcoll(PyObject* self, PyObjec { PyObject *os1, *os2, *result = NULL; wchar_t *ws1 = NULL, *ws2 = NULL; + Py_ssize_t wlen; if (!PyArg_ParseTuple(args, "UU:strcoll", &os1, &os2)) return NULL; /* Convert the unicode strings to wchar[]. 
*/ - ws1 = PyUnicode_AsWideCharString(os1, NULL); + ws1 = PyUnicode_AsWideCharString(os1, &wlen); if (ws1 == NULL) goto done; + if (wlen != wcslen(ws1)) { + PyErr_SetString(PyExc_TypeError, + "embedded null character"); + goto done; + } - ws2 = PyUnicode_AsWideCharString(os2, NULL); + ws2 = PyUnicode_AsWideCharString(os2, &wlen); /* wlen must describe ws2 for the check below */ if (ws2 == NULL) goto done; + if (wlen != wcslen(ws2)) { + PyErr_SetString(PyExc_TypeError, + "embedded null character"); + goto done; + } /* Collate the strings. */ result = PyLong_FromLong(wcscoll(ws1, ws2)); done: @@ -246,6 +257,11 @@ PyLocale_strxfrm(PyObject* self, PyObjec s = PyUnicode_AsWideCharString(str, &n1); if (s == NULL) goto exit; + if (n1 != wcslen(s)) { + PyErr_SetString(PyExc_TypeError, + "embedded null character"); + goto exit; + } /* assume no change in size, first */ n1 = n1 + 1; diff --git a/Modules/grpmodule.c b/Modules/grpmodule.c --- a/Modules/grpmodule.c +++ b/Modules/grpmodule.c @@ -112,6 +112,7 @@ grp_getgrnam(PyObject *self, PyObject *a return NULL; if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL) return NULL; + /* check for embedded null bytes */ if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1) goto out; diff --git a/Modules/nismodule.c b/Modules/nismodule.c --- a/Modules/nismodule.c +++ b/Modules/nismodule.c @@ -173,6 +173,7 @@ nis_match (PyObject *self, PyObject *arg return NULL; if ((bkey = PyUnicode_EncodeFSDefault(ukey)) == NULL) return NULL; + /* check for embedded null bytes */ if (PyBytes_AsStringAndSize(bkey, &key, &keylen) == -1) { Py_DECREF(bkey); return NULL; diff --git a/Modules/pwdmodule.c b/Modules/pwdmodule.c --- a/Modules/pwdmodule.c +++ b/Modules/pwdmodule.c @@ -133,6 +133,7 @@ pwd_getpwnam(PyObject *self, PyObject *a return NULL; if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL) return NULL; + /* check for embedded null bytes */ if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1) goto out; if ((p = getpwnam(name)) == NULL) { 
diff --git a/Modules/spwdmodule.c b/Modules/spwdmodule.c --- a/Modules/spwdmodule.c +++ 
b/Modules/spwdmodule.c @@ -119,6 +119,7 @@ static PyObject* spwd_getspnam(PyObject return NULL; if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL) return NULL; + /* check for embedded null bytes */ if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1) goto out; if ((p = getspnam(name)) == NULL) { diff --git a/Modules/zipimport.c b/Modules/zipimport.c --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -1094,11 +1094,18 @@ static PyObject * compile_source(PyObject *pathname, PyObject *source) { PyObject *code, *fixed_source, *pathbytes; + char *cpathname; pathbytes = PyUnicode_EncodeFSDefault(pathname); if (pathbytes == NULL) return NULL; + /* check for embedded null bytes */ + if (PyBytes_AsStringAndSize(pathbytes, &cpathname, NULL) < 0) { + Py_DECREF(pathbytes); + return NULL; + } + fixed_source = normalize_line_endings(source); if (fixed_source == NULL) { Py_DECREF(pathbytes); @@ -1106,7 +1113,7 @@ compile_source(PyObject *pathname, PyObj } code = Py_CompileString(PyBytes_AsString(fixed_source), - PyBytes_AsString(pathbytes), + cpathname, Py_file_input); Py_DECREF(pathbytes); Py_DECREF(fixed_source); diff --git a/Python/fileutils.c b/Python/fileutils.c --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -250,20 +250,36 @@ _Py_stat(PyObject *path, struct stat *st int err; struct _stat wstatbuf; wchar_t *wpath; + Py_ssize_t pathlen; - wpath = PyUnicode_AsUnicode(path); + wpath = PyUnicode_AsUnicodeAndSize(path, &pathlen); if (wpath == NULL) return -2; + if (wcslen(wpath) != pathlen) { + PyErr_SetString(PyExc_TypeError, "embedded null character"); + return -2; + } + err = _wstat(wpath, &wstatbuf); if (!err) statbuf->st_mode = wstatbuf.st_mode; return err; #else int ret; - PyObject *bytes = PyUnicode_EncodeFSDefault(path); + PyObject *bytes; + char *cpath; + + bytes = PyUnicode_EncodeFSDefault(path); if (bytes == NULL) return -2; - ret = stat(PyBytes_AS_STRING(bytes), statbuf); + + /* check for embedded null bytes */ + if (PyBytes_AsStringAndSize(bytes, 
&cpath, NULL) == -1) { + Py_DECREF(bytes); + return -2; + } + + ret = stat(cpath, statbuf); Py_DECREF(bytes); return ret; #endif @@ -309,10 +325,15 @@ _Py_fopen(PyObject *path, const char *mo wchar_t *wpath; wchar_t wmode[10]; int usize; + Py_ssize_t pathlen; - wpath = PyUnicode_AsUnicode(path); + wpath = PyUnicode_AsUnicodeAndSize(path, &pathlen); if (wpath == NULL) return NULL; + if (wcslen(wpath) != pathlen) { + PyErr_SetString(PyExc_TypeError, "embedded null character"); + return NULL; + } usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode)); if (usize == 0) @@ -321,10 +342,20 @@ _Py_fopen(PyObject *path, const char *mo return _wfopen(wpath, wmode); #else FILE *f; - PyObject *bytes = PyUnicode_EncodeFSDefault(path); + PyObject *bytes; + char *cpath; + + bytes = PyUnicode_EncodeFSDefault(path); if (bytes == NULL) return NULL; - f = fopen(PyBytes_AS_STRING(bytes), mode); + + /* check for embedded null bytes */ + if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) { + Py_DECREF(bytes); + return NULL; + } + + f = fopen(cpath, mode); Py_DECREF(bytes); return f; #endif diff --git a/Python/import.c b/Python/import.c --- a/Python/import.c +++ b/Python/import.c @@ -4159,6 +4159,7 @@ NullImporter_init(NullImporter *self, Py PyObject *pathobj; DWORD rv; wchar_t *path; + Py_ssize_t len; if (!_PyArg_NoKeywords("NullImporter()", kwds)) return -1; @@ -4172,9 +4173,16 @@ NullImporter_init(NullImporter *self, Py return -1; } - path = PyUnicode_AsWideCharString(pathobj, NULL); + path = PyUnicode_AsWideCharString(pathobj, &len); if (path == NULL) return -1; + if (len != wcslen(path)) { + PyErr_SetString(PyExc_TypeError, + "embedded null character"); + PyMem_Free(path); + return -1; + } + /* see issue1293 and issue3677: * stat() on Windows doesn't recognise paths like * "e:\\shared\\" and "\\\\whiterab-c2znlh\\shared" as dirs.