Index: Misc/NEWS =================================================================== --- Misc/NEWS (révision 80864) +++ Misc/NEWS (copie de travail) @@ -348,6 +348,12 @@ Library ------- +- Issue #8603: Create a bytes version of os.environ for Unix. Create + os.environb mapping and os.getenvb() function. os.unsetenv() encodes str + argument to the file system encoding with the surrogateescape error handler + (instead of utf8/strict) and accepts bytes. posix.environ keys and values are + bytes. + - Issue #4265: shutil.copyfile() was leaking file descriptors when disk fills. Patch by Tres Seaver. Index: Doc/library/posix.rst =================================================================== --- Doc/library/posix.rst (révision 80864) +++ Doc/library/posix.rst (copie de travail) @@ -69,17 +69,22 @@ .. data:: environ A dictionary representing the string environment at the time the interpreter - was started. For example, ``environ['HOME']`` is the pathname of your home - directory, equivalent to ``getenv("HOME")`` in C. + was started. Keys and values are bytes on Unix and str on Windows. For + example, ``environ[b'HOME']`` (``environ['HOME']`` on Windows) is the + pathname of your home directory, equivalent to ``getenv("HOME")`` in C. Modifying this dictionary does not affect the string environment passed on by :func:`execv`, :func:`popen` or :func:`system`; if you need to change the environment, pass ``environ`` to :func:`execve` or add variable assignments and export statements to the command string for :func:`system` or :func:`popen`. + .. versionchanged:: 3.2 + Keys and values are bytes on Unix. + .. note:: - The :mod:`os` module provides an alternate implementation of ``environ`` which - updates the environment on modification. Note also that updating ``os.environ`` - will render this dictionary obsolete. Use of the :mod:`os` module version of - this is recommended over direct access to the :mod:`posix` module. 
+ The :mod:`os` module provides an alternate implementation of ``environ`` + which updates the environment on modification. Note also that updating + :data:`os.environ` will render this dictionary obsolete. Use of the + :mod:`os` module version of this is recommended over direct access to the + :mod:`posix` module. Index: Doc/library/os.rst =================================================================== --- Doc/library/os.rst (révision 80864) +++ Doc/library/os.rst (copie de travail) @@ -107,6 +107,10 @@ to modify the environment as well as query the environment. :func:`putenv` will be called automatically when the mapping is modified. + On Unix, keys and values use :func:`sys.getfilesystemencoding` and the + ``'surrogateescape'`` error handler. Use :data:`environb` if you would like + to use a different encoding. + .. note:: Calling :func:`putenv` directly does not change ``os.environ``, so it's better @@ -128,6 +132,16 @@ one of the :meth:`pop` or :meth:`clear` methods is called. +.. data:: environb + + Bytes version of :data:`environ`: a mapping object representing the + environment as byte strings. :data:`environ` and :data:`environb` are + synchronized (modifying :data:`environb` updates :data:`environ`, and vice + versa). + + Availability: Unix. + + .. function:: chdir(path) fchdir(fd) getcwd() @@ -251,9 +265,21 @@ .. function:: getenv(key, default=None) Return the value of the environment variable *key* if it exists, or - *default* if it doesn't. Availability: most flavors of Unix, Windows. + *default* if it doesn't. *key*, *default* and the result are str. + Availability: most flavors of Unix, Windows. + On Unix, keys and values are decoded with :func:`sys.getfilesystemencoding` + and the ``'surrogateescape'`` error handler. Use :func:`os.getenvb` if you + would like to use a different encoding. + +.. function:: getenvb(key: bytes, default: bytes=None) + + Return the value of the environment variable *key* if it exists, or + *default* if it doesn't. 
*key*, *default* and the result are bytes. + Availability: most flavors of Unix. + + .. function:: putenv(key, value) .. index:: single: environment variables; setting Index: Lib/os.py =================================================================== --- Lib/os.py (révision 80864) +++ Lib/os.py (copie de travail) @@ -387,29 +387,33 @@ from _abcoll import MutableMapping # Can't use collections (bootstrap) class _Environ(MutableMapping): - def __init__(self, environ, keymap, putenv, unsetenv): - self.keymap = keymap + def __init__(self, data, encodekey, decodekey, encodevalue, decodevalue, putenv, unsetenv): + self.encodekey = encodekey + self.decodekey = decodekey + self.encodevalue = encodevalue + self.decodevalue = decodevalue self.putenv = putenv self.unsetenv = unsetenv - self.data = data = {} - for key, value in environ.items(): - data[keymap(key)] = str(value) + self.data = data def __getitem__(self, key): - return self.data[self.keymap(key)] + value = self.data[self.encodekey(key)] + return self.decodevalue(value) def __setitem__(self, key, value): - value = str(value) + key = self.encodekey(key) + value = self.encodevalue(value) self.putenv(key, value) - self.data[self.keymap(key)] = value + self.data[key] = value def __delitem__(self, key): + key = self.encodekey(key) self.unsetenv(key) - del self.data[self.keymap(key)] + del self.data[key] def __iter__(self): for key in self.data: - yield key + yield self.decodekey(key) def __len__(self): return len(self.data) @@ -439,22 +443,67 @@ else: __all__.append("unsetenv") -if name in ('os2', 'nt'): # Where Env Var Names Must Be UPPERCASE - _keymap = lambda key: str(key.upper()) -else: # Where Env Var Names Can Be Mixed Case - _keymap = lambda key: str(key) +def _createenviron(): + if name in ('os2', 'nt'): + # Where Env Var Names Must Be UPPERCASE + def check_str(value): + if not isinstance(value, str): + raise TypeError("str expected, not %s" % type(value).__name__) + return value + encode = check_str + decode 
= check_str + def encodekey(key): + return encode(key).upper() + data = {} + for key, value in environ.items(): + data[encodekey(key)] = value + else: + # Where Env Var Names Can Be Mixed Case + def encode(value): + if not isinstance(value, str): + raise TypeError("str expected, not %s" % type(value).__name__) + return value.encode(sys.getfilesystemencoding(), 'surrogateescape') + def decode(value): + if not isinstance(value, bytes): + raise TypeError("bytes expected, not %s" % type(value).__name__) + return value.decode(sys.getfilesystemencoding(), 'surrogateescape') + encodekey = encode + data = environ + return _Environ(data, + encodekey, decode, + encode, decode, + _putenv, _unsetenv) -environ = _Environ(environ, _keymap, _putenv, _unsetenv) +# unicode environ +environ = _createenviron() +del _createenviron -def getenv(key, default=None): +def getenv(key: str, default: str=None) -> str: """Get an environment variable, return None if it doesn't exist. The optional second argument can specify an alternate default.""" - if isinstance(key, bytes): - key = key.decode(sys.getfilesystemencoding(), "surrogateescape") return environ.get(key, default) __all__.append("getenv") +if name not in ('os2', 'nt'): + def _check_bytes(value): + if not isinstance(value, bytes): + raise TypeError("bytes expected, not %s" % type(value).__name__) + return value + + # bytes environ + environb = _Environ(environ.data, + _check_bytes, _check_bytes, + _check_bytes, _check_bytes, + _putenv, _unsetenv) + del _check_bytes + + def getenvb(key: bytes, default=None) -> bytes: + """Get an environment variable, return None if it doesn't exist. 
+ The optional second argument can specify an alternate default.""" + return environb.get(key, default) + __all__.append("getenvb") + def _exists(name): return name in globals() Index: Lib/test/test_os.py =================================================================== --- Lib/test/test_os.py (révision 80864) +++ Lib/test/test_os.py (copie de travail) @@ -369,12 +369,18 @@ def setUp(self): self.__save = dict(os.environ) + # os.environb is only defined where os.py creates it (not on 'nt'/'os2') + if hasattr(os, 'environb'): + self.__saveb = dict(os.environb) for key, value in self._reference().items(): os.environ[key] = value def tearDown(self): os.environ.clear() os.environ.update(self.__save) + if hasattr(os, 'environb'): + os.environb.clear() + os.environb.update(self.__saveb) def _reference(self): return {"KEY1":"VALUE1", "KEY2":"VALUE2", "KEY3":"VALUE3"} @@ -439,7 +445,25 @@ # Supplied PATH environment variable self.assertSequenceEqual(test_path, os.get_exec_path(test_env)) + @unittest.skipUnless(hasattr(os, 'environb'), "os.environb is not available") + def test_environb(self): + # os.environ -> os.environb + value = 'euro\u20ac' + try: + value_bytes = value.encode(sys.getfilesystemencoding(), 'surrogateescape') + except UnicodeEncodeError: + raise unittest.SkipTest("U+20AC character is not encodable to %s" % sys.getfilesystemencoding()) + os.environ['unicode'] = value + self.assertEquals(os.environ['unicode'], value) + self.assertEquals(os.environb[b'unicode'], value_bytes) + # os.environb -> os.environ + value = b'\xff' + os.environb[b'bytes'] = value + self.assertEquals(os.environb[b'bytes'], value) + value_str = value.decode(sys.getfilesystemencoding(), 'surrogateescape') + self.assertEquals(os.environ['bytes'], value_str) + class WalkTests(unittest.TestCase): """Tests for os.walk().""" Index: Lib/test/test_subprocess.py =================================================================== --- Lib/test/test_subprocess.py (révision 80864) +++ Lib/test/test_subprocess.py (copie de travail) @@ -803,8 +803,6 @@ def test_undecodable_env(self): for key, value in (('test', 'abc\uDCFF'), ('test\uDCFF', 
'42')): - value_repr = repr(value).encode("ascii") - # test str with surrogates script = "import os; print(repr(os.getenv(%s)))" % repr(key) env = os.environ.copy() @@ -813,19 +811,19 @@ [sys.executable, "-c", script], env=env) stdout = stdout.rstrip(b'\n\r') - self.assertEquals(stdout, value_repr) + self.assertEquals(stdout.decode('ascii'), repr(value)) # test bytes key = key.encode("ascii", "surrogateescape") value = value.encode("ascii", "surrogateescape") - script = "import os; print(repr(os.getenv(%s)))" % repr(key) + script = "import os; print(repr(os.getenvb(%s)))" % repr(key) env = os.environ.copy() env[key] = value stdout = subprocess.check_output( [sys.executable, "-c", script], env=env) stdout = stdout.rstrip(b'\n\r') - self.assertEquals(stdout, value_repr) + self.assertEquals(stdout.decode('ascii'), repr(value)) @unittest.skipUnless(mswindows, "Windows specific tests") Index: Modules/posixmodule.c =================================================================== --- Modules/posixmodule.c (révision 80864) +++ Modules/posixmodule.c (copie de travail) @@ -498,14 +498,12 @@ char *p = strchr(*e, '='); if (p == NULL) continue; - k = PyUnicode_Decode(*e, (int)(p-*e), - Py_FileSystemDefaultEncoding, "surrogateescape"); + k = PyBytes_FromStringAndSize(*e, (int)(p-*e)); if (k == NULL) { PyErr_Clear(); continue; } - v = PyUnicode_Decode(p+1, strlen(p+1), - Py_FileSystemDefaultEncoding, "surrogateescape"); + v = PyBytes_FromStringAndSize(p+1, strlen(p+1)); if (v == NULL) { PyErr_Clear(); Py_DECREF(k); @@ -5301,7 +5299,7 @@ char *s1, *s2; char *newenv; #endif - PyObject *newstr; + PyObject *newstr = NULL; size_t len; #ifdef MS_WINDOWS @@ -5324,15 +5322,19 @@ APIRET rc; rc = DosSetExtLIBPATH(s2, BEGIN_LIBPATH); - if (rc != NO_ERROR) - return os2_error(rc); + if (rc != NO_ERROR) { + os2_error(rc); + goto error; + } } else if (stricmp(s1, "ENDLIBPATH") == 0) { APIRET rc; rc = DosSetExtLIBPATH(s2, END_LIBPATH); - if (rc != NO_ERROR) - return os2_error(rc); + if (rc != 
NO_ERROR) { + os2_error(rc); + goto error; + } } else { #endif /* XXX This can leak memory -- not easy to fix :-( */ @@ -5342,36 +5344,40 @@ len = wcslen(s1) + wcslen(s2) + 2; newstr = PyUnicode_FromUnicode(NULL, (int)len - 1); #else - len = strlen(s1) + strlen(s2) + 2; + len = PyBytes_GET_SIZE(os1) + PyBytes_GET_SIZE(os2) +2; newstr = PyBytes_FromStringAndSize(NULL, (int)len - 1); #endif - if (newstr == NULL) - return PyErr_NoMemory(); + if (newstr == NULL) { + PyErr_NoMemory(); + goto error; + } #ifdef MS_WINDOWS newenv = PyUnicode_AsUnicode(newstr); _snwprintf(newenv, len, L"%s=%s", s1, s2); if (_wputenv(newenv)) { - Py_DECREF(newstr); posix_error(); - return NULL; + goto error; } #else newenv = PyBytes_AS_STRING(newstr); PyOS_snprintf(newenv, len, "%s=%s", s1, s2); if (putenv(newenv)) { - Py_DECREF(newstr); - Py_DECREF(os1); - Py_DECREF(os2); posix_error(); - return NULL; + goto error; } #endif + /* Install the first arg and newstr in posix_putenv_garbage; * this will cause previous value to be collected. This has to * happen after the real putenv() call because the old value * was still accessible until then. 
*/ if (PyDict_SetItem(posix_putenv_garbage, - PyTuple_GET_ITEM(args, 0), newstr)) { +#ifdef MS_WINDOWS + PyTuple_GET_ITEM(args, 0), +#else + os1, +#endif + newstr)) { /* really not much we can do; just leak */ PyErr_Clear(); } @@ -5382,12 +5388,20 @@ #if defined(PYOS_OS2) } #endif + #ifndef MS_WINDOWS Py_DECREF(os1); Py_DECREF(os2); #endif - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; + +error: +#ifndef MS_WINDOWS + Py_DECREF(os1); + Py_DECREF(os2); +#endif + Py_XDECREF(newstr); + return NULL; } #endif /* putenv */ @@ -5399,11 +5413,21 @@ static PyObject * posix_unsetenv(PyObject *self, PyObject *args) { +#ifdef MS_WINDOWS char *s1; if (!PyArg_ParseTuple(args, "s:unsetenv", &s1)) return NULL; +#else + PyObject *os1; + char *s1; + if (!PyArg_ParseTuple(args, "O&:unsetenv", + PyUnicode_FSConverter, &os1)) + return NULL; + s1 = PyBytes_AsString(os1); +#endif + unsetenv(s1); /* Remove the key from posix_putenv_garbage; @@ -5412,13 +5436,20 @@ * old value was still accessible until then. */ if (PyDict_DelItem(posix_putenv_garbage, - PyTuple_GET_ITEM(args, 0))) { +#ifdef MS_WINDOWS + PyTuple_GET_ITEM(args, 0) +#else + os1 +#endif + )) { /* really not much we can do; just leak */ PyErr_Clear(); } - Py_INCREF(Py_None); - return Py_None; +#ifndef MS_WINDOWS + Py_DECREF(os1); +#endif + Py_RETURN_NONE; } #endif /* unsetenv */