diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -460,6 +460,13 @@ These environment variables influence Py option. +.. envvar:: PYTHONHASHSEED + + If this is set, it is used as a fixed seed for the Unicode randomized hash: + number in range [0; 4294967295]. The value 0 disables the Unicode randomized + hash. + + .. envvar:: PYTHONIOENCODING If this is set before running the interpreter, it overrides the encoding used diff --git a/Include/pythonrun.h b/Include/pythonrun.h --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -246,6 +246,8 @@ typedef void (*PyOS_sighandler_t)(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); +/* Random */ +PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); #ifdef __cplusplus } diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -376,6 +376,12 @@ typedef struct { PyAPI_DATA(PyTypeObject) PyUnicode_Type; PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; +typedef struct { + Py_hash_t prefix; + Py_hash_t suffix; +} _Py_unicode_hash_secret_t; +PyAPI_DATA(_Py_unicode_hash_secret_t) _Py_unicode_hash_secret; + #define PyUnicode_Check(op) \ PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -31,7 +31,9 @@ Encoding basic Python object hierarchies Compact encoding:: >>> import json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':')) + >>> from collections import OrderedDict + >>> mydict = OrderedDict([('4', 5), ('6', 7)]) + >>> json.dumps([1,2,3,mydict], separators=(',', ':')) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: diff --git a/Lib/os.py b/Lib/os.py --- a/Lib/os.py +++ b/Lib/os.py @@ -761,23 +761,6 @@ try: except NameError: # statvfs_result may not exist pass -if not _exists("urandom"): - def urandom(n): - """urandom(n) -> str - - Return a string of n random bytes suitable for cryptographic use. - - """ - try: - _urandomfd = open("/dev/urandom", O_RDONLY) - except (OSError, IOError): - raise NotImplementedError("/dev/urandom (or equivalent) not found") - bs = b"" - while len(bs) < n: - bs += read(_urandomfd, n - len(bs)) - close(_urandomfd) - return bs - # Supply os.popen() def popen(cmd, mode="r", buffering=-1): if not isinstance(cmd, str): diff --git a/Lib/packaging/create.py b/Lib/packaging/create.py --- a/Lib/packaging/create.py +++ b/Lib/packaging/create.py @@ -311,8 +311,11 @@ class MainProgram: 'package_data', 'extra_files'): if not(name in self.data and self.data[name]): continue - fp.write('%s = %s\n' - % (name, '\n '.join(self.data[name]).strip())) + data = self.data[name] + if name == "extra_files": + data.sort() + data = '\n '.join(data).strip() + fp.write('%s = %s\n' % (name, data)) fp.write('\nresources =\n') for src, dest in self.data['resources']: fp.write(' %s = %s\n' % (src, dest)) diff --git a/Lib/packaging/tests/test_create.py b/Lib/packaging/tests/test_create.py --- a/Lib/packaging/tests/test_create.py +++ b/Lib/packaging/tests/test_create.py @@ -150,16 +150,16 @@ class CreateTestCase(support.TempdirMana mymodule scripts = my_script bin/run - extra_files = Martinique/Lamentin/dady + extra_files = Alexander + Flora + Martinique/Lamentin/bro + Martinique/Lamentin/dady Martinique/Lamentin/mumy Martinique/Lamentin/sys - Martinique/Lamentin/bro + Pom + README + pyxfoil/fengine.so setup.py - README - Pom - Flora - Alexander - pyxfoil/fengine.so resources = README.rst = {doc} @@ -217,8 +217,8 @@ ho, baby! [files] packages = pyxfoil - extra_files = pyxfoil/fengine.so - pyxfoil/babar.so + extra_files = pyxfoil/babar.so + pyxfoil/fengine.so resources = README.rst = {doc} diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py --- a/Lib/test/mapping_tests.py +++ b/Lib/test/mapping_tests.py @@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest. def _reference(self): """Return a dictionary of values which are invariant by storage in the object under test.""" - return {1:2, "key1":"value1", "key2":(1,2,3)} + return {"1": "2", "key1":"value1", "key2":(1,2,3)} def _empty_mapping(self): """Return an empty mapping object""" return self.type2test() diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -559,6 +559,11 @@ def main(tests=None, testdir=None, verbo except ValueError: print("Couldn't find starting test (%s), using all tests" % start) if randomize: + hashseed = os.getenv('PYTHONHASHSEED') + if not hashseed: + os.environ['PYTHONHASHSEED'] = str(random_seed) + os.execv(sys.executable, [sys.executable] + sys.argv) + return random.seed(random_seed) print("Using random seed", random_seed) random.shuffle(selected) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -597,7 +597,6 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(hash(1), hash(1)) self.assertEqual(hash(1), hash(1.0)) hash('spam') - self.assertEqual(hash('spam'), hash(b'spam')) hash((0,1,2,3)) def f(): pass self.assertRaises(TypeError, hash, []) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -6,6 +6,7 @@ import unittest import sys import dis import io +from test.script_helper import assert_python_ok class _C: def __init__(self, x): @@ -322,12 +323,6 @@ Variable names: 0: x""" % (('Formatted details of methods, functions, or code.', ' 6: None\n') if sys.flags.optimize < 2 else (None, '')) -@staticmethod -def tricky(x, y, z=True, *args, c, d, e=[], **kwds): - def f(c=c): - print(x, y, z, c, d, e, f) - yield x, y, z, c, d, e, f - code_info_tricky = """\ Name: tricky Filename: (.*) @@ -357,8 +352,6 @@ Cell variables: 4: x 5: z""" -co_tricky_nested_f = tricky.__func__.__code__.co_consts[1] - code_info_tricky_nested_f = """\ Name: f Filename: (.*) @@ -426,13 +419,36 @@ Names: class CodeInfoTests(unittest.TestCase): test_pairs = [ (dis.code_info, code_info_code_info), - (tricky, code_info_tricky), - (co_tricky_nested_f, code_info_tricky_nested_f), (expr_str, code_info_expr_str), (simple_stmt_str, code_info_simple_stmt_str), (compound_stmt_str, code_info_compound_stmt_str), ] + def get_tricky_output(self, func): + code = """ +import dis + +@staticmethod +def tricky(x, y, z=True, *args, c, d, e=[], **kwds): + def f(c=c): + print(x, y, z, c, d, e, f) + yield x, y, z, c, d, e, f + +%s + """ % func + err, stdout, stderr = assert_python_ok('-c', code, PYTHONHASHSEED='0') + return (stdout + stderr).decode('ascii', 'surrogateescape') + + def test_tricky(self): + stdout = self.get_tricky_output('dis.show_code(tricky)') + self.assertRegex(stdout, code_info_tricky) + + def test_tricky_nested(self): + stdout = self.get_tricky_output( + 'co_tricky_nested_f = tricky.__func__.__code__.co_consts[1]; ' + 'dis.show_code(co_tricky_nested_f)') + self.assertRegex(stdout, code_info_tricky_nested_f) + def test_code_info(self): self.maxDiff = 1000 for x, expected in self.test_pairs: diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py --- a/Lib/test/test_gdb.py +++ b/Lib/test/test_gdb.py @@ -52,13 +52,15 @@ class DebuggerTests(unittest.TestCase): """Test that the debugger can debug Python.""" - def run_gdb(self, *args): + def run_gdb(self, *args, **env): """Runs gdb with the command line given by *args. Returns its stdout, stderr """ + if not env: + env = None out, err = subprocess.Popen( - args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, ).communicate() return out.decode('utf-8', 'replace'), err.decode('utf-8', 'replace') @@ -118,7 +120,7 @@ class DebuggerTests(unittest.TestCase): # print ' '.join(args) # Use "args" to invoke gdb, capturing stdout, stderr: - out, err = self.run_gdb(*args) + out, err = self.run_gdb(*args, PYTHONHASHSEED='0') # Ignore some noise on stderr due to the pending breakpoint: err = err.replace('Function "%s" not defined.\n' % breakpoint, '') @@ -207,7 +209,8 @@ class PrettyPrintTests(DebuggerTests): 'Verify the pretty-printing of dictionaries' self.assertGdbRepr({}) self.assertGdbRepr({'foo': 'bar'}) - self.assertGdbRepr({'foo': 'bar', 'douglas':42}) + self.assertGdbRepr({'foo': 'bar', 'douglas': 42}, + "{'foo': 'bar', 'douglas': 42}") def test_lists(self): 'Verify the pretty-printing of lists' diff --git a/Lib/test/test_inspect.py b/Lib/test/test_inspect.py --- a/Lib/test/test_inspect.py +++ b/Lib/test/test_inspect.py @@ -799,7 +799,6 @@ class TestGetcallargsFunctions(unittest. self.assertEqualException(f, '2, 3, 4') self.assertEqualException(f, '1, 2, 3, a=1') self.assertEqualException(f, '2, 3, 4, c=5') - self.assertEqualException(f, '2, 3, 4, a=1, c=5') # f got an unexpected keyword argument self.assertEqualException(f, 'c=2') self.assertEqualException(f, '2, c=3') diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -12,6 +12,7 @@ import subprocess import time import shutil from test import support +from test.script_helper import assert_python_ok import contextlib import mmap import platform @@ -630,14 +631,32 @@ class DevNullTests(unittest.TestCase): self.assertEqual(f.read(), b'') class URandomTests(unittest.TestCase): - def test_urandom(self): - try: - self.assertEqual(len(os.urandom(1)), 1) - self.assertEqual(len(os.urandom(10)), 10) - self.assertEqual(len(os.urandom(100)), 100) - self.assertEqual(len(os.urandom(1000)), 1000) - except NotImplementedError: - pass + def test_urandom_length(self): + self.assertEqual(len(os.urandom(1)), 1) + self.assertEqual(len(os.urandom(10)), 10) + self.assertEqual(len(os.urandom(100)), 100) + self.assertEqual(len(os.urandom(1000)), 1000) + + def test_urandom_value(self): + data1 = os.urandom(16) + data2 = os.urandom(16) + self.assertNotEqual(data1, data2) + + def get_urandom_subprocess(self, count): + code = '\n'.join(( + 'import os, sys', + 'data = os.urandom(%s)' % count, + 'sys.stdout.buffer.write(data)', + 'sys.stdout.buffer.flush()')) + out = assert_python_ok('-c', code) + stdout = out[1] + self.assertEqual(len(stdout), 16) + return stdout + + def test_urandom_subprocess(self): + data1 = self.get_urandom_subprocess(16) + data2 = self.get_urandom_subprocess(16) + self.assertNotEqual(data1, data2) @contextlib.contextmanager def _execvpe_mockup(defpath=None): diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -891,7 +891,17 @@ class TestBasicOpsString(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 3 - self.repr = "{'a', 'c', 'b'}" + + def test_repr(self): + text = repr(self.set) + self.assertTrue(text.startswith('{')) + self.assertTrue(text.endswith('}')) + + result = text[1:-1].split(', ') + result.sort() + sorted_repr_values = [repr(value) for value in self.values] + sorted_repr_values.sort() + self.assertEqual(result, sorted_repr_values) #------------------------------------------------------------------------------ @@ -916,11 +926,22 @@ class TestBasicOpsMixedStringBytes(TestB self.set = set(self.values) self.dup = set(self.values) self.length = 4 - self.repr = "{'a', b'a', 'b', b'b'}" def tearDown(self): self._warning_filters.__exit__(None, None, None) + def test_repr(self): + text = repr(self.set) + self.assertTrue(text.startswith('{')) + self.assertTrue(text.endswith('}')) + + result = text[1:-1].split(', ') + result.sort() + sorted_repr_values = [repr(value) for value in self.values] + sorted_repr_values.sort() + self.assertEqual(result, sorted_repr_values) + + #============================================================================== def baditer(): diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -11,8 +11,11 @@ import struct import sys import unittest import warnings +from test.script_helper import assert_python_ok from test import support, string_tests +IS_64BIT = (struct.calcsize('l') == 8) + # Error handling (bad decoder return) def search_function(encoding): def decode1(input, errors="strict"): @@ -1943,6 +1946,42 @@ class StringModuleTest(unittest.TestCase self.assertRaises(TypeError, _string.formatter_field_name_split, 1) +class HashTest(unittest.TestCase): + def get_hash(self, text, seed=None): + env = {} + if seed is not None: + env['PYTHONHASHSEED'] = str(seed) + out = assert_python_ok( + '-c', 'print(hash(%r))' % text, + **env) + stdout = out[1].strip() + return int(stdout) + + def test_empty_string(self): + self.assertEqual(hash(""), 0) + + def test_null_hash(self): + # PYTHONHASHSEED=0 disables the randomized hash + if IS_64BIT: + hash_empty = 1453079729188098211 + else: + hash_empty = -1600925533 + self.assertEqual(self.get_hash("abc", 0), hash_empty) + + def test_fixed_hash(self): + # test a fixed seed for the randomized hash + if IS_64BIT: + h = -4410911502303878509 + else: + h = -206076799 + self.assertEqual(self.get_hash("abc", 42), h) + + def test_randomized_hash(self): + # two runs should return different hashes + run1 = self.get_hash("abc") + run2 = self.get_hash("abc") + self.assertNotEqual(run1, run2) + def test_main(): support.run_unittest(__name__) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1,16 +1,17 @@ """Regresssion tests for urllib""" -import urllib.parse -import urllib.request -import urllib.error +import collections +import email.message import http.client -import email.message import io -import unittest -from test import support import os import sys import tempfile +from test import support +import unittest +import urllib.error +import urllib.parse +import urllib.request from base64 import b64encode @@ -950,8 +951,9 @@ class urlencode_Tests(unittest.TestCase) self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True)) self.assertEqual("a=None&a=a", urllib.parse.urlencode({"a": [None, "a"]}, True)) + data = collections.OrderedDict([("a", 1), ("b", 1)]) self.assertEqual("a=a&a=b", - urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True)) + urllib.parse.urlencode({"a": data}, True)) def test_urlencode_encoding(self): # ASCII encoding. Expect %3F with errors="replace' diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1,6 +1,7 @@ #! /usr/bin/env python3 from test import support +import collections import unittest import urllib.parse @@ -768,7 +769,10 @@ class UrlParseTestCase(unittest.TestCase def test_urlencode_sequences(self): # Other tests incidentally urlencode things; test non-covered cases: # Sequence and object values. - result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True) + data = collections.OrderedDict() + data['a'] = [1, 2] + data['b'] = (3, 4, 5) + result = urllib.parse.urlencode(data, True) self.assertEqual(result, 'a=1&a=2&b=3&b=4&b=5') class Trivial: diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py --- a/Lib/tkinter/test/test_ttk/test_functions.py +++ b/Lib/tkinter/test/test_ttk/test_functions.py @@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.Tes ('a', 'b', 'c')), ("test {a b} c", ())) # state spec and options self.assertEqual(ttk._format_elemcreate('image', False, 'test', - ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y"))) + ('a', 'b'), a='x'), ("test a b", ("-a", "x"))) # format returned values as a tcl script # state spec with multiple states and an option with a multivalue self.assertEqual(ttk._format_elemcreate('image', True, 'test', diff --git a/Makefile.pre.in b/Makefile.pre.in --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -322,6 +322,7 @@ PYTHON_OBJS= \ Python/pystate.o \ Python/pythonrun.o \ Python/pytime.o \ + Python/random.o \ Python/structmember.o \ Python/symtable.o \ Python/sysmodule.o \ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -9329,81 +9329,36 @@ posix_getloadavg(PyObject *self, PyObjec } #endif -#ifdef MS_WINDOWS - -PyDoc_STRVAR(win32_urandom__doc__, +PyDoc_STRVAR(posix_urandom__doc__, "urandom(n) -> str\n\n\ -Return n random bytes suitable for cryptographic use."); - -typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ - LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ - DWORD dwFlags ); -typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ - BYTE *pbBuffer ); - -static CRYPTGENRANDOM pCryptGenRandom = NULL; -/* This handle is never explicitly released. Instead, the operating - system will release it when the process terminates. */ -static HCRYPTPROV hCryptProv = 0; +Return n pseudo-random bytes."); static PyObject* -win32_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; +posix_urandom(PyObject *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *result; + int ret; /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) + if (!PyArg_ParseTuple(args, "n:urandom", &size)) + return NULL; + if (size < 0) return PyErr_Format(PyExc_ValueError, "negative argument not allowed"); - if (hCryptProv == 0) { - HINSTANCE hAdvAPI32 = NULL; - CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; - - /* Obtain handle to the DLL containing CryptoAPI - This should not fail */ - hAdvAPI32 = GetModuleHandleW(L"advapi32.dll"); - if(hAdvAPI32 == NULL) - return win32_error("GetModuleHandle", NULL); - - /* Obtain pointers to the CryptoAPI functions - This will fail on some early versions of Win95 */ - pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( - hAdvAPI32, - "CryptAcquireContextA"); - if (pCryptAcquireContext == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptAcquireContextA not found"); - - pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress( - hAdvAPI32, "CryptGenRandom"); - if (pCryptGenRandom == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptGenRandom not found"); - - /* Acquire context */ - if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, - PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - return win32_error("CryptAcquireContext", NULL); - } - - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */ - if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*) - PyBytes_AS_STRING(result))) { - Py_DECREF(result); - return win32_error("CryptGenRandom", NULL); - } + result = PyBytes_FromStringAndSize(NULL, size); + if (result == NULL) + return NULL; + + ret = _PyOS_URandom(PyBytes_AS_STRING(result), + PyBytes_GET_SIZE(result)); + if (ret == -1) { + Py_DECREF(result); + return NULL; } return result; } -#endif PyDoc_STRVAR(device_encoding__doc__, "device_encoding(fd) -> str\n\n\ @@ -9445,42 +9400,6 @@ device_encoding(PyObject *self, PyObject return Py_None; } -#ifdef __VMS -/* Use openssl random routine */ -#include -PyDoc_STRVAR(vms_urandom__doc__, -"urandom(n) -> str\n\n\ -Return n random bytes suitable for cryptographic use."); - -static PyObject* -vms_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; - - /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) - return PyErr_Format(PyExc_ValueError, - "negative argument not allowed"); - - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - if (RAND_pseudo_bytes((unsigned char*) - PyBytes_AS_STRING(result), - howMany) < 0) { - Py_DECREF(result); - return PyErr_Format(PyExc_ValueError, - "RAND_pseudo_bytes"); - } - } - return result; -} -#endif - #ifdef HAVE_SETRESUID PyDoc_STRVAR(posix_setresuid__doc__, "setresuid(ruid, euid, suid)\n\n\ @@ -10887,12 +10806,7 @@ static PyMethodDef posix_methods[] = { #ifdef HAVE_GETLOADAVG {"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__}, #endif - #ifdef MS_WINDOWS - {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__}, - #endif - #ifdef __VMS - {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__}, - #endif + {"urandom", posix_urandom, METH_VARARGS, posix_urandom__doc__}, #ifdef HAVE_SETRESUID {"setresuid", posix_setresuid, METH_VARARGS, posix_setresuid__doc__}, #endif diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -166,6 +166,8 @@ extern "C" { *_to++ = (to_type) *_iter++; \ } while (0) +_Py_unicode_hash_secret_t _Py_unicode_hash_secret; + /* This dictionary holds all interned unicode strings. Note that references to strings in this dictionary are *not* counted in the string's ob_refcnt. When the interned string reaches a refcnt of 0 the string deallocation @@ -11193,8 +11195,6 @@ unicode_getitem(PyObject *self, Py_ssize return PyUnicode_FromOrdinal(ch); } -/* Believe it or not, this produces the same value for ASCII strings - as bytes_hash(). */ static Py_hash_t unicode_hash(PyObject *self) { @@ -11206,10 +11206,23 @@ unicode_hash(PyObject *self) if (PyUnicode_READY(self) == -1) return -1; len = PyUnicode_GET_LENGTH(self); + if (len == 0) { + _PyUnicode_HASH(self) = 0; + return 0; + } + + /* Issue #13703: add 2 x sizeof(Py_hash_t) random bytes (prefix and suffix) + to the output of hash(str) to protect Python against the hash collision + attack on dictionary. Without these random bytes, an attacker can + computes N strings with the same hash value to exploit to worst case of + a dict lookup, O(n^2) complexity, to cause a denial of service. See + oCERT advisory for the details: + http://www.ocert.org/advisories/ocert-2011-003.html */ + x = _Py_unicode_hash_secret.prefix; /* The hash function as a macro, gets expanded three times below. */ #define HASH(P) \ - x = (Py_uhash_t)*P << 7; \ + x ^= (Py_uhash_t)*P << 7; \ while (--len >= 0) \ x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++; @@ -11234,6 +11247,7 @@ unicode_hash(PyObject *self) } } x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self); + x ^= _Py_unicode_hash_secret.suffix; if (x == -1) x = -2; diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj --- a/PCbuild/pythoncore.vcproj +++ b/PCbuild/pythoncore.vcproj @@ -1891,6 +1891,10 @@ > + + diff --git a/Python/pythonrun.c b/Python/pythonrun.c --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -71,6 +71,7 @@ extern int _PyUnicode_Init(void); extern void _PyUnicode_Fini(void); extern int _PyLong_Init(void); extern void PyLong_Fini(void); +extern void _PyRandom_Init(void); extern int _PyFaulthandler_Init(void); extern void _PyFaulthandler_Fini(void); @@ -219,6 +220,8 @@ Py_InitializeEx(int install_sigs) if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p); + _PyRandom_Init(); + interp = PyInterpreterState_New(); if (interp == NULL) Py_FatalError("Py_Initialize: can't make first interpreter"); diff --git a/Python/random.c b/Python/random.c new file mode 100644 --- /dev/null +++ b/Python/random.c @@ -0,0 +1,268 @@ +#include "Python.h" +#ifdef MS_WINDOWS +#include +#else +#include +#endif + +static int random_initialized = 0; + +#ifdef MS_WINDOWS +typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ + LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ + DWORD dwFlags ); +typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ + BYTE *pbBuffer ); + +static CRYPTGENRANDOM pCryptGenRandom = NULL; +/* This handle is never explicitly released. Instead, the operating + system will release it when the process terminates. */ +static HCRYPTPROV hCryptProv = 0; + +static int +win32_urandom_init(int raise) +{ + HINSTANCE hAdvAPI32 = NULL; + CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; + + /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */ + hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if(hAdvAPI32 == NULL) + goto error; + + /* Obtain pointers to the CryptoAPI functions. This will fail on some early + versions of Win95. */ + pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( + hAdvAPI32, "CryptAcquireContextA"); + if (pCryptAcquireContext == NULL) + goto error; + + pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, + "CryptGenRandom"); + if (pCryptGenRandom == NULL) + goto error; + + /* Acquire context */ + if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, + PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + goto error; + + return 0; + +error: + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Fail to initialize Windows random API (CryptoGen)"); + return -1; +} + +/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen + API. Return 0 on success, or -1 on error. */ +static int +win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + Py_ssize_t orig_size = size; + Py_ssize_t chunk; + + if (hCryptProv == 0) + { + if (win32_urandom_init(raise) == -1) + return -1; + } + + while (size > 0) + { + chunk = Py_MIN(size, INT_MAX); + if (!pCryptGenRandom(hCryptProv, chunk, buffer)) + { + /* CryptGenRandom() failed */ + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Fail to initialized the randomized hash " + "secret using CryptoGen)"); + return -1; + } + buffer += chunk; + size -= chunk; + } + return 0; +} + +#else + +/* Read size bytes from /dev/urandom into buffer. + Call Py_FatalError() on error. */ +static void +dev_urandom_noraise(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + assert (0 < size); + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + Py_FatalError("Fail to open /dev/urandom"); + + while (0 < size) + { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + Py_FatalError("Fail to read bytes from /dev/urandom"); + break; + } + buffer += n; + size -= (Py_ssize_t)n; + } + close(fd); +} + +/* Read size bytes from /dev/urandom into buffer. + Return 0 on success, raise an exception and return -1 on error. */ +static int +dev_urandom_python(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + if (size <= 0) + return 0; + + Py_BEGIN_ALLOW_THREADS + fd = open("/dev/urandom", O_RDONLY); + Py_END_ALLOW_THREADS + if (fd < 0) + { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom"); + return -1; + } + + Py_BEGIN_ALLOW_THREADS + do { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + break; + buffer += n; + size -= (Py_ssize_t)n; + } while (0 < size); + Py_END_ALLOW_THREADS + + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + if (n < 0) + PyErr_SetFromErrno(PyExc_OSError); + else + PyErr_Format(PyExc_RuntimeError, + "Fail to read %zi bytes from /dev/urandom", + size); + close(fd); + return -1; + } + close(fd); + return 0; +} +#endif + +/* Fill buffer with pseudo-random bytes generated by a linear congruent + generator (LCG): + + x(n+1) = (x(n) * 214013 + 2531011) % 2^32 + + Use bits 23..16 of x(n) to generate a byte. */ +static void +lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size) +{ + size_t index; + unsigned int x; + + x = x0; + for (index=0; index < size; index++) { + x *= 214013; + x += 2531011; + /* modulo 2 ^ (8 * sizeof(int)) */ + buffer[index] = (x >> 16) & 0xff; + } +} + +/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic + use, from the operating random number generator (RNG). + + Return 0 on success, raise an exception and return -1 on error. */ +int +_PyOS_URandom(void *buffer, Py_ssize_t size) +{ + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "negative argument not allowed"); + return -1; + } + if (size == 0) + return 0; + +#ifdef MS_WINDOWS + return win32_urandom((unsigned char *)buffer, size, 1); +#else + return dev_urandom_python((char*)buffer, size); +#endif +} + +void +_PyRandom_Init(void) +{ + char *env; + void *secret = &_Py_unicode_hash_secret; + Py_ssize_t secret_size = sizeof _Py_unicode_hash_secret; + + if (random_initialized) + return; + random_initialized = 1; + + env = Py_GETENV("PYTHONHASHSEED"); + if (env && *env != '\0') { + char *endptr = env; + unsigned long seed; + seed = strtoul(env, &endptr, 10); + if (*endptr != '\0' + || seed > 4294967295UL + || (errno == ERANGE && seed == ULONG_MAX)) + { + Py_FatalError("PYTHONHASHSEED must be an integer " + "in range [0; 4294967295]"); + } + if (seed == 0) { + /* disable the randomized Unicode hash */ + memset(secret, 0, secret_size); + } + else { + lcg_urandom(seed, (unsigned char*)secret, secret_size); + } + } + else { +#ifdef MS_WINDOWS +#if 1 + (void)win32_urandom((unsigned char *)secret, secret_size, 0); +#else + /* fast but weak RNG (fast initialization, weak seed) */ + _PyTime_timeval t; + unsigned int seed; + _PyTime_gettimeofday(&t); + seed = (unsigned int)t.tv_sec; + seed ^= t.tv_usec; + seed ^= getpid(); + lcg_urandom(seed, (unsigned char*)secret, secret_size); +#endif +#else + dev_urandom_noraise((char*)secret, secret_size); +#endif + } +} +