diff -r 3be60a4c8c63 Doc/using/cmdline.rst --- a/Doc/using/cmdline.rst Fri Jan 20 11:01:06 2012 -0500 +++ b/Doc/using/cmdline.rst Wed Jan 25 12:28:05 2012 -0500 @@ -460,6 +460,12 @@ option. +.. envvar:: PYTHONHASHSEED + + If this is set, it is used as a fixed seed when randomizing hashes. + It should be a number in the range [0; 4294967295]. The value 0 disables + the hash randomization. + .. envvar:: PYTHONIOENCODING If this is set before running the interpreter, it overrides the encoding used diff -r 3be60a4c8c63 Include/bytesobject.h --- a/Include/bytesobject.h Fri Jan 20 11:01:06 2012 -0500 +++ b/Include/bytesobject.h Wed Jan 25 12:28:05 2012 -0500 @@ -124,6 +124,8 @@ #define F_ALT (1<<3) #define F_ZERO (1<<4) +PyAPI_FUNC(Py_hash_t) _PyBytes_RandomizedHash(PyObject *bytes); + #ifdef __cplusplus } #endif diff -r 3be60a4c8c63 Include/object.h --- a/Include/object.h Fri Jan 20 11:01:06 2012 -0500 +++ b/Include/object.h Wed Jan 25 12:28:05 2012 -0500 @@ -488,6 +488,8 @@ PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *, PyObject *, PyObject *); PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *); +PyAPI_FUNC(Py_hash_t) PyObject_RandomizedHash(PyObject *); + PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *); PyAPI_FUNC(int) PyObject_IsTrue(PyObject *); PyAPI_FUNC(int) PyObject_Not(PyObject *); @@ -521,6 +523,12 @@ PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t); +PyAPI_FUNC(Py_hash_t) _Py_RandomizedHashBytes(unsigned char*, Py_ssize_t); +typedef struct { + Py_hash_t prefix; + Py_hash_t suffix; +} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; #endif /* Helper for passing objects to printf and the like */ diff -r 3be60a4c8c63 Include/pythonrun.h --- a/Include/pythonrun.h Fri Jan 20 11:01:06 2012 -0500 +++ b/Include/pythonrun.h Wed Jan 25 12:28:05 2012 -0500 @@ -246,6 +246,8 @@ PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); 
PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); +/* Random */ +PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); #ifdef __cplusplus } diff -r 3be60a4c8c63 Include/unicodeobject.h --- a/Include/unicodeobject.h Fri Jan 20 11:01:06 2012 -0500 +++ b/Include/unicodeobject.h Wed Jan 25 12:28:05 2012 -0500 @@ -2154,6 +2154,8 @@ /* Clear all static strings. */ PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); +PyAPI_FUNC(Py_hash_t) _PyUnicode_RandomizedHash(PyObject *unicode); + #ifdef __cplusplus } #endif diff -r 3be60a4c8c63 Lib/os.py --- a/Lib/os.py Fri Jan 20 11:01:06 2012 -0500 +++ b/Lib/os.py Wed Jan 25 12:28:05 2012 -0500 @@ -761,23 +761,6 @@ except NameError: # statvfs_result may not exist pass -if not _exists("urandom"): - def urandom(n): - """urandom(n) -> str - - Return a string of n random bytes suitable for cryptographic use. - - """ - try: - _urandomfd = open("/dev/urandom", O_RDONLY) - except (OSError, IOError): - raise NotImplementedError("/dev/urandom (or equivalent) not found") - bs = b"" - while len(bs) < n: - bs += read(_urandomfd, n - len(bs)) - close(_urandomfd) - return bs - # Supply os.popen() def popen(cmd, mode="r", buffering=-1): if not isinstance(cmd, str): diff -r 3be60a4c8c63 Lib/test/regrtest.py --- a/Lib/test/regrtest.py Fri Jan 20 11:01:06 2012 -0500 +++ b/Lib/test/regrtest.py Wed Jan 25 12:28:05 2012 -0500 @@ -559,6 +559,11 @@ except ValueError: print("Couldn't find starting test (%s), using all tests" % start) if randomize: + hashseed = os.getenv('PYTHONHASHSEED') + if not hashseed: + os.environ['PYTHONHASHSEED'] = str(random_seed) + os.execv(sys.executable, [sys.executable] + sys.argv) + return random.seed(random_seed) print("Using random seed", random_seed) random.shuffle(selected) diff -r 3be60a4c8c63 Lib/test/test_dict.py --- a/Lib/test/test_dict.py Fri Jan 20 11:01:06 2012 -0500 +++ b/Lib/test/test_dict.py Wed Jan 25 12:28:05 2012 -0500 @@ -3,6 +3,8 @@ import collections, random, string import gc, 
weakref +import sys +import time class DictTest(unittest.TestCase): @@ -757,6 +759,301 @@ self._tracked(MyDict()) +# Support classes for HashCollisionTests: +class ChosenHash: + """ + Use this to create arbitrary collections of keys that are non-equal + but have equal hashes, without needing to include hostile data + within the test suite. + """ + def __init__(self, variability, hash): + self.variability = variability + self.hash = hash + + def __eq__(self, other): + # The variability field is used to handle non-equalness: + return self.variability == other.variability + + def __hash__(self): + return self.hash + + def __repr__(self): + return 'ChosenHash(%r, %r)' % (self.variability, + self.hash) + +class Timer: + """ + Simple way to measure time elapsed during a test case + """ + def __init__(self): + self.starttime = time.time() + + def get_elapsed_time(self): + """Get elapsed time in seconds as a float""" + curtime = time.time() + return curtime - self.starttime + + def elapsed_time_as_str(self): + """Get elapsed time as a string (with units)""" + return '%0.3f seconds' % self.get_elapsed_time() + +class TookTooLong(RuntimeError): + def __init__(self, timelimit, elapsed, itercount=None): + self.timelimit = timelimit + self.elapsed = elapsed + self.itercount = itercount + + def __str__(self): + result = 'took >= %s seconds' % self.timelimit + if self.itercount is not None: + result += (' (%0.3f seconds elapsed after %i iterations)' + % (self.elapsed, self.itercount)) + else: + result += ' (%0.3f seconds elapsed)' % self.elapsed + return result + +# Some of the tests involve constructing large dictionaries. How big +# should they be? 
+ITEM_COUNT = 100000 + +# Arbitrary threshold (in seconds) for a "reasonable amount of time" +# that it should take to work with ITEM_COUNT items: +TIME_LIMIT = 5 + +class _FasterThanContext(object): + """ + A context manager for implementing assertFasterThan + """ + def __init__(self, test_case, **kwargs): + self.test_case = test_case + if 'seconds' in kwargs: + self.timelimit = kwargs['seconds'] + else: + raise ValueError() + + def __enter__(self): + self.timer = Timer() + return self + + def __exit__(self, exc_type, exc_value, tb): + if exc_type is not None: + # let unexpected exceptions pass through + return + + self.check_for_timeout() + if 1: + print('timer within %s took %s' + % (self.test_case, self.timer.elapsed_time_as_str())) + + def handle(self, callable_obj, args, kwargs): + """ + If callable_obj is None, assertRaises/Warns is being used as a + context manager, so check for a 'msg' kwarg and return self. + If callable_obj is not None, call it passing args and kwargs. + """ + if callable_obj is None: + self.msg = kwargs.pop('msg', None) + return self + with self: + callable_obj(*args, **kwargs) + + def check_for_timeout(self, itercount=None): + """ + Allow directly checking for timeouts, potentially supplying an + iteration count if within a loop. If the timer has elapsed, this will + raise a TookTooLong exception (potentially indicating how many + iterations were completed when the time limit was reached). + + Otherwise, it does nothing. 
+ """ + elapsed_time = self.timer.get_elapsed_time() + if elapsed_time > self.timelimit: + raise TookTooLong(self.timelimit, + elapsed_time, + itercount) + +@support.cpython_only +class HashCollisionTests(unittest.TestCase): + """ + Issue 13703: tests about the behavior of dicts in the face of hostile data + """ + + def assertFasterThan(self, callableObj=None, *args, **kwargs): + context = _FasterThanContext(self, *args, **kwargs) + return context.handle(callableObj, args, kwargs) + + def test_timings_with_benign_data(self): + # Verify that inserting many keys into a dict only takes a few seconds + d = dict() + with self.assertFasterThan(seconds=TIME_LIMIT) as cm: + for i in range(ITEM_COUNT): + d[i] = 0 + + # Verify that we can also retrieve the values quickly: + with self.assertFasterThan(seconds=TIME_LIMIT) as cm: + d[i] + + # Verify that we can quickly insert the same item many times + # (overwriting each time): + d = dict() + with self.assertFasterThan(seconds=TIME_LIMIT) as cm: + for i in range(ITEM_COUNT): + d[0] = 0 + + def assert_ma_lookup(self, d, name): + # verify that a dict's ma_lookup hook has a specific value + if hasattr(d, '_stats'): + self.assertEqual(d._stats()['ma_lookup'], + name) + + def test_dict_stats(self): + # test for the diagnostic _stats() method, if it exists: + if hasattr(dict, '_stats'): + # Simple cases in which no probing should happen at all: + for d in [dict(zip(range(100), range(100))), + dict(zip(range(1000), range(1000))), + dict(zip(range(10000), range(10000)))]: + stats = d._stats() + self.assertIn('ma_mask', stats) + # ma_mask will be large for these dicts + + # We don't expect any probing happened during construction: + self.assertEqual(stats['insertion_iter_count'], 0) + # They transition from ma_smalltable at size 6: + self.assertEqual(stats['num_insertions'], len(d) - 6) + self.assertEqual(stats['iter_count_per_insertion'], 0.0) + + # We shouldn't have transitioned to paranoid mode: + self.assert_ma_lookup(d, 
'lookdict') + + # Cases in which we expect some hash collisions: + from itertools import permutations + + for name, d, exp_length in [ + ('All strings of length 4', + dict([(''.join(chars), 1) + for chars in permutations('abcdefghijklmnopqrstuvwxyz', 4)]), + 358800), + ]: + + self.assertEqual(len(d), exp_length) + stats = d._stats() + + # ma_mask will be large for this dict: + self.assertGreater(stats['ma_mask'], 7) + + self.assertEqual(stats['num_insertions'], len(d) - 6) + + # We expect significant probing happened during construction: + self.assertGreater(stats['insertion_iter_count'], 100000) + # ...but it should be sane relative to the amount of work we were + # asked to do: + self.assertLessEqual(stats['iter_count_per_insertion'], 2.0) + # and so we didn't transition: + self.assert_ma_lookup(d, 'lookdict_unicode') + + def test_not_reaching_limit(self): + # Ensure that we can insert equal-hashed keys up to (but not reaching) + # the collision limit: + with self.assertFasterThan(seconds=TIME_LIMIT) as cm: + d = dict() + for i in range(50): + key = ChosenHash(variability=i, hash=42) + d[key] = 0 + self.assert_ma_lookup(d, 'lookdict') + + def test_many_hash_collisions(self): + """ + Ensure we gracefully handle inserting a large number of non-equal keys + with the same hash that transition the dict to "paranoid" mode: + """ + with self.assertFasterThan(seconds=TIME_LIMIT) as cm: + d = dict() + for i in range(60): + key = ChosenHash(variability=i, hash=42) + d[key] = 0 + + # Verify that we exceeded the safety threshold: + if hasattr(d, '_stats'): + self.assertGreaterEqual(d._stats()['iter_count_per_insertion'], + 32.0) + + # Verify that the dict transitioned to paranoid mode: + self.assert_ma_lookup(d, 'lookdict_paranoid') + + # Verify key lookup: + for i in range(60): + key = ChosenHash(variability=i, hash=42) + self.assertEqual(d[key], 0) + + # Frank Sievertsen found scenarios in which the collision-counting + # scheme could be attacked: + # 
http://mail.python.org/pipermail/python-dev/2012-January/115726.html + + def test_scenario_b_from_Frank_Sievertsen(self): + # This typically takes about 3 seconds on my box (with a debug build): + with self.assertFasterThan(seconds=TIME_LIMIT * 2) as cm: + d = dict() + + # Insert non-equal hash collisions up to but not reaching the + # limit where we take effect: + for i in range(50): + key = ChosenHash(variability=i, hash=42) + d[key] = 0 + cm.check_for_timeout(i) + + # We shouldn't have transitioned yet: + self.assert_ma_lookup(d, 'lookdict') + + # Now try to add many equal values that collide + # with the hash, and see how long it takes + for i in range(ITEM_COUNT): + key = ChosenHash(variability=0, hash=42) + d[key] = 0 + cm.check_for_timeout(i) + + # We still shouldn't have transitioned: + self.assert_ma_lookup(d, 'lookdict') + + def test_paranoid_str_dict(self): + # Exercise the handling of pure-str paranoid dict: + if hasattr(dict, '_make_paranoid'): + from itertools import permutations + + d = dict([(''.join(chars), 1) + for chars in permutations('abcdefghijklmnopqrstuvwxyz', 4)]) + + # It shouldn't be in paranoid mode: + self.assert_ma_lookup(d, 'lookdict_unicode') + + with self.assertFasterThan(seconds=TIME_LIMIT) as cm: + # Force it into paranoid mode: + d._make_paranoid() + self.assert_ma_lookup(d, 'lookdict_unicode_paranoid') + self.assertIn('abcd', d) + self.assertEqual(d['zyxw'], 1) + + # Now empty the dict: + d.clear() + + # We should now have a smalltable dict, but it still uses the + # paranoid ma_lookup: + if hasattr(d, '_stats'): + self.assertEqual(d._stats()['ma_mask'], 7) + self.assert_ma_lookup(d, 'lookdict_unicode_paranoid') + + # Verify that the now-empty dict is still usable: + self.assertNotIn('abcd', d) + d['foo'] = 'bar' + self.assertIn('foo', d) + self.assertEqual(d['foo'], 'bar') + + # Transition it from pure-str to mixed mode: + d[1] = 'not a str' + self.assert_ma_lookup(d, 'lookdict_paranoid') + self.assertIn(1, d) + 
self.assertEqual(d[1], 'not a str') + from test import mapping_tests class GeneralMappingTests(mapping_tests.BasicTestMappingProtocol): @@ -771,6 +1068,7 @@ def test_main(): support.run_unittest( DictTest, + HashCollisionTests, GeneralMappingTests, SubclassMappingTests, ) diff -r 3be60a4c8c63 Lib/test/test_hash.py --- a/Lib/test/test_hash.py Fri Jan 20 11:01:06 2012 -0500 +++ b/Lib/test/test_hash.py Wed Jan 25 12:28:05 2012 -0500 @@ -4,9 +4,12 @@ # Also test that hash implementations are inherited as expected import unittest +import struct from test import support +from test.script_helper import assert_python_ok from collections import Hashable +IS_64BIT = (struct.calcsize('l') == 8) class HashEqualityTestCase(unittest.TestCase): @@ -117,10 +120,56 @@ for obj in self.hashes_to_check: self.assertEqual(hash(obj), _default_hash(obj)) +@support.cpython_only +class HashRandomizationTest(unittest.TestCase): + types_to_check = [str, + lambda s: bytes(s, encoding='ascii')] + + def get_randomized_hash(self, text, seed=None): + env = {} + if seed is not None: + env['PYTHONHASHSEED'] = str(seed) + out = assert_python_ok( + '-c', 'import sys; print(sys._getrandomizedhash(%r))' % text, + **env) + stdout = out[1].strip() + return int(stdout) + + def test_empty_string(self): + for t in self.types_to_check: + self.assertEqual(hash(t("")), 0) + + def test_null_hash(self): + # PYTHONHASHSEED=0 disables the randomized hash + if IS_64BIT: + h = 1453079729188098211 + else: + h = -1600925533 + for t in self.types_to_check: + self.assertEqual(self.get_randomized_hash(t("abc"), seed=0), h) + + def test_fixed_hash(self): + # test a fixed seed for the randomized hash + if IS_64BIT: + h = -4410911502303878509 + else: + h = -206076799 + for t in self.types_to_check: + self.assertEqual(self.get_randomized_hash(t("abc"), seed=42), h) + + def test_randomized_hash(self): + # two runs should return different hashes + for t in self.types_to_check: + run1 = self.get_randomized_hash(t("abc")) + run2 
= self.get_randomized_hash(t("abc")) + self.assertNotEqual(run1, run2) + + def test_main(): support.run_unittest(HashEqualityTestCase, - HashInheritanceTestCase, - HashBuiltinsTestCase) + HashInheritanceTestCase, + HashBuiltinsTestCase, + HashRandomizationTest) if __name__ == "__main__": diff -r 3be60a4c8c63 Lib/test/test_os.py --- a/Lib/test/test_os.py Fri Jan 20 11:01:06 2012 -0500 +++ b/Lib/test/test_os.py Wed Jan 25 12:28:05 2012 -0500 @@ -12,6 +12,7 @@ import time import shutil from test import support +from test.script_helper import assert_python_ok import contextlib import mmap import platform @@ -630,14 +631,32 @@ self.assertEqual(f.read(), b'') class URandomTests(unittest.TestCase): - def test_urandom(self): - try: - self.assertEqual(len(os.urandom(1)), 1) - self.assertEqual(len(os.urandom(10)), 10) - self.assertEqual(len(os.urandom(100)), 100) - self.assertEqual(len(os.urandom(1000)), 1000) - except NotImplementedError: - pass + def test_urandom_length(self): + self.assertEqual(len(os.urandom(1)), 1) + self.assertEqual(len(os.urandom(10)), 10) + self.assertEqual(len(os.urandom(100)), 100) + self.assertEqual(len(os.urandom(1000)), 1000) + + def test_urandom_value(self): + data1 = os.urandom(16) + data2 = os.urandom(16) + self.assertNotEqual(data1, data2) + + def get_urandom_subprocess(self, count): + code = '\n'.join(( + 'import os, sys', + 'data = os.urandom(%s)' % count, + 'sys.stdout.buffer.write(data)', + 'sys.stdout.buffer.flush()')) + out = assert_python_ok('-c', code) + stdout = out[1] + self.assertEqual(len(stdout), 16) + return stdout + + def test_urandom_subprocess(self): + data1 = self.get_urandom_subprocess(16) + data2 = self.get_urandom_subprocess(16) + self.assertNotEqual(data1, data2) @contextlib.contextmanager def _execvpe_mockup(defpath=None): diff -r 3be60a4c8c63 Makefile.pre.in --- a/Makefile.pre.in Fri Jan 20 11:01:06 2012 -0500 +++ b/Makefile.pre.in Wed Jan 25 12:28:05 2012 -0500 @@ -322,6 +322,7 @@ Python/pystate.o \ 
Python/pythonrun.o \ Python/pytime.o \ + Python/random.o \ Python/structmember.o \ Python/symtable.o \ Python/sysmodule.o \ diff -r 3be60a4c8c63 Modules/posixmodule.c --- a/Modules/posixmodule.c Fri Jan 20 11:01:06 2012 -0500 +++ b/Modules/posixmodule.c Wed Jan 25 12:28:05 2012 -0500 @@ -9329,81 +9329,36 @@ } #endif -#ifdef MS_WINDOWS - -PyDoc_STRVAR(win32_urandom__doc__, +PyDoc_STRVAR(posix_urandom__doc__, "urandom(n) -> str\n\n\ -Return n random bytes suitable for cryptographic use."); - -typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ - LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ - DWORD dwFlags ); -typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ - BYTE *pbBuffer ); - -static CRYPTGENRANDOM pCryptGenRandom = NULL; -/* This handle is never explicitly released. Instead, the operating - system will release it when the process terminates. */ -static HCRYPTPROV hCryptProv = 0; +Return n pseudo-random bytes."); static PyObject* -win32_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; +posix_urandom(PyObject *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *result; + int ret; /* Read arguments */ - if (! 
PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) + if (!PyArg_ParseTuple(args, "n:urandom", &size)) + return NULL; + if (size < 0) return PyErr_Format(PyExc_ValueError, "negative argument not allowed"); - if (hCryptProv == 0) { - HINSTANCE hAdvAPI32 = NULL; - CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; - - /* Obtain handle to the DLL containing CryptoAPI - This should not fail */ - hAdvAPI32 = GetModuleHandleW(L"advapi32.dll"); - if(hAdvAPI32 == NULL) - return win32_error("GetModuleHandle", NULL); - - /* Obtain pointers to the CryptoAPI functions - This will fail on some early versions of Win95 */ - pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( - hAdvAPI32, - "CryptAcquireContextA"); - if (pCryptAcquireContext == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptAcquireContextA not found"); - - pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress( - hAdvAPI32, "CryptGenRandom"); - if (pCryptGenRandom == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptGenRandom not found"); - - /* Acquire context */ - if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, - PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - return win32_error("CryptAcquireContext", NULL); - } - - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */ - if (! 
pCryptGenRandom(hCryptProv, howMany, (unsigned char*) - PyBytes_AS_STRING(result))) { - Py_DECREF(result); - return win32_error("CryptGenRandom", NULL); - } + result = PyBytes_FromStringAndSize(NULL, size); + if (result == NULL) + return NULL; + + ret = _PyOS_URandom(PyBytes_AS_STRING(result), + PyBytes_GET_SIZE(result)); + if (ret == -1) { + Py_DECREF(result); + return NULL; } return result; } -#endif PyDoc_STRVAR(device_encoding__doc__, "device_encoding(fd) -> str\n\n\ @@ -9445,42 +9400,6 @@ return Py_None; } -#ifdef __VMS -/* Use openssl random routine */ -#include -PyDoc_STRVAR(vms_urandom__doc__, -"urandom(n) -> str\n\n\ -Return n random bytes suitable for cryptographic use."); - -static PyObject* -vms_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; - - /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) - return PyErr_Format(PyExc_ValueError, - "negative argument not allowed"); - - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - if (RAND_pseudo_bytes((unsigned char*) - PyBytes_AS_STRING(result), - howMany) < 0) { - Py_DECREF(result); - return PyErr_Format(PyExc_ValueError, - "RAND_pseudo_bytes"); - } - } - return result; -} -#endif - #ifdef HAVE_SETRESUID PyDoc_STRVAR(posix_setresuid__doc__, "setresuid(ruid, euid, suid)\n\n\ @@ -10887,12 +10806,7 @@ #ifdef HAVE_GETLOADAVG {"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__}, #endif - #ifdef MS_WINDOWS - {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__}, - #endif - #ifdef __VMS - {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__}, - #endif + {"urandom", posix_urandom, METH_VARARGS, posix_urandom__doc__}, #ifdef HAVE_SETRESUID {"setresuid", posix_setresuid, METH_VARARGS, posix_setresuid__doc__}, #endif diff -r 3be60a4c8c63 Objects/bytesobject.c --- a/Objects/bytesobject.c Fri Jan 20 11:01:06 2012 -0500 +++ 
b/Objects/bytesobject.c Wed Jan 25 12:28:05 2012 -0500 @@ -867,6 +867,13 @@ return a->ob_shash; } +Py_hash_t +_PyBytes_RandomizedHash(PyObject *a) +{ + return _Py_RandomizedHashBytes((unsigned char *)((PyBytesObject *)a)->ob_sval, + Py_SIZE(a)); +} + static PyObject* bytes_subscript(PyBytesObject* self, PyObject* item) { diff -r 3be60a4c8c63 Objects/dictobject.c --- a/Objects/dictobject.c Fri Jan 20 11:01:06 2012 -0500 +++ b/Objects/dictobject.c Wed Jan 25 12:28:05 2012 -0500 @@ -10,6 +10,32 @@ #include "Python.h" #include "stringlib/eq.h" +/* undefine this: */ +#define DICT_PROTECTION_TRACKING 1 + +#define Py_MAX_AVERAGE_PROBES_PER_INSERT 32 +/* power-of-two to make multiplication fast */ + +/* Avoid overflow by dividing stats when they exceed this: */ +#define Py_DICT_STATS_ROUNDING_THRESHOLD 0x40000000 + +/* For large dictionaries, reuse the space allocated for ma_smalltable */ +typedef struct _Py_LargeDictFields { + int is_inserting; + + /* Check the ratio between these, to track amortized cost per operation */ + size_t num_insertions; + size_t insertion_iter_count; + +} _Py_LargeDictFields; + +#define DICT_IS_PARANOID(mp) \ + ((mp)->ma_lookup == lookdict_paranoid \ + || ((mp)->ma_lookup == lookdict_unicode_paranoid)) + +#define PyDict_LARGEDICTFIELDS(mp) \ + ((_Py_LargeDictFields*)&(mp)->ma_smalltable) +/* FIXME: add assert(mp->ma_table != mp->ma_smalltable) */ /* Set a key error with the specified argument, wrapping it in a * tuple automatically so that tuple keys are not unpacked as the @@ -139,11 +165,28 @@ /* forward declarations */ static PyDictEntry * +lookdict(PyDictObject *mp, PyObject *key, Py_hash_t hash); + +static PyDictEntry * lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash); +static PyDictEntry * +lookdict_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash); + +static PyDictEntry * +lookdict_unicode_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash); + +static int +transition_to_paranoid_dict(PyDictObject *mp); + 
+static int +check_iter_count(PyDictObject *mp, PyObject *key, Py_hash_t hash); + #ifdef SHOW_CONVERSION_COUNTS static long created = 0L; -static long converted = 0L; +static long converted_unicode_to_general = 0L; +static long converted_unicode_to_paranoid = 0L; +static long converted_general_to_paranoid = 0L; static void show_counts(void) @@ -352,7 +395,7 @@ * XXX A clever adversary could prevent this * XXX from terminating. */ - return lookdict(mp, key, hash); + return mp->ma_lookup(mp, key, hash); } } freeslot = NULL; @@ -384,11 +427,21 @@ * XXX A clever adversary could prevent this * XXX from terminating. */ - return lookdict(mp, key, hash); + return mp->ma_lookup(mp, key, hash); } } else if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; + + if (check_iter_count(mp, key, hash)) { + if (-1 == transition_to_paranoid_dict(mp)) { + return NULL; + } + + /* Everything has changed. + Restart the lookup using the new function: */ + return mp->ma_lookup(mp, key, hash); + } } assert(0); /* NOT REACHED */ return 0; @@ -422,7 +475,14 @@ #ifdef SHOW_CONVERSION_COUNTS ++converted; #endif - mp->ma_lookup = lookdict; + if (mp->ma_lookup == lookdict_unicode) { + /* transition from lookdict_unicode -> lookdict */ + mp->ma_lookup = lookdict; + } else { + /* transition from lookdict_unicode_paranoid -> lookdict_paranoid */ + assert(mp->ma_lookup == lookdict_unicode_paranoid); + mp->ma_lookup = lookdict_paranoid; + } return lookdict(mp, key, hash); } i = (size_t)hash & mask; @@ -451,11 +511,66 @@ return ep; if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; + if (check_iter_count(mp, key, hash)) { + if (-1 == transition_to_paranoid_dict(mp)) { + return NULL; + } + + /* Everything has changed. + Restart the lookup using the new function: */ + return mp->ma_lookup(mp, key, hash); + } } assert(0); /* NOT REACHED */ return 0; } +/* Version of lookdict which ignores the hash value passed in, and instead + generates an alternate hash for each object using a secret. 
+ Note that if the object can't do this, it simply reuses its cached hash +*/ +static PyDictEntry * +lookdict_paranoid(PyDictObject *mp, PyObject *key, Py_hash_t hash) +{ + /* + Ignore the object's cached hash value, and instead compute and use a + "secret" one. This won't be cached, meaning that the per-object cost + of the call gets a lot larger. However, it ought to still be O(1) + */ + Py_hash_t randomized_hash = PyObject_RandomizedHash(key); + PyDictEntry *ep = lookdict(mp, key, randomized_hash); + return ep; +} + +/* + Similar to lookdict_paranoid and lookdict_unicode: used when a dict has + received hostile data, where all keys are PyUnicodeObject +*/ +static PyDictEntry * +lookdict_unicode_paranoid(PyDictObject *mp, PyObject *key, + Py_hash_t hash) +{ + /* + As per lookdict_paranoid, we ignore the object's cached hash value + */ + Py_hash_t randomized_hash; + PyDictEntry *ep; + + if (!PyUnicode_CheckExact(key)) { + randomized_hash = PyObject_RandomizedHash(key); + /* this call will transition internally to lookdict_paranoid: */ + ep = lookdict_unicode(mp, key, randomized_hash); + } else { + /* use optimized implementation when obtaining randomized hash, since + we know it's a PyUnicodeObject: */ + randomized_hash = _PyUnicode_RandomizedHash(key); + ep = lookdict_unicode(mp, key, randomized_hash); + } + + return ep; +} + + int _PyDict_HasOnlyStringKeys(PyObject *dict) { @@ -463,7 +578,8 @@ PyObject *key, *value; assert(PyDict_Check(dict)); /* Shortcut */ - if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode) + if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode || + ((PyDictObject *)dict)->ma_lookup == lookdict_unicode_paranoid) return 1; while (PyDict_Next(dict, &pos, &key, &value)) if (!PyUnicode_Check(key)) @@ -530,9 +646,23 @@ PyObject *old_value; register PyDictEntry *ep; typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, Py_hash_t); + _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp); assert(mp->ma_lookup != NULL); 
+ if (mp->ma_mask >= PyDict_MINSIZE) { + large_fields->is_inserting = 1; + + /* Increment num_insertions, protecting against stats overflow: */ + if (++large_fields->num_insertions > Py_DICT_STATS_ROUNDING_THRESHOLD) { + /* Preserve the ratio between these two: */ + large_fields->insertion_iter_count >>= 4; + large_fields->num_insertions >>= 4; + } + } ep = mp->ma_lookup(mp, key, hash); + if (mp->ma_mask >= PyDict_MINSIZE) { + large_fields->is_inserting = 0; + } if (ep == NULL) { Py_DECREF(key); Py_DECREF(value); @@ -553,7 +683,15 @@ Py_DECREF(dummy); } ep->me_key = key; - ep->me_hash = hash; + if (!DICT_IS_PARANOID(mp)) { + ep->me_hash = hash; + } else { + /* Ideally we need some way of getting back the randomized hash + from the ma_lookup function, but there doesn't seem to be a + way of doing that without breaking ABI. + For now, recalculate the randomized hash: */ + ep->me_hash = PyObject_RandomizedHash(key); + } ep->me_value = value; mp->ma_used++; } @@ -675,8 +813,15 @@ /* else key == value == NULL: nothing to do */ } - if (is_oldtable_malloced) + if (is_oldtable_malloced) { PyMem_DEL(oldtable); + } else { + if (mp->ma_table != mp->ma_smalltable) { + /* clean ma_smalltable for use as _Py_LargeDictFields: */ + memset(mp->ma_smalltable, 0, sizeof(mp->ma_smalltable)); + } + } + return 0; } @@ -1996,6 +2141,91 @@ PyDoc_STRVAR(copy__doc__, "D.copy() -> a shallow copy of D"); +#ifdef DICT_PROTECTION_TRACKING +/* Debug code for use by selftests: */ +static const char * +get_name_of_ma_lookup(PyDictObject *mp) +{ + /* can't do this as a switch statement */ + if (mp->ma_lookup == lookdict) { + return "lookdict"; + } else if (mp->ma_lookup == lookdict_unicode) { + return "lookdict_unicode"; + } else if (mp->ma_lookup == lookdict_paranoid) { + return "lookdict_paranoid"; + } else if (mp->ma_lookup == lookdict_unicode_paranoid) { + return "lookdict_unicode_paranoid"; + } else { + return "unknown ma_lookup"; + } +} + +static PyObject * +dict__stats(PyDictObject *mp) +{ + 
PyObject *result = PyDict_New(); + if (!result) goto error; + +#define ADD_STAT(name, expr) \ + { \ + PyObject *value = (expr); \ + if (!value) \ + goto error; \ + if (-1 == PyDict_SetItemString(result, (name), value)) { \ + Py_DECREF(value); \ + goto error; \ + } \ + Py_DECREF(value); \ + } + + ADD_STAT("ma_mask", PyLong_FromLong(mp->ma_mask)); + ADD_STAT("ma_lookup", PyUnicode_FromString(get_name_of_ma_lookup(mp))); + if (mp->ma_mask >= PyDict_MINSIZE) { + _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp); + ADD_STAT("num_insertions", PyLong_FromLong(large_fields->num_insertions)); + ADD_STAT("insertion_iter_count", PyLong_FromLong(large_fields->insertion_iter_count)); + if (large_fields->num_insertions > 0) { + ADD_STAT("iter_count_per_insertion", + PyFloat_FromDouble((double)large_fields->insertion_iter_count + / (double)large_fields->num_insertions)); + } + } + + return result; + + error: + Py_XDECREF(result); + return NULL; +} + +static PyObject * +dict__make_paranoid(register PyDictObject *mp) +{ + if (mp->ma_mask >= PyDict_MINSIZE) { + if (DICT_IS_PARANOID(mp)) { + PyErr_SetString(PyExc_RuntimeError, + "dict is already in paranoid mode"); + return NULL; + } else { + transition_to_paranoid_dict(mp); + Py_RETURN_NONE; + } + } else { + PyErr_SetString(PyExc_RuntimeError, + ("dict is not large enough to be forced" + " into paranoid mode")); + return NULL; + } +} + + +PyDoc_STRVAR(_stats__doc__, +"D._stats() -> for CPython selftests: obtain debugging stats on D"); + +PyDoc_STRVAR(_make_paranoid__doc__, +"D._make_paranoid() -> for CPython selftests: forcibly transition this dict into paranoid mode"); +#endif + /* Forward */ static PyObject *dictkeys_new(PyObject *); static PyObject *dictitems_new(PyObject *); @@ -2037,6 +2267,12 @@ clear__doc__}, {"copy", (PyCFunction)dict_copy, METH_NOARGS, copy__doc__}, +#ifdef DICT_PROTECTION_TRACKING + {"_stats", (PyCFunction)dict__stats, METH_NOARGS, + _stats__doc__}, + {"_make_paranoid", 
(PyCFunction)dict__make_paranoid, METH_NOARGS, + _make_paranoid__doc__}, +#endif {NULL, NULL} /* sentinel */ }; @@ -3067,3 +3303,144 @@ { return dictview_new(dict, &PyDictValues_Type); } + +/* + Swap the hash values for the active entries with the values + in the corresponding indices in the given array (of size + (mp->ma_mask + 1) values) + */ +static void +swap_hash_values(PyDictObject *mp, Py_hash_t *hashes) +{ + Py_ssize_t i; + + assert(mp); + assert(hashes); + + for (i=0; i <= mp->ma_mask; i++) { + PyDictEntry *ep = &mp->ma_table[i]; + if (ep->me_value != NULL) { /* active entry */ + Py_hash_t old_hash = ep->me_hash; + ep->me_hash = hashes[i]; + hashes[i] = old_hash; + } + } +} + +/* + Switch to new lookup implementation, using secret hashes, reordering + all of the me_hash slots. + + Returns -1 for failure, 0 for success +*/ +static int +transition_to_paranoid_dict(PyDictObject *mp) +{ + Py_hash_t *hashes; + Py_ssize_t i; + + assert(mp->ma_table != mp->ma_smalltable); + + assert(!DICT_IS_PARANOID(mp)); + + /* + Recalculate all of the me_hash values within ma_table + using the randomized new algorithm. + + An error could occur during this process, so allocate + a temporary buffer: + */ + hashes = PyMem_NEW(Py_hash_t, mp->ma_mask + 1); + if (hashes == NULL) { + PyErr_NoMemory(); + return -1; + } + + /* Populate "hashes" with new values for the active entries, + leaving the other indices untouched: */ + for (i=0; i <= mp->ma_mask; i++) { + PyDictEntry *ep = &mp->ma_table[i]; + if (ep->me_value != NULL) { /* active entry */ + Py_hash_t new_hash = PyObject_RandomizedHash(ep->me_key); + if (new_hash != -1) { + hashes[i] = new_hash; + } else { + /* Error: */ + PyMem_DEL(hashes); + return -1; + } + } + } + + /* OK, we have new hashes for all active entries, and no errors + occurred. 
Copy them up into the ma_table, swapping the old + value into the buffer in case we need to reconstruct things + later: */ + swap_hash_values(mp, hashes); + + /* + "hashes" now holds the old hash values; mp->ma_table holds + the new hash values + + Now use dictresize to reorganize the order of the entries. + We use ma_mask here as the minimum size to ensure that the dict + doesn't start using ma_smalltable again + */ + assert(mp->ma_mask >= PyDict_MINSIZE); + if (-1 == dictresize(mp, mp->ma_mask)) { + /* This can fail under low memory conditions, before changing + anything. If so, swap the old hash values back in: */ + swap_hash_values(mp, hashes); + PyMem_DEL(hashes); + return -1; + } + + /* Success. We're done with the "hashes" buffer: */ + PyMem_DEL(hashes); + + /* Change ma_lookup so that we use randomized hashes: */ + if (mp->ma_lookup == lookdict_unicode) { + /* transition from lookdict_unicode -> lookdict_unicode_paranoid */ + mp->ma_lookup = lookdict_unicode_paranoid; + } else { + /* transition from lookdict -> lookdict_paranoid */ + assert(mp->ma_lookup == lookdict); + mp->ma_lookup = lookdict_paranoid; + } + + /* FIXME: track conversions?
*/ + return 0; +} + +/* may want to hand-inline this one + + Returns 1 when the transition-to-paranoid should occur, 0 otherwise + */ +static int +check_iter_count(PyDictObject *mp, PyObject *key, Py_hash_t hash) +{ + if (mp->ma_mask >= PyDict_MINSIZE) { + _Py_LargeDictFields *large_fields = PyDict_LARGEDICTFIELDS(mp); + assert(mp->ma_table != mp->ma_smalltable); + if (large_fields->is_inserting) { + + /* Increment insertion_iter_count, + protecting against stats overflow: */ + if (++large_fields->insertion_iter_count + >= Py_DICT_STATS_ROUNDING_THRESHOLD) { + /* Preserve the ratio between these two: */ + large_fields->insertion_iter_count >>= 4; + large_fields->num_insertions >>= 4; + } + + /* Transition into paranoid mode if the ratio is too bad: */ + if (large_fields->insertion_iter_count + > (large_fields->num_insertions * Py_MAX_AVERAGE_PROBES_PER_INSERT) ) { + if (!DICT_IS_PARANOID(mp)) { + return 1; + } + } + } + } + return 0; +} diff -r 3be60a4c8c63 Objects/object.c --- a/Objects/object.c Fri Jan 20 11:01:06 2012 -0500 +++ b/Objects/object.c Wed Jan 25 12:28:05 2012 -0500 @@ -769,6 +769,22 @@ } Py_hash_t +_Py_RandomizedHashBytes(unsigned char *p, Py_ssize_t len) +{ + Py_uhash_t x; + Py_ssize_t i; + + x = _Py_HashSecret.prefix ^ (Py_uhash_t) *p << 7; + for (i = 0; i < len; i++) + x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++; + x ^= (Py_uhash_t) len; + x ^= _Py_HashSecret.suffix; + if (x == -1) + x = -2; + return x; +} + +Py_hash_t PyObject_HashNotImplemented(PyObject *v) { PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'", @@ -797,6 +813,27 @@ return PyObject_HashNotImplemented(v); } +_Py_HashSecret_t _Py_HashSecret; + +Py_hash_t +PyObject_RandomizedHash(PyObject *v) +{ + /* + We only randomize the hash for a few specific types for now, to ease + backporting. 
Ultimately this could be a new entry in PyTypeObject + */ + if (Py_TYPE(v) == &PyUnicode_Type) { + return _PyUnicode_RandomizedHash(v); + } else if (Py_TYPE(v) == &PyBytes_Type) { + return _PyBytes_RandomizedHash(v); + } + /* FIXME: for Python 2 backport extend to cover PyBuffer_Type */ + + /* Otherwise, just use the regular hash value: */ + return PyObject_Hash(v); +} + + PyObject * PyObject_GetAttrString(PyObject *v, const char *name) { diff -r 3be60a4c8c63 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Fri Jan 20 11:01:06 2012 -0500 +++ b/Objects/unicodeobject.c Wed Jan 25 12:28:05 2012 -0500 @@ -11242,6 +11242,57 @@ } #undef HASH +Py_hash_t +_PyUnicode_RandomizedHash(PyObject *self) +{ + Py_ssize_t len; + Py_uhash_t x; + + if (PyUnicode_READY(self) == -1) + return -1; + len = PyUnicode_GET_LENGTH(self); + if (len == 0) { + _PyUnicode_HASH(self) = 0; + return 0; + } + + /* Alternate definition of the hash function as a macro to above */ +#define HASH(P) \ + x = _Py_HashSecret.prefix ^ (Py_uhash_t)*P << 7; \ + while (--len >= 0) \ + x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++; + + switch (PyUnicode_KIND(self)) { + case PyUnicode_1BYTE_KIND: { + const unsigned char *c = PyUnicode_1BYTE_DATA(self); + HASH(c); + break; + } + case PyUnicode_2BYTE_KIND: { + const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self); + HASH(s); + break; + } + default: { + Py_UCS4 *l; + assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND && + "Impossible switch case in unicode_hash"); + l = PyUnicode_4BYTE_DATA(self); + HASH(l); + break; + } + } + x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self); + x ^= _Py_HashSecret.suffix; + + if (x == -1) + x = -2; + /* we do *not* update _PyUnicode_HASH(self) here */ + return x; +} +#undef HASH + + PyDoc_STRVAR(index__doc__, "S.index(sub[, start[, end]]) -> int\n\ \n\ diff -r 3be60a4c8c63 PCbuild/pythoncore.vcproj --- a/PCbuild/pythoncore.vcproj Fri Jan 20 11:01:06 2012 -0500 +++ b/PCbuild/pythoncore.vcproj Wed Jan 25 12:28:05 2012 -0500 @@ -1891,6 +1891,10 
@@ > + + diff -r 3be60a4c8c63 Python/pythonrun.c --- a/Python/pythonrun.c Fri Jan 20 11:01:06 2012 -0500 +++ b/Python/pythonrun.c Wed Jan 25 12:28:05 2012 -0500 @@ -71,6 +71,7 @@ extern void _PyUnicode_Fini(void); extern int _PyLong_Init(void); extern void PyLong_Fini(void); +extern void _PyRandom_Init(void); extern int _PyFaulthandler_Init(void); extern void _PyFaulthandler_Fini(void); @@ -219,6 +220,8 @@ if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p); + _PyRandom_Init(); + interp = PyInterpreterState_New(); if (interp == NULL) Py_FatalError("Py_Initialize: can't make first interpreter"); diff -r 3be60a4c8c63 Python/random.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Python/random.c Wed Jan 25 12:28:05 2012 -0500 @@ -0,0 +1,268 @@ +#include "Python.h" +#ifdef MS_WINDOWS +#include +#else +#include +#endif + +static int random_initialized = 0; + +#ifdef MS_WINDOWS +typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ + LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ + DWORD dwFlags ); +typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ + BYTE *pbBuffer ); + +static CRYPTGENRANDOM pCryptGenRandom = NULL; +/* This handle is never explicitly released. Instead, the operating + system will release it when the process terminates. */ +static HCRYPTPROV hCryptProv = 0; + +static int +win32_urandom_init(int raise) +{ + HINSTANCE hAdvAPI32 = NULL; + CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; + + /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */ + hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if(hAdvAPI32 == NULL) + goto error; + + /* Obtain pointers to the CryptoAPI functions. This will fail on some early + versions of Win95. 
*/ + pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( + hAdvAPI32, "CryptAcquireContextA"); + if (pCryptAcquireContext == NULL) + goto error; + + pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, + "CryptGenRandom"); + if (pCryptGenRandom == NULL) + goto error; + + /* Acquire context */ + if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, + PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + goto error; + + return 0; + +error: + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Fail to initialize Windows random API (CryptoGen)"); + return -1; +} + +/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen + API. Return 0 on success, or -1 on error. */ +static int +win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + Py_ssize_t orig_size = size; + Py_ssize_t chunk; + + if (hCryptProv == 0) + { + if (win32_urandom_init(raise) == -1) + return -1; + } + + while (size > 0) + { + chunk = Py_MIN(size, INT_MAX); + if (!pCryptGenRandom(hCryptProv, chunk, buffer)) + { + /* CryptGenRandom() failed */ + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Fail to initialized the randomized hash " + "secret using CryptoGen)"); + return -1; + } + buffer += chunk; + size -= chunk; + } + return 0; +} + +#else + +/* Read size bytes from /dev/urandom into buffer. + Call Py_FatalError() on error. */ +static void +dev_urandom_noraise(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + assert (0 < size); + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + Py_FatalError("Fail to open /dev/urandom"); + + while (0 < size) + { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + Py_FatalError("Fail to read bytes from /dev/urandom"); + break; + } + buffer += n; + size -= (Py_ssize_t)n; + } + close(fd); +} + +/* Read size bytes from /dev/urandom into buffer. 
+ Return 0 on success, raise an exception and return -1 on error. */ +static int +dev_urandom_python(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + if (size <= 0) + return 0; + + Py_BEGIN_ALLOW_THREADS + fd = open("/dev/urandom", O_RDONLY); + Py_END_ALLOW_THREADS + if (fd < 0) + { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom"); + return -1; + } + + Py_BEGIN_ALLOW_THREADS + do { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + break; + buffer += n; + size -= (Py_ssize_t)n; + } while (0 < size); + Py_END_ALLOW_THREADS + + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + if (n < 0) + PyErr_SetFromErrno(PyExc_OSError); + else + PyErr_Format(PyExc_RuntimeError, + "Fail to read %zi bytes from /dev/urandom", + size); + close(fd); + return -1; + } + close(fd); + return 0; +} +#endif + +/* Fill buffer with pseudo-random bytes generated by a linear congruential + generator (LCG): + + x(n+1) = (x(n) * 214013 + 2531011) % 2^32 + + Use bits 23..16 of x(n) to generate a byte. */ +static void +lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size) +{ + size_t index; + unsigned int x; + + x = x0; + for (index=0; index < size; index++) { + x *= 214013; + x += 2531011; + /* modulo 2 ^ (8 * sizeof(int)) */ + buffer[index] = (x >> 16) & 0xff; + } +} + +/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic + use, from the operating system random number generator (RNG). + + Return 0 on success, raise an exception and return -1 on error.
*/ +int +_PyOS_URandom(void *buffer, Py_ssize_t size) +{ + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "negative argument not allowed"); + return -1; + } + if (size == 0) + return 0; + +#ifdef MS_WINDOWS + return win32_urandom((unsigned char *)buffer, size, 1); +#else + return dev_urandom_python((char*)buffer, size); +#endif +} + +void +_PyRandom_Init(void) +{ + char *env; + void *secret = &_Py_HashSecret; + Py_ssize_t secret_size = sizeof(_Py_HashSecret); + + if (random_initialized) + return; + random_initialized = 1; + + env = Py_GETENV("PYTHONHASHSEED"); + if (env && *env != '\0') { + char *endptr = env; + unsigned long seed; + seed = strtoul(env, &endptr, 10); + if (*endptr != '\0' + || seed > 4294967295UL + || (errno == ERANGE && seed == ULONG_MAX)) + { + Py_FatalError("PYTHONHASHSEED must be an integer " + "in range [0; 4294967295]"); + } + if (seed == 0) { + /* disable the randomized Unicode hash */ + memset(secret, 0, secret_size); + } + else { + lcg_urandom(seed, (unsigned char*)secret, secret_size); + } + } + else { +#ifdef MS_WINDOWS +#if 1 + (void)win32_urandom((unsigned char *)secret, secret_size, 0); +#else + /* fast but weak RNG (fast initialization, weak seed) */ + _PyTime_timeval t; + unsigned int seed; + _PyTime_gettimeofday(&t); + seed = (unsigned int)t.tv_sec; + seed ^= t.tv_usec; + seed ^= getpid(); + lcg_urandom(seed, (unsigned char*)secret, secret_size); +#endif +#else + dev_urandom_noraise((char*)secret, secret_size); +#endif + } +} + diff -r 3be60a4c8c63 Python/sysmodule.c --- a/Python/sysmodule.c Fri Jan 20 11:01:06 2012 -0500 +++ b/Python/sysmodule.c Wed Jan 25 12:28:05 2012 -0500 @@ -993,6 +993,20 @@ 10. 
Number of stack pops performed by call_function()" ); +PyDoc_STRVAR(_getrandomizedhash_doc, +"_getrandomizedhash(object) -> int\n\ +\n\ +For selftests only: return PyObject_RandomizedHash for the given object."); + +static PyObject * +sys__getrandomizedhash(PyObject *self, PyObject *arg) +{ + return PyLong_FromSsize_t(PyObject_RandomizedHash(arg)); + /* FIXME: when backporting, note the Py_hash_t to long change, + and make it just PyLong_FromLong on Python 2.* */ +} + + #ifdef __cplusplus extern "C" { #endif @@ -1093,6 +1107,8 @@ {"settrace", sys_settrace, METH_O, settrace_doc}, {"gettrace", sys_gettrace, METH_NOARGS, gettrace_doc}, {"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc}, + {"_getrandomizedhash", (PyCFunction)sys__getrandomizedhash, + METH_O, _getrandomizedhash_doc}, {NULL, NULL} /* sentinel */ };