from __future__ import print_function from __future__ import unicode_literals import time import bisect import random import operator NUM_INTS = 1000000 NUM_INT_BISECTS = 5000 NUM_BYTES = 500000 NUM_BYTE_BISECTS = 5000 BYTES_SIZE_RANGE = 1024, 8192 NUM_STRS = 500000 NUM_STR_BISECTS = 5000 STR_SIZE_RANGE = 512, 4096 start = time.time() def bench_bisect(name, L, bisect_list, insort_list, key): L1 = list(L) L2 = list(L) L2_keys = [key(x) for x in L2] #L3 = list(L) #L3_keys = [key(x) for x in L3] start1 = time.time() for x in bisect_list: bisect.bisect_left(L1, x, key=key) end1 = time.time() start2 = time.time() for x in bisect_list: bisect.bisect_left(L2_keys, key(x)) end2 = time.time() print("Bisect for %s, with keys: %0.8f, without keys %0.8f" % (name, end1-start1, end2-start2)) bisect_time = end1-start1 + end2-start2 start1 = time.time() for x in insort_list: bisect.insort_left(L1, x, key=key) end1 = time.time() start2 = time.time() for x in insort_list: xval = key(x) idx = bisect.bisect_left(L2_keys, xval) L2_keys.insert(idx, xval) L2.insert(idx, x) end2 = time.time() print("Insort for %s, with keys: %0.8f, without keys %0.8f" % (name, end1-start1, end2-start2)) bisect_time += end1-start1 + end2-start2 #start1 = end1 = 0 #start1 = time.time() #for x in insort_list: #end1 = time.time() #print("DEBUG linear stopped at a step %d: %s, %s" % (i, xval, key(y))) #tart2 = time.time() #for x in insort_list: #end2 = time.time() #print("DEBUG linear stopped at a step %d: %s, %s" % (i, xval, y)) #print("Linear search for %s: %0.8f" % # (name, end2 - start2, )) return bisect_time, 0 # end1-start1 + end2-start2 def test_ints(): key = abs bisect_time = 0.0 linear_time = 0.0 for spread in [0.25, 2.0]: random_ints = sorted((-random.randrange(0, int(NUM_INTS*spread)) for x in xrange(NUM_INTS)), key=key) insort_list = [-random.randrange(0, int(NUM_INTS*spread)) for x in xrange(NUM_INT_BISECTS)] bisect_list = [-random.randrange(0, int(NUM_INTS*spread)) for x in xrange(NUM_INT_BISECTS)] t_bisect, t_linear = bench_bisect("int%dx%0.2f" % (NUM_INTS, spread), random_ints, insort_list, bisect_list, key) bisect_time += t_bisect linear_time += t_linear return bisect_time, linear_time def test_byte_strings(): urandom = open('/dev/urandom') random_strs = [] for x in xrange(NUM_BYTES): random_strs.append(urandom.read(random.randrange(*BYTES_SIZE_RANGE))) insort_list = [] bisect_list = [] for x in xrange(NUM_BYTE_BISECTS): insort_list.append(urandom.read( random.randrange(*BYTES_SIZE_RANGE))) bisect_list.append(urandom.read( random.randrange(*BYTES_SIZE_RANGE))) bisect_time = 0.0 linear_time = 0.0 for key in [len, str.lower, str.strip]: test_list = sorted(random_strs, key=key) t_bisect, t_linear = bench_bisect("byte_%s" % (key.__name__), test_list, insort_list, bisect_list, key) bisect_time += t_bisect linear_time += t_linear return bisect_time, linear_time def test_unicode_strings(): random_strs = [] for x in xrange(NUM_STRS): size = random.randrange(*BYTES_SIZE_RANGE) random_strs.append(''.join(unichr(random.randrange(0, 65536)) for x in xrange(size))) insort_list = [] bisect_list = [] for x in xrange(NUM_BYTE_BISECTS): size = random.randrange(*BYTES_SIZE_RANGE) insort_list.append(''.join(unichr(random.randrange(0, 65536)) for x in xrange(size))) size = random.randrange(*BYTES_SIZE_RANGE) bisect_list.append(''.join(unichr(random.randrange(0, 65536)) for x in xrange(size))) bisect_time = 0.0 linear_time = 0.0 for key in [len, unicode.lower, unicode.strip]: test_list = sorted(random_strs, key=key) t_bisect, t_linear = bench_bisect("unicode_%s" % (key.__name__), test_list, insort_list, bisect_list, key) bisect_time += t_bisect linear_time += t_linear return bisect_time, linear_time results = [] results.append(test_ints()) results.append(test_byte_strings()) #results.append(test_unicode_strings()) total_bisect_time = sum(x[0] for x in results) total_linear_time = sum(x[1] for x in results) end = time.time() print("Total bisect time: %0.8f" % total_bisect_time) print("(Total linear time: %0.8f)" % total_linear_time) print("Total time without bullshit: %0.8f" % (end-start - total_linear_time)) print("Total time: %0.8f" % (end-start))