""" Benchmark script for #11454 """ import re import pickle import random import timeit import marshal import functools # warning: after this point the code gets progressively worse print('Testing runtime of the _has_surrogates functions') print('Generating chars...') chars = ''.join(map(chr, range(0x110000))) print('Generating samples...') samples = [''.join(random.sample(chars, 1000)) for x in range(100)] _has_surrogates1 = re.compile( '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search _has_surrogates2 = re.compile('[\udc80-\udcff]').search _pep383_surrogates = set(map(chr, range(0xDC80, 0xDCFF+1))) def _has_surrogates3(s): return any(c in _pep383_surrogates for c in s) def _has_surrogates4(s): for c in s: if c in _pep383_surrogates: return True return False def _has_surrogates5(s): return re.search('[\udc80-\udcff]', s) _has_surrogates6 = functools.partial(re.search, '[\udc80-\udcff]') _pep383_surrogates_range = range(0xDC80, 0xDCFF+1) def _has_surrogates7(s): for c in map(ord, s): if c in _pep383_surrogates_range: return True return False def _has_surrogates8(s): for c in map(ord, s): if 0xDC80 <= c <= 0xDCFF: return True return False _has_surrogates9 = re.compile('[^\udc80-\udcff]*\Z').match hs_funcs = [_has_surrogates1, _has_surrogates2, _has_surrogates3, _has_surrogates4, _has_surrogates5, _has_surrogates6, _has_surrogates7, _has_surrogates8, _has_surrogates9] for n, _has_surrogates in enumerate(hs_funcs, start=1): s = """\ for sample in samples: _has_surrogates(sample) """ t = timeit.Timer(s, 'from __main__ import samples, _has_surrogates') print("%6.2f <- _has_surrogates%d" % (t.timeit(number=100), n)) print() print("Testing startup time of re.compile vs pickle.loads") N = 100000 s = "r = re.compile('[\\udc80-\\udcff]')" t = timeit.Timer(s, 'import re') print("%6.2f <- re.compile" % t.timeit(number=N)) s = "r = pickle.loads(p)" p = pickle.dumps(re.compile('[\udc80-\udcff]')) t = timeit.Timer(s, 'import pickle; from __main__ import p') print("%6.2f <- pickle.loads" % t.timeit(number=N))