""" Benchmark script for #11454

Times ten candidate ``_has_surrogates`` implementations against random
sample strings, then times how long the regex-based variants take to set
up (compile, compile with a purged ``re`` cache, or unpickle).

Run the file as a script to execute the benchmarks.  Importing it only
defines the candidates and the benchmark helpers.
"""
import re
import pickle
import random
import timeit
import marshal
import functools

# warning: after this point the code gets progressively worse

# NOTE: the regex anchors are spelled ``\\A`` / ``\\Z`` so that these
# non-raw literals contain no invalid escape sequences, while the \uXXXX
# escapes are still expanded by Python before the pattern reaches ``re``.

# 1: a code point in U+DC00-U+DFFF that is neither preceded by one in
#    U+D800-U+DBFF nor followed by another one in U+DC00-U+DFFF.
_has_surrogates1 = re.compile(
    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search

# 2: precompiled character class covering U+DC80-U+DCFF.
_has_surrogates2 = re.compile('[\udc80-\udcff]').search

# The 128 characters U+DC80..U+DCFF as a set, shared by candidates 3, 4, 10.
_pep383_surrogates = set(map(chr, range(0xDC80, 0xDCFF+1)))


def _has_surrogates3(s, _pep383_surrogates=_pep383_surrogates):
    """3: any() over a generator of set-membership tests.

    The set is bound as a default argument so the lookup is a local one.
    """
    return any(c in _pep383_surrogates for c in s)


def _has_surrogates4(s, _pep383_surrogates=_pep383_surrogates):
    """4: explicit loop with set-membership tests and early return."""
    for c in s:
        if c in _pep383_surrogates:
            return True
    return False


def _has_surrogates5(s):
    """5: module-level re.search() with the pattern given on every call."""
    return re.search('[\udc80-\udcff]', s)


# 6: same as 5, but with the pattern pre-bound through functools.partial.
_has_surrogates6 = functools.partial(re.search, '[\udc80-\udcff]')

# The code points 0xDC80..0xDCFF as a range object, used by candidate 7.
_pep383_surrogates_range = range(0xDC80, 0xDCFF+1)


def _has_surrogates7(s, _pep383_surrogates_range=_pep383_surrogates_range):
    """7: loop over ord() values, testing membership in a range object."""
    for c in map(ord, s):
        if c in _pep383_surrogates_range:
            return True
    return False


def _has_surrogates8(s):
    """8: loop over ord() values with a chained comparison."""
    for c in map(ord, s):
        if 0xDC80 <= c <= 0xDCFF:
            return True
    return False


# 9: match the whole string against "no character in U+DC80-U+DCFF".
#    NOTE: the truth value is inverted compared with the other candidates
#    (a match object means *no* such character was found); only the run
#    time is of interest here.
_has_surrogates9 = re.compile('[^\udc80-\udcff]*\\Z').match


def _has_surrogates10(s, isdisjoint=_pep383_surrogates.isdisjoint):
    """10: set.isdisjoint() against the characters of the string."""
    return not isdisjoint(s)


# Candidates in numeric order; position i holds _has_surrogates<i+1>.
hs_funcs = [_has_surrogates1, _has_surrogates2, _has_surrogates3,
            _has_surrogates4, _has_surrogates5, _has_surrogates6,
            _has_surrogates7, _has_surrogates8, _has_surrogates9,
            _has_surrogates10]

# Statement timed for every candidate; `samples` and `_has_surrogates`
# are supplied through the timer's globals.
_RUNTIME_STMT = """\
for sample in samples:
    _has_surrogates(sample)
"""

# Number of executions of each start-up snippet.
N = 10000

# (statement, setup) pairs for the start-up benchmark.  The statements are
# raw strings: the timed code therefore contains the \uXXXX escapes and
# the doubled backslash before Z literally, and compiles without
# invalid-escape warnings.  `p` is supplied through the timer's globals.
startup = [
    (r"r = re.compile('[\udc80-\udcff]').search", 'import re'),
    (r"r = re.compile('[^\udc80-\udcff]*\\Z').match", 'import re'),
    (r"r = re.compile('[\udc80-\udcff]').search; purge()",
     'import re; from re import purge'),
    (r"r = re.compile('[^\udc80-\udcff]*\\Z').match; purge()",
     'import re; from re import purge'),
    ("r = pickle.loads(p)", 'import pickle'),
]


def bench_runtime(samples, number=100):
    """Time every candidate in hs_funcs over *samples* and print the result.

    samples -- iterable of strings passed one by one to each candidate.
    number  -- how many times the loop over all samples is executed.
    """
    for n, func in enumerate(hs_funcs, start=1):
        # Pass the objects in explicitly instead of importing them from
        # __main__, so this works no matter how the module was loaded.
        timer = timeit.Timer(
            _RUNTIME_STMT,
            globals={'samples': samples, '_has_surrogates': func})
        print("%6.2f <- _has_surrogates%d" % (timer.timeit(number=number), n))


def bench_startup(number=N):
    """Time each snippet in `startup` *number* times and print the result."""
    # Start from an empty regex cache, then build the pickle that the last
    # snippet loads (same order as the original script).
    re.purge()
    p = pickle.dumps(re.compile('[\udc80-\udcff]'))
    for code, setup in startup:
        timer = timeit.Timer(code, setup, globals={'p': p})
        print("%6.2f <- %s" % (timer.timeit(number=number), code))


def main():
    """Generate the sample data and run both benchmarks."""
    print('Testing runtime of the _has_surrogates functions')
    print('Generating chars...')
    # Every code point from 0 up to (but excluding) 0x110000.
    chars = ''.join(map(chr, range(0x110000)))
    print('Generating samples...')
    # 100 strings of 1000 distinct, randomly chosen characters each.
    samples = [''.join(random.sample(chars, 1000)) for _ in range(100)]
    bench_runtime(samples)

    print()
    print("Testing startup time")
    bench_startup()


if __name__ == '__main__':
    main()