"""Micro-benchmark of several ways to detect surrogate code points in a str.

Six interchangeable detectors (``_has_surrogates1`` .. ``_has_surrogates6``)
are defined at module level.  When the file is run as a script, each one is
timed over the same set of randomly generated sample strings and one timing
line is printed per detector, in definition order.
"""
import re
import random
import timeit
import functools

# Variant 1: compiled regex matching a code point in U+DC00..U+DFFF that is
# neither preceded by one in U+D800..U+DBFF nor followed by another one in
# U+DC00..U+DFFF.  The backslashes of the regex anchors are doubled because
# "\A" and "\Z" are not valid *string* escapes; the resulting pattern text is
# the same as before, minus the invalid-escape warning.
_has_surrogates1 = re.compile(
    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search

# Variant 2: compiled regex matching any code point in U+DC80..U+DCFF.
_has_surrogates2 = re.compile('[\udc80-\udcff]').search

# Every code point in U+DC80..U+DCFF as a set, for the membership-test variants.
_pep383_surrogates = set(map(chr, range(0xDC80, 0xDCFF+1)))


def _has_surrogates3(s):
    """Return True if any character of *s* is in ``_pep383_surrogates``.

    Variant 3: generator expression fed to ``any()``.
    """
    return any(c in _pep383_surrogates for c in s)


def _has_surrogates4(s):
    """Return True if any character of *s* is in ``_pep383_surrogates``.

    Variant 4: explicit loop with an early return.
    """
    for c in s:
        if c in _pep383_surrogates:
            return True
    return False


def _has_surrogates5(s):
    """Return a match object if *s* has a U+DC80..U+DCFF character, else None.

    Variant 5: goes through the module-level ``re.search`` on every call
    (kept uncompiled on purpose so it can be compared with variant 2).
    """
    return re.search('[\udc80-\udcff]', s)


# Variant 6: same search as variant 5, bound with functools.partial instead
# of a Python-level wrapper function.
_has_surrogates6 = functools.partial(re.search, '[\udc80-\udcff]')

# Statement timed for each detector: one "pass" calls the detector once on
# every sample string.
_BENCH_STMT = (
    'for sample in samples:\n'
    '    _has_surrogates(sample)\n'
)

# The setup below imports from __main__, so the benchmark only works (and
# only makes sense) when this file is executed as a script.  Guarding it also
# keeps a plain import from building the full code point table.
if __name__ == '__main__':
    print('Generating chars...')
    # One string holding every code point from U+0000 to U+10FFFF.
    chars = ''.join(map(chr, range(0x110000)))
    print('Generating samples...')
    # 100 strings of 1000 distinct, randomly chosen characters each.
    samples = [''.join(random.sample(chars, 1000)) for x in range(100)]

    N = 100  # number of timed passes per detector
    for _has_surrogates in (_has_surrogates1, _has_surrogates2,
                            _has_surrogates3, _has_surrogates4,
                            _has_surrogates5, _has_surrogates6):
        t = timeit.Timer(_BENCH_STMT,
                         'from __main__ import samples, _has_surrogates')
        # timeit() returns total seconds for N passes; report milliseconds
        # per pass.  (Previously this was scaled by N*10, which only equals
        # 1000 for N == 100, and the result was mislabelled as usec.)
        print("%6.2f msec/pass" % (1e3 * t.timeit(number=N) / N))