"""
Micro-benchmark for the Python UTF-8 encoder.

Run it with:

    ./python.orig benchmark.py script bench_str.py --file=orig
    ./python.patched benchmark.py script bench_str.py --file=patched
    ./python.patched benchmark.py compare_to orig patched

Download benchmark.py from:

    https://bitbucket.org/haypo/misc/raw/tip/python/benchmark.py
"""


def run_benchmark(bench):
    """Register UTF-8 encoding timings for strings containing lone surrogates.

    For every combination of error handler and data pattern, one group is
    opened on *bench*; inside that group one timing is registered per input
    length. Each timing builds ``data`` from the pattern expression in its
    setup code and measures ``data.encode("utf-8", <handler>)``.

    Args:
        bench: object exposing ``start_group(title)`` and
            ``timeit(setup=..., stmt=..., name=...)`` (presumably the runner
            supplied by benchmark.py -- confirm against that script).

    Returns:
        None. The only effects are the calls made on *bench*.
    """
    error_handlers = (
        'ignore',
        'replace',
        'surrogateescape',
        'surrogatepass',
        'backslashreplace',
    )
    # Source expressions evaluated in the setup code; "\udcff" is a lone
    # surrogate, which the UTF-8 encoder has to pass to the error handler.
    data_patterns = (
        r'"\udcff" * length',
        r'"a" * length + "\udcff"',
        r'("a" * 99 + "\udcff" * 99) * length',
        r'("\udcff" * 99 + "a") * length',
        r'"\udcff" + "a" * length',
    )
    # NOTE(review): the lengths are not in ascending order; the sequence is
    # kept exactly as it was so the order of registered timings is unchanged.
    lengths = ('10**1', '10**3', '10**2', '10**4')

    for handler in error_handlers:
        # The measured statement depends only on the error handler.
        encode_stmt = 'data.encode("utf-8", %r)' % handler
        for expr in data_patterns:
            bench.start_group("%s: %s" % (handler, expr))
            for size in lengths:
                bench.timeit(
                    setup=r'length = %s; data = %s' % (size, expr),
                    stmt=encode_stmt,
                    name='length=%s' % size,
                )