"""
Micro-benchmark for the Python UTF-8 decoder.

Run it with:

    ./python.orig benchmark.py script bench_str.py --file=orig
    ./python.patched benchmark.py script bench_str.py --file=patched
    ./python.patched benchmark.py compare_to orig patched

Download benchmark.py from:
https://bitbucket.org/haypo/misc/raw/tip/python/benchmark.py
"""


def run_benchmark(bench):
    """Register the UTF-8 decoding timings on *bench*.

    One group times strict decoding of pure-ASCII input; then one group per
    error handler times decoding of input in which every 100-byte unit ends
    with two invalid bytes (``\\x80\\xff``).

    bench: object providing ``start_group(name)`` and
        ``timeit(setup=..., stmt=..., name=...)``.
        NOTE(review): presumably the harness object passed in by
        benchmark.py -- confirm against that script.
    """
    # Repeat counts stay as source text: they are spliced into the setup
    # code and the benchmark label, not evaluated here.  Ascending order
    # keeps the report sorted by input size.
    repeat_counts = ('10**1', '10**2', '10**3', '10**4')
    error_handlers = ('ignore', 'replace', 'surrogateescape',
                      'backslashreplace')

    bench.start_group('valid UTF-8 (strict)')
    for length in repeat_counts:
        bench.timeit(setup='data = (b"a" * 100) * %s' % length,
                     stmt='data.decode("utf-8")',
                     name='100 x %s bytes' % length)

    for error_handler in error_handlers:
        bench.start_group(error_handler)
        for length in repeat_counts:
            # 98 ASCII bytes + 2 invalid bytes = exactly 100 bytes per unit,
            # so the "100 x N bytes" label is accurate and the input size
            # matches the strict group.  The raw string keeps the
            # backslashes literal until the setup code itself is run.
            bench.timeit(setup=r'data = (b"a" * 98 + b"\x80\xff") * %s'
                               % length,
                         stmt='data.decode("utf-8", "%s")' % error_handler,
                         name='100 x %s bytes' % length)