"""
Micro-benchmark for the error handlers of the Python UTF-8 encoder. Run it with:

./python.orig benchmark.py script bench_str.py --file=orig
./python.patched benchmark.py script bench_str.py --file=patched
./python.patched benchmark.py compare_to orig patched

Download benchmark.py from:

https://bitbucket.org/haypo/misc/raw/tip/python/benchmark.py
"""
def run_benchmark(bench):
    """Register the UTF-8 encoder error-handler benchmarks on *bench*.

    For every (error handler, input pattern) pair a group named
    "<errors>: <pattern>" is opened with ``bench.start_group()``, then one
    ``bench.timeit()`` entry is added for each input length.

    bench -- object providing ``start_group(name)`` and
             ``timeit(setup=..., stmt=..., name=...)``; ``setup`` and
             ``stmt`` are passed as strings of Python source (presumably
             executed by the benchmark runner -- confirm in benchmark.py).
    """
    error_handlers = (
        'ignore',
        'replace',
        'surrogateescape',
        'surrogatepass',
        'backslashreplace',
    )
    # Raw strings: the "\udcff" escape must reach the generated setup code
    # as source text.  U+DCFF is a lone surrogate, which UTF-8 cannot encode
    # directly, so every pattern forces the error handler to run.
    data_patterns = (
        r'"\udcff" * length',
        r'"a" * length + "\udcff"',
        r'("a" * 99 + "\udcff" * 99) * length',
        r'("\udcff" * 99 + "a") * length',
        r'"\udcff" + "a" * length',
    )
    # NOTE(review): lengths are not in ascending order (10**3 comes before
    # 10**2); kept as-is so the registration order is unchanged.
    lengths = ('10**1', '10**3', '10**2', '10**4')

    for handler in error_handlers:
        # The timed statement depends only on the handler, so build it once.
        encode_stmt = 'data.encode("utf-8", {!r})'.format(handler)
        for pattern in data_patterns:
            bench.start_group('{}: {}'.format(handler, pattern))
            for length in lengths:
                setup_code = r'length = {}; data = {}'.format(length, pattern)
                bench.timeit(
                    setup=setup_code,
                    stmt=encode_stmt,
                    name='length={}'.format(length),
                )