"""Micro-benchmarks for ``io.StringIO`` writing and line iteration."""

import io


def writer(n, data):
    """Write every item of *data* to a fresh ``StringIO``, *n* times over.

    *data* is any iterable of strings.  A plain ``str`` is accepted too, in
    which case it is iterated character by character, so each ``write()``
    call receives a single character.

    Returns the accumulated buffer contents.
    """
    s = io.StringIO()
    for _ in range(n):
        for text in data:
            s.write(text)
    return s.getvalue()


def writer_reader(n, data):
    """Same writes as ``writer``, then rewind and iterate the buffer by line.

    Returns the accumulated buffer contents (fetched after the read pass).
    """
    s = io.StringIO()
    for _ in range(n):
        for text in data:
            s.write(text)
    # Rewind so that the line iteration below starts at the beginning.
    s.seek(0)
    for _ in s:
        pass
    return s.getvalue()


def reader(data):
    """Iterate line by line over a ``StringIO`` pre-filled with *data*.

    The lines are discarded; the function returns ``None``.
    """
    s = io.StringIO(data)
    for _ in s:
        pass


def run_benchmark(bench):
    """Register every StringIO benchmark case on *bench*.

    *bench* must provide ``bench_func(name, func, *args)``; nothing else is
    required of it here.  (Presumably a pyperf-style runner -- confirm
    against the caller.)

    Four character classes are exercised for each scenario: ASCII, Latin-1,
    BMP and non-BMP code points.
    """
    n = 10**5

    # One representative character per class.
    charsets = (
        ('ascii', 'a'),
        ('latin1', '\xe9'),
        ('bmp', '\u20ac'),
        ('non-bmp', '\U0010ffff'),
    )

    # Many short writes: three small chunks per loop, one line per loop.
    for charset, char in charsets:
        text = (char, 'bb', 'cc\n')
        bench.bench_func('writer %s' % charset, writer, n, text)
        bench.bench_func('writer reader %s' % charset, writer_reader, n, text)

    # Lines of k characters; the loop count is scaled down by k.
    # NOTE(review): ``text`` is a str here, so writer()/writer_reader()
    # iterate it per character and issue one write() per character rather
    # than one write() per line.  Behaviour is kept as-is; confirm whether
    # ``(text,)`` was intended.
    k = 300
    for charset, char in charsets:
        text = char * k + '\n'
        bench.bench_func('writer long lines %s' % charset,
                         writer, n // k, text)
        bench.bench_func('writer-reader long lines %s' % charset,
                         writer_reader, n // k, text)

    # Lines of n // 10 characters, written 10 times (same per-character
    # caveat as above).
    for charset, char in charsets:
        text = char * (n // 10) + '\n'
        bench.bench_func('writer very long lines %s' % charset,
                         writer, 10, text)
        bench.bench_func('writer-reader very long lines %s' % charset,
                         writer_reader, 10, text)

    # Read-only cases: n copies of a short line.  Each successive line adds
    # a character from the next wider class.
    for charset, line in (
        ('ascii', 'abc\n'),
        ('latin1', 'ab\xe9\n'),
        ('bmp', 'ab\xe9\u20ac\n'),
        ('non-bmp', 'ab\xe9\u20ac\U0010ffff\n'),
    ):
        bench.bench_func('reader %s' % charset, reader, line * n)