"""Micro-benchmarks for line-oriented reads and writes on ``io.StringIO``.

``run_benchmark`` is the entry point. It expects a ``bench`` object that
provides ``start_group(name)`` and ``bench_func(name, func, *args)``.
"""

import io

# (label, sample character) pairs. Each sample character is repeated or
# embedded to build the benchmark text for that label.
_CHARSETS = (
    ('ascii', 'a'),
    ('latin1', '\xe9'),
    ('bmp', '\u20ac'),
    ('non-bmp', '\U0010ffff'),
)

# Characters per line (newline excluded) in the "long lines" benchmarks.
_LONG_LINE_WIDTH = 300

# Number of lines in the "very long lines" benchmarks.
_VERY_LONG_LINE_COUNT = 10


def writer(n, data):
    """Write every string in *data* to a fresh ``StringIO``, *n* times over.

    Returns the accumulated buffer contents.
    """
    s = io.StringIO()
    # One write() call per chunk on purpose: the per-call cost is what the
    # benchmark measures, so the writes are not batched.
    for _ in range(n):
        for text in data:
            s.write(text)
    return s.getvalue()


def writer_reader(n, data):
    """Write *data* to a ``StringIO`` *n* times, then iterate over its lines.

    Returns the accumulated buffer contents.
    """
    s = io.StringIO()
    for _ in range(n):
        for text in data:
            s.write(text)
    # Rewind so that the iteration below starts from the first line.
    s.seek(0)
    for _ in s:
        pass
    return s.getvalue()


def reader(data):
    """Iterate over every line of a ``StringIO`` initialised with *data*."""
    s = io.StringIO(data)
    for _ in s:
        pass


def run_benchmark(bench):
    """Run the line benchmarks at sizes 10, 10**3 and 10**5."""
    bench_lines(bench, 10)
    bench_lines(bench, 10**3)
    bench_lines(bench, 10**5)


def bench_lines(bench, n):
    """Register the reader/writer benchmarks of size *n* on *bench*.

    Three families are registered, each once per entry of ``_CHARSETS``:

    * short lines: *n* lines built from three small chunks;
    * long lines: ``n // 300`` lines of 300 characters;
    * very long lines: 10 lines of ``n // 10`` characters.

    *bench* must provide ``start_group(name)`` and
    ``bench_func(name, func, *args)``.
    """
    bench.start_group("%s lines" % n)

    for charset, char in _CHARSETS:
        text = (char, 'bb', 'cc\n')
        bench.bench_func('reader short line %s' % charset,
                         reader, ''.join(text) * n)
        bench.bench_func('writer short line %s' % charset,
                         writer, n, text)
        bench.bench_func('writer-reader short line %s' % charset,
                         writer_reader, n, text)

    k = _LONG_LINE_WIDTH
    # The reader must consume the same number of lines the writers produce.
    # It previously always read k lines, so its workload ignored n.
    # For n < k this is 0 and all three benchmarks handle empty content.
    line_count = n // k
    for charset, char in _CHARSETS:
        text = char * k + '\n'
        bench.bench_func('reader long lines %s' % charset,
                         reader, text * line_count)
        bench.bench_func('writer long lines %s' % charset,
                         writer, line_count, [text])
        bench.bench_func('writer-reader long lines %s' % charset,
                         writer_reader, line_count, [text])

    k = _VERY_LONG_LINE_COUNT
    for charset, char in _CHARSETS:
        text = char * (n // k) + '\n'
        bench.bench_func('reader very long lines %s' % charset,
                         reader, text * k)
        bench.bench_func('writer very long lines %s' % charset,
                         writer, k, [text])
        bench.bench_func('writer-reader very long lines %s' % charset,
                         writer_reader, k, [text])

    # Switched off in the original via ``if 0``; kept unchanged and never run.
    if 0:
        data = 'abc\n' * n
        bench.bench_func('reader ascii', reader, data)
        data = 'ab\xe9\n' * n
        bench.bench_func('reader latin1', reader, data)
        data = 'ab\xe9\u20ac\n' * n
        bench.bench_func('reader bmp', reader, data)
        data = 'ab\xe9\u20ac\U0010ffff\n' * n
        bench.bench_func('reader non-bmp', reader, data)