# -*- coding: utf-8 -*-
"""Micro-benchmark for codec decoding speed.

For every requested encoding (the command-line arguments, or a built-in
default list when none are given) a string made of ``n`` repetitions of each
character in ``chars`` is encoded, and decoding of the resulting bytes is
timed with ``timeit``.  One result line is printed for every
(encoding, character) pair the encoding is able to represent.
"""
from __future__ import division, print_function, unicode_literals
import sys
import timeit
import math

try:
    ascii
except NameError:
    # No ascii() builtin available (Python 2): fall back to repr().
    ascii = repr


def bench_decode(encoding, string):
    """Time how fast ``eval(string)`` can be decoded from *encoding*.

    Args:
        encoding: Codec name; used both for ``.encode()`` and for
            ``codecs.getdecoder()`` inside the timed snippet.
        string: Source text of a Python expression producing the text to
            benchmark (for example ``"'A'*10000"``).  It is passed to
            ``eval``, so it must only ever come from trusted, internally
            built input.  It also serves as the label of the output line.

    Returns:
        None.  If the text cannot be encoded with *encoding*, the function
        returns without printing anything.  Otherwise it prints one line
        holding the encoding, the expression and the decoding throughput in
        encoded bytes per microsecond, then flushes stdout.

    Raises:
        AssertionError: if decoding the encoded bytes does not give back
            the original text (only when assertions are enabled).
    """
    try:
        # Evaluate the expression once and reuse it for the round-trip check.
        text = eval(string)
        x = text.encode(encoding)
    except UnicodeEncodeError:
        # This encoding cannot represent the text: nothing to benchmark.
        return
    assert x.decode(encoding) == text

    # The timed statement runs in timeit's own namespace, so the decoder and
    # the encoded payload are rebuilt there from their reprs.
    setup = '''
import codecs
d = codecs.getdecoder({0!r})
x = {1!r}
'''.format(encoding, x)
    repeat = 10
    number = 100
    r = timeit.repeat('d(x)', setup, repeat=repeat, number=number)
    best = min(r)
    # Microseconds per single decode call, taken from the fastest run.
    usec = best * 1e6 / number
    # Drop "u'" prefixes from the label so it reads the same whether or not
    # the repr() fallback added them.
    print("%-8s %-30s %.0f" % (encoding, string.replace("u'", "'"),
                               len(x) / usec))
    sys.stdout.flush()


# Number of repetitions of each test character in the benchmarked text.
n = 10000
# Test characters: U+0041, U+0080, U+0100, U+8000 and U+10000.
chars = ('A', '\u0080', '\u0100', '\u8000', '\U00010000')
# Encodings benchmarked when none are named on the command line.
DEFAULT_ENCODINGS = ('ascii', 'latin1', 'utf-8', 'utf-16le', 'utf-16be',
                     'utf-32le', 'utf-32be')


def main(argv=None):
    """Run the benchmark for every requested encoding.

    Args:
        argv: Sequence of encoding names.  When None, ``sys.argv[1:]`` is
            used.  An empty sequence selects ``DEFAULT_ENCODINGS``.

    Prints one result line per benchmarked (encoding, character) pair and an
    empty line after each encoding.
    """
    encodings = sys.argv[1:] if argv is None else argv
    if not encodings:
        encodings = DEFAULT_ENCODINGS
    for encoding in encodings:
        # ``i`` is only needed by the disabled variants kept below.
        for i, ch1 in enumerate(chars):
            bench_decode(encoding, '%s*%d' % (ascii(ch1), n))
            # Disabled variants mixing two different characters:
            # for ch2 in chars[:i]:
            #     bench_decode(encoding, ' %s+%s*%d' % (ascii(ch1), ascii(ch2), n - 1))
            # for ch2 in chars[i + 1:]:
            #     bench_decode(encoding, ' %s*%d+%s' % (ascii(ch1), n - 1, ascii(ch2)))
        print()


if __name__ == '__main__':
    main()