"""Collect per-tag statistics over the tagged object stream of compiled files.

Each file named on the command line is opened in binary mode, a fixed-size
header is skipped, and the remaining bytes are walked as a sequence of
objects, each introduced by a one-byte type tag.  For every tag the script
counts occurrences and, for some kinds of objects, accumulates
``sys.getsizeof`` totals together with the share taken by values that were
already seen earlier in the same file.

NOTE(review): the tag letters look like CPython ``marshal`` type codes;
confirm which interpreter versions the layout assumed here (12-byte header,
code-object field layout, set of known tags) actually matches.
"""

import collections
import sys

# Values already seen in the file currently being processed.
refs = set()
# Tag name -> accumulated sys.getsizeof() of every value read under that tag.
size_stat = collections.Counter()
# Tag name -> accumulated sys.getsizeof() of values that were repeats.
size_stat2 = collections.Counter()


def stat_size(name, obj):
    """Account the shallow size of *obj* under *name*.

    The size is always added to ``size_stat``.  If an equal value is already
    in ``refs`` it is also added to ``size_stat2``; otherwise the value is
    remembered in ``refs``.  *obj* must be hashable.
    """
    size = sys.getsizeof(obj)
    size_stat[name] += size
    if obj in refs:
        size_stat2[name] += size
    else:
        refs.add(obj)


def read_data1(f):
    """Read a payload prefixed by a one-byte length and return it as bytes.

    Raises IndexError if the stream is already exhausted.
    """
    n = f.read(1)[0]
    return f.read(n)


def read_data4(f):
    """Read a payload prefixed by a 4-byte little-endian unsigned length."""
    n = int.from_bytes(f.read(4), 'little')
    return f.read(n)


def read_int(f):
    """Read a signed 32-bit little-endian integer and record its size.

    Repeated values in the range -5..256 are not counted in ``size_stat2``
    (presumably because the interpreter shares those small ints anyway --
    TODO confirm).
    """
    obj = int.from_bytes(f.read(4), 'little', signed=True)
    size_stat['INT'] += sys.getsizeof(obj)
    if obj in refs:
        if not (-5 <= obj <= 256):
            size_stat2['INT'] += sys.getsizeof(obj)
    else:
        refs.add(obj)
    return obj


def read_string(f):
    """Read a 4-byte-length-prefixed bytes payload, accounted as 'STRING'."""
    b = read_data4(f)
    stat_size('STRING', b)
    return b


def read_unicode(f):
    """Read a 4-byte-length-prefixed UTF-8 string, accounted as 'UNICODE'.

    Returns the decoded ``str``.  If the payload cannot be decoded the raw
    bytes are reported on stdout, nothing is accounted, and None is returned
    (best effort: one bad string does not abort the whole run).
    """
    b = read_data4(f)
    try:
        u = b.decode('utf-8', 'surrogatepass')
    except UnicodeDecodeError:
        print('UnicodeDecodeError', b)
        return None
    stat_size('UNICODE', u)
    return u


def read_collection(f):
    """Read a 4-byte element count, then consume that many objects.

    The elements are only counted (through stat_object); nothing is returned.
    """
    n = int.from_bytes(f.read(4), 'little')
    for _ in range(n):
        stat_object(f)


def read_tuple(f):
    """Read a 4-byte element count and that many objects; return their values.

    Only the value part of each ``(name, value)`` pair returned by
    stat_object is kept.  The resulting tuple is accounted as 'TUPLE'.
    Raises TypeError if the stream ends in the middle of the tuple, because
    stat_object returns None at end of input.
    """
    n = int.from_bytes(f.read(4), 'little')
    t = tuple(stat_object(f)[1] for _ in range(n))
    stat_size('TUPLE', t)
    return t


def read_dict(f):
    """Consume key/value object pairs up to the 'NULL' terminator.

    Reading also stops when the stream is exhausted: stat_object returns
    None at end of input, which would otherwise never compare equal to
    'NULL' and keep this loop spinning forever on a truncated file.
    """
    while True:
        key = stat_object(f)
        if key is None or key == 'NULL':
            return
        stat_object(f)


def read_long(f):
    """Read an arbitrary-size integer payload and return its raw bytes.

    A signed 4-byte count comes first; its absolute value is the number of
    2-byte units that follow.
    """
    n = abs(int.from_bytes(f.read(4), 'little', signed=True))
    return f.read(2 * n)


def read_code(f):
    """Read a code object record and account it as 'CODE'.

    Layout consumed here: 20 raw bytes, eight nested objects, 4 raw bytes and
    one more nested object (presumably five 32-bit header fields, the code
    object's attributes, the first line number and the line table -- TODO
    confirm against the targeted interpreter version).  The pieces are
    returned as a tuple, nested objects in the form stat_object returns them.
    """
    c = [f.read(4 * 5)]
    c += [stat_object(f) for _ in range(8)]
    c += [f.read(4), stat_object(f)]
    c = tuple(c)
    stat_size('CODE', c)
    return c


# One-byte tag -> (statistics name, reader).  A reader takes the open file
# and consumes the payload that follows the tag; None means "no payload".
dispatch = {
    b'0': ('NULL', None),
    b'N': ('NONE', lambda f: None),
    b'T': ('TRUE', lambda f: True),
    b'F': ('FALSE', lambda f: False),
    b'S': ('STOPITER', lambda f: StopIteration),
    b'.': ('ELLIPSIS', lambda f: Ellipsis),
    b'i': ('INT', read_int),
    b'I': ('INT64', lambda f: int.from_bytes(f.read(8), 'little', signed=True)),
    b'f': ('FLOAT', read_data1),
    b'g': ('BINARY_FLOAT', lambda f: f.read(8)),
    b'x': ('COMPLEX', lambda f: (read_data1(f), read_data1(f))),
    b'y': ('BINARY_COMPLEX', lambda f: f.read(16)),
    b'l': ('LONG', read_long),
    b's': ('STRING', read_string),
    b't': ('INTERNED', read_unicode),
    b'r': ('REF', lambda f: f.read(4)),
    b'(': ('TUPLE', read_tuple),
    b'[': ('LIST', read_collection),
    b'{': ('DICT', read_dict),
    b'c': ('CODE', read_code),
    b'u': ('UNICODE', read_unicode),
    b'<': ('SET', read_collection),
    b'>': ('FROZENSET', read_collection),
}

# Tag name -> number of objects seen with that tag (across all files).
stat_dict = collections.Counter()


def stat_object(f):
    """Read one tagged object from *f* and count it in ``stat_dict``.

    Returns None at end of input, the bare tag name for tags without a
    reader (only 'NULL'), and ``(name, value)`` otherwise, where *value* is
    whatever the tag's reader returned.  Raises ValueError for a tag that is
    not in ``dispatch``.
    """
    start = f.tell()  # offset of the tag byte, used in the error message
    tag = f.read(1)
    if not tag:
        return None
    # The high bit of the tag byte is a flag, not part of the type code;
    # it is stripped before the lookup and otherwise ignored here.
    tag = bytes([tag[0] & 0x7F])
    try:
        name, read = dispatch[tag]
    except KeyError:
        raise ValueError('Unknown tag: %r at 0x%X' % (tag, start)) from None
    stat_dict[name] += 1
    if read is not None:
        return name, read(f)
    return name


def stat_pyc(f):
    """Skip the 12-byte file header, then consume objects until end of input."""
    f.read(12)
    while stat_object(f):
        pass


def _print_report():
    """Print one line per tag: count, share of all objects, and sizes.

    For tags with size statistics the total size, the size taken by repeated
    values and the ratio of the two are appended to the line.
    """
    total = sum(stat_dict.values())
    for tag, count in stat_dict.most_common():
        print('%-14s %7d %-6s' % (tag, count, '%.2g%%' % (100.0 * count / total)), end='')
        if tag in size_stat:
            print(' %8d %8d %s' % (size_stat[tag], size_stat2[tag], '%.2g%%' % (100 * size_stat2[tag] / size_stat[tag])), end='')
        print()


def main(argv=None):
    """Process every file named in *argv* (default: ``sys.argv[1:]``) and report.

    To analyse a single stream from standard input instead, call
    ``stat_pyc(sys.stdin.buffer)``.
    """
    paths = sys.argv[1:] if argv is None else argv
    for fn in paths:
        with open(fn, 'rb') as f:
            # Repeats are tracked per file, so forget the previous file's values.
            refs.clear()
            stat_pyc(f)
    _print_report()


if __name__ == '__main__':
    main()