#!/usr/bin/env python3
"""Print duplicate code names."""

import sys
import os
import argparse
import linecache  # No longer used below; kept so the import block is unchanged.
import types
import pyclbr
from collections import Counter
from operator import itemgetter


def find_duplicates(file_paths):
    """Return duplicate code names in a dictionary.

    The dictionary has one entry for each code type: 'function', 'class',
    'method' and 'nested function or class'. Their value is a list of the
    tuples (filename, duplicate name). Within one file the names of a given
    type are listed in sorted order, so the result is reproducible from one
    run to the next.

    Paths that are not existing '.py' files, files that cannot be read or
    compiled, files whose module name contains a dot and files that pyclbr
    fails to analyse are reported on stderr and skipped.

    Duplicates are detected from the names of the compiled code objects
    only (see code_names()); decorators are not looked at.
    """
    # None never equals a directory name, so the pyclbr cache is always
    # cleared for the first file of each call.
    prev_dirpath = None
    duplicates = {'function': [], 'class': [], 'method': [],
                  'nested function or class': []}
    for filename in file_paths:
        root, ext = os.path.splitext(filename)
        if ext != '.py' or not os.path.isfile(filename):
            print('Not a valid pathname: {}'.format(filename),
                  file=sys.stderr)
            continue

        # Build the duplicates set. The source is handed to compile() as
        # bytes: compile() decodes it itself and any read or decoding
        # failure is raised here instead of being silently turned into an
        # empty module.
        try:
            with open(filename, 'rb') as source_file:
                code = compile(source_file.read(), filename, 'exec')
        except Exception as e:
            print('{}: compile error: {}'.format(filename, e),
                  file=sys.stderr)
            continue
        dups = {name for name, cnt in Counter(code_names(code)).items()
                if cnt > 1}

        # Build function, class and method names sets using pyclbr.
        functions = set()
        classes = set()
        methods = set()
        module = os.path.basename(root)
        dirpath = os.path.dirname(filename)
        # readmodule_ex crashes with AttributeError on Lib/__phello__.foo.py.
        if '.' in module:
            print('{}: {} not a valid module name'.format(filename, module),
                  file=sys.stderr)
            continue
        # Clear pyclbr cache to avoid module names conflicts.
        if prev_dirpath != dirpath:
            pyclbr._modules = {}
            prev_dirpath = dirpath
        # A failure on one file must not abort the analysis of the others.
        try:
            objs = pyclbr.readmodule_ex(module, [dirpath])
        except Exception as e:
            print('{}: pyclbr error: {}'.format(filename, e),
                  file=sys.stderr)
            continue
        for obj in objs.values():
            if isinstance(obj, pyclbr.Class):
                classes.add(obj.name)
                for method in obj.methods:
                    if method != '__path__':
                        methods.add('{}.{}'.format(obj.name, method))
            elif isinstance(obj, pyclbr.Function):
                functions.add(obj.name)

        # Classify duplicate names according to their types. Whatever
        # pyclbr did not list at the top level of the module or as a method
        # of a top level class is reported as nested.
        classified = (
            ('function', dups & functions),
            ('class', dups & classes),
            ('method', dups & methods),
            ('nested function or class',
             dups - functions - classes - methods),
        )
        for code_type, names in classified:
            duplicates[code_type].extend(
                (filename, name) for name in sorted(names))
    return duplicates


def code_names(code, name=None):
    """Yield the fully qualified names of 'code' classes and functions.

    'code' is a code object, normally the one returned by compile() for a
    whole module. 'name' is the list of the enclosing code names; it is
    used by the recursion and callers should leave it to None.

    Names are relative to the root code object, which is never listed
    itself. A name defined more than once in a given scope is yielded once
    per definition. When a class or function is a duplicate in a given
    scope, all its subcode names are skipped.
    """
    path = (name or []) + [code.co_name]

    # Do not list the root code object, nor the non-user code names such as
    # <module>, <lambda>, <genexpr>, etc...
    if len(path) > 1 and not path[-1].startswith('<'):
        yield '.'.join(path[1:])

    # Fully qualified names of the children already met in this scope.
    children_code_fqn = set()
    for const in code.co_consts:
        if not isinstance(const, types.CodeType):
            continue
        first_item = True
        for item in code_names(const, path):
            if first_item:
                first_item = False
                if item in children_code_fqn:
                    yield item
                    # Do not list the fully qualified names of the subcodes
                    # of this child when it is a duplicate.
                    break
                children_code_fqn.add(item)
            yield item


def main(argv=None):
    """Print the duplicate code names found in the files named in 'argv'.

    'argv' is the list of command line arguments; sys.argv[1:] is used when
    it is None. The report is grouped by code type and sorted by file name,
    then by duplicate name.
    """
    # __doc__ is None when the interpreter runs with -OO.
    parser = argparse.ArgumentParser(description=(__doc__ or '').strip())
    parser.add_argument('file_paths', metavar='F', nargs='+',
                        help='file pathname to search for duplicates')
    args = parser.parse_args(argv)

    for code_type, duplicates in sorted(
            find_duplicates(args.file_paths).items(), key=itemgetter(0)):
        if duplicates:
            print('Duplicate {} names:'.format(code_type))
            for filename, name in sorted(duplicates):
                print('{}: {}'.format(filename, name))
            print()


if __name__ == '__main__':
    main()