#!/usr/bin/env python3 """Print duplicate function, class or method names within the same scope.""" import sys import os import io import re import argparse import token import tokenize from collections import Counter from operator import itemgetter RE_PROPERTY_METHOD = r'^\s*@(?P\S+)\.%s\s*$' property_regexps = [re.compile(RE_PROPERTY_METHOD % m) for m in ('getter', 'setter', 'deleter')] def reiterate(it): """Iterator wrapper allowing to reiterate over an item with send().""" while True: try: item = next(it) except StopIteration: return repeat = (yield item) # Reiterate while the sent value is true. while repeat: yield item repeat = (yield item) class Parser: """Parse a module.""" CODE_TYPES = FUNC_ClASS, METHOD, NESTED = ( 'function or class', 'method', 'nested function, method or class') def __init__(self, name, readline, dups_to_ignore=None): self.name = name self.dups_to_ignore = dups_to_ignore self.duplicates = dict((ctype, []) for ctype in Parser.CODE_TYPES) try: self.token_gen = reiterate(tokenize.tokenize(readline)) except SyntaxError: # Occur when tokenizing Lib/test/badsyntax_pep3120.py. return self._parse() def _parse(self, pindent=0, parent='', nested=False, clss=None): """Parse a module, a class or a nested function. 'clss' is a tuple of (lines, lineno) where lines is the list of the class source lines and lineno the class starting line number. """ func_name = None names = [] ntop_names = [] lineno = 0 indent = 0 try: for tokentype, tok, srowcol, _end, _line in self.token_gen: if (tokentype == token.DEDENT or tok == 'def' or tok == 'class'): _lineno, _indent = srowcol # End of class or nested function definition. if (clss or nested) and _indent <= pindent: self.get_dups(parent, names, ntop_names, nested, clss) if tok == 'def' or tok == 'class': self.token_gen.send(1) return # End of function definition. if func_name and _indent <= indent: func_name = None if tok == 'def' or tok == 'class': tokentype, name = next(self.token_gen)[0:2] if tokentype != token.NAME: continue # ignore syntax error if func_name: name = '{}.{}'.format(func_name, name) elif parent: name = '{}.{}'.format(parent, name) # True when a function or a class nested in a top level # function or in the method of a top level class. ntop = func_name and (not parent or clss and not nested) if args.include_nested: if ntop: ntop_names.append((name, _lineno)) else: names.append((name, _lineno)) elif not ntop and not nested: names.append((name, _lineno)) if tok == 'def' and not func_name: indent = _indent func_name = name else: _nested = bool(func_name or parent) _clss = ([], _lineno) if tok == 'class' else None self._parse(_indent, name, _nested, _clss) continue if clss and lineno != srowcol[0]: clss[0].append(_line) lineno = srowcol[0] except StopIteration: pass self.get_dups(parent, names, ntop_names, nested, clss) def remove_properties(self, parent, clss, dups): dups = dict(dups) removed = [] for line in clss[0]: for regexp in property_regexps: matchobj = regexp.match(line.rstrip()) if matchobj: name = matchobj.group('name') fqn = '{}.{}'.format(parent, name) # Ignore when already removed or when the property is # defined in a super class. if fqn not in removed and '.' not in name and fqn in dups: del dups[fqn] removed.append(fqn) break return dups.items() def get_dups(self, parent, names, ntop_names, nested, clss): def build_dups(names): linenos = dict(names) if self.dups_to_ignore: return ((name, linenos[name]) for name, cnt in Counter(n[0] for n in names).items() if cnt > 1 and name not in self.dups_to_ignore) else: return ((name, linenos[name]) for name, cnt in Counter(n[0] for n in names).items() if cnt > 1) dups = build_dups(names) if clss and (args.include_nested or not nested): # Remove a duplicate from dups when it is a property. dups = self.remove_properties(parent, clss, dups) self.duplicates[Parser.NESTED].extend(build_dups(ntop_names)) if nested: self.duplicates[Parser.NESTED].extend(dups) elif clss: self.duplicates[Parser.METHOD].extend(dups) else: self.duplicates[Parser.FUNC_ClASS].extend(dups) def modules(files): for f in files: if os.path.isdir(f): for path, dirs, filenames in os.walk(f): for fname in filenames: if os.path.splitext(fname)[1] == '.py': yield os.path.join(path, fname) for dirname in dirs: modules((os.path.join(path, dirname), )) else: yield f def find_duplicates(files, to_ignore): """Return duplicate code names within the same scope as a dictionary. The dictionary has one entry for each code type. Their value is a list of the tuples (filename, duplicate name, line number). """ duplicates = dict((ctype, []) for ctype in Parser.CODE_TYPES) for fname in modules(files): dups_to_ignore = to_ignore.get(fname) # Skip this file. if dups_to_ignore == []: continue with open(fname, 'rb') as f: parser = Parser(fname, f.readline, dups_to_ignore) for code_type, names in parser.duplicates.items(): duplicates[code_type].extend((f.name, n, l) for n, l in names) return duplicates def print_duplicates(source, dups_to_ignore=None): """Parse the source code as a string. >>> test_functions = ''' ... class C: ... def foo(self): pass ... def foo(): pass ... def foo(): ... pass ... def bar(): pass ... ''' >>> print_duplicates(test_functions) # doctest: +NORMALIZE_WHITESPACE Duplicate function or class names: 5: foo >>> test_to_ignore = ''' ... class C: ... def foo(self): pass ... def foo(self): pass ... def foo(): pass ... def foo(): ... pass ... ''' >>> print_duplicates(test_to_ignore, ['foo']) \ # doctest: +NORMALIZE_WHITESPACE Duplicate method names: 4: C.foo >>> test_classes = ''' ... class C: ... def foo(self): pass ... ... class C: pass ... class D: pass ... ... def foo(): ... pass ... ''' >>> print_duplicates(test_classes) # doctest: +NORMALIZE_WHITESPACE Duplicate function or class names: 5: C >>> test_methods = ''' ... class C: ... def foo(self): pass ... def foo(self): pass ... def bar(self): pass ... ... class D: ... def foo(self): pass ... ... def foo(): ... pass ... ''' >>> print_duplicates(test_methods) # doctest: +NORMALIZE_WHITESPACE Duplicate method names: 4: C.foo >>> args.include_nested = True >>> test_nested = ''' ... def foo(): ... def bar(): pass ... ... class C: ... def foo(self): pass ... def bar(self): pass ... def bar(self): pass ... ... class D: ... def foo(self): pass ... def bar(self): pass ... def bar(self): pass ... ... def bar(self): pass ... ... def bar(): pass ... ... def foo(): ... pass ... ''' >>> print_duplicates(test_nested) # doctest: +NORMALIZE_WHITESPACE Duplicate function or class names: 19: foo Duplicate nested function, method or class names: 13: foo.C.D.bar 15: foo.C.bar 17: foo.bar >>> args.include_nested = False >>> print_duplicates(test_nested) # doctest: +NORMALIZE_WHITESPACE Duplicate function or class names: 19: foo >>> args.include_nested = True >>> test_properties = ''' ... class C: ... @property ... def foo(self): pass ... ... @foo.setter ... def foo(self, a, b): pass ... ... def bar(self): pass ... def bar(self): pass ... ... class D: ... @property ... def foo(self): pass ... ... @foo.setter ... def foo(self, a, b): pass ... ... def bar(self): pass ... def bar(self): pass ... ''' >>> print_duplicates(test_properties) # doctest: +NORMALIZE_WHITESPACE Duplicate method names: 10: C.bar Duplicate nested function, method or class names: 20: C.D.bar >>> test_mixed = ''' ... class C: ... def foo(self): pass ... class foo(): ... def bar(self): pass ... class bar: pass ... ... def C: ... class bar: pass ... def bar(): pass ... ''' >>> print_duplicates(test_mixed) # doctest: +NORMALIZE_WHITESPACE Duplicate function or class names: 8: C Duplicate method names: 4: C.foo Duplicate nested function, method or class names: 10: C.bar 6: C.foo.bar >>> test_top_nested = ''' ... def foo(): ... class C: pass ... class C: pass ... ... class C: ... def foo(): ... class D: pass ... class D: pass ... ''' >>> print_duplicates(test_top_nested) # doctest: +NORMALIZE_WHITESPACE Duplicate nested function, method or class names: 9: C.foo.D 4: foo.C """ if isinstance(source, str): source = source.encode() with io.BytesIO(source) as f: p = Parser('