import ast
import marshal
import tokenize
from argparse import ArgumentParser
from pathlib import Path


class Remover(ast.NodeTransformer):
    """Base class for AST transformers that strip nodes and keep counters.

    Attributes:
        stats: Number of items removed so far.
        data: Extra counters; ``removed_length`` is the total number of
            characters of removed docstring text.
    """

    def __init__(self):
        super().__init__()
        self.stats = 0
        self.data = {"removed_length": 0}


class RemoveAnnotations(Remover):
    """Strip return, argument and variable annotations from a tree."""

    def remove_returns(self, node):
        """Drop the return annotation of a function, then visit its children."""
        if node.returns is not None:
            self.stats += 1
            node.returns = None
        self.generic_visit(node)
        return node

    def visit_arguments(self, node):
        """Drop the annotation of every parameter in an ``arguments`` node.

        ``generic_visit`` is not called here, so default-value expressions
        are left unvisited.
        """
        candidates = (
            *node.posonlyargs,
            *node.args,
            node.vararg,  # None when the signature has no *args
            *node.kwonlyargs,
            node.kwarg,  # None when the signature has no **kwargs
        )
        for arg in candidates:
            if arg is not None and arg.annotation is not None:
                self.stats += 1
                arg.annotation = None
        return node

    visit_FunctionDef = remove_returns
    visit_AsyncFunctionDef = remove_returns

    def visit_AnnAssign(self, node):
        """Replace ``x: T = v`` with ``x = v`` and a bare ``x: T`` with ``pass``."""
        self.stats += 1
        if node.value is not None:
            replacement = ast.Assign(targets=[node.target], value=node.value)
        else:
            replacement = ast.Pass()
        # The new statement needs line/column info for compile() to accept it.
        return ast.copy_location(replacement, node)


class RemoveDocstrings(Remover):
    """Strip docstrings from modules, classes and (async) functions."""

    def remove_docstrings(self, node):
        """Remove a leading string-constant statement from ``node.body``.

        A body that consists only of a docstring is stripped as well; a
        ``pass`` statement is left in its place so the body is never empty.
        """
        body = node.body
        if (
            body
            and isinstance(body[0], ast.Expr)
            and isinstance(body[0].value, ast.Constant)
            and isinstance(body[0].value.value, str)
        ):
            self.stats += 1
            docstring = body.pop(0)
            self.data["removed_length"] += len(docstring.value.value)
            if not body:
                # Reuse the docstring's location: compile() rejects
                # statements that carry no lineno.
                body.append(ast.copy_location(ast.Pass(), docstring))
        self.generic_visit(node)
        return node

    visit_Module = remove_docstrings
    visit_ClassDef = remove_docstrings
    visit_FunctionDef = remove_docstrings
    visit_AsyncFunctionDef = remove_docstrings


def _marshalled_size(tree):
    """Return the byte length of the marshalled code object built from *tree*."""
    return len(marshal.dumps(compile(tree, "", "exec")))


def test_package_impl(file):
    """Measure the compiled size of *file* with docstrings/annotations removed.

    Args:
        file: Path of a Python source file; it is opened with
            ``tokenize.open``.

    Returns:
        A tuple ``(docstring_result, annotation_result, weight)`` where
        ``docstring_result`` is ``(removed_count, removed_length, weight)``,
        ``annotation_result`` is ``(removed_count, weight)`` and each
        ``weight`` is the length in bytes of the marshalled code object
        (the last one being that of the unmodified source).
    """
    with tokenize.open(file) as f:
        content = f.read()

    # Each measurement gets its own tree because the transformers mutate
    # the tree they visit.
    original_tree = ast.parse(content)
    docstring_remover = RemoveDocstrings()
    annotation_remover = RemoveAnnotations()
    without_docstrings = docstring_remover.visit(ast.parse(content))
    without_annotations = annotation_remover.visit(ast.parse(content))

    weight = _marshalled_size(original_tree)
    docstring_weight = _marshalled_size(without_docstrings)
    annotation_weight = _marshalled_size(without_annotations)

    return (
        (
            docstring_remover.stats,
            docstring_remover.data["removed_length"],
            docstring_weight,
        ),
        (annotation_remover.stats, annotation_weight),
        weight,
    )


# The name matches pytest's default ``test_*`` pattern; mark it so that pytest
# does not try to collect it as a test when it is imported into a test module.
test_package_impl.__test__ = False


def main():
    """Sum the measurements over every ``*.py`` file under a path and print them."""
    parser = ArgumentParser()
    parser.add_argument("path", type=Path)
    options = parser.parse_args()

    total_bytes = 0
    total_docstring = 0
    total_annotation = 0
    total_bytes_docstring = 0
    total_bytes_annotation = 0
    total_removed_docstring_length = 0

    results = map(test_package_impl, options.path.glob("**/*.py"))
    for docstring_result, annotation_result, total in results:
        total_bytes += total
        total_docstring += docstring_result[0]
        total_removed_docstring_length += docstring_result[1]
        total_bytes_docstring += docstring_result[2]
        total_annotation += annotation_result[0]
        total_bytes_annotation += annotation_result[1]

    print("Total bytes:", total_bytes)
    print(
        f"Total bytes after {total_docstring} docstrings "
        f"(total length of {total_removed_docstring_length}) removed: "
        f"{total_bytes_docstring}"
    )
    print(
        f"Total bytes after {total_annotation} type annotations removed: "
        f"{total_bytes_annotation}"
    )


if __name__ == "__main__":
    main()