import ast
import inspect
import io
import os
import re
import sys
import tokenize
import unittest

try:
    from asttokens import ASTTokens
except ImportError:
    # asttokens is optional: without it the cross-check against
    # ASTTokens.get_text() is skipped and every mismatch is reported.
    ASTTokens = None


class TestSourceSegment(unittest.TestCase):
    """Exploratory check of ast.get_source_segment() on real-world code."""

    def test_sys_modules(self):
        """
        Re-parse the source segment of every node of every imported module.

        For each expression/statement node of each module in sys.modules
        whose source file can be read, the text returned by
        ast.get_source_segment() is parsed again and compared with the
        original node. Mismatches are printed as a report; they do not make
        the test fail.
        """
        # Copy the values: sys.modules may change while we iterate over it.
        for module in list(sys.modules.values()):
            try:
                filename = inspect.getsourcefile(module)
            except TypeError:
                # Raised for objects that have no source file (e.g. built-ins).
                continue

            if not filename:
                continue

            filename = os.path.abspath(filename)
            source = read_source(filename)
            if source is None:
                continue

            tree = ast.parse(source)
            atok = None  # Created lazily, at most once per file.

            for node in ast.walk(tree):
                # ast.walk yields a node before its children, so `parent` is
                # already set by the time a child is inspected below.
                for child in ast.iter_child_nodes(node):
                    child.parent = node

                if not isinstance(node, (ast.expr, ast.stmt)):
                    continue

                # get_source_segment doesn't include decorators
                # Is this behaviour we want to keep?
                if getattr(node, 'decorator_list', None):
                    continue

                # get_source_segment only returns '*', excludes starred value
                # Maybe fixed by https://bugs.python.org/issue39080 ?
                if isinstance(node, ast.Starred):
                    continue

                # Skip elif: its text starts with 'elif', which is not
                # parsable on its own.
                if isinstance(node.parent, ast.If) and node.parent.orelse == [node]:
                    continue

                text = ast.get_source_segment(source, node, padded=True)

                # get_source_segment returns None when location information
                # is missing; there is nothing to compare in that case.
                if text is None:
                    continue

                # get_source_segment often messes up generator expressions
                # Maybe fixed by https://bugs.python.org/issue39235 ?
                if text == '(' and isinstance(node, ast.GeneratorExp):
                    continue

                try:
                    self.assert_nodes_equal(node, parse_snippet(text, node))
                except Exception as e:
                    if ASTTokens:
                        if atok is None:
                            atok = ASTTokens(source, tree=tree)
                        atok_text = atok.get_text(node)
                        if not atok_text:
                            # generally means we're dealing with f-strings
                            continue

                        if atok_text == '(' + text:
                            # solved in https://bugs.python.org/issue39474
                            continue

                        # Solved in https://bugs.python.org/issue38535
                        if (
                            isinstance(node, ast.Call)
                            and not node.args
                            and text == '@' + atok_text
                        ):
                            continue

                    print('=========')
                    print(f'File "{filename}", line {node.lineno}, in')
                    print(e.__class__.__name__, ':', e)
                    print('--------- ast.dump():')
                    print(ast.dump(node))
                    print('--------- get_source_segment():')
                    print(text)
                    if ASTTokens:
                        print('--------- asttokens text:')
                        print(atok_text)

    def assert_nodes_equal(self, t1, t2):
        """
        Assert that t1 and t2 have the same structure and contents.

        Works recursively on AST nodes, lists/tuples and plain values.
        Raises AssertionError (via the unittest assert methods) on the first
        difference found.
        """
        # Ignore the context of each node which can change when parsing
        # substrings of source code. We just want equal structure and contents.
        if isinstance(t1, ast.expr_context):
            self.assertIsInstance(t2, ast.expr_context)
        else:
            self.assertEqual(type(t1), type(t2))

        if isinstance(t1, (list, tuple)):
            self.assertEqual(len(t1), len(t2))
            for vc1, vc2 in zip(t1, t2):
                self.assert_nodes_equal(vc1, vc2)
        elif isinstance(t1, ast.AST):
            # Compare the (name, value) field pairs as lists.
            self.assert_nodes_equal(
                list(ast.iter_fields(t1)),
                list(ast.iter_fields(t2)),
            )
        else:
            self.assertEqual(t1, t2)


def read_source(filename):
    """
    Return the decoded text of the Python source file `filename`, or None.

    The file is opened with tokenize.open(), which detects the encoding the
    way the interpreter does (BOM / PEP 263 coding cookie, UTF-8 otherwise)
    instead of relying on the locale's default encoding.

    None is returned when the file cannot be opened or read, or when its
    contents cannot be decoded.
    """
    try:
        with tokenize.open(filename) as f:
            return f.read()
    except (OSError, SyntaxError, UnicodeDecodeError):
        # OSError: missing/unreadable file. SyntaxError: tokenize could not
        # determine the encoding. UnicodeDecodeError: bad bytes in the body.
        return None


def parse_snippet(text, node):
    """
    Returns the parsed AST tree for the given text, handling issues with
    indentation and newlines when text is really an extracted part of larger
    code.

    `node` is the original node the text was extracted from; it decides
    whether the result is an expression (the parsed statement's value) or
    the parsed statement itself.
    """
    match = re.match(r'^[ \t]+', text)
    indent = match.group() if match else ''

    is_expr = isinstance(node, ast.expr)
    if is_expr:
        # If this is an expression that:
        # - has newlines
        # - or is an assignment expression
        # we parenthesize it to make it parsable.
        if '\n' in text or isinstance(node, ast.NamedExpr):
            text = indent + '(' + text.lstrip() + ')'

    # If text is indented, we need to put in a scope for indents to be valid
    # (using textwrap.dedent is insufficient because some lines may not be
    # indented, e.g. comments or multiline strings).
    if indent:
        stmt = ast.parse('def dummy():\n' + text).body[0].body[0]
    else:
        stmt = ast.parse(text).body[0]

    return stmt.value if is_expr else stmt


if __name__ == '__main__':
    unittest.main()