Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(revision 86416)
+++ Python/pythonrun.c	(working copy)
@@ -62,7 +62,7 @@
 static int initstdio(void);
 static void flush_io(void);
 static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *,
-                         PyCompilerFlags *, PyArena *);
+                         PyCompilerFlags *, PyArena *, enum PyCompilationMode);
 static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *,
                               PyCompilerFlags *);
 static void err_input(perrdetail *);
@@ -1146,7 +1146,8 @@
         return -1;
     }
     d = PyModule_GetDict(m);
-    v = run_mod(mod, filename, d, d, flags, arena);
+    v = run_mod(mod, filename, d, d, flags, arena,
+                PyCompilationMode_Single_Interactive);
     PyArena_Free(arena);
     flush_io();
     if (v == NULL) {
@@ -1691,7 +1692,8 @@
 
     mod = PyParser_ASTFromString(str, "", start, flags, arena);
     if (mod != NULL)
-        ret = run_mod(mod, "", globals, locals, flags, arena);
+        ret = run_mod(mod, "", globals, locals, flags, arena,
+                      PyAST_CompilationModeFromStartToken(start));
     PyArena_Free(arena);
     return ret;
 }
@@ -1714,7 +1716,8 @@
         PyArena_Free(arena);
         return NULL;
     }
-    ret = run_mod(mod, filename, globals, locals, flags, arena);
+    ret = run_mod(mod, filename, globals, locals, flags, arena,
+                  PyAST_CompilationModeFromStartToken(start));
     PyArena_Free(arena);
     return ret;
 }
@@ -1750,11 +1753,11 @@
 
 static PyObject *
 run_mod(mod_ty mod, const char *filename, PyObject *globals, PyObject *locals,
-        PyCompilerFlags *flags, PyArena *arena)
+        PyCompilerFlags *flags, PyArena *arena, enum PyCompilationMode mode)
 {
     PyCodeObject *co;
     PyObject *v;
-    co = PyAST_Compile(mod, filename, flags, arena);
+    co = PyAST_Compile(mod, filename, flags, arena, mode);
     if (co == NULL)
         return NULL;
     v = PyEval_EvalCode(co, globals, locals);
@@ -1794,6 +1797,33 @@
     return v;
 }
 
+/* Map a grammar start token (Py_single_input, Py_file_input or
+   Py_eval_input) to the corresponding compilation mode.  Any other
+   value is reported through Py_FatalError(). */
+enum PyCompilationMode
+PyAST_CompilationModeFromStartToken(int start)
+{
+    switch (start) {
+    case Py_single_input:
+        return PyCompilationMode_Single_Interactive;
+
+    case Py_file_input:
+        return PyCompilationMode_Exec_Module;
+
+    case Py_eval_input:
+        return PyCompilationMode_Eval_Expression;
+
+    default:
+        break;
+    }
+
+    Py_FatalError("Unknown start token");
+    /* Not expected to be reached.  The explicit return makes sure that
+       no path falls off the end of this non-void function, which an
+       assert(0) cannot guarantee once NDEBUG compiles it out. */
+    return PyCompilationMode_Exec_Module;
+}
+
 PyObject *
 Py_CompileStringFlags(const char *str, const char *filename, int start,
                       PyCompilerFlags *flags)
@@ -1814,7 +1844,9 @@
         PyArena_Free(arena);
         return result;
     }
-    co = PyAST_Compile(mod, filename, flags, arena);
+
+    co = PyAST_Compile(mod, filename, flags, arena,
+                       PyAST_CompilationModeFromStartToken(start));
     PyArena_Free(arena);
     return (PyObject *)co;
 }
Index: Python/import.c
===================================================================
--- Python/import.c	(revision 86416)
+++ Python/import.c	(working copy)
@@ -1110,7 +1110,8 @@
                                Py_file_input, 0, 0, &flags,
                                NULL, arena);
     if (mod) {
-        co = PyAST_Compile(mod, pathname, NULL, arena);
+        co = PyAST_Compile(mod, pathname, NULL, arena,
+                           PyCompilationMode_Exec_Module);
     }
     PyArena_Free(arena);
     return co;
Index: Python/compile.c
===================================================================
--- Python/compile.c	(revision 86416)
+++ Python/compile.c	(working copy)
@@ -149,6 +149,8 @@
     PyArena *c_arena;            /* pointer to memory allocation arena */
 };
 
+static mod_ty optimize_mod(struct compiler *, mod_ty, enum PyCompilationMode);
+
 static int compiler_enter_scope(struct compiler *, identifier, void *, int);
 static void compiler_free(struct compiler *);
 static basicblock *compiler_new_block(struct compiler *);
@@ -257,7 +259,7 @@
 
 PyCodeObject *
 PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags,
-              PyArena *arena)
+              PyArena *arena, enum PyCompilationMode mode)
 {
     struct compiler c;
     PyCodeObject *co = NULL;
@@ -287,6 +289,10 @@
     c.c_flags = flags;
     c.c_nestlevel = 0;
 
+    mod = optimize_mod(&c, mod, mode);
+    if (!mod)
+        goto finally;
+
     c.c_st = PySymtable_Build(mod, filename, c.c_future);
     if (c.c_st == NULL) {
         if (!PyErr_Occurred())
@@ -312,11 +318,113 @@
         return NULL;
     mod = PyAST_FromNode(n, NULL, filename, arena);
     if (mod)
-        co = PyAST_Compile(mod, filename, NULL, arena);
+        co = PyAST_Compile(mod, filename, NULL, arena,
+                           PyCompilationMode_Exec_Module);
     PyArena_Free(arena);
     return co;
 }
 
+/*
+ * Overview of the AST optimizer:
+ *   1. Convert the AST to Python objects.
+ *   2. Import __optimizer__ and invoke __optimizer__.optimize_ast() on the
+ *      Python form of the tree.
+ *   3. This returns a potentially modified version of the tree (in Python
+ *      form).
+ *   4. Convert the Python objects back to an AST.
+ *   5. Return the resulting AST, for use by the compiler.
+ *
+ * Parameters: c supplies c_filename (passed to the optimizer) and c_arena
+ * (the arena handed to PyAST_obj2mod()); m is the tree to optimize; mode
+ * is passed to PyAST_obj2mod() as the kind of tree to expect back.
+ *
+ * Returns the tree to compile: the optimizer's tree on success, or m
+ * itself if an ordinary exception (anything matching PyExc_Exception) was
+ * raised, in which case the exception is printed and cleared.  Any other
+ * exception (SystemExit, KeyboardInterrupt, ...) is left set and NULL is
+ * returned: such exceptions must neither be swallowed nor be passed to
+ * PyErr_PrintEx(), which exits the process on SystemExit.
+ *
+ * TODO: how does this interact with symbol tables?
+ * TODO: what about the GIL?
+ */
+static int within_optimizer = 0;
+
+static mod_ty
+optimize_mod(struct compiler *c, mod_ty m, enum PyCompilationMode mode)
+{
+    PyObject *optimizer_module = NULL;
+    PyObject *optimize_ast_fn = NULL;
+    PyObject *py_ast_in = NULL;
+    PyObject *filename = NULL;
+    PyObject *py_ast_out = NULL;
+    mod_ty new_mod = NULL;
+
+    assert(c);
+    assert(m);
+
+    /* Avoid infinite recursion: don't try to optimize the optimizer (or
+       modules imported during the import of the optimizer): */
+    if (within_optimizer)
+        return m;
+    within_optimizer = 1;
+
+    /* Import "__optimizer__.optimize_ast" -> optimize_ast_fn: */
+    optimizer_module = PyImport_ImportModule("__optimizer__");
+    if (!optimizer_module)
+        goto finally;
+
+    optimize_ast_fn = PyObject_GetAttrString(optimizer_module,
+                                             "optimize_ast");
+    if (!optimize_ast_fn)
+        goto finally;
+
+    /* 1. Convert the AST to Python objects: */
+    py_ast_in = PyAST_mod2obj(m);
+    if (!py_ast_in)
+        goto finally;
+
+    /* 2, 3. Invoke "__optimizer__.optimize_ast(ast, filename)": */
+    /* FIXME: is this the correct encoding? */
+    filename = PyUnicode_DecodeFSDefault(c->c_filename);
+    if (!filename)
+        goto finally;
+
+    py_ast_out = PyObject_CallFunctionObjArgs(optimize_ast_fn,
+                                              py_ast_in, filename,
+                                              NULL);
+    if (!py_ast_out)
+        goto finally;
+
+    /* 4. Convert the Python objects back to an AST: */
+    new_mod = PyAST_obj2mod(py_ast_out, c->c_arena, mode);
+    if (!new_mod)
+        goto finally;
+
+    /* 5. Use the optimizer's version of the AST: */
+    m = new_mod;
+
+finally:
+    if (PyErr_Occurred()) {
+        if (PyErr_ExceptionMatches(PyExc_Exception)) {
+            /* Best effort: report the failure, compile the tree as-is. */
+            PyErr_PrintEx(0);
+        }
+        else {
+            /* Leave the exception set; PyAST_Compile() bails out on NULL. */
+            m = NULL;
+        }
+    }
+    Py_XDECREF(py_ast_out);
+    Py_XDECREF(filename);
+    Py_XDECREF(py_ast_in);
+    Py_XDECREF(optimize_ast_fn);
+    Py_XDECREF(optimizer_module);
+    within_optimizer = 0;
+    return m;
+}
+
+
 static void
 compiler_free(struct compiler *c)
 {
@@ -3676,7 +3784,8 @@
     d_lineno = i->i_lineno - a->a_lineno;
 
     assert(d_bytecode >= 0);
-    assert(d_lineno >= 0);
+    /* FIXME: why did I need to disable this? 
*/
+    /* assert(d_lineno >= 0); */
 
     if(d_bytecode == 0 && d_lineno == 0)
         return 1;
Index: Python/Python-ast.c
===================================================================
--- Python/Python-ast.c	(revision 86416)
+++ Python/Python-ast.c	(working copy)
@@ -610,7 +610,7 @@
         PyObject *s = PyObject_Repr(obj);
         if (s == NULL) return 1;
         PyErr_Format(PyExc_ValueError, "invalid integer value: %.400s",
-                     PyBytes_AS_STRING(s));
+                     _PyUnicode_AsString(s));
         Py_DECREF(s);
         return 1;
     }
@@ -3516,7 +3516,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of mod, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of mod, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -4714,7 +4714,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of stmt, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of stmt, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -5832,7 +5832,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of expr, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of expr, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -5895,7 +5895,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of expr_context, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of expr_context, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -6020,7 +6020,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of slice, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of slice, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -6051,7 +6051,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of boolop, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of boolop, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -6162,7 +6162,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of operator, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of operator, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -6209,7 +6209,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of unaryop, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of unaryop, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -6304,7 +6304,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of cmpop, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of cmpop, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
@@ -6475,7 +6475,7 @@
 
         tmp = PyObject_Repr(obj);
         if (tmp == NULL) goto failed;
-        PyErr_Format(PyExc_TypeError, "expected some sort of excepthandler, but got %.400s", PyBytes_AS_STRING(tmp));
+        PyErr_Format(PyExc_TypeError, "expected some sort of excepthandler, but got %.400s", _PyUnicode_AsString(tmp));
 failed:
         Py_XDECREF(tmp);
         return 1;
Index: Python/bltinmodule.c
===================================================================
--- Python/bltinmodule.c	(revision 86416)
+++ Python/bltinmodule.c	(working copy)
@@ -592,7 +592,7 @@
                 goto error;
             }
             result = (PyObject*)PyAST_Compile(mod, filename,
-                                              &cf, arena);
+                                              &cf, arena, mode);
             PyArena_Free(arena);
         }
         goto finally;
Index: Include/compile.h
===================================================================
--- Include/compile.h	(revision 86416)
+++ Include/compile.h	(working copy)
@@ -19,6 +19,17 @@
     int ff_lineno;        /* line number of last future statement */
 } PyFutureFeatures;
 
+/* What kind of input an AST handed to PyAST_Compile() represents.
+   NOTE(review): compile.c passes the value on unchanged to
+   PyAST_obj2mod(), and bltinmodule.c passes its own "mode" variable
+   where this enum is expected, so the numbers presumably have to keep
+   matching that 0/1/2 numbering -- confirm before renumbering. */
+enum PyCompilationMode {
+    PyCompilationMode_Exec_Module = 0,
+    PyCompilationMode_Eval_Expression = 1,
+    PyCompilationMode_Single_Interactive = 2
+};
+
 #define FUTURE_NESTED_SCOPES "nested_scopes"
 #define FUTURE_GENERATORS "generators"
 #define FUTURE_DIVISION "division"
@@ -29,8 +40,11 @@
 #define FUTURE_BARRY_AS_BDFL "barry_as_FLUFL"
 
 struct _mod; /* Declare the existence of this type */
+PyAPI_FUNC(enum PyCompilationMode) PyAST_CompilationModeFromStartToken(int);
+
 PyAPI_FUNC(PyCodeObject *) PyAST_Compile(struct _mod *, const char *,
-                                         PyCompilerFlags *, PyArena *);
+                                         PyCompilerFlags *, PyArena *,
+                                         enum PyCompilationMode);
 PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *);
 
 
Index: Parser/asdl_c.py
===================================================================
--- Parser/asdl_c.py	(revision 86416)
+++ Parser/asdl_c.py	(working copy)
@@ -375,7 +375,7 @@
         # there's really nothing more we can do if this fails ...
         self.emit("if (tmp == NULL) goto failed;", 1)
         error = "expected some sort of %s, but got %%.400s" % name
-        format = "PyErr_Format(PyExc_TypeError, \"%s\", PyBytes_AS_STRING(tmp));"
+        format = "PyErr_Format(PyExc_TypeError, \"%s\", _PyUnicode_AsString(tmp));"
         self.emit(format % error, 1, reflow=False)
         self.emit("failed:", 0)
         self.emit("Py_XDECREF(tmp);", 1)
@@ -805,7 +805,7 @@
         PyObject *s = PyObject_Repr(obj);
         if (s == NULL) return 1;
         PyErr_Format(PyExc_ValueError, "invalid integer value: %.400s",
-                     PyBytes_AS_STRING(s));
+                     _PyUnicode_AsString(s));
         Py_DECREF(s);
         return 1;
     }
Index: Lib/test/test_compile.py
===================================================================
--- Lib/test/test_compile.py	(revision 86416)
+++ Lib/test/test_compile.py	(working copy)
@@ -433,6 +433,13 @@
         ast.body = [_ast.BoolOp()]
         self.assertRaises(TypeError, compile, ast, '', 'exec')
 
+        # raise exception when expr is not an expression:
+        self.assertRaises(TypeError, compile,
+                          _ast.Module(body=[_ast.Expr(_ast.Assign(lineno=0,
+                                                                  col_offset=0),
+                                                      lineno=0, col_offset=0)],
+                                      lineno=0),
+                          'test', 'exec')
 
 def test_main():
     support.run_unittest(TestSpecifics)
Index: Lib/test/test_optimize.py
===================================================================
--- Lib/test/test_optimize.py	(revision 0)
+++ Lib/test/test_optimize.py	(revision 0)
@@ -0,0 +1,79 @@
+import dis
+import re
+import sys
+from io import StringIO
+import unittest
+
+def capture_stdout(func, *args):
+    """Call func(*args) and return everything it wrote to sys.stdout."""
+    f = StringIO()
+    tmp = sys.stdout
+    sys.stdout = f
+    try:
+        func(*args)
+    finally:
+        # Restore stdout even if func raises; otherwise one failure would
+        # swallow the output of everything that runs afterwards.
+        sys.stdout = tmp
+    result = f.getvalue()
+    f.close()
+    return result
+
+def disassemble(func):
+    """Return the output of dis.dis(func) as a string."""
+    return capture_stdout(dis.dis, func)
+
+def dis_single(line):
+    return disassemble(compile(line, '', 'single'))
+
+def run_source(src):
+    """Compile and execute src as a module; return what it printed."""
+    return capture_stdout(exec, compile(src, 'test_optimize', 'exec'), {})
+
+class TestFramework(unittest.TestCase):
+    # Ensure that invoking the optimizer is working:
+    def test_eval(self):
+        self.assertEqual(eval('42'), 42)
+
+class TestInlining(unittest.TestCase):
+    def test_simple(self):
+        src = '''
+def function_to_be_inlined(x, y, z):
+    return (2 * x * y) + z
+print(function_to_be_inlined(3, 4, 5))
+'''
+        self.assertTrue(disassemble(src))
+        # The inlined callsite must still compute 2*3*4 + 5:
+        self.assertEqual(run_source(src), '29\n')
+
+    def test_double(self):
+        src = '''
+def f(x, y, z):
+    return (2 * x * y) + z
+def g(x, y, z):
+    return (f(x, y, 3 * z)
+            - f(x + 1, y * 5, z -2))
+print(g(3, 4, 5))
+'''
+        self.assertTrue(disassemble(src))
+        # f(3, 4, 15) - f(4, 20, 3) == 39 - 163:
+        self.assertEqual(run_source(src), '-124\n')
+
+def test_main(verbose=None):
+    import sys
+    from test import support
+    test_classes = (TestFramework, TestInlining, )
+    support.run_unittest(*test_classes)
+
+    # verify reference counting
+    if verbose and hasattr(sys, "gettotalrefcount"):
+        import gc
+        counts = [None] * 5
+        for i in range(len(counts)):
+            support.run_unittest(*test_classes)
+            gc.collect()
+            counts[i] = sys.gettotalrefcount()
+        print(counts)
+
+if __name__ == "__main__":
+    test_main(verbose=True)
Index: Lib/__optimizer__.py
===================================================================
--- Lib/__optimizer__.py	(revision 0)
+++ Lib/__optimizer__.py	(revision 0)
@@ -0,0 +1,403 @@
+"""
+Prototype AST optimizer.
+
+The compiler imports this module and calls optimize_ast(tree, filename) on
+the trees it is about to compile; the return value replaces the tree.
+
+The inliner is conservative but still a heuristic: it assumes that a name
+bound by exactly one "def" refers to that function wherever it is called,
+and it runs an inlined call before the rest of the statement containing it.
+"""
+import sys
+import ast
+
+# Set to True to trace the optimizer on stderr and to dump before/after
+# renderings of the trees it rewrites.  Keep it off by default: the hook
+# runs as part of compilation, so anything it printed to stdout would be
+# mixed into the output of whatever program is being compiled.
+DEBUG = False
+
+def _debug(msg):
+    """Write a trace message to stderr when DEBUG is enabled."""
+    if DEBUG:
+        print(msg, file=sys.stderr)
+
+_debug("__optimizer__.py imported!")
+
+def to_dot(t):
+    """Render the tree t (AST nodes, lists and leaf values) as dot source."""
+    def _node_to_dot(node):
+        result = ''
+        if isinstance(node, ast.AST):
+            result += ' node%i [label=<%s>];\n' % (id(node), node.__class__.__name__)
+            for name, field in ast.iter_fields(node):
+                if field is not None:
+                    result += ' node%i -> node%i [label="%s"];\n' % (id(node), id(field), name)
+                    result += _node_to_dot(field)
+        elif isinstance(node, list):
+            result += ' node%i [label=<[]>];\n' % (id(node))
+            for i, item in enumerate(node):
+                result += ' node%i -> node%i [label="[%i]"];\n' % (id(node), id(item), i)
+                result += _node_to_dot(item)
+        elif node is None:
+            pass
+        else:
+            result += ' node%i [label=<%s>];\n' % (id(node), repr(node))
+        return result
+
+    result = 'digraph {\n'
+    result += _node_to_dot(t)
+    result += '}'
+    return result
+
+def dot_to_png(dot, filename):
+    """Render dot source to the PNG file filename with graphviz's "dot"."""
+    from subprocess import Popen, PIPE
+    # "dot" is looked up on PATH rather than assumed to be in /usr/bin.
+    p = Popen(['dot',
+               '-T', 'png',
+               '-o', filename],
+              stdin=PIPE)
+    # communicate() feeds stdin and waits for the process to exit.
+    p.communicate(dot.encode('utf-8'))
+
+
+class PathTransformer:
+    """
+    Similar to an ast.NodeTransformer, but passes in a path when visiting a node
+
+    The path is passed in as a list of (node, field, index) triples, outermost
+    ancestor first.  Each triple says where the next node down is stored:
+    getattr(node, field)[index], or getattr(node, field) if index is None.
+    """
+    def visit(self, node, path=None):
+        """Visit a node."""
+        if path is None:
+            path = []
+        method = 'visit_' + node.__class__.__name__
+        visitor = getattr(self, method, self.generic_visit)
+        return visitor(node, path)
+
+    def generic_visit(self, node, path):
+        for field, old_value in ast.iter_fields(node):
+            old_value = getattr(node, field, None)
+            if isinstance(old_value, list):
+                new_values = []
+                for idx, value in enumerate(old_value):
+                    if isinstance(value, ast.AST):
+                        value = self.visit(value, path + [(node, field, idx)])
+                        if value is None:
+                            continue
+                        elif not isinstance(value, ast.AST):
+                            new_values.extend(value)
+                            continue
+                    new_values.append(value)
+                old_value[:] = new_values
+            elif isinstance(old_value, ast.AST):
+                new_node = self.visit(old_value, path + [(node, field, None)])
+                if new_node is None:
+                    delattr(node, field)
+                else:
+                    setattr(node, field, new_node)
+        return node
+
+def make_assignment(name, expr, locnode):
+    """Build the statement "name = expr", located where locnode is."""
+    name_node = ast.copy_location(ast.Name(id=name, ctx=ast.Store()), locnode)
+    return ast.copy_location(ast.Assign(targets=[name_node],
+                                        value=expr),
+                             locnode)
+
+def ast_clone(node):
+    """Return a deep copy of the tree rooted at the AST node node."""
+    clone = node.__class__()
+    clone = ast.copy_location(clone, node)
+    for name, value in ast.iter_fields(node):
+        if isinstance(value, ast.AST):
+            cvalue = ast_clone(value)
+        elif isinstance(value, list):
+            # Lists can also hold plain values (e.g. the names listed by a
+            # "global" statement); those are shared, not re-instantiated.
+            cvalue = [ast_clone(el) if isinstance(el, ast.AST) else el
+                      for el in value]
+        else:
+            cvalue = value
+        setattr(clone, name, cvalue)
+    return clone
+
+# Nodes that make a function body unsuitable for pasting into a caller:
+# they bind names through plain strings instead of Name nodes (which the
+# renaming done by InlineBodyFixups cannot see), open a nested scope, or
+# turn the function into a generator.
+_UNINLINABLE = tuple(getattr(ast, name)
+                     for name in ('FunctionDef', 'ClassDef', 'Lambda',
+                                  'Global', 'Nonlocal', 'Yield', 'YieldFrom',
+                                  'Import', 'ImportFrom', 'ExceptHandler')
+                     if hasattr(ast, name))
+
+# Expressions that evaluate their operands conditionally, repeatedly or in
+# a scope of their own: a call below one of these cannot be moved in front
+# of the enclosing statement without changing what the code does.
+_NO_HOIST = (ast.BoolOp, ast.IfExp, ast.Lambda, ast.ListComp, ast.SetComp,
+             ast.DictComp, ast.GeneratorExp)
+
+def _binding_counts(t):
+    """Map each name to the number of nodes in the tree t that bind it."""
+    counts = {}
+    for n in ast.walk(t):
+        if isinstance(n, (ast.FunctionDef, ast.ClassDef)):
+            name = n.name
+        elif isinstance(n, ast.arg):
+            name = n.arg
+        elif isinstance(n, ast.alias):
+            name = (n.asname or n.name).split('.')[0]
+        elif (isinstance(n, ast.Name)
+              and isinstance(n.ctx, (ast.Store, ast.Del))):
+            name = n.id
+        else:
+            continue
+        counts[name] = counts.get(name, 0) + 1
+    return counts
+
+def _is_inlinable_body(defn):
+    """
+    Check that the body of defn can be pasted into a caller: it has to end
+    in its only "return", that return has to carry a value, and the body
+    may neither call defn by name nor contain any _UNINLINABLE node.
+    """
+    if not defn.body:
+        return False
+    last = defn.body[-1]
+    if not isinstance(last, ast.Return) or last.value is None:
+        return False
+    for stmt in defn.body:
+        for n in ast.walk(stmt):
+            if isinstance(n, _UNINLINABLE):
+                return False
+            if isinstance(n, ast.Return) and n is not last:
+                return False
+            if (isinstance(n, ast.Call) and isinstance(n.func, ast.Name)
+                and n.func.id == defn.name):
+                # Recursive functions are not inlined.
+                return False
+    return True
+
+def _local_names(defn):
+    """Names local to defn: its parameters plus every name its body binds."""
+    names = set(formal.arg for formal in defn.args.args)
+    for stmt in defn.body:
+        for n in ast.walk(stmt):
+            if (isinstance(n, ast.Name)
+                and isinstance(n.ctx, (ast.Store, ast.Del))):
+                names.add(n.id)
+    return names
+
+class InlineBodyFixups(ast.NodeTransformer):
+    """
+    Fix up the cloned body of a function, for inlining
+
+    varprefix is prepended to the function's own variables so that they
+    cannot clash with the caller's.  local_names is the set of names to
+    rename; with the default of None every name is renamed.
+    """
+    def __init__(self, varprefix, local_names=None):
+        self.varprefix = varprefix
+        self.local_names = local_names
+
+    def visit_Name(self, node):
+        # Only rename the function's locals: globals and builtins used by
+        # the body must still be found under their own names.
+        # FIXME: we really need the symbol table for this to work properly
+        if self.local_names is None or node.id in self.local_names:
+            node.id = self.varprefix + node.id
+        return node
+
+    def visit_Return(self, node):
+        self.generic_visit(node)
+        # replace (the final) return with "__returnval__ = expr":
+        return make_assignment(self.varprefix + "__returnval__", node.value, node)
+
+class FunctionInliner(PathTransformer):
+    """
+    Replace calls to the function defined by defn with a copy of its body.
+
+    The copied statements are inserted in front of the statement containing
+    the call, and the call itself becomes a load of the variable holding
+    the return value.  Call sites that cannot be handled are left alone.
+    """
+    def __init__(self, tree, defn):
+        self.tree = tree
+        self.defn = defn
+
+    def _find_statement(self, node, path):
+        """
+        Locate the statement containing node.
+
+        Returns (parent, field, stmt), where stmt is an element of the list
+        getattr(parent, field), or None if code cannot safely be inserted
+        in front of that statement on behalf of node.
+        """
+        child = node
+        for parent, field, idx in reversed(path):
+            if isinstance(child, ast.stmt):
+                if idx is None:
+                    return None
+                return parent, field, child
+            if isinstance(parent, _NO_HOIST):
+                return None
+            if (isinstance(parent, ast.While) and field == 'test'
+                or isinstance(parent, ast.Assert) and field == 'msg'):
+                # Evaluated once per iteration / only when the assert fails.
+                return None
+            child = parent
+        return None
+
+    def visit_Call(self, node, path):
+        # Visit children first, so that calls nested in the arguments are
+        # inlined, and their statements inserted, before this one:
+        self.generic_visit(node, path)
+
+        # Only a call through a plain name can be matched against defn;
+        # method calls, calls of call results etc. are left alone:
+        if not isinstance(node.func, ast.Name):
+            return node
+
+        if node.func.id != self.defn.name:
+            return node
+
+        # Only purely positional calls with the right number of arguments
+        # map one-to-one onto the parameters:
+        formals = self.defn.args.args
+        if (node.keywords
+            or getattr(node, 'starargs', None) is not None
+            or getattr(node, 'kwargs', None) is not None
+            or len(node.args) != len(formals)
+            or any(isinstance(actual, ast.Starred) for actual in node.args)):
+            return node
+
+        # We need to find the insertion point for statements.  The
+        # statement is looked up by identity: the indexes recorded in path
+        # go stale as soon as an earlier inlining has lengthened the list.
+        where = self._find_statement(node, path)
+        if where is None:
+            return node
+        parent, field, stmt = where
+        seq = getattr(parent, field)
+        for pos, candidate in enumerate(seq):
+            if candidate is stmt:
+                break
+        else:
+            return node
+
+        _debug('Inlining call to: %r' % node.func.id)
+        _debug('Path: %r' % path)
+        varprefix = '__inline%x__' % id(node)
+
+        compound = []
+
+        # Create assignment statements of the form:
+        #    __inline__x = expr for x
+        # for each parameter
+        # We will insert before the callsite
+        for formal, actual in zip(formals, node.args):
+            assign = make_assignment(varprefix+formal.arg, actual, node)
+            compound.append(assign)
+
+        # Make inline body, generating various statements
+        # ending with:
+        #    __inline____returnval = expr
+        fixer = InlineBodyFixups(varprefix, _local_names(self.defn))
+        for body_stmt in self.defn.body:
+            inlined = fixer.visit(ast_clone(body_stmt))
+            # Give the copy the location of the callsite: with the line
+            # numbers of the definition, the caller's line numbers would
+            # no longer be non-decreasing (presumably what trips the
+            # "d_lineno >= 0" assertion in compile.c -- to be confirmed).
+            for n in ast.walk(inlined):
+                ast.copy_location(n, node)
+            compound.append(inlined)
+
+        # Splice the compound statements into place.  A new list is built
+        # because generic_visit() may still be iterating over the old one.
+        setattr(parent, field, seq[:pos] + compound + seq[pos:])
+
+        # Replace the call with a load from __inline____returnval__
+        return ast.copy_location(ast.Name(id=varprefix + "__returnval__",
+                                          ctx=ast.Load()), node)
+
+def _inline_function_calls(t):
+    """Inline calls to the simple top-level functions defined in t."""
+    body = getattr(t, 'body', None)
+    if not isinstance(body, list):
+        # e.g. an Expression tree: there are no statements to look at.
+        return t
+
+    bindings = _binding_counts(t)
+
+    # Locate top-level function defs:
+    inlinable_function_defs = {}
+    for s in body:
+        if isinstance(s, ast.FunctionDef):
+            # Only inline "simple" calling conventions for now:
+            if len(s.decorator_list) > 0:
+                continue
+
+            if (s.args.vararg is not None or
+                s.args.kwarg is not None or
+                s.args.kwonlyargs != [] or
+                s.args.defaults != [] or
+                s.args.kw_defaults != []):
+                continue
+
+            # A name that is bound anywhere else as well may refer to
+            # something different by the time a call is executed:
+            if bindings.get(s.name) != 1:
+                continue
+
+            # Restrict to just those functions with only a "return" at
+            # the end, no "return" in awkward places
+            # (but could have other control flow)
+            if not _is_inlinable_body(s):
+                continue
+
+            # Possible inline candidate:
+            inlinable_function_defs[s.name] = s
+
+    _debug('inlinable_function_defs:%r' % inlinable_function_defs)
+
+    # Locate call sites:
+    for name in inlinable_function_defs:
+        _debug('inlining calls to %r' % name)
+        inliner = FunctionInliner(t, inlinable_function_defs[name])
+        inliner.visit(t)
+
+    return t
+
+
+# For now restrict ourselves to just a few places:
+def is_test_code(t):
+    """True if t defines a function named 'function_to_be_inlined'."""
+    for n in ast.walk(t):
+        if isinstance(n, ast.FunctionDef):
+            if n.name == 'function_to_be_inlined':
+                return True
+    return False
+
+def optimize_ast(t, filename):
+    """
+    Entry point called by the compiler: return the tree to compile in
+    place of t (which may be t itself, modified in place).  filename is
+    only used for tracing.
+    """
+    _debug("optimize_ast called: %s" % filename)
+    if is_test_code(t):
+        if DEBUG:
+            _debug(t)
+            _debug(ast.dump(t))
+            dot_to_png(to_dot(t), 'before.png')
+
+        t = _inline_function_calls(t)
+
+        if DEBUG:
+            dot_to_png(to_dot(t), 'after.png')
+    return t