diff -r 336137a359ae Include/compile.h
--- a/Include/compile.h	Fri Mar 11 10:27:14 2011 -0500
+++ b/Include/compile.h	Mon Mar 14 23:22:44 2011 -0400
@@ -38,6 +38,9 @@
                                            PyArena *arena);
 PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *);
 
+#ifndef Py_LIMITED_API
+int _PyAST_Optimize(struct _mod *, PyArena *arena);
+#endif
 
 #ifdef __cplusplus
 }
diff -r 336137a359ae Makefile.pre.in
--- a/Makefile.pre.in	Fri Mar 11 10:27:14 2011 -0500
+++ b/Makefile.pre.in	Mon Mar 14 23:22:44 2011 -0400
@@ -274,6 +274,9 @@
 # XXX Note that a build now requires Python exist before the build starts
 ASDLGEN=	$(srcdir)/Parser/asdl_c.py
 
+ASDLTX=		$(srcdir)/Parser/asdl_ct.py
+ASDLTX_FILES=	$(srcdir)/Python/ast_opt.ct
+
 ##########################################################################
 # Python
 
@@ -291,6 +294,7 @@
 		Python/Python-ast.o \
 		Python/asdl.o \
 		Python/ast.o \
+		Python/ast_opt.o \
 		Python/bltinmodule.o \
 		Python/ceval.o \
 		Python/compile.o \
@@ -608,6 +612,9 @@
 $(AST_C): $(AST_ASDL) $(ASDLGEN_FILES)
 	$(ASDLGEN) -c $(AST_C_DIR) $(AST_ASDL)
 
+$(ASDLTX_FILES:.ct=.c): %.c: %.ct $(ASDLTX) $(AST_ASDL)
+	$(ASDLTX) $(AST_ASDL) $< $@
+
 Python/compile.o Python/symtable.o Python/ast.o: $(GRAMMAR_H) $(AST_H)
 
 Python/getplatform.o: $(srcdir)/Python/getplatform.c
diff -r 336137a359ae Parser/asdl_ct.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Parser/asdl_ct.py	Mon Mar 14 23:22:44 2011 -0400
@@ -0,0 +1,272 @@
+#!/usr/bin/env python
+"Generate code for AST visitors."
+
+import asdl
+import re
+import sys
+
+def is_reachable(ty, types, rules):
+    "Are any 'rules' reachable from 'ty' given 'types'."
+
+    # Nodes already seen report False so that recursive types terminate.
+    visited = set()
+    def reachable(ty):
+        if ty in visited:
+            return False
+        visited.add(ty)
+
+        if isinstance(ty, asdl.Id):
+            if ty.value in rules:
+                return True
+            if ty.value not in types:
+                return False
+            return reachable(types[ty.value])
+        elif isinstance(ty, asdl.Sum):
+            return any(map(reachable, ty.types))
+        elif isinstance(ty, asdl.Constructor):
+            if ty.name.value in rules:
+                return True
+            return any(reachable(field.type) for field in ty.fields)
+        elif isinstance(ty, asdl.Product):
+            return any(reachable(field.type) for field in ty.fields)
+        else:
+            raise TypeError(type(ty))
+
+    return reachable(ty)
+
+
+MACRO_DEF = """
+#define CALL(FUNC, TYPE, ARG) \\
+    if (!FUNC((ARG){0})) \\
+        return 0;
+
+#define CALL_OPT(FUNC, TYPE, ARG) \\
+    if ((ARG) != NULL && !FUNC((ARG){0})) \\
+        return 0;
+
+#define CALL_SEQ(FUNC, TYPE, ARG) {{ \\
+    int i; \\
+    asdl_seq *seq = (ARG); /* avoid variable capture */ \\
+    for (i = 0; i < asdl_seq_LEN(seq); i++) {{ \\
+        TYPE elt = (TYPE)asdl_seq_GET(seq, i); \\
+        /* XXX: kw_defaults has NULL elements, because it's \\
+           sized to the number of kw args */ \\
+        if (elt != NULL && !FUNC(elt{0})) \\
+            return 0; \\
+    }} \\
+}}
+"""
+
+MACRO_UNDEF = """
+#undef CALL
+#undef CALL_OPT
+#undef CALL_SEQ
+"""
+
+BANNER = '/* File automatically generated by Parser/asdl_ct.py. */\n'
+
+class Visitor:
+    "One visitor definition."
+
+    def __init__(self, name, types):
+        self.name = name
+        self.types = types
+        self.rules = {}
+        self._funcs = {}
+
+    def add_rule(self, name, func, kind):
+        "Add @kind(name, func) rule."
+
+        if self._funcs:
+            raise RuntimeError('Visitor already generated.')
+        if name in self.rules:
+            raise NameError('{0} already registered to {1}'
+                            .format(name, self.rules[name]))
+        self.rules[name] = (func, kind)
+
+    def generate(self, out, start, ctx):
+        "Generate visitor function."
+
+        arg = ', ctx_' if ctx else ''
+        out(MACRO_DEF.format(arg), depth=0)
+
+        self._ctx = ctx
+        self._reach = {}
+        self._stack = []
+        self._need_func(start)
+        while self._stack:
+            self._process(out, self._stack.pop())
+
+        out(MACRO_UNDEF, depth=0)
+
+    def write_protos(self, out):
+        "Write prototypes for generated functions."
+
+        if not self._funcs:
+            raise RuntimeError('Visitor not generated.')
+
+        for proto in self._funcs.values():
+            out(proto + ';', depth=0)
+
+    def used(self):
+        "Has any function been generated for this visitor."
+        return bool(self._funcs)
+
+    def _process(self, out, name):
+        "Emit the visitor function for the type called 'name'."
+        def worker():
+            node = self.types[name]
+            if isinstance(node, (asdl.Constructor, asdl.Product)):
+                self._process_case(out, node, nodety, depth=1)
+            elif isinstance(node, asdl.Sum):
+                out('switch (node_->kind) {', depth=1)
+                for ty in node.types:
+                    if self._can_reach(ty):
+                        out('case ' + ty.name.value + '_kind:', depth=1)
+                        self._process_case(out, ty, nodety, depth=2)
+                        out('break;', depth=2)
+                out('default:', depth=1)
+                out('break;', depth=2)
+                out('}', depth=1)
+            else:
+                raise TypeError(type(node))
+
+        out(self._funcs[name], depth=0)
+        out('{', depth=0)
+        nodety = name + '_ty'
+        self._with_kind(out, name, nodety, 'node_', worker, depth=1)
+        out('return 1;\n}\n', depth=1)
+
+    def _process_case(self, out, node, nodety, depth):
+        "Emit calls visiting the fields of one constructor or product."
+        assert isinstance(node, (asdl.Constructor, asdl.Product))
+        def worker():
+            for field in node.fields:
+                ty = field.type
+                assert isinstance(ty, asdl.Id)
+                if not self._can_reach(ty):
+                    continue
+                func_name = self._need_func(ty.value)
+                nodety = ty.value + "_ty"
+                kind = 'OPT' if field.opt else 'SEQ' if field.seq else ''
+                self._call(out, func_name, kind, nodety, prefix + field.name.value,
+                           depth=depth)
+
+        if isinstance(node, asdl.Constructor):
+            name = node.name.value
+            prefix = 'node_->v.' + name + '.'
+        else:
+            name = ''
+            prefix = 'node_->'
+        self._with_kind(out, name, nodety, 'node_', worker, depth=depth)
+
+    def _with_kind(self, out, name, nodety, arg, func, depth):
+        "Run 'func', emitting the rule for 'name' before, after or instead."
+        rule, kind = self.rules.get(name, (None, None))
+        if kind in ('pre', 'just'):
+            self._call(out, rule, '', nodety, arg, depth=depth)
+        if kind != 'just':
+            func()
+        if kind == 'post':
+            self._call(out, rule, '', nodety, arg, depth=depth)
+
+    def _can_reach(self, ty):
+        "Memoized is_reachable() for the current generate() run."
+        if ty in self._reach:
+            return self._reach[ty]
+        self._reach[ty] = can = is_reachable(ty, self.types, self.rules)
+        return can
+
+    def _call(self, out, func, kind, type, arg, depth):
+        "Emit a CALL, CALL_OPT or CALL_SEQ macro invocation."
+        if kind:
+            kind = '_' + kind
+        out('CALL{0}({1}, {2}, {3});'.format(kind, func, type, arg),
+            depth=depth)
+
+    def _need_func(self, name):
+        "Return the visitor function name for 'name', queueing it if new."
+        func_name = self.name + '_' + name
+        if name not in self._funcs:
+            proto = "static int {0}({1}_ty node_".format(func_name, name)
+            if self._ctx:
+                proto += ', ' + self._ctx + ' ctx_'
+            proto += ')'
+            self._funcs[name] = proto
+            self._stack.append(name)
+        return func_name
+
+
+class Processor:
+    "Expand visitor directives found in a template file."
+
+    def __init__(self, asdl_name):
+        self.mod = asdl.parse(asdl_name)
+        self.visitors = {}
+
+    def process(self, infile, outfile):
+        "Expand the directives in 'infile' and write the result to 'outfile'."
+        # [^()]* accepts exactly what (?:[^,()]*,?)* did, without the nested
+        # repetition that can backtrack exponentially on malformed input.
+        with open(infile) as f:
+            s = re.sub(r'@(\w+)\(([^()]*)\)\n',
+                       self._action, f.read())
+        with open(outfile, 'w') as f:
+            f.write(BANNER)
+            f.write(s)
+        self._warn()
+
+    def _action(self, match):
+        "Handle one directive; the return value replaces it in the output."
+        cmd = match.group(1)
+        args = list(map(str.strip, match.group(2).split(',')))
+        if cmd in ('pre', 'post', 'just'):
+            if len(args) != 3:
+                raise TypeError(cmd + ' expects 3 arguments')
+
+            v = self.visitors.setdefault(args[0],
+                                         Visitor(args[0], self.mod.types))
+            v.add_rule(args[1], args[2], cmd)
+            return ''
+        if cmd == 'visitor':
+            if len(args) < 3:
+                raise TypeError(cmd + ' expects at least 3 arguments')
+            if args[0] not in self.visitors:
+                raise KeyError('visitor ' + args[0] + ' is not defined')
+
+            return self._gen(self.visitors[args[0]], args[2:], args[1])
+
+        raise NameError('unknown command ' + cmd)
+
+    def _gen(self, visitor, starts, ctx):
+        "Return prototypes followed by the code generated for 'starts'."
+        def output(s, depth):
+            out.append('    ' * depth + s)
+
+        out = []
+        for start in starts:
+            visitor.generate(output, start, ctx)
+        code = '\n'.join(out)
+
+        out = []
+        visitor.write_protos(output)
+        protos = '\n'.join(out)
+        return protos + code
+
+    def _warn(self):
+        "Report visitors that have rules but were never generated."
+        for v in self.visitors.values():
+            if not v.used():
+                sys.stderr.write('warning: unused visitor ' +
+                                 v.name + '\n')
+
+
+if __name__ == "__main__":
+    args = sys.argv
+    if len(args) != 4:
+        sys.stdout.write("usage: {0} ASDL_FILE INPUT_FILE OUTPUT_FILE\n"
+                         .format(args[0]))
+        sys.exit(1)
+    p = Processor(args[1])
+    p.process(args[2], args[3])
+
diff -r 336137a359ae Python/ast_opt.ct
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Python/ast_opt.ct	Mon Mar 14 23:22:44 2011 -0400
@@ -0,0 +1,253 @@
+#include "Python.h"
+#include "Python-ast.h"
+
+
+/* Turn 'node' into a literal holding 'val'; the reference to 'val' is consumed.
+   A NULL 'val' means the value could not be computed: the node is left
+   untouched and any error except KeyboardInterrupt is discarded.
+   Returns 1 on success and 0 with an exception set on failure. */
+static int make_lit(expr_ty node, PyObject *val, PyArena *arena)
+{
+    if (val == NULL) {
+        /* Never return success with an exception still pending. */
+        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt))
+            return 0;
+        PyErr_Clear();
+        return 1;
+    }
+    if (PyArena_AddPyObject(arena, val) < 0) {
+        Py_DECREF(val);
+        return 0;
+    }
+    node->kind = Lit_kind;
+    node->v.Lit.v = val;
+    return 1;
+}
+
+#define MOVE_NODE(TO, FROM) (memcpy((TO), (FROM), sizeof(struct _expr)))
+
+/* Logical negation of 'v' as a new reference, or NULL on error. */
+static PyObject *unary_not(PyObject *v)
+{
+    PyObject *res;
+    int r = PyObject_IsTrue(v);
+    if (r < 0)
+        return NULL;
+    /* make_lit() hands the result to the arena, which releases it later,
+       so a borrowed reference to a singleton must not be returned. */
+    res = r ? Py_False : Py_True;
+    Py_INCREF(res);
+    return res;
+}
+
+@post(astfold, UnaryOp, fold_unaryop)
+static int fold_unaryop(expr_ty node, PyArena *arena)
+{
+    typedef PyObject *(*unary_op)(PyObject*);
+    static const unary_op ops[] = {
+        PyNumber_Invert,
+        unary_not,
+        PyNumber_Positive,
+        PyNumber_Negative
+    };
+    /* Eq and NotEq are often implemented in terms of one another, so
+       folding not (self == other) into self != other breaks implementation
+       of !=. Detecting such cases doesn't seem worthwhile.
+       Python uses < and > for 'is subset'/'is superset' operations on sets.
+       They don't satisfy not folding laws. */
+    static const int negated_op[] = {
+        0, 0, 0, 0, 0, 0, IsNot, Is, NotIn, In
+    };
+
+    expr_ty arg;
+    PyObject *newval;
+
+    arg = node->v.UnaryOp.operand;
+    if (arg->kind != Lit_kind) {
+        /* Fold not into comparison */
+        if (node->v.UnaryOp.op == Not &&
+            arg->kind == Compare_kind &&
+            asdl_seq_LEN(arg->v.Compare.ops) == 1) {
+            int op = asdl_seq_GET(arg->v.Compare.ops, 0);
+            op = negated_op[op-1];
+            if (op) {
+                asdl_seq_SET(arg->v.Compare.ops, 0, op);
+                MOVE_NODE(node, arg);
+                return 1;
+            }
+        }
+        /* TODO: assume that all unary operations cancel out
+           themselves, i.e. op op x == x? */
+        return 1;
+    }
+
+    newval = ops[node->v.UnaryOp.op - 1](arg->v.Lit.v);
+    return make_lit(node, newval, arena);
+}
+
+static PyObject *binary_power(PyObject *base, PyObject *power)
+{
+    return PyNumber_Power(base, power, Py_None);
+}
+
+@post(astfold, BinOp, fold_binop)
+static int fold_binop(expr_ty node, PyArena *arena)
+{
+    typedef PyObject *(*bin_op)(PyObject*, PyObject*);
+    static const bin_op ops[] = {
+        PyNumber_Add,
+        PyNumber_Subtract,
+        PyNumber_Multiply,
+        PyNumber_TrueDivide,
+        PyNumber_Remainder,
+        binary_power,
+        PyNumber_Lshift,
+        PyNumber_Rshift,
+        PyNumber_Or,
+        PyNumber_Xor,
+        PyNumber_And,
+        PyNumber_FloorDivide
+    };
+
+    expr_ty lhs, rhs;
+    Py_ssize_t size;
+    PyObject *newval;
+
+    lhs = node->v.BinOp.left;
+    rhs = node->v.BinOp.right;
+    if (lhs->kind != Lit_kind || rhs->kind != Lit_kind)
+        return 1;
+
+    newval = ops[node->v.BinOp.op - 1](lhs->v.Lit.v, rhs->v.Lit.v);
+    /* Nothing to measure or fold if the operation itself failed. */
+    if (newval == NULL)
+        return make_lit(node, NULL, arena);
+    /* Avoid creating large constants. */
+    size = PyObject_Size(newval);
+    if (size == -1) {
+        /* Objects without a length (e.g. numbers) are always folded. */
+        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
+            Py_DECREF(newval);
+            return 0;
+        }
+        PyErr_Clear();
+    }
+    else if (size > 20) {
+        Py_DECREF(newval);
+        return 1;
+    }
+    return make_lit(node, newval, arena);
+}
+
+/* Build a tuple (or a frozenset if 'make_set' is true) from 'elts'.
+   Returns a new reference, NULL without an exception if some element is
+   not a literal, or NULL with an exception set if construction fails. */
+static PyObject *make_const_tuple(asdl_seq *elts, int make_set)
+{
+    PyObject *newval;
+    int i;
+
+    for (i = 0; i < asdl_seq_LEN(elts); i++) {
+        expr_ty e = (expr_ty)asdl_seq_GET(elts, i);
+        if (e->kind != Lit_kind)
+            return NULL;
+    }
+
+    newval = PyTuple_New(asdl_seq_LEN(elts));
+    if (newval == NULL)
+        return NULL;
+
+    for (i = 0; i < asdl_seq_LEN(elts); i++) {
+        expr_ty e = (expr_ty)asdl_seq_GET(elts, i);
+        Py_INCREF(e->v.Lit.v);
+        PyTuple_SET_ITEM(newval, i, e->v.Lit.v);
+    }
+
+    /* Need to create frozen_set instead. */
+    if (make_set) {
+        PyObject *old = newval;
+        newval = PyFrozenSet_New(old);
+        Py_DECREF(old);
+    }
+    return newval;
+}
+
+@post(astfold, Tuple, fold_tuple)
+static int fold_tuple(expr_ty node, PyArena *arena)
+{
+    PyObject *newval;
+
+    if (node->v.Tuple.ctx != Load)
+        return 1;
+
+    newval = make_const_tuple(node->v.Tuple.elts, 0);
+    return make_lit(node, newval, arena);
+}
+
+@post(astfold, Subscript, fold_subscr)
+static int fold_subscr(expr_ty node, PyArena *arena)
+{
+    PyObject *newval;
+    expr_ty arg, idx;
+    slice_ty slice;
+
+    arg = node->v.Subscript.value;
+    slice = node->v.Subscript.slice;
+    if (node->v.Subscript.ctx != Load ||
+        arg->kind != Lit_kind ||
+        /* TODO: handle other types of slices */
+        slice->kind != Index_kind ||
+        slice->v.Index.value->kind != Lit_kind)
+        return 1;
+
+    idx = slice->v.Index.value;
+    newval = PyObject_GetItem(arg->v.Lit.v, idx->v.Lit.v);
+    return make_lit(node, newval, arena);
+}
+
+@post(astfold, Compare, fold_compare)
+static int fold_compare(expr_ty node, PyArena *arena)
+{
+    asdl_int_seq *ops;
+    asdl_seq *args;
+    PyObject *newval;
+    int i;
+
+    ops = node->v.Compare.ops;
+    args = node->v.Compare.comparators;
+    /* TODO: optimize cases with literal arguments. */
+    for (i = 0; i < asdl_seq_LEN(ops); i++) {
+        int op;
+        expr_ty arg;
+        asdl_seq *elts;
+
+        op = asdl_seq_GET(ops, i);
+        arg = (expr_ty)asdl_seq_GET(args, i);
+        /* Change literal list or set in 'in' or 'not in' into
+           tuple or frozenset respectively. */
+        /* TODO: do the same when list or set is used as iterable
+           in for loop and comprehensions? */
+        if (op != In && op != NotIn)
+            continue;
+        if (arg->kind == List_kind)
+            elts = arg->v.List.elts;
+        else if (arg->kind == Set_kind)
+            elts = arg->v.Set.elts;
+        else
+            continue;
+
+        newval = make_const_tuple(elts, arg->kind == Set_kind);
+        if (!make_lit(arg, newval, arena))
+            return 0;
+    }
+    return 1;
+}
+
+@visitor(astfold, PyArena*, mod)
+
+/* Optimize the AST in place; returns 1 on success, 0 on failure. */
+int _PyAST_Optimize(mod_ty mod, PyArena *arena)
+{
+    return astfold_mod(mod, arena);
+}
+