diff -r 1452520dfe7b Include/node.h --- a/Include/node.h Fri Jul 08 17:55:01 2016 +0200 +++ b/Include/node.h Fri Jul 08 19:09:24 2016 +0100 @@ -19,6 +19,7 @@ PyAPI_FUNC(node *) PyNode_New(int type); PyAPI_FUNC(int) PyNode_AddChild(node *n, int type, char *str, int lineno, int col_offset); +PyAPI_FUNC(void) PyNode_Compress(node *n); PyAPI_FUNC(void) PyNode_Free(node *n); #ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) _PyNode_SizeOf(node *n); diff -r 1452520dfe7b Misc/NEWS --- a/Misc/NEWS Fri Jul 08 17:55:01 2016 +0200 +++ b/Misc/NEWS Fri Jul 08 19:09:24 2016 +0100 @@ -10,6 +10,10 @@ Core and Builtins ----------------- +- Issue #26415: Reduce peak memory consumption by the parser, by compressing + some branchless stretches in the parse tree into a single node. AST is not + affected. + - Issue #23034: The output of a special Python build with defined COUNT_ALLOCS, SHOW_ALLOC_COUNT or SHOW_TRACK_COUNT macros is now off by default. It can be re-enabled using the "-X showalloccount" option. It now outputs to stderr diff -r 1452520dfe7b Modules/parsermodule.c --- a/Modules/parsermodule.c Fri Jul 08 17:55:01 2016 +0200 +++ b/Modules/parsermodule.c Fri Jul 08 19:09:24 2016 +0100 @@ -32,6 +32,7 @@ #include "Python.h" /* general Python API */ #include "Python-ast.h" /* mod_ty */ +#include "pgenheaders.h" /* bitset operations */ #include "graminit.h" /* symbols defined in the grammar */ #include "node.h" /* internal parser structure */ #include "errcode.h" /* error codes for PyNode_*() */ @@ -43,6 +44,7 @@ #include "ast.h" extern grammar _PyParser_Grammar; /* From graminit.c */ +static grammar ValidationGrammar; /* Initialized from PyInit_parser */ #ifdef lint #include @@ -682,11 +684,11 @@ assert(ISNONTERMINAL(type)); type -= NT_OFFSET; - if (type >= _PyParser_Grammar.g_ndfas) { + if (type >= ValidationGrammar.g_ndfas) { PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree)); return 0; } - nt_dfa = &_PyParser_Grammar.g_dfa[type]; + nt_dfa = &ValidationGrammar.g_dfa[type]; REQ(tree, nt_dfa->d_type); /* Run the DFA for this nonterminal. */ @@ -695,14 +697,21 @@ node *ch = CHILD(tree, pos); int ch_type = TYPE(ch); for (arc = 0; arc < dfa_state->s_narcs; ++arc) { - short a_label = dfa_state->s_arc[arc].a_lbl; - assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); - if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) { - /* The child is acceptable; if non-terminal, validate it recursively. */ + short a_lbl_idx = dfa_state->s_arc[arc].a_lbl; + label *a_label = &ValidationGrammar.g_ll.ll_label[a_lbl_idx]; + if (a_label->lb_type == ch_type) { + + /* If the arc label specifies a string, but the child string + * doesn't match, then skip this arc. */ + if (a_label->lb_str && strcmp(a_label->lb_str, STR(ch))) + continue; + + /* If the child is non-terminal, validate it recursively. */ if (ISNONTERMINAL(ch_type) && !validate_node(ch)) return 0; - /* Update the state, and move on to the next child. */ + /* The child is acceptable. + * Update the state, and move on to the next child. */ dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow]; goto arc_found; } @@ -711,13 +720,18 @@ { short a_label = dfa_state->s_arc->a_lbl; int next_type; + const char* expected_str; if (!a_label) /* Wouldn't accept any more children */ goto illegal_num_children; - next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type; + next_type = ValidationGrammar.g_ll.ll_label[a_label].lb_type; + expected_str = ValidationGrammar.g_ll.ll_label[a_label].lb_str; if (ISNONTERMINAL(next_type)) PyErr_Format(parser_error, "Expected node type %d, got %d.", next_type, ch_type); + else if (expected_str) + PyErr_Format(parser_error, "Illegal terminal: expected '%s'.", + expected_str); else PyErr_Format(parser_error, "Illegal terminal: expected %s.", _PyParser_TokenNames[next_type]); @@ -740,6 +754,140 @@ return 0; } +static int countbits(bitset ss, int nbits) +{ + int i, count = 0; + for (i = 0; i < nbits; ++i) + if (testbit(ss, i)) + ++count; + return count; +} + +/* Set up ValidationGrammar by extending _PyParser_Grammar with + * the "transitive closure" of single-arc productions. + */ + +static void Init_ValidationGrammar(void) +{ + labellist *labels = &_PyParser_Grammar.g_ll; + int ndfas = _PyParser_Grammar.g_ndfas, + nlabels = labels->ll_nlabels; + bitset *reducible_from = (bitset *)PyObject_MALLOC(sizeof(bitset) * ndfas); + short* nt_label = (short *)PyObject_MALLOC(sizeof(short) * ndfas); + dfa* new_dfa = (dfa *)PyObject_MALLOC(sizeof(dfa) * ndfas); + int i, j; + for (i = 0; i < ndfas; ++i) + reducible_from[i] = newbitset(nlabels); + + /* Build the reverse mapping for NT index -> label ID */ + for (i = 0; i < nlabels; ++i) { + int lb_type = labels->ll_label[i].lb_type; + if (ISNONTERMINAL(lb_type)) + nt_label[lb_type - NT_OFFSET] = i; + } + + /* Pass 1: build the bitsets */ + for (i = 0; i < ndfas; ++i) { + dfa *nt_dfa = &_PyParser_Grammar.g_dfa[i]; + state *dfa_state = &nt_dfa->d_state[nt_dfa->d_initial]; + short i_label = nt_label[i]; + int arc; + for (arc = 0; arc < dfa_state->s_narcs; ++arc) { + short a_label = dfa_state->s_arc[arc].a_lbl; + state *next = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow]; + + /* Is `next` a final state? */ + for (j = 0; j < next->s_narcs; ++j) { + if (!next->s_arc[j].a_lbl) { + + /* Yes! Update the bitsets: i is reducible from a_label */ + int lb_type = labels->ll_label[a_label].lb_type; + int other; + addbit(reducible_from[i], a_label); + + if (ISNONTERMINAL(lb_type)) + /* i is transitively reducible from whatever + * a_label's symbol is reducible from. */ + mergebitset(reducible_from[i], + reducible_from[lb_type - NT_OFFSET], + nlabels); + + /* Anything that is reducible from i, is transitively + * reducible from whatever i is reducible from. */ + for (other = 0; other < ndfas; ++other) + if (testbit(reducible_from[other], i_label)) + mergebitset(reducible_from[other], + reducible_from[i], nlabels); + } + } + } + } + + /* Pass 2: generate the extended DFAs */ + for (i = 0; i < ndfas; ++i) { + dfa *old_dfa = &_PyParser_Grammar.g_dfa[i]; + int nstates = old_dfa->d_nstates; + new_dfa[i] = *old_dfa; + new_dfa[i].d_state = (state *)PyObject_MALLOC(sizeof(state) * nstates); + for(j = 0; j < nstates; ++j) { + state *old_state = &old_dfa->d_state[j], + *new_state = &new_dfa[i].d_state[j]; + int need_arcs = old_state->s_narcs; + int k, next_arc = 0; + + /* Count how many arcs we'll need in new_state */ + for (k = 0; k < old_state->s_narcs; ++k) { + short a_label = old_state->s_arc[k].a_lbl; + int lb_type = labels->ll_label[a_label].lb_type; + if (ISNONTERMINAL(lb_type)) + need_arcs += countbits(reducible_from[lb_type - NT_OFFSET], + nlabels); + } + + /* Allocate and initialize the new arcs. */ + new_state->s_narcs = need_arcs; + new_state->s_arc = (arc *)PyObject_MALLOC(sizeof(arc) * need_arcs); + for (k = 0; k < old_state->s_narcs; ++k) { + arc *old_arc = &old_state->s_arc[k]; + short a_label = old_arc->a_lbl; + int lb_type = labels->ll_label[a_label].lb_type; + + /* Copy the original arc; if it's a specific NAME, put it first, + so that it takes precedence over a generic NAME arc. */ + if (labels->ll_label[a_label].lb_str) { + memmove(new_state->s_arc+1, + new_state->s_arc, + sizeof(arc) * next_arc++); + new_state->s_arc[0] = *old_arc; + } else { + new_state->s_arc[next_arc++] = *old_arc; + } + + /* Add new arcs for symbols that lb_type is reducible from. */ + if (ISNONTERMINAL(lb_type)) { + int other_lbl; + lb_type -= NT_OFFSET; + for (other_lbl = 0; other_lbl < nlabels; ++other_lbl) + if (testbit(reducible_from[lb_type], other_lbl)) { + arc *new_arc = &new_state->s_arc[next_arc++]; + new_arc->a_lbl = other_lbl; + new_arc->a_arrow = old_arc->a_arrow; + } + } + } + } + } + + ValidationGrammar.g_ndfas = ndfas; + ValidationGrammar.g_dfa = new_dfa; + ValidationGrammar.g_ll = *labels; + + for (i = 0; i < ndfas; ++i) + delbitset(reducible_from[i]); + PyObject_FREE(reducible_from); + PyObject_FREE(nt_label); +} + /* PyObject* parser_tuple2st(PyObject* self, PyObject* args) * * This is the public function, called from the Python code. It receives a @@ -1197,5 +1345,9 @@ Py_XDECREF(pickler); Py_DECREF(copyreg); } + + /* This could have been a build-time step. */ + Init_ValidationGrammar(); + return module; } diff -r 1452520dfe7b Parser/parser.c --- a/Parser/parser.c Fri Jul 08 17:55:01 2016 +0200 +++ b/Parser/parser.c Fri Jul 08 19:09:24 2016 +0100 @@ -56,12 +56,12 @@ { if (s_empty(s)) Py_FatalError("s_pop: parser stack underflow -- FATAL"); - s->s_top++; + PyNode_Compress(s->s_top++->s_parent); } #else /* !Py_DEBUG */ -#define s_pop(s) (s)->s_top++ +#define s_pop(s) PyNode_Compress((s)->s_top++->s_parent) #endif diff -r 1452520dfe7b Parser/pgenmain.c --- a/Parser/pgenmain.c Fri Jul 08 17:55:01 2016 +0200 +++ b/Parser/pgenmain.c Fri Jul 08 19:09:24 2016 +0100 @@ -186,3 +186,6 @@ vfprintf(stderr, format, va); va_end(va); } + +void PyNode_Compress(node* n) {} // no compression in pgen + diff -r 1452520dfe7b Python/ast.c --- a/Python/ast.c Fri Jul 08 17:55:01 2016 +0200 +++ b/Python/ast.c Fri Jul 08 19:09:24 2016 +0100 @@ -11,6 +11,13 @@ #include +#define case_EXPR case comparison: case lambdef: \ + case term: case factor: case power: \ + case test: case test_nocond: case and_test: case or_test: case not_test: \ + case expr: case xor_expr: case and_expr: case shift_expr: \ + case arith_expr: case star_expr: case atom_expr: case atom + + static int validate_stmts(asdl_seq *); static int validate_exprs(asdl_seq *, expr_context_ty, int); static int validate_nonempty_seq(asdl_seq *, const char *, const char *); @@ -712,7 +719,7 @@ l = 0; for (i = 0; i < NCH(n); i++) { ch = CHILD(n, i); - if (TYPE(ch) == stmt) + if (TYPE(ch) == stmt || TYPE(ch) == simple_stmt || TYPE(ch) == compound_stmt) l += num_stmts(ch); } return l; @@ -723,14 +730,11 @@ case simple_stmt: return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */ case suite: - if (NCH(n) == 1) - return num_stmts(CHILD(n, 0)); - else { - l = 0; - for (i = 2; i < (NCH(n) - 1); i++) - l += num_stmts(CHILD(n, i)); - return l; - } + assert(NCH(n) > 1); + l = 0; + for (i = 2; i < (NCH(n) - 1); i++) + l += num_stmts(CHILD(n, i)); + return l; default: { char buf[128]; @@ -776,7 +780,7 @@ ch = CHILD(n, i); if (TYPE(ch) == NEWLINE) continue; - REQ(ch, stmt); + assert(TYPE(ch) == stmt || TYPE(ch) == simple_stmt || TYPE(ch) == compound_stmt); num = num_stmts(ch); if (num == 1) { s = ast_for_stmt(&c, ch); @@ -785,7 +789,6 @@ asdl_seq_SET(stmts, k++, s); } else { - ch = CHILD(ch, 0); REQ(ch, simple_stmt); for (j = 0; j < num; j++) { s = ast_for_stmt(&c, CHILD(ch, j * 2)); @@ -1120,9 +1123,7 @@ /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is' |'is' 'not' */ - REQ(n, comp_op); - if (NCH(n) == 1) { - n = CHILD(n, 0); + if (NCH(n) == 0) { switch (TYPE(n)) { case LESS: return Lt; @@ -1147,7 +1148,8 @@ return (cmpop_ty)0; } } - else if (NCH(n) == 2) { + REQ(n, comp_op); + if (NCH(n) == 2) { /* handle "not in" and "is not" */ switch (TYPE(CHILD(n, 0))) { case NAME: @@ -1183,7 +1185,10 @@ for (i = 0; i < NCH(n); i += 2) { const node *ch = CHILD(n, i); - assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr); + switch (TYPE(ch)) { + case_EXPR: break; + default: assert(0 && "unexpected type in testlist"); + } expression = ast_for_expr(c, ch); if (!expression) @@ -2192,47 +2197,51 @@ node *ch; expr_ty lower = NULL, upper = NULL, step = NULL; - REQ(n, subscript); - /* subscript: test | [test] ':' [test] [sliceop] sliceop: ':' [test] */ - ch = CHILD(n, 0); - if (NCH(n) == 1 && TYPE(ch) == test) { + switch (TYPE(n)) { + case_EXPR: /* 'step' variable hold no significance in terms of being used over other vars */ - step = ast_for_expr(c, ch); + step = ast_for_expr(c, n); if (!step) return NULL; return Index(step, c->c_arena); + case COLON: + return Slice(NULL, NULL, NULL, c->c_arena); } - if (TYPE(ch) == test) { + REQ(n, subscript); + assert(NCH(n) > 1); + ch = CHILD(n, 0); + if (TYPE(ch) != COLON) { lower = ast_for_expr(c, ch); if (!lower) return NULL; - } - - /* If there's an upper bound it's in the second or third position. */ - if (TYPE(ch) == COLON) { - if (NCH(n) > 1) { - node *n2 = CHILD(n, 1); - - if (TYPE(n2) == test) { + + /* If there's an upper bound it's in the second or third position. */ + if (NCH(n) > 2) { + node *n2 = CHILD(n, 2); + + if (TYPE(n2) != sliceop) { upper = ast_for_expr(c, n2); if (!upper) return NULL; } } - } else if (NCH(n) > 2) { - node *n2 = CHILD(n, 2); - - if (TYPE(n2) == test) { - upper = ast_for_expr(c, n2); - if (!upper) - return NULL; + } else { + REQ(ch, COLON); + if (NCH(n) > 1) { + node *n2 = CHILD(n, 1); + + if (TYPE(n2) != sliceop) { + upper = ast_for_expr(c, n2); + if (!upper) + return NULL; + } } } @@ -2240,11 +2249,9 @@ if (TYPE(ch) == sliceop) { if (NCH(ch) != 1) { ch = CHILD(ch, 1); - if (TYPE(ch) == test) { - step = ast_for_expr(c, ch); - if (!step) - return NULL; - } + step = ast_for_expr(c, ch); + if (!step) + return NULL; } } @@ -2412,17 +2419,14 @@ REQ(n, atom_expr); nch = NCH(n); - - if (TYPE(CHILD(n, 0)) == AWAIT) { + assert(nch > 1); + + if (TYPE(CHILD(n, 0)) == AWAIT) start = 1; - assert(nch > 1); - } e = ast_for_atom(c, CHILD(n, start)); if (!e) return NULL; - if (nch == 1) - return e; if (start && nch == 2) { return Await(e, LINENO(n), n->n_col_offset, c->c_arena); } @@ -2454,13 +2458,17 @@ /* power: atom trailer* ('**' factor)* */ expr_ty e; + node *ch; REQ(n, power); - e = ast_for_atom_expr(c, CHILD(n, 0)); + assert(NCH(n) > 1); + ch = CHILD(n, 0); + if (TYPE(ch) == atom) + e = ast_for_atom(c, ch); + else + e = ast_for_atom_expr(c, ch); if (!e) return NULL; - if (NCH(n) == 1) - return e; - if (TYPE(CHILD(n, NCH(n) - 1)) == factor) { + if (TYPE(CHILD(n, NCH(n) - 2)) == DOUBLESTAR) { expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1)); if (!f) return NULL; @@ -2509,25 +2517,22 @@ yield_expr: 'yield' [yield_arg] */ - asdl_seq *seq; + asdl_seq *seq, *cmps; + asdl_int_seq *ops; + expr_ty expression; int i; - loop: switch (TYPE(n)) { + case lambdef: + case lambdef_nocond: + return ast_for_lambdef(c, n); case test: case test_nocond: - if (TYPE(CHILD(n, 0)) == lambdef || - TYPE(CHILD(n, 0)) == lambdef_nocond) - return ast_for_lambdef(c, CHILD(n, 0)); - else if (NCH(n) > 1) - return ast_for_ifexpr(c, n); - /* Fallthrough */ + assert(NCH(n) > 1); + return ast_for_ifexpr(c, n); case or_test: case and_test: - if (NCH(n) == 1) { - n = CHILD(n, 0); - goto loop; - } + assert(NCH(n) > 1); seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); if (!seq) return NULL; @@ -2543,59 +2548,45 @@ assert(!strcmp(STR(CHILD(n, 1)), "or")); return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena); case not_test: - if (NCH(n) == 1) { - n = CHILD(n, 0); - goto loop; + assert(NCH(n) > 1); + expression = ast_for_expr(c, CHILD(n, 1)); + if (!expression) + return NULL; + + return UnaryOp(Not, expression, LINENO(n), n->n_col_offset, + c->c_arena); + case comparison: + assert(NCH(n) > 1); + ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena); + if (!ops) + return NULL; + cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); + if (!cmps) { + return NULL; } - else { - expr_ty expression = ast_for_expr(c, CHILD(n, 1)); - if (!expression) - return NULL; - - return UnaryOp(Not, expression, LINENO(n), n->n_col_offset, - c->c_arena); - } - case comparison: - if (NCH(n) == 1) { - n = CHILD(n, 0); - goto loop; - } - else { - expr_ty expression; - asdl_int_seq *ops; - asdl_seq *cmps; - ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena); - if (!ops) - return NULL; - cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); - if (!cmps) { + for (i = 1; i < NCH(n); i += 2) { + cmpop_ty newoperator; + + newoperator = ast_for_comp_op(c, CHILD(n, i)); + if (!newoperator) { return NULL; } - for (i = 1; i < NCH(n); i += 2) { - cmpop_ty newoperator; - - newoperator = ast_for_comp_op(c, CHILD(n, i)); - if (!newoperator) { - return NULL; - } - - expression = ast_for_expr(c, CHILD(n, i + 1)); - if (!expression) { - return NULL; - } - - asdl_seq_SET(ops, i / 2, newoperator); - asdl_seq_SET(cmps, i / 2, expression); - } - expression = ast_for_expr(c, CHILD(n, 0)); + + expression = ast_for_expr(c, CHILD(n, i + 1)); if (!expression) { return NULL; } - return Compare(expression, ops, cmps, LINENO(n), - n->n_col_offset, c->c_arena); + asdl_seq_SET(ops, i / 2, newoperator); + asdl_seq_SET(cmps, i / 2, expression); } - break; + expression = ast_for_expr(c, CHILD(n, 0)); + if (!expression) { + return NULL; + } + + return Compare(expression, ops, cmps, LINENO(n), + n->n_col_offset, c->c_arena); case star_expr: return ast_for_starred(c, n); @@ -2609,10 +2600,7 @@ case shift_expr: case arith_expr: case term: - if (NCH(n) == 1) { - n = CHILD(n, 0); - goto loop; - } + assert(NCH(n) > 1); return ast_for_binop(c, n); case yield_expr: { node *an = NULL; @@ -2637,13 +2625,14 @@ return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena); } case factor: - if (NCH(n) == 1) { - n = CHILD(n, 0); - goto loop; - } + assert(NCH(n) > 1); return ast_for_factor(c, n); case power: return ast_for_power(c, n); + case atom: + return ast_for_atom(c, n); + case atom_expr: + return ast_for_atom_expr(c, n); default: PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n)); return NULL; @@ -2824,23 +2813,30 @@ { /* testlist_comp: test (comp_for | (',' test)* [',']) */ /* testlist: test (',' test)* [','] */ + asdl_seq *tmp; assert(NCH(n) > 0); - if (TYPE(n) == testlist_comp) { - if (NCH(n) > 1) + switch (TYPE(n)) { + case testlist_comp: + if (NCH(n) > 1) { assert(TYPE(CHILD(n, 1)) != comp_for); + break; + } + n = CHILD(n, 0); + /* Fallthrough */ + case_EXPR: + return ast_for_expr(c, n); + case testlist: + case testlist_star_expr: + assert(NCH(n) > 1); + break; + default: + PyErr_SetString(PyExc_SystemError, "unexpected testlist type"); + return NULL; } - else { - assert(TYPE(n) == testlist || - TYPE(n) == testlist_star_expr); - } - if (NCH(n) == 1) - return ast_for_expr(c, CHILD(n, 0)); - else { - asdl_seq *tmp = seq_for_testlist(c, n); - if (!tmp) - return NULL; - return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena); - } + tmp = seq_for_testlist(c, n); + if (!tmp) + return NULL; + return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena); } static stmt_ty @@ -3048,8 +3044,9 @@ dotted_name: NAME ('.' NAME)* */ identifier str, name; - - loop: + node *asname_node; + alias_ty a; + switch (TYPE(n)) { case import_as_name: { node *name_node = CHILD(n, 0); @@ -3072,24 +3069,18 @@ return alias(name, str, c->c_arena); } case dotted_as_name: - if (NCH(n) == 1) { - n = CHILD(n, 0); - goto loop; - } - else { - node *asname_node = CHILD(n, 2); - alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0); - if (!a) - return NULL; - assert(!a->asname); - a->asname = NEW_IDENTIFIER(asname_node); - if (!a->asname) - return NULL; - if (forbidden_name(c, a->asname, asname_node, 0)) - return NULL; - return a; - } - break; + assert(NCH(n) > 1); + asname_node = CHILD(n, 2); + a = alias_for_import_name(c, CHILD(n, 0), 0); + if (!a) + return NULL; + assert(!a->asname); + a->asname = NEW_IDENTIFIER(asname_node); + if (!a->asname) + return NULL; + if (forbidden_name(c, a->asname, asname_node, 0)) + return NULL; + return a; case dotted_name: if (NCH(n) == 1) { node *name_node = CHILD(n, 0); @@ -3351,14 +3342,13 @@ int i, total, num, end, pos = 0; node *ch; - REQ(n, suite); + assert(TYPE(n) == suite || TYPE(n) == simple_stmt); total = num_stmts(n); seq = _Py_asdl_seq_new(total, c->c_arena); if (!seq) return NULL; - if (TYPE(CHILD(n, 0)) == simple_stmt) { - n = CHILD(n, 0); + if (TYPE(n) == simple_stmt) { /* simple_stmt always ends with a NEWLINE, and may have a trailing SEMI */ @@ -3377,7 +3367,7 @@ else { for (i = 2; i < (NCH(n) - 1); i++) { ch = CHILD(n, i); - REQ(ch, stmt); + assert(TYPE(ch) == stmt || TYPE(ch) == simple_stmt || TYPE(ch) == compound_stmt); num = num_stmts(ch); if (num == 1) { /* small_stmt or compound_stmt with only one child */ @@ -3388,7 +3378,6 @@ } else { int j; - ch = CHILD(ch, 0); REQ(ch, simple_stmt); for (j = 0; j < NCH(ch); j += 2) { /* statement terminates with a semi-colon ';' */ @@ -3619,7 +3608,7 @@ { /* except_clause: 'except' [test ['as' test]] */ REQ(exc, except_clause); - REQ(body, suite); + assert(TYPE(body) == suite || TYPE(body) == simple_stmt); if (NCH(exc) == 1) { asdl_seq *suite_seq = ast_for_suite(c, body); @@ -3851,10 +3840,7 @@ static stmt_ty ast_for_stmt(struct compiling *c, const node *n) { - if (TYPE(n) == stmt) { - assert(NCH(n) == 1); - n = CHILD(n, 0); - } + assert(TYPE(n) != stmt); // must have been compressed if (TYPE(n) == simple_stmt) { assert(num_stmts(n) == 1); n = CHILD(n, 0); @@ -5060,3 +5046,39 @@ FstringParser_Dealloc(&state); return NULL; } + +void PyNode_Compress(node* n) { + if (NCH(n) == 1) { + node* ch; + switch (TYPE(n)) { + case suite: + case comp_op: + case subscript: + case atom_expr: + case power: + case factor: + case expr: + case xor_expr: + case and_expr: + case shift_expr: + case arith_expr: + case term: + case comparison: + case testlist_star_expr: + case testlist: + case test: + case test_nocond: + case or_test: + case and_test: + case not_test: + case dotted_as_name: + case stmt: + if (STR(n) != NULL) + PyObject_FREE(STR(n)); + ch = CHILD(n, 0); + *n = *ch; + // All grandchildren are now adopted; don't need to free them, so no need for PyNode_Free + PyObject_FREE(ch); + } + } +} diff -r 1452520dfe7b setup.py --- a/setup.py Fri Jul 08 17:55:01 2016 +0200 +++ b/setup.py Fri Jul 08 19:09:24 2016 +0100 @@ -681,7 +681,8 @@ exts.append( Extension('select', ['selectmodule.c']) ) # Fred Drake's interface to the Python parser - exts.append( Extension('parser', ['parsermodule.c']) ) + exts.append( Extension('parser', ['parsermodule.c', + 'Parser/bitset.c']) ) # Memory-mapped files (also works on Win32). exts.append( Extension('mmap', ['mmapmodule.c']) )