diff -r 6f4e0c462daf Lib/lib2to3/pgen2/tokenize.py --- a/Lib/lib2to3/pgen2/tokenize.py Wed Jul 22 14:49:13 2015 +0300 +++ b/Lib/lib2to3/pgen2/tokenize.py Thu Jul 23 00:59:06 2015 +0300 @@ -366,10 +366,11 @@ contline = None indents = [0] - # 'stashed' and 'ctx' are used for async/await parsing + # 'stashed' and 'async_*' are used for async/await parsing stashed = None - ctx = [('sync', 0)] - in_async = 0 + async_def = False + async_def_indent = 0 + async_def_nl = False while 1: # loop over lines in stream try: @@ -438,15 +439,18 @@ ("", lnum, pos, line)) indents = indents[:-1] - cur_indent = indents[-1] - while len(ctx) > 1 and ctx[-1][1] >= cur_indent: - if ctx[-1][0] == 'async': - in_async -= 1 - assert in_async >= 0 - ctx.pop() + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 yield (DEDENT, '', (lnum, pos), (lnum, pos), line) + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + else: # continued statement if not line: raise TokenError("EOF in multi-line statement", (lnum, 0)) @@ -466,10 +470,13 @@ newline = NEWLINE if parenlev > 0: newline = NL + elif async_def: + async_def_nl = True if stashed: yield stashed stashed = None yield (newline, token, spos, epos, line) + elif initial == '#': assert not token.endswith("\n") if stashed: @@ -508,7 +515,7 @@ yield (STRING, token, spos, epos, line) elif initial in namechars: # ordinary name if token in ('async', 'await'): - if in_async: + if async_def: yield (ASYNC if token == 'async' else AWAIT, token, spos, epos, line) continue @@ -523,15 +530,13 @@ and stashed[0] == NAME and stashed[1] == 'async'): - ctx.append(('async', indents[-1])) - in_async += 1 + async_def = True + async_def_indent = indents[-1] yield (ASYNC, stashed[1], stashed[2], stashed[3], stashed[4]) stashed = None - else: - ctx.append(('sync', indents[-1])) if stashed: yield stashed diff -r 6f4e0c462daf Lib/lib2to3/tests/test_parser.py --- a/Lib/lib2to3/tests/test_parser.py Wed Jul 22 14:49:13 2015 +0300 +++ b/Lib/lib2to3/tests/test_parser.py Thu Jul 23 00:59:06 2015 +0300 @@ -67,10 +67,32 @@ await x """) + self.validate("""async def foo(): + + def foo(): pass + + def foo(): pass + + await x + """) + + self.validate("""async def foo(): return await a""") + + self.validate("""def foo(): + def foo(): pass + async def foo(): await x + """) + self.invalid_syntax("await x") self.invalid_syntax("""def foo(): await x""") + self.invalid_syntax("""def foo(): + def foo(): pass + async def foo(): pass + await x + """) + def test_async_var(self): self.validate("""async = 1""") self.validate("""await = 1""") diff -r 6f4e0c462daf Lib/test/test_coroutines.py --- a/Lib/test/test_coroutines.py Wed Jul 22 14:49:13 2015 +0300 +++ b/Lib/test/test_coroutines.py Thu Jul 23 00:59:06 2015 +0300 @@ -211,6 +211,10 @@ pass """, + """async def foo(a:await b): + pass + """, + """def baz(): async def foo(a=await b): pass diff -r 6f4e0c462daf Lib/test/test_tokenize.py --- a/Lib/test/test_tokenize.py Wed Jul 22 14:49:13 2015 +0300 +++ b/Lib/test/test_tokenize.py Thu Jul 23 00:59:06 2015 +0300 @@ -840,6 +840,79 @@ OP ')' (1, 19) (1, 20) OP ':' (1, 20) (1, 21) AWAIT 'await' (1, 22) (1, 27) + + >>> dump_tokens('''def f(): + ... + ... def baz(): pass + ... async def bar(): pass + ... + ... await = 2''') + ENCODING 'utf-8' (0, 0) (0, 0) + NAME 'def' (1, 0) (1, 3) + NAME 'f' (1, 4) (1, 5) + OP '(' (1, 5) (1, 6) + OP ')' (1, 6) (1, 7) + OP ':' (1, 7) (1, 8) + NEWLINE '\\n' (1, 8) (1, 9) + NL '\\n' (2, 0) (2, 1) + INDENT ' ' (3, 0) (3, 2) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + OP '(' (3, 9) (3, 10) + OP ')' (3, 10) (3, 11) + OP ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) + NEWLINE '\\n' (3, 17) (3, 18) + ASYNC 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + OP '(' (4, 15) (4, 16) + OP ')' (4, 16) (4, 17) + OP ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) + NEWLINE '\\n' (4, 23) (4, 24) + NL '\\n' (5, 0) (5, 1) + NAME 'await' (6, 2) (6, 7) + OP '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) + DEDENT '' (7, 0) (7, 0) + + >>> dump_tokens('''async def f(): + ... + ... def baz(): pass + ... async def bar(): pass + ... + ... await = 2''') + ENCODING 'utf-8' (0, 0) (0, 0) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'f' (1, 10) (1, 11) + OP '(' (1, 11) (1, 12) + OP ')' (1, 12) (1, 13) + OP ':' (1, 13) (1, 14) + NEWLINE '\\n' (1, 14) (1, 15) + NL '\\n' (2, 0) (2, 1) + INDENT ' ' (3, 0) (3, 2) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + OP '(' (3, 9) (3, 10) + OP ')' (3, 10) (3, 11) + OP ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) + NEWLINE '\\n' (3, 17) (3, 18) + ASYNC 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + OP '(' (4, 15) (4, 16) + OP ')' (4, 16) (4, 17) + OP ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) + NEWLINE '\\n' (4, 23) (4, 24) + NL '\\n' (5, 0) (5, 1) + AWAIT 'await' (6, 2) (6, 7) + OP '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) + DEDENT '' (7, 0) (7, 0) """ from test import support diff -r 6f4e0c462daf Lib/tokenize.py --- a/Lib/tokenize.py Wed Jul 22 14:49:13 2015 +0300 +++ b/Lib/tokenize.py Thu Jul 23 00:59:06 2015 +0300 @@ -498,10 +498,11 @@ contline = None indents = [0] - # 'stashed' and 'ctx' are used for async/await parsing + # 'stashed' and 'async_*' are used for async/await parsing stashed = None - ctx = [('sync', 0)] - in_async = 0 + async_def = False + async_def_indent = 0 + async_def_nl = False if encoding is not None: if encoding == "utf-8-sig": @@ -579,15 +580,18 @@ ("", lnum, pos, line)) indents = indents[:-1] - cur_indent = indents[-1] - while len(ctx) > 1 and ctx[-1][1] >= cur_indent: - if ctx[-1][0] == 'async': - in_async -= 1 - assert in_async >= 0 - ctx.pop() + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + else: # continued statement if not line: raise TokenError("EOF in multi-line statement", (lnum, 0)) @@ -609,8 +613,13 @@ if stashed: yield stashed stashed = None - yield TokenInfo(NL if parenlev > 0 else NEWLINE, - token, spos, epos, line) + if parenlev > 0: + yield TokenInfo(NL, token, spos, epos, line) + else: + yield TokenInfo(NEWLINE, token, spos, epos, line) + if async_def: + async_def_nl = True + elif initial == '#': assert not token.endswith("\n") if stashed: @@ -644,7 +653,7 @@ yield TokenInfo(STRING, token, spos, epos, line) elif initial.isidentifier(): # ordinary name if token in ('async', 'await'): - if in_async: + if async_def: yield TokenInfo( ASYNC if token == 'async' else AWAIT, token, spos, epos, line) @@ -660,15 +669,13 @@ and stashed.type == NAME and stashed.string == 'async'): - ctx.append(('async', indents[-1])) - in_async += 1 + async_def = True + async_def_indent = indents[-1] yield TokenInfo(ASYNC, stashed.string, stashed.start, stashed.end, stashed.line) stashed = None - else: - ctx.append(('sync', indents[-1])) if stashed: yield stashed diff -r 6f4e0c462daf Parser/tokenizer.c --- a/Parser/tokenizer.c Wed Jul 22 14:49:13 2015 +0300 +++ b/Parser/tokenizer.c Thu Jul 23 00:59:06 2015 +0300 @@ -31,12 +31,6 @@ || c == '_'\ || (c >= 128)) -/* The following DEFTYPE* flags are used in 'tok_state->deftypestack', - and should be removed in 3.7, when async/await are regular - keywords. */ -#define DEFTYPE_ASYNC 1 -#define DEFTYPE_HAS_NL 2 - extern char *PyOS_Readline(FILE *, FILE *, const char *); /* Return malloc'ed string including trailing \n; empty malloc'ed string for EOF; @@ -133,12 +127,6 @@ tok->indent = 0; tok->indstack[0] = 0; - tok->def = 0; - tok->defstack[0] = 0; - tok->deftypestack[0] = 0; - tok->def_async_behind = 0; - tok->def_in_async = 0; - tok->atbol = 1; tok->pendin = 0; tok->prompt = tok->nextprompt = NULL; @@ -159,6 +147,11 @@ tok->decoding_readline = NULL; tok->decoding_buffer = NULL; #endif + + tok->async_def = 0; + tok->async_def_indent = 0; + tok->async_def_nl = 0; + return tok; } @@ -1350,7 +1343,6 @@ int c; int blankline, nonascii; - int tok_len; struct tok_state ahead_tok; char *ahead_tok_start = NULL, *ahead_top_end = NULL; int ahead_tok_kind; @@ -1443,13 +1435,10 @@ if (tok->pendin < 0) { tok->pendin++; - while (tok->def && tok->defstack[tok->def] >= tok->indent) { - if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) { - tok->def_in_async--; - assert(tok->def_in_async >= 0); - } - tok->def--; - assert(tok->def >= 0); + if (tok->async_def && tok->async_def_indent >= tok->indent) { + tok->async_def = 0; + tok->async_def_indent = 0; + tok->async_def_nl = 0; } return DEDENT; @@ -1460,20 +1449,15 @@ } } - if (!blankline && tok->level == 0 - && tok->def && tok->deftypestack[tok->def] & DEFTYPE_HAS_NL - && tok->defstack[tok->def] >= tok->indent) + if (!blankline + && tok->level == 0 + && tok->async_def + && tok->async_def_indent >= tok->indent + && tok->async_def_nl) { - /* The top function on the stack did have a NEWLINE - token, but didn't have an INDENT. That means that - it's a one-line function and it should now be removed - from the stack. */ - if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) { - tok->def_in_async--; - assert(tok->def_in_async >= 0); - } - tok->def--; - assert(tok->def >= 0); + tok->async_def = 0; + tok->async_def_indent = 0; + tok->async_def_nl = 0; } again: @@ -1528,36 +1512,20 @@ *p_start = tok->start; *p_end = tok->cur; - tok_len = tok->cur - tok->start; - if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) { - /* The current token is 'def'. */ - if (tok->def + 1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return ERRORTOKEN; + if (tok->cur - tok->start == 5) { + /* Current token length is 5. */ + if (tok->async_def) { + /* We're inside an 'async def' function. */ + if (memcmp(tok->start, "async", 5) == 0) + return ASYNC; + if (memcmp(tok->start, "await", 5) == 0) + return AWAIT; } + else if (memcmp(tok->start, "async", 5) == 0) { + /* The current token is 'async'. + Look ahead one token.*/ - /* Advance defs stack. */ - tok->def++; - tok->defstack[tok->def] = tok->indent; - - if (tok->def_async_behind) { - /* The previous token was 'async'. */ - tok->def_async_behind = 0; - tok->deftypestack[tok->def] = DEFTYPE_ASYNC; - tok->def_in_async++; - } - else { - /* This is a regular function (not async def). */ - tok->deftypestack[tok->def] = 0; - } - } - else if (tok_len == 5) { - if (memcmp(tok->start, "async", 5) == 0) { - /* The current token is 'async'. */ memcpy(&ahead_tok, tok, sizeof(ahead_tok)); - - /* Try to look ahead one token. */ ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start, &ahead_top_end); @@ -1567,22 +1535,10 @@ { /* The next token is going to be 'def', so instead of returning 'async' NAME token, we return ASYNC. */ - tok->def_async_behind = 1; + tok->async_def_indent = tok->indent; + tok->async_def = 1; return ASYNC; } - else if (tok->def_in_async) - { - /* We're inside an 'async def' function, so we treat - 'async' token as ASYNC, instead of NAME. */ - return ASYNC; - } - - } - else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async) - { - /* We're inside an 'async def' function, so we treat - 'await' token as AWAIT, instead of NAME. */ - return AWAIT; } } @@ -1597,12 +1553,10 @@ *p_start = tok->start; *p_end = tok->cur - 1; /* Leave '\n' out of the string */ tok->cont_line = 0; - if (tok->def) { - /* Mark the top function on the stack that it had - at least one NEWLINE. That will help us to - distinguish one-line functions from functions - with multiple statements. */ - tok->deftypestack[tok->def] |= DEFTYPE_HAS_NL; + if (tok->async_def) { + /* We're somewhere inside an 'async def' function, and + we've encountered a NEWLINE after its signature. */ + tok->async_def_nl = 1; } return NEWLINE; } diff -r 6f4e0c462daf Parser/tokenizer.h --- a/Parser/tokenizer.h Wed Jul 22 14:49:13 2015 +0300 +++ b/Parser/tokenizer.h Thu Jul 23 00:59:06 2015 +0300 @@ -66,21 +66,12 @@ const char* str; const char* input; /* Tokenizer's newline translated copy of the string. */ - /* `def*` fields are for parsing async/await in a backwards compatible - way. They should be removed in 3.7, when they will become - regular constants. See PEP 492 for more details. */ - int defstack[MAXINDENT]; /* Stack of funcs & indents where they - were defined. */ - int deftypestack[MAXINDENT]; /* Stack of func flags, see DEFTYPE_* - constants. */ - int def; /* Length of stack of func types/flags. */ - int def_async_behind; /* 1 if there was an 'async' token before - a 'def' token. */ - int def_in_async; /* Counter of how deep 'async def's - are nested. If greater than 0, - we are somewhere in an 'async def' - body, so 'async' and 'await' should - be parsed as keywords.*/ + /* async/await related fields; can be removed in 3.7 when async and await + become normal keywords. */ + int async_def; /* =1 if tokens are inside an 'async def' body. */ + int async_def_indent; /* Indentation level of the outermost 'async def'. */ + int async_def_nl; /* =1 if the outermost 'async def' had at least one + NEWLINE token after it. */ }; extern struct tok_state *PyTokenizer_FromString(const char *, int);