diff --git a/Doc/c-api/tokenizer.rst b/Doc/c-api/tokenizer.rst new file mode 100644 --- /dev/null +++ b/Doc/c-api/tokenizer.rst @@ -0,0 +1,96 @@ +.. highlightlang:: c + +.. _tokenizer: + +Tokenizing Python Code +====================== + +.. sectionauthor:: Dustin J. Mitchell + +.. index:: + tokenizer + +These routines allow C code to break Python code into a stream of tokens. +The token constants match those defined in :mod:`token`, but with a ``PYTOK_`` prefix. + +.. c:type:: PyTokenizer_State + + The C structure used to represent the state of a tokenizer. + +.. c:function:: PyTokenizer_State *PyTokenizer_FromString(const char *string, int exec_input) + + :param string: string to convert to tokens + :param exec_input: true if the input is from an ``exec`` call + + Initialize a tokenizer to read from a C string. + If ``exec_input`` is true, then an implicit newline will be added to the end of the string. + +.. c:function:: PyTokenizer_State *PyTokenizer_FromUTF8(const char *string, int exec_input) + + :param string: UTF-8 encoded string to convert to tokens + :param exec_input: true if the input is from an ``exec`` call + + Initialize a tokenizer to read from a UTF-8 encoded C string. + If ``exec_input`` is true, then an implicit newline will be added to the end of the string. + +.. c:function:: PyTokenizer_State *PyTokenizer_FromFile(FILE *fp, const char *encoding, const char *ps1, const char *ps2) + + :param fp: file to tokenize + :param encoding: encoding of the file contents + :param ps1: initial-line interactive prompt + :param ps2: subsequent-line interactive prompt + + Initialize a tokenizer to read from a file. + The file data is decoded using ``encoding``, if given. + If ``ps1`` and ``ps2`` are not NULL, the tokenizer will operate in interactive mode. + +.. c:function:: void PyTokenizer_Free(PyTokenizer_State *state) + + :param state: tokenizer state + + Free the given tokenizer. + +.. 
c:function:: int PyTokenizer_Get(PyTokenizer_State *state, char **p_start, char **p_end) + + :param state: tokenizer state + :param p_start: (output) first character of the returned token + :param p_end: (output) first character following the returned token + :return: token + + Get the next token from the tokenizer. + The ``p_start`` and ``p_end`` output parameters give the boundaries of the returned token. + +.. c:function:: PYTOK_ISTERMINAL(x) + + Return true for terminal token values. + +.. c:function:: PYTOK_ISNONTERMINAL(x) + + Return true for non-terminal token values. + +.. c:function:: PYTOK_ISEOF(x) + + Return true if *x* is the marker indicating the end of input. + +Putting all of that together:: + + PyTokenizer_State *tokenizer; + int tok; + int nest_level; + char *p_start, *p_end; + + tokenizer = PyTokenizer_FromString("((1+2)+(3+4))", 1); + + nest_level = 0; + while (1) { + tok = PyTokenizer_Get(tokenizer, &p_start, &p_end); + if (PYTOK_ISEOF(tok)) + break; + switch (tok) { + case PYTOK_LPAR: nest_level++; break; + case PYTOK_RPAR: nest_level--; break; + } + } + + PyTokenizer_Free(tokenizer); + printf("final nesting level: %d\n", nest_level); diff --git a/Doc/c-api/utilities.rst b/Doc/c-api/utilities.rst --- a/Doc/c-api/utilities.rst +++ b/Doc/c-api/utilities.rst @@ -19,3 +19,4 @@ and parsing function arguments and const conversion.rst reflection.rst codec.rst + tokenizer.rst diff --git a/Include/Python.h b/Include/Python.h --- a/Include/Python.h +++ b/Include/Python.h @@ -117,6 +117,7 @@ #include "sysmodule.h" #include "intrcheck.h" #include "import.h" +#include "tokenizer.h" #include "abstract.h" #include "bltinmodule.h" diff --git a/Include/token.h b/Include/token.h --- a/Include/token.h +++ b/Include/token.h @@ -7,73 +7,73 @@ extern "C" { #endif -#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ +#undef PYTOK_TILDE /* Prevent clash of our definition with system macro. 
Ex AIX, ioctl.h */ -#define ENDMARKER 0 -#define NAME 1 -#define NUMBER 2 -#define STRING 3 -#define NEWLINE 4 -#define INDENT 5 -#define DEDENT 6 -#define LPAR 7 -#define RPAR 8 -#define LSQB 9 -#define RSQB 10 -#define COLON 11 -#define COMMA 12 -#define SEMI 13 -#define PLUS 14 -#define MINUS 15 -#define STAR 16 -#define SLASH 17 -#define VBAR 18 -#define AMPER 19 -#define LESS 20 -#define GREATER 21 -#define EQUAL 22 -#define DOT 23 -#define PERCENT 24 -#define LBRACE 25 -#define RBRACE 26 -#define EQEQUAL 27 -#define NOTEQUAL 28 -#define LESSEQUAL 29 -#define GREATEREQUAL 30 -#define TILDE 31 -#define CIRCUMFLEX 32 -#define LEFTSHIFT 33 -#define RIGHTSHIFT 34 -#define DOUBLESTAR 35 -#define PLUSEQUAL 36 -#define MINEQUAL 37 -#define STAREQUAL 38 -#define SLASHEQUAL 39 -#define PERCENTEQUAL 40 -#define AMPEREQUAL 41 -#define VBAREQUAL 42 -#define CIRCUMFLEXEQUAL 43 -#define LEFTSHIFTEQUAL 44 -#define RIGHTSHIFTEQUAL 45 -#define DOUBLESTAREQUAL 46 -#define DOUBLESLASH 47 -#define DOUBLESLASHEQUAL 48 -#define AT 49 -#define ATEQUAL 50 -#define RARROW 51 -#define ELLIPSIS 52 +#define PYTOK_ENDMARKER 0 +#define PYTOK_NAME 1 +#define PYTOK_NUMBER 2 +#define PYTOK_STRING 3 +#define PYTOK_NEWLINE 4 +#define PYTOK_INDENT 5 +#define PYTOK_DEDENT 6 +#define PYTOK_LPAR 7 +#define PYTOK_RPAR 8 +#define PYTOK_LSQB 9 +#define PYTOK_RSQB 10 +#define PYTOK_COLON 11 +#define PYTOK_COMMA 12 +#define PYTOK_SEMI 13 +#define PYTOK_PLUS 14 +#define PYTOK_MINUS 15 +#define PYTOK_STAR 16 +#define PYTOK_SLASH 17 +#define PYTOK_VBAR 18 +#define PYTOK_AMPER 19 +#define PYTOK_LESS 20 +#define PYTOK_GREATER 21 +#define PYTOK_EQUAL 22 +#define PYTOK_DOT 23 +#define PYTOK_PERCENT 24 +#define PYTOK_LBRACE 25 +#define PYTOK_RBRACE 26 +#define PYTOK_EQEQUAL 27 +#define PYTOK_NOTEQUAL 28 +#define PYTOK_LESSEQUAL 29 +#define PYTOK_GREATEREQUAL 30 +#define PYTOK_TILDE 31 +#define PYTOK_CIRCUMFLEX 32 +#define PYTOK_LEFTSHIFT 33 +#define PYTOK_RIGHTSHIFT 34 +#define PYTOK_DOUBLESTAR 35 +#define 
PYTOK_PLUSEQUAL 36 +#define PYTOK_MINEQUAL 37 +#define PYTOK_STAREQUAL 38 +#define PYTOK_SLASHEQUAL 39 +#define PYTOK_PERCENTEQUAL 40 +#define PYTOK_AMPEREQUAL 41 +#define PYTOK_VBAREQUAL 42 +#define PYTOK_CIRCUMFLEXEQUAL 43 +#define PYTOK_LEFTSHIFTEQUAL 44 +#define PYTOK_RIGHTSHIFTEQUAL 45 +#define PYTOK_DOUBLESTAREQUAL 46 +#define PYTOK_DOUBLESLASH 47 +#define PYTOK_DOUBLESLASHEQUAL 48 +#define PYTOK_AT 49 +#define PYTOK_ATEQUAL 50 +#define PYTOK_RARROW 51 +#define PYTOK_ELLIPSIS 52 /* Don't forget to update the table _PyParser_TokenNames in tokenizer.c! */ -#define OP 53 -#define ERRORTOKEN 54 -#define N_TOKENS 55 +#define PYTOK_OP 53 +#define PYTOK_ERRORTOKEN 54 +#define PYTOK_N_TOKENS 55 /* Special definitions for cooperation with parser */ -#define NT_OFFSET 256 +#define PYTOK_NT_OFFSET 256 -#define ISTERMINAL(x) ((x) < NT_OFFSET) -#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) -#define ISEOF(x) ((x) == ENDMARKER) +#define PYTOK_ISTERMINAL(x) ((x) < PYTOK_NT_OFFSET) +#define PYTOK_ISNONTERMINAL(x) ((x) >= PYTOK_NT_OFFSET) +#define PYTOK_ISEOF(x) ((x) == PYTOK_ENDMARKER) PyAPI_DATA(const char *) _PyParser_TokenNames[]; /* Token names */ diff --git a/Include/tokenizer.h b/Include/tokenizer.h new file mode 100644 --- /dev/null +++ b/Include/tokenizer.h @@ -0,0 +1,83 @@ +#ifndef Py_TOKENIZER_H +#define Py_TOKENIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "object.h" + +/* Tokenizer interface */ + +#include "token.h" /* For token types */ + +typedef struct PyTokenizer_State PyTokenizer_State; + +#ifndef Py_LIMITED_API +#define PYTOKENIZER_MAXINDENT 100 /* Max indentation level */ + +enum PyTokenizer_DecodingState { + PYTOKENIZER_STATE_INIT, + PYTOKENIZER_STATE_RAW, + PYTOKENIZER_STATE_NORMAL /* have a codec associated with input */ +}; + +struct PyTokenizer_State { + /* Input state; buf <= cur <= inp <= end */ + /* NB an entire line is held in the buffer */ + char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ + char *cur; /* Next character 
in buffer */ + char *inp; /* End of data in buffer */ + char *end; /* End of input buffer if buf != NULL */ + char *start; /* Start of current token if not NULL */ + int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ + /* NB If done != E_OK, cur must be == inp!!! */ + FILE *fp; /* Rest of input; NULL if tokenizing a string */ + int tabsize; /* Tab spacing */ + int indent; /* Current indentation index */ + int indstack[PYTOKENIZER_MAXINDENT]; /* Stack of indents */ + int atbol; /* Nonzero if at begin of new line */ + int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ + const char *prompt, *nextprompt; /* For interactive prompting */ + int lineno; /* Current line number */ + int level; /* () [] {} Parentheses nesting level */ + /* Used to allow free continuations inside them */ + /* Stuff for checking on different tab sizes */ +#ifndef PGEN + /* pgen doesn't have access to Python codecs, it cannot decode the input + filename. The bytes filename might be kept, but it is only used by + indenterror() and it is not really needed: pgen only compiles one file + (Grammar/Grammar). */ + PyObject *filename; +#endif + int altwarning; /* Issue warning if alternate tabs don't match */ + int alterror; /* Issue error if alternate tabs don't match */ + int alttabsize; /* Alternate tab spacing */ + int altindstack[PYTOKENIZER_MAXINDENT]; /* Stack of alternate indents */ + /* Stuff for PEP 0263 */ + enum PyTokenizer_DecodingState decoding_state; + int decoding_erred; /* whether erred in decoding */ + int read_coding_spec; /* whether 'coding:...' has been read */ + char *encoding; /* Source encoding. */ + int cont_line; /* whether we are in a continuation line. */ + const char* line_start; /* pointer to start of current line */ +#ifndef PGEN + PyObject *decoding_readline; /* open(...).readline */ + PyObject *decoding_buffer; +#endif + const char* enc; /* Encoding for the current str. 
*/ + const char* str; + const char* input; /* Tokenizer's newline translated copy of the string. */ +}; +#endif + +PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromString(const char *, int); +PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromUTF8(const char *, int); +PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromFile(FILE *, const char*, + const char *, const char *); +PyAPI_FUNC(void) PyTokenizer_Free(PyTokenizer_State *); +PyAPI_FUNC(int) PyTokenizer_Get(PyTokenizer_State *, char **, char **); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TOKENIZER_H */ diff --git a/Lib/token.py b/Lib/token.py --- a/Lib/token.py +++ b/Lib/token.py @@ -100,7 +100,7 @@ def _main(): with fp: lines = fp.read().split("\n") prog = re.compile( - "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)", + "#define[ \t][ \t]*PYTOK_([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)", re.IGNORECASE) tokens = {} for line in lines: diff --git a/Makefile.pre.in b/Makefile.pre.in --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -322,7 +322,7 @@ PGOBJS= \ PARSER_HEADERS= \ $(srcdir)/Parser/parser.h \ $(srcdir)/Include/parsetok.h \ - $(srcdir)/Parser/tokenizer.h + $(srcdir)/Include/tokenizer.h PGENSRCS= $(PSRCS) $(PGSRCS) PGENOBJS= $(POBJS) $(PGOBJS) diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -33,7 +33,7 @@ #include "token.h" /* token definitions */ #include "grammar.h" #include "parsetok.h" - /* ISTERMINAL() / ISNONTERMINAL() */ + /* PYTOK_ISTERMINAL() / PYTOK_ISNONTERMINAL() */ #undef Yield #include "ast.h" @@ -90,7 +90,7 @@ node2tuple(node *n, return Py_None; } - if (ISNONTERMINAL(TYPE(n))) { + if (PYTOK_ISNONTERMINAL(TYPE(n))) { int i; result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl)); @@ -116,7 +116,7 @@ node2tuple(node *n, (void) addelem(result, i+1, w); } } - else if (ISTERMINAL(TYPE(n))) { + else if (PYTOK_ISTERMINAL(TYPE(n))) { result = mkseq(2 + lineno + col_offset); if (result == NULL) goto error; @@ 
-269,7 +269,7 @@ parser_compare_nodes(node *left, node *r if (TYPE(right) < TYPE(left)) return (1); - if (ISTERMINAL(TYPE(left))) + if (PYTOK_ISTERMINAL(TYPE(left))) return (strcmp(STR(left), STR(right))); if (NCH(left) < NCH(right)) @@ -792,7 +792,7 @@ build_node_children(PyObject *tuple, nod Py_XDECREF(elem); return (0); } - if (ISTERMINAL(type)) { + if (PYTOK_ISTERMINAL(type)) { Py_ssize_t len = PyObject_Size(elem); PyObject *temp; const char *temp_str; @@ -855,7 +855,7 @@ build_node_children(PyObject *tuple, nod (void) memcpy(strn, temp_str, len + 1); Py_DECREF(temp); } - else if (!ISNONTERMINAL(type)) { + else if (!PYTOK_ISNONTERMINAL(type)) { /* * It has to be one or the other; this is an error. * Raise an exception. @@ -880,7 +880,7 @@ build_node_children(PyObject *tuple, nod return NULL; } - if (ISNONTERMINAL(type)) { + if (PYTOK_ISNONTERMINAL(type)) { node* new_child = CHILD(root, i - 1); if (new_child != build_node_children(elem, new_child, line_num)) { @@ -888,7 +888,7 @@ build_node_children(PyObject *tuple, nod return (0); } } - else if (type == NEWLINE) { /* It's true: we increment the */ + else if (type == PYTOK_NEWLINE) { /* It's true: we increment the */ ++(*line_num); /* line number *after* the newline! */ } Py_XDECREF(elem); @@ -907,7 +907,7 @@ build_node_tree(PyObject *tuple) if (temp != NULL) num = PyLong_AsLong(temp); Py_XDECREF(temp); - if (ISTERMINAL(num)) { + if (PYTOK_ISTERMINAL(num)) { /* * The tuple is simple, but it doesn't start with a start symbol. * Raise an exception now and be done with it. @@ -917,7 +917,7 @@ build_node_tree(PyObject *tuple) PyErr_SetObject(parser_error, tuple); Py_XDECREF(tuple); } - else if (ISNONTERMINAL(num)) { + else if (PYTOK_ISNONTERMINAL(num)) { /* * Not efficient, but that can be handled later. 
*/ @@ -981,24 +981,24 @@ build_node_tree(PyObject *tuple) */ static int validate_terminal(node *terminal, int type, char *string); -#define validate_ampersand(ch) validate_terminal(ch, AMPER, "&") -#define validate_circumflex(ch) validate_terminal(ch, CIRCUMFLEX, "^") -#define validate_colon(ch) validate_terminal(ch, COLON, ":") -#define validate_comma(ch) validate_terminal(ch, COMMA, ",") -#define validate_dedent(ch) validate_terminal(ch, DEDENT, "") -#define validate_equal(ch) validate_terminal(ch, EQUAL, "=") -#define validate_indent(ch) validate_terminal(ch, INDENT, (char*)NULL) -#define validate_lparen(ch) validate_terminal(ch, LPAR, "(") -#define validate_newline(ch) validate_terminal(ch, NEWLINE, (char*)NULL) -#define validate_rparen(ch) validate_terminal(ch, RPAR, ")") -#define validate_semi(ch) validate_terminal(ch, SEMI, ";") -#define validate_star(ch) validate_terminal(ch, STAR, "*") -#define validate_vbar(ch) validate_terminal(ch, VBAR, "|") -#define validate_doublestar(ch) validate_terminal(ch, DOUBLESTAR, "**") -#define validate_dot(ch) validate_terminal(ch, DOT, ".") -#define validate_at(ch) validate_terminal(ch, AT, "@") -#define validate_rarrow(ch) validate_terminal(ch, RARROW, "->") -#define validate_name(ch, str) validate_terminal(ch, NAME, str) +#define validate_ampersand(ch) validate_terminal(ch, PYTOK_AMPER, "&") +#define validate_circumflex(ch) validate_terminal(ch, PYTOK_CIRCUMFLEX, "^") +#define validate_colon(ch) validate_terminal(ch, PYTOK_COLON, ":") +#define validate_comma(ch) validate_terminal(ch, PYTOK_COMMA, ",") +#define validate_dedent(ch) validate_terminal(ch, PYTOK_DEDENT, "") +#define validate_equal(ch) validate_terminal(ch, PYTOK_EQUAL, "=") +#define validate_indent(ch) validate_terminal(ch, PYTOK_INDENT, (char*)NULL) +#define validate_lparen(ch) validate_terminal(ch, PYTOK_LPAR, "(") +#define validate_newline(ch) validate_terminal(ch, PYTOK_NEWLINE, (char*)NULL) +#define validate_rparen(ch) validate_terminal(ch, PYTOK_RPAR, 
")") +#define validate_semi(ch) validate_terminal(ch, PYTOK_SEMI, ";") +#define validate_star(ch) validate_terminal(ch, PYTOK_STAR, "*") +#define validate_vbar(ch) validate_terminal(ch, PYTOK_VBAR, "|") +#define validate_doublestar(ch) validate_terminal(ch, PYTOK_DOUBLESTAR, "**") +#define validate_dot(ch) validate_terminal(ch, PYTOK_DOT, ".") +#define validate_at(ch) validate_terminal(ch, PYTOK_AT, "@") +#define validate_rarrow(ch) validate_terminal(ch, PYTOK_RARROW, "->") +#define validate_name(ch, str) validate_terminal(ch, PYTOK_NAME, str) #define VALIDATER(n) static int validate_##n(node *tree) @@ -1117,7 +1117,7 @@ validate_repeating_list(node *tree, int /* validate_class() * * classdef: - * 'class' NAME ['(' testlist ')'] ':' suite + * 'class' PYTOK_NAME ['(' testlist ')'] ':' suite */ static int validate_class(node *tree) @@ -1128,7 +1128,7 @@ validate_class(node *tree) if (res) { res = (validate_name(CHILD(tree, 0), "class") - && validate_ntype(CHILD(tree, 1), NAME) + && validate_ntype(CHILD(tree, 1), PYTOK_NAME) && validate_colon(CHILD(tree, nch - 2)) && validate_suite(CHILD(tree, nch - 1))); } @@ -1219,7 +1219,7 @@ validate_parameters(node *tree) * * suite: * simple_stmt - * | NEWLINE INDENT stmt+ DEDENT + * | PYTOK_NEWLINE PYTOK_INDENT stmt+ PYTOK_DEDENT */ static int validate_suite(node *tree) @@ -1230,7 +1230,7 @@ validate_suite(node *tree) if (res && (nch == 1)) res = validate_simple_stmt(CHILD(tree, 0)); else if (res) { - /* NEWLINE INDENT stmt+ DEDENT */ + /* PYTOK_NEWLINE PYTOK_INDENT stmt+ PYTOK_DEDENT */ res = (validate_newline(CHILD(tree, 0)) && validate_indent(CHILD(tree, 1)) && validate_stmt(CHILD(tree, 2)) @@ -1238,7 +1238,7 @@ validate_suite(node *tree) if (res && (nch > 4)) { int i = 3; - --nch; /* forget the DEDENT */ + --nch; /* forget the PYTOK_DEDENT */ for ( ; res && (i < nch); ++i) res = validate_stmt(CHILD(tree, i)); } @@ -1265,8 +1265,8 @@ validate_testlist_star_expr(node *tl) /* validate either vfpdef or tfpdef. 
- * vfpdef: NAME - * tfpdef: NAME [':' test] + * vfpdef: PYTOK_NAME + * tfpdef: PYTOK_NAME [':' test] */ static int validate_vfpdef(node *tree) @@ -1289,7 +1289,7 @@ validate_vfpdef(node *tree) } /* '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef - * ..or tfpdef in place of vfpdef. vfpdef: NAME; tfpdef: NAME [':' test] + * ..or tfpdef in place of vfpdef. vfpdef: PYTOK_NAME; tfpdef: PYTOK_NAME [':' test] */ static int validate_varargslist_trailer(node *tree, int start) @@ -1301,7 +1301,7 @@ validate_varargslist_trailer(node *tree, err_string("expected variable argument trailer for varargslist"); return 0; } - if (TYPE(CHILD(tree, start)) == STAR) { + if (TYPE(CHILD(tree, start)) == PYTOK_STAR) { /* * '*' [vfpdef] */ @@ -1317,19 +1317,19 @@ validate_varargslist_trailer(node *tree, TYPE(CHILD(tree, start + 1)) == tfpdef)) { res = (validate_comma(CHILD(tree, start++)) && validate_vfpdef(CHILD(tree, start++))); - if (res && start + 1 < nch && TYPE(CHILD(tree, start)) == EQUAL) + if (res && start + 1 < nch && TYPE(CHILD(tree, start)) == PYTOK_EQUAL) res = (validate_equal(CHILD(tree, start++)) && validate_test(CHILD(tree, start++))); } /* * [',' '**' vfpdef] */ - if (res && start + 2 < nch && TYPE(CHILD(tree, start+1)) == DOUBLESTAR) + if (res && start + 2 < nch && TYPE(CHILD(tree, start+1)) == PYTOK_DOUBLESTAR) res = (validate_comma(CHILD(tree, start++)) && validate_doublestar(CHILD(tree, start++)) && validate_vfpdef(CHILD(tree, start++))); } - else if (TYPE(CHILD(tree, start)) == DOUBLESTAR) { + else if (TYPE(CHILD(tree, start)) == PYTOK_DOUBLESTAR) { /* * '**' vfpdef */ @@ -1362,12 +1362,12 @@ validate_varargslist_trailer(node *tree, * ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | * '**' tfpdef) * | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) - * tfpdef: NAME [':' test] + * tfpdef: PYTOK_NAME [':' test] * varargslist: ((vfpdef ['=' test] ',')* * ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | * '**' vfpdef) * | vfpdef 
['=' test] (',' vfpdef ['=' test])* [',']) - * vfpdef: NAME + * vfpdef: PYTOK_NAME * */ static int @@ -1394,7 +1394,7 @@ validate_varargslist(node *tree) /* validate (vfpdef ['=' test] ',')+ */ res = validate_vfpdef(ch); ++i; - if (res && (i+2 <= nch) && TYPE(CHILD(tree, i)) == EQUAL) { + if (res && (i+2 <= nch) && TYPE(CHILD(tree, i)) == PYTOK_EQUAL) { res = (validate_equal(CHILD(tree, i)) && validate_test(CHILD(tree, i+1))); if (res) @@ -1404,7 +1404,7 @@ validate_varargslist(node *tree) res = validate_comma(CHILD(tree, i)); ++i; } - } else if (sym == DOUBLESTAR || sym == STAR) { + } else if (sym == PYTOK_DOUBLESTAR || sym == PYTOK_STAR) { res = validate_varargslist_trailer(tree, i); break; } else { @@ -1495,7 +1495,7 @@ validate_stmt(node *tree) } -/* small_stmt (';' small_stmt)* [';'] NEWLINE +/* small_stmt (';' small_stmt)* [';'] PYTOK_NEWLINE * */ static int @@ -1509,7 +1509,7 @@ validate_simple_stmt(node *tree) if (nch < 2) res = validate_numnodes(tree, 2, "simple_stmt"); - --nch; /* forget the NEWLINE */ + --nch; /* forget the PYTOK_NEWLINE */ if (res && is_even(nch)) res = validate_semi(CHILD(tree, --nch)); if (res && (nch > 2)) { @@ -1771,7 +1771,7 @@ validate_import_as_name(node *tree) } -/* dotted_name: NAME ("." NAME)* +/* dotted_name: PYTOK_NAME ("." 
PYTOK_NAME)* */ static int validate_dotted_name(node *tree) @@ -1790,7 +1790,7 @@ validate_dotted_name(node *tree) } -/* dotted_as_name: dotted_name [NAME NAME] +/* dotted_as_name: dotted_name [PYTOK_NAME PYTOK_NAME] */ static int validate_dotted_as_name(node *tree) @@ -1862,7 +1862,7 @@ count_from_dots(node *tree) { int i; for (i = 1; i < NCH(tree); i++) - if (TYPE(CHILD(tree, i)) != DOT && TYPE(CHILD(tree, i)) != ELLIPSIS) + if (TYPE(CHILD(tree, i)) != PYTOK_DOT && TYPE(CHILD(tree, i)) != PYTOK_ELLIPSIS) break; return i - 1; } @@ -1884,12 +1884,12 @@ validate_import_from(node *tree) && (!havename || validate_dotted_name(CHILD(tree, ndots + 1))) && validate_name(CHILD(tree, offset + 1), "import"); - if (res && TYPE(CHILD(tree, offset + 2)) == LPAR) + if (res && TYPE(CHILD(tree, offset + 2)) == PYTOK_LPAR) res = ((nch == offset + 5) && validate_lparen(CHILD(tree, offset + 2)) && validate_import_as_names(CHILD(tree, offset + 3)) && validate_rparen(CHILD(tree, offset + 4))); - else if (res && TYPE(CHILD(tree, offset + 2)) != STAR) + else if (res && TYPE(CHILD(tree, offset + 2)) != PYTOK_STAR) res = validate_import_as_names(CHILD(tree, offset + 2)); return (res); } @@ -1924,7 +1924,7 @@ validate_import_stmt(node *tree) /* global_stmt: * - * 'global' NAME (',' NAME)* + * 'global' PYTOK_NAME (',' PYTOK_NAME)* */ static int validate_global_stmt(node *tree) @@ -1939,17 +1939,17 @@ validate_global_stmt(node *tree) if (res) res = (validate_name(CHILD(tree, 0), "global") - && validate_ntype(CHILD(tree, 1), NAME)); + && validate_ntype(CHILD(tree, 1), PYTOK_NAME)); for (j = 2; res && (j < nch); j += 2) res = (validate_comma(CHILD(tree, j)) - && validate_ntype(CHILD(tree, j + 1), NAME)); + && validate_ntype(CHILD(tree, j + 1), PYTOK_NAME)); return (res); } /* nonlocal_stmt: * - * 'nonlocal' NAME (',' NAME)* + * 'nonlocal' PYTOK_NAME (',' PYTOK_NAME)* */ static int validate_nonlocal_stmt(node *tree) @@ -1964,10 +1964,10 @@ validate_nonlocal_stmt(node *tree) if (res) res = 
(validate_name(CHILD(tree, 0), "nonlocal") - && validate_ntype(CHILD(tree, 1), NAME)); + && validate_ntype(CHILD(tree, 1), PYTOK_NAME)); for (j = 2; res && (j < nch); j += 2) res = (validate_comma(CHILD(tree, j)) - && validate_ntype(CHILD(tree, j + 1), NAME)); + && validate_ntype(CHILD(tree, j + 1), PYTOK_NAME)); return res; } @@ -2066,7 +2066,7 @@ validate_try(node *tree) "Illegal number of children for try/%s node.", name); } /* Handle try/finally statement */ - if (res && (TYPE(CHILD(tree, pos)) == NAME) && + if (res && (TYPE(CHILD(tree, pos)) == PYTOK_NAME) && (strcmp(STR(CHILD(tree, pos)), "finally") == 0)) { res = (validate_numnodes(tree, 6, "try/finally") && validate_colon(CHILD(tree, 4)) @@ -2081,7 +2081,7 @@ validate_try(node *tree) pos += 3; } /* skip else clause */ - if (res && pos < nch && (TYPE(CHILD(tree, pos)) == NAME) && + if (res && pos < nch && (TYPE(CHILD(tree, pos)) == PYTOK_NAME) && (strcmp(STR(CHILD(tree, pos)), "else") == 0)) { res = (validate_colon(CHILD(tree, pos + 1)) && validate_suite(CHILD(tree, pos + 2))); @@ -2110,7 +2110,7 @@ validate_except_clause(node *tree) res = validate_test(CHILD(tree, 1)); if (res && (nch == 4)) res = (validate_name(CHILD(tree, 2), "as") - && validate_ntype(CHILD(tree, 3), NAME)); + && validate_ntype(CHILD(tree, 3), PYTOK_NAME)); return (res); } @@ -2229,20 +2229,20 @@ validate_comp_op(node *tree) if (nch == 1) { /* * Only child will be a terminal with a well-defined symbolic name - * or a NAME with a string of either 'is' or 'in' + * or a PYTOK_NAME with a string of either 'is' or 'in' */ tree = CHILD(tree, 0); switch (TYPE(tree)) { - case LESS: - case GREATER: - case EQEQUAL: - case EQUAL: - case LESSEQUAL: - case GREATEREQUAL: - case NOTEQUAL: + case PYTOK_LESS: + case PYTOK_GREATER: + case PYTOK_EQEQUAL: + case PYTOK_EQUAL: + case PYTOK_LESSEQUAL: + case PYTOK_GREATEREQUAL: + case PYTOK_NOTEQUAL: res = 1; break; - case NAME: + case PYTOK_NAME: res = ((strcmp(STR(tree), "in") == 0) || (strcmp(STR(tree), 
"is") == 0)); if (!res) { @@ -2256,8 +2256,8 @@ validate_comp_op(node *tree) } } else if ((res = validate_numnodes(tree, 2, "comp_op")) != 0) { - res = (validate_ntype(CHILD(tree, 0), NAME) - && validate_ntype(CHILD(tree, 1), NAME) + res = (validate_ntype(CHILD(tree, 0), PYTOK_NAME) + && validate_ntype(CHILD(tree, 1), PYTOK_NAME) && (((strcmp(STR(CHILD(tree, 0)), "is") == 0) && (strcmp(STR(CHILD(tree, 1)), "not") == 0)) || ((strcmp(STR(CHILD(tree, 0)), "not") == 0) @@ -2276,7 +2276,7 @@ validate_star_expr(node *tree) if (!res) return res; if (!validate_numnodes(tree, 2, "star_expr")) return 0; - return validate_ntype(CHILD(tree, 0), STAR) && \ + return validate_ntype(CHILD(tree, 0), PYTOK_STAR) && \ validate_expr(CHILD(tree, 1)); } @@ -2355,7 +2355,7 @@ validate_shift_expr(node *tree) { return (validate_ntype(tree, shift_expr) && validate_chain_two_ops(tree, validate_arith_expr, - LEFTSHIFT, RIGHTSHIFT)); + PYTOK_LEFTSHIFT, PYTOK_RIGHTSHIFT)); } @@ -2363,7 +2363,7 @@ static int validate_arith_expr(node *tree) { return (validate_ntype(tree, arith_expr) - && validate_chain_two_ops(tree, validate_term, PLUS, MINUS)); + && validate_chain_two_ops(tree, validate_term, PYTOK_PLUS, PYTOK_MINUS)); } @@ -2377,10 +2377,10 @@ validate_term(node *tree) && validate_factor(CHILD(tree, 0))); for ( ; res && (pos < nch); pos += 2) - res = (((TYPE(CHILD(tree, pos)) == STAR) - || (TYPE(CHILD(tree, pos)) == SLASH) - || (TYPE(CHILD(tree, pos)) == DOUBLESLASH) - || (TYPE(CHILD(tree, pos)) == PERCENT)) + res = (((TYPE(CHILD(tree, pos)) == PYTOK_STAR) + || (TYPE(CHILD(tree, pos)) == PYTOK_SLASH) + || (TYPE(CHILD(tree, pos)) == PYTOK_DOUBLESLASH) + || (TYPE(CHILD(tree, pos)) == PYTOK_PERCENT)) && validate_factor(CHILD(tree, pos + 1))); return (res); @@ -2397,9 +2397,9 @@ validate_factor(node *tree) int nch = NCH(tree); int res = (validate_ntype(tree, factor) && (((nch == 2) - && ((TYPE(CHILD(tree, 0)) == PLUS) - || (TYPE(CHILD(tree, 0)) == MINUS) - || (TYPE(CHILD(tree, 0)) == TILDE)) + && 
((TYPE(CHILD(tree, 0)) == PYTOK_PLUS) + || (TYPE(CHILD(tree, 0)) == PYTOK_MINUS) + || (TYPE(CHILD(tree, 0)) == PYTOK_TILDE)) && validate_factor(CHILD(tree, 1))) || ((nch == 1) && validate_power(CHILD(tree, 0))))); @@ -2445,7 +2445,7 @@ validate_atom(node *tree) res = validate_numnodes(tree, nch+1, "atom"); if (res) { switch (TYPE(CHILD(tree, 0))) { - case LPAR: + case PYTOK_LPAR: res = ((nch <= 3) && (validate_rparen(CHILD(tree, nch - 1)))); @@ -2456,32 +2456,32 @@ validate_atom(node *tree) res = validate_testlist_comp(CHILD(tree, 1)); } break; - case LSQB: + case PYTOK_LSQB: if (nch == 2) - res = validate_ntype(CHILD(tree, 1), RSQB); + res = validate_ntype(CHILD(tree, 1), PYTOK_RSQB); else if (nch == 3) res = (validate_testlist_comp(CHILD(tree, 1)) - && validate_ntype(CHILD(tree, 2), RSQB)); + && validate_ntype(CHILD(tree, 2), PYTOK_RSQB)); else { res = 0; err_string("illegal list display atom"); } break; - case LBRACE: + case PYTOK_LBRACE: res = ((nch <= 3) - && validate_ntype(CHILD(tree, nch - 1), RBRACE)); + && validate_ntype(CHILD(tree, nch - 1), PYTOK_RBRACE)); if (res && (nch == 3)) res = validate_dictorsetmaker(CHILD(tree, 1)); break; - case NAME: - case NUMBER: - case ELLIPSIS: + case PYTOK_NAME: + case PYTOK_NUMBER: + case PYTOK_ELLIPSIS: res = (nch == 1); break; - case STRING: + case PYTOK_STRING: for (pos = 1; res && (pos < nch); ++pos) - res = validate_ntype(CHILD(tree, pos), STRING); + res = validate_ntype(CHILD(tree, pos), PYTOK_STRING); break; default: res = 0; @@ -2531,7 +2531,7 @@ validate_testlist_comp(node *tree) } /* decorator: - * '@' dotted_name [ '(' [arglist] ')' ] NEWLINE + * '@' dotted_name [ '(' [arglist] ')' ] PYTOK_NEWLINE */ static int validate_decorator(node *tree) @@ -2606,7 +2606,7 @@ validate_with_stmt(node *tree) return ok; } -/* funcdef: 'def' NAME parameters ['->' test] ':' suite */ +/* funcdef: 'def' PYTOK_NAME parameters ['->' test] ':' suite */ static int validate_funcdef(node *tree) @@ -2616,14 +2616,14 @@ 
validate_funcdef(node *tree) if (res) { if (nch == 5) { res = (validate_name(CHILD(tree, 0), "def") - && validate_ntype(CHILD(tree, 1), NAME) + && validate_ntype(CHILD(tree, 1), PYTOK_NAME) && validate_parameters(CHILD(tree, 2)) && validate_colon(CHILD(tree, 3)) && validate_suite(CHILD(tree, 4))); } else if (nch == 7) { res = (validate_name(CHILD(tree, 0), "def") - && validate_ntype(CHILD(tree, 1), NAME) + && validate_ntype(CHILD(tree, 1), PYTOK_NAME) && validate_parameters(CHILD(tree, 2)) && validate_rarrow(CHILD(tree, 3)) && validate_test(CHILD(tree, 4)) @@ -2745,7 +2745,7 @@ validate_arglist(node *tree) ok = 0; } } - else if (sym == STAR) { + else if (sym == PYTOK_STAR) { ok = validate_star(CHILD(tree, i)); if (ok && (nch-i == 2)) ok = validate_test(CHILD(tree, i+1)); @@ -2759,7 +2759,7 @@ validate_arglist(node *tree) ok = 0; } } - else if (sym == DOUBLESTAR) { + else if (sym == PYTOK_DOUBLESTAR) { if (nch-i == 2) ok = (validate_doublestar(CHILD(tree, i)) && validate_test(CHILD(tree, i+1))); @@ -2803,7 +2803,7 @@ validate_argument(node *tree) /* trailer: * - * '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME + * '(' [arglist] ')' | '[' subscriptlist ']' | '.' 
PYTOK_NAME */ static int validate_trailer(node *tree) @@ -2813,19 +2813,19 @@ validate_trailer(node *tree) if (res) { switch (TYPE(CHILD(tree, 0))) { - case LPAR: + case PYTOK_LPAR: res = validate_rparen(CHILD(tree, nch - 1)); if (res && (nch == 3)) res = validate_arglist(CHILD(tree, 1)); break; - case LSQB: + case PYTOK_LSQB: res = (validate_numnodes(tree, 3, "trailer") && validate_subscriptlist(CHILD(tree, 1)) - && validate_ntype(CHILD(tree, 2), RSQB)); + && validate_ntype(CHILD(tree, 2), PYTOK_RSQB)); break; - case DOT: + case PYTOK_DOT: res = (validate_numnodes(tree, 2, "trailer") - && validate_ntype(CHILD(tree, 1), NAME)); + && validate_ntype(CHILD(tree, 1), PYTOK_NAME)); break; default: res = 0; @@ -2867,7 +2867,7 @@ validate_subscript(node *tree) err_string("invalid number of arguments for subscript node"); return (0); } - if (TYPE(CHILD(tree, 0)) == DOT) + if (TYPE(CHILD(tree, 0)) == PYTOK_DOT) /* take care of ('.' '.' '.') possibility */ return (validate_numnodes(tree, 3, "subscript") && validate_dot(CHILD(tree, 0)) @@ -2884,7 +2884,7 @@ validate_subscript(node *tree) * but at least one of the optional components will * be present, but we don't know which yet. */ - if ((TYPE(CHILD(tree, 0)) != COLON) || (nch == 4)) { + if ((TYPE(CHILD(tree, 0)) != PYTOK_COLON) || (nch == 4)) { res = validate_test(CHILD(tree, 0)); offset = 1; } @@ -2974,7 +2974,7 @@ validate_dictorsetmaker(node *tree) if (!res) return 0; - if (nch - i >= 2 && TYPE(CHILD(tree, i)) == COLON) { + if (nch - i >= 2 && TYPE(CHILD(tree, i)) == PYTOK_COLON) { /* Dictionary display or dictionary comprehension. 
*/ res = (validate_colon(CHILD(tree, i++)) && validate_test(CHILD(tree, i++))); @@ -3045,10 +3045,10 @@ validate_eval_input(node *tree) int res = (validate_ntype(tree, eval_input) && (nch >= 2) && validate_testlist(CHILD(tree, 0)) - && validate_ntype(CHILD(tree, nch - 1), ENDMARKER)); + && validate_ntype(CHILD(tree, nch - 1), PYTOK_ENDMARKER)); for (pos = 1; res && (pos < (nch - 1)); ++pos) - res = validate_ntype(CHILD(tree, pos), NEWLINE); + res = validate_ntype(CHILD(tree, pos), PYTOK_NEWLINE); return (res); } @@ -3258,7 +3258,7 @@ validate_expr_tree(node *tree) /* file_input: - * (NEWLINE | stmt)* ENDMARKER + * (PYTOK_NEWLINE | stmt)* PYTOK_ENDMARKER */ static int validate_file_input(node *tree) @@ -3266,7 +3266,7 @@ validate_file_input(node *tree) int j; int nch = NCH(tree) - 1; int res = ((nch >= 0) - && validate_ntype(CHILD(tree, nch), ENDMARKER)); + && validate_ntype(CHILD(tree, nch), PYTOK_ENDMARKER)); for (j = 0; res && (j < nch); ++j) { if (TYPE(CHILD(tree, j)) == stmt) diff --git a/Parser/acceler.c b/Parser/acceler.c --- a/Parser/acceler.c +++ b/Parser/acceler.c @@ -84,10 +84,10 @@ fixstate(grammar *g, state *s) printf("XXX too many states!\n"); continue; } - if (ISNONTERMINAL(type)) { + if (PYTOK_ISNONTERMINAL(type)) { dfa *d1 = PyGrammar_FindDFA(g, type); int ibit; - if (type - NT_OFFSET >= (1 << 7)) { + if (type - PYTOK_NT_OFFSET >= (1 << 7)) { printf("XXX too high nonterminal number!\n"); continue; } @@ -96,7 +96,7 @@ fixstate(grammar *g, state *s) if (accel[ibit] != -1) printf("XXX ambiguity!\n"); accel[ibit] = a->a_arrow | (1 << 7) | - ((type - NT_OFFSET) << 8); + ((type - PYTOK_NT_OFFSET) << 8); } } } diff --git a/Parser/firstsets.c b/Parser/firstsets.c --- a/Parser/firstsets.c +++ b/Parser/firstsets.c @@ -80,7 +80,7 @@ calcfirstset(grammar *g, dfa *d) "no mem to resize sym in calcfirstset"); sym[nsyms++] = a->a_lbl; type = l0[a->a_lbl].lb_type; - if (ISNONTERMINAL(type)) { + if (PYTOK_ISNONTERMINAL(type)) { d1 = PyGrammar_FindDFA(g, type); if 
(d1->d_first == dummy) { fprintf(stderr, @@ -94,7 +94,7 @@ calcfirstset(grammar *g, dfa *d) d1->d_first, nbits); } } - else if (ISTERMINAL(type)) { + else if (PYTOK_ISTERMINAL(type)) { addbit(result, a->a_lbl); } } diff --git a/Parser/grammar.c b/Parser/grammar.c --- a/Parser/grammar.c +++ b/Parser/grammar.c @@ -149,7 +149,7 @@ translabel(grammar *g, label *lb) if (Py_DebugFlag) printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); - if (lb->lb_type == NAME) { + if (lb->lb_type == PYTOK_NAME) { for (i = 0; i < g->g_ndfas; i++) { if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { if (Py_DebugFlag) @@ -163,7 +163,7 @@ translabel(grammar *g, label *lb) return; } } - for (i = 0; i < (int)N_TOKENS; i++) { + for (i = 0; i < (int)PYTOK_N_TOKENS; i++) { if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { if (Py_DebugFlag) printf("Label %s is terminal %d.\n", @@ -174,11 +174,11 @@ translabel(grammar *g, label *lb) return; } } - printf("Can't translate NAME label '%s'\n", lb->lb_str); + printf("Can't translate PYTOK_NAME label '%s'\n", lb->lb_str); return; } - if (lb->lb_type == STRING) { + if (lb->lb_type == PYTOK_STRING) { if (isalpha(Py_CHARMASK(lb->lb_str[1])) || lb->lb_str[1] == '_') { char *p; @@ -187,7 +187,7 @@ translabel(grammar *g, label *lb) size_t name_len; if (Py_DebugFlag) printf("Label %s is a keyword\n", lb->lb_str); - lb->lb_type = NAME; + lb->lb_type = PYTOK_NAME; src = lb->lb_str + 1; p = strchr(src, '\''); if (p) @@ -206,42 +206,42 @@ translabel(grammar *g, label *lb) } else if (lb->lb_str[2] == lb->lb_str[0]) { int type = (int) PyToken_OneChar(lb->lb_str[1]); - if (type != OP) { + if (type != PYTOK_OP) { lb->lb_type = type; free(lb->lb_str); lb->lb_str = NULL; } else - printf("Unknown OP label %s\n", + printf("Unknown PYTOK_OP label %s\n", lb->lb_str); } else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { int type = (int) PyToken_TwoChars(lb->lb_str[1], lb->lb_str[2]); - if (type != OP) { + if (type != PYTOK_OP) { lb->lb_type = type; 
free(lb->lb_str); lb->lb_str = NULL; } else - printf("Unknown OP label %s\n", + printf("Unknown PYTOK_OP label %s\n", lb->lb_str); } else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { int type = (int) PyToken_ThreeChars(lb->lb_str[1], lb->lb_str[2], lb->lb_str[3]); - if (type != OP) { + if (type != PYTOK_OP) { lb->lb_type = type; free(lb->lb_str); lb->lb_str = NULL; } else - printf("Unknown OP label %s\n", + printf("Unknown PYTOK_OP label %s\n", lb->lb_str); } else - printf("Can't translate STRING label %s\n", + printf("Can't translate PYTOK_STRING label %s\n", lb->lb_str); } else diff --git a/Parser/grammar1.c b/Parser/grammar1.c --- a/Parser/grammar1.c +++ b/Parser/grammar1.c @@ -14,7 +14,7 @@ PyGrammar_FindDFA(grammar *g, int type) dfa *d; #if 1 /* Massive speed-up */ - d = &g->g_dfa[type - NT_OFFSET]; + d = &g->g_dfa[type - PYTOK_NT_OFFSET]; assert(d->d_type == type); return d; #else @@ -35,9 +35,9 @@ PyGrammar_LabelRepr(label *lb) { static char buf[100]; - if (lb->lb_type == ENDMARKER) + if (lb->lb_type == PYTOK_ENDMARKER) return "EMPTY"; - else if (ISNONTERMINAL(lb->lb_type)) { + else if (PYTOK_ISNONTERMINAL(lb->lb_type)) { if (lb->lb_str == NULL) { PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); return buf; @@ -45,7 +45,7 @@ PyGrammar_LabelRepr(label *lb) else return lb->lb_str; } - else if (lb->lb_type < N_TOKENS) { + else if (lb->lb_type < PYTOK_N_TOKENS) { if (lb->lb_str == NULL) return _PyParser_TokenNames[lb->lb_type]; else { diff --git a/Parser/listnode.c b/Parser/listnode.c --- a/Parser/listnode.c +++ b/Parser/listnode.c @@ -30,17 +30,17 @@ list1node(FILE *fp, node *n) { if (n == 0) return; - if (ISNONTERMINAL(TYPE(n))) { + if (PYTOK_ISNONTERMINAL(TYPE(n))) { int i; for (i = 0; i < NCH(n); i++) list1node(fp, CHILD(n, i)); } - else if (ISTERMINAL(TYPE(n))) { + else if (PYTOK_ISTERMINAL(TYPE(n))) { switch (TYPE(n)) { - case INDENT: + case PYTOK_INDENT: ++level; break; - case DEDENT: + case PYTOK_DEDENT: --level; break; 
default: @@ -50,7 +50,7 @@ list1node(FILE *fp, node *n) fprintf(fp, "\t"); atbol = 0; } - if (TYPE(n) == NEWLINE) { + if (TYPE(n) == PYTOK_NEWLINE) { if (STR(n) != NULL) fprintf(fp, "%s", STR(n)); fprintf(fp, "\n"); diff --git a/Parser/parser.c b/Parser/parser.c --- a/Parser/parser.c +++ b/Parser/parser.c @@ -139,12 +139,12 @@ classify(parser_state *ps, int type, con grammar *g = ps->p_grammar; int n = g->g_ll.ll_nlabels; - if (type == NAME) { + if (type == PYTOK_NAME) { const char *s = str; label *l = g->g_ll.ll_label; int i; for (i = n; i > 0; i--, l++) { - if (l->lb_type != NAME || l->lb_str == NULL || + if (l->lb_type != PYTOK_NAME || l->lb_str == NULL || l->lb_str[0] != s[0] || strcmp(l->lb_str, s) != 0) continue; @@ -202,14 +202,14 @@ future_hack(parser_state *ps) return; ch = CHILD(n, 3); /* ch can be a star, a parenthesis or import_as_names */ - if (TYPE(ch) == STAR) + if (TYPE(ch) == PYTOK_STAR) return; - if (TYPE(ch) == LPAR) + if (TYPE(ch) == PYTOK_LPAR) ch = CHILD(n, 4); for (i = 0; i < NCH(ch); i += 2) { cch = CHILD(ch, i); - if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { + if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == PYTOK_NAME) { char *str_ch = STR(CHILD(cch, 0)); if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { ps->p_flags |= CO_FUTURE_WITH_STATEMENT; @@ -253,7 +253,7 @@ PyParser_AddToken(parser_state *ps, int if (x != -1) { if (x & (1<<7)) { /* Push non-terminal */ - int nt = (x >> 8) + NT_OFFSET; + int nt = (x >> 8) + PYTOK_NT_OFFSET; int arrow = x & ((1<<7)-1); dfa *d1 = PyGrammar_FindDFA( ps->p_grammar, nt); @@ -350,7 +350,7 @@ dumptree(grammar *g, node *n) l.lb_type = TYPE(n); l.lb_str = STR(n); printf("%s", PyGrammar_LabelRepr(&l)); - if (ISNONTERMINAL(TYPE(n))) { + if (PYTOK_ISNONTERMINAL(TYPE(n))) { printf("("); for (i = 0; i < NCH(n); i++) { if (i > 0) @@ -369,13 +369,13 @@ showtree(grammar *g, node *n) if (n == NULL) return; - if (ISNONTERMINAL(TYPE(n))) { + if (PYTOK_ISNONTERMINAL(TYPE(n))) { for (i = 0; i < NCH(n); i++) showtree(g, 
CHILD(n, i)); } - else if (ISTERMINAL(TYPE(n))) { + else if (PYTOK_ISTERMINAL(TYPE(n))) { printf("%s", _PyParser_TokenNames[TYPE(n)]); - if (TYPE(n) == NUMBER || TYPE(n) == NAME) + if (TYPE(n) == PYTOK_NUMBER || TYPE(n) == PYTOK_NAME) printf("(%s)", STR(n)); printf(" "); } @@ -431,18 +431,18 @@ symbol. As an example, consider this grammar: -expr: term (OP term)* +expr: term (PYTOK_OP term)* term: CONSTANT | '(' expr ')' The DFA corresponding to the rule for expr is: -------->.---term-->.-------> - ^ | - | | - \----OP----/ +------->.---------term-->.-------> + ^ | + | | + \----PYTOK_OP----/ The parse tree generated for the input a+b is: -(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b))) +(expr: (term: (PYTOK_NAME: a)), (PYTOK_OP: +), (term: (PYTOK_NAME: b))) */ diff --git a/Parser/parsetok.c b/Parser/parsetok.c --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -12,7 +12,7 @@ /* Forward */ -static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); +static node *parsetok(PyTokenizer_State *, grammar *, int, perrdetail *, int *); static int initerr(perrdetail *err_ret, PyObject * filename); /* Parse input coming from a string. Return error code, print some errors. */ @@ -45,7 +45,7 @@ PyParser_ParseStringObject(const char *s grammar *g, int start, perrdetail *err_ret, int *flags) { - struct tok_state *tok; + PyTokenizer_State *tok; int exec_input = start == file_input; if (initerr(err_ret, filename) < 0) @@ -118,7 +118,7 @@ PyParser_ParseFileObject(FILE *fp, PyObj const char *ps1, const char *ps2, perrdetail *err_ret, int *flags) { - struct tok_state *tok; + PyTokenizer_State *tok; if (initerr(err_ret, filename) < 0) return NULL; @@ -181,7 +181,7 @@ warn(const char *msg, const char *filena Return error code. 
*/ static node * -parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, +parsetok(PyTokenizer_State *tok, grammar *g, int start, perrdetail *err_ret, int *flags) { parser_state *ps; @@ -206,12 +206,12 @@ parsetok(struct tok_state *tok, grammar int col_offset; type = PyTokenizer_Get(tok, &a, &b); - if (type == ERRORTOKEN) { + if (type == PYTOK_ERRORTOKEN) { err_ret->error = tok->done; break; } - if (type == ENDMARKER && started) { - type = NEWLINE; /* Add an extra newline */ + if (type == PYTOK_ENDMARKER && started) { + type = PYTOK_NEWLINE; /* Add an extra newline */ started = 0; /* Add the right number of dedent tokens, except if a certain flag is given -- @@ -236,7 +236,7 @@ parsetok(struct tok_state *tok, grammar str[len] = '\0'; #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - if (type == NOTEQUAL) { + if (type == PYTOK_NOTEQUAL) { if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && strcmp(str, "!=")) { PyObject_FREE(str); diff --git a/Parser/pgen.c b/Parser/pgen.c --- a/Parser/pgen.c +++ b/Parser/pgen.c @@ -79,7 +79,7 @@ static nfa * newnfa(char *name) { nfa *nf; - static int type = NT_OFFSET; /* All types will be disjunct */ + static int type = PYTOK_NT_OFFSET; /* All types will be disjunct */ nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); if (nf == NULL) @@ -113,7 +113,7 @@ newnfagrammar(void) gr->gr_nfa = NULL; gr->gr_ll.ll_nlabels = 0; gr->gr_ll.ll_label = NULL; - addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); + addlabel(&gr->gr_ll, PYTOK_ENDMARKER, "EMPTY"); return gr; } @@ -128,7 +128,7 @@ addnfa(nfagrammar *gr, char *name) if (gr->gr_nfa == NULL) Py_FatalError("out of mem"); gr->gr_nfa[gr->gr_nnfas++] = nf; - addlabel(&gr->gr_ll, NAME, nf->nf_name); + addlabel(&gr->gr_ll, PYTOK_NAME, nf->nf_name); return nf; } @@ -157,10 +157,10 @@ metacompile(node *n) printf("Compiling (meta-) parse tree into NFA grammar\n"); gr = newnfagrammar(); REQ(n, MSTART); - i = n->n_nchildren - 1; /* Last child is ENDMARKER */ + i = n->n_nchildren - 1; /* Last child is 
PYTOK_ENDMARKER */ n = n->n_child; for (; --i >= 0; n++) { - if (n->n_type != NEWLINE) + if (n->n_type != PYTOK_NEWLINE) compile_rule(gr, n); } return gr; @@ -174,15 +174,15 @@ compile_rule(nfagrammar *gr, node *n) REQ(n, RULE); REQN(n->n_nchildren, 4); n = n->n_child; - REQ(n, NAME); + REQ(n, PYTOK_NAME); nf = addnfa(gr, n->n_str); n++; - REQ(n, COLON); + REQ(n, PYTOK_COLON); n++; REQ(n, RHS); compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); n++; - REQ(n, NEWLINE); + REQ(n, PYTOK_NEWLINE); } static void @@ -207,7 +207,7 @@ compile_rhs(labellist *ll, nfa *nf, node addnfaarc(nf, *pa, a, EMPTY); addnfaarc(nf, b, *pb, EMPTY); for (; --i >= 0; n++) { - REQ(n, VBAR); + REQ(n, PYTOK_VBAR); REQN(i, 1); --i; n++; @@ -250,7 +250,7 @@ compile_item(labellist *ll, nfa *nf, nod i = n->n_nchildren; REQN(i, 1); n = n->n_child; - if (n->n_type == LSQB) { + if (n->n_type == PYTOK_LSQB) { REQN(i, 3); n++; REQ(n, RHS); @@ -262,7 +262,7 @@ compile_item(labellist *ll, nfa *nf, nod addnfaarc(nf, b, *pb, EMPTY); REQN(i, 1); n++; - REQ(n, RSQB); + REQ(n, PYTOK_RSQB); } else { compile_atom(ll, nf, n, pa, pb); @@ -270,10 +270,10 @@ compile_item(labellist *ll, nfa *nf, nod return; n++; addnfaarc(nf, *pb, *pa, EMPTY); - if (n->n_type == STAR) + if (n->n_type == PYTOK_STAR) *pb = *pa; else - REQ(n, PLUS); + REQ(n, PYTOK_PLUS); } } @@ -287,21 +287,21 @@ compile_atom(labellist *ll, nfa *nf, nod (void)i; /* Don't warn about set but unused */ REQN(i, 1); n = n->n_child; - if (n->n_type == LPAR) { + if (n->n_type == PYTOK_LPAR) { REQN(i, 3); n++; REQ(n, RHS); compile_rhs(ll, nf, n, pa, pb); n++; - REQ(n, RPAR); + REQ(n, PYTOK_RPAR); } - else if (n->n_type == NAME || n->n_type == STRING) { + else if (n->n_type == PYTOK_NAME || n->n_type == PYTOK_STRING) { *pa = addnfastate(nf); *pb = addnfastate(nf); addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); } else - REQ(n, NAME); + REQ(n, PYTOK_NAME); } static void diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c --- 
a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -40,9 +40,9 @@ extern char *PyOS_Readline(FILE *, FILE #define TABSIZE 8 /* Forward */ -static struct tok_state *tok_new(void); -static int tok_nextc(struct tok_state *tok); -static void tok_backup(struct tok_state *tok, int c); +static PyTokenizer_State *tok_new(void); +static int tok_nextc(PyTokenizer_State *tok); +static void tok_backup(PyTokenizer_State *tok, int c); /* Token names */ @@ -110,11 +110,11 @@ const char *_PyParser_TokenNames[] = { /* Create and initialize a new tok_state structure */ -static struct tok_state * +static PyTokenizer_State * tok_new(void) { - struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( - sizeof(struct tok_state)); + PyTokenizer_State *tok = (PyTokenizer_State *)PyMem_MALLOC( + sizeof(PyTokenizer_State)); if (tok == NULL) return NULL; tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; @@ -133,7 +133,7 @@ tok_new(void) tok->alterror = 1; tok->alttabsize = 1; tok->altindstack[0] = 0; - tok->decoding_state = STATE_INIT; + tok->decoding_state = PYTOKENIZER_STATE_INIT; tok->decoding_erred = 0; tok->read_coding_spec = 0; tok->enc = NULL; @@ -148,7 +148,7 @@ tok_new(void) } static char * -new_string(const char *s, Py_ssize_t len, struct tok_state *tok) +new_string(const char *s, Py_ssize_t len, PyTokenizer_State *tok) { char* result = (char *)PyMem_MALLOC(len + 1); if (!result) { @@ -163,19 +163,19 @@ new_string(const char *s, Py_ssize_t len #ifdef PGEN static char * -decoding_fgets(char *s, int size, struct tok_state *tok) +decoding_fgets(char *s, int size, PyTokenizer_State *tok) { return fgets(s, size, tok->fp); } static int -decoding_feof(struct tok_state *tok) +decoding_feof(PyTokenizer_State *tok) { return feof(tok->fp); } static char * -decode_str(const char *str, int exec_input, struct tok_state *tok) +decode_str(const char *str, int exec_input, PyTokenizer_State *tok) { return new_string(str, strlen(str), tok); } @@ -183,7 +183,7 @@ decode_str(const char *str, 
int exec_inp #else /* PGEN */ static char * -error_ret(struct tok_state *tok) /* XXX */ +error_ret(PyTokenizer_State *tok) /* XXX */ { tok->decoding_erred = 1; if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ @@ -225,7 +225,7 @@ get_normal_name(char *s) /* for u /* Return the coding spec in S, or NULL if none is found. */ static int -get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok) +get_coding_spec(const char *s, char **spec, Py_ssize_t size, PyTokenizer_State *tok) { Py_ssize_t i; *spec = NULL; @@ -278,8 +278,8 @@ get_coding_spec(const char *s, char **sp Return 1 on success, 0 on failure. */ static int -check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, - int set_readline(struct tok_state *, const char *)) +check_coding_spec(const char* line, Py_ssize_t size, PyTokenizer_State *tok, + int set_readline(PyTokenizer_State *, const char *)) { char *cs; int r = 1; @@ -307,14 +307,14 @@ check_coding_spec(const char* line, Py_s } tok->read_coding_spec = 1; if (tok->encoding == NULL) { - assert(tok->decoding_state == STATE_RAW); + assert(tok->decoding_state == PYTOKENIZER_STATE_RAW); if (strcmp(cs, "utf-8") == 0) { tok->encoding = cs; } else { r = set_readline(tok, cs); if (r) { tok->encoding = cs; - tok->decoding_state = STATE_NORMAL; + tok->decoding_state = PYTOKENIZER_STATE_NORMAL; } else { PyErr_Format(PyExc_SyntaxError, @@ -337,14 +337,14 @@ check_coding_spec(const char* line, Py_s Return 1 on success, 0 on failure. 
*/ static int -check_bom(int get_char(struct tok_state *), - void unget_char(int, struct tok_state *), - int set_readline(struct tok_state *, const char *), - struct tok_state *tok) +check_bom(int get_char(PyTokenizer_State *), + void unget_char(int, PyTokenizer_State *), + int set_readline(PyTokenizer_State *, const char *), + PyTokenizer_State *tok) { int ch1, ch2, ch3; ch1 = get_char(tok); - tok->decoding_state = STATE_RAW; + tok->decoding_state = PYTOKENIZER_STATE_RAW; if (ch1 == EOF) { return 1; } else if (ch1 == 0xEF) { @@ -373,7 +373,7 @@ check_bom(int get_char(struct tok_state } if (!set_readline(tok, "utf-16-be")) return 0; - tok->decoding_state = STATE_NORMAL; + tok->decoding_state = PYTOKENIZER_STATE_NORMAL; } else if (ch1 == 0xFF) { ch2 = get_char(tok); if (ch2 != 0xFE) { @@ -383,7 +383,7 @@ check_bom(int get_char(struct tok_state } if (!set_readline(tok, "utf-16-le")) return 0; - tok->decoding_state = STATE_NORMAL; + tok->decoding_state = PYTOKENIZER_STATE_NORMAL; #endif } else { unget_char(ch1, tok); @@ -414,7 +414,7 @@ check_bom(int get_char(struct tok_state */ static char * -fp_readl(char *s, int size, struct tok_state *tok) +fp_readl(char *s, int size, PyTokenizer_State *tok) { PyObject* bufobj; const char *buf; @@ -485,7 +485,7 @@ error: Return 1 on success, 0 on failure. */ static int -fp_setreadl(struct tok_state *tok, const char* enc) +fp_setreadl(PyTokenizer_State *tok, const char* enc) { PyObject *readline = NULL, *stream = NULL, *io = NULL; _Py_IDENTIFIER(open); @@ -533,13 +533,13 @@ fp_setreadl(struct tok_state *tok, const /* Fetch the next byte from TOK. */ -static int fp_getc(struct tok_state *tok) { +static int fp_getc(PyTokenizer_State *tok) { return getc(tok->fp); } /* Unfetch the last byte back into TOK. */ -static void fp_ungetc(int c, struct tok_state *tok) { +static void fp_ungetc(int c, PyTokenizer_State *tok) { ungetc(c, tok->fp); } @@ -575,17 +575,17 @@ static int valid_utf8(const unsigned cha if necessary. 
*/ static char * -decoding_fgets(char *s, int size, struct tok_state *tok) +decoding_fgets(char *s, int size, PyTokenizer_State *tok) { char *line = NULL; int badchar = 0; for (;;) { - if (tok->decoding_state == STATE_NORMAL) { + if (tok->decoding_state == PYTOKENIZER_STATE_NORMAL) { /* We already have a codec associated with this input. */ line = fp_readl(s, size, tok); break; - } else if (tok->decoding_state == STATE_RAW) { + } else if (tok->decoding_state == PYTOKENIZER_STATE_RAW) { /* We want a 'raw' read. */ line = Py_UniversalNewlineFgets(s, size, tok->fp, NULL); @@ -596,7 +596,7 @@ decoding_fgets(char *s, int size, struct reader functions from now on. */ if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) return error_ret(tok); - assert(tok->decoding_state != STATE_INIT); + assert(tok->decoding_state != PYTOKENIZER_STATE_INIT); } } if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { @@ -632,9 +632,9 @@ decoding_fgets(char *s, int size, struct } static int -decoding_feof(struct tok_state *tok) +decoding_feof(PyTokenizer_State *tok) { - if (tok->decoding_state != STATE_NORMAL) { + if (tok->decoding_state != PYTOKENIZER_STATE_NORMAL) { return feof(tok->fp); } else { PyObject* buf = tok->decoding_buffer; @@ -654,14 +654,14 @@ decoding_feof(struct tok_state *tok) /* Fetch a byte from TOK, using the string buffer. */ static int -buf_getc(struct tok_state *tok) { +buf_getc(PyTokenizer_State *tok) { return Py_CHARMASK(*tok->str++); } /* Unfetch a byte from TOK, using the string buffer. */ static void -buf_ungetc(int c, struct tok_state *tok) { +buf_ungetc(int c, PyTokenizer_State *tok) { tok->str--; assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ } @@ -670,7 +670,7 @@ buf_ungetc(int c, struct tok_state *tok) tokenizer, this means to just record the encoding. 
*/ static int -buf_setreadl(struct tok_state *tok, const char* enc) { +buf_setreadl(PyTokenizer_State *tok, const char* enc) { tok->enc = enc; return 1; } @@ -691,7 +691,7 @@ translate_into_utf8(const char* str, con static char * -translate_newlines(const char *s, int exec_input, struct tok_state *tok) { +translate_newlines(const char *s, int exec_input, PyTokenizer_State *tok) { int skip_next_lf = 0; size_t needed_length = strlen(s) + 2, final_length; char *buf, *current; @@ -736,7 +736,7 @@ translate_newlines(const char *s, int ex inside TOK. */ static const char * -decode_str(const char *input, int single, struct tok_state *tok) +decode_str(const char *input, int single, PyTokenizer_State *tok) { PyObject* utf8 = NULL; const char *str; @@ -795,10 +795,10 @@ decode_str(const char *input, int single /* Set up tokenizer for string */ -struct tok_state * +PyTokenizer_State * PyTokenizer_FromString(const char *str, int exec_input) { - struct tok_state *tok = tok_new(); + PyTokenizer_State *tok = tok_new(); if (tok == NULL) return NULL; str = decode_str(str, exec_input, tok); @@ -812,10 +812,10 @@ PyTokenizer_FromString(const char *str, return tok; } -struct tok_state * +PyTokenizer_State * PyTokenizer_FromUTF8(const char *str, int exec_input) { - struct tok_state *tok = tok_new(); + PyTokenizer_State *tok = tok_new(); if (tok == NULL) return NULL; #ifndef PGEN @@ -825,7 +825,7 @@ PyTokenizer_FromUTF8(const char *str, in PyTokenizer_Free(tok); return NULL; } - tok->decoding_state = STATE_RAW; + tok->decoding_state = PYTOKENIZER_STATE_RAW; tok->read_coding_spec = 1; tok->enc = NULL; tok->str = str; @@ -843,11 +843,11 @@ PyTokenizer_FromUTF8(const char *str, in /* Set up tokenizer for file */ -struct tok_state * +PyTokenizer_State * PyTokenizer_FromFile(FILE *fp, const char* enc, const char *ps1, const char *ps2) { - struct tok_state *tok = tok_new(); + PyTokenizer_State *tok = tok_new(); if (tok == NULL) return NULL; if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == 
NULL) { @@ -868,7 +868,7 @@ PyTokenizer_FromFile(FILE *fp, const cha return NULL; } strcpy(tok->encoding, enc); - tok->decoding_state = STATE_NORMAL; + tok->decoding_state = PYTOKENIZER_STATE_NORMAL; } return tok; } @@ -877,7 +877,7 @@ PyTokenizer_FromFile(FILE *fp, const cha /* Free a tok_state structure */ void -PyTokenizer_Free(struct tok_state *tok) +PyTokenizer_Free(PyTokenizer_State *tok) { if (tok->encoding != NULL) PyMem_FREE(tok->encoding); @@ -896,7 +896,7 @@ PyTokenizer_Free(struct tok_state *tok) /* Get next char, updating state; error code goes into tok->done */ static int -tok_nextc(struct tok_state *tok) +tok_nextc(PyTokenizer_State *tok) { for (;;) { if (tok->cur != tok->inp) { @@ -1093,7 +1093,7 @@ tok_nextc(struct tok_state *tok) /* Back-up one character */ static void -tok_backup(struct tok_state *tok, int c) +tok_backup(PyTokenizer_State *tok, int c) { if (c != EOF) { if (--tok->cur < tok->buf) @@ -1110,30 +1110,30 @@ int PyToken_OneChar(int c) { switch (c) { - case '(': return LPAR; - case ')': return RPAR; - case '[': return LSQB; - case ']': return RSQB; - case ':': return COLON; - case ',': return COMMA; - case ';': return SEMI; - case '+': return PLUS; - case '-': return MINUS; - case '*': return STAR; - case '/': return SLASH; - case '|': return VBAR; - case '&': return AMPER; - case '<': return LESS; - case '>': return GREATER; - case '=': return EQUAL; - case '.': return DOT; - case '%': return PERCENT; - case '{': return LBRACE; - case '}': return RBRACE; - case '^': return CIRCUMFLEX; - case '~': return TILDE; - case '@': return AT; - default: return OP; + case '(': return PYTOK_LPAR; + case ')': return PYTOK_RPAR; + case '[': return PYTOK_LSQB; + case ']': return PYTOK_RSQB; + case ':': return PYTOK_COLON; + case ',': return PYTOK_COMMA; + case ';': return PYTOK_SEMI; + case '+': return PYTOK_PLUS; + case '-': return PYTOK_MINUS; + case '*': return PYTOK_STAR; + case '/': return PYTOK_SLASH; + case '|': return PYTOK_VBAR; + case '&': 
return PYTOK_AMPER; + case '<': return PYTOK_LESS; + case '>': return PYTOK_GREATER; + case '=': return PYTOK_EQUAL; + case '.': return PYTOK_DOT; + case '%': return PYTOK_PERCENT; + case '{': return PYTOK_LBRACE; + case '}': return PYTOK_RBRACE; + case '^': return PYTOK_CIRCUMFLEX; + case '~': return PYTOK_TILDE; + case '@': return PYTOK_AT; + default: return PYTOK_OP; } } @@ -1144,77 +1144,77 @@ PyToken_TwoChars(int c1, int c2) switch (c1) { case '=': switch (c2) { - case '=': return EQEQUAL; + case '=': return PYTOK_EQEQUAL; } break; case '!': switch (c2) { - case '=': return NOTEQUAL; + case '=': return PYTOK_NOTEQUAL; } break; case '<': switch (c2) { - case '>': return NOTEQUAL; - case '=': return LESSEQUAL; - case '<': return LEFTSHIFT; + case '>': return PYTOK_NOTEQUAL; + case '=': return PYTOK_LESSEQUAL; + case '<': return PYTOK_LEFTSHIFT; } break; case '>': switch (c2) { - case '=': return GREATEREQUAL; - case '>': return RIGHTSHIFT; + case '=': return PYTOK_GREATEREQUAL; + case '>': return PYTOK_RIGHTSHIFT; } break; case '+': switch (c2) { - case '=': return PLUSEQUAL; + case '=': return PYTOK_PLUSEQUAL; } break; case '-': switch (c2) { - case '=': return MINEQUAL; - case '>': return RARROW; + case '=': return PYTOK_MINEQUAL; + case '>': return PYTOK_RARROW; } break; case '*': switch (c2) { - case '*': return DOUBLESTAR; - case '=': return STAREQUAL; + case '*': return PYTOK_DOUBLESTAR; + case '=': return PYTOK_STAREQUAL; } break; case '/': switch (c2) { - case '/': return DOUBLESLASH; - case '=': return SLASHEQUAL; + case '/': return PYTOK_DOUBLESLASH; + case '=': return PYTOK_SLASHEQUAL; } break; case '|': switch (c2) { - case '=': return VBAREQUAL; + case '=': return PYTOK_VBAREQUAL; } break; case '%': switch (c2) { - case '=': return PERCENTEQUAL; + case '=': return PYTOK_PERCENTEQUAL; } break; case '&': switch (c2) { - case '=': return AMPEREQUAL; + case '=': return PYTOK_AMPEREQUAL; } break; case '^': switch (c2) { - case '=': return 
CIRCUMFLEXEQUAL; + case '=': return PYTOK_CIRCUMFLEXEQUAL; } break; case '@': switch (c2) { - case '=': return ATEQUAL; + case '=': return PYTOK_ATEQUAL; } break; } - return OP; + return PYTOK_OP; } int @@ -1226,7 +1226,7 @@ PyToken_ThreeChars(int c1, int c2, int c case '<': switch (c3) { case '=': - return LEFTSHIFTEQUAL; + return PYTOK_LEFTSHIFTEQUAL; } break; } @@ -1236,7 +1236,7 @@ PyToken_ThreeChars(int c1, int c2, int c case '>': switch (c3) { case '=': - return RIGHTSHIFTEQUAL; + return PYTOK_RIGHTSHIFTEQUAL; } break; } @@ -1246,7 +1246,7 @@ PyToken_ThreeChars(int c1, int c2, int c case '*': switch (c3) { case '=': - return DOUBLESTAREQUAL; + return PYTOK_DOUBLESTAREQUAL; } break; } @@ -1256,7 +1256,7 @@ PyToken_ThreeChars(int c1, int c2, int c case '/': switch (c3) { case '=': - return DOUBLESLASHEQUAL; + return PYTOK_DOUBLESLASHEQUAL; } break; } @@ -1266,17 +1266,17 @@ PyToken_ThreeChars(int c1, int c2, int c case '.': switch (c3) { case '.': - return ELLIPSIS; + return PYTOK_ELLIPSIS; } break; } break; } - return OP; + return PYTOK_OP; } static int -indenterror(struct tok_state *tok) +indenterror(PyTokenizer_State *tok) { if (tok->alterror) { tok->done = E_TABSPACE; @@ -1303,7 +1303,7 @@ indenterror(struct tok_state *tok) All identifier strings are guaranteed to be "ready" unicode objects. */ static int -verify_identifier(struct tok_state *tok) +verify_identifier(PyTokenizer_State *tok) { PyObject *s; int result; @@ -1328,7 +1328,7 @@ verify_identifier(struct tok_state *tok) /* Get next token, after space stripping etc. 
*/ static int -tok_get(struct tok_state *tok, char **p_start, char **p_end) +tok_get(PyTokenizer_State *tok, char **p_start, char **p_end) { int c; int blankline, nonascii; @@ -1361,7 +1361,7 @@ tok_get(struct tok_state *tok, char **p_ if (c == '#' || c == '\n') { /* Lines with only whitespace and/or comments shouldn't affect the indentation and are - not passed to the parser as NEWLINE tokens, + not passed to the parser as PYTOK_NEWLINE tokens, except *totally* empty lines in interactive mode, which signal the end of a command group. */ if (col == 0 && c == '\n' && tok->prompt != NULL) @@ -1376,19 +1376,19 @@ tok_get(struct tok_state *tok, char **p_ /* No change */ if (altcol != tok->altindstack[tok->indent]) { if (indenterror(tok)) - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } } else if (col > tok->indstack[tok->indent]) { /* Indent -- always one */ - if (tok->indent+1 >= MAXINDENT) { + if (tok->indent+1 >= PYTOKENIZER_MAXINDENT) { tok->done = E_TOODEEP; tok->cur = tok->inp; - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } if (altcol <= tok->altindstack[tok->indent]) { if (indenterror(tok)) - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } tok->pendin++; tok->indstack[++tok->indent] = col; @@ -1404,11 +1404,11 @@ tok_get(struct tok_state *tok, char **p_ if (col != tok->indstack[tok->indent]) { tok->done = E_DEDENT; tok->cur = tok->inp; - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } if (altcol != tok->altindstack[tok->indent]) { if (indenterror(tok)) - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } } } @@ -1420,11 +1420,11 @@ tok_get(struct tok_state *tok, char **p_ if (tok->pendin != 0) { if (tok->pendin < 0) { tok->pendin++; - return DEDENT; + return PYTOK_DEDENT; } else { tok->pendin--; - return INDENT; + return PYTOK_INDENT; } } @@ -1445,7 +1445,7 @@ tok_get(struct tok_state *tok, char **p_ /* Check for EOF and errors now */ if (c == EOF) { - return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; + return tok->done == E_EOF ? 
PYTOK_ENDMARKER : PYTOK_ERRORTOKEN; } /* Identifier (most frequent token!) */ @@ -1478,11 +1478,11 @@ tok_get(struct tok_state *tok, char **p_ if (nonascii && !verify_identifier(tok)) { tok->done = E_IDENTIFIER; - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } *p_start = tok->start; *p_end = tok->cur; - return NAME; + return PYTOK_NAME; } /* Newline */ @@ -1493,7 +1493,7 @@ tok_get(struct tok_state *tok, char **p_ *p_start = tok->start; *p_end = tok->cur - 1; /* Leave '\n' out of the string */ tok->cont_line = 0; - return NEWLINE; + return PYTOK_NEWLINE; } /* Period or number starting with period? */ @@ -1506,7 +1506,7 @@ tok_get(struct tok_state *tok, char **p_ if (c == '.') { *p_start = tok->start; *p_end = tok->cur; - return ELLIPSIS; + return PYTOK_ELLIPSIS; } else { tok_backup(tok, c); } @@ -1516,7 +1516,7 @@ tok_get(struct tok_state *tok, char **p_ } *p_start = tok->start; *p_end = tok->cur; - return DOT; + return PYTOK_DOT; } /* Number */ @@ -1535,7 +1535,7 @@ tok_get(struct tok_state *tok, char **p_ if (!isxdigit(c)) { tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } do { c = tok_nextc(tok); @@ -1547,7 +1547,7 @@ tok_get(struct tok_state *tok, char **p_ if (c < '0' || c >= '8') { tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } do { c = tok_nextc(tok); @@ -1559,7 +1559,7 @@ tok_get(struct tok_state *tok, char **p_ if (c != '0' && c != '1') { tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } do { c = tok_nextc(tok); @@ -1584,7 +1584,7 @@ tok_get(struct tok_state *tok, char **p_ else if (nonzero) { tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } } } @@ -1613,14 +1613,14 @@ tok_get(struct tok_state *tok, char **p_ if (!isdigit(c)) { tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } } else if (!isdigit(c)) { tok_backup(tok, c); tok_backup(tok, e); 
*p_start = tok->start; *p_end = tok->cur; - return NUMBER; + return PYTOK_NUMBER; } do { c = tok_nextc(tok); @@ -1635,7 +1635,7 @@ tok_get(struct tok_state *tok, char **p_ tok_backup(tok, c); *p_start = tok->start; *p_end = tok->cur; - return NUMBER; + return PYTOK_NUMBER; } letter_quote: @@ -1666,12 +1666,12 @@ tok_get(struct tok_state *tok, char **p_ else tok->done = E_EOLS; tok->cur = tok->inp; - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } if (quote_size == 1 && c == '\n') { tok->done = E_EOLS; tok->cur = tok->inp; - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } if (c == quote) end_quote_size += 1; @@ -1684,7 +1684,7 @@ tok_get(struct tok_state *tok, char **p_ *p_start = tok->start; *p_end = tok->cur; - return STRING; + return PYTOK_STRING; } /* Line continuation */ @@ -1693,7 +1693,7 @@ tok_get(struct tok_state *tok, char **p_ if (c != '\n') { tok->done = E_LINECONT; tok->cur = tok->inp; - return ERRORTOKEN; + return PYTOK_ERRORTOKEN; } tok->cont_line = 1; goto again; /* Read next line */ @@ -1703,10 +1703,10 @@ tok_get(struct tok_state *tok, char **p_ { int c2 = tok_nextc(tok); int token = PyToken_TwoChars(c, c2); - if (token != OP) { + if (token != PYTOK_OP) { int c3 = tok_nextc(tok); int token3 = PyToken_ThreeChars(c, c2, c3); - if (token3 != OP) { + if (token3 != PYTOK_OP) { token = token3; } else { tok_backup(tok, c3); @@ -1739,11 +1739,11 @@ tok_get(struct tok_state *tok, char **p_ } int -PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) +PyTokenizer_Get(PyTokenizer_State *tok, char **p_start, char **p_end) { int result = tok_get(tok, p_start, p_end); if (tok->decoding_erred) { - result = ERRORTOKEN; + result = PYTOK_ERRORTOKEN; tok->done = E_DECODE; } return result; @@ -1762,7 +1762,7 @@ PyTokenizer_Get(struct tok_state *tok, c char * PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { - struct tok_state *tok; + PyTokenizer_State *tok; FILE *fp; char *p_start =NULL , *p_end =NULL , *encoding = NULL; @@ -1823,7 +1823,7 
@@ void tok_dump(int type, char *start, char *end) { printf("%s", _PyParser_TokenNames[type]); - if (type == NAME || type == NUMBER || type == STRING || type == OP) + if (type == PYTOK_NAME || type == PYTOK_NUMBER || type == PYTOK_STRING || type == PYTOK_OP) printf("(%.*s)", (int)(end - start), start); } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h deleted file mode 100644 --- a/Parser/tokenizer.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef Py_TOKENIZER_H -#define Py_TOKENIZER_H -#ifdef __cplusplus -extern "C" { -#endif - -#include "object.h" - -/* Tokenizer interface */ - -#include "token.h" /* For token types */ - -#define MAXINDENT 100 /* Max indentation level */ - -enum decoding_state { - STATE_INIT, - STATE_RAW, - STATE_NORMAL /* have a codec associated with input */ -}; - -/* Tokenizer state */ -struct tok_state { - /* Input state; buf <= cur <= inp <= end */ - /* NB an entire line is held in the buffer */ - char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ - char *cur; /* Next character in buffer */ - char *inp; /* End of data in buffer */ - char *end; /* End of input buffer if buf != NULL */ - char *start; /* Start of current token if not NULL */ - int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ - /* NB If done != E_OK, cur must be == inp!!! */ - FILE *fp; /* Rest of input; NULL if tokenizing a string */ - int tabsize; /* Tab spacing */ - int indent; /* Current indentation index */ - int indstack[MAXINDENT]; /* Stack of indents */ - int atbol; /* Nonzero if at begin of new line */ - int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ - const char *prompt, *nextprompt; /* For interactive prompting */ - int lineno; /* Current line number */ - int level; /* () [] {} Parentheses nesting level */ - /* Used to allow free continuations inside them */ - /* Stuff for checking on different tab sizes */ -#ifndef PGEN - /* pgen doesn't have access to Python codecs, it cannot decode the input - filename. 
The bytes filename might be kept, but it is only used by - indenterror() and it is not really needed: pgen only compiles one file - (Grammar/Grammar). */ - PyObject *filename; -#endif - int altwarning; /* Issue warning if alternate tabs don't match */ - int alterror; /* Issue error if alternate tabs don't match */ - int alttabsize; /* Alternate tab spacing */ - int altindstack[MAXINDENT]; /* Stack of alternate indents */ - /* Stuff for PEP 0263 */ - enum decoding_state decoding_state; - int decoding_erred; /* whether erred in decoding */ - int read_coding_spec; /* whether 'coding:...' has been read */ - char *encoding; /* Source encoding. */ - int cont_line; /* whether we are in a continuation line. */ - const char* line_start; /* pointer to start of current line */ -#ifndef PGEN - PyObject *decoding_readline; /* open(...).readline */ - PyObject *decoding_buffer; -#endif - const char* enc; /* Encoding for the current str. */ - const char* str; - const char* input; /* Tokenizer's newline translated copy of the string. */ -}; - -extern struct tok_state *PyTokenizer_FromString(const char *, int); -extern struct tok_state *PyTokenizer_FromUTF8(const char *, int); -extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*, - const char *, const char *); -extern void PyTokenizer_Free(struct tok_state *); -extern int PyTokenizer_Get(struct tok_state *, char **, char **); -extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, - int len, int *offset); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_TOKENIZER_H */ diff --git a/Python/ast.c b/Python/ast.c --- a/Python/ast.c +++ b/Python/ast.c @@ -606,7 +606,7 @@ ast_error(struct compiling *c, const nod grammar: stmt: simple_stmt | compound_stmt - simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE + simple_stmt: small_stmt (';' small_stmt)* [';'] PYTOK_NEWLINE A simple_stmt can contain multiple small_stmt elements joined by semicolons. 
If the arg is a simple_stmt, the number of @@ -621,7 +621,7 @@ num_stmts(const node *n) switch (TYPE(n)) { case single_input: - if (TYPE(CHILD(n, 0)) == NEWLINE) + if (TYPE(CHILD(n, 0)) == PYTOK_NEWLINE) return 0; else return num_stmts(CHILD(n, 0)); @@ -703,7 +703,7 @@ PyAST_FromNodeObject(const node *n, PyCo goto out; for (i = 0; i < NCH(n) - 1; i++) { ch = CHILD(n, i); - if (TYPE(ch) == NEWLINE) + if (TYPE(ch) == PYTOK_NEWLINE) continue; REQ(ch, stmt); num = num_stmts(ch); @@ -737,7 +737,7 @@ PyAST_FromNodeObject(const node *n, PyCo break; } case single_input: - if (TYPE(CHILD(n, 0)) == NEWLINE) { + if (TYPE(CHILD(n, 0)) == PYTOK_NEWLINE) { stmts = _Py_asdl_seq_new(1, arena); if (!stmts) goto out; @@ -763,7 +763,7 @@ PyAST_FromNodeObject(const node *n, PyCo /* Only a simple_stmt can contain multiple statements. */ REQ(n, simple_stmt); for (i = 0; i < NCH(n); i += 2) { - if (TYPE(CHILD(n, i)) == NEWLINE) + if (TYPE(CHILD(n, i)) == PYTOK_NEWLINE) break; s = ast_for_stmt(&c, CHILD(n, i)); if (!s) @@ -811,29 +811,29 @@ static operator_ty get_operator(const node *n) { switch (TYPE(n)) { - case VBAR: + case PYTOK_VBAR: return BitOr; - case CIRCUMFLEX: + case PYTOK_CIRCUMFLEX: return BitXor; - case AMPER: + case PYTOK_AMPER: return BitAnd; - case LEFTSHIFT: + case PYTOK_LEFTSHIFT: return LShift; - case RIGHTSHIFT: + case PYTOK_RIGHTSHIFT: return RShift; - case PLUS: + case PYTOK_PLUS: return Add; - case MINUS: + case PYTOK_MINUS: return Sub; - case STAR: + case PYTOK_STAR: return Mult; - case AT: + case PYTOK_AT: return MatMult; - case SLASH: + case PYTOK_SLASH: return Div; - case DOUBLESLASH: + case PYTOK_DOUBLESLASH: return FloorDiv; - case PERCENT: + case PYTOK_PERCENT: return Mod; default: return (operator_ty)0; @@ -1052,19 +1052,19 @@ ast_for_comp_op(struct compiling *c, con if (NCH(n) == 1) { n = CHILD(n, 0); switch (TYPE(n)) { - case LESS: + case PYTOK_LESS: return Lt; - case GREATER: + case PYTOK_GREATER: return Gt; - case EQEQUAL: /* == */ + case PYTOK_EQEQUAL: 
/* == */ return Eq; - case LESSEQUAL: + case PYTOK_LESSEQUAL: return LtE; - case GREATEREQUAL: + case PYTOK_GREATEREQUAL: return GtE; - case NOTEQUAL: + case PYTOK_NOTEQUAL: return NotEq; - case NAME: + case PYTOK_NAME: if (strcmp(STR(n), "in") == 0) return In; if (strcmp(STR(n), "is") == 0) @@ -1078,7 +1078,7 @@ ast_for_comp_op(struct compiling *c, con else if (NCH(n) == 2) { /* handle "not in" and "is not" */ switch (TYPE(CHILD(n, 0))) { - case NAME: + case PYTOK_NAME: if (strcmp(STR(CHILD(n, 1)), "in") == 0) return NotIn; if (strcmp(STR(CHILD(n, 0)), "is") == 0) @@ -1139,7 +1139,7 @@ ast_for_arg(struct compiling *c, const n if (forbidden_name(c, name, ch, 0)) return NULL; - if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) { + if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == PYTOK_COLON) { annotation = ast_for_expr(c, CHILD(n, 2)); if (!annotation) return NULL; @@ -1180,7 +1180,7 @@ handle_keywordonly_args(struct compiling switch (TYPE(ch)) { case vfpdef: case tfpdef: - if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) { + if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == PYTOK_EQUAL) { expression = ast_for_expr(c, CHILD(n, i + 2)); if (!expression) goto error; @@ -1191,7 +1191,7 @@ handle_keywordonly_args(struct compiling asdl_seq_SET(kwdefaults, j, NULL); } if (NCH(ch) == 3) { - /* ch is NAME ':' test */ + /* ch is PYTOK_NAME ':' test */ annotation = ast_for_expr(c, CHILD(ch, 2)); if (!annotation) goto error; @@ -1213,7 +1213,7 @@ handle_keywordonly_args(struct compiling asdl_seq_SET(kwonlyargs, j++, arg); i += 2; /* the name and the comma */ break; - case DOUBLESTAR: + case PYTOK_DOUBLESTAR: return i; default: ast_error(c, ch, "unexpected node"); @@ -1238,12 +1238,12 @@ ast_for_arguments(struct compiling *c, c ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) - tfpdef: NAME [':' test] + tfpdef: PYTOK_NAME [':' test] varargslist: ((vfpdef ['=' test] ',')* ('*' [vfpdef] (',' vfpdef ['=' test])* [',' 
'**' vfpdef] | '**' vfpdef) | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) - vfpdef: NAME + vfpdef: PYTOK_NAME */ int i, j, k, nposargs = 0, nkwonlyargs = 0; int nposdefaults = 0, found_default = 0; @@ -1265,7 +1265,7 @@ ast_for_arguments(struct compiling *c, c */ for (i = 0; i < NCH(n); i++) { ch = CHILD(n, i); - if (TYPE(ch) == STAR) { + if (TYPE(ch) == PYTOK_STAR) { /* skip star */ i++; if (i < NCH(n) && /* skip argument following star */ @@ -1275,15 +1275,15 @@ ast_for_arguments(struct compiling *c, c } break; } - if (TYPE(ch) == DOUBLESTAR) break; + if (TYPE(ch) == PYTOK_DOUBLESTAR) break; if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++; - if (TYPE(ch) == EQUAL) nposdefaults++; + if (TYPE(ch) == PYTOK_EQUAL) nposdefaults++; } /* count the number of keyword only args & defaults for keyword only args */ for ( ; i < NCH(n); ++i) { ch = CHILD(n, i); - if (TYPE(ch) == DOUBLESTAR) break; + if (TYPE(ch) == PYTOK_DOUBLESTAR) break; if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++; } posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL); @@ -1310,8 +1310,8 @@ ast_for_arguments(struct compiling *c, c return NULL; } - /* tfpdef: NAME [':' test] - vfpdef: NAME + /* tfpdef: PYTOK_NAME [':' test] + vfpdef: PYTOK_NAME */ i = 0; j = 0; /* index for defaults */ @@ -1322,9 +1322,9 @@ ast_for_arguments(struct compiling *c, c case tfpdef: case vfpdef: /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is - anything other than EQUAL or a comma? */ + anything other than PYTOK_EQUAL or a comma? */ /* XXX Should NCH(n) check be made a separate check? 
*/ - if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) { + if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == PYTOK_EQUAL) { expr_ty expression = ast_for_expr(c, CHILD(n, i + 2)); if (!expression) return NULL; @@ -1344,14 +1344,14 @@ ast_for_arguments(struct compiling *c, c asdl_seq_SET(posargs, k++, arg); i += 2; /* the name and the comma */ break; - case STAR: + case PYTOK_STAR: if (i+1 >= NCH(n)) { ast_error(c, CHILD(n, i), "named arguments must follow bare *"); return NULL; } - ch = CHILD(n, i+1); /* tfpdef or COMMA */ - if (TYPE(ch) == COMMA) { + ch = CHILD(n, i+1); /* tfpdef or PYTOK_COMMA */ + if (TYPE(ch) == PYTOK_COMMA) { int res = 0; i += 2; /* now follows keyword only arguments */ res = handle_keywordonly_args(c, n, i, @@ -1375,7 +1375,7 @@ ast_for_arguments(struct compiling *c, c } } break; - case DOUBLESTAR: + case PYTOK_DOUBLESTAR: ch = CHILD(n, i+1); /* tfpdef */ assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef); kwarg = ast_for_arg(c, ch); @@ -1428,13 +1428,13 @@ ast_for_dotted_name(struct compiling *c, static expr_ty ast_for_decorator(struct compiling *c, const node *n) { - /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */ + /* decorator: '@' dotted_name [ '(' [arglist] ')' ] PYTOK_NEWLINE */ expr_ty d = NULL; expr_ty name_expr; REQ(n, decorator); - REQ(CHILD(n, 0), AT); - REQ(RCHILD(n, -1), NEWLINE); + REQ(CHILD(n, 0), PYTOK_AT); + REQ(RCHILD(n, -1), PYTOK_NEWLINE); name_expr = ast_for_dotted_name(c, CHILD(n, 1)); if (!name_expr) @@ -1485,7 +1485,7 @@ ast_for_decorators(struct compiling *c, static stmt_ty ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) { - /* funcdef: 'def' NAME parameters ['->' test] ':' suite */ + /* funcdef: 'def' PYTOK_NAME parameters ['->' test] ':' suite */ identifier name; arguments_ty args; asdl_seq *body; @@ -1502,7 +1502,7 @@ ast_for_funcdef(struct compiling *c, con args = ast_for_arguments(c, CHILD(n, name_i + 1)); if (!args) return NULL; - if (TYPE(CHILD(n, name_i+2)) == RARROW) 
{ + if (TYPE(CHILD(n, name_i+2)) == PYTOK_RARROW) { returns = ast_for_expr(c, CHILD(n, name_i + 3)); if (!returns) return NULL; @@ -1768,7 +1768,7 @@ ast_for_dictcomp(struct compiling *c, co asdl_seq *comps; assert(NCH(n) > 3); - REQ(CHILD(n, 1), COLON); + REQ(CHILD(n, 1), PYTOK_COLON); key = ast_for_expr(c, CHILD(n, 0)); if (!key) @@ -1810,14 +1810,14 @@ static expr_ty ast_for_atom(struct compiling *c, const node *n) { /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' - | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+ + | '{' [dictmaker|testlist_comp] '}' | PYTOK_NAME | PYTOK_NUMBER | PYTOK_STRING+ | '...' | 'None' | 'True' | 'False' */ node *ch = CHILD(n, 0); int bytesmode = 0; switch (TYPE(ch)) { - case NAME: { + case PYTOK_NAME: { PyObject *name; const char *s = STR(ch); size_t len = strlen(s); @@ -1835,7 +1835,7 @@ ast_for_atom(struct compiling *c, const /* All names start in Load context, but may later be changed. */ return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena); } - case STRING: { + case PYTOK_STRING: { PyObject *str = parsestrplus(c, n, &bytesmode); if (!str) { const char *errtype = NULL; @@ -1871,7 +1871,7 @@ ast_for_atom(struct compiling *c, const else return Str(str, LINENO(n), n->n_col_offset, c->c_arena); } - case NUMBER: { + case PYTOK_NUMBER: { PyObject *pynum = parsenumber(c, STR(ch)); if (!pynum) return NULL; @@ -1882,12 +1882,12 @@ ast_for_atom(struct compiling *c, const } return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena); } - case ELLIPSIS: /* Ellipsis */ + case PYTOK_ELLIPSIS: /* Ellipsis */ return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena); - case LPAR: /* some parenthesized expressions */ + case PYTOK_LPAR: /* some parenthesized expressions */ ch = CHILD(n, 1); - if (TYPE(ch) == RPAR) + if (TYPE(ch) == PYTOK_RPAR) return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena); if (TYPE(ch) == yield_expr) @@ -1898,14 +1898,14 @@ ast_for_atom(struct compiling *c, const return 
ast_for_genexp(c, ch); return ast_for_testlist(c, ch); - case LSQB: /* list (or list comprehension) */ + case PYTOK_LSQB: /* list (or list comprehension) */ ch = CHILD(n, 1); - if (TYPE(ch) == RSQB) + if (TYPE(ch) == PYTOK_RSQB) return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena); REQ(ch, testlist_comp); - if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { + if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == PYTOK_COMMA) { asdl_seq *elts = seq_for_testlist(c, ch); if (!elts) return NULL; @@ -1914,17 +1914,17 @@ ast_for_atom(struct compiling *c, const } else return ast_for_listcomp(c, ch); - case LBRACE: { + case PYTOK_LBRACE: { /* dictorsetmaker: test ':' test (',' test ':' test)* [','] | * test (gen_for | (',' test)* [',']) */ int i, size; asdl_seq *keys, *values; ch = CHILD(n, 1); - if (TYPE(ch) == RBRACE) { + if (TYPE(ch) == PYTOK_RBRACE) { /* it's an empty dict */ return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena); - } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { + } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == PYTOK_COMMA) { /* it's a simple set */ asdl_seq *elts; size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */ @@ -2009,7 +2009,7 @@ ast_for_slice(struct compiling *c, const } /* If there's an upper bound it's in the second or third position. */ - if (TYPE(ch) == COLON) { + if (TYPE(ch) == PYTOK_COLON) { if (NCH(n) > 1) { node *n2 = CHILD(n, 1); @@ -2099,19 +2099,19 @@ ast_for_binop(struct compiling *c, const static expr_ty ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr) { - /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME + /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' PYTOK_NAME subscriptlist: subscript (',' subscript)* [','] subscript: '.' '.' '.' 
| test | [test] ':' [test] [sliceop] */ REQ(n, trailer); - if (TYPE(CHILD(n, 0)) == LPAR) { + if (TYPE(CHILD(n, 0)) == PYTOK_LPAR) { if (NCH(n) == 2) return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena); else return ast_for_call(c, CHILD(n, 1), left_expr); } - else if (TYPE(CHILD(n, 0)) == DOT) { + else if (TYPE(CHILD(n, 0)) == PYTOK_DOT) { PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1)); if (!attr_id) return NULL; @@ -2119,8 +2119,8 @@ ast_for_trailer(struct compiling *c, con LINENO(n), n->n_col_offset, c->c_arena); } else { - REQ(CHILD(n, 0), LSQB); - REQ(CHILD(n, 2), RSQB); + REQ(CHILD(n, 0), PYTOK_LSQB); + REQ(CHILD(n, 2), PYTOK_RSQB); n = CHILD(n, 1); if (NCH(n) == 1) { slice_ty slc = ast_for_slice(c, CHILD(n, 0)); @@ -2182,13 +2182,13 @@ ast_for_factor(struct compiling *c, cons return NULL; switch (TYPE(CHILD(n, 0))) { - case PLUS: + case PYTOK_PLUS: return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset, c->c_arena); - case MINUS: + case PYTOK_MINUS: return UnaryOp(USub, expression, LINENO(n), n->n_col_offset, c->c_arena); - case TILDE: + case PYTOK_TILDE: return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset, c->c_arena); } @@ -2526,13 +2526,13 @@ ast_for_call(struct compiling *c, const asdl_seq_SET(keywords, nkeywords++, kw); } } - else if (TYPE(ch) == STAR) { + else if (TYPE(ch) == PYTOK_STAR) { vararg = ast_for_expr(c, CHILD(n, i+1)); if (!vararg) return NULL; i++; } - else if (TYPE(ch) == DOUBLESTAR) { + else if (TYPE(ch) == PYTOK_DOUBLESTAR) { kwarg = ast_for_expr(c, CHILD(n, i+1)); if (!kwarg) return NULL; @@ -2631,7 +2631,7 @@ ast_for_expr_stmt(struct compiling *c, c expr_ty expression; /* a normal assignment */ - REQ(CHILD(n, 1), EQUAL); + REQ(CHILD(n, 1), PYTOK_EQUAL); targets = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); if (!targets) return NULL; @@ -2767,9 +2767,9 @@ static alias_ty alias_for_import_name(struct compiling *c, const node *n, int store) { /* - import_as_name: NAME ['as' NAME] - 
dotted_as_name: dotted_name ['as' NAME] - dotted_name: NAME ('.' NAME)* + import_as_name: PYTOK_NAME ['as' PYTOK_NAME] + dotted_as_name: dotted_name ['as' PYTOK_NAME] + dotted_name: PYTOK_NAME ('.' PYTOK_NAME)* */ identifier str, name; @@ -2865,7 +2865,7 @@ alias_for_import_name(struct compiling * return alias(str, NULL, c->c_arena); } break; - case STAR: + case PYTOK_STAR: str = PyUnicode_InternFromString("*"); if (PyArena_AddPyObject(c->c_arena, str) < 0) { Py_DECREF(str); @@ -2929,23 +2929,23 @@ ast_for_import_stmt(struct compiling *c, return NULL; idx++; break; - } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) { - /* three consecutive dots are tokenized as one ELLIPSIS */ + } else if (TYPE(CHILD(n, idx)) == PYTOK_ELLIPSIS) { + /* three consecutive dots are tokenized as one PYTOK_ELLIPSIS */ ndots += 3; continue; - } else if (TYPE(CHILD(n, idx)) != DOT) { + } else if (TYPE(CHILD(n, idx)) != PYTOK_DOT) { break; } ndots++; } idx++; /* skip over the 'import' keyword */ switch (TYPE(CHILD(n, idx))) { - case STAR: + case PYTOK_STAR: /* from ... import * */ n = CHILD(n, idx); n_children = 1; break; - case LPAR: + case PYTOK_LPAR: /* from ... import (x, y, z) */ n = CHILD(n, idx + 1); n_children = NCH(n); @@ -2970,7 +2970,7 @@ ast_for_import_stmt(struct compiling *c, return NULL; /* handle "from ... 
import *" special b/c there's no children */ - if (TYPE(n) == STAR) { + if (TYPE(n) == PYTOK_STAR) { alias_ty import_alias = alias_for_import_name(c, n, 1); if (!import_alias) return NULL; @@ -2998,7 +2998,7 @@ ast_for_import_stmt(struct compiling *c, static stmt_ty ast_for_global_stmt(struct compiling *c, const node *n) { - /* global_stmt: 'global' NAME (',' NAME)* */ + /* global_stmt: 'global' PYTOK_NAME (',' PYTOK_NAME)* */ identifier name; asdl_seq *s; int i; @@ -3019,7 +3019,7 @@ ast_for_global_stmt(struct compiling *c, static stmt_ty ast_for_nonlocal_stmt(struct compiling *c, const node *n) { - /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */ + /* nonlocal_stmt: 'nonlocal' PYTOK_NAME (',' PYTOK_NAME)* */ identifier name; asdl_seq *s; int i; @@ -3069,7 +3069,7 @@ ast_for_assert_stmt(struct compiling *c, static asdl_seq * ast_for_suite(struct compiling *c, const node *n) { - /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */ + /* suite: simple_stmt | PYTOK_NEWLINE PYTOK_INDENT stmt+ PYTOK_DEDENT */ asdl_seq *seq; stmt_ty s; int i, total, num, end, pos = 0; @@ -3083,11 +3083,11 @@ ast_for_suite(struct compiling *c, const return NULL; if (TYPE(CHILD(n, 0)) == simple_stmt) { n = CHILD(n, 0); - /* simple_stmt always ends with a NEWLINE, - and may have a trailing SEMI + /* simple_stmt always ends with a PYTOK_NEWLINE, + and may have a trailing PYTOK_SEMI */ end = NCH(n) - 1; - if (TYPE(CHILD(n, end - 1)) == SEMI) + if (TYPE(CHILD(n, end - 1)) == PYTOK_SEMI) end--; /* loop by 2 to skip semi-colons */ for (i = 0; i < end; i += 2) { @@ -3187,7 +3187,7 @@ ast_for_if_stmt(struct compiling *c, con n_elif = NCH(n) - 4; /* must reference the child n_elif+1 since 'else' token is third, not fourth, child from the end. 
*/ - if (TYPE(CHILD(n, (n_elif + 1))) == NAME + if (TYPE(CHILD(n, (n_elif + 1))) == PYTOK_NAME && STR(CHILD(n, (n_elif + 1)))[2] == 's') { has_else = 1; n_elif -= 3; @@ -3399,9 +3399,9 @@ ast_for_try_stmt(struct compiling *c, co if (body == NULL) return NULL; - if (TYPE(CHILD(n, nch - 3)) == NAME) { + if (TYPE(CHILD(n, nch - 3)) == PYTOK_NAME) { if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) { - if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) { + if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == PYTOK_NAME) { /* we can assume it's an "else", because nch >= 9 for try-else-finally and it would otherwise have a type of except_clause */ @@ -3504,14 +3504,14 @@ ast_for_with_stmt(struct compiling *c, c static stmt_ty ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) { - /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */ + /* classdef: 'class' PYTOK_NAME ['(' arglist ')'] ':' suite */ PyObject *classname; asdl_seq *s; expr_ty call; REQ(n, classdef); - if (NCH(n) == 4) { /* class NAME ':' suite */ + if (NCH(n) == 4) { /* class PYTOK_NAME ':' suite */ s = ast_for_suite(c, CHILD(n, 3)); if (!s) return NULL; @@ -3524,7 +3524,7 @@ ast_for_classdef(struct compiling *c, co LINENO(n), n->n_col_offset, c->c_arena); } - if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */ + if (TYPE(CHILD(n, 3)) == PYTOK_RPAR) { /* class PYTOK_NAME '(' ')' ':' suite */ s = ast_for_suite(c, CHILD(n,5)); if (!s) return NULL; @@ -3537,7 +3537,7 @@ ast_for_classdef(struct compiling *c, co LINENO(n), n->n_col_offset, c->c_arena); } - /* class NAME '(' arglist ')' ':' suite */ + /* class PYTOK_NAME '(' arglist ')' ':' suite */ /* build up a fake Call node so we can extract its pieces */ { PyObject *dummy_name; @@ -3844,7 +3844,7 @@ parsestr(struct compiling *c, const node need_encoding ? c->c_encoding : NULL); } -/* Build a Python string object out of a STRING+ atom. This takes care of +/* Build a Python string object out of a PYTOK_STRING+ atom. 
This takes care of * compile-time literal catenation, calling parsestr() on each piece, and * pasting the intermediate results together. */ @@ -3853,7 +3853,7 @@ parsestrplus(struct compiling *c, const { PyObject *v; int i; - REQ(CHILD(n, 0), STRING); + REQ(CHILD(n, 0), PYTOK_STRING); v = parsestr(c, CHILD(n, 0), bytesmode); if (v != NULL) { /* String literal concatenation */ diff --git a/Python/pythonrun.c b/Python/pythonrun.c --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1267,11 +1267,11 @@ err_input(perrdetail *err) return; case E_SYNTAX: errtype = PyExc_IndentationError; - if (err->expected == INDENT) + if (err->expected == PYTOK_INDENT) msg = "expected an indented block"; - else if (err->token == INDENT) + else if (err->token == PYTOK_INDENT) msg = "unexpected indent"; - else if (err->token == DEDENT) + else if (err->token == PYTOK_DEDENT) msg = "unexpected unindent"; else { errtype = PyExc_SyntaxError;