diff --git a/Include/converters.h b/Include/converters.h new file mode 100644 --- /dev/null +++ b/Include/converters.h @@ -0,0 +1,84 @@ + +/* + * This file contains type declarations for the converter functions used + * by PyArg_ParseTuple(), PyArg_ParseTupleAndKeywords() and PyArg_Parse(). + * + * The default converters are denoted by their corresponding format strings. + * This means that the quotes are part of the function names. + * + * Custom converter names are unquoted. + * + * The type constraint on the LHS of the arrow is either a single Python + * type or a list of alternative Python types. + * + * The RHS contains the C result type(s) of the conversion. If a conversion + * has a single return value, both the type and the identifier 'res' are + * (currently [1]) required. + * + * If a conversion has multiple return values, the main return value is + * denoted by the identifier 'res'; additional return values must have + * identifiers of the form 'res_suffix'. + * + * By default parameters are passed by value to the implementation function. + * If 'res' is prefixed by an ampersand, the address of the parameter is + * passed instead. + * + * + * [1] A sufficiently smart parser could eliminate some of the requirements. 
+ * + */ + + +/*[converter] +##### Default converters ##### +"s": str -> const char *res; +"s*": [str, bytes, bytearray, rw_buffer] -> Py_buffer &res; +"s#": [str, bytes, r_buffer] -> (const char *res, Py_ssize_t res_length); +"z": [str, None] -> const char *res; +"z*": [str, bytes, bytearray, rw_buffer, None] -> Py_buffer &res; +"z#": [str, bytes, r_buffer, None] -> (const char *res, Py_ssize_t res_length); +"y": bytes -> const char *res; +"y*": [bytes, bytearray, rw_buffer] -> Py_buffer &res; +"y#": bytes -> (const char *res, Py_ssize_t res_length); +"S": bytes -> PyBytesObject *res; +"Y": bytearray -> PyByteArrayObject *res; +"u": str -> Py_UNICODE *res; +"u#": str -> (Py_UNICODE *res, Py_ssize_t res_length); +"Z": [str, None] -> Py_UNICODE *res; +"Z#": [str, None] -> (Py_UNICODE *res, Py_ssize_t res_length); +"U": str -> PyObject *res; +"w*": [bytearray, rw_buffer] -> Py_buffer &res; +"es": str -> (const char *res_encoding, char **res); +"et": [str, bytes, bytearray] -> (const char *res_encoding, char **res); +"es#": str -> (const char *res_encoding, char **res, Py_ssize_t *res_length); +"et#": [str, bytes, bytearray] -> (const char *res_encoding, char **res, Py_ssize_t *res_length); + +"b": int -> unsigned char res; +"B": int -> unsigned char res; +"h": int -> short res; +"H": int -> unsigned short res; +"i": int -> int res; +"I": int -> unsigned int res; +"l": int -> long res; +"k": int -> unsigned long res; +"L": int -> PY_LONG_LONG res; +"K": int -> unsigned PY_LONG_LONG res; +"n": int -> Py_ssize_t res; +"c": int -> char res; +"C": str -> int res; +"f": float -> float res; +"d": float -> double res; +"D": complex -> Py_complex res; +"p": bool -> int res; +"O": object -> PyObject *res; +"O!": object -> (PyTypeObject *res_type, PyObject *res); + + +##### Custom converters ##### +path_converter: [str, bytes, int] -> path_t &res; +OS_STAT_DIR_FD_CONVERTER: [int, None] -> int res; + +[converter_end]*/ + + + diff --git a/Include/pymacro.h b/Include/pymacro.h --- 
a/Include/pymacro.h +++ b/Include/pymacro.h @@ -49,10 +49,13 @@ /* Define macros for inline documentation. */ #define PyDoc_VAR(name) static char name[] #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str) +#define PyDoc_XSTRVAR(name,header,str) PyDoc_VAR(name) = PyDoc_XSTR(header, str) #ifdef WITH_DOC_STRINGS #define PyDoc_STR(str) str +#define PyDoc_XSTR(header,str) header str #else #define PyDoc_STR(str) "" +#define PyDoc_XSTR(header,str) "" #endif /* Below "a" is a power of 2. */ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1998,19 +1998,34 @@ /* If true, st_?time is float. */ static int _stat_float_times = 1; -PyDoc_STRVAR(stat_float_times__doc__, -"stat_float_times([newval]) -> oldval\n\n\ -Determine whether os.[lf]stat represents time stamps as float objects.\n\ -If newval is True, future calls to stat() return floats, if it is False,\n\ -future calls return ints. \n\ -If newval is omitted, return the current setting.\n"); - -static PyObject* -stat_float_times(PyObject* self, PyObject *args) +/*[define stat_float_times] +def os.stat_float_times(/, newval: "i") -> os.stat_result: pass +%% +int newval = -1; +[define_end]*/ + +#define STAT_FLOAT_TIMES_HEADER "\ +os.stat_float_times(/, newval: int) -> os.stat_result" +static PyObject *stat_float_times_impl(PyObject *self, int newval); + +static PyObject * +stat_float_times(PyObject *self, PyObject *args) { int newval = -1; - if (!PyArg_ParseTuple(args, "|i:stat_float_times", &newval)) - return NULL; + PyObject *_ret; + + if (!PyArg_ParseTuple(args,"|i:stat_float_times", + &newval)) + return NULL; + + _ret = stat_float_times_impl(self, newval); + return _ret; +} + +static PyObject * +stat_float_times_impl(PyObject *self, int newval) +/*[define_output_end]*/ +{ if (PyErr_WarnEx(PyExc_DeprecationWarning, "stat_float_times() is deprecated", 1)) @@ -2230,46 +2245,56 @@ return _pystat_fromstructstat(&st); } 
-PyDoc_STRVAR(posix_stat__doc__, -"stat(path, *, dir_fd=None, follow_symlinks=True) -> stat result\n\n\ -Perform a stat system call on the given path.\n\ -\n\ -path may be specified as either a string or as an open file descriptor.\n\ -\n\ -If dir_fd is not None, it should be a file descriptor open to a directory,\n\ - and path should be relative; path will then be relative to that directory.\n\ - dir_fd may not be supported on your platform; if it is unavailable, using\n\ - it will raise a NotImplementedError.\n\ -If follow_symlinks is False, and the last element of the path is a symbolic\n\ - link, stat will examine the symbolic link itself instead of the file the\n\ - link points to.\n\ -It is an error to use dir_fd or follow_symlinks when specifying path as\n\ - an open file descriptor."); +#define PATH_T_INITIALIZE(function_name, nullable, allow_fd) \ + {function_name, NULL, nullable, allow_fd, NULL, NULL, 0, 0, NULL, NULL} + +#ifdef HAVE_FSTATAT + #define OS_STAT_DIR_FD_CONVERTER dir_fd_converter +#else + #define OS_STAT_DIR_FD_CONVERTER dir_fd_unavailable +#endif + +/*[define posix_stat] +def os.stat(path: path_converter, *, dir_fd: OS_STAT_DIR_FD_CONVERTER = None, + follow_symlinks: "p" = True) -> os.stat_result: pass +%% +path_t path = PATH_T_INITIALIZE("stat", 0, 1); +int dir_fd = DEFAULT_DIR_FD; +int follow_symlinks = 1; +%% +path_cleanup(&path); +[define_end]*/ + +#define POSIX_STAT_HEADER "\ +os.stat(path: [bytes, int, str], *, dir_fd: [None, int] = None,\n\ + follow_symlinks: bool = True) -> os.stat_result" +static PyObject *posix_stat_impl(PyObject *self, path_t *path, int dir_fd, + int follow_symlinks); static PyObject * posix_stat(PyObject *self, PyObject *args, PyObject *kwargs) { - static char *keywords[] = {"path", "dir_fd", "follow_symlinks", NULL}; - path_t path; + static char *_keywords[] = {"path", "dir_fd", "follow_symlinks", NULL}; + path_t path = PATH_T_INITIALIZE("stat", 0, 1); int dir_fd = DEFAULT_DIR_FD; int follow_symlinks = 1; - 
PyObject *return_value; - - memset(&path, 0, sizeof(path)); - path.function_name = "stat"; - path.allow_fd = 1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|$O&p:stat", keywords, - path_converter, &path, -#ifdef HAVE_FSTATAT - dir_fd_converter, &dir_fd, -#else - dir_fd_unavailable, &dir_fd, -#endif - &follow_symlinks)) - return NULL; - return_value = posix_do_stat("stat", &path, dir_fd, follow_symlinks); + PyObject *_ret; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O&|$O&p:stat", _keywords, + path_converter, &path, OS_STAT_DIR_FD_CONVERTER, &dir_fd, &follow_symlinks)) + return NULL; + + _ret = posix_stat_impl(self, &path, dir_fd, follow_symlinks); path_cleanup(&path); - return return_value; + return _ret; +} + +static PyObject * +posix_stat_impl(PyObject *self, path_t *path, int dir_fd, int follow_symlinks) +/*[define_output_end]*/ +{ + return posix_do_stat("stat", path, dir_fd, follow_symlinks); } PyDoc_STRVAR(posix_lstat__doc__, @@ -9919,69 +9944,79 @@ return buffer; } -PyDoc_STRVAR(posix_setxattr__doc__, -"setxattr(path, attribute, value, flags=0, *, follow_symlinks=True)\n\n\ -Set extended attribute attribute on path to value.\n\ -path may be either a string or an open file descriptor.\n\ -If follow_symlinks is False, and the last element of the path is a symbolic\n\ - link, setxattr will modify the symbolic link itself instead of the file\n\ - the link points to."); +/*[define posix_setxattr] +def os.setxattr(path: path_converter, attribute: path_converter, value: "y*", + flags: "i" = 0, *, follow_symlinks: "p" = True) -> None: pass +%% +path_t path = PATH_T_INITIALIZE("setxattr", 0, 1); +path_t attribute = {0}; +Py_buffer value = {0}; +int flags = 0; +int follow_symlinks = 1; +%% +path_cleanup(&path); +path_cleanup(&attribute); +PyBuffer_Release(&value); +[define_end]*/ + +#define POSIX_SETXATTR_HEADER "\ +os.setxattr(path: [bytes, int, str], attribute: [bytes, int, str],\n\ + value: [bytearray, bytes, rw_buffer], flags: int = 0, *,\n\ + 
follow_symlinks: bool = True) -> None" +static PyObject *posix_setxattr_impl(PyObject *self, path_t *path, + path_t *attribute, Py_buffer *value, + int flags, int follow_symlinks); static PyObject * posix_setxattr(PyObject *self, PyObject *args, PyObject *kwargs) { - path_t path; - path_t attribute; - Py_buffer value; + static char *_keywords[] = {"path", "attribute", "value", "flags", "follow_symlinks", NULL}; + path_t path = PATH_T_INITIALIZE("setxattr", 0, 1); + path_t attribute = {0}; + Py_buffer value = {0}; int flags = 0; int follow_symlinks = 1; - int result; - PyObject *return_value = NULL; - static char *keywords[] = {"path", "attribute", "value", - "flags", "follow_symlinks", NULL}; - - memset(&path, 0, sizeof(path)); - path.function_name = "setxattr"; - path.allow_fd = 1; - memset(&attribute, 0, sizeof(attribute)); - memset(&value, 0, sizeof(value)); - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&O&y*|i$p:setxattr", - keywords, - path_converter, &path, - path_converter, &attribute, - &value, &flags, + PyObject *_ret; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O&O&y*|i$p:setxattr", _keywords, + path_converter, &path, path_converter, &attribute, &value, &flags, &follow_symlinks)) return NULL; - if (fd_and_follow_symlinks_invalid("setxattr", path.fd, follow_symlinks)) - goto exit; - - Py_BEGIN_ALLOW_THREADS; - if (path.fd > -1) - result = fsetxattr(path.fd, attribute.narrow, - value.buf, value.len, flags); - else if (follow_symlinks) - result = setxattr(path.narrow, attribute.narrow, - value.buf, value.len, flags); - else - result = lsetxattr(path.narrow, attribute.narrow, - value.buf, value.len, flags); - Py_END_ALLOW_THREADS; - - if (result) { - return_value = path_error(&path); - goto exit; - } - - return_value = Py_None; - Py_INCREF(return_value); - -exit: + _ret = posix_setxattr_impl(self, &path, &attribute, &value, flags, follow_symlinks); path_cleanup(&path); path_cleanup(&attribute); PyBuffer_Release(&value); - - return return_value; 
+ return _ret; +} + +static PyObject * +posix_setxattr_impl(PyObject *self, path_t *path, path_t *attribute, + Py_buffer *value, int flags, int follow_symlinks) +/*[define_output_end]*/ +{ + int result; + + if (fd_and_follow_symlinks_invalid("setxattr", path->fd, follow_symlinks)) + return NULL; + + Py_BEGIN_ALLOW_THREADS; + if (path->fd > -1) + result = fsetxattr(path->fd, attribute->narrow, + value->buf, value->len, flags); + else if (follow_symlinks) + result = setxattr(path->narrow, attribute->narrow, + value->buf, value->len, flags); + else + result = lsetxattr(path->narrow, attribute->narrow, + value->buf, value->len, flags); + Py_END_ALLOW_THREADS; + + if (result) + return path_error(path); + + Py_RETURN_NONE; } PyDoc_STRVAR(posix_removexattr__doc__, @@ -10282,6 +10317,8 @@ #endif /* defined(TERMSIZE_USE_CONIO) || defined(TERMSIZE_USE_IOCTL) */ +#include "posixmodule_docstrings.h" + static PyMethodDef posix_methods[] = { {"access", (PyCFunction)posix_access, METH_VARARGS | METH_KEYWORDS, diff --git a/Modules/posixmodule_docstrings.h b/Modules/posixmodule_docstrings.h new file mode 100644 --- /dev/null +++ b/Modules/posixmodule_docstrings.h @@ -0,0 +1,41 @@ +#ifndef POSIXMODULE_DOCSTRINGS_H +#define POSIXMODULE_DOCSTRINGS_H + + +PyDoc_XSTRVAR(stat_float_times__doc__, STAT_FLOAT_TIMES_HEADER, +"\n\n\ +Determine whether os.[lf]stat represents time stamps as float objects.\n\ +If newval is True, future calls to stat() return floats, if it is False,\n\ +future calls return ints. 
\n\ +If newval is omitted, return the current setting.\n"); + + +PyDoc_XSTRVAR(posix_stat__doc__, POSIX_STAT_HEADER, +"\n\n\ +Perform a stat system call on the given path.\n\ +\n\ +path may be specified as either a string or as an open file descriptor.\n\ +\n\ +If dir_fd is not None, it should be a file descriptor open to a directory,\n\ + and path should be relative; path will then be relative to that directory.\n\ + dir_fd may not be supported on your platform; if it is unavailable, using\n\ + it will raise a NotImplementedError.\n\ +If follow_symlinks is False, and the last element of the path is a symbolic\n\ + link, stat will examine the symbolic link itself instead of the file the\n\ + link points to.\n\ +It is an error to use dir_fd or follow_symlinks when specifying path as\n\ + an open file descriptor."); + +PyDoc_XSTRVAR(posix_setxattr__doc__, POSIX_SETXATTR_HEADER, +"\n\n\ +Set extended attribute attribute on path to value.\n\ +path may be either a string or an open file descriptor.\n\ +If follow_symlinks is False, and the last element of the path is a symbolic\n\ + link, setxattr will modify the symbolic link itself instead of the file\n\ + the link points to."); + + +#endif + + + diff --git a/Tools/preprocess/README.txt b/Tools/preprocess/README.txt new file mode 100644 --- /dev/null +++ b/Tools/preprocess/README.txt @@ -0,0 +1,56 @@ + +This directory contains tools that implement the preprocessor DSL described in +PEP 437 "A DSL for specifying signatures, annotations and argument converters". + +Due to time constraints the tools are written in Standard ML, using the +ml-yacc and ml-lex toolchains. + +The statically linked Linux binaries have been compiled with mlton. + + +Files +===== + + preprocessor.grm + ---------------- + The DSL BNF grammar in ml-yacc readable form. The length of the + grammar is due to the fact that it contains a full specification + of a Python expression. + + + tokens.txt + ---------- + The lexer specification. 
For readability this is in pseudo-code. + + + printsemant64/32 + ---------------- + Linux 64/32-bit binaries. If run from the top directory of the Python + tree, the tool first reads Include/converters.h and then preprocesses + the given filename. The output is the semantically checked parse tree + of the complete C file written to stdout: + + ./Tools/preprocess/printsemant64 Modules/posixmodule.c + + An alternative header file can be explicitly given: + + ./Tools/preprocess/printsemant64 --header myconverters.h myfile.c + + + preprocess64/32 + --------------- + Linux 64/32-bit binaries. If run from the top directory of the Python + tree, the tool first reads Include/converters.h and then preprocesses + the given filename. + + The output is the preprocessed C file written to stdout. + + ./Tools/preprocess/preprocess64 Modules/posixmodule.c + + An alternative header file can be explicitly given: + + ./Tools/preprocess/preprocess64 --header myconverters.h myfile.c + + + + diff --git a/Tools/preprocess/preprocessor.grm b/Tools/preprocess/preprocessor.grm new file mode 100644 --- /dev/null +++ b/Tools/preprocess/preprocessor.grm @@ -0,0 +1,602 @@ + +(* Copyright (c) 2013 Stefan Krah. BSD two-clause license. The grammar for + the 'test' expression is a direct translation of parts of the Python + grammar. *) + + +(* + * Grammar for the proposed DSL for generating function signatures, type + * annotations and (possibly custom generated) argument parsers. + * + * NOTE1: The length of this grammar is due to the fact that it contains + * Python 'test' expressions whenever a default value occurs. + * + * NOTE2: Parsing of C variable declarations ('declarator', 'init-declarator') + * is currently very primitive and should be replaced by an appropriate + * excerpt of the C grammar. + * + * The grammar is in ml-yacc readable form. ml-yacc is part of smlnj or + * mlton. 
It is not necessary to know any Standard ML in order to change + * the grammar: + * + * Make any changes, then run: + * + * ml-yacc preprocessor.grm + * + * This will generate (among other things) a file 'preprocessor.grm.desc' + * where conflicts and errors (if any) are listed. + * + *) + +%% + +(***** Parser declarations *****) + +%name Preprocessor + +%term + EOF + | LITERAL of string + | HEADER | HEADER_END + | DEFINE | DEFINE_END | DEFINE_OUTPUT_END + | SECTIONMARKER + + | LPAREN | RPAREN | LBRACE | RBRACE | LBRACK | RBRACK + | EQ | COMMA | SEMI | DOT | COLON + | BAR | CARET | AMPER | TILDE + | PLUS | MINUS | STAR | SLASH | PERCENT + | EQEQ | NEQ | LT | LE | GE | GT + | DOUBLESTAR | DOUBLESLASH + | LSHIFT | RSHIFT | ARROW + | IN | NOT_IN | IS | IS_NOT + | PY_NONE | FALSE | TRUE | ELLIPSIS + | OR | AND | NOT + | IF | ELSE | FOR + | DEF | LAMBDA | YIELD | FROM | PASS + | NAME of string | PATHNAME of string | NUMBER of string | STRING of string + +%nonterm + program of () + | program_block_list of () + | program_blocks of () + | source_block of () + | source_literal of () + | preprocessor_block of () + | header_block of () + | converter_decl_list of () + | converter_decl of () + | converter of () + | define_block of () + | cname_opt of () + | function_spec of () + | declaration of () + | decl_params of () + | decl_typedargslist of () + | decl_typedarg of () + | cdeclaration_list_opt of () + | cdeclaration_list of () + | cdeclaration of () + | cleanup_source_opt of () + + | py_type of () + | c_atomlist of () + | c_atom of () + | c_typedargslist of () + + | name of () + | pathname of () + | name_list of () + | name_list_plus of () + + | where_clause_opt of () + | where_clause of () + | group_list of () + | group of () + + (***** Python 'test' expression *****) + | varargslist of () + | varargs of () + | vararg of () + | vfpdef of () + | star_vfpdef of () + | vararg_star of () + + | test of () + | or_test of () + | test_nocond of () + + | lambdef of () + | 
lambdef_nocond of () + + | and_test of () + | not_test of () + | comparison of () + | comp_op of () + | star_expr of () + | expr of () + | xor_expr of () + | and_expr of () + | shift_expr of () + | arith_expr of () + | term of () + | factor of () + | power of () + | atom of () + | string_plus of () + | testlist_comp of () + | test_or_star_expr of () + | test_or_star_exprs of () + | test_or_star_expr_list of () + | trailer of () + | trailers of () + | subscriptlist of () + | subscripts of () + | subscript of () + | sliceop of () + | exprlist of () + | expr_or_star_exprs of () + | expr_or_star_expr of () + | testlist of () + | tests of () + | dictorsetmaker of () + | dictfields of () + | dictfieldlist of () + | arglist of () + | arguments of () + | argument of () + | comp_iter of () + | comp_for of () + | comp_if of () + | yield_expr of () + | yield_arg of () + +%pos int + +%verbose +%start program +%eop EOF +%noshift EOF + +%% + +(***** program *****) + +program: program_block_list () + +program_block_list : + source_block () +| program_blocks () +| source_block program_blocks () + +program_blocks : + preprocessor_block () +| preprocessor_block program_blocks () + +(* In order to prevent conflicts, both of these end in source blocks of any + length. Since C translation units must end in a newline, there will always + be a trailing source block. *) +preprocessor_block : + header_block () +| define_block () + + +(***** source_block *****) + +source_block: source_literal () + +source_literal: + LITERAL () +| source_literal LITERAL () + + +(***** NAME or python keyword ******) + +name: + NAME () +| IN () +| IS () +| PY_NONE () +| TRUE () +| FALSE () +| OR () +| AND () +| NOT () +| IF () +| ELSE () +| FOR () +| DEF () +| LAMBDA () +| YIELD () +| FROM () +| PASS () + + +(***** header block *****) + +(* A header block must end in a source block to prevent a conflict. 
*) +header_block : HEADER converter_decl_list HEADER_END source_block () + +converter_decl_list : + converter_decl () +| converter_decl converter_decl_list () + +converter_decl : + converter COLON py_type ARROW c_atomlist NAME SEMI () +| converter COLON py_type ARROW c_atomlist AMPER NAME SEMI () +| converter COLON py_type ARROW LPAREN c_typedargslist RPAREN SEMI () + +converter: + name () +| STRING () + +py_type : + name () +| LBRACK name_list_plus RBRACK () + +(* NOTE: This is a very primitive way of parsing C declarations that works + for most purposes. It should be replaced by the real thing. *) +c_atom : + name () +| STAR () + +c_atomlist : + c_atom () +| c_atomlist c_atom () + +c_typedargslist : + c_atomlist NAME COMMA c_atomlist NAME () +| c_atomlist NAME COMMA c_atomlist AMPER NAME () +| c_atomlist AMPER NAME COMMA c_atomlist NAME () +| c_atomlist AMPER NAME COMMA c_atomlist AMPER NAME () +| c_typedargslist COMMA c_atomlist NAME () +| c_typedargslist COMMA c_atomlist AMPER NAME () + +name_list : + name () +| name COMMA name_list () + +name_list_plus : + name COMMA name () +| name COMMA name_list_plus () + + +(***** define_block *****) + +(* A define block must end in a source block to prevent a conflict. 
*) +define_block: + cname_opt function_spec DEFINE_END source_block () +| cname_opt function_spec DEFINE_END source_block DEFINE_OUTPUT_END source_block () + +cname_opt : + DEFINE RBRACK (NONE) +| DEFINE name RBRACK (SOME (sym(name), nameleft)) + + +(***** define_block: function_spec *****) + +function_spec : + declaration () +| declaration SECTIONMARKER cdeclaration_list_opt () +| declaration SECTIONMARKER cdeclaration_list_opt SECTIONMARKER cleanup_source_opt () + + +(***** define_block: declaration *****) + +declaration: DEF pathname decl_params ARROW test COLON PASS where_clause_opt () + +decl_params: LPAREN decl_typedargslist RPAREN () + +decl_typedargslist: + (* empty *) () +| decl_typedarg () +| decl_typedarg COMMA decl_typedargslist () + +decl_typedarg: + STAR () +| SLASH () +| name COLON converter () +| name COLON converter EQ test () + +pathname: + name () +| name DOT pathname () + +where_clause_opt : + (* empty *) () +| where_clause () + +where_clause : name name EQ LBRACK group_list RBRACK () + +group_list : + group () +| group COMMA group_list () + +group : LBRACK name_list RBRACK () + + +(***** define_block: optional C declaration list *****) + +cdeclaration_list_opt : + (* empty *) () +| cdeclaration_list () + +cdeclaration_list : + cdeclaration () +| cdeclaration cdeclaration_list () + +cdeclaration : + c_atomlist name SEMI () +| c_atomlist name EQ source_literal SEMI () + + +(***** define_block: optional cleanup source *****) + +cleanup_source_opt : + (* empty *) () +| source_literal () + + +(***** define_block: Python 'test' expression *****) + +varargslist: + varargs () +| varargs COMMA () +| varargs COMMA vararg_star () +| vararg_star () + +vararg: + vfpdef () +| vfpdef EQ test () + +varargs: + vararg () +| varargs COMMA vararg () + +star_vfpdef: + STAR () +| STAR vfpdef () + +vararg_star: + star_vfpdef () +| star_vfpdef COMMA varargs () +| star_vfpdef COMMA DOUBLESTAR vfpdef () +| star_vfpdef COMMA varargs COMMA DOUBLESTAR vfpdef () +| 
DOUBLESTAR vfpdef () + +vfpdef: NAME () + + +test: + or_test () +| or_test IF or_test ELSE test () +| lambdef () + +test_nocond: + or_test () +| lambdef_nocond () + +lambdef: + LAMBDA COLON test () +| LAMBDA varargslist COLON test () + +lambdef_nocond: + LAMBDA COLON test_nocond () +| LAMBDA varargslist COLON test_nocond () + +or_test: + and_test () +| and_test OR or_test () + +and_test: + not_test () +| not_test AND and_test () + +not_test: + NOT not_test () +| comparison () + +comparison: + expr () +| expr comp_op comparison () + +comp_op: + LT () +| GT () +| EQEQ () +| GE () +| LE () +| NEQ () +| IN () +| NOT () +| NOT_IN () +| IS () +| IS_NOT () + +star_expr: STAR expr () + +expr: + xor_expr () +| xor_expr BAR expr () + +xor_expr: + and_expr () +| and_expr CARET xor_expr () + +and_expr: + shift_expr () +| shift_expr AMPER and_expr () + +shift_expr: + arith_expr () +| arith_expr LSHIFT shift_expr () +| arith_expr RSHIFT shift_expr () + +arith_expr: + term () +| arith_expr PLUS term () +| arith_expr MINUS term () + +term: + factor () +| term STAR factor () +| term SLASH factor () +| term PERCENT factor () +| term DOUBLESLASH factor () + +factor: + power () +| PLUS factor () +| MINUS factor () +| TILDE factor () + +power: + atom () +| atom DOUBLESTAR factor () +| atom trailers () +| atom trailers DOUBLESTAR factor () + +atom: + LPAREN RPAREN () +| LPAREN yield_expr RPAREN () +| LPAREN testlist_comp RPAREN () +| LBRACK RBRACK () +| LBRACK testlist_comp RBRACK () +| LBRACE RBRACE () +| LBRACE dictorsetmaker RBRACE () +| NAME () +| NUMBER () +| string_plus () +| ELLIPSIS () +| PY_NONE () +| TRUE () +| FALSE () + +string_plus: + STRING () +| STRING string_plus () + +testlist_comp: + test_or_star_expr comp_for () +| test_or_star_expr_list () + +test_or_star_expr_list: + test_or_star_exprs () +| test_or_star_exprs COMMA () + +test_or_star_exprs: + test_or_star_expr () +| test_or_star_exprs COMMA test_or_star_expr () + +test_or_star_expr: + test () +| star_expr () + 
+trailer: + LPAREN RPAREN () +| LPAREN arglist RPAREN () +| LBRACK subscriptlist RBRACK () +| DOT NAME () + +trailers: + trailer () +| trailers trailer () + +subscriptlist: + subscripts () +| subscripts COMMA () + +subscripts: + subscript () +| subscripts COMMA subscript () + +subscript: + test () +| COLON () +| test COLON () +| test COLON test () +| test COLON sliceop () +| test COLON test sliceop () +| COLON test () +| COLON sliceop () +| COLON test sliceop () + +sliceop: + COLON () +| COLON test () + +exprlist: + expr_or_star_exprs () +| expr_or_star_exprs COMMA () + +expr_or_star_exprs: + expr_or_star_expr () +| expr_or_star_exprs COMMA expr_or_star_expr () + +expr_or_star_expr: + expr () +| star_expr () + +testlist: + tests () +| tests COMMA () + +tests: + test () +| tests COMMA test () + +dictorsetmaker: + test COLON test comp_for () +| dictfieldlist () +| test comp_for () +| testlist () + +dictfieldlist: + dictfields () +| dictfields COMMA () + +dictfields: + test COLON test () +| dictfields COMMA test COLON test () + +arglist: + arguments () +| arguments COMMA () +| arguments COMMA STAR test () +| arguments COMMA STAR test COMMA arguments () +| arguments COMMA STAR test COMMA DOUBLESTAR test () +| arguments COMMA STAR test COMMA arguments COMMA DOUBLESTAR test () +| arguments COMMA DOUBLESTAR test () + +arguments: + argument () +| arguments COMMA argument () + +argument: + test () +| test comp_for () +| test EQ test () + +comp_iter: + comp_for () +| comp_if () + +comp_for: + FOR exprlist IN or_test () +| FOR exprlist IN or_test comp_iter () + +comp_if: + IF test_nocond () +| IF test_nocond comp_iter () + +yield_expr: + YIELD () +| YIELD yield_arg () + +yield_arg: + FROM test () +| testlist () + + + + + diff --git a/Tools/preprocess/testcases/01_syntax_err.c b/Tools/preprocess/testcases/01_syntax_err.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/01_syntax_err.c @@ -0,0 +1,10 @@ +/* lexing error: illegal character '"' */ + +/*[define] 
+def module_func(a: "i") -> bool: pass + +where groups = [[a], ""] +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/02_syntax_err.c b/Tools/preprocess/testcases/02_syntax_err.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/02_syntax_err.c @@ -0,0 +1,8 @@ +/* Non-identifier in annotation position */ + +/*[define cname] +def module_func(param1: (1,2,3)) -> bool: pass +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/03_syntax_err.c b/Tools/preprocess/testcases/03_syntax_err.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/03_syntax_err.c @@ -0,0 +1,8 @@ +/* invalid initializer */ + +/*[define] +def module_func(param1: "i" = ^) -> bool: pass +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/04_syntax_err.c b/Tools/preprocess/testcases/04_syntax_err.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/04_syntax_err.c @@ -0,0 +1,8 @@ +/* missing return value annotation */ + +/*[define cname] +def module_func(param1: "p" = True): pass +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/05_syntax_err.c b/Tools/preprocess/testcases/05_syntax_err.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/05_syntax_err.c @@ -0,0 +1,8 @@ +/* syntax error: missing function "body" */ + +/*[define cname] +def module_func (param1: "p") -> 1000: +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/06_syntax_err.c b/Tools/preprocess/testcases/06_syntax_err.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/06_syntax_err.c @@ -0,0 +1,7 @@ +/* invalid C function name */ + +/*[define @] +def module_func (param1: "p") -> bool: pass +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/07_syntax_err.c b/Tools/preprocess/testcases/07_syntax_err.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/07_syntax_err.c @@ -0,0 +1,11 @@ +/* invalid C declaration */ + +/*[define] +def module_func (a: "i") -> bool: pass +%% 
+Py_DECREF(a); +%% +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/08_syntax_err.c b/Tools/preprocess/testcases/08_syntax_err.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/08_syntax_err.c @@ -0,0 +1,7 @@ +/* invalid function name */ + +/*[define] +def module~func (a: "i") -> bool: pass +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/09_undeclared_conv.c b/Tools/preprocess/testcases/09_undeclared_conv.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/09_undeclared_conv.c @@ -0,0 +1,8 @@ +/***** undeclared converter *****/ + +/*[define cname] +def module_func (param1: "es##") -> bool: pass +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/10_conflicting_cdecl_type.c b/Tools/preprocess/testcases/10_conflicting_cdecl_type.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/10_conflicting_cdecl_type.c @@ -0,0 +1,9 @@ +/***** conflicting types for 'param1' ("i" returns int) *****/ + +/*[define] +def module_func (param1: "i") -> tuple: pass +%% +float param1 = 9.0; +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/11_conflicting_conv_decl.c b/Tools/preprocess/testcases/11_conflicting_conv_decl.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/11_conflicting_conv_decl.c @@ -0,0 +1,12 @@ +/***** conflicting converter declaration *****/ + +/*[converter] +"i": int -> float res; +[converter_end]*/ + +/*[define] +def module_func(param1: "i") -> bool: pass +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/12_duplicate_python_param.c b/Tools/preprocess/testcases/12_duplicate_python_param.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/12_duplicate_python_param.c @@ -0,0 +1,11 @@ +/***** duplicate parameter name: 'param1' *****/ + +/*[define cname] +def module_func ( + param1: "i" = 98121, + param1: "i" = -111 +) -> int: pass +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/13_uninitialized_optarg.c 
b/Tools/preprocess/testcases/13_uninitialized_optarg.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/13_uninitialized_optarg.c @@ -0,0 +1,8 @@ +/***** missing C initialization for optional argument *****/ + +/*[define cname] +def module_func(param1: "i" = 0) -> int: pass +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/14_undeclared_cvar.c b/Tools/preprocess/testcases/14_undeclared_cvar.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/14_undeclared_cvar.c @@ -0,0 +1,10 @@ +/***** undeclared variable name: 'param2' ******/ + +/*[define cname] +def module_func (param1: "i") -> bool: pass +%% +int param2 = 9; +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/15_undeclared_cvar.c b/Tools/preprocess/testcases/15_undeclared_cvar.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/15_undeclared_cvar.c @@ -0,0 +1,10 @@ +/***** undeclared variable name: 'param2' *****/ + +/*[define cname] +def module_func (param1: "i" = 1000) -> bool: pass +%% +int param1 = 9; +int param2 = 9; +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/16_dupl_cvar.c b/Tools/preprocess/testcases/16_dupl_cvar.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/16_dupl_cvar.c @@ -0,0 +1,10 @@ +/***** duplicate variable name: 'param1' *****/ + +/*[define cname] +def module_func (param1: "i" = 100) -> bool: pass +%% +int param1 = 9; +int param1 = 9; +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/17_duplicate_special_param.c b/Tools/preprocess/testcases/17_duplicate_special_param.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/17_duplicate_special_param.c @@ -0,0 +1,7 @@ +/***** duplicate special parameter *****/ + +/*[define] +def module_func (*, /) -> bool: pass +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/18_duplicate_special_param.c b/Tools/preprocess/testcases/18_duplicate_special_param.c new file mode 100644 --- /dev/null +++ 
b/Tools/preprocess/testcases/18_duplicate_special_param.c @@ -0,0 +1,7 @@ +/***** duplicate special parameter *****/ + +/*[define] +def module_func (*, a: "i", /) -> bool: pass +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/19_conflicting_groups_slash.c b/Tools/preprocess/testcases/19_conflicting_groups_slash.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/19_conflicting_groups_slash.c @@ -0,0 +1,9 @@ +/***** conflicting groups/slash specification *****/ + +/*[define] +def module_func(/, a: "i") -> bool: pass + +where groups = [[a]] +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/20_pos_req_with_default.c b/Tools/preprocess/testcases/20_pos_req_with_default.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/20_pos_req_with_default.c @@ -0,0 +1,9 @@ +/***** positional-only arg with default value *****/ + +/*[define] +def module_func (/, a: "i" = 100) -> bool: pass +[define_end]*/ + + + + diff --git a/Tools/preprocess/testcases/21_duplicate_symbols_in_group.c b/Tools/preprocess/testcases/21_duplicate_symbols_in_group.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/21_duplicate_symbols_in_group.c @@ -0,0 +1,9 @@ +/***** duplicate symbols in group *****/ + +/*[define] +def module_func(a: "i") -> float: pass +where groups = [[a, a]] +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/22_duplicate_group_length.c b/Tools/preprocess/testcases/22_duplicate_group_length.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/22_duplicate_group_length.c @@ -0,0 +1,9 @@ +/***** duplicate group length *****/ + +/*[define] +def module_func(a: "i", b: "i", c: "i") -> bool: pass +where groups = [[a], [a, b], [c, a]] +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/23_required_group_incomplete.c b/Tools/preprocess/testcases/23_required_group_incomplete.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/23_required_group_incomplete.c @@ -0,0 
+1,9 @@ +/***** required group incomplete or missing (should be e.g.: [a, b, c]) *****/ + +/*[define] +def module_func(a: "i", b: "i", c: "i") -> bool : pass +where groups = [[a], [b, c]] +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/24_ambiguous_groups_definition.c b/Tools/preprocess/testcases/24_ambiguous_groups_definition.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/24_ambiguous_groups_definition.c @@ -0,0 +1,9 @@ +/***** ambiguous groups definition *****/ + +/*[define] +def module_func(a: "i", b: "i", c: "i", x: "i", y: "i") -> int: pass +where groups = [[a], [a, b], [c, d, a], [x, c, d, a]] +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/25_group_param_mismatch.c b/Tools/preprocess/testcases/25_group_param_mismatch.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/25_group_param_mismatch.c @@ -0,0 +1,9 @@ +/***** group: parameter mismatch *****/ + +/*[define] +def module_func(a: "i", b: "i", c: "i") -> tuple: pass +where groups = [[a], [a, b, x]] +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/README.txt b/Tools/preprocess/testcases/README.txt new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/README.txt @@ -0,0 +1,4 @@ + +This directory contains testcases that check various error conditions. 
+ + diff --git a/Tools/preprocess/testcases/_posixsubprocess.c b/Tools/preprocess/testcases/_posixsubprocess.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/_posixsubprocess.c @@ -0,0 +1,12 @@ + +/*[define subprocess_fork_exec] +def _posixsubprocess.fork_exec( + process_args: "O", executable_list: "O", + close_fds: "p", py_fds_to_keep: "O", + cwd_obj: "O", env_list: "O", + p2cread: "i", p2cwrite: "i", c2pread: "i", c2pwrite: "i", + errread: "i", errwrite: "i", errpipe_read: "i", errpipe_write: "i", + restore_signals: "i", call_setsid: "i", preexec_fn: "i", /) -> int: pass +[define_end]*/ + + diff --git a/Tools/preprocess/testcases/addch.c b/Tools/preprocess/testcases/addch.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/addch.c @@ -0,0 +1,11 @@ + +/* NOTE: The preprocess tool does not yet emit code for this legacy + definition. */ + +/*[define] +def curses.window.addch(y: "i", x: "i", ch: "O", attr: "l") -> None: pass +where groups = [[ch], [ch, attr], [y, x, ch], [y, x, ch, attr]] +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/posix_stat.c b/Tools/preprocess/testcases/posix_stat.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/posix_stat.c @@ -0,0 +1,14 @@ + +/*[define posix_stat] +def os.stat(path: path_converter, *, dir_fd: OS_STAT_DIR_FD_CONVERTER = None, + follow_symlinks: "p" = True) -> os.stat_result: pass +%% +path_t path = PATH_T_INITIALIZE("stat", 0, 1); +int dir_fd = DEFAULT_DIR_FD; +int follow_symlinks = 1; +%% +path_cleanup(&path); +[define_end]*/ + + + diff --git a/Tools/preprocess/testcases/stat_float_times.c b/Tools/preprocess/testcases/stat_float_times.c new file mode 100644 --- /dev/null +++ b/Tools/preprocess/testcases/stat_float_times.c @@ -0,0 +1,8 @@ + +/*[define stat_float_times] +def os.stat_float_times(/, newval: "i") -> os.stat_result: pass +%% +int newval = -1; +[define_end]*/ + + diff --git a/Tools/preprocess/tokens.txt b/Tools/preprocess/tokens.txt new file 
mode 100644 --- /dev/null +++ b/Tools/preprocess/tokens.txt @@ -0,0 +1,165 @@ + +(* Copyright (c) 2013 Stefan Krah. BSD two-clause license. *) + + +(* + * Because the grammar covers DSL, Python and C fragments, the lexer must + * have support for switching between named start states. + *) + +(***** Regular expressions *****) + +quotedlit := \"[^\"\n]*\" +name := [A-Za-z_][A-Za-z0-9_]* +space := [\ \t] + +stringlit := Python string literal +byteslit := Python bytes literal + +string := stringlit | byteslit + +integer := Python integer +float := Python float +imag := Python imag +number := integer | float | imag + + +(***** INITIAL state *****) + +"/*[converter]" => Tokens.HEADER +"/*[define" => Tokens.DEFINE +"/*[define_output_end]*/" => Tokens.DEFINE_OUTPUT_END +\n => skip +(.|{quotedlit}) => Tokens.LITERAL + + +(***** HEADER state *****) + +"[converter_end]*/" => switch to INITIAL state; Tokens.HEADER_END +"->" => Tokens.ARROW +"(" => Tokens.LPAREN +")" => Tokens.RPAREN +"[" => Tokens.LBRACK +"]" => Tokens.RBRACK +"," => Tokens.COMMA +":" => Tokens.COLON +";" => Tokens.SEMI +"*" => Tokens.STAR +"&" => Tokens.AMPER +{name} => Tokens.NAME +{string} => Tokens.STRING + +#[^\n]* => skip +{space} => skip +\n => skip +. => error + + +(***** DECLARATION *****) + +"[define_end]*/" => switch to INITIAL state; Tokens.DEFINE_END +"(" => Tokens.LPAREN +")" => Tokens.RPAREN +"{" => Tokens.LBRACE +"}" => Tokens.RBRACE +"[" => Tokens.LBRACK +"]" => Tokens.RBRACK +"=" => Tokens.EQ +"," => Tokens.COMMA +";" => Tokens.SEMI +"."
=> Tokens.DOT +":" => Tokens.COLON +"|" => Tokens.BAR +"^" => Tokens.CARET +"&" => Tokens.AMPER +"~" => Tokens.TILDE +"+" => Tokens.PLUS +"-" => Tokens.MINUS +"*" => Tokens.STAR +"/" => Tokens.SLASH +"%" => Tokens.PERCENT +"<" => Tokens.LT +">" => Tokens.GT +"==" => Tokens.EQEQ +"!=" => Tokens.NEQ +"<=" => Tokens.LE +">=" => Tokens.GE +"**" => Tokens.DOUBLESTAR +"//" => Tokens.DOUBLESLASH +"<<" => Tokens.LSHIFT +">>" => Tokens.RSHIFT +"->" => Tokens.ARROW + +"in" => Tokens.IN +"is" => Tokens.IS +"not"{space}"in" => Tokens.NOT_IN +"is"{space}"not" => Tokens.IS_NOT + +"None" => Tokens.PY_NONE +"False" => Tokens.FALSE +"True" => Tokens.TRUE +"..." => Tokens.ELLIPSIS + +"or" => Tokens.OR +"and" => Tokens.AND +"not" => Tokens.NOT + +"if" => Tokens.IF +"else" => Tokens.ELSE +"for" => Tokens.FOR + +"def" => Tokens.DEF +"lambda" => Tokens.LAMBDA +"yield" => Tokens.YIELD +"from" => Tokens.FROM +"pass" => Tokens.PASS + +{name} => Tokens.NAME +{string} => Tokens.STRING +{number} => Tokens.NUMBER + +"%%" => switch to CDECLARATION state; Tokens.SECTIONMARKER + +{space} => skip +#[^\n]* => skip +\n => skip +. => error + + +(***** CDECLARATION *****) + +(* + * NOTE: Due to the (currently) primitive C declaration parsing, rvalues are + * read as literal strings. + *) + +"[define_end]*/" => switch to INITIAL state; Tokens.DEFINE_END +"=" => switch to RVALUE state; Tokens.EQ +";" => Tokens.SEMI +"*" => Tokens.STAR +{name} => Tokens.NAME +"%%" => switch to CLEANUP state; Tokens.SECTIONMARKER + +{space} => skip +\n => skip +. => error + + +(***** RVALUE *****) + +";" => switch to CDECLARATION state; Tokens.SEMI +\n => skip +(.|{quotedlit}) => Tokens.LITERAL +(* Note that there is no error here: if no semicolon occurs, the lexer + will read until EOF and the parser will signal the error. *) + + +(***** CLEANUP *****) + +"[define_end]*/" => switch to INITIAL state; Tokens.DEFINE_END +\n => skip +(.|{quotedlit}) => Tokens.LITERAL + + + +