diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index fcbd50b..a80f814 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -134,7 +134,8 @@ in various ways. There is a separate error indicator for each thread. This is the most common way to set the error indicator. The first argument specifies the exception type; it is normally one of the standard exceptions, e.g. :cdata:`PyExc_RuntimeError`. You need not increment its reference count. - The second argument is an error message; it is converted to a string object. + The second argument is an error message; it is decoded to a string object + with ``'utf-8'`` encoding. .. cfunction:: void PyErr_SetObject(PyObject *type, PyObject *value) @@ -146,9 +147,10 @@ in various ways. There is a separate error indicator for each thread. .. cfunction:: PyObject* PyErr_Format(PyObject *exception, const char *format, ...) This function sets the error indicator and returns *NULL*. *exception* should be - a Python exception (class, not an instance). *format* should be a string, - containing format codes, similar to :cfunc:`printf`. The ``width.precision`` - before a format code is parsed, but the width part is ignored. + a Python exception (class, not an instance). *format* should be a string + encoded to ISO-8859-1, containing format codes, similar to :cfunc:`printf`. + The ``width.precision`` before a format code is parsed, but the width part + is ignored. .. % This should be exactly the same as the table in PyString_FromFormat. .. % One should just refer to the other. @@ -261,6 +263,8 @@ in various ways. There is a separate error indicator for each thread. *filename* is not *NULL*, it is passed to the constructor of *type* as a third parameter. In the case of exceptions such as :exc:`IOError` and :exc:`OSError`, this is used to define the :attr:`filename` attribute of the exception instance. + *filename* is encoded to the filesystem encoding + (:func:`sys.getfilesystemencoding`). .. 
cfunction:: PyObject* PyErr_SetFromWindowsErr(int ierr) @@ -286,6 +290,7 @@ in various ways. There is a separate error indicator for each thread. Similar to :cfunc:`PyErr_SetFromWindowsErr`, with the additional behavior that if *filename* is not *NULL*, it is passed to the constructor of :exc:`WindowsError` as a third parameter. Availability: Windows. + *filename* is a string encoded to ``'utf-8'``. .. cfunction:: PyObject* PyErr_SetExcFromWindowsErrWithFilename(PyObject *type, int ierr, char *filename) diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index 00ddf00..c6370cc 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -12,7 +12,8 @@ Operating System Utilities deemed interactive. This is the case for files for which ``isatty(fileno(fp))`` is true. If the global flag :cdata:`Py_InteractiveFlag` is true, this function also returns true if the *filename* pointer is *NULL* or if the name is equal to - one of the strings ``''`` or ``'???'``. + one of the strings ``''`` or ``'???'``. *filename* is encoded to + ``'utf-8'``. .. cfunction:: void PyOS_AfterFork() diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 4b89f49..c80dd7b 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -230,11 +230,12 @@ APIs: .. cfunction:: PyObject* PyUnicode_FromFormat(const char *format, ...) - Take a C :cfunc:`printf`\ -style *format* string and a variable number of + Take a C :cfunc:`printf`\ -style format string and a variable number of arguments, calculate the size of the resulting Python unicode string and return a string with the values formatted into it. The variable arguments must be C types and must correspond exactly to the format characters in the *format* - string. The following format characters are allowed: + string. *format* should be a string encoded to ISO-8859-1. The following + format characters are allowed: .. % This should be exactly the same as the table in PyErr_Format. ..
% The descriptions for %zd and %zu are wrong, but the truth is complicated @@ -412,16 +413,24 @@ used, passsing :cfunc:`PyUnicode_FSDecoder` as the conversion function: .. cfunction:: PyObject* PyUnicode_DecodeFSDefault(const char *s) - Decode a string using :cdata:`Py_FileSystemDefaultEncoding` and - the ``"surrogateescape"`` error handler. + Decode a string using :cdata:`Py_FileSystemDefaultEncoding` and the + ``'surrogateescape'`` error handler. On Windows, use ``'strict'`` error + handler if :cdata:`Py_FileSystemDefaultEncoding` is ``'mbcs'`` (which is the + default encoding). If :cdata:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8. + .. versionchanged:: 3.2 + Use ``'strict'`` error handler if :cdata:`Py_FileSystemDefaultEncoding` + is ``'mbcs'``. + .. cfunction:: PyObject* PyUnicode_EncodeFSDefault(PyObject *unicode) Encode a Unicode object to :cdata:`Py_FileSystemDefaultEncoding` with the - ``'surrogateescape'`` error handler, and return :class:`bytes`. + ``'surrogateescape'`` error handler, and return :class:`bytes`. On Windows, + use ``'strict'`` error handler if :cdata:`Py_FileSystemDefaultEncoding` is + ``'mbcs'`` (which is the default encoding). If :cdata:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8. @@ -1022,7 +1031,8 @@ They all return *NULL* or ``-1`` if an exception occurs. .. cfunction:: int PyUnicode_CompareWithASCIIString(PyObject *uni, char *string) Compare a unicode object, *uni*, with *string* and return -1, 0, 1 for less - than, equal, and greater than, respectively. + than, equal, and greater than, respectively. *string* should be encoded to + ISO-8859-1. .. cfunction:: int PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst index d716a46..360319b 100644 --- a/Doc/c-api/veryhigh.rst +++ b/Doc/c-api/veryhigh.rst @@ -45,20 +45,23 @@ the same library that the Python runtime is using. .. 
cfunction:: int PyRun_AnyFile(FILE *fp, const char *filename) - This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, leaving - *closeit* set to ``0`` and *flags* set to *NULL*. + This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, + leaving *closeit* set to ``0`` and *flags* set to *NULL*. *filename* is + encoded to ``'utf-8'``. .. cfunction:: int PyRun_AnyFileFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) - This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, leaving - the *closeit* argument set to ``0``. + This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, + leaving the *closeit* argument set to ``0``. *filename* is encoded to + ``'utf-8'``. .. cfunction:: int PyRun_AnyFileEx(FILE *fp, const char *filename, int closeit) - This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, leaving - the *flags* argument set to *NULL*. + This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, + leaving the *flags* argument set to *NULL*. *filename* is encoded to + ``'utf-8'``. .. cfunction:: int PyRun_AnyFileExFlags(FILE *fp, const char *filename, int closeit, PyCompilerFlags *flags) @@ -66,8 +69,8 @@ the same library that the Python runtime is using. If *fp* refers to a file associated with an interactive device (console or terminal input or Unix pseudo-terminal), return the value of :cfunc:`PyRun_InteractiveLoop`, otherwise return the result of - :cfunc:`PyRun_SimpleFile`. If *filename* is *NULL*, this function uses - ``"???"`` as the filename. + :cfunc:`PyRun_SimpleFile`. *filename* is encoded to ``'utf-8'``. If + *filename* is *NULL*, this function uses ``"???"`` as the filename. .. cfunction:: int PyRun_SimpleString(const char *command) @@ -80,7 +83,11 @@ the same library that the Python runtime is using. Executes the Python source code from *command* in the :mod:`__main__` module according to the *flags* argument. 
If :mod:`__main__` does not already exist, it - is created. Returns ``0`` on success or ``-1`` if an exception was raised. If + is created. *command* is encoded to ``'utf-8'`` if + :cdata:`PyPARSE_IGNORE_COOKIE` flag is set, otherwise the parser checks for + a ``#coding:xxx`` cookie. + + Returns ``0`` on success or ``-1`` if an exception was raised. If there was an error, there is no way to get the exception information. For the meaning of *flags*, see below. @@ -92,19 +99,20 @@ the same library that the Python runtime is using. .. cfunction:: int PyRun_SimpleFile(FILE *fp, const char *filename) This is a simplified interface to :cfunc:`PyRun_SimpleFileExFlags` below, - leaving *closeit* set to ``0`` and *flags* set to *NULL*. + leaving *closeit* set to ``0`` and *flags* set to *NULL*. *filename* is + encoded to ``'utf-8'``. .. cfunction:: int PyRun_SimpleFileFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) This is a simplified interface to :cfunc:`PyRun_SimpleFileExFlags` below, - leaving *closeit* set to ``0``. + leaving *closeit* set to ``0``. *filename* is encoded to ``'utf-8'``. .. cfunction:: int PyRun_SimpleFileEx(FILE *fp, const char *filename, int closeit) This is a simplified interface to :cfunc:`PyRun_SimpleFileExFlags` below, - leaving *flags* set to *NULL*. + leaving *flags* set to *NULL*. *filename* is encoded to ``'utf-8'``. .. cfunction:: int PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit, PyCompilerFlags *flags) @@ -112,37 +120,40 @@ the same library that the Python runtime is using. Similar to :cfunc:`PyRun_SimpleStringFlags`, but the Python source code is read from *fp* instead of an in-memory string. *filename* should be the name of the file. If *closeit* is true, the file is closed before PyRun_SimpleFileExFlags - returns. + returns. *filename* is encoded to ``'utf-8'``. ..
cfunction:: int PyRun_InteractiveOne(FILE *fp, const char *filename) This is a simplified interface to :cfunc:`PyRun_InteractiveOneFlags` below, - leaving *flags* set to *NULL*. + leaving *flags* set to *NULL*. *filename* is encoded to ``'utf-8'``. .. cfunction:: int PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) Read and execute a single statement from a file associated with an interactive - device according to the *flags* argument. If *filename* is *NULL*, ``"???"`` is - used instead. The user will be prompted using ``sys.ps1`` and ``sys.ps2``. + device according to the *flags* argument. *filename* is encoded to + ``'utf-8'``. If *filename* is *NULL*, ``"???"`` is used instead. The user + will be prompted using ``sys.ps1`` and ``sys.ps2``. + Returns ``0`` when the input was executed successfully, ``-1`` if there was an exception, or an error code from the :file:`errcode.h` include file distributed - as part of Python if there was a parse error. (Note that :file:`errcode.h` is + as part of Python if there was a parse error. (Note that :file:`errcode.h` is not included by :file:`Python.h`, so must be included specifically if needed.) .. cfunction:: int PyRun_InteractiveLoop(FILE *fp, const char *filename) This is a simplified interface to :cfunc:`PyRun_InteractiveLoopFlags` below, - leaving *flags* set to *NULL*. + leaving *flags* set to *NULL*. *filename* is encoded to ``'utf-8'``. -.. cfunction:: int PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) +.. cfunction:: int PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) Read and execute statements from a file associated with an interactive device - until EOF is reached. If *filename* is *NULL*, ``"???"`` is used instead. The - user will be prompted using ``sys.ps1`` and ``sys.ps2``. Returns ``0`` at EOF. + until EOF is reached. *filename* is encoded to ``'utf-8'``. 
If *filename* is + *NULL*, ``"???"`` is used instead. The user will be prompted using + ``sys.ps1`` and ``sys.ps2``. Returns ``0`` at EOF. .. cfunction:: struct _node* PyParser_SimpleParseString(const char *str, int start) @@ -189,8 +200,10 @@ the same library that the Python runtime is using. Execute Python source code from *str* in the context specified by the dictionaries *globals* and *locals* with the compiler flags specified by - *flags*. The parameter *start* specifies the start token that should be used to - parse the source code. + *flags*. The parameter *start* specifies the start token that should be used to + parse the source code. *str* is encoded to ``'utf-8'`` if + :cdata:`PyPARSE_IGNORE_COOKIE` flag is set, otherwise the parser checks for + a ``#coding:xxx`` cookie. Returns the result of executing the code as a Python object, or *NULL* if an exception was raised. @@ -216,10 +229,10 @@ the same library that the Python runtime is using. .. cfunction:: PyObject* PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals, PyObject *locals, int closeit, PyCompilerFlags *flags) - Similar to :cfunc:`PyRun_StringFlags`, but the Python source code is read from - *fp* instead of an in-memory string. *filename* should be the name of the file. - If *closeit* is true, the file is closed before :cfunc:`PyRun_FileExFlags` - returns. + Similar to :cfunc:`PyRun_StringFlags`, but the Python source code is read + from *fp* instead of an in-memory string. *filename* should be the name of + the file encoded to ``'utf-8'``. If *closeit* is true, the file is closed + before :cfunc:`PyRun_FileExFlags` returns. .. cfunction:: PyObject* Py_CompileString(const char *str, const char *filename, int start) @@ -230,13 +243,16 @@ the same library that the Python runtime is using. ..
cfunction:: PyObject* Py_CompileStringFlags(const char *str, const char *filename, int start, PyCompilerFlags *flags) - Parse and compile the Python source code in *str*, returning the resulting code - object. The start token is given by *start*; this can be used to constrain the - code which can be compiled and should be :const:`Py_eval_input`, - :const:`Py_file_input`, or :const:`Py_single_input`. The filename specified by - *filename* is used to construct the code object and may appear in tracebacks or - :exc:`SyntaxError` exception messages. This returns *NULL* if the code cannot - be parsed or compiled. + Parse and compile the Python source code in *str*, returning the resulting + code object. The start token is given by *start*; this can be used to + constrain the code which can be compiled and should be + :const:`Py_eval_input`, :const:`Py_file_input`, or :const:`Py_single_input`. + The filename specified by *filename* is used to construct the code object + and may appear in tracebacks or :exc:`SyntaxError` exception messages; it is + encoded to ``'utf-8'``. *str* is encoded to ``'utf-8'`` if + :cdata:`PyPARSE_IGNORE_COOKIE` flag is set, otherwise the parser checks for + a ``#coding:xxx`` cookie. This returns *NULL* if the code cannot be parsed or + compiled. ..
cfunction:: PyObject* PyEval_EvalCode(PyCodeObject *co, PyObject *globals, PyObject *locals) diff --git a/Include/parsetok.h b/Include/parsetok.h index af80570..77fc9ce 100644 --- a/Include/parsetok.h +++ b/Include/parsetok.h @@ -9,10 +9,10 @@ extern "C" { typedef struct { int error; - const char *filename; + const char *filename; /* encoded to utf-8 */ int lineno; int offset; - char *text; + char *text; /* encoded to utf-8 */ int token; int expected; } perrdetail; @@ -32,30 +32,67 @@ typedef struct { #define PyPARSE_IGNORE_COOKIE 0x0010 #define PyPARSE_BARRY_AS_BDFL 0x0020 -PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int, - perrdetail *); -PyAPI_FUNC(node *) PyParser_ParseFile (FILE *, const char *, grammar *, int, - char *, char *, perrdetail *); +PyAPI_FUNC(node *) PyParser_ParseString( + const char *s, /* encoded to utf-8 if PyPARSE_IGNORE_COOKIE flag is set, + otherwise the parser checks for #coding:xxx cookie */ + grammar *g, + int start, + perrdetail *err_ret); +PyAPI_FUNC(node *) PyParser_ParseFile( + FILE *fp, + const char *filename, /* encoded to utf-8 */ + grammar *g, + int start, + char *ps1, /* encoded to stderr encoding, see PyOS_Readline() */ + char *ps2, /* encoded to stderr encoding, see PyOS_Readline() */ + perrdetail *err_ret + ); -PyAPI_FUNC(node *) PyParser_ParseStringFlags(const char *, grammar *, int, - perrdetail *, int); -PyAPI_FUNC(node *) PyParser_ParseFileFlags(FILE *, const char *, - const char*, grammar *, - int, char *, char *, - perrdetail *, int); -PyAPI_FUNC(node *) PyParser_ParseFileFlagsEx(FILE *, const char *, - const char*, grammar *, - int, char *, char *, - perrdetail *, int *); +PyAPI_FUNC(node *) PyParser_ParseStringFlags( + const char *s, /* encoded to utf-8 if PyPARSE_IGNORE_COOKIE flag is set, + otherwise the parser checks for #coding:xxx cookie */ + grammar *g, + int start, + perrdetail *err_ret, + int flags); +PyAPI_FUNC(node *) PyParser_ParseFileFlags( + FILE *fp, + const char *filename, /* 
encoded to utf-8 */ + const char *encoding, /* if NULL, check for #coding:xxx cookie */ + grammar *g, + int start, + char *ps1, /* encoded to stderr encoding, see PyOS_Readline() */ + char *ps2, /* encoded to stderr encoding, see PyOS_Readline() */ + perrdetail *err_ret, + int flags); +PyAPI_FUNC(node *) PyParser_ParseFileFlagsEx( + FILE *fp, + const char *filename, /* encoded to utf-8 */ + const char *encoding, /* if NULL, check for #coding:xxx cookie */ + grammar *g, + int start, + char *ps1, /* encoded to stderr encoding, see PyOS_Readline() */ + char *ps2, /* encoded to stderr encoding, see PyOS_Readline() */ + perrdetail *err_ret, + int *flags + ); -PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilename(const char *, - const char *, - grammar *, int, - perrdetail *, int); -PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(const char *, - const char *, - grammar *, int, - perrdetail *, int *); +PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilename( + const char *s, /* encoded to utf-8 if PyPARSE_IGNORE_COOKIE flag is set, + otherwise the parser checks for #coding:xxx cookie */ + const char *filename, /* encoded to utf-8 */ + grammar *g, + int start, + perrdetail *err_ret, + int flags); +PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx( + const char *s, /* encoded to utf-8 if PyPARSE_IGNORE_COOKIE flag is set, + otherwise the parser checks for #coding:xxx cookie */ + const char *filename, /* encoded to utf-8 */ + grammar *g, + int start, + perrdetail *err_ret, + int *flags); /* Note that he following function is defined in pythonrun.c not parsetok.c. 
*/ PyAPI_FUNC(void) PyParser_SetError(perrdetail *); diff --git a/Include/pyerrors.h b/Include/pyerrors.h index 1eee16d..14829e1 100644 --- a/Include/pyerrors.h +++ b/Include/pyerrors.h @@ -60,7 +60,10 @@ typedef struct { PyAPI_FUNC(void) PyErr_SetNone(PyObject *); PyAPI_FUNC(void) PyErr_SetObject(PyObject *, PyObject *); -PyAPI_FUNC(void) PyErr_SetString(PyObject *, const char *); +PyAPI_FUNC(void) PyErr_SetString( + PyObject *exception, + const char *string /* encoded to utf-8 */ + ); PyAPI_FUNC(PyObject *) PyErr_Occurred(void); PyAPI_FUNC(void) PyErr_Clear(void); PyAPI_FUNC(void) PyErr_Fetch(PyObject **, PyObject **, PyObject **); @@ -177,26 +180,34 @@ PyAPI_FUNC(PyObject *) PyErr_SetFromErrno(PyObject *); PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilenameObject( PyObject *, PyObject *); PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilename( - PyObject *, const char *); + PyObject *exc, + const char *filename /* encoded to the filesystem encoding */ + ); #ifdef MS_WINDOWS PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithUnicodeFilename( PyObject *, const Py_UNICODE *); #endif /* MS_WINDOWS */ -PyAPI_FUNC(PyObject *) PyErr_Format(PyObject *, const char *, ...); +PyAPI_FUNC(PyObject *) PyErr_Format( + PyObject *exception, + const char *format, /* encoded to ISO-8859-1 */ + ...); #ifdef MS_WINDOWS -PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilenameObject( - int, const char *); PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilename( - int, const char *); + int ierr, + const char *filename /* encoded to utf-8 */ + ); PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithUnicodeFilename( int, const Py_UNICODE *); PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErr(int); PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilenameObject( PyObject *,int, PyObject *); PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilename( - PyObject *,int, const char *); + PyObject *exc, + int ierr, + const char *filename /* encoded to utf-8 */ + ); PyAPI_FUNC(PyObject *) 
PyErr_SetExcFromWindowsErrWithUnicodeFilename( PyObject *,int, const Py_UNICODE *); PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErr(PyObject *, int); @@ -224,7 +235,9 @@ PyAPI_FUNC(void) PyErr_SetInterrupt(void); int PySignal_SetWakeupFd(int fd); /* Support for adding program text to SyntaxErrors */ -PyAPI_FUNC(void) PyErr_SyntaxLocation(const char *, int); +PyAPI_FUNC(void) PyErr_SyntaxLocation( + const char *filename, /* encoded to utf-8 */ + int lineno); PyAPI_FUNC(PyObject *) PyErr_ProgramText(const char *, int); /* The following functions are used to create and modify unicode @@ -232,15 +245,32 @@ PyAPI_FUNC(PyObject *) PyErr_ProgramText(const char *, int); /* create a UnicodeDecodeError object */ PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_Create( - const char *, const char *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); + const char *encoding, /* encoded to utf-8 */ + const char *object, /* byte string */ + Py_ssize_t length, + Py_ssize_t start, + Py_ssize_t end, + const char *reason /* encoded to utf-8 */ + ); /* create a UnicodeEncodeError object */ PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_Create( - const char *, const Py_UNICODE *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); + const char *encoding, /* encoded to utf-8 */ + const Py_UNICODE *object, + Py_ssize_t length, + Py_ssize_t start, + Py_ssize_t end, + const char *reason /* encoded to utf-8 */ + ); /* create a UnicodeTranslateError object */ PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create( - const Py_UNICODE *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); + const Py_UNICODE *object, + Py_ssize_t length, + Py_ssize_t start, + Py_ssize_t end, + const char *reason /* encoded to utf-8 */ + ); /* get the encoding attribute */ PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *); @@ -283,11 +313,17 @@ PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *); /* assign a new value to the reason attribute return 0 on success, -1 on failure */ 
PyAPI_FUNC(int) PyUnicodeEncodeError_SetReason( - PyObject *, const char *); + PyObject *exc, + const char *reason /* encoded to utf-8 */ + ); PyAPI_FUNC(int) PyUnicodeDecodeError_SetReason( - PyObject *, const char *); + PyObject *exc, + const char *reason /* encoded to utf-8 */ + ); PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason( - PyObject *, const char *); + PyObject *exc, + const char *reason /* encoded to utf-8 */ + ); /* These APIs aren't really part of the error implementation, but diff --git a/Include/pythonrun.h b/Include/pythonrun.h index b9da550..83794e8 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -33,21 +33,52 @@ PyAPI_FUNC(int) Py_IsInitialized(void); PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void); PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *); -PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_AnyFileExFlags(FILE *, const char *, int, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_SimpleFileExFlags(FILE *, const char *, int, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_InteractiveOneFlags(FILE *, const char *, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_AnyFileFlags( + FILE *fp, + const char *filename, /* encoded to utf-8 */ + PyCompilerFlags *flags); +PyAPI_FUNC(int) PyRun_AnyFileExFlags( + FILE *fp, + const char *filename, /* encoded to utf-8 */ + int closeit, + PyCompilerFlags *flags); +PyAPI_FUNC(int) PyRun_SimpleStringFlags( + /* command is encoded to utf-8 if PyPARSE_IGNORE_COOKIE flag is set, + otherwise the parser checks for #coding:xxx cookie */ + const char *command, + PyCompilerFlags *flags + ); +PyAPI_FUNC(int) PyRun_SimpleFileExFlags( + FILE *fp, + const char *filename, /* encoded to utf-8 */ + int closeit, + PyCompilerFlags *flags + ); +PyAPI_FUNC(int) PyRun_InteractiveOneFlags( + FILE *fp, + const char *filename, /* encoded to utf-8 */ + PyCompilerFlags *flags + ); PyAPI_FUNC(int) 
PyRun_InteractiveLoopFlags(FILE *, const char *, PyCompilerFlags *); -PyAPI_FUNC(struct _mod *) PyParser_ASTFromString(const char *, const char *, - int, PyCompilerFlags *flags, - PyArena *); -PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, - const char*, int, - char *, char *, - PyCompilerFlags *, int *, - PyArena *); +PyAPI_FUNC(struct _mod *) PyParser_ASTFromString( + const char *s, /* encoded to utf-8 if PyPARSE_IGNORE_COOKIE flag is set, + otherwise the parser checks for #coding:xxx cookie */ + const char *filename, /* encoded to utf-8 */ + int start, + PyCompilerFlags *flags, + PyArena *arena); +PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile( + FILE *fp, + const char *filename, /* encoded to utf-8 */ + const char* encoding, /* if NULL, the parser checks for #coding:xxx */ + int start, + char *ps1, /* encoded to stderr encoding, see PyOS_Readline() */ + char *ps2, /* encoded to stderr encoding, see PyOS_Readline() */ + PyCompilerFlags *flags, + int *errcode, + PyArena *arena + ); #define PyParser_SimpleParseString(S, B) \ PyParser_SimpleParseStringFlags(S, B, 0) #define PyParser_SimpleParseFile(FP, S, B) \ @@ -57,17 +88,35 @@ PyAPI_FUNC(struct _node *) PyParser_SimpleParseStringFlags(const char *, int, PyAPI_FUNC(struct _node *) PyParser_SimpleParseFileFlags(FILE *, const char *, int, int); -PyAPI_FUNC(PyObject *) PyRun_StringFlags(const char *, int, PyObject *, - PyObject *, PyCompilerFlags *); - -PyAPI_FUNC(PyObject *) PyRun_FileExFlags(FILE *, const char *, int, - PyObject *, PyObject *, int, - PyCompilerFlags *); +PyAPI_FUNC(PyObject *) PyRun_StringFlags( + const char *str, /* encoded to utf-8 if PyPARSE_IGNORE_COOKIE flag is set, + otherwise the parser checks for #coding:xxx cookie */ + int start, + PyObject *globals, + PyObject *locals, + PyCompilerFlags *flags); + +PyAPI_FUNC(PyObject *) PyRun_FileExFlags( + FILE *fp, + const char *filename, /* encoded to utf-8 */ + int start, + PyObject *globals, + PyObject *locals, + int closeit, 
+ PyCompilerFlags *flags + ); #define Py_CompileString(str, p, s) Py_CompileStringFlags(str, p, s, NULL) -PyAPI_FUNC(PyObject *) Py_CompileStringFlags(const char *, const char *, int, - PyCompilerFlags *); -PyAPI_FUNC(struct symtable *) Py_SymtableString(const char *, const char *, int); +PyAPI_FUNC(PyObject *) Py_CompileStringFlags( + const char *str, /* encoded to utf-8 if PyPARSE_IGNORE_COOKIE flag is set, + otherwise the parser checks for #coding:xxx cookie */ + const char *filename, /* encoded to utf-8 */ + int start, + PyCompilerFlags *flags); +PyAPI_FUNC(struct symtable *) Py_SymtableString( + const char *str, /* the parser checks for #coding:xxx cookie */ + const char *filename, /* encoded to utf-8 */ + int start); PyAPI_FUNC(void) PyErr_Print(void); PyAPI_FUNC(void) PyErr_PrintEx(int); @@ -84,7 +133,10 @@ PyAPI_FUNC(void) Py_Exit(int); /* Restore signals that the interpreter has called SIG_IGN on to SIG_DFL. */ PyAPI_FUNC(void) _Py_RestoreSignals(void); -PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *); +PyAPI_FUNC(int) Py_FdIsInteractive( + FILE *fp, + const char *filename /* encoded to utf-8 */ + ); /* Bootstrap */ PyAPI_FUNC(int) Py_Main(int argc, wchar_t **argv); @@ -150,10 +202,18 @@ PyAPI_FUNC(void) PyFloat_Fini(void); PyAPI_FUNC(void) PyOS_FiniInterrupts(void); PyAPI_FUNC(void) _PyGC_Fini(void); -/* Stuff with no proper home (yet) */ -PyAPI_FUNC(char *) PyOS_Readline(FILE *, FILE *, char *); +/* Stuff living in myreadline.c */ +PyAPI_FUNC(char *) PyOS_Readline( + FILE *sys_stdin, + FILE *sys_stdout, + char *prompt /* encoded to stderr encoding */ + ); PyAPI_DATA(int) (*PyOS_InputHook)(void); -PyAPI_DATA(char) *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *); +PyAPI_DATA(char) *(*PyOS_ReadlineFunctionPointer)( + FILE *sys_stdin, + FILE *sys_stdout, + char *prompt /* encoded to stderr encoding, see PyOS_Readline() */ + ); PyAPI_DATA(PyThreadState*) _PyOS_ReadlineTState; /* Stack size, in "pointers" (so we get extra safety margins diff 
--git a/Include/unicodeobject.h b/Include/unicodeobject.h index cd2f165..df8ab6c 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -466,14 +466,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( - const char *u, /* char buffer */ + const char *u, /* string encoded to utf-8 */ Py_ssize_t size /* size of buffer */ ); /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated UTF-8 encoded bytes */ PyAPI_FUNC(PyObject*) PyUnicode_FromString( - const char *u /* string */ + const char *u /* string encoded to utf-8 */ ); /* Return a read-only pointer to the Unicode object's internal @@ -551,8 +551,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject( register PyObject *obj /* Object */ ); -PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list); -PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...); +PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( + const char *format, /* encoded to ISO-8859-1 */ + va_list vargs + ); +PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( + const char *format, /* encoded to ISO-8859-1 */ + ...); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). 
*/ @@ -562,7 +567,9 @@ PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); -PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(const char *); +PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( + const char *str /* encoded to utf-8 */ + ); PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); /* Use only if you know it's a string */ @@ -1149,7 +1156,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( const char *string, /* MBCS encoded string */ - Py_ssize_t length, /* size of string */ + Py_ssize_t length, /* size of string */ const char *errors /* error handling */ ); @@ -1224,7 +1231,7 @@ PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( - const char *s /* encoded string */ + const char *s /* encoded to the filesystem encoding */ ); /* Decode a string using Py_FileSystemDefaultEncoding @@ -1234,8 +1241,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( - const char *s, /* encoded string */ - Py_ssize_t size /* size */ + const char *s, /* encoded to the filesystem encoding */ + Py_ssize_t size /* size */ ); /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the @@ -1417,7 +1424,7 @@ PyAPI_FUNC(int) PyUnicode_Compare( PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( PyObject *left, - const char *right + const char *right /* encoded to ISO-8859-1 (ASCII) */ ); /* Rich compare two strings and return one of the following: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4c4b43c..93d103e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9767,13 +9767,13 @@ PyUnicode_InternImmortal(PyObject **p) } PyObject * -PyUnicode_InternFromString(const char *cp) +PyUnicode_InternFromString(const char *str) { - PyObject *s = 
PyUnicode_FromString(cp); - if (s == NULL) + PyObject *unicode = PyUnicode_FromString(str); + if (unicode == NULL) return NULL; - PyUnicode_InternInPlace(&s); - return s; + PyUnicode_InternInPlace(&unicode); + return unicode; } void _Py_ReleaseInternedUnicodeStrings(void) diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 7636a54..1410f44 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -32,8 +32,8 @@ PyParser_ParseStringFlags(const char *s, grammar *g, int start, node * PyParser_ParseStringFlagsFilename(const char *s, const char *filename, - grammar *g, int start, - perrdetail *err_ret, int flags) + grammar *g, int start, + perrdetail *err_ret, int flags) { int iflags = flags; return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, @@ -42,8 +42,8 @@ PyParser_ParseStringFlagsFilename(const char *s, const char *filename, node * PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, - grammar *g, int start, - perrdetail *err_ret, int *flags) + grammar *g, int start, + perrdetail *err_ret, int *flags) { struct tok_state *tok; int exec_input = start == file_input; @@ -74,25 +74,25 @@ PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, } node * -PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc, +PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *encoding, grammar *g, int start, char *ps1, char *ps2, perrdetail *err_ret, int flags) { int iflags = flags; - return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1, + return PyParser_ParseFileFlagsEx(fp, filename, encoding, g, start, ps1, ps2, err_ret, &iflags); } node * PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, - const char *enc, grammar *g, int start, + const char *encoding, grammar *g, int start, char *ps1, char *ps2, perrdetail *err_ret, int *flags) { struct tok_state *tok; initerr(err_ret, filename); - if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) { + if ((tok = 
PyTokenizer_FromFile(fp, (char *)encoding, ps1, ps2)) == NULL) { err_ret->error = E_NOMEM; return NULL; } diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 90b1b68..7900ab6 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -806,7 +806,7 @@ PyTokenizer_FromUTF8(const char *str, int exec_input) /* Set up tokenizer for file */ struct tok_state * -PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2) +PyTokenizer_FromFile(FILE *fp, char* encoding, char *ps1, char *ps2) { struct tok_state *tok = tok_new(); if (tok == NULL) @@ -820,15 +820,15 @@ PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2) tok->fp = fp; tok->prompt = ps1; tok->nextprompt = ps2; - if (enc != NULL) { + if (encoding != NULL) { /* Must copy encoding declaration since it gets copied into the parse tree. */ - tok->encoding = PyMem_MALLOC(strlen(enc)+1); + tok->encoding = PyMem_MALLOC(strlen(encoding)+1); if (!tok->encoding) { PyTokenizer_Free(tok); return NULL; } - strcpy(tok->encoding, enc); + strcpy(tok->encoding, encoding); tok->decoding_state = STATE_NORMAL; } return tok; diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index c8e19c1..11be04d 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -40,7 +40,7 @@ struct tok_state { int level; /* () [] {} Parentheses nesting level */ /* Used to allow free continuations inside them */ /* Stuff for checking on different tab sizes */ - const char *filename; /* For error messages */ + const char *filename; /* For error messages, encoded as utf-8 */ int altwarning; /* Issue warning if alternate tabs don't match */ int alterror; /* Issue error if alternate tabs don't match */ int alttabsize; /* Alternate tab spacing */ diff --git a/Python/pythonrun.c b/Python/pythonrun.c index fd31974..67b1060 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1878,7 +1878,7 @@ PyParser_ASTFromString(const char *s, const char *filename, int start, } mod_ty -PyParser_ASTFromFile(FILE *fp, const char *filename, const 
char* enc, +PyParser_ASTFromFile(FILE *fp, const char *filename, const char* encoding, int start, char *ps1, char *ps2, PyCompilerFlags *flags, int *errcode, PyArena *arena) @@ -1888,9 +1888,9 @@ PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc, perrdetail err; int iflags = PARSER_FLAGS(flags); - node *n = PyParser_ParseFileFlagsEx(fp, filename, enc, - &_PyParser_Grammar, - start, ps1, ps2, &err, &iflags); + node *n = PyParser_ParseFileFlagsEx(fp, filename, encoding, + &_PyParser_Grammar, + start, ps1, ps2, &err, &iflags); if (flags == NULL) { localflags.cf_flags = 0; flags = &localflags; @@ -2367,23 +2367,23 @@ PyParser_SimpleParseString(const char *str, int start) #undef PyRun_AnyFile PyAPI_FUNC(int) -PyRun_AnyFile(FILE *fp, const char *name) +PyRun_AnyFile(FILE *fp, const char *filename) { - return PyRun_AnyFileExFlags(fp, name, 0, NULL); + return PyRun_AnyFileExFlags(fp, filename, 0, NULL); } #undef PyRun_AnyFileEx PyAPI_FUNC(int) -PyRun_AnyFileEx(FILE *fp, const char *name, int closeit) +PyRun_AnyFileEx(FILE *fp, const char *filename, int closeit) { - return PyRun_AnyFileExFlags(fp, name, closeit, NULL); + return PyRun_AnyFileExFlags(fp, filename, closeit, NULL); } #undef PyRun_AnyFileFlags PyAPI_FUNC(int) -PyRun_AnyFileFlags(FILE *fp, const char *name, PyCompilerFlags *flags) +PyRun_AnyFileFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) { - return PyRun_AnyFileExFlags(fp, name, 0, flags); + return PyRun_AnyFileExFlags(fp, filename, 0, flags); } #undef PyRun_File