diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5881,9 +5881,20 @@
 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
 
+/* Decode size bytes of unicode-escape data starting at s.
+
+   An unrecognized escape sequence is copied to the output unchanged and
+   raises no warning here.  The first one found is reported to the caller
+   instead: *first_invalid_escape_idx is set to the decoder's offset
+   (s - starts) at that point, or to -1 if there was none, and
+   *first_invalid_escape_char to the offending character, which is only
+   meaningful when *first_invalid_escape_idx != -1.  Both output pointers
+   are dereferenced without a NULL check. */
 PyObject *
-PyUnicode_DecodeUnicodeEscape(const char *s,
-                              Py_ssize_t size,
-                              const char *errors)
+_PyUnicode_DecodeUnicodeEscape(const char *s,
+                               Py_ssize_t size,
+                               const char *errors,
+                               char *first_invalid_escape_char,
+                               Py_ssize_t *first_invalid_escape_idx)
 {
     const char *starts = s;
     _PyUnicodeWriter writer;
@@ -5891,6 +5902,9 @@
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
+    /* -1 means no invalid escape sequence has been seen so far. */
+    *first_invalid_escape_idx = -1;
+
     if (size == 0) {
         _Py_RETURN_UNICODE_EMPTY();
     }
@@ -6065,9 +6079,11 @@
             goto error;
 
         default:
-            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
-                                 "invalid escape sequence '\\%c'", c) < 0)
-                goto onError;
+            /* Only the first invalid escape is recorded. */
+            if (*first_invalid_escape_idx == -1) {
+                *first_invalid_escape_idx = s - starts;
+                *first_invalid_escape_char = c;
+            }
             WRITE_ASCII_CHAR('\\');
             WRITE_CHAR(c);
             continue;
@@ -6102,6 +6118,37 @@
     return NULL;
 }
 
+/* Public decoder: wraps _PyUnicode_DecodeUnicodeEscape() and issues a
+   DeprecationWarning for the first invalid escape sequence, if any.
+   Returns NULL if the decode fails or if PyErr_WarnFormat() reports an
+   error; in the latter case the decoded string is released first. */
+PyObject *
+PyUnicode_DecodeUnicodeEscape(const char *s,
+                              Py_ssize_t size,
+                              const char *errors)
+{
+    char first_invalid_escape_char;
+    Py_ssize_t first_invalid_escape_idx;
+    PyObject *result;
+
+    result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
+                                            &first_invalid_escape_char,
+                                            &first_invalid_escape_idx);
+    if (result == NULL)
+        return NULL;
+    if (first_invalid_escape_idx != -1) {
+        /* Plain char may be signed: go through unsigned char so that a
+           byte >= 0x80 is not passed to %c as a negative int. */
+        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                             "invalid escape sequence '\\%c'",
+                             (unsigned char)first_invalid_escape_char) < 0) {
+            Py_DECREF(result);
+            return NULL;
+        }
+    }
+    return result;
+}
+
 /* Return a Unicode-Escape string version of the Unicode object.
 
    If quotes is true, the string is enclosed in u"" or u'' quotes as
diff --git a/Python/ast.c b/Python/ast.c
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4116,8 +4116,18 @@
     return PyUnicode_DecodeUTF8(t, s - t, NULL);
 }
 
+/* Defined in Objects/unicodeobject.c.  NOTE(review): consider moving this
+   prototype to a shared header so the two files cannot drift apart. */
+PyObject *
+_PyUnicode_DecodeUnicodeEscape(const char *s,
+                               Py_ssize_t size,
+                               const char *errors,
+                               char *first_invalid_escape_char,
+                               Py_ssize_t *first_invalid_escape_idx);
+
 static PyObject *
-decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len)
+decode_unicode_with_escapes(struct compiling *c, const node *n,
+                            const char *s, size_t len)
 {
     PyObject *v, *u;
     char *buf;
@@ -4170,8 +4180,25 @@
     len = p - buf;
     s = buf;
 
-    v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
+    char first_invalid_escape_char;
+    Py_ssize_t first_invalid_escape_idx;
+    v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL,
+                                       &first_invalid_escape_char,
+                                       &first_invalid_escape_idx);
     Py_XDECREF(u);
+
+    if (v != NULL && first_invalid_escape_idx != -1) {
+        /* Reject the literal: drop the decoded string and report the
+           first invalid escape through ast_error(). */
+        char errmsg[300];
+
+        Py_DECREF(v);
+        PyOS_snprintf(errmsg, sizeof(errmsg),
+                      "invalid escape sequence \\%c",
+                      first_invalid_escape_char);
+        ast_error(c, n, errmsg);
+        return NULL;
+    }
     return v;
 }
 
@@ -4313,7 +4340,7 @@
                                                     literal_end-literal_start,
                                                     NULL, NULL);
         else
-            *literal = decode_unicode_with_escapes(c, literal_start,
+            *literal = decode_unicode_with_escapes(c, n, literal_start,
                                                    literal_end-literal_start);
         if (!*literal)
             return -1;
@@ -5056,7 +5083,7 @@
         if (*rawmode)
             *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
         else
-            *result = decode_unicode_with_escapes(c, s, len);
+            *result = decode_unicode_with_escapes(c, n, s, len);
     }
     return *result == NULL ? -1 : 0;
 }