/*
 * Patch summary (looks like a Mercurial-style diff against revision
 * 2d71d0f954fb).
 *
 * Purpose: report an identifier whose first character may continue but not
 * start an identifier with its own tokenizer error code and SyntaxError
 * message, instead of the generic invalid-character error.
 *
 * NOTE(review): the hunks below appear with their line breaks collapsed onto
 * a few long lines; re-export the patch before trying to apply it.
 *
 * Include/errcode.h
 *   Adds E_IDENTIFIER_START with value 27 and changes E_BADSINGLE from 27
 *   to 28.
 *   NOTE(review): this renumbers an existing error code - confirm nothing
 *   outside this patch relies on E_BADSINGLE being 27.
 *
 * Include/unicodeobject.h
 *   Adds six PyUnicode_IDENTIFIER_* result codes (values 0 to 5) and declares
 *   PyUnicode_ValidateIdentifier(PyObject *s) right after
 *   PyUnicode_IsIdentifier.
 *   NOTE(review): the new declaration sits before the #ifndef Py_LIMITED_API
 *   line shown in the hunk - confirm that exposure is intended.
 *
 * Objects/unicodeobject.c
 *   PyUnicode_IsIdentifier becomes a wrapper: it returns 1 only when
 *   PyUnicode_ValidateIdentifier returns PyUnicode_IDENTIFIER_VALID, else 0.
 *   PyUnicode_ValidateIdentifier takes over the former body and returns:
 *     INVALID_IS_EMPTY   for a zero-length string;
 *     INVALID_CHARACTER  when the first character is not XID_Start, not
 *                        LOW LINE (0x5F) and not XID_Continue either;
 *     INVALID_START      when the first character is XID_Continue but is
 *                        neither XID_Start nor LOW LINE;
 *     INVALID_CONTINUE   when any later character is not XID_Continue;
 *     VALID              otherwise.
 *   The first-character check is an unbraced outer if whose single statement
 *   is the inner if/else pair; the for loop after it is a separate statement.
 *   NOTE(review): NOT_READY is returned only on the line after the
 *   Py_FatalError call; presumably unreachable if Py_FatalError does not
 *   return - confirm.
 *
 * Parser/tokenizer.c
 *   The function changed by the first hunk (presumably verify_identifier,
 *   judging by the call in the second hunk - confirm) now calls
 *   PyUnicode_ValidateIdentifier and sets tok->done itself:
 *   E_IDENTIFIER_START for INVALID_START, E_IDENTIFIER for every other
 *   result that is not VALID. It returns 0 in both error cases and 1 for
 *   VALID. The second hunk removes the assignment tok->done = E_IDENTIFIER
 *   from the caller, which still returns ERRORTOKEN.
 *
 * Python/pythonrun.c
 *   Adds a case mapping E_IDENTIFIER_START to the message
 *   invalid start character in identifier.
 *
 * Lib/test
 *   Adds badsyntax_3131_start.py, whose third line assigns to a name that
 *   begins with a combining accent followed by the letter e, and extends
 *   test_pep3131.py to import it and compare the SyntaxError text, which is
 *   expected to name that file and line 3.
 */
diff -r 2d71d0f954fb Include/errcode.h --- a/Include/errcode.h Sat Jan 17 17:33:49 2015 -0800 +++ b/Include/errcode.h Sun Jan 18 00:33:55 2015 -0500 @@ -30,7 +30,8 @@ #define E_EOLS 24 /* EOL in single-quoted string */ #define E_LINECONT 25 /* Unexpected characters after a line continuation */ #define E_IDENTIFIER 26 /* Invalid characters in identifier */ -#define E_BADSINGLE 27 /* Ill-formed single statement input */ +#define E_IDENTIFIER_START 27 /* Identifier starts with invalid character */ +#define E_BADSINGLE 28 /* Ill-formed single statement input */ #ifdef __cplusplus } diff -r 2d71d0f954fb Include/unicodeobject.h --- a/Include/unicodeobject.h Sat Jan 17 17:33:49 2015 -0800 +++ b/Include/unicodeobject.h Sun Jan 18 00:33:55 2015 -0500 @@ -591,6 +591,13 @@ #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) +#define PyUnicode_IDENTIFIER_INVALID_START 0 +#define PyUnicode_IDENTIFIER_INVALID_IS_EMPTY 1 +#define PyUnicode_IDENTIFIER_INVALID_CONTINUE 2 +#define PyUnicode_IDENTIFIER_INVALID_CHARACTER 3 +#define PyUnicode_IDENTIFIER_NOT_READY 4 +#define PyUnicode_IDENTIFIER_VALID 5 + /* === Public API ========================================================= */ /* --- Plain Py_UNICODE --------------------------------------------------- */ @@ -2069,6 +2076,7 @@ /* Checks whether argument is a valid identifier. 
*/ PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); +PyAPI_FUNC(int) PyUnicode_ValidateIdentifier(PyObject *s); #ifndef Py_LIMITED_API /* Externally visible for str.strip(unicode) */ diff -r 2d71d0f954fb Lib/test/badsyntax_3131_start.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/badsyntax_3131_start.py Sun Jan 18 00:33:55 2015 -0500 @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- +# While è is valid ̀e is not +̀e = 2 diff -r 2d71d0f954fb Lib/test/test_pep3131.py --- a/Lib/test/test_pep3131.py Sat Jan 17 17:33:49 2015 -0800 +++ b/Lib/test/test_pep3131.py Sun Jan 18 00:33:55 2015 -0500 @@ -27,6 +27,13 @@ "invalid character in identifier (badsyntax_3131.py, line 2)") else: self.fail("expected exception didn't occur") + try: + from test import badsyntax_3131_start + except SyntaxError as s: + self.assertEqual(str(s), + "invalid start character in identifier (badsyntax_3131_start.py, line 3)") + else: + self.fail("expected exception didn't occur") def test_main(): support.run_unittest(PEP3131Test) diff -r 2d71d0f954fb Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Jan 17 17:33:49 2015 -0800 +++ b/Objects/unicodeobject.c Sun Jan 18 00:33:55 2015 -0500 @@ -11796,6 +11796,16 @@ int PyUnicode_IsIdentifier(PyObject *self) { + if(PyUnicode_ValidateIdentifier(self) == PyUnicode_IDENTIFIER_VALID) { + return 1; + } else { + return 0; + } +} + +int +PyUnicode_ValidateIdentifier(PyObject *self) +{ int kind; void *data; Py_ssize_t i; @@ -11803,12 +11813,12 @@ if (PyUnicode_READY(self) == -1) { Py_FatalError("identifier not ready"); - return 0; + return PyUnicode_IDENTIFIER_NOT_READY; } /* Special case for empty strings */ if (PyUnicode_GET_LENGTH(self) == 0) - return 0; + return PyUnicode_IDENTIFIER_INVALID_IS_EMPTY; kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -11822,12 +11832,15 @@ as starting an identifier. 
*/ first = PyUnicode_READ(kind, data, 0); if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */) - return 0; - + if(!_PyUnicode_IsXidContinue(first)) { + return PyUnicode_IDENTIFIER_INVALID_CHARACTER; + } else { + return PyUnicode_IDENTIFIER_INVALID_START; + } for (i = 1; i < PyUnicode_GET_LENGTH(self); i++) if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i))) - return 0; - return 1; + return PyUnicode_IDENTIFIER_INVALID_CONTINUE; + return PyUnicode_IDENTIFIER_VALID; } PyDoc_STRVAR(isidentifier__doc__, diff -r 2d71d0f954fb Parser/tokenizer.c --- a/Parser/tokenizer.c Sat Jan 17 17:33:49 2015 -0800 +++ b/Parser/tokenizer.c Sun Jan 18 00:33:55 2015 -0500 @@ -1317,11 +1317,16 @@ } return 0; } - result = PyUnicode_IsIdentifier(s); + result = PyUnicode_ValidateIdentifier(s); Py_DECREF(s); - if (result == 0) + if (result == PyUnicode_IDENTIFIER_INVALID_START) { + tok->done = E_IDENTIFIER_START; + return 0; + } else if(result != PyUnicode_IDENTIFIER_VALID) { tok->done = E_IDENTIFIER; - return result; + return 0; + } + return 1; } #endif @@ -1477,7 +1482,6 @@ tok_backup(tok, c); if (nonascii && !verify_identifier(tok)) { - tok->done = E_IDENTIFIER; return ERRORTOKEN; } *p_start = tok->start; diff -r 2d71d0f954fb Python/pythonrun.c --- a/Python/pythonrun.c Sat Jan 17 17:33:49 2015 -0800 +++ b/Python/pythonrun.c Sun Jan 18 00:33:55 2015 -0500 @@ -1325,7 +1325,9 @@ case E_LINECONT: msg = "unexpected character after line continuation character"; break; - + case E_IDENTIFIER_START: + msg = "invalid start character in identifier"; + break; case E_IDENTIFIER: msg = "invalid character in identifier"; break;