diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -594,6 +594,20 @@ def"', """\
     STRING     "'green'"     (2, 7) (2, 14)
     """)
 
+        self.check_tokenize("℘· = 1\n℮᧚ = 2\n℮··፩፰፱᧚ = 3", """\
+    NAME       '℘·'          (1, 0) (1, 2)
+    OP         '='           (1, 3) (1, 4)
+    NUMBER     '1'           (1, 5) (1, 6)
+    NEWLINE    '\\n'          (1, 6) (1, 7)
+    NAME       '℮᧚'          (2, 0) (2, 2)
+    OP         '='           (2, 3) (2, 4)
+    NUMBER     '2'           (2, 5) (2, 6)
+    NEWLINE    '\\n'          (2, 6) (2, 7)
+    NAME       '℮··፩፰፱᧚'     (3, 0) (3, 7)
+    OP         '='           (3, 8) (3, 9)
+    NUMBER     '3'           (3, 10) (3, 11)
+    """)
+
     def test_unicode(self):
         # Legacy unicode literals:
         self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -118,7 +118,9 @@ def maybe(*choices): return group(*choic
 Whitespace = r'[ \f\t]*'
 Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
-Name = r'\w+'
+IDStart = group(r'\w', r'[\u2118\u212E\u309B\u309C]')
+IDContinue = group(IDStart, r'[\u00B7\u0387\u1369-\u1371\u19DA]')
+Name = IDStart + any(IDContinue)
 
 Hexnumber = r'0[xX][0-9a-fA-F]+'
 Binnumber = r'0[bB][01]+'