diff -r c711c36cf988 Include/pystrtod.h
--- a/Include/pystrtod.h Wed Feb 10 10:31:43 2016 +0200
+++ b/Include/pystrtod.h Thu Feb 11 12:39:38 2016 +0100
@@ -19,6 +19,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string
                                          int *type);
 
 #ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject *) _PyOS_string_to_number_with_underscores(
+    const char *str, Py_ssize_t len, const char *what, void *arg,
+    PyObject *(*innerfunc)(const char *, Py_ssize_t, void *));
+
 PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr);
 #endif
 
diff -r c711c36cf988 Lib/_pydecimal.py
--- a/Lib/_pydecimal.py Wed Feb 10 10:31:43 2016 +0200
+++ b/Lib/_pydecimal.py Thu Feb 11 12:39:38 2016 +0100
@@ -606,7 +606,7 @@ class Decimal(object):
                 fracpart = m.group('frac') or ''
                 exp = int(m.group('exp') or '0')
                 self._int = str(int(intpart+fracpart))
-                self._exp = exp - len(fracpart)
+                self._exp = exp - len(fracpart.replace('_', ''))
                 self._is_special = False
             else:
                 diag = m.group('diag')
@@ -6138,10 +6138,10 @@ import re
 #    \s*
     (?P<sign>[-+])?              # an optional sign, followed by either...
     (
-        (?=\d|\.\d)              # ...a number (with at least one digit)
-        (?P<int>\d*)             # having a (possibly empty) integer part
-        (\.(?P<frac>\d*))?       # followed by an optional fractional part
-        (E(?P<exp>[-+]?\d+))?    # followed by an optional exponent, or...
+        (?=\d|\.\d)                   # ...a number (with at least one digit)
+        (?P<int>[\d_]*)               # having a (possibly empty) integer part
+        (\.(?P<frac>\d[\d_]*)?)?      # followed by an optional fractional part
+        (E(?P<exp>[-+]?\d[\d_]*))?    # followed by an optional exponent, or...
     |
         Inf(inity)?              # ...an infinity, or...
| diff -r c711c36cf988 Lib/test/test_complex.py --- a/Lib/test/test_complex.py Wed Feb 10 10:31:43 2016 +0200 +++ b/Lib/test/test_complex.py Thu Feb 11 12:39:38 2016 +0100 @@ -377,6 +377,12 @@ class ComplexTest(unittest.TestCase): self.assertAlmostEqual(complex(complex1(1j)), 2j) self.assertRaises(TypeError, complex, complex2(1j)) + # check underscores + for s in ['1_j', '1.5_j', '1_.5j', '(1_2.5+3_j)', '(.5_6j)']: + self.assertEqual(complex(s), eval(s)) + for s in ['1_j_', '(1+1.5_j_)', '1._4j', '_1j']: + self.assertRaises(ValueError, complex, s) + def test_hash(self): for x in range(-30, 30): self.assertEqual(hash(x), hash(complex(x, 0))) diff -r c711c36cf988 Lib/test/test_decimal.py --- a/Lib/test/test_decimal.py Wed Feb 10 10:31:43 2016 +0200 +++ b/Lib/test/test_decimal.py Thu Feb 11 12:39:38 2016 +0100 @@ -554,6 +554,10 @@ class ExplicitConstructionTest(unittest. self.assertEqual(str(Decimal(' -7.89')), '-7.89') self.assertEqual(str(Decimal(" 3.45679 ")), '3.45679') + # underscores + self.assertEqual(str(Decimal('1_.3_e4_')), '1.3E+4') + self.assertEqual(str(Decimal('1_0_0_0')), '1000') + # unicode whitespace for lead in ["", ' ', '\u00a0', '\u205f']: for trail in ["", ' ', '\u00a0', '\u205f']: @@ -578,6 +582,11 @@ class ExplicitConstructionTest(unittest. 
# embedded NUL self.assertRaises(InvalidOperation, Decimal, "12\u00003") + # invalid underscores + self.assertRaises(InvalidOperation, Decimal, '1._3e4') + self.assertRaises(InvalidOperation, Decimal, '1.3e_4') + self.assertRaises(InvalidOperation, Decimal, '_1.3e4') + @cpython_only def test_from_legacy_strings(self): import _testcapi diff -r c711c36cf988 Lib/test/test_float.py --- a/Lib/test/test_float.py Wed Feb 10 10:31:43 2016 +0200 +++ b/Lib/test/test_float.py Thu Feb 11 12:39:38 2016 +0100 @@ -10,6 +10,8 @@ import time import unittest from test import support +from test.test_grammar import VALID_UNDERSCORE_LITERALS, \ + INVALID_UNDERSCORE_LITERALS from math import isinf, isnan, copysign, ldexp INF = float("inf") @@ -61,6 +63,16 @@ class GeneralFloatCases(unittest.TestCas float(b'.' + b'1'*1000) float('.' + '1'*1000) + def test_underscores(self): + for lit in VALID_UNDERSCORE_LITERALS: + if any(ch in lit for ch in '.eE'): + if 'j' not in lit and 'J' not in lit: + self.assertEqual(float(lit), float(lit.replace('_', ''))) + for lit in INVALID_UNDERSCORE_LITERALS: + if any(ch in lit for ch in '.eE'): + if 'j' not in lit and 'J' not in lit: + self.assertRaises(ValueError, float, lit) + def test_non_numeric_input_types(self): # Test possible non-numeric types for the argument x, including # subclasses of the explicitly documented accepted types. diff -r c711c36cf988 Lib/test/test_grammar.py --- a/Lib/test/test_grammar.py Wed Feb 10 10:31:43 2016 +0200 +++ b/Lib/test/test_grammar.py Thu Feb 11 12:39:38 2016 +0100 @@ -8,6 +8,50 @@ import sys # testing import * from sys import * +# These are shared with other test modules. 
+VALID_UNDERSCORE_LITERALS = [ + '0_0_0', + '4_2', + '4_______2', + '1_0000_0000', + '0b_1001_0100', + '0x_ffff_ffff', + '0o_5_7_7', + '1__.4', + '42_j', + '1.4_j', + '1.4e5_j', + '1_00_00_.5', + '1_e10', + '1_E10', + '1_e1_0', + '.1_4', + '0_', + '42_', + '0b1_', + '0xf_', + '0o5_', +] +INVALID_UNDERSCORE_LITERALS = [ + # Trailing underscores: + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '0_7', + '09_99', + # Underscore after non-digit: + '1.4j_', + '1.4e_1', + '.1_4e_1', + '1.0e+_1', + '1._4', + '1._4j', + '1._4e5_j', + '._5', +] + class TokenTests(unittest.TestCase): @@ -87,6 +131,18 @@ class TokenTests(unittest.TestCase): self.assertEqual(1 if 0else 0, 0) self.assertRaises(SyntaxError, eval, "0 if 1Else 0") + def test_underscore_literals(self): + for lit in VALID_UNDERSCORE_LITERALS: + self.assertEqual(eval(lit), eval(lit.replace('_', ''))) + for lit in INVALID_UNDERSCORE_LITERALS: + self.assertRaises(SyntaxError, eval, lit) + # Check with floating "e" vs. 
"else" + self.assertEqual(eval('0 if 1_____else 1'), 0) + self.assertEqual(eval('0 if 1.0_____else 1'), 0) + self.assertRaises(SyntaxError, eval, '0 if 1_Else 1') + # Sanity check: no literal begins with an underscore + self.assertRaises(NameError, eval, "_0") + def test_string_literals(self): x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y) x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39) diff -r c711c36cf988 Lib/test/test_int.py --- a/Lib/test/test_int.py Wed Feb 10 10:31:43 2016 +0200 +++ b/Lib/test/test_int.py Thu Feb 11 12:39:38 2016 +0100 @@ -2,6 +2,8 @@ import sys import unittest from test import support +from test.test_grammar import VALID_UNDERSCORE_LITERALS, \ + INVALID_UNDERSCORE_LITERALS L = [ ('0', 0), @@ -212,6 +214,16 @@ class IntTestCases(unittest.TestCase): self.assertEqual(int('2br45qc', 35), 4294967297) self.assertEqual(int('1z141z5', 36), 4294967297) + def test_underscores(self): + for lit in VALID_UNDERSCORE_LITERALS: + if any(ch in lit for ch in '.eEjJ'): + continue + self.assertEqual(int(lit, 0), int(lit.replace('_', ''), 0)) + for lit in INVALID_UNDERSCORE_LITERALS: + if any(ch in lit for ch in '.eEjJ'): + continue + self.assertRaises(ValueError, int, lit, 0) + @support.cpython_only def test_small_ints(self): # Bug #3236: Return small longs from PyLong_FromString diff -r c711c36cf988 Lib/test/test_tokenize.py --- a/Lib/test/test_tokenize.py Wed Feb 10 10:31:43 2016 +0200 +++ b/Lib/test/test_tokenize.py Thu Feb 11 12:39:38 2016 +0100 @@ -4,6 +4,8 @@ from tokenize import (tokenize, _tokeniz open as tokenize_open, Untokenizer) from io import BytesIO from unittest import TestCase, mock +from test.test_grammar import (VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS) import os import token @@ -185,6 +187,23 @@ def k(x): NUMBER '3.14e159' (1, 4) (1, 12) """) + def test_underscore_literals(self): + def number_token(s): + result = [] + f = BytesIO(s.encode('utf-8')) + ret = '' + for type, token, start, end, 
line in tokenize(f.readline): + if tok_name[type] == 'NUMBER': + return token + return ret or 'invalid token' + for lit in VALID_UNDERSCORE_LITERALS: + if 'else' in lit: + # special test for if-else expression + continue + self.assertEqual(number_token(lit), lit) + for lit in INVALID_UNDERSCORE_LITERALS: + self.assertNotEqual(number_token(lit), lit) + def test_string(self): # String literals self.check_tokenize("x = ''; y = \"\"", """\ diff -r c711c36cf988 Lib/tokenize.py --- a/Lib/tokenize.py Wed Feb 10 10:31:43 2016 +0200 +++ b/Lib/tokenize.py Thu Feb 11 12:39:38 2016 +0100 @@ -120,16 +120,17 @@ Comment = r'#[^\r\n]*' Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) Name = r'\w+' -Hexnumber = r'0[xX][0-9a-fA-F]+' -Binnumber = r'0[bB][01]+' -Octnumber = r'0[oO][0-7]+' -Decnumber = r'(?:0+|[1-9][0-9]*)' +Hexnumber = r'0[xX][0-9a-fA-F_]+' +Binnumber = r'0[bB][01_]+' +Octnumber = r'0[oO][0-7_]+' +Decnumber = r'(?:0[0_]*|[1-9][0-9_]*)' Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) -Exponent = r'[eE][-+]?[0-9]+' -Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) -Expfloat = r'[0-9]+' + Exponent +Exponent = r'[eE][-+]?[0-9][0-9_]*' +Pointfloat = group(r'[0-9][0-9_]*\.(?:[0-9][0-9_]*)?', + r'\.[0-9][0-9_]*') + maybe(Exponent) +Expfloat = r'[0-9][0-9_]*' + Exponent Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') +Imagnumber = group(r'[0-9][0-9_]*[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) # Return the empty string, plus all of the valid string prefixes. 
diff -r c711c36cf988 Objects/complexobject.c --- a/Objects/complexobject.c Wed Feb 10 10:31:43 2016 +0200 +++ b/Objects/complexobject.c Thu Feb 11 12:39:38 2016 +0100 @@ -759,30 +759,13 @@ static PyMemberDef complex_members[] = { }; static PyObject * -complex_subtype_from_string(PyTypeObject *type, PyObject *v) +complex_from_string_inner(const char *s, Py_ssize_t len, void *type) { - const char *s, *start; - char *end; double x=0.0, y=0.0, z; int got_bracket=0; - PyObject *s_buffer = NULL; - Py_ssize_t len; - - if (PyUnicode_Check(v)) { - s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v); - if (s_buffer == NULL) - return NULL; - s = PyUnicode_AsUTF8AndSize(s_buffer, &len); - if (s == NULL) - goto error; - } - else { - PyErr_Format(PyExc_TypeError, - "complex() argument must be a string or a number, not '%.200s'", - Py_TYPE(v)->tp_name); - return NULL; - } - + const char *start; + char *end; + /* position on first nonblank */ start = s; while (Py_ISSPACE(*s)) @@ -822,7 +805,7 @@ complex_subtype_from_string(PyTypeObject if (PyErr_ExceptionMatches(PyExc_ValueError)) PyErr_Clear(); else - goto error; + return NULL; } if (end != s) { /* all 4 forms starting with land here */ @@ -835,7 +818,7 @@ complex_subtype_from_string(PyTypeObject if (PyErr_ExceptionMatches(PyExc_ValueError)) PyErr_Clear(); else - goto error; + return NULL; } if (end != s) /* j */ @@ -890,15 +873,41 @@ complex_subtype_from_string(PyTypeObject if (s-start != len) goto parse_error; - Py_XDECREF(s_buffer); - return complex_subtype_from_doubles(type, x, y); + return complex_subtype_from_doubles((PyTypeObject *)type, x, y); parse_error: PyErr_SetString(PyExc_ValueError, "complex() arg is a malformed string"); + return NULL; +} + +static PyObject * +complex_subtype_from_string(PyTypeObject *type, PyObject *v) +{ + const char *s; + PyObject *s_buffer = NULL, *result = NULL; + Py_ssize_t len; + + if (PyUnicode_Check(v)) { + s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v); + if (s_buffer == 
NULL)
+            return NULL;
+        s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
+        if (s == NULL)
+            goto error;
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+            "complex() argument must be a string or a number, not '%.200s'",
+            Py_TYPE(v)->tp_name);
+        return NULL;
+    }
+
+    result = _PyOS_string_to_number_with_underscores(s, len, "complex", type,
+                                                     complex_from_string_inner);
   error:
     Py_XDECREF(s_buffer);
-    return NULL;
+    return result;
 }
 
 static PyObject *
diff -r c711c36cf988 Objects/floatobject.c
--- a/Objects/floatobject.c Wed Feb 10 10:31:43 2016 +0200
+++ b/Objects/floatobject.c Thu Feb 11 12:39:38 2016 +0100
@@ -124,11 +124,43 @@ PyFloat_FromDouble(double fval)
     return (PyObject *) op;
 }
 
+/* Parse the first len bytes of s as a float, ignoring leading and trailing
+ * whitespace.  obj is only used to format the error message.  Returns a
+ * new float object, or NULL with an exception set. */
+static PyObject *
+float_from_string_inner(const char *s, Py_ssize_t len, void *obj)
+{
+    double x;
+    const char *end;
+    /* Use the length supplied by the caller rather than strlen(): with
+     * strlen() an embedded NUL byte would end the string early and the
+     * rest of the input would be silently ignored. */
+    const char *last = s + len;
+    /* strip space */
+    while (s < last && Py_ISSPACE(*s))
+        s++;
+    while (s < last - 1 && Py_ISSPACE(last[-1]))
+        last--;
+    /* We don't care about overflow or underflow.  If the platform
+     * supports them, infinities and signed zeroes (on underflow) are
+     * fine. */
+    x = PyOS_string_to_double(s, (char **)&end, NULL);
+    if (end != last) {
+        PyErr_Format(PyExc_ValueError,
+                     "could not convert string to float: "
+                     "%R", obj);
+        return NULL;
+    }
+    else if (x == -1.0 && PyErr_Occurred())
+        return NULL;
+    else
+        return PyFloat_FromDouble(x);
+}
+
 PyObject *
 PyFloat_FromString(PyObject *v)
 {
-    const char *s, *last, *end;
-    double x;
+    const char *s;
     PyObject *s_buffer = NULL;
     Py_ssize_t len;
     Py_buffer view = {NULL, NULL};
@@ -169,27 +201,8 @@ PyFloat_FromString(PyObject *v)
             Py_TYPE(v)->tp_name);
         return NULL;
     }
-    last = s + len;
-    /* strip space */
-    while (s < last && Py_ISSPACE(*s))
-        s++;
-    while (s < last - 1 && Py_ISSPACE(last[-1]))
-        last--;
-    /* We don't care about overflow or underflow.  If the platform
-     * supports them, infinities and signed zeroes (on underflow) are
-     * fine.
*/
-    x = PyOS_string_to_double(s, (char **)&end, NULL);
-    if (end != last) {
-        PyErr_Format(PyExc_ValueError,
-                     "could not convert string to float: "
-                     "%R", v);
-        result = NULL;
-    }
-    else if (x == -1.0 && PyErr_Occurred())
-        result = NULL;
-    else
-        result = PyFloat_FromDouble(x);
-
+    result = _PyOS_string_to_number_with_underscores(s, len, "float", (void *)v,
+                                                     float_from_string_inner);
     PyBuffer_Release(&view);
     Py_XDECREF(s_buffer);
     return result;
diff -r c711c36cf988 Objects/longobject.c
--- a/Objects/longobject.c Wed Feb 10 10:31:43 2016 +0200
+++ b/Objects/longobject.c Thu Feb 11 12:39:38 2016 +0100
@@ -2017,6 +2017,7 @@ long_from_binary_base(const char **str,
 {
     const char *p = *str;
     const char *start = p;
+    Py_ssize_t digits = 0;  /* digit characters seen, underscores excluded */
     int bits_per_char;
     Py_ssize_t n;
     PyLongObject *z;
@@ -2028,12 +2029,16 @@ long_from_binary_base(const char **str,
     n = base;
     for (bits_per_char = -1; n; ++bits_per_char)
         n >>= 1;
-    /* n <- total # of bits needed, while setting p to end-of-string */
-    while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base)
+    /* count digits and set p to end-of-string */
+    while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base || *p == '_') {
+        if (*p != '_') {
+            ++digits;
+        }
         ++p;
+    }
     *str = p;
     /* n <- # of Python digits needed, = ceiling(n/PyLong_SHIFT). */
-    n = (p - start) * bits_per_char + PyLong_SHIFT - 1;
-    if (n / bits_per_char < p - start) {
+    n = digits * bits_per_char + PyLong_SHIFT - 1;
+    if (n / bits_per_char < digits) {  /* overflow check; p - start counts '_' too */
         PyErr_SetString(PyExc_ValueError,
                         "int string too large to convert");
@@ -2050,7 +2055,11 @@ long_from_binary_base(const char **str,
     bits_in_accum = 0;
     pdigit = z->ob_digit;
     while (--p >= start) {
-        int k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)];
+        int k;
+        if (*p == '_') {
+            continue;
+        }
+        k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)];
         assert(k >= 0 && k < base);
         accum |= (twodigits)k << bits_in_accum;
         bits_in_accum += bits_per_char;
@@ -2121,8 +2130,16 @@ PyLong_FromString(const char *str, char
     if (str[0] == '0' &&
         ((base == 16 && (str[1] == 'x' || str[1] == 'X')) ||
          (base == 8  && (str[1] == 'o' || str[1] == 'O')) ||
-         (base == 2  && (str[1] == 'b' || str[1] == 'B'))))
+         (base == 2  && (str[1] == 'b' || str[1] == 'B')))) {
         str += 2;
+        /* Underscores allowed here. */
+        while (*str == '_')
+            ++str;
+    }
+    if (str[0] == '_') {
+        /* May not start with underscores. */
+        goto onError;
+    }
 
     start = str;
     if ((base & (base - 1)) == 0)
@@ -2215,6 +2232,7 @@ digit beyond the first.
 ***/
         twodigits c;           /* current input character */
         Py_ssize_t size_z;
+        Py_ssize_t digits = 0;
         int i;
         int convwidth;
         twodigits convmultmax, convmult;
@@ -2245,15 +2263,19 @@ digit beyond the first.
 
         /* Find length of the string of numeric characters. */
         scan = str;
-        while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base)
+        while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base || *scan == '_') {
+            if (*scan != '_') {
+                ++digits;
+            }
             ++scan;
+        }
 
         /* Create an int object that can contain the largest possible
          * integer with this base and length.  Note that there's no
          * need to initialize z->ob_digit -- no slot is read up before
          * being stored into.
*/ - size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1; + size_z = (Py_ssize_t)(digits * log_base_BASE[base]) + 1; /* Uncomment next line to test exceedingly rare copy code */ /* size_z = 1; */ assert(size_z > 0); @@ -2270,9 +2292,17 @@ digit beyond the first. /* Work ;-) */ while (str < scan) { + if (*str == '_') { + str++; + continue; + } /* grab up to convwidth digits from the input string */ c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)]; - for (i = 1; i < convwidth && str != scan; ++i, ++str) { + for (i = 1; i < convwidth && str != scan; ++str) { + if (*str == '_') { + continue; + } + i++; c = (twodigits)(c * base + (int)_PyLong_DigitValue[Py_CHARMASK(*str)]); assert(c < PyLong_BASE); diff -r c711c36cf988 Parser/tokenizer.c --- a/Parser/tokenizer.c Wed Feb 10 10:31:43 2016 +0200 +++ b/Parser/tokenizer.c Thu Feb 11 12:39:38 2016 +0100 @@ -1586,14 +1586,19 @@ tok_get(struct tok_state *tok, char **p_ if (c == '0') { /* Hex, octal or binary -- maybe. */ c = tok_nextc(tok); - if (c == '.') + if (c == '.') { + c = tok_nextc(tok); goto fraction; + } if (c == 'j' || c == 'J') goto imaginary; + /* Note: no underscore is allowed in the middle of + "0x", "0b" or "0o". 
*/ if (c == 'x' || c == 'X') { - /* Hex */ - c = tok_nextc(tok); + do { + c = tok_nextc(tok); + } while (c == '_'); if (!isxdigit(c)) { tok->done = E_TOKEN; tok_backup(tok, c); @@ -1601,11 +1606,13 @@ tok_get(struct tok_state *tok, char **p_ } do { c = tok_nextc(tok); - } while (isxdigit(c)); + } while (isxdigit(c) || c == '_'); } else if (c == 'o' || c == 'O') { /* Octal */ - c = tok_nextc(tok); + do { + c = tok_nextc(tok); + } while (c == '_'); if (c < '0' || c >= '8') { tok->done = E_TOKEN; tok_backup(tok, c); @@ -1613,11 +1620,13 @@ tok_get(struct tok_state *tok, char **p_ } do { c = tok_nextc(tok); - } while ('0' <= c && c < '8'); + } while (('0' <= c && c < '8') || c == '_'); } else if (c == 'b' || c == 'B') { /* Binary */ - c = tok_nextc(tok); + do { + c = tok_nextc(tok); + } while (c == '_'); if (c != '0' && c != '1') { tok->done = E_TOKEN; tok_backup(tok, c); @@ -1625,25 +1634,28 @@ tok_get(struct tok_state *tok, char **p_ } do { c = tok_nextc(tok); - } while (c == '0' || c == '1'); + } while (c == '0' || c == '1' || c == '_'); } else { int nonzero = 0; /* maybe old-style octal; c is first char of it */ /* in any case, allow '0' as a literal */ - while (c == '0') + while (c == '0' || c == '_') c = tok_nextc(tok); - while (isdigit(c)) { + while (isdigit(c) || c == '_') { nonzero = 1; c = tok_nextc(tok); } - if (c == '.') + if (c == '.') { + c = tok_nextc(tok); goto fraction; + } else if (c == 'e' || c == 'E') goto exponent; else if (c == 'j' || c == 'J') goto imaginary; else if (nonzero) { + /* Old-style octal: now disallowed. */ tok->done = E_TOKEN; tok_backup(tok, c); return ERRORTOKEN; @@ -1654,15 +1666,19 @@ tok_get(struct tok_state *tok, char **p_ /* Decimal */ do { c = tok_nextc(tok); - } while (isdigit(c)); + } while (isdigit(c) || c == '_'); { /* Accept floating point numbers. */ if (c == '.') { + c = tok_nextc(tok); fraction: /* Fraction */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); + /* Right after dot, an underscore is not allowed. 
*/
+                    if (isdigit(c)) {
+                        do {
+                            c = tok_nextc(tok);
+                        } while (isdigit(c) || c == '_');
+                    }
                 }
                 if (c == 'e' || c == 'E') {
                     int e;
@@ -1678,6 +1694,8 @@ tok_get(struct tok_state *tok, char **p_
                             return ERRORTOKEN;
                         }
                     } else if (!isdigit(c)) {
+                        /* "e" may be the start of a name
+                           or keyword such as "else" */
                         tok_backup(tok, c);
                         tok_backup(tok, e);
                         *p_start = tok->start;
@@ -1686,7 +1704,7 @@ tok_get(struct tok_state *tok, char **p_
                     }
                     do {
                         c = tok_nextc(tok);
-                    } while (isdigit(c));
+                    } while (isdigit(c) || c == '_');
                 }
                 if (c == 'j' || c == 'J')
                     /* Imaginary part */
diff -r c711c36cf988 Python/ast.c
--- a/Python/ast.c Wed Feb 10 10:31:43 2016 +0200
+++ b/Python/ast.c Thu Feb 11 12:39:38 2016 +0100
@@ -3941,7 +3941,7 @@ ast_for_stmt(struct compiling *c, const
 }
 
 static PyObject *
-parsenumber(struct compiling *c, const char *s)
+parsenumber_raw(struct compiling *c, const char *s)
 {
     const char *end;
     long x;
@@ -3984,6 +3984,37 @@ parsenumber(struct compiling *c, const c
 }
 
 static PyObject *
+parsenumber(struct compiling *c, const char *s)
+{
+    /* Parse the numeric literal s, which may contain underscores, by
+       stripping them and delegating to parsenumber_raw().  Returns NULL
+       with MemoryError set if the temporary copy cannot be allocated. */
+    char *dup, *end;
+    PyObject *res = NULL;
+
+    assert(s != NULL);
+
+    if (strchr(s, '_') == NULL) {
+        return parsenumber_raw(c, s);
+    }
+    /* Create a duplicate without underscores. */
+    dup = PyMem_Malloc(strlen(s) + 1);
+    if (dup == NULL) {
+        return PyErr_NoMemory();
+    }
+    end = dup;
+    for (; *s; s++) {
+        if (*s != '_') {
+            *end++ = *s;
+        }
+    }
+    *end = '\0';
+    res = parsenumber_raw(c, dup);
+    PyMem_Free(dup);
+    return res;
+}
+
+static PyObject *
 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
 {
     const char *s, *t;
diff -r c711c36cf988 Python/pystrtod.c
--- a/Python/pystrtod.c Wed Feb 10 10:31:43 2016 +0200
+++ b/Python/pystrtod.c Thu Feb 11 12:39:38 2016 +0100
@@ -370,6 +370,67 @@ PyOS_string_to_double(const char *s,
     return result;
 }
 
+/* Remove underscores from the numeric string s (orig_len bytes) and pass
+   the cleaned copy to innerfunc together with its length and arg.
+   An underscore is only accepted directly after a decimal digit or another
+   underscore; otherwise ValueError is raised, using `what` to name the
+   target type in the message.  Returns innerfunc's result, or NULL with
+   an exception set. */
+PyObject *
+_PyOS_string_to_number_with_underscores(
+    const char *s, Py_ssize_t orig_len, const char *what, void *arg,
+    PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
+{
+    char prev;
+    char *dup, *end;
+    const char *ch, *last;
+    Py_ssize_t slen;
+    PyObject *strobj, *result;
+
+    /* Look at exactly orig_len bytes instead of stopping at the first NUL,
+       so that embedded NUL bytes are passed on to innerfunc unmodified. */
+    if (memchr(s, '_', (size_t)orig_len) == NULL) {
+        return innerfunc(s, orig_len, arg);
+    }
+
+    dup = PyMem_Malloc(orig_len + 1);
+    if (dup == NULL) {
+        return PyErr_NoMemory();
+    }
+    end = dup;
+    prev = '\0';
+    last = s + orig_len;
+    for (ch = s; ch < last; ch++) {
+        if (*ch == '_') {
+            /* Underscores are only allowed after digits
+               (and other underscores.) */
+            if (!(prev == '_' || (prev >= '0' && prev <= '9'))) {
+                goto error;
+            }
+        } else {
+            *end++ = *ch;
+        }
+        prev = *ch;
+    }
+    *end = '\0';
+    result = innerfunc(dup, end - dup, arg);
+    PyMem_Free(dup);
+    return result;
+
+  error:
+    /* Release the copy first so that no exit path below can leak it. */
+    PyMem_Free(dup);
+    slen = orig_len < 200 ? orig_len : 200;
+    strobj = PyUnicode_FromStringAndSize(s, slen);
+    if (strobj == NULL)
+        return NULL;
+    PyErr_Format(PyExc_ValueError,
+                 "could not convert string to %s: "
+                 "%R", what, strobj);
+    Py_DECREF(strobj);
+    return NULL;
+}
+
 #ifdef PY_NO_SHORT_FLOAT_REPR
 
 /* Given a string that may have a decimal point in the current