Index: Python/dtoa.c
===================================================================
--- Python/dtoa.c (revision 82031)
+++ Python/dtoa.c (working copy)
@@ -242,6 +242,8 @@
 #define Frac_mask  0xfffff
 #define Frac_mask1 0xfffff
 #define Ten_pmax 22
+#define Big_10_exp 309 /* Values >= 10**Big_10_exp overflow. */
+#define Tiny_10_exp -324 /* Values < 10**Tiny_10_exp underflow to zero. */
 #define Bletch 0x10
 #define Bndry_mask  0xfffff
 #define Bndry_mask1 0xfffff
@@ -1318,6 +1320,214 @@
     }
 }
 
+/* parse_numeric_string: Parse and validate a finite numeric string.
+
+   Inputs:
+
+     A NUL-terminated string s00 with an initial portion that represents a
+     finite decimal value.  Valid numeric strings are described by the
+     following pseudo-grammar:
+
+       sign = '+' | '-'
+       digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
+       point = '.'
+       indicator = 'e' | 'E'
+       digits = digit [digits]
+       significand = digits [point [digits]] | point digits
+       exponent = indicator [sign] digits
+       numeric-string = [sign] significand [exponent]
+
+     NaNs, infinities and hex literals are not accepted, and leading
+     whitespace is not permitted.
+
+   Outputs:
+
+     Any nonzero finite decimal value as above can be uniquely expressed in
+     the form
+
+       (-1)**sign * 0.DIGITS * 10**exp
+
+     where sign is 0 or 1, exp is an integer, and DIGITS is a string of one
+     or more digits with both the first and last digit in the string nonzero.
+     The following return values from parse_numeric_string provide access to
+     these values:
+
+     *sign gives the sign of the input: 1 for negative, 0 for positive.
+
+     *pnd is the length of DIGITS.
+
+     *ps0 is a pointer into the input string s00, and *pnd0 is an integer;
+     their values are such that for each i in the range 0 <= i < *pnd, the
+     ith digit DIGITS[i] can be retrieved as:
+
+       (*ps0)[i < *pnd0 ? i : i + 1]
+
+     [ In the current implementation, for nonzero input *ps0 points to a
+     nonzero digit, to the decimal point, or (when leading zeros follow the
+     point) to the last leading zero.  *pnd0 gives the position of the
+     decimal point, or of the end of the digit string if no decimal point
+     is present, relative to ps0.  Note that *pnd0 may be larger than *pnd,
+     or smaller than 0. ]
+
+     *exp is the exponent in the formula above, clipped to lie in the range
+     [INT_MIN, INT_MAX].
+
+     For an input representing zero, *pnd will be 0.
+
+     *se points at the first character past the largest initial sequence
+     of s00 that gives a valid numeric string.  If the entire string
+     s00 is a valid numeric string, *se will point to its terminating NUL
+     character.  If no initial portion of s00 gives a valid numeric
+     string then *se will be equal to s00.
+
+   Returns 0 for a successful parse, and a nonzero value on failure.
+*/
+
+static int
+parse_numeric_string(const char *s00, char **se, char **ps0,
+                     Py_ssize_t *pnd, Py_ssize_t *pnd0,
+                     int *exp, int *sign)
+{
+    char c;
+    const char *s, *s0, *s1;
+    int esign, rv;
+    unsigned int dig;
+    Py_ssize_t e, nd, nd0, nz;
+    size_t abs_exp;
+
+    s = s00;
+    c = *s;
+
+    /* Parse optional sign, if present. */
+    *sign = 0;
+    switch (c) {
+    case '-':
+        *sign = 1;
+        /* no break */
+    case '+':
+        c = *++s;
+    }
+
+    /* Skip (and count) leading zeros. */
+    s0 = s;
+    while (c == '0')
+        c = *++s;
+    nz = s - s0;
+
+    /* Parse any remaining digits before the point. */
+    while ('0' <= c && c <= '9')
+        c = *++s;
+    nd0 = nd = s - s0;
+
+    /* Parse decimal point and following digits. */
+    if (c == '.') {
+        c = *++s;
+        s1 = s;
+        /* If all digits so far are zeros, continue to count leading zeros. */
+        if (nd == nz) {
+            while (c == '0')
+                c = *++s;
+            nz += s - s1;
+        }
+        while ('0' <= c && c <= '9')
+            c = *++s;
+        nd += s - s1;
+    }
+
+    if (!nd) {
+        /* No digits in the significand, so we've got an invalid numeric
+           string and a parse failure. */
+        rv = 1;
+        *exp = 0; /* Silence a gcc 'may be used uninitialized' warning. */
+        goto exit;
+    }
+
+    /* We've got at least one digit, so the string up to this point is a
+       valid numeric string and the parse is successful. */
+    rv = 0;
+    s00 = s;
+
+    /* Adjust s0, nd, nd0 for leading and trailing zeros.  The ith digit of the
+       significand can be retrieved as s0[i < nd0 ? i : i + 1]. */
+    s0 += nz;
+    nd -= nz;
+    nd0 -= nz;
+    while (nd > 0 && s0[nd - 1 < nd0 ? nd - 1 : nd] == '0')
+        nd--;
+
+    /* Now s0[i < nd0 ? i : i + 1], for 0 <= i < nd, gives the ith digit of the
+       significand, ignoring leading and trailing zeros.  It's possible to have
+       nd0 < 0 or nd0 > nd here, but that shouldn't be problematic. */
+
+    /* Parse exponent. */
+    esign = 0;
+    abs_exp = 0U;
+    if (c == 'e' || c == 'E') {
+        c = *++s;
+
+        /* Exponent sign. */
+        esign = 0;
+        switch (c) {
+        case '-':
+            esign = 1;
+            /* no break */
+        case '+':
+            c = *++s;
+        }
+
+        /* Get absolute value abs_exp for the exponent; cap at SIZE_T_MAX. */
+        s1 = s;
+        while ('0' <= c && c <= '9') {
+            dig = (unsigned int)(c - '0');
+            /* Safe version of: if (10U * abs_exp + dig > SIZE_T_MAX) {...} */
+            if (abs_exp >= SIZE_T_MAX / 10U && (abs_exp > SIZE_T_MAX / 10U ||
+                                                dig > SIZE_T_MAX % 10U)) {
+                abs_exp = SIZE_T_MAX; /* saturate; keep consuming digits */
+            }
+            else
+                abs_exp = 10U * abs_exp + dig;
+            c = *++s;
+        }
+        /* A valid exponent must have at least one digit. */
+        if (s != s1)
+            s00 = s;
+    }
+
+    /* Find e = nd0 + (-1)**esign * abs_exp, avoiding underflow/overflow and
+       clipping the result to the range [PY_SSIZE_T_MIN, PY_SSIZE_T_MAX].
+       Note that if the 'true' absolute exponent (i.e., without the SIZE_T_MAX
+       cap) is greater than SIZE_T_MAX then either e >= PY_SSIZE_T_MAX or e <=
+       PY_SSIZE_T_MIN, so we still end up doing the right thing here. */
+    if (esign) {
+        if (nd0 - (size_t)PY_SSIZE_T_MIN < abs_exp)
+            e = PY_SSIZE_T_MIN;
+        else
+            e = nd0 - abs_exp;
+    }
+    else {
+        if ((size_t)PY_SSIZE_T_MAX - nd0 < abs_exp)
+            e = PY_SSIZE_T_MAX;
+        else
+            e = nd0 + abs_exp;
+    }
+
+    /* Clip further to [INT_MIN, INT_MAX]. */
+    if (e < INT_MIN)
+        *exp = INT_MIN;
+    else if (e > INT_MAX)
+        *exp = INT_MAX;
+    else
+        *exp = (int)e;
+
+  exit:
+    if (se)
+        *se = (char *)s00;
+    *pnd = nd;
+    *pnd0 = nd0;
+    *ps0 = (char *)s0;
+    return rv;
+}
+
 /* The bigcomp function handles some hard cases for strtod, for inputs
    with more than STRTOD_DIGLIM digits.  It's called once an initial
    estimate for the double corresponding to the input string has
@@ -1477,165 +1687,31 @@
 double
 _Py_dg_strtod(const char *s00, char **se)
 {
-    int bb2, bb5, bbe, bd2, bd5, bs2, c, dsign, e, e1, error;
-    int esign, i, j, k, lz, nd, nd0, odd, sign;
-    const char *s, *s0, *s1;
+    int bb2, bb5, bbe, bd2, bd5, bs2, dsign, e, e1, error, i, j, k, odd, sign;
+    Py_ssize_t nd, nd0;
+    char *s0;
     double aadj, aadj1;
     U aadj2, adj, rv, rv0;
-    ULong y, z, abs_exp;
+    ULong y, z;
     Long L;
     BCinfo bc;
     Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
 
     dval(&rv) = 0.;
 
-    /* Start parsing. */
-    c = *(s = s00);
-
-    /* Parse optional sign, if present. */
-    sign = 0;
-    switch (c) {
-    case '-':
-        sign = 1;
-        /* no break */
-    case '+':
-        c = *++s;
-    }
-
-    /* Skip leading zeros: lz is true iff there were leading zeros. */
-    s1 = s;
-    while (c == '0')
-        c = *++s;
-    lz = s != s1;
-
-    /* Point s0 at the first nonzero digit (if any).  nd0 will be the position
-       of the point relative to s0.  nd will be the total number of digits
-       ignoring leading zeros. */
-    s0 = s1 = s;
-    while ('0' <= c && c <= '9')
-        c = *++s;
-    nd0 = nd = s - s1;
-
-    /* Parse decimal point and following digits. */
-    if (c == '.') {
-        c = *++s;
-        if (!nd) {
-            s1 = s;
-            while (c == '0')
-                c = *++s;
-            lz = lz || s != s1;
-            nd0 -= s - s1;
-            s0 = s;
-        }
-        s1 = s;
-        while ('0' <= c && c <= '9')
-            c = *++s;
-        nd += s - s1;
-    }
-
-    /* Now lz is true if and only if there were leading zero digits, and nd
-       gives the total number of digits ignoring leading zeros.  A valid input
-       must have at least one digit.
*/ - if (!nd && !lz) { - if (se) - *se = (char *)s00; + error = parse_numeric_string(s00, se, &s0, &nd, &nd0, &e, &sign); + if (error) goto parse_error; - } - /* Parse exponent. */ - e = 0; - if (c == 'e' || c == 'E') { - s00 = s; - c = *++s; - - /* Exponent sign. */ - esign = 0; - switch (c) { - case '-': - esign = 1; - /* no break */ - case '+': - c = *++s; - } - - /* Skip zeros. lz is true iff there are leading zeros. */ - s1 = s; - while (c == '0') - c = *++s; - lz = s != s1; - - /* Get absolute value of the exponent. */ - s1 = s; - abs_exp = 0; - while ('0' <= c && c <= '9') { - abs_exp = 10*abs_exp + (c - '0'); - c = *++s; - } - - /* abs_exp will be correct modulo 2**32. But 10**9 < 2**32, so if - there are at most 9 significant exponent digits then overflow is - impossible. */ - if (s - s1 > 9 || abs_exp > MAX_ABS_EXP) - e = (int)MAX_ABS_EXP; - else - e = (int)abs_exp; - if (esign) - e = -e; - - /* A valid exponent must have at least one digit. */ - if (s == s1 && !lz) - s = s00; - } - - /* Adjust exponent to take into account position of the point. */ - e += nd0; - if (nd0 <= 0) - nd0 = nd; - - /* Finished parsing. Set se to indicate how far we parsed */ - if (se) - *se = (char *)s; - - /* If all digits were zero, exit with return value +-0.0. Otherwise, - strip trailing zeros: scan back until we hit a nonzero digit. */ + /* If all digits were zero, exit with return value +-0.0. */ if (!nd) goto ret; - for (i = nd; i > 0; ) { - --i; - if (s0[i < nd0 ? i : i+1] != '0') { - ++i; - break; - } - } - nd = i; - if (nd0 > nd) - nd0 = nd; - /* Summary of parsing results. After parsing, and dealing with zero - * inputs, we have values s0, nd0, nd, e, sign, where: - * - * - s0 points to the first significant digit of the input string - * - * - nd is the total number of significant digits (here, and - * below, 'significant digits' means the set of digits of the - * significand of the input that remain after ignoring leading - * and trailing zeros). 
- * - * - nd0 indicates the position of the decimal point, if present; it - * satisfies 1 <= nd0 <= nd. The nd significant digits are in - * s0[0:nd0] and s0[nd0+1:nd+1] using the usual Python half-open slice - * notation. (If nd0 < nd, then s0[nd0] contains a '.' character; if - * nd0 == nd, then s0[nd0] could be any non-digit character.) - * - * - e is the adjusted exponent: the absolute value of the number - * represented by the original input string is n * 10**(e - nd), where - * n is the integer represented by the concatenation of - * s0[0:nd0] and s0[nd0+1:nd+1] - * - * - sign gives the sign of the input: 1 for negative, 0 for positive - * - * - the first and last significant digits are nonzero - */ + /* Overflow and underflow. */ + if (e > Big_10_exp) + goto ovfl; + else if (e <= Tiny_10_exp) + goto undfl; /* put first DBL_DIG+1 digits into integer y and z. *