Index: Python/dtoa.c
===================================================================
--- Python/dtoa.c	(revision 82087)
+++ Python/dtoa.c	(working copy)
@@ -185,14 +185,16 @@
 extern "C" {
 #endif
 
-typedef union { double d; ULong L[2]; } U;
+typedef union { ULong L[2]; double d; } U;
 
 #ifdef IEEE_8087
 #define word0(x) (x)->L[1]
 #define word1(x) (x)->L[0]
+#define Uint(hi, lo) {{lo, hi}}
 #else
 #define word0(x) (x)->L[0]
 #define word1(x) (x)->L[1]
+#define Uint(hi, lo) {{hi, lo}}
 #endif
 #define dval(x) (x)->d
 
@@ -254,6 +256,13 @@
 #define Quick_max 14
 #define Int_max 14
 
+/* Some double constants, defined via the union U to avoid the chicken-and-egg
+   problem of relying on the compiler to do correctly-rounded string->double
+   conversions. */
+static U Dbl_min = Uint((Bias + Emin)*Exp_msk1, 0);  /* 2.0 ** Emin */
+static U Exp2P =  Uint((Bias + P)*Exp_msk1, 0);  /* 2.0 ** P */
+static U Inf = Uint(Exp_mask, 0);
+
 #ifndef Flt_Rounds
 #ifdef FLT_ROUNDS
 #define Flt_Rounds FLT_ROUNDS
@@ -970,21 +979,6 @@
     return c;
 }
 
-/* Given a positive normal double x, return the difference between x and the
-   next double up.  Doesn't give correct results for subnormals. */
-
-static double
-ulp(U *x)
-{
-    Long L;
-    U u;
-
-    L = (word0(x) & Exp_mask) - (P-1)*Exp_msk1;
-    word0(&u) = L;
-    word1(&u) = 0;
-    return dval(&u);
-}
-
 /* Convert a Bigint a to a double giving the value a / 2**(32 * a->wds).
    Error < 0.75 ulps.  This function is currently used only by ratio. */
 
@@ -1155,14 +1149,8 @@
     1e20, 1e21, 1e22
 };
 
-static const double
-bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
-static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
-                                   9007199254740992.*9007199254740992.e-256
-                                   /* = 2^106 * 1e-256 */
-};
-/* The factor of 2^53 in tinytens[4] helps us avoid setting the underflow */
-/* flag unnecessarily.  It leads to a song and dance at the end of strtod. */
+static const double bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
+static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128, 1e-256 };
 #define Scale_Bit 0x10
 #define n_bigtens 5
 
@@ -1279,29 +1267,121 @@
     return q;
 }
 
-/* sulp(x) is a version of ulp(x) that takes bc.scale into account.
+/* Round a double x to the nearest integer (still in double form). */
 
-   Assuming that x is finite and nonnegative (positive zero is fine
-   here) and x / 2^bc.scale is exactly representable as a double,
-   sulp(x) is equivalent to 2^bc.scale * ulp(x / 2^bc.scale). */
+double
+rnd(double x)
+{
+    U u;
+    ULong sign;
+    dval(&u) = x;
+    sign = word0(&u) & Sign_bit;
+    word0(&u) ^= sign;
+    if (dval(&u) < 0.5 * dval(&Exp2P)) {
+        /* Some evil trickery here:  adding 2**(P-1) forces u into the range
+           [2**(P-1), 2**P];  in this range, a double is representable if and
+           only if it's an integer, so the FPU must force the result of the sum
+           to the nearest integer.  Subtracting 2**(P-1) again leaves the rounded
+           value. */
+        dval(&u) += 0.5 * dval(&Exp2P);
+        dval(&u) -= 0.5 * dval(&Exp2P);
+    }
+    word0(&u) ^= sign;
+    return dval(&u);
+}
 
+/* In strtod, the current estimate is stored in a pair (x, scale) consisting
+   of a double x and an int scale, representing the value x / 2**scale.
+
+   For a finite double x, sulp(x) returns 1 ulp(x), taking scale into account.
+   That is, sulp(x, scale) / 2**scale == ulp(x / 2**scale), where ulp(x) is the
+   difference between abs(x) and the next largest double.
+
+   Logic in the strtod function ensures that the pair (x, scale) always
+   satisfies the following two conditions:
+
+   (0) x / 2**scale is an integral multiple of 2**(Emin - P + 1).  In other
+       words, if x / 2**scale is at most DBL_MAX then it's exactly
+       representable as a double.
+
+   (1) Either x / 2**(P-1) >= 2**Emin, or scale >= P - 1.
+
+   It follows from these conditions that on input, x is necessarily either
+   normal or zero, and that sulp(x, scale) is always positive and normal.
+   Moreover, if x is zero then scale >= P - 1.
+
+*/
+
 static double
-sulp(U *x, BCinfo *bc)
+sulp(U *x, int scale)
 {
     U u;
+    int e;
 
-    if (bc->scale && 2*P + 1 > (int)((word0(x) & Exp_mask) >> Exp_shift)) {
-        /* rv/2^bc->scale is subnormal */
-        word0(&u) = (P+2)*Exp_msk1;
-        word1(&u) = 0;
-        return u.d;
-    }
-    else {
-        assert(word0(x) || word1(x)); /* x != 0.0 */
-        return ulp(x);
-    }
+    /* Method: if 2**k <= abs(x / 2**scale) < 2**(k+1) then
+
+         ulp(x / 2**scale) == max(2**Etiny, 2**k / 2**(P - 1)).
+
+       We compute
+
+          e = max(scale + 1, k + Bias + scale)
+
+       Here k + Bias + scale corresponds exactly to the value given by the
+       exponent bits of x.  From the conditions above, e > P - 1.  Setting u =
+       2**(e - Bias - P + 1) gives
+
+          u / 2**scale == 2**(e - Bias - P + 1 - scale)
+                       == 2**max(2 - P - Bias, k - P + 1)
+                       == max(2**Etiny, 2**k / 2**(P - 1)).
+                       == ulp(x / 2**scale).
+
+       The code below also works, unaltered, with x == +-0.0,
+       returning u such that u / 2**scale == 2**Etiny.
+    */
+
+    e = (int)((word0(x) & Exp_mask) >> Exp_shift);
+    if (e < scale + 1)
+        e = scale + 1;
+
+    assert(e > P - 1);
+    word0(&u) = (ULong)(e - P + 1) << Exp_shift;
+    word1(&u) = 0;
+    return dval(&u);
 }
 
+/* Given a scaled double (as used in _Py_dg_strtod), find the next largest
+   boundary, using the same scale.  If we're already on a boundary, return the
+   next one up.
+
+   A *boundary* is either 0.0, or a power of 2 that's at least 2**(Emin + 1);
+   2**Emin is not considered a boundary, because the spacing of consecutive
+   floats does not change at 2**Emin. */
+
+double next_boundary(U *x, int scale) {
+    U u;
+    int e;
+
+    e = (int)((word0(x) & Exp_mask) >> Exp_shift);
+
+    if (e < scale + 1)
+        e = scale + 1;
+    word0(&u) = (ULong)(e + 1) << Exp_shift;
+    word1(&u) = 0;
+    return dval(&u);
+}
+
+double last_boundary(U *x, int scale) {
+    U u;
+    int e;
+
+    e = (int)((word0(x) & Exp_mask) >> Exp_shift);
+    if (e <= scale + 1)
+        e = 0;
+    word0(&u) = (ULong)e << Exp_shift;
+    word1(&u) = 0;
+    return dval(&u);
+}
+
 /* parse_numeric_string: Parse and validate a finite numeric string.
 
    Inputs:
@@ -1655,22 +1735,20 @@
     Bfree(b);
     Bfree(d);
     if (dd > 0 || (dd == 0 && odd))
-        dval(rv) += sulp(rv, bc);
+        dval(rv) += sulp(rv, bc->scale);
     return 0;
 }
 
 double
 _Py_dg_strtod(const char *s00, char **se)
 {
-    int bb2, bb5, bbe, bd2, bd5, bs2, dsign, e, e1, error, i, j, k, odd, sign;
-    Py_ssize_t nd, nd0, pos;
+    int bbe, dsign, e, e1, e2, e5, error, i, k, scale, sign;
+    Py_ssize_t nd, nd0;
     char *s0;
-    double aadj, aadj1;
-    U aadj2, adj, rv, rv0;
-    ULong y, z;
-    Long L;
+    double aadj, aadj_int, dist, last, next, scalefac, ulp;
+    U rv;
+    Bigint *bb, *bd, *bs, *delta;
     BCinfo bc;
-    Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
 
     dval(&rv) = 0.;
 
@@ -1688,28 +1766,23 @@
     else if (e <= Tiny_10_exp)
         goto undfl;
 
-    /* Initial approximation based on first DBL_DIG+1 digits of the input. */
-    k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1;
-    for (pos = 0; pos < k; pos++)
-        dval(&rv) = 10.0 * dval(&rv) + (s0[pos < nd0 ? pos : pos + 1] - '0');
+    /* Initial approximation based on first DBL_DIG digits of the input. */
+    k = nd <= DBL_DIG ? nd : DBL_DIG;
+    for (i = 0; i < k; i++)
+        dval(&rv) = 10.0 * dval(&rv) + (s0[i < nd0 ? i : i + 1] - '0');
     e1 = e - k;
 
-   /* If there are at most Dbl_dig significant digits in the input, then rv
+    /* If there are at most DBL_DIG significant digits in the input, then rv
        is exact and there's a chance to compute the exact result with a single
        floating-point multiplication or division. */
     if (nd <= DBL_DIG) {
-        if (!e1)
-            goto ret;
-        if (e1 > 0) {
+        if (e1 >= 0) {
             if (e1 <= Ten_pmax) {
                 dval(&rv) *= tens[e1];
                 goto ret;
             }
             i = DBL_DIG - nd;
             if (e1 - i <= Ten_pmax) {
-                /* A fancier test would sometimes let us do
-                 * this for larger i values.
-                 */
                 dval(&rv) *= tens[i];
                 dval(&rv) *= tens[e1 - i];
                 goto ret;
@@ -1721,488 +1794,182 @@
         }
     }
 
-    bc.scale = 0;
+    /* Get starting approximation = rv * 10**e1
 
-    /* Get starting approximation = rv * 10**e1 */
-
+       For very small or very large inputs, we scale rv up or down
+       (respectively) by a factor of 2**(2*P).  For small inputs, this helps
+       avoid setting the underflow flag unnecessarily.  Similarly, for large
+       inputs the factor avoids having to deal with overflow until the very end
+       of strtod. */
+    scale = 0;
+    scalefac = 1.0;
     if (e1 > 0) {
         if ((i = e1 & 15))
             dval(&rv) *= tens[i];
-        if (e1 &= ~15) {
-            if (e1 > DBL_MAX_10_EXP)
-                goto ovfl;
-            e1 >>= 4;
-            for(j = 0; e1 > 1; j++, e1 >>= 1)
-                if (e1 & 1)
-                    dval(&rv) *= bigtens[j];
-            /* The last multiplication could overflow. */
-            word0(&rv) -= P*Exp_msk1;
-            dval(&rv) *= bigtens[j];
-            if ((z = word0(&rv) & Exp_mask)
-                > Exp_msk1*(DBL_MAX_EXP+Bias-P))
-                goto ovfl;
-            if (z > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) {
-                /* set to largest number */
-                /* (Can't trust DBL_MAX) */
-                word0(&rv) = Big0;
-                word1(&rv) = Big1;
-            }
-            else
-                word0(&rv) += P*Exp_msk1;
+        e1 >>= 4;
+        if (e1 & Scale_Bit) {
+            scale = -2*P;
+            scalefac = 1.0 / (dval(&Exp2P) * dval(&Exp2P));
+            dval(&rv) *= scalefac;
         }
+        for(i = 0; e1 > 0; i++, e1 >>= 1)
+            if (e1 & 1)
+                dval(&rv) *= bigtens[i];
     }
     else if (e1 < 0) {
-        /* The input decimal value lies in [10**e1, 10**(e1+16)).
-
-           If e1 <= -512, underflow immediately.
-           If e1 <= -256, set bc.scale to 2*P.
-
-           So for input value < 1e-256, bc.scale is always set;
-           for input value >= 1e-240, bc.scale is never set.
-           For input values in [1e-256, 1e-240), bc.scale may or may
-           not be set. */
-
         e1 = -e1;
         if ((i = e1 & 15))
             dval(&rv) /= tens[i];
-        if (e1 >>= 4) {
-            if (e1 >= 1 << n_bigtens)
-                goto undfl;
-            if (e1 & Scale_Bit)
-                bc.scale = 2*P;
-            for(j = 0; e1 > 0; j++, e1 >>= 1)
-                if (e1 & 1)
-                    dval(&rv) *= tinytens[j];
-            if (bc.scale && (j = 2*P + 1 - ((word0(&rv) & Exp_mask)
-                                            >> Exp_shift)) > 0) {
-                /* scaled rv is denormal; clear j low bits */
-                if (j >= 32) {
-                    word1(&rv) = 0;
-                    if (j >= 53)
-                        word0(&rv) = (P+2)*Exp_msk1;
-                    else
-                        word0(&rv) &= 0xffffffff << (j-32);
-                }
-                else
-                    word1(&rv) &= 0xffffffff << j;
-            }
-            if (!dval(&rv))
-                goto undfl;
+        e1 >>= 4;
+        if (e1 & Scale_Bit) {
+            scale = 2*P;
+            scalefac = dval(&Exp2P) * dval(&Exp2P);
+            dval(&rv) *= scalefac;
         }
+        for(i = 0; e1 > 0; i++, e1 >>= 1)
+            if (e1 & 1)
+                dval(&rv) *= tinytens[i];
     }
 
-    /* Now the hard part -- adjusting rv to the correct value.*/
-
-    /* Put digits into bd: true value = bd * 10^e */
-
-    bc.e0 = e;
-    bc.nd = nd;
-    bc.nd0 = nd0;       /* Only needed if nd > STRTOD_DIGLIM, but done here */
-                        /* to silence an erroneous warning about bc.nd0 */
-                        /* possibly not being initialized. */
-    if (nd > STRTOD_DIGLIM) {
-        /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */
-        /* minimum number of decimal digits to distinguish double values */
-        /* in IEEE arithmetic. */
-
-        /* Truncate input to 18 significant digits, then discard any trailing
-           zeros on the result. */
-        nd = 18;
-        while (nd > 0 && s0[nd - 1  < nd0 ? nd - 1 : nd] == '0')
-            nd--;
+    /* To ensure that rv/2**scale is representable if scale > 0,
+       it's enough to add and then subtract Dbl_min * 2**scale. */
+    if (scale > 0) {
+        dval(&rv) += dval(&Dbl_min) * scalefac;
+        dval(&rv) -= dval(&Dbl_min) * scalefac;
     }
-    bd0 = s2b(s0, nd0, nd);
-    if (bd0 == NULL)
-        goto failed_malloc;
 
-    /* Notation for the comments below.  Write:
+    /* Notation for the comments below:
 
-         - dv for the absolute value of the number represented by the original
-           decimal input string.
+         dv is the absolute value of the number represented by the original
+         decimal input string.
 
-         - if we've truncated dv, write tdv for the truncated value.
-           Otherwise, set tdv == dv.
+         If we've truncated the input (because nd > STRTOD_DIGLIM), write tdv
+         for the corresponding value.  Otherwise, set tdv == dv.  In the
+         truncated case, we have tdv < dv < (1 + 10**-17)*tdv; so if rv is a
+         (reasonably close) approximation to tdv, the error dv - tdv is at
+         most 0.091 ulp(rv).
 
-         - srv for the quantity rv/2^bc.scale; so srv is the current binary
-           approximation to tdv (and dv).  It should be exactly representable
-           in an IEEE 754 double.
+         Write srv for the quantity rv/2**scale; so srv is the current binary
+         approximation to tdv (and dv).  It should be exactly representable in
+         an IEEE 754 double.
     */
 
-    for(;;) {
+    /* Now the hard part -- adjusting rv to the correct value.*/
 
-        /* This is the main correction loop for _Py_dg_strtod.
+    /* For the initial correction, truncate to STRTOD_DIGLIM digits; this
+       introduces a truncation error of at most 1e-8 ulps. */
+    k = nd < STRTOD_DIGLIM ? nd : STRTOD_DIGLIM;
+    bb = sd2b(&rv, scale, &bbe);   /* srv = bb * 2**bbe */
+    bd = s2b(s0, nd0, k);
+    bs = i2b(1);
+    if (bb == NULL || bd == NULL || bs == NULL)
+        goto failed_malloc;
 
-           We've got a decimal value tdv, and a floating-point approximation
-           srv=rv/2^bc.scale to tdv.  The aim is to determine whether srv is
-           close enough (i.e., within 0.5 ulps) to tdv, and to compute a new
-           approximation if not.
+    /* Now tdv, srv and ulp are proportional to bd * 2**(e-k-bbe) * 5**(e-k), bb
+       and bs respectively.  Scale bb, bd, bs by the appropriate
+       powers of 5 ... */
+    e5 = e - k;
+    if (e5 > 0)
+        bd = pow5mult(bd, e5);
+    else if (e5 < 0) {
+        bb = pow5mult(bb, -e5);
+        bs = pow5mult(bs, -e5);
+    }
+    if (bb == NULL || bd == NULL || bs == NULL)
+        goto failed_malloc;
 
-           To determine whether srv is close enough to tdv, compute integers
-           bd, bb and bs proportional to tdv, srv and 0.5 ulp(srv)
-           respectively, and then use integer arithmetic to determine whether
-           |tdv - srv| is less than, equal to, or greater than 0.5 ulp(srv).
-        */
+    /* ... and powers of 2. */
+    e2 = e - k - bbe;
+    if (e2 > 0)
+        bd = lshift(bd, e2);
+    else if (e2 < 0) {
+        bb = lshift(bb, -e2);
+        bs = lshift(bs, -e2);
+    }
+    if (bb == NULL || bd == NULL || bs == NULL)
+        goto failed_malloc;
 
-        bd = Balloc(bd0->k);
-        if (bd == NULL) {
-            Bfree(bd0);
-            goto failed_malloc;
-        }
-        Bcopy(bd, bd0);
-        bb = sd2b(&rv, bc.scale, &bbe);   /* srv = bb * 2^bbe */
-        if (bb == NULL) {
-            Bfree(bd);
-            Bfree(bd0);
-            goto failed_malloc;
-        }
-        /* Record whether lsb of bb is odd, in case we need this
-           for the round-to-even step later. */
-        odd = bb->x[0] & 1;
+    /* Now tdv, srv and ulp are proportional to bd, bb and bs
+       respectively.  Compute the quotient aadj = (srv - tdv) / ulp =
+       (bb - bd) / bs , as a double. */
+    delta = diff(bb, bd);
+    dsign = delta->sign;
+    if (delta == NULL)
+        goto failed_malloc;
+    aadj = ratio(delta, bs);
+    if (!dsign)
+        aadj = -aadj;
+    Bfree(delta);
+    Bfree(bd);
+    Bfree(bb);
+    Bfree(bs);
 
-        /* tdv = bd * 10**e;  srv = bb * 2**bbe */
-        bs = i2b(1);
-        if (bs == NULL) {
-            Bfree(bb);
-            Bfree(bd);
-            Bfree(bd0);
-            goto failed_malloc;
-        }
+    /* Finished with the integer arithmetic (for now). */
 
-        if (e >= nd) {
-            bb2 = bb5 = 0;
-            bd2 = bd5 = e - nd;
-        }
-        else {
-            bb2 = bb5 = nd - e;
-            bd2 = bd5 = 0;
-        }
-        if (bbe >= 0)
-            bb2 += bbe;
-        else
-            bd2 -= bbe;
-        bs2 = bb2;
-        bb2++;
-        bd2++;
+    /* Invariant:  here and after the correction, tdv ~=  srv + ulp * aadj. */
 
-        /* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1,
-           and bs == 1, so:
+    ulp = sulp(&rv, scale);
+    /* next = next point at which ulp(rv) changes. */
+    next = next_boundary(&rv, scale);
+    last = last_boundary(&rv, scale);
 
-              tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5)
-              srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2)
-              0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
+    /* Does the adjustment takes us past a normal power-of-2 boundary? */
+    if (aadj >= 0.0 && aadj >= (dist = (next - dval(&rv))/ulp)) {
+        /* Adjustment takes us past a power of 2 boundary, going up. */
+        aadj *= 0.5;
+        ulp *= 2.0;
+        /* Round to integer if dist is even, else to half-odd-integer. */
+        dist *= 0.5;
+        aadj_int = dist + rnd(aadj - dist);
+    }
+    else if (aadj < 0.0 && aadj < (last - dval(&rv))/ulp && last != 0.0) {
+        /* Adjustment takes us past a power of 2 boundary, going down. */
+        aadj *= 2.0;
+        ulp *= 0.5;
+        aadj_int = rnd(aadj);
+    }
+    else {
+        /* Usual case.  Must always adjust by at least 1 ulp. */
+        aadj_int = rnd(aadj);
+        if (aadj_int == 0.0)
+            aadj_int = aadj >= 0.0 ? 1.0 : -1.0;
+    }
+    /* Adjust, and compute residual error. */
+    dval(&rv) += aadj_int * ulp;
+    aadj -= aadj_int;
 
-           It follows that:
+    /* Now dv ~ tdv ~= srv + ulp * aadj, abs(aadj) <= 0.5000001, and ulp is
+       the difference between srv and the next nearest double in the
+       direction of aadj.  This is true even if srv is a power-of-2
+       boundary. */
 
-              M * tdv = bd * 2**bd2 * 5**bd5
-              M * srv = bb * 2**bb2 * 5**bb5
-              M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5
-
-           for some constant M.  (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
-           this fact is not needed below.)
-        */
-
-        /* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */
-        i = bb2 < bd2 ? bb2 : bd2;
-        if (i > bs2)
-            i = bs2;
-        if (i > 0) {
-            bb2 -= i;
-            bd2 -= i;
-            bs2 -= i;
+    /* If we're in a near halfway case, use bigcomp to figure out
+       the correctly-rounded value */
+    if (aadj < -0.4999999 || aadj > 0.4999999) {
+        /* input to bigcomp should be the lower of the two possible results */
+        if (aadj < 0.0) {
+            dval(&rv) -= ulp;
+            /* aadj += 1.0; */
         }
-
-        /* Scale bb, bd, bs by the appropriate powers of 2 and 5. */
-        if (bb5 > 0) {
-            bs = pow5mult(bs, bb5);
-            if (bs == NULL) {
-                Bfree(bb);
-                Bfree(bd);
-                Bfree(bd0);
-                goto failed_malloc;
-            }
-            bb1 = mult(bs, bb);
-            Bfree(bb);
-            bb = bb1;
-            if (bb == NULL) {
-                Bfree(bs);
-                Bfree(bd);
-                Bfree(bd0);
-                goto failed_malloc;
-            }
-        }
-        if (bb2 > 0) {
-            bb = lshift(bb, bb2);
-            if (bb == NULL) {
-                Bfree(bs);
-                Bfree(bd);
-                Bfree(bd0);
-                goto failed_malloc;
-            }
-        }
-        if (bd5 > 0) {
-            bd = pow5mult(bd, bd5);
-            if (bd == NULL) {
-                Bfree(bb);
-                Bfree(bs);
-                Bfree(bd0);
-                goto failed_malloc;
-            }
-        }
-        if (bd2 > 0) {
-            bd = lshift(bd, bd2);
-            if (bd == NULL) {
-                Bfree(bb);
-                Bfree(bs);
-                Bfree(bd0);
-                goto failed_malloc;
-            }
-        }
-        if (bs2 > 0) {
-            bs = lshift(bs, bs2);
-            if (bs == NULL) {
-                Bfree(bb);
-                Bfree(bd);
-                Bfree(bd0);
-                goto failed_malloc;
-            }
-        }
-
-        /* Now bd, bb and bs are scaled versions of tdv, srv and 0.5 ulp(srv),
-           respectively.  Compute the difference |tdv - srv|, and compare
-           with 0.5 ulp(srv). */
-
-        delta = diff(bb, bd);
-        if (delta == NULL) {
-            Bfree(bb);
-            Bfree(bs);
-            Bfree(bd);
-            Bfree(bd0);
-            goto failed_malloc;
-        }
-        dsign = delta->sign;
-        delta->sign = 0;
-        i = cmp(delta, bs);
-        if (bc.nd > nd && i <= 0) {
-            if (dsign)
-                break;  /* Must use bigcomp(). */
-
-            /* Here rv overestimates the truncated decimal value by at most
-               0.5 ulp(rv).  Hence rv either overestimates the true decimal
-               value by <= 0.5 ulp(rv), or underestimates it by some small
-               amount (< 0.1 ulp(rv)); either way, rv is within 0.5 ulps of
-               the true decimal value, so it's possible to exit.
-
-               Exception: if scaled rv is a normal exact power of 2, but not
-               DBL_MIN, then rv - 0.5 ulp(rv) takes us all the way down to the
-               next double, so the correctly rounded result is either rv - 0.5
-               ulp(rv) or rv; in this case, use bigcomp to distinguish. */
-
-            if (!word1(&rv) && !(word0(&rv) & Bndry_mask)) {
-                /* rv can't be 0, since it's an overestimate for some
-                   nonzero value.  So rv is a normal power of 2. */
-                j = (int)(word0(&rv) & Exp_mask) >> Exp_shift;
-                /* rv / 2^bc.scale = 2^(j - 1023 - bc.scale); use bigcomp if
-                   rv / 2^bc.scale >= 2^-1021. */
-                if (j - bc.scale >= 2) {
-                    dval(&rv) -= 0.5 * sulp(&rv, &bc);
-                    break; /* Use bigcomp. */
-                }
-            }
-
-            {
-                bc.nd = nd;
-                i = -1; /* Discarded digits make delta smaller. */
-            }
-        }
-
-        if (i < 0) {
-            /* Error is less than half an ulp -- check for
-             * special case of mantissa a power of two.
-             */
-            if (dsign || word1(&rv) || word0(&rv) & Bndry_mask
-                || (word0(&rv) & Exp_mask) <= (2*P+1)*Exp_msk1
-                ) {
-                break;
-            }
-            if (!delta->x[0] && delta->wds <= 1) {
-                /* exact result */
-                break;
-            }
-            delta = lshift(delta,Log2P);
-            if (delta == NULL) {
-                Bfree(bb);
-                Bfree(bs);
-                Bfree(bd);
-                Bfree(bd0);
-                goto failed_malloc;
-            }
-            if (cmp(delta, bs) > 0)
-                goto drop_down;
-            break;
-        }
-        if (i == 0) {
-            /* exactly half-way between */
-            if (dsign) {
-                if ((word0(&rv) & Bndry_mask1) == Bndry_mask1
-                    &&  word1(&rv) == (
-                        (bc.scale &&
-                         (y = word0(&rv) & Exp_mask) <= 2*P*Exp_msk1) ?
-                        (0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) :
-                        0xffffffff)) {
-                    /*boundary case -- increment exponent*/
-                    word0(&rv) = (word0(&rv) & Exp_mask)
-                        + Exp_msk1
-                        ;
-                    word1(&rv) = 0;
-                    dsign = 0;
-                    break;
-                }
-            }
-            else if (!(word0(&rv) & Bndry_mask) && !word1(&rv)) {
-              drop_down:
-                /* boundary case -- decrement exponent */
-                if (bc.scale) {
-                    L = word0(&rv) & Exp_mask;
-                    if (L <= (2*P+1)*Exp_msk1) {
-                        if (L > (P+2)*Exp_msk1)
-                            /* round even ==> */
-                            /* accept rv */
-                            break;
-                        /* rv = smallest denormal */
-                        if (bc.nd > nd)
-                            break;
-                        goto undfl;
-                    }
-                }
-                L = (word0(&rv) & Exp_mask) - Exp_msk1;
-                word0(&rv) = L | Bndry_mask1;
-                word1(&rv) = 0xffffffff;
-                break;
-            }
-            if (!odd)
-                break;
-            if (dsign)
-                dval(&rv) += sulp(&rv, &bc);
-            else {
-                dval(&rv) -= sulp(&rv, &bc);
-                if (!dval(&rv)) {
-                    if (bc.nd >nd)
-                        break;
-                    goto undfl;
-                }
-            }
-            dsign = 1 - dsign;
-            break;
-        }
-        if ((aadj = ratio(delta, bs)) <= 2.) {
-            if (dsign)
-                aadj = aadj1 = 1.;
-            else if (word1(&rv) || word0(&rv) & Bndry_mask) {
-                if (word1(&rv) == Tiny1 && !word0(&rv)) {
-                    if (bc.nd >nd)
-                        break;
-                    goto undfl;
-                }
-                aadj = 1.;
-                aadj1 = -1.;
-            }
-            else {
-                /* special case -- power of FLT_RADIX to be */
-                /* rounded down... */
-
-                if (aadj < 2./FLT_RADIX)
-                    aadj = 1./FLT_RADIX;
-                else
-                    aadj *= 0.5;
-                aadj1 = -aadj;
-            }
-        }
-        else {
-            aadj *= 0.5;
-            aadj1 = dsign ? aadj : -aadj;
-            if (Flt_Rounds == 0)
-                aadj1 += 0.5;
-        }
-        y = word0(&rv) & Exp_mask;
-
-        /* Check for overflow */
-
-        if (y == Exp_msk1*(DBL_MAX_EXP+Bias-1)) {
-            dval(&rv0) = dval(&rv);
-            word0(&rv) -= P*Exp_msk1;
-            adj.d = aadj1 * ulp(&rv);
-            dval(&rv) += adj.d;
-            if ((word0(&rv) & Exp_mask) >=
-                Exp_msk1*(DBL_MAX_EXP+Bias-P)) {
-                if (word0(&rv0) == Big0 && word1(&rv0) == Big1) {
-                    Bfree(bb);
-                    Bfree(bd);
-                    Bfree(bs);
-                    Bfree(bd0);
-                    Bfree(delta);
-                    goto ovfl;
-                }
-                word0(&rv) = Big0;
-                word1(&rv) = Big1;
-                goto cont;
-            }
-            else
-                word0(&rv) += P*Exp_msk1;
-        }
-        else {
-            if (bc.scale && y <= 2*P*Exp_msk1) {
-                if (aadj <= 0x7fffffff) {
-                    if ((z = (ULong)aadj) <= 0)
-                        z = 1;
-                    aadj = z;
-                    aadj1 = dsign ? aadj : -aadj;
-                }
-                dval(&aadj2) = aadj1;
-                word0(&aadj2) += (2*P+1)*Exp_msk1 - y;
-                aadj1 = dval(&aadj2);
-            }
-            adj.d = aadj1 * ulp(&rv);
-            dval(&rv) += adj.d;
-        }
-        z = word0(&rv) & Exp_mask;
-        if (bc.nd == nd) {
-            if (!bc.scale)
-                if (y == z) {
-                    /* Can we stop now? */
-                    L = (Long)aadj;
-                    aadj -= L;
-                    /* The tolerances below are conservative. */
-                    if (dsign || word1(&rv) || word0(&rv) & Bndry_mask) {
-                        if (aadj < .4999999 || aadj > .5000001)
-                            break;
-                    }
-                    else if (aadj < .4999999/FLT_RADIX)
-                        break;
-                }
-        }
-      cont:
-        Bfree(bb);
-        Bfree(bd);
-        Bfree(bs);
-        Bfree(delta);
-    }
-    Bfree(bb);
-    Bfree(bd);
-    Bfree(bs);
-    Bfree(bd0);
-    Bfree(delta);
-    if (bc.nd > nd) {
+	bc.nd0 = nd0;
+	bc.nd = nd;
+	bc.e0 = e;
+	bc.scale = scale;
         error = bigcomp(&rv, s0, &bc);
         if (error)
             goto failed_malloc;
     }
 
-    if (bc.scale) {
-        word0(&rv0) = Exp_1 - 2*P*Exp_msk1;
-        word1(&rv0) = 0;
-        dval(&rv) *= dval(&rv0);
-    }
+    /* Underflow and overflow checks. */
+    if (word0(&rv) == 0 && word1(&rv) == 0)
+        goto undfl;
+    if ((int)((word0(&rv) & Exp_mask) >> Exp_shift) - scale > Emax + Bias)
+        goto ovfl;
 
+    /* Undo the effects of the scaling, if applicable. */
+    if (scale)
+        dval(&rv) /= scalefac;
+
   ret:
     return sign ? -dval(&rv) : dval(&rv);
 
@@ -2210,6 +1977,9 @@
     return 0.0;
 
   failed_malloc:
+    if (bd != NULL) Bfree(bd);
+    if (bb != NULL) Bfree(bb);
+    if (bs != NULL) Bfree(bs);
     errno = ENOMEM;
     return -1.0;
 
@@ -2218,9 +1988,7 @@
 
   ovfl:
     errno = ERANGE;
-    /* Can't trust HUGE_VAL */
-    word0(&rv) = Exp_mask;
-    word1(&rv) = 0;
+    dval(&rv) = dval(&Inf);
     return sign ? -dval(&rv) : dval(&rv);
 
 }