diff -r 9a4de4a1a154 -r 9659c24fcee9 Lib/test/test_long.py
--- a/Lib/test/test_long.py	Wed Dec 23 11:30:45 2009 +0100
+++ b/Lib/test/test_long.py	Wed Dec 23 15:06:55 2009 +0000
@@ -14,6 +14,11 @@
     def __str__(self):
         return self.format % self.args
 
+# decorator for skipping tests on non-IEEE 754 platforms
+requires_IEEE_754 = unittest.skipUnless(
+    float.__getformat__("double").startswith("IEEE"),
+    "test requires IEEE 754 doubles")
+
 # SHIFT should match the value in longintrepr.h for best testing.
 SHIFT = sys.int_info.bits_per_digit
 BASE = 2 ** SHIFT
@@ -35,6 +40,43 @@
 # add complements & negations
 special += [~x for x in special] + [-x for x in special]
 
+DBL_MAX = sys.float_info.max
+DBL_MAX_EXP = sys.float_info.max_exp
+DBL_MIN_EXP = sys.float_info.min_exp
+DBL_MANT_DIG = sys.float_info.mant_dig
+DBL_MIN_OVERFLOW = 2**DBL_MAX_EXP - 2**(DBL_MAX_EXP - DBL_MANT_DIG - 1)
+
+# pure Python version of correctly-rounded true division
+def true_div(a, b):
+    """Correctly-rounded true division for integers."""
+    negative = a^b < 0
+    a, b = abs(a), abs(b)
+
+    # exceptions:  division by zero, overflow
+    if not b:
+        raise ZeroDivisionError("division by zero")
+    if a >= DBL_MIN_OVERFLOW * b:
+        raise OverflowError("int/int too large to represent as a float")
+
+   # find integer d satisfying 2**(d - 1) <= a/b < 2**d
+    d = a.bit_length() - b.bit_length()
+    if d >= 0 and a >= 2**d * b or d < 0 and a * 2**-d >= b:
+        d += 1
+
+    # compute 2**-exp * a / b for suitable exp
+    exp = max(d, DBL_MIN_EXP) - DBL_MANT_DIG
+    a, b = a << max(-exp, 0), b << max(exp, 0)
+    q, r = divmod(a, b)
+
+    # round-half-to-even: fractional part is r/b, which is > 0.5 iff
+    # 2*r > b, and == 0.5 iff 2*r == b.
+    if 2*r > b or 2*r == b and q % 2 == 1:
+        q += 1
+
+    result = float(q) * 2.**exp
+    return -result if negative else result
+
+
 class LongTest(unittest.TestCase):
 
     # Get quasi-random long consisting of ndigits digits (in base BASE).
@@ -306,10 +348,6 @@
     @unittest.skipUnless(float.__getformat__("double").startswith("IEEE"),
                          "test requires IEEE 754 doubles")
     def test_float_conversion(self):
-        import sys
-        DBL_MAX = sys.float_info.max
-        DBL_MAX_EXP = sys.float_info.max_exp
-        DBL_MANT_DIG = sys.float_info.mant_dig
 
         exact_values = [0, 1, 2,
                          2**53-3,
@@ -614,6 +652,111 @@
         for zero in ["huge / 0", "mhuge / 0"]:
             self.assertRaises(ZeroDivisionError, eval, zero, namespace)
 
+    def check_div(self, a, b, skip_small=True):
+        """Verify that the result of a/b is correctly rounded, by
+        comparing it with a pure Python implementation of correctly
+        rounded division.  b should be nonzero."""
+
+        # skip check for small a and b: in this case, the current
+        # implementation converts the arguments to float directly and
+        # then applies a float division.  This can give doubly-rounded
+        # results on x87-using machines (particularly 32-bit Linux).
+        if skip_small and max(abs(a), abs(b)) < 2**DBL_MANT_DIG:
+            return
+
+        try:
+            expected = repr(true_div(a, b))
+        except OverflowError:
+            expected = 'overflow'
+
+        try:
+            got = repr(a / b)
+        except OverflowError:
+            got = 'overflow'
+
+        if expected != got:
+             self.fail("Incorrectly rounded division {}/{}: expected {!r}, "
+                       "got {!r}.".format(a, b, expected, got))
+
+    @requires_IEEE_754
+    def test_correctly_rounded_true_division(self):
+        # more stringent tests than those above, checking that the
+        # result of true division of ints is always correctly rounded.
+        # This test should probably be considered CPython-specific.
+
+        # Exercise all the code paths not involving Gb-sized ints.
+        # ... divisions involving zero
+        self.check_div(0, 3)
+        self.check_div(0, -3)
+        # ... overflow by large margin
+        self.check_div(671 * 12345 * 2**DBL_MAX_EXP, 12345)
+        # ... underflow by large margin
+        self.check_div(12345, 345678 * 2**(DBL_MANT_DIG - DBL_MIN_EXP))
+        # ... a much larger than b
+        self.check_div(12345*2**100, 98765)
+        # ... a much smaller than b
+        self.check_div(12345*2**30, 98765*7**81)
+        # ... a / b near: 1, 2**DBL_MANT_DIG, 2**DBL_MIN_EXP, 2**DBL_MAX_EXP,
+        #                 2**(DBL_MIN_EXP-DBL_MANT_DIG)
+        bases = (0, DBL_MANT_DIG, DBL_MIN_EXP,
+                 DBL_MAX_EXP, DBL_MIN_EXP - DBL_MANT_DIG)
+        for base in bases:
+            for exp in range(base - 15, base + 15):
+                self.check_div(75312*2**max(exp, 0), 69187*2**max(-exp, 0))
+                self.check_div(69187*2**max(exp, 0), 75312*2**max(-exp, 0))
+
+        # 1/2731 is one of the smallest division cases that's subject
+        # to double rounding on IEEE 754 machines working internally with
+        # 64-bit precision.  On such machines, the next check would fail,
+        # were it not explicitly skipped in check_div.
+        self.check_div(1, 2731)
+
+        # check detection of inexactness in shifting stage
+        for n in range(250):
+            # (2**DBL_MANT_DIG+1)/(2**DBL_MANT_DIG) lies halfway between two representable
+            # floats, and would usually be rounded down under round-half-to-even.  The
+            # tiniest of additions to the numerator should cause it to be rounded up instead.
+            self.check_div((2**DBL_MANT_DIG + 1)*12345*2**200 + 2**n, 2**DBL_MANT_DIG*12345)
+        
+        # a particularly bad case for the old algorithm:  gives an
+        # error of close to 3.5 ulps.
+        self.check_div(295147931372582273023, 295147932265116303360)
+        for i in range(1000):
+            self.check_div(10**(i+1), 10**i)
+
+        # test round-half-to-even behaviour, normal result
+        for m in [1, 2, 7, 17, 12345, 7**100, 
+                  -1, -2, -5, -23, -67891, -41**50]:
+            for n in range(-10, 10):
+                self.check_div(2**DBL_MANT_DIG*m + n, m)
+
+        # test round-half-to-even, subnormal result
+        for n in range(-20, 20):
+            self.check_div(n, 2**1076)
+
+        # largeish random divisions: a/b where |a| <= |b| <=
+        # 2*|a|; |ans| is between 0.5 and 1.0, so error should
+        # always be bounded by 2**-54 with equality possible only
+        # if the least significant bit of q=ans*2**53 is zero.
+        for M in [10**10, 10**100, 10**1000]:
+            for i in range(1000):
+                a = random.randrange(1, M)
+                b = random.randrange(a, 2*a+1)
+                self.check_div(a, b)
+                self.check_div(-a, b)
+                self.check_div(a, -b)
+                self.check_div(-a, -b)
+
+        # and some (genuinely) random tests
+        for _ in range(10000):
+            a_bits = random.randrange(1000)
+            b_bits = random.randrange(1, 1000)
+            x = random.randrange(2**a_bits)
+            y = random.randrange(1, 2**b_bits)
+            self.check_div(x, y)
+            self.check_div(x, -y)
+            self.check_div(-x, y)
+            self.check_div(-x, -y)
 
     def test_small_ints(self):
         for i in range(-5, 257):
diff -r 9a4de4a1a154 -r 9659c24fcee9 Objects/longobject.c
--- a/Objects/longobject.c	Wed Dec 23 11:30:45 2009 +0100
+++ b/Objects/longobject.c	Wed Dec 23 15:06:55 2009 +0000
@@ -3213,47 +3213,241 @@
 	return (PyObject *)div;
 }
 
+/* int/int -> float.  Result is correctly rounded on IEEE 754 platforms. */
+
 static PyObject *
-long_true_divide(PyObject *a, PyObject *b)
+long_true_divide(PyObject *aa, PyObject *bb)
 {
-	double ad, bd;
-	int failed, aexp = -1, bexp = -1;
-
-	CHECK_BINOP(a, b);
-	ad = _PyLong_AsScaledDouble((PyObject *)a, &aexp);
-	bd = _PyLong_AsScaledDouble((PyObject *)b, &bexp);
-	failed = (ad == -1.0 || bd == -1.0) && PyErr_Occurred();
-	if (failed)
-		return NULL;
-	/* 'aexp' and 'bexp' were initialized to -1 to silence gcc-4.0.x,
-	   but should really be set correctly after sucessful calls to
-	   _PyLong_AsScaledDouble() */
-	assert(aexp >= 0 && bexp >= 0);
-
-	if (bd == 0.0) {
+	PyLongObject *a, *b, *x;
+	Py_ssize_t a_size, b_size, shift, extra_bits, diff, x_size, x_bits;
+	digit mask, low;
+	int inexact, negate, a_small, b_small;
+	double dx, result;
+
+	CHECK_BINOP(aa, bb);
+	a = (PyLongObject *)aa;
+	b = (PyLongObject *)bb;
+
+	/*
+	   Method in a nutshell:
+
+	     0. reduce to case a, b > 0
+	     1. choose a suitable integer 'shift'
+	     2. use integer arithmetic to compute x = floor(2**-shift*a/b)
+	     3. adjust x for correct rounding
+	     4. convert x to a double dx with the same value
+	     5. return ldexp(dx, shift).
+
+	   In more detail:
+
+	   1. The integer shift is chosen so that x has the right number of
+	   bits for a double, plus two or three extra bits that will be used
+	   in the rounding decisions.  Writing a_bits and b_bits for the
+	   number of significant bits in a and b respectively, a
+	   straightforward formula for shift is:
+
+	       shift = a_bits - b_bits - DBL_MANT_DIG - 2
+
+	   This is fine in the usual case, but if a/b is tiny then it can lead
+	   to double rounding on an IEEE 754 platform, giving incorrectly
+	   rounded results.  So for this case we adjust the formula slightly.
+	   The actual formula used is:
+
+	       shift = MAX(a_bits - b_bits, DBL_MIN_EXP) - DBL_MANT_DIG - 2
+
+	   2. The quantity x is computed by first shifting a (left -shift bits
+	   if shift <= 0, right shift bits if shift > 0) and then dividing by
+	   b; the signs of a and b are ignored at this stage.  For both the
+	   shift and the division, we must also keep track of whether the
+	   result becomes inexact; this information is needed at the rounding
+	   stage.  Note that with the choice of shift above, x is always >= 1.
+
+	   3. To round, we just modify the bottom digit of x in-place; this
+	   can end up giving a digit with value > PyLONG_MASK, but that's
+	   okay.  The rounding involves first figuring out how many extra bits
+	   x has beyond the number that the resulting double can hold (the
+	   latter number being DBL_MANT_DIG in the usual case, but something
+	   smaller for subnormal results).  The relevant formula is:
+
+	       extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG
+
+	   With the above choice for shift, extra_bits will always be 2 or 3.
+	   Then rounding under the round-half-to-even rule, we round up iff
+	   the most significant of the extra bits is 1, and either:
+	     - the computation of x in step 2 had an inexact result, or
+	     - at least one other of the extra bits is 1, or
+	     - the least significant bit of x (above those to be rounded) is 1.
+
+	   4. Conversion to a double is straightforward; all floating-point
+	   operations involved in the conversion are exact, so there's no
+	   danger of rounding errors.
+
+	   5. Use ldexp(x, shift) to compute x*2**shift, the final result.
+	   The result will always be exactly representable as a double, except
+	   in the case that it overflows.  To avoid dependence on the exact
+	   behaviour of ldexp on overflow, we check for overflow before
+	   applying ldexp.  The result of ldexp is adjusted for sign before
+	   returning.
+	*/
+
+	/* Reduce to case where a and b are both positive. */
+	a_size = ABS(Py_SIZE(a));
+	b_size = ABS(Py_SIZE(b));
+	negate = (Py_SIZE(a) < 0) ^ (Py_SIZE(b) < 0);
+	if (b_size == 0) {
 		PyErr_SetString(PyExc_ZeroDivisionError,
-			"integer division or modulo by zero");
+				"division by zero");
 		return NULL;
 	}
-
-	/* True value is very close to ad/bd * 2**(PyLong_SHIFT*(aexp-bexp)) */
-	ad /= bd;	/* overflow/underflow impossible here */
-	aexp -= bexp;
-	if (aexp > INT_MAX / PyLong_SHIFT)
+	if (a_size == 0)
+		goto underflow_or_zero;
+
+#define MANT_DIG_DIGITS (DBL_MANT_DIG / PyLong_SHIFT)
+#define MANT_DIG_BITS (DBL_MANT_DIG % PyLong_SHIFT)
+	/* Fast path for a and b small (exactly representable in a double).
+	   Relies on floating-point division being correctly rounded; results
+	   may be subject to double rounding on x86 machines that operate with
+	   the x87 FPU set to 64-bit precision. */
+	a_small = a_size <= MANT_DIG_DIGITS ||
+		(a_size == MANT_DIG_DIGITS+1 &&
+		 a->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0);
+	b_small = b_size <= MANT_DIG_DIGITS ||
+		(b_size == MANT_DIG_DIGITS+1 &&
+		 b->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0);
+	if (a_small && b_small) {
+		double da, db;
+		da = a->ob_digit[--a_size];
+		while (a_size > 0)
+			da = da * PyLong_BASE + a->ob_digit[--a_size];
+		db = b->ob_digit[--b_size];
+		while (b_size > 0)
+			db = db * PyLong_BASE + b->ob_digit[--b_size];
+		da /= db;
+		return PyFloat_FromDouble(negate ? -da : da);
+	}
+
+	/* Figure out what value to use for 'shift'. */
+	/* Catch cases of extreme overflow or underflow. */
+	diff = a_size - b_size;
+	if (diff > PY_SSIZE_T_MAX/PyLong_SHIFT - 1)
 		goto overflow;
-	else if (aexp < -(INT_MAX / PyLong_SHIFT))
-		return PyFloat_FromDouble(0.0);	/* underflow to 0 */
-	errno = 0;
-	ad = ldexp(ad, aexp * PyLong_SHIFT);
-	if (Py_OVERFLOWED(ad)) /* ignore underflow to 0.0 */
+	else if (diff < 1 - PY_SSIZE_T_MAX/PyLong_SHIFT)
+		goto underflow_or_zero;
+
+	/* The expression in the line below is now safe from overflow.
+	   Compute diff = a_bits - b_bits. */
+	diff = diff * PyLong_SHIFT + bits_in_digit(a->ob_digit[a_size - 1]) -
+		bits_in_digit(b->ob_digit[b_size - 1]);
+	/* Catch most other cases of overflow and underflow here. */
+	if (diff > DBL_MAX_EXP)
 		goto overflow;
-	return PyFloat_FromDouble(ad);
-
-overflow:
+	if (diff < DBL_MIN_EXP - DBL_MANT_DIG - 1)
+		goto underflow_or_zero;
+	shift = MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2;
+
+	inexact = 0;
+	/* x = abs(a * 2**-shift) */
+	if (shift <= 0) {
+		Py_ssize_t i, shift_digits = -shift / PyLong_SHIFT;
+		digit rem;
+		if (a_size >= PY_SSIZE_T_MAX - 1 - shift_digits) {
+			/* In practice, it's probably impossible to end up
+			   here.  Both a and b would have to be enormous,
+			   using close to SIZE_T_MAX bytes of memory each. */
+			PyErr_SetString(PyExc_OverflowError,
+				    "intermediate overflow during division");
+			return NULL;
+		}
+		x = _PyLong_New(a_size + shift_digits + 1);
+		if (x == NULL)
+			return NULL;
+		for (i = 0; i < shift_digits; i++)
+			x->ob_digit[i] = 0;
+		rem = v_lshift(x->ob_digit + shift_digits, a->ob_digit,
+			       a_size, -shift % PyLong_SHIFT);
+		x->ob_digit[a_size + shift_digits] = rem;
+	}
+	else {
+		Py_ssize_t shift_digits = shift / PyLong_SHIFT;
+		digit rem;
+		assert(a_size >= shift_digits);
+		x = _PyLong_New(a_size - shift_digits);
+		if (x == NULL)
+			return NULL;
+		rem = v_rshift(x->ob_digit, a->ob_digit + shift_digits,
+			       a_size - shift_digits, shift % PyLong_SHIFT);
+		/* check whether any of the bits shifted out is nonzero */
+		if (rem)
+			inexact = 1;
+		while (!inexact && shift_digits > 0)
+			if (a->ob_digit[--shift_digits])
+				inexact = 1;
+	}
+	long_normalize(x);
+	x_size = Py_SIZE(x);
+
+	/* Divide x by b. If remainder is nonzero, set inexact.  We own the
+	   only reference to x, so we're free to modify it in-place. */
+	if (b_size == 1) {
+		digit rem = inplace_divrem1(x->ob_digit, x->ob_digit, x_size,
+				      b->ob_digit[0]);
+		long_normalize(x);
+		if (rem)
+			inexact = 1;
+	}
+	else {
+		PyLongObject *rem;
+		PyLongObject *div = x_divrem(x, b, &rem);
+		Py_DECREF(x);
+		x = div;
+		if (x == NULL)
+			return NULL;
+		if (Py_SIZE(rem))
+			inexact = 1;
+		Py_DECREF(rem);
+	}
+	x_size = ABS(Py_SIZE(x));
+	/* The result of the division can never be zero. */
+	assert(x_size > 0);
+	x_bits = (x_size - 1)*PyLong_SHIFT +
+		bits_in_digit(x->ob_digit[x_size-1]);
+	/* Now round x. */
+	assert(0 < x_bits && x_bits <= DBL_MANT_DIG + 3);
+
+	/* The number of extra bits that have to be rounded away. */
+	extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
+	assert(extra_bits == 2 || extra_bits == 3);
+	mask = (digit)1 << (extra_bits - 1);
+
+	/* We round by directly modifying the low digit of x. */
+	low = x->ob_digit[0] | inexact;
+	if (low & mask && low & (3*mask-1))
+		low += mask;
+	x->ob_digit[0] = low & ~(mask-1U);
+
+	/* Convert x to a double dx; the conversion is exact. */
+	x_size = ABS(Py_SIZE(x));
+	dx = 0.0;
+	while (x_size > 0)
+		dx = dx * PyLong_BASE + x->ob_digit[--x_size];
+	Py_DECREF(x);
+	assert(0.0 <= dx && dx <= ldexp(1.0, x_bits));
+
+	/* Check whether ldexp result will overflow a double. */
+	if (shift + x_bits >= DBL_MAX_EXP &&
+	    (shift + x_bits > DBL_MAX_EXP || dx == ldexp(1.0, x_bits)))
+		goto overflow;
+	result = ldexp(dx, shift);
+	return PyFloat_FromDouble(negate ? -result : result);
+
+
+  underflow_or_zero:
+	return PyFloat_FromDouble(negate ? -0.0 : 0.0);
+
+  overflow:
 	PyErr_SetString(PyExc_OverflowError,
-		"int/int too large for a float");
+			"int/int too large for a float");
 	return NULL;
-
 }
 
 static PyObject *