=== modified file 'Objects/longobject.c' --- Objects/longobject.c 2009-03-18 20:06:12 +0000 +++ Objects/longobject.c 2009-03-18 21:16:01 +0000 @@ -1415,25 +1415,45 @@ return borrow; } -/* Multiply by a single digit, ignoring the sign. */ - -static PyLongObject * -mul1(PyLongObject *a, digit n) -{ - Py_ssize_t size_a = ABS(Py_SIZE(a)); - PyLongObject *z = _PyLong_New(size_a+1); - twodigits carry = 0; - Py_ssize_t i; - - if (z == NULL) - return NULL; - for (i = 0; i < size_a; ++i) { - carry += (twodigits)a->ob_digit[i] * n; - z->ob_digit[i] = (digit) (carry & PyLong_MASK); - carry >>= PyLong_SHIFT; - } - z->ob_digit[i] = (digit) carry; - return long_normalize(z); +/* multiply a[0:size] by 2**d, 0 <= d < PyLong_SHIFT, placing the bottom size + digits of the result in z[0:size]. The carry out of the top digit is + returned. a and z can be equal. */ + +static digit +inplace_lshift(digit *z, digit *a, Py_ssize_t size, int d) +{ + Py_ssize_t i; + digit carry = 0; + + assert(0 <= d && d < PyLong_SHIFT); + + for (i=0; i < size; i++) { + twodigits acc = (twodigits)a[i] << d | carry; + z[i] = (digit)acc & PyLong_MASK; + carry = (digit)(acc >> PyLong_SHIFT); + } + return carry; +} + +/* divide a[0:size] by 2**d, 0 <= d < PyLong_SHIFT, placing the quotient in + z[0:size]. Return the remainder. a and z can be equal. */ + +static digit +inplace_rshift(digit *z, digit*a, Py_ssize_t size, int d) +{ + Py_ssize_t i; + digit carry = 0; + digit mask; + mask = ((digit)1 << d) - 1U; + + assert(0 <= d && d < PyLong_SHIFT); + + for (i=size-1; i >= 0; i--) { + twodigits acc = (twodigits)carry << PyLong_SHIFT | a[i]; + carry = (digit)acc & mask; + z[i] = (digit)(acc >> d); + } + return carry; } /* Divide long pin, w/ size digits, by non-zero digit n, storing quotient @@ -1452,7 +1472,7 @@ pout += size; while (--size >= 0) { digit hi; - rem = (rem << PyLong_SHIFT) + *--pin; + rem = (rem << PyLong_SHIFT) | *--pin; *--pout = hi = (digit)(rem / n); rem -= (twodigits)hi * n; } @@ -2089,103 +2109,125 @@ return 0; } +/* bits_in_digit(d) returns the unique integer k such that 2**(k-1) <= d < + 2**k if d is nonzero, else 0. */ + +static const unsigned char BitLengthTable[32] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 +}; + +static int +bits_in_digit(digit d) +{ + int d_bits = 0; + while (d >= 32) { + d_bits += 6; + d >>= 6; + } + d_bits += (int)BitLengthTable[d]; + return d_bits; +} + /* Unsigned long division with remainder -- the algorithm */ static PyLongObject * x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) { - Py_ssize_t size_v = ABS(Py_SIZE(v1)), size_w = ABS(Py_SIZE(w1)); - digit d = (digit) ((twodigits)PyLong_BASE / (w1->ob_digit[size_w-1] + 1)); - PyLongObject *v = mul1(v1, d); - PyLongObject *w = mul1(w1, d); - PyLongObject *a; - Py_ssize_t j, k; - - if (v == NULL || w == NULL) { - Py_XDECREF(v); - Py_XDECREF(w); + Py_ssize_t i, k, size_v = ABS(Py_SIZE(v1)), size_w = ABS(Py_SIZE(w1)); + digit wm1, wm2, carry, d; + PyLongObject *v, *w, *a; + + assert(size_v >= size_w && size_w >= 2); /* Assert checks by div() */ + + /* We follow Knuth [TAOCP, Vol. 2 (3rd edn.), 4.3.1, Algorithm D], + except that we don't explicitly handle the special case when the + initial estimate q for a quotient digit is >= PyLong_BASE: the max + value for q is PyLong_BASE+1, and that won't overflow a digit. */ + + /* normalize v1 and w1; put results in v and w */ + d = PyLong_SHIFT - bits_in_digit(w1->ob_digit[size_w-1]); + w = _PyLong_New(size_w); + if (w == NULL) + return NULL; + carry = inplace_lshift(w->ob_digit, w1->ob_digit, size_w, d); + assert(carry == 0); + wm1 = w->ob_digit[size_w-1]; + wm2 = w->ob_digit[size_w-2]; + v = _PyLong_New(size_v + 1); + if (v == NULL) { + Py_DECREF(w); return NULL; } - - assert(size_v >= size_w && size_w > 1); /* Assert checks by div() */ - assert(Py_REFCNT(v) == 1); /* Since v will be used as accumulator! */ - assert(size_w == ABS(Py_SIZE(w))); /* That's how d was calculated */ - - size_v = ABS(Py_SIZE(v)); + carry = inplace_lshift(v->ob_digit, v1->ob_digit, size_v, d); + /* save iteration of outer loop when possible */ + if (carry == 0 && v->ob_digit[size_v-1] < wm1) + size_v--; + else + v->ob_digit[size_v] = carry; + k = size_v - size_w; a = _PyLong_New(k + 1); - - for (j = size_v; a != NULL && k >= 0; --j, --k) { - digit vj = (j >= size_v) ? 0 : v->ob_digit[j]; - twodigits q; - stwodigits carry = 0; - Py_ssize_t i; + for (; a != NULL && k >= 0; --k) { + digit q, r, vj = v->ob_digit[k+size_w]; + twodigits vv; SIGCHECK({ Py_DECREF(a); a = NULL; break; }) - if (vj == w->ob_digit[size_w-1]) - q = PyLong_MASK; - else - q = (((twodigits)vj << PyLong_SHIFT) + v->ob_digit[j-1]) / - w->ob_digit[size_w-1]; - while (w->ob_digit[size_w-2]*q > - (( - ((twodigits)vj << PyLong_SHIFT) - + v->ob_digit[j-1] - - q*w->ob_digit[size_w-1] - ) << PyLong_SHIFT) - + v->ob_digit[j-2]) + /* estimate quotient digit q; may (rarely) overestimate by 1 */ + assert(vj <= wm1); + vv = ((twodigits)vj << PyLong_SHIFT) | v->ob_digit[k+size_w-1]; + q = (digit)(vv / wm1); + r = (digit)(vv - (twodigits)wm1 * q); /* r = vv % wm1 */ + while ((twodigits)wm2 * q > (((twodigits)r << PyLong_SHIFT) + | v->ob_digit[k+size_w-2])) { --q; - - for (i = 0; i < size_w && i+k < size_v; ++i) { - twodigits z = w->ob_digit[i] * q; - digit zz = (digit) (z >> PyLong_SHIFT); - carry += v->ob_digit[i+k] - z - + ((twodigits)zz << PyLong_SHIFT); - v->ob_digit[i+k] = (digit)(carry & PyLong_MASK); - carry = Py_ARITHMETIC_RIGHT_SHIFT(stwodigits, - carry, PyLong_SHIFT); - carry -= zz; - } - - if (i+k < size_v) { - carry += v->ob_digit[i+k]; - v->ob_digit[i+k] = 0; - } - - if (carry == 0) - a->ob_digit[k] = (digit) q; - else { - assert(carry == -1); - a->ob_digit[k] = (digit) q-1; + r += wm1; + if (r >= PyLong_BASE) + break; + } + assert(q <= PyLong_BASE); + + /* subtract q*w from v->ob_digits[k:k+size_w+1] */ + carry = 0; + for (i = 0; i < size_w; ++i) { + twodigits z = v->ob_digit[k+i] - + (twodigits)q * w->ob_digit[i] - carry; + v->ob_digit[i+k] = (digit)z & PyLong_MASK; + carry = -(digit)(z >> PyLong_SHIFT); + assert(carry <= PyLong_BASE); + } + assert(carry == vj || carry == vj+1); + + /* correction if q was too large (this branch taken rarely) */ + if (carry > vj) { carry = 0; - for (i = 0; i < size_w && i+k < size_v; ++i) { + for (i = 0; i < size_w; ++i) { carry += v->ob_digit[i+k] + w->ob_digit[i]; - v->ob_digit[i+k] = (digit)(carry & PyLong_MASK); - carry = Py_ARITHMETIC_RIGHT_SHIFT( - stwodigits, - carry, PyLong_SHIFT); + v->ob_digit[i+k] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; } + q--; } - } /* for j, k */ + assert(q <= PyLong_MASK); + a->ob_digit[k] = q; + } /* for k */ - if (a == NULL) + if (a == NULL) { *prem = NULL; + Py_DECREF(w); + } else { a = long_normalize(a); - *prem = divrem1(v, d, &d); - /* d receives the (unused) remainder */ - if (*prem == NULL) { - Py_DECREF(a); - a = NULL; - } + d = inplace_rshift(w->ob_digit, v->ob_digit, size_w, d); + assert(d==0); + *prem = long_normalize(w); } Py_DECREF(v); - Py_DECREF(w); return a; } @@ -3793,17 +3835,11 @@ return PyLong_FromSsize_t(res); } -static const unsigned char BitLengthTable[32] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 -}; - static PyObject * long_bit_length(PyLongObject *v) { PyLongObject *result, *x, *y; Py_ssize_t ndigits, msd_bits = 0; - digit msd; assert(v != NULL); assert(PyLong_Check(v)); @@ -3812,12 +3848,7 @@ if (ndigits == 0) return PyLong_FromLong(0); - msd = v->ob_digit[ndigits-1]; - while (msd >= 32) { - msd_bits += 6; - msd >>= 6; - } - msd_bits += (long)(BitLengthTable[msd]); + msd_bits = bits_in_digit(v->ob_digit[ndigits-1]); if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT) return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits);