Index: Python/marshal.c
===================================================================
--- Python/marshal.c	(revision 66830)
+++ Python/marshal.c	(working copy)
@@ -161,7 +161,7 @@
 		if (n < 0)
 			n = -n;
 		for (i = 0; i < n; i++)
-			w_short(ob->ob_digit[i], p);
+			w_long(ob->ob_digit[i], p);
 	}
 	else {
 #if SIZEOF_LONG > 4
@@ -562,7 +562,7 @@
 		}
 		Py_SIZE(ob) = n;
 		for (i = 0; i < size; i++) {
-			int digit = r_short(p);
+			long digit = r_long(p);
 			if (digit < 0) {
 				Py_DECREF(ob);
 				PyErr_SetString(PyExc_ValueError,
Index: Include/longintrepr.h
===================================================================
--- Include/longintrepr.h	(revision 66830)
+++ Include/longintrepr.h	(working copy)
@@ -18,13 +18,14 @@
    And, at some places it is assumed that MASK fits in an int, as well.
    long_pow() requires that SHIFT be divisible by 5. */
 
-typedef unsigned short digit;
-typedef unsigned int wdigit; /* digit widened to parameter size */
-#define BASE_TWODIGITS_TYPE long
+typedef unsigned long digit;
+typedef long sdigit; /* signed variant of digit */
+typedef long wdigit; /* digit widened to parameter size */
+#define BASE_TWODIGITS_TYPE long long
 typedef unsigned BASE_TWODIGITS_TYPE twodigits;
 typedef BASE_TWODIGITS_TYPE stwodigits; /* signed variant of twodigits */
 
-#define PyLong_SHIFT	15
+#define PyLong_SHIFT	30
 #define PyLong_BASE	((digit)1 << PyLong_SHIFT)
 #define PyLong_MASK	((int)(PyLong_BASE - 1))
 
Index: Objects/longobject.c
===================================================================
--- Objects/longobject.c	(revision 66830)
+++ Objects/longobject.c	(working copy)
@@ -14,8 +14,8 @@
 #define NSMALLNEGINTS		5
 #endif
 
-#define MEDIUM_VALUE(x) (Py_SIZE(x) < 0 ? -(x)->ob_digit[0] : \
-	(Py_SIZE(x) == 0 ? 0 : (x)->ob_digit[0]))
+#define MEDIUM_VALUE(x) (Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] : \
+	(Py_SIZE(x) == 0 ? 0 : (sdigit)(x)->ob_digit[0]))
 #define ABS(x) ((x) < 0 ? -(x) : (x))
 
 #if NSMALLNEGINTS + NSMALLPOSINTS > 0
@@ -78,6 +78,11 @@
 #define KARATSUBA_CUTOFF 70
 #define KARATSUBA_SQUARE_CUTOFF (2 * KARATSUBA_CUTOFF)
 
+/* For long division, use the O(N**2) school algorithm unless the
+ * denominator contains more than DIV_LIMIT digits.
+ */
+#define DIV_LIMIT KARATSUBA_CUTOFF
+
 /* For exponentiation, use the binary left-to-right algorithm
  * unless the exponent contains more than FIVEARY_CUTOFF digits.
  * In that case, do 5 bits at a time.  The potential drawback is that
@@ -203,17 +208,6 @@
 		return (PyObject*)v;
 	}
 
-	/* 2 digits */
-	if (!(ival >> 2*PyLong_SHIFT)) {
-		v = _PyLong_New(2);
-		if (v) {
-			Py_SIZE(v) = 2*sign;
-			v->ob_digit[0] = (digit)ival & PyLong_MASK;
-			v->ob_digit[1] = ival >> PyLong_SHIFT;
-		}
-		return (PyObject*)v;
-	}
-
 	/* Larger numbers: loop to determine number of digits */
 	t = abs_ival;
 	while (t) {
@@ -361,7 +355,7 @@
 
 	switch (i) {
 	case -1:
-		res = -v->ob_digit[0];
+		res = -(sdigit)v->ob_digit[0];
 		break;
 	case 0:
 		res = 0;
@@ -436,7 +430,7 @@
 	v = (PyLongObject *)vv;
 	i = Py_SIZE(v);
 	switch (i) {
-	case -1: return -v->ob_digit[0];
+	case -1: return -(sdigit)v->ob_digit[0];
 	case 0: return 0;
 	case 1: return v->ob_digit[0];
 	}
@@ -739,7 +733,7 @@
 			/* Because we're going LSB to MSB, thisbyte is
 			   more significant than what's already in accum,
 			   so needs to be prepended to accum. */
-			accum |= thisbyte << accumbits;
+			accum |= (twodigits)thisbyte << accumbits;
 			accumbits += 8;
 			if (accumbits >= PyLong_SHIFT) {
 				/* There's enough to fill a Python digit. */
@@ -822,7 +816,7 @@
 		/* Because we're going LSB to MSB, thisdigit is more
 		   significant than what's already in accum, so needs to be
 		   prepended to accum. */
-		accum |= thisdigit << accumbits;
+		accum |= (twodigits)thisdigit << accumbits;
 		accumbits += PyLong_SHIFT;
 
 		/* The most-significant digit may be (probably is) at least
@@ -1229,7 +1223,7 @@
 
 	v = (PyLongObject*)vv;
 	switch(Py_SIZE(v)) {
-	case -1: return -v->ob_digit[0];
+	case -1: return -(sdigit)v->ob_digit[0];
 	case 0: return 0;
 	case 1: return v->ob_digit[0];
 	}
@@ -1453,7 +1447,7 @@
 		digit hi;
 		rem = (rem << PyLong_SHIFT) + *--pin;
 		*--pout = hi = (digit)(rem / n);
-		rem -= hi * n;
+		rem -= (twodigits)hi * n;
 	}
 	return (digit)rem;
 }
@@ -1712,7 +1706,7 @@
 		while (--p >= start) {
 			int k = _PyLong_DigitValue[Py_CHARMASK(*p)];
 			assert(k >= 0 && k < base);
-			accum |= (twodigits)(k << bits_in_accum);
+			accum |= (twodigits)k << bits_in_accum;
 			bits_in_accum += bits_per_char;
 			if (bits_in_accum >= PyLong_SHIFT) {
 				*pdigit++ = (digit)(accum & PyLong_MASK);
@@ -2038,6 +2032,8 @@
 static PyObject *long_long(PyObject *v);
 static int long_divrem(PyLongObject *, PyLongObject *,
 		       PyLongObject **, PyLongObject **);
+static PyLongObject * divmod_pos
+	(PyLongObject *, PyLongObject *, PyLongObject **);
 
 /* Long division with remainder, top-level routine */
 
@@ -2076,7 +2072,16 @@
 		}
 	}
 	else {
-		z = x_divrem(a, b, prem);
+		if ((size_b < 2*DIV_LIMIT) ||
+		    ((size_b < 4*DIV_LIMIT) &&
+		     (size_a < 0.897 * size_b + 44.97)) ||
+		    (size_a > 0.00343 * pow(size_b,2.9)))
+		{
+			z = x_divrem(a, b, prem);
+		}
+		else {
+			z = divmod_pos(a, b, prem);
+		}
 		if (z == NULL)
 			return -1;
 	}
@@ -2192,6 +2197,564 @@
 	return a;
 }
 
+/* Utilities for the Burnikel-Ziegler long division algorithm. */
+
+/* Return a new long equal to a shifted left by shiftby whole digits
+ * (limbs).  Assumes Py_SIZE(a) >= 0.  Returns NULL if allocation fails.
+ */
+static PyLongObject *
+_long_int_lshift(PyLongObject *a, Py_ssize_t shiftby)
+{
+	PyLongObject *z = NULL;
+	Py_ssize_t oldsize, newsize, wordshift;
+
+	assert(shiftby >= 0);
+	wordshift = shiftby;
+	oldsize = Py_SIZE(a);
+	newsize = oldsize + wordshift;
+	z = _PyLong_New(newsize);
+	if (z == NULL)
+		goto lshift_error;
+	memset(z->ob_digit, 0, wordshift * sizeof(digit));
+	memcpy(z->ob_digit + wordshift, a->ob_digit, oldsize * sizeof(digit));
+	z = long_normalize(z);
+lshift_error:
+	return z;
+}
+
+/* Return a new long equal to a shifted right by shiftby whole digits
+ * (limbs); the result is zero when a has at most shiftby digits.
+ * Assumes Py_SIZE(a) >= 0.  Returns NULL if allocation fails.
+ */
+static PyLongObject *
+_long_int_rshift(PyLongObject *a, Py_ssize_t shiftby)
+{
+	PyLongObject *z = NULL;
+	Py_ssize_t newsize, wordshift;
+
+	assert(shiftby >= 0);
+	wordshift = shiftby;
+	newsize = Py_SIZE(a) - wordshift;
+	if (newsize <= 0) {
+		z = _PyLong_New(0);
+		return z;
+	}
+	z = _PyLong_New(newsize);
+	if (z == NULL)
+		goto rshift_error;
+	memcpy(z->ob_digit, a->ob_digit + wordshift, newsize * sizeof(digit));
+	z = long_normalize(z);
+rshift_error:
+	return z;
+}
+
+/* Return a new long holding the low min(len, Py_SIZE(a)) digits (limbs)
+ * of a.  Returns NULL if allocation fails.
+ */
+static PyLongObject *
+_long_int_mask(PyLongObject *a, Py_ssize_t len)
+{
+	PyLongObject *z = NULL;
+	Py_ssize_t newsize;
+	assert(len >= 0);
+	newsize = MIN(len, (long)Py_SIZE(a));
+	if (newsize == 0) {
+		z = _PyLong_New(0);
+		return z;
+	}
+	z = _PyLong_New(newsize);
+	if (z == NULL)
+		goto int_mask_error;
+	memcpy(z->ob_digit, a->ob_digit, newsize * sizeof(digit));
+	z = long_normalize(z);
+int_mask_error:
+	return z;
+}
+
+/* For long division a/b with n = Py_SIZE(b), n > DIV_LIMIT, use the
+ * recursive division algorithm by Burnikel and Ziegler
+ * http://cr.yp.to/bib/1998/burnikel.ps
+ * n is required to be even.
+ */
+static PyObject * long_add(PyLongObject *, PyLongObject *);
+static PyObject * long_sub(PyLongObject *, PyLongObject *);
+static PyObject * long_mul(PyLongObject *, PyLongObject *);
+static PyObject * long_div(PyObject *, PyObject *);
+static int long_compare(PyLongObject *, PyLongObject *);
+static PyObject * long_rshift(PyLongObject *, PyLongObject *);
+static PyObject * long_lshift(PyObject *, PyObject *);
+static PyObject * long_or(PyObject *, PyObject *);
+static PyObject * long_and(PyObject *, PyObject *);
+static PyLongObject * div3n2n(PyLongObject *, digit *, PyLongObject *,
+	PyLongObject *, PyLongObject *, Py_ssize_t , PyLongObject **);
+
+/* Divide a by b, where b is expected to have n digits.  Returns a new
+ * reference to the quotient and stores a new reference to the remainder
+ * in *prem; returns NULL on failure.  When n <= DIV_LIMIT or a has fewer
+ * than n digits the recursion stops and x_divrem() (or the trivial
+ * result q = 0, r = a) is used instead.
+ */
+static PyLongObject *
+div2n1n(PyLongObject *a, PyLongObject *b, Py_ssize_t n, PyLongObject **prem)
+{
+	PyLongObject *q = NULL, *r;
+	Py_ssize_t size_a = Py_SIZE(a);
+	if (n <= DIV_LIMIT || size_a < n){
+		if (size_a < n ||
+		    (size_a == n &&
+		     a->ob_digit[size_a-1] < b->ob_digit[n-1])) {
+			/* |a| < |b|. */
+			if ((q = _PyLong_New(0)) == NULL) {
+				return NULL;
+			}
+			Py_INCREF(a);
+			*prem = (PyLongObject *) a;
+			return q;
+		}
+		if ((q = x_divrem(a, b, prem)) == NULL) {
+			return NULL;
+		}
+		return q;
+	}
+	PyLongObject *q1, *q2, *t1 = NULL, *a1 = NULL, *b1 = NULL, *b2 = NULL;
+	Py_ssize_t half_n = n >> 1;
+	if ((b1 = _long_int_rshift(b, half_n)) == NULL) return NULL;
+	if ((b2 = _long_int_mask(b, half_n)) == NULL) {
+		Py_XDECREF(b1);
+		return NULL;
+	}
+	if ((a1 = _long_int_rshift(a, n)) == NULL) {
+		Py_XDECREF(b1);
+		Py_XDECREF(b2);
+		return NULL;
+	}
+	if ((q1 = div3n2n(a1, a->ob_digit+half_n, b, b1, b2, half_n, &t1)) ==
+	    NULL) {
+		Py_XDECREF(b1);
+		Py_XDECREF(b2);
+		Py_XDECREF(a1);
+		return NULL;
+	}
+	Py_DECREF(a1);
+	if ((q2 = div3n2n(t1, a->ob_digit, b, b1, b2, half_n, &r)) == NULL) {
+		Py_XDECREF(b1);
+		Py_XDECREF(b2);
+		Py_DECREF(q1);
+		Py_DECREF(t1);
+		return NULL;
+	}
+	Py_DECREF(b1);
+	Py_DECREF(b2);
+	Py_DECREF(t1);
+	/* q = q1 * BASE**half_n + q2 */
+	if(Py_SIZE(q1)) {
+		if ((q = _long_int_lshift(q1, half_n)) == NULL) {
+			Py_DECREF(q1);
+			Py_DECREF(q2);
+			Py_DECREF(r);
+			return NULL;
+		}
+		memcpy(q->ob_digit, q2->ob_digit, Py_SIZE(q2) * sizeof(digit));
+	}
+	else {
+		q = q2;
+		Py_INCREF(q2);
+	}
+	Py_DECREF(q1);
+	Py_DECREF(q2);
+	*prem = r;
+	return q;
+}
+static PyLongObject * k_mul(PyLongObject *, PyLongObject *);
+static PyLongObject * x_add(PyLongObject *, PyLongObject *);
+static PyLongObject * x_sub(PyLongObject *, PyLongObject *);
+
+/* Helper function for div2n1n; not intended to be called directly.
+ * Divides the value whose high digits are a12 and whose low n digits
+ * are read from a3 by b, where b1 and b2 are the high and low halves
+ * of b.  Returns a new reference to the quotient and stores a new
+ * reference to the remainder in *prem; returns NULL on failure.
+ */
+static PyLongObject *
+div3n2n(PyLongObject *a12, digit *a3, PyLongObject *b,
+	PyLongObject *b1, PyLongObject *b2, Py_ssize_t n, PyLongObject **prem)
+{
+	PyLongObject *q = NULL, *r = NULL, *t1, *t2, *t3;
+	if ((t1 = _long_int_rshift(a12, n)) == NULL) {
+		return NULL;
+	}
+	if (long_compare(t1, b1) == 0){
+		PyLongObject * one;
+		if ((one = (PyLongObject*) PyLong_FromLong(1)) == NULL) {
+			Py_DECREF(t1);
+			return NULL;
+		}
+		if ((t2 = _long_int_lshift(one, n)) == NULL) {
+			Py_DECREF(t1);
+			Py_DECREF(one);
+			return NULL;
+		}
+		if ((q = (PyLongObject *) long_sub(t2, one)) == NULL) {
+			Py_DECREF(t1);
+			Py_DECREF(t2);
+			Py_DECREF(one);
+			return NULL;
+		}
+		Py_DECREF(t2);
+		if ((t3 = _long_int_lshift(b1, n)) == NULL) {
+			Py_DECREF(t1);
+			Py_XDECREF(q);
+			Py_DECREF(one);
+			return NULL;
+		}
+		Py_DECREF(t1);
+		if ((t1 = (PyLongObject *) long_sub(a12, t3)) == NULL) {
+			Py_XDECREF(q);
+			Py_DECREF(t3);
+			Py_DECREF(one);
+			return NULL;
+		}
+		Py_DECREF(t3);
+		if ((r = (PyLongObject *) long_add(t1, b1)) == NULL) {
+			Py_XDECREF(q);
+			Py_DECREF(t1);
+			Py_DECREF(one);
+			return NULL;
+		}
+		Py_DECREF(one);
+	}
+	else {
+		if ((q = div2n1n(a12, b1, n, &r)) == NULL) {
+			Py_DECREF(t1);
+			return NULL;
+		}
+	}
+	Py_DECREF(t1);
+	if(Py_SIZE(r)) {
+		if ((t2 = _long_int_lshift(r, n)) == NULL) {
+			Py_XDECREF(q);
+			Py_XDECREF(r);
+			return NULL;
+		}
+		memcpy(t2->ob_digit, a3, n * sizeof(digit));
+	}
+	else {
+		if ((t2 = _PyLong_New(n)) == NULL) {
+			Py_XDECREF(q);
+			Py_XDECREF(r);
+			return NULL;
+		}
+		memcpy(t2->ob_digit, a3, n * sizeof(digit));
+		/* a3 may have zero high digits; strip them, since x_sub
+		 * below decides which operand is larger by digit count. */
+		t2 = long_normalize(t2);
+	}
+
+	if ((t3 = (PyLongObject *) k_mul(q, b2)) == NULL) {
+		Py_XDECREF(q);
+		Py_XDECREF(r);
+		Py_DECREF(t2);
+		return NULL;
+	}
+	Py_DECREF(r);
+	if ((r = x_sub(t2, t3)) == NULL) {
+		Py_XDECREF(q);
+		Py_DECREF(t2);
+		Py_DECREF(t3);
+		return NULL;
+	}
+	Py_DECREF(t2);
+	Py_DECREF(t3);
+	/* q was too large: decrement q and add b back until r >= 0 */
+	while (Py_SIZE(r) < 0) {
+		if (q->ob_digit[0] > 0)
+			q->ob_digit[0]--;
+		else {
+			digit borrow = 1;
+			q->ob_digit[0] = (-1) & PyLong_MASK;
+			int i;
+			for (i=1; (i < Py_SIZE(q)) && borrow; ++i) {
+				borrow = q->ob_digit[i] - borrow;
+				q->ob_digit[i] = borrow & PyLong_MASK;
+				borrow >>= PyLong_SHIFT;
+				borrow &= 1;
+			}
+			assert(borrow == 0);
+		}
+		t1 = r;
+		if ((r = (PyLongObject *) long_add(r, b)) == NULL) {
+			Py_XDECREF(q);
+			Py_DECREF(t1);
+			return NULL;
+		}
+		Py_DECREF(t1);
+	}
+	*prem = r;
+	return q;
+}
+
+/* To perform long division a/b where a has more than 2*n bits, where n
+ * is the number of bits of b, split a in chunks of n bits, then call
+ * the div2n1n algorithm.  Since n is required to be even in div2n1n,
+ * in case it is not, pad a and b with zeros on the right till n is
+ * a multiple of 2**N, where N is the number of times n must be
+ * divided in the div2n1n algorithm.
+ * The signs of a and b are cleared while working and restored before
+ * returning, whether the division succeeds or fails.
+ */
+static PyLongObject *
+divmod_pos(PyLongObject *a, PyLongObject *b, PyLongObject **prem)
+{
+	int top = 0;
+	PyLongObject *a0, *a1, *b1, *q = NULL, *r = NULL,
+		*q_digit = NULL, *t0, *t1, *t2;
+	int asign = (Py_SIZE(a) < 0);
+	int bsign = (Py_SIZE(b) < 0);
+	if (asign) Py_SIZE(a) = - Py_SIZE(a);
+	if (bsign) Py_SIZE(b) = - Py_SIZE(b);
+
+	int na = _PyLong_NumBits((PyObject *) a);
+	int n = _PyLong_NumBits((PyObject *) b);
+	/* make n a multiple of PyLong_SHIFT */
+	int nr = n%PyLong_SHIFT;
+	int pad;
+	if ((t0 = (PyLongObject*) PyLong_FromLong(PyLong_SHIFT - nr))
+	    == NULL) {
+		goto fail0;
+	}
+	if (nr > 0) {
+		pad = 1;
+		n = n + PyLong_SHIFT - nr;
+		na = na + PyLong_SHIFT - nr;
+		if ((a1 = (PyLongObject *) long_lshift((PyObject *) a,
+			(PyObject *) t0)) == NULL) {
+			Py_DECREF(t0);
+			goto fail0;
+		}
+		if ((b1 = (PyLongObject *) long_lshift((PyObject *) b,
+			(PyObject *) t0)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(a1);
+			goto fail0;
+		}
+	}
+	else {
+		pad = 0;
+		a1 = a;
+		Py_INCREF(a);
+		b1 = b;
+		Py_INCREF(b);
+	}
+
+	/* estimate the number of times n must be divided by two by div2n1n
+	 * before falling back to x_divrem; increase n till it can be divided
+	 * that number of times
+	 */
+	int nab = na/n + 1;
+	int n_S = n/PyLong_SHIFT;
+	PyLongObject *lnn;
+	if ((lnn = (PyLongObject*) PyLong_FromLong(n_S/DIV_LIMIT)) == NULL) {
+		Py_DECREF(t0);
+		Py_DECREF(a1);
+		Py_DECREF(b1);
+		goto fail0;
+	}
+	int nn = _PyLong_NumBits((PyObject *) lnn);
+	int mask_n = (1 << nn) - 1;
+	int n1 = n_S;
+	while(n1 & mask_n)
+		n1++;
+	int shift_n = n1 - n_S;
+	Py_DECREF(lnn);
+	n_S = n1;
+	t1 = a1;
+	if ((a1 = _long_int_lshift(a1, shift_n)) == NULL) {
+		Py_DECREF(t0);
+		Py_DECREF(t1);
+		Py_DECREF(b1);
+		goto fail0;
+	}
+	Py_DECREF(t1);
+	t1 = b1;
+	if ((b1 = _long_int_lshift(b1, shift_n)) == NULL) {
+		Py_DECREF(t0);
+		Py_DECREF(t1);
+		Py_DECREF(a1);
+		goto fail0;
+	}
+	Py_DECREF(t1);
+
+	/* split a into chunks of n_S digits */
+	PyLongObject ** a_digits =
+		(PyLongObject **) calloc(nab, sizeof(PyLongObject *));
+	if (a_digits == NULL) {
+		PyErr_NoMemory();
+		Py_DECREF(t0);
+		Py_DECREF(a1);
+		Py_DECREF(b1);
+		goto fail0;
+	}
+	if ((a0 = (PyLongObject *) _PyLong_Copy(a1)) == NULL) {
+		Py_DECREF(t0);
+		Py_DECREF(a1);
+		Py_DECREF(b1);
+		free(a_digits);
+		goto fail0;
+	}
+
+	int i;
+	for(i=0; i < nab; i++) {
+		if ((a_digits[i] = _long_int_mask(a0, n_S)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(a0);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			int j;
+			for(j=0; j < i; j++)
+				Py_DECREF(a_digits[j]);
+			free(a_digits);
+			goto fail0;
+		}
+		t1 = a0;
+		if ((a0 = _long_int_rshift(a0, n_S)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(t1);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			goto fail1;
+		}
+		Py_DECREF(t1);
+		if (Py_SIZE(a0) == 0) {
+			break;
+		}
+	}
+	top = i;
+	Py_DECREF(a0);
+	if (long_compare(a_digits[i], b1) >= 0) {
+		if ((r = (PyLongObject*) PyLong_FromLong(0)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			goto fail1;
+		}
+	}
+	else {
+		if ((r = (PyLongObject *) _PyLong_Copy(a_digits[i--])) ==
+		    NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			goto fail1;
+		}
+	}
+	if ((q = (PyLongObject*) PyLong_FromLong(0)) == NULL) {
+		Py_DECREF(t0);
+		Py_DECREF(a1);
+		Py_DECREF(b1);
+		Py_XDECREF(r);
+		goto fail1;
+	}
+	while(i >= 0) {
+		if ((t1 = _long_int_lshift(r, n_S)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			Py_XDECREF(r);
+			Py_XDECREF(q);
+			goto fail1;
+		}
+		if ((t2 = (PyLongObject *) long_add(t1, a_digits[i--]))
+		    == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(t1);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			Py_XDECREF(r);
+			Py_XDECREF(q);
+			goto fail1;
+		}
+		Py_DECREF(t1);
+		Py_XDECREF(r);
+		if ((q_digit = div2n1n(t2, b1, n_S, &r)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(t2);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			Py_XDECREF(q);
+			goto fail1;
+		}
+		Py_DECREF(t2);
+		if ((t1 = _long_int_lshift(q, n_S)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			Py_XDECREF(q);
+			Py_XDECREF(q_digit);
+			Py_XDECREF(r);
+			goto fail1;
+		}
+		Py_DECREF(q);
+		if ((q = (PyLongObject *) long_add(t1, q_digit)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(t1);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			Py_XDECREF(q_digit);
+			Py_XDECREF(r);
+			goto fail1;
+		}
+		Py_DECREF(t1);
+		Py_DECREF(q_digit);
+	}
+
+	for(i=top; i >= 0; i--) {
+		Py_XDECREF(a_digits[i]);
+	}
+	free(a_digits);
+
+	if (pad) {
+		t1 = r;
+		if ((r = (PyLongObject *) long_rshift(r, t0)) == NULL) {
+			Py_DECREF(t0);
+			Py_DECREF(t1);
+			Py_DECREF(a1);
+			Py_DECREF(b1);
+			Py_XDECREF(q);
+			goto fail0;
+		}
+		Py_DECREF(t1);
+	}
+	t1 = r;
+	if ((r = _long_int_rshift(r, shift_n)) == NULL) {
+		Py_DECREF(t0);
+		Py_DECREF(t1);
+		Py_DECREF(a1);
+		Py_DECREF(b1);
+		Py_XDECREF(q);
+		goto fail0;
+	}
+	Py_DECREF(t1);
+
+	if (asign) Py_SIZE(a) = - Py_SIZE(a);
+	if (bsign) Py_SIZE(b) = - Py_SIZE(b);
+	Py_DECREF(a1);
+	Py_DECREF(b1);
+	Py_DECREF(t0);
+	*prem = r;
+	return q;
+fail1:
+	/* a_digits was calloc'ed, so slots never filled in are NULL */
+	for(i=0; i < nab; i++) {
+		Py_XDECREF(a_digits[i]);
+	}
+	free(a_digits);
+fail0:
+	if (asign) Py_SIZE(a) = - Py_SIZE(a);
+	if (bsign) Py_SIZE(b) = - Py_SIZE(b);
+	return NULL;
+}
+
+
 /* Methods */
 
 static void
@@ -2254,7 +2817,7 @@
 	   of mapping keys will turn out weird */
 	i = Py_SIZE(v);
 	switch(i) {
-	case -1: return v->ob_digit[0]==1 ? -2 : -v->ob_digit[0];
+	case -1: return v->ob_digit[0]==1 ? -2 : -(sdigit)v->ob_digit[0];
 	case 0: return 0;
 	case 1: return v->ob_digit[0];
 	}
@@ -2843,7 +3406,8 @@
 
 	if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) {
 		PyObject *r;
-		r = PyLong_FromLong(MEDIUM_VALUE(a)*MEDIUM_VALUE(b));
+		r = PyLong_FromLongLong((stwodigits)MEDIUM_VALUE(a)*
+					 MEDIUM_VALUE(b));
 		return r;
 	}
 