Index: longobject.c =================================================================== --- longobject.c (revision 70592) +++ longobject.c (working copy) @@ -82,6 +82,11 @@ #define KARATSUBA_CUTOFF 70 #define KARATSUBA_SQUARE_CUTOFF (2 * KARATSUBA_CUTOFF) +/* For long division, use the O(N**2) school algorithm unless the + * denominator contains more than DIV_LIMIT digits. + */ +const int DIV_LIMIT = KARATSUBA_CUTOFF; + /* For exponentiation, use the binary left-to-right algorithm * unless the exponent contains more than FIVEARY_CUTOFF digits. * In that case, do 5 bits at a time. The potential drawback is that @@ -1517,165 +1522,311 @@ Return a string object. If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */ +static void +_PyLong_Format_small(PyLongObject *a, int base, int power, digit powbase, + char *pp, PyLongObject *scratch, Py_ssize_t end) +{ + Py_ssize_t size, size_a = ABS(Py_SIZE(a)); + char *p = pp + end; + digit *pin = a->ob_digit; + size = size_a; + /* Repeatedly divide by powbase. */ + do { + int ntostore = power; + digit rem = inplace_divrem1(scratch->ob_digit, + pin, size, powbase); + pin = scratch->ob_digit; /* no need to use a again */ + if (pin[size - 1] == 0) + --size; + + /* Break rem into digits. */ + assert(ntostore > 0); + do { + digit nextrem = (digit)(rem / base); + char c = (char)(rem - nextrem * base); + assert(p > PyUnicode_AS_UNICODE(str)); + c += (c < 10) ? '0' : 'a'-10; + *--p = c; + rem = nextrem; + --ntostore; + /* Termination is a bit delicate: must not + store leading zeroes, so must get out if + remaining quotient and rem are both 0. */ + } while (ntostore && (size || rem)); + } while (size != 0); +} + +#define NUMERAL_SIZE 350 + +static PyObject * long_pow(PyObject *v, PyObject *w, PyObject *x); +static int l_divmod(PyLongObject *v, PyLongObject *w, + PyLongObject **pdiv, PyLongObject **pmod); +/* return -1 on failure, 0 for normal exit */ +static int +_numeral(PyLongObject *n, int base, PyObject* pbase, int power, digit powbase, + char *p, PyLongObject *scratch, Py_ssize_t end) +{ + digit msd; + int status, ad, bd; + Py_ssize_t size, half, ndigits, msd_bits = 0; + PyLongObject *div, *mod; + PyObject *phalf, *pbase_phalf; + /* compute the approximate number of digits in the given base; + * use code from bit_length + */ + ndigits = ABS(Py_SIZE(n)); + if(ndigits == 0) + return 0; + msd = n->ob_digit[ndigits-1]; + while (msd >= 32) { + msd_bits += 6; + msd >>= 6; + } + msd_bits += (long)(BitLengthTable[msd]); + size = (ndigits-1)*PyLong_SHIFT + msd_bits; + size = (Py_ssize_t)(size / log(base) + 1); + if(size < NUMERAL_SIZE){ + _PyLong_Format_small(n, base, power, powbase, p, + scratch, end); + return 0; + } + half = (size / 2) + (size & 1); + if((phalf = PyLong_FromLong(half)) == NULL) + return -1; + pbase_phalf = long_pow(pbase, phalf, Py_None); + if(!pbase_phalf){ + Py_DECREF(phalf); + return -1; + } + Py_DECREF(phalf); + status = l_divmod(n, (PyLongObject*)pbase_phalf, &div, &mod); + if(status < 0){ + Py_DECREF(pbase_phalf); + return -1; + } + Py_DECREF(pbase_phalf); + SIGCHECK({ + return -1; + }) + ad = _numeral(div, base, pbase, power, powbase, p, scratch, + end - half); + if(ad < 0){ + return -1; + } + bd = _numeral(mod, base, pbase, power, powbase, p, scratch, end); + if(bd < 0){ + Py_DECREF(div); + return -1; + } + Py_DECREF(div); + Py_DECREF(mod); + return 0; +} + PyObject * _PyLong_Format(PyObject *aa, int base) { - register PyLongObject *a = (PyLongObject *)aa; - PyObject *str; - Py_ssize_t i, j, sz; - Py_ssize_t size_a; - Py_UNICODE *p; - int bits; - char sign = '\0'; + register PyLongObject *a = (PyLongObject *)aa; + PyObject *str; + Py_ssize_t i, j, sz; + Py_ssize_t size_a; + Py_UNICODE *p; + char *pc; + int bits; + char sign = '\0'; - if (a == NULL || !PyLong_Check(a)) { - PyErr_BadInternalCall(); - return NULL; - } - assert(base >= 2 && base <= 36); - size_a = ABS(Py_SIZE(a)); + if (a == NULL || !PyLong_Check(a)) { + PyErr_BadInternalCall(); + return NULL; + } + assert(base >= 2 && base <= 36); + size_a = ABS(Py_SIZE(a)); - /* Compute a rough upper bound for the length of the string */ - i = base; - bits = 0; - while (i > 1) { - ++bits; - i >>= 1; - } - i = 5; - j = size_a*PyLong_SHIFT + bits-1; - sz = i + j / bits; - if (j / PyLong_SHIFT < size_a || sz < i) { - PyErr_SetString(PyExc_OverflowError, - "int is too large to format"); - return NULL; - } - str = PyUnicode_FromUnicode(NULL, sz); - if (str == NULL) - return NULL; - p = PyUnicode_AS_UNICODE(str) + sz; - *p = '\0'; - if (Py_SIZE(a) < 0) - sign = '-'; + /* Compute a rough upper bound for the length of the string */ + i = base; + bits = 0; + while (i > 1) { + ++bits; + i >>= 1; + } + i = 5; + j = size_a*PyLong_SHIFT + bits-1; + sz = i + j / bits; + if (j / PyLong_SHIFT < size_a || sz < i) { + PyErr_SetString(PyExc_OverflowError, + "int is too large to format"); + return NULL; + } + str = PyUnicode_FromUnicode(NULL, sz); + if (str == NULL) + return NULL; + p = PyUnicode_AS_UNICODE(str) + sz; + *p = '\0'; + if (Py_SIZE(a) < 0) + sign = '-'; + if (Py_SIZE(a) == 0) { + *--p = '0'; + } + else if ((base & (base - 1)) == 0) { + /* JRH: special case for power-of-2 bases */ + twodigits accum = 0; + int accumbits = 0; /* # of bits in accum */ + int basebits = 1; /* # of bits in base-1 */ + i = base; + while ((i >>= 1) > 1) + ++basebits; - if (Py_SIZE(a) == 0) { - *--p = '0'; - } - else if ((base & (base - 1)) == 0) { - /* JRH: special case for power-of-2 bases */ - twodigits accum = 0; - int accumbits = 0; /* # of bits in accum */ - int basebits = 1; /* # of bits in base-1 */ - i = base; - while ((i >>= 1) > 1) - ++basebits; + for (i = 0; i < size_a; ++i) { + accum |= (twodigits)a->ob_digit[i] << accumbits; + accumbits += PyLong_SHIFT; + assert(accumbits >= basebits); + do { + char cdigit = (char)(accum & (base - 1)); + cdigit += (cdigit < 10) ? '0' : 'a'-10; + assert(p > PyUnicode_AS_UNICODE(str)); + *--p = cdigit; + accumbits -= basebits; + accum >>= basebits; + } while (i < size_a-1 ? accumbits >= basebits : + accum > 0); + } + } + else { + /* Not 0, and base not a power of 2. Divide repeatedly by + base, but for speed use the highest power of base that + fits in a digit. */ + Py_ssize_t size = size_a; + digit *pin = a->ob_digit; + PyLongObject *scratch; + /* powbasw <- largest power of base that fits in a digit. */ + digit powbase = base; /* powbase == base ** power */ + int power = 1; + for (;;) { + twodigits newpow = powbase * (twodigits)base; + if (newpow >> PyLong_SHIFT) /* doesn't fit in a digit */ + break; + powbase = (digit)newpow; + ++power; + } - for (i = 0; i < size_a; ++i) { - accum |= (twodigits)a->ob_digit[i] << accumbits; - accumbits += PyLong_SHIFT; - assert(accumbits >= basebits); - do { - char cdigit = (char)(accum & (base - 1)); - cdigit += (cdigit < 10) ? '0' : 'a'-10; - assert(p > PyUnicode_AS_UNICODE(str)); - *--p = cdigit; - accumbits -= basebits; - accum >>= basebits; - } while (i < size_a-1 ? accumbits >= basebits : - accum > 0); - } - } - else { - /* Not 0, and base not a power of 2. Divide repeatedly by - base, but for speed use the highest power of base that - fits in a digit. */ - Py_ssize_t size = size_a; - digit *pin = a->ob_digit; - PyLongObject *scratch; - /* powbasw <- largest power of base that fits in a digit. */ - digit powbase = base; /* powbase == base ** power */ - int power = 1; - for (;;) { - twodigits newpow = powbase * (twodigits)base; - if (newpow >> PyLong_SHIFT) /* doesn't fit in a digit */ - break; - powbase = (digit)newpow; - ++power; - } + /* Get a scratch area for repeated division. */ + scratch = _PyLong_New(size); + if (scratch == NULL) { + Py_DECREF(str); + return NULL; + } - /* Get a scratch area for repeated division. */ - scratch = _PyLong_New(size); - if (scratch == NULL) { - Py_DECREF(str); - return NULL; - } + if(sz < NUMERAL_SIZE){ + /* Repeatedly divide by powbase. */ + do { + int ntostore = power; + digit rem = inplace_divrem1(scratch->ob_digit, + pin, size, powbase); + pin = scratch->ob_digit; /* no need to use a again */ + if (pin[size - 1] == 0) + --size; + SIGCHECK({ + Py_DECREF(scratch); + Py_DECREF(str); + return NULL; + }) + /* Break rem into digits. */ + assert(ntostore > 0); + do { + digit nextrem = (digit)(rem / base); + char c = (char)(rem - nextrem * base); + assert(p > PyUnicode_AS_UNICODE(str)); + c += (c < 10) ? '0' : 'a'-10; + *--p = c; + rem = nextrem; + --ntostore; + /* Termination is a bit delicate: must not + store leading zeroes, so must get out if + remaining quotient and rem are both 0. */ + } while (ntostore && (size || rem)); + } while (size != 0); + } + else { + int status, j=0; + Py_ssize_t len1; + PyObject *pbase; + if(!(pc = (char*) malloc(sz))){ + Py_DECREF(scratch); + Py_DECREF(str); + return NULL; + } + for(i=0; i < sz; i++) + pc[i] = '0'; + if(!(pbase = PyLong_FromLong(base))){ + Py_DECREF(scratch); + Py_DECREF(str); + return NULL; + } + pc[sz] = '\0'; + if(sign == '-') + Py_SIZE(a) = -Py_SIZE(a); + status = _numeral(a, base, pbase, power, powbase, pc, scratch, sz); + if(sign == '-') + Py_SIZE(a) = -Py_SIZE(a); + Py_DECREF(pbase); + if (status < 0){ + Py_DECREF(scratch); + Py_DECREF(str); + return NULL; + } - /* Repeatedly divide by powbase. */ - do { - int ntostore = power; - digit rem = inplace_divrem1(scratch->ob_digit, - pin, size, powbase); - pin = scratch->ob_digit; /* no need to use a again */ - if (pin[size - 1] == 0) - --size; - SIGCHECK({ - Py_DECREF(scratch); - Py_DECREF(str); - return NULL; - }) + for(i=0; i < sz; i++){ + if (pc[i] > 48) + break; + } - /* Break rem into digits. */ - assert(ntostore > 0); - do { - digit nextrem = (digit)(rem / base); - char c = (char)(rem - nextrem * base); - assert(p > PyUnicode_AS_UNICODE(str)); - c += (c < 10) ? '0' : 'a'-10; - *--p = c; - rem = nextrem; - --ntostore; - /* Termination is a bit delicate: must not - store leading zeroes, so must get out if - remaining quotient and rem are both 0. */ - } while (ntostore && (size || rem)); - } while (size != 0); - Py_DECREF(scratch); - } - - if (base == 16) { - *--p = 'x'; - *--p = '0'; - } - else if (base == 8) { - *--p = 'o'; - *--p = '0'; - } - else if (base == 2) { - *--p = 'b'; - *--p = '0'; - } - else if (base != 10) { - *--p = '#'; - *--p = '0' + base%10; - if (base > 10) - *--p = '0' + base/10; - } - if (sign) - *--p = sign; - if (p != PyUnicode_AS_UNICODE(str)) { - Py_UNICODE *q = PyUnicode_AS_UNICODE(str); - assert(p > q); - do { - } while ((*q++ = *p++) != '\0'); - q--; - if (PyUnicode_Resize(&str, (Py_ssize_t) (q - PyUnicode_AS_UNICODE(str)))) { - Py_DECREF(str); - return NULL; - } - } - return (PyObject *)str; + for(; i < sz; i++){ + pc[j++] = (pc[i] > 0)?pc[i]:'0'; + } + pc[j] = '\0'; + len1 = strlen(pc); + *p = '\0'; + for(i=len1-1; i >= 0; i--) + *--p = pc[i]; + free(pc); + } + Py_DECREF(scratch); + } + if (base == 16) { + *--p = 'x'; + *--p = '0'; + } + else if (base == 8) { + *--p = 'o'; + *--p = '0'; + } + else if (base == 2) { + *--p = 'b'; + *--p = '0'; + } + else if (base != 10) { + *--p = '#'; + *--p = '0' + base%10; + if (base > 10) + *--p = '0' + base/10; + } + if (sign) + *--p = sign; + if (p != PyUnicode_AS_UNICODE(str)) { + Py_UNICODE *q = PyUnicode_AS_UNICODE(str); + assert(p > q); + do { + } while ((*q++ = *p++) != '\0'); + q--; + if (PyUnicode_Resize(&str, (Py_ssize_t) (q - PyUnicode_AS_UNICODE(str)))) { + Py_DECREF(str); + return NULL; + } + } + return (PyObject *)str; } + /* Table of digit values for 8-bit string -> integer conversion. * '0' maps to 0, ..., '9' maps to 9. * 'a' and 'A' map to 10, ..., 'z' and 'Z' map to 35. @@ -2071,6 +2222,8 @@ static PyLongObject *x_divrem (PyLongObject *, PyLongObject *, PyLongObject **); static PyObject *long_long(PyObject *v); +static PyLongObject * divmod_pos + (PyLongObject *, PyLongObject *, PyLongObject **); /* Long division with remainder, top-level routine */ @@ -2109,7 +2262,16 @@ } } else { - z = x_divrem(a, b, prem); + if ((size_b < 2*DIV_LIMIT) || + ((size_b < 4*DIV_LIMIT) && + (size_a < 0.897 * size_b + 44.97)) || + (size_a > 0.00343 * pow(size_b,2.9))) + { + z = x_divrem(a, b, prem); + } + else { + z = divmod_pos(a, b, prem); + } if (z == NULL) return -1; } @@ -2252,6 +2414,524 @@ return long_normalize(a); } +/* utilities for the long division algorithm */ +// shift left by shiftby limbs +static PyLongObject * +_long_int_lshift(PyLongObject *a, Py_ssize_t shiftby) +{ + PyLongObject *z = NULL; + Py_ssize_t oldsize, newsize, wordshift; + + assert(shiftby >= 0); + wordshift = shiftby; + oldsize = Py_SIZE(a); + newsize = oldsize + wordshift; + z = _PyLong_New(newsize); + if (z == NULL) + goto lshift_error; + bzero(z->ob_digit, wordshift * sizeof(digit)); + memcpy(z->ob_digit + wordshift, a->ob_digit, oldsize * sizeof(digit)); + z = long_normalize(z); +lshift_error: + return z; +} + +// shift right by shiftby limbs +static PyLongObject * +_long_int_rshift(PyLongObject *a, Py_ssize_t shiftby) +{ + PyLongObject *z = NULL; + Py_ssize_t newsize, wordshift; + + assert(shiftby >= 0); + wordshift = shiftby; + newsize = Py_SIZE(a) - wordshift; + if (newsize <= 0) { + z = _PyLong_New(0); + return z; + } + z = _PyLong_New(newsize); + if (z == NULL) + goto rshift_error; + memcpy(z->ob_digit, a->ob_digit + wordshift, newsize * sizeof(digit)); + z = long_normalize(z); +rshift_error: + return z; +} + +// mask for len limbs starting from start +static PyLongObject * +_long_int_mask(PyLongObject *a, Py_ssize_t len) +{ + PyLongObject *z = NULL; + Py_ssize_t newsize; + assert(len >= 0); + newsize = MIN(len, (long)Py_SIZE(a)); + if (newsize == 0) { + z = _PyLong_New(0); + return z; + } + z = _PyLong_New(newsize); + if (z == NULL) + goto int_mask_error; + memcpy(z->ob_digit, a->ob_digit, newsize * sizeof(digit)); + z = long_normalize(z); +int_mask_error: + return z; +} + +/* For long division a/b with with n = PySIZE(q), n > DIV_LIMIT use the + * binary splitting algorithm by Burnikel and Ziegler + * http://cr.yp.to/bib/1998/burnikel.ps + * n is required to be even. + */ +static PyObject * long_add(PyLongObject *, PyLongObject *); +static PyObject * long_sub(PyLongObject *, PyLongObject *); +static PyObject * long_mul(PyLongObject *, PyLongObject *); +static PyObject * long_div(PyObject *, PyObject *); +static int long_compare(PyLongObject *, PyLongObject *); +static PyObject * long_rshift(PyLongObject *, PyLongObject *); +static PyObject * long_lshift(PyObject *, PyObject *); +static PyObject * long_or(PyObject *, PyObject *); +static PyObject * long_and(PyObject *, PyObject *); +static PyObject * long_lshift(PyObject *, PyObject *); +static PyLongObject * div3n2n(PyLongObject *, digit *, PyLongObject *, + PyLongObject *, PyLongObject *, Py_ssize_t , PyLongObject **); + +static PyLongObject * +div2n1n(PyLongObject *a, PyLongObject *b, Py_ssize_t n, PyLongObject **prem) +{ + PyLongObject *q = NULL, *r; + Py_ssize_t size_a = Py_SIZE(a); + if (n <= DIV_LIMIT || size_a < n){ + if (size_a < n || + (size_a == n && + a->ob_digit[size_a-1] < b->ob_digit[n-1])) { + /* |a| < |b|. */ + if ((q = _PyLong_New(0)) == NULL) { + return NULL; + } + Py_INCREF(a); + *prem = (PyLongObject *) a; + return q; + } + if ((q = x_divrem(a, b, prem)) == NULL) { + return NULL; + } + return q; + } + PyLongObject *q1, *q2, *t1 = NULL, *a1 = NULL, *b1 = NULL, *b2 = NULL; + Py_ssize_t half_n = n >> 1; + if ((b1 = _long_int_rshift(b, half_n)) == NULL) return NULL; + if ((b2 = _long_int_mask(b, half_n)) == NULL) { + Py_XDECREF(b1); + return NULL; + } + if ((a1 = _long_int_rshift(a, n)) == NULL) { + Py_XDECREF(b1); + Py_XDECREF(b2); + return NULL; + } + if ((q1 = div3n2n(a1, a->ob_digit+half_n, b, b1, b2, half_n, &t1)) == + NULL) { + Py_XDECREF(b1); + Py_XDECREF(b2); + Py_XDECREF(a1); + return NULL; + } + Py_DECREF(a1); + if ((q2 = div3n2n(t1, a->ob_digit, b, b1, b2, half_n, &r)) == NULL) { + Py_XDECREF(b1); + Py_XDECREF(b2); + return NULL; + } + Py_DECREF(b1); + Py_DECREF(b2); + Py_DECREF(t1); + if(Py_SIZE(q1)) { + if ((q = _long_int_lshift(q1, half_n)) == NULL) { + return NULL; + } + memcpy(q->ob_digit, q2->ob_digit, Py_SIZE(q2) * sizeof(digit)); + } + else { + q = q2; + Py_INCREF(q2); + } + Py_DECREF(q1); + Py_DECREF(q2); + *prem = r; + return q; +} +static PyLongObject * k_mul(PyLongObject *, PyLongObject *); +static PyLongObject * x_add(PyLongObject *, PyLongObject *); +static PyLongObject * x_sub(PyLongObject *, PyLongObject *); + +/* Helper function for div2n1n; not intended to be called directly. */ +static PyLongObject * +div3n2n(PyLongObject *a12, digit *a3, PyLongObject *b, + PyLongObject *b1, PyLongObject *b2, Py_ssize_t n, PyLongObject **prem) +{ + PyLongObject *q = NULL, *r = NULL, *t1, *t2, *t3; + if ((t1 = _long_int_rshift(a12, n)) == NULL) { + return NULL; + } + if (long_compare(t1, b1) == 0){ + PyLongObject * one; + if ((one = (PyLongObject*) PyLong_FromLong(1)) == NULL) { + Py_DECREF(t1); + return NULL; + } + if ((t2 = _long_int_lshift(one, n)) == NULL) { + Py_DECREF(t1); + Py_DECREF(one); + return NULL; + } + if ((q = (PyLongObject *) long_sub(t2, one)) == NULL) { + Py_DECREF(t1); + Py_DECREF(t2); + Py_DECREF(one); + return NULL; + } + Py_DECREF(t2); + if ((t3 = _long_int_lshift(b1, n)) == NULL) { + Py_DECREF(t1); + Py_XDECREF(q); + Py_DECREF(one); + return NULL; + } + Py_DECREF(t1); + if ((t1 = (PyLongObject *) long_sub(a12, t3)) == NULL) { + Py_XDECREF(q); + Py_DECREF(t3); + Py_DECREF(one); + return NULL; + } + Py_DECREF(t3); + if ((r = (PyLongObject *) long_add(t1, b1)) == NULL) { + Py_XDECREF(q); + Py_DECREF(t1); + Py_DECREF(one); + return NULL; + } + Py_DECREF(one); + } + else { + if ((q = div2n1n(a12, b1, n, &r)) == NULL) { + Py_DECREF(t1); + return NULL; + } + } + Py_DECREF(t1); + if(Py_SIZE(r)) { + if ((t2 = _long_int_lshift(r, n)) == NULL) { + Py_XDECREF(q); + Py_XDECREF(r); + return NULL; + } + memcpy(t2->ob_digit, a3, n * sizeof(digit)); + } + else { + if ((t2 = _PyLong_New(n)) == NULL) { + Py_XDECREF(q); + Py_XDECREF(r); + return NULL; + } + memcpy(t2->ob_digit, a3, n * sizeof(digit)); + } + if ((t3 = (PyLongObject *) k_mul(q, b2)) == NULL) { + Py_XDECREF(q); + Py_XDECREF(r); + Py_DECREF(t2); + return NULL; + } + Py_DECREF(r); + if ((r = x_sub(t2, t3)) == NULL) { + Py_XDECREF(q); + Py_DECREF(t2); + Py_DECREF(t3); + return NULL; + } + Py_DECREF(t2); + Py_DECREF(t3); + while (Py_SIZE(r) < 0) { + if (q->ob_digit[0] > 0) + q->ob_digit[0]--; + else { + digit borrow = 1; + q->ob_digit[0] = (-1) & PyLong_MASK; + int i; + for (i=1; (i < Py_SIZE(q)) && borrow; ++i) { + borrow = q->ob_digit[i] - borrow; + q->ob_digit[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; + } + assert(borrow == 0); + } + t1 = r; + if ((r = (PyLongObject *) long_add(r, b)) == NULL) { + Py_XDECREF(q); + Py_DECREF(t1); + return NULL; + } + Py_DECREF(t1); + } + *prem = r; + return q; +} + +/* To perform long division a/b where a has more than 2*n, where n is + * the number of bits of b, split a in chunks of n nits, then call the + * div2n1n algorithm. Since n is required to be even in div2n1n, + * in case it is not pad a and b with zeros on the right till n is + * a multiple of 2**N, where N is the number of times n must be + * divided in the div2n1n algorithm. + */ +static PyLongObject * +divmod_pos(PyLongObject *a, PyLongObject *b, PyLongObject **prem) +{ + int top = 0; + PyLongObject *a0, *a1, *b1, *q = NULL, *r = NULL, + *q_digit = NULL, *t0, *t1, *t2; + int asign = (Py_SIZE(a) < 0); + int bsign = (Py_SIZE(b) < 0); + if (asign) Py_SIZE(a) = - Py_SIZE(a); + if (bsign) Py_SIZE(b) = - Py_SIZE(b); + + int na = _PyLong_NumBits((PyObject *) a); + int n = _PyLong_NumBits((PyObject *) b); + // make n a multiple of PyLong_SHIFT + int nr = n%PyLong_SHIFT; + int pad; + if ((t0 = (PyLongObject*) PyLong_FromLong(PyLong_SHIFT - nr)) + == NULL) { + return NULL; + } + if (nr > 0) { + pad = 1; + n = n + PyLong_SHIFT - n%PyLong_SHIFT; + na = na + PyLong_SHIFT - n%PyLong_SHIFT; + if ((a1 = (PyLongObject *) long_lshift((PyObject *) a, + (PyObject *) t0)) == NULL) { + Py_DECREF(t0); + return NULL; + } + if ((b1 = (PyLongObject *) long_lshift((PyObject *) b, + (PyObject *) t0)) == NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + return NULL; + } + } + else { + pad = 0; + a1 = a; + Py_INCREF(a); + b1 = b; + Py_INCREF(b); + } + + /* estimates the number of times n must be divided by to by div2n1n + * before falling back to x_divrem; increase till it can be divided + * that number of times + */ + int nab = na/n + 1; + int n_S = n/PyLong_SHIFT; + PyLongObject *lnn; + if ((lnn = (PyLongObject*) PyLong_FromLong(n_S/DIV_LIMIT)) == NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + Py_DECREF(b1); + return NULL; + } + int nn = _PyLong_NumBits((PyObject *) lnn); + int mask_n = (1 << nn) - 1; + int n1 = n_S; + while(n1 & mask_n) + n1++; + int shift_n = n1 - n_S; + Py_DECREF(lnn); + n_S = n1; + t1 = a1; + if ((a1 = _long_int_lshift(a1, shift_n)) == NULL) { + Py_DECREF(t0); + Py_DECREF(t1); + Py_DECREF(b1); + return NULL; + } + Py_DECREF(t1); + t1 = b1; + if ((b1 = _long_int_lshift(b1, shift_n)) == NULL) { + Py_DECREF(t0); + Py_DECREF(t1); + Py_DECREF(a1); + return NULL; + } + Py_DECREF(t1); + // slit a in chunks sized n_S + PyLongObject ** a_digits = + (PyLongObject **) calloc(nab, sizeof(PyLongObject *)); + if (a_digits == NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + Py_DECREF(b1); + return NULL; + } + if ((a0 = (PyLongObject *) _PyLong_Copy(a1)) == NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + Py_DECREF(b1); + free(a_digits); + return NULL; + } + + int i; + for(i=0; i < nab; i++) { + if ((a_digits[i] = _long_int_mask(a0, n_S)) == NULL) { + Py_DECREF(t0); + Py_DECREF(a0); + Py_DECREF(a1); + Py_DECREF(b1); + int j; + for(j=0; j < i; j++) + Py_DECREF(a_digits[j]); + free(a_digits); + return NULL; + } + t1 = a0; + if ((a0 = _long_int_rshift(a0, n_S)) == NULL) { + Py_DECREF(t0); + Py_DECREF(t1); + Py_DECREF(a1); + Py_DECREF(b1); + goto fail1; + } + Py_DECREF(t1); + if (Py_SIZE(a0) == 0) { + break; + } + } + top = i; + Py_DECREF(a0); + if (long_compare(a_digits[i], b1) >= 0) { + if ((r = (PyLongObject*) PyLong_FromLong(0)) == NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + Py_DECREF(b1); + goto fail1; + } + } + else { + if ((r = (PyLongObject *) _PyLong_Copy(a_digits[i--])) == + NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + Py_DECREF(b1); + goto fail1; + } + } + if ((q = (PyLongObject*) PyLong_FromLong(0)) == NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + Py_DECREF(b1); + Py_XDECREF(r); + goto fail1; + } + while(i >= 0) { + if ((t1 = _long_int_lshift(r, n_S)) == NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + Py_DECREF(b1); + Py_XDECREF(r); + Py_XDECREF(q); + goto fail1; + } + if ((t2 = (PyLongObject *) long_add(t1, a_digits[i--])) + == NULL) { + Py_DECREF(t0); + Py_DECREF(t1); + Py_DECREF(a1); + Py_DECREF(b1); + Py_XDECREF(r); + Py_XDECREF(q); + goto fail1; + } + Py_DECREF(t1); + Py_XDECREF(r); + if ((q_digit = div2n1n(t2, b1, n_S, &r)) == NULL) { + Py_DECREF(t0); + Py_DECREF(t2); + Py_DECREF(a1); + Py_DECREF(b1); + Py_XDECREF(q); + goto fail1; + } + Py_DECREF(t2); + if ((t1 = _long_int_lshift(q, n_S)) == NULL) { + Py_DECREF(t0); + Py_DECREF(a1); + Py_DECREF(b1); + Py_XDECREF(q); + Py_XDECREF(q_digit); + goto fail1; + } + Py_DECREF(q); + if ((q = (PyLongObject *) long_add(t1, q_digit)) == NULL) { + Py_DECREF(t0); + Py_DECREF(t1); + Py_DECREF(a1); + Py_DECREF(b1); + Py_XDECREF(q_digit); + goto fail1; + } + Py_DECREF(t1); + Py_DECREF(q_digit); + } + for(i=top; i >= 0; i--) { + Py_XDECREF(a_digits[i]); + } + free(a_digits); + + if (pad) { + t1 = r; + if ((r = (PyLongObject *) long_rshift(r, t0)) == NULL) { + Py_DECREF(t0); + Py_DECREF(t1); + Py_DECREF(a1); + Py_DECREF(b1); + Py_XDECREF(q); + return NULL; + } + Py_DECREF(t1); + } + t1 = r; + if ((r = _long_int_rshift(r, shift_n)) == NULL) { + Py_DECREF(t0); + Py_DECREF(t1); + Py_DECREF(a1); + Py_DECREF(b1); + Py_XDECREF(q); + return NULL; + } + Py_DECREF(t1); + + if (asign) Py_SIZE(a) = - Py_SIZE(a); + if (bsign) Py_SIZE(b) = - Py_SIZE(b); + Py_DECREF(a1); + Py_DECREF(b1); + Py_DECREF(t0); + *prem = r; + return q; +fail1: + for(i=top; i >= 0; i--) { + Py_DECREF(a_digits[i]); + } + free(a_digits); + return NULL; +} + + /* Methods */ static void