Index: longobject.c
===================================================================
--- longobject.c	(revision 70592)
+++ longobject.c	(working copy)
@@ -82,6 +82,11 @@
 #define KARATSUBA_CUTOFF 70
 #define KARATSUBA_SQUARE_CUTOFF (2 * KARATSUBA_CUTOFF)
 
+/* For long division, use the O(N**2) school algorithm unless the
+ * denominator contains more than DIV_LIMIT digits.
+ */
+const int DIV_LIMIT = KARATSUBA_CUTOFF;
+
 /* For exponentiation, use the binary left-to-right algorithm
  * unless the exponent contains more than FIVEARY_CUTOFF digits.
  * In that case, do 5 bits at a time.  The potential drawback is that
@@ -1517,165 +1522,311 @@
    Return a string object.
    If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */
 
+static void
+_PyLong_Format_small(PyLongObject *a, int base, int power, digit powbase,
+                        char *pp, PyLongObject *scratch, Py_ssize_t end)
+{
+        Py_ssize_t size, size_a = ABS(Py_SIZE(a));
+        char *p = pp + end;
+        digit *pin = a->ob_digit;
+        size = size_a;
+        /* Repeatedly divide by powbase. */
+        do {
+                int ntostore = power;
+                digit rem = inplace_divrem1(scratch->ob_digit,
+                                     pin, size, powbase);
+                pin = scratch->ob_digit; /* no need to use a again */
+                if (pin[size - 1] == 0)
+                        --size;
+
+                /* Break rem into digits. */
+                assert(ntostore > 0);
+                do {
+                        digit nextrem = (digit)(rem / base);
+                        char c = (char)(rem - nextrem * base);
+                        assert(p > PyUnicode_AS_UNICODE(str));
+                        c += (c < 10) ? '0' : 'a'-10;
+                        *--p = c;
+                        rem = nextrem;
+                        --ntostore;
+                        /* Termination is a bit delicate:  must not
+                           store leading zeroes, so must get out if
+                           remaining quotient and rem are both 0. */
+                } while (ntostore && (size || rem));
+        } while (size != 0);
+}
+
+#define NUMERAL_SIZE 350
+
+static PyObject * long_pow(PyObject *v, PyObject *w, PyObject *x);
+static int l_divmod(PyLongObject *v, PyLongObject *w,
+             PyLongObject **pdiv, PyLongObject **pmod);
+/* return -1 on failure, 0 for normal exit */
+static int
+_numeral(PyLongObject *n, int base, PyObject* pbase, int power, digit powbase,
+         char *p, PyLongObject *scratch, Py_ssize_t end)
+{
+        digit msd;
+        int status, ad, bd;
+        Py_ssize_t size, half, ndigits, msd_bits = 0;
+        PyLongObject *div, *mod;
+        PyObject *phalf, *pbase_phalf;
+        /* compute the approximate number of digits in the given base;
+         * use code from bit_length
+         */
+        ndigits = ABS(Py_SIZE(n));
+        if(ndigits == 0)
+                return 0;
+        msd = n->ob_digit[ndigits-1];
+        while (msd >= 32) {
+                msd_bits += 6;
+                msd >>= 6;
+        }
+        msd_bits += (long)(BitLengthTable[msd]);
+        size = (ndigits-1)*PyLong_SHIFT + msd_bits;
+        size = (Py_ssize_t)(size / log(base) + 1);
+        if(size < NUMERAL_SIZE){
+                _PyLong_Format_small(n, base, power, powbase, p,
+                                scratch, end);
+                return 0;
+        }
+        half = (size / 2) + (size & 1);
+        if((phalf = PyLong_FromLong(half)) == NULL)
+                return -1;
+        pbase_phalf = long_pow(pbase, phalf, Py_None);
+        if(!pbase_phalf){
+                Py_DECREF(phalf);
+                return -1;
+        }
+        Py_DECREF(phalf);
+        status = l_divmod(n, (PyLongObject*)pbase_phalf, &div, &mod);
+        if(status < 0){
+                Py_DECREF(pbase_phalf);
+                return -1;
+        }
+        Py_DECREF(pbase_phalf);
+        SIGCHECK({
+                return -1;
+        })
+        ad = _numeral(div, base, pbase, power, powbase, p, scratch,
+                        end - half);
+        if(ad < 0){
+                return -1;
+        }
+        bd = _numeral(mod, base, pbase, power, powbase, p, scratch, end);
+        if(bd < 0){
+                Py_DECREF(div);
+                return -1;
+        }
+        Py_DECREF(div);
+        Py_DECREF(mod);
+        return 0;
+}
+
 PyObject *
 _PyLong_Format(PyObject *aa, int base)
 {
-	register PyLongObject *a = (PyLongObject *)aa;
-	PyObject *str;
-	Py_ssize_t i, j, sz;
-	Py_ssize_t size_a;
-	Py_UNICODE *p;
-	int bits;
-	char sign = '\0';
+        register PyLongObject *a = (PyLongObject *)aa;
+        PyObject *str;
+        Py_ssize_t i, j, sz;
+        Py_ssize_t size_a;
+        Py_UNICODE *p;
+        char *pc;
+        int bits;
+        char sign = '\0';
 
-	if (a == NULL || !PyLong_Check(a)) {
-		PyErr_BadInternalCall();
-		return NULL;
-	}
-	assert(base >= 2 && base <= 36);
-	size_a = ABS(Py_SIZE(a));
+        if (a == NULL || !PyLong_Check(a)) {
+                PyErr_BadInternalCall();
+                return NULL;
+        }
+        assert(base >= 2 && base <= 36);
+        size_a = ABS(Py_SIZE(a));
 
-	/* Compute a rough upper bound for the length of the string */
-	i = base;
-	bits = 0;
-	while (i > 1) {
-		++bits;
-		i >>= 1;
-	}
-	i = 5;
-	j = size_a*PyLong_SHIFT + bits-1;
-	sz = i + j / bits;
-	if (j / PyLong_SHIFT < size_a || sz < i) {
-		PyErr_SetString(PyExc_OverflowError,
-				"int is too large to format");
-		return NULL;
-	}
-	str = PyUnicode_FromUnicode(NULL, sz);
-	if (str == NULL)
-		return NULL;
-	p = PyUnicode_AS_UNICODE(str) + sz;
-	*p = '\0';
-	if (Py_SIZE(a) < 0)
-		sign = '-';
+        /* Compute a rough upper bound for the length of the string */
+        i = base;
+        bits = 0;
+        while (i > 1) {
+                ++bits;
+                i >>= 1;
+        }
+        i = 5;
+        j = size_a*PyLong_SHIFT + bits-1;
+        sz = i + j / bits;
+        if (j / PyLong_SHIFT < size_a || sz < i) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "int is too large to format");
+                return NULL;
+        }
+        str = PyUnicode_FromUnicode(NULL, sz);
+        if (str == NULL)
+                return NULL;
+        p = PyUnicode_AS_UNICODE(str) + sz;
+        *p = '\0';
+        if (Py_SIZE(a) < 0)
+                sign = '-';
+        if (Py_SIZE(a) == 0) {
+                *--p = '0';
+        }
+        else if ((base & (base - 1)) == 0) {
+                /* JRH: special case for power-of-2 bases */
+                twodigits accum = 0;
+                int accumbits = 0;      /* # of bits in accum */
+                int basebits = 1;       /* # of bits in base-1 */
+                i = base;
+                while ((i >>= 1) > 1)
+                        ++basebits;
 
-	if (Py_SIZE(a) == 0) {
-		*--p = '0';
-	}
-	else if ((base & (base - 1)) == 0) {
-		/* JRH: special case for power-of-2 bases */
-		twodigits accum = 0;
-		int accumbits = 0;	/* # of bits in accum */
-		int basebits = 1;	/* # of bits in base-1 */
-		i = base;
-		while ((i >>= 1) > 1)
-			++basebits;
+                for (i = 0; i < size_a; ++i) {
+                        accum |= (twodigits)a->ob_digit[i] << accumbits;
+                        accumbits += PyLong_SHIFT;
+                        assert(accumbits >= basebits);
+                        do {
+                                char cdigit = (char)(accum & (base - 1));
+                                cdigit += (cdigit < 10) ? '0' : 'a'-10;
+                                assert(p > PyUnicode_AS_UNICODE(str));
+                                *--p = cdigit;
+                                accumbits -= basebits;
+                                accum >>= basebits;
+                        } while (i < size_a-1 ? accumbits >= basebits :
+                                                accum > 0);
+                }
+        }
+        else {
+                /* Not 0, and base not a power of 2.  Divide repeatedly by
+                   base, but for speed use the highest power of base that
+                   fits in a digit. */
+                Py_ssize_t size = size_a;
+                digit *pin = a->ob_digit;
+                PyLongObject *scratch;
+                /* powbasw <- largest power of base that fits in a digit. */
+                digit powbase = base;  /* powbase == base ** power */
+                int power = 1;
+                for (;;) {
+                        twodigits newpow = powbase * (twodigits)base;
+                        if (newpow >> PyLong_SHIFT)  /* doesn't fit in a digit */
+                                break;
+                        powbase = (digit)newpow;
+                        ++power;
+                }
 
-		for (i = 0; i < size_a; ++i) {
-			accum |= (twodigits)a->ob_digit[i] << accumbits;
-			accumbits += PyLong_SHIFT;
-			assert(accumbits >= basebits);
-			do {
-				char cdigit = (char)(accum & (base - 1));
-				cdigit += (cdigit < 10) ? '0' : 'a'-10;
-				assert(p > PyUnicode_AS_UNICODE(str));
-				*--p = cdigit;
-				accumbits -= basebits;
-				accum >>= basebits;
-			} while (i < size_a-1 ? accumbits >= basebits :
-					 	accum > 0);
-		}
-	}
-	else {
-		/* Not 0, and base not a power of 2.  Divide repeatedly by
-		   base, but for speed use the highest power of base that
-		   fits in a digit. */
-		Py_ssize_t size = size_a;
-		digit *pin = a->ob_digit;
-		PyLongObject *scratch;
-		/* powbasw <- largest power of base that fits in a digit. */
-		digit powbase = base;  /* powbase == base ** power */
-		int power = 1;
-		for (;;) {
-			twodigits newpow = powbase * (twodigits)base;
-			if (newpow >> PyLong_SHIFT)  /* doesn't fit in a digit */
-				break;
-			powbase = (digit)newpow;
-			++power;
-		}
+                /* Get a scratch area for repeated division. */
+                scratch = _PyLong_New(size);
+                if (scratch == NULL) {
+                        Py_DECREF(str);
+                        return NULL;
+                }
 
-		/* Get a scratch area for repeated division. */
-		scratch = _PyLong_New(size);
-		if (scratch == NULL) {
-			Py_DECREF(str);
-			return NULL;
-		}
+                if(sz < NUMERAL_SIZE){
+                /* Repeatedly divide by powbase. */
+                        do {
+                                int ntostore = power;
+                                digit rem = inplace_divrem1(scratch->ob_digit,
+                                                pin, size, powbase);
+                                pin = scratch->ob_digit; /* no need to use a again */
+                                if (pin[size - 1] == 0)
+                                        --size;
+                                SIGCHECK({
+                                        Py_DECREF(scratch);
+                                        Py_DECREF(str);
+                                        return NULL;
+                                })
+                                /* Break rem into digits. */
+                                assert(ntostore > 0);
+                                do {
+                                        digit nextrem = (digit)(rem / base);
+                                        char c = (char)(rem - nextrem * base);
+                                        assert(p > PyUnicode_AS_UNICODE(str));
+                                        c += (c < 10) ? '0' : 'a'-10;
+                                        *--p = c;
+                                        rem = nextrem;
+                                        --ntostore;
+                                /* Termination is a bit delicate:  must not
+                                store leading zeroes, so must get out if
+                                remaining quotient and rem are both 0. */
+                                } while (ntostore && (size || rem));
+                        } while (size != 0);
+                }
+                else {
+                        int status, j=0;
+                        Py_ssize_t len1;
+                        PyObject *pbase;
+                        if(!(pc = (char*) malloc(sz))){
+                                Py_DECREF(scratch);
+                                Py_DECREF(str);
+                                return NULL;
+                        }
+                        for(i=0; i < sz; i++)
+                                pc[i] = '0';
+                        if(!(pbase = PyLong_FromLong(base))){
+                                Py_DECREF(scratch);
+                                Py_DECREF(str);
+                                return NULL;
+                        }
+                        pc[sz] = '\0';
+                        if(sign == '-')
+                                Py_SIZE(a) = -Py_SIZE(a);
+                        status = _numeral(a, base, pbase, power, powbase, pc, scratch, sz);
+                        if(sign == '-')
+                                Py_SIZE(a) = -Py_SIZE(a);
+                        Py_DECREF(pbase);
+                        if (status < 0){
+                                Py_DECREF(scratch);
+                                Py_DECREF(str);
+                                return NULL;
+                        }
 
-		/* Repeatedly divide by powbase. */
-		do {
-			int ntostore = power;
-			digit rem = inplace_divrem1(scratch->ob_digit,
-						     pin, size, powbase);
-			pin = scratch->ob_digit; /* no need to use a again */
-			if (pin[size - 1] == 0)
-				--size;
-			SIGCHECK({
-				Py_DECREF(scratch);
-				Py_DECREF(str);
-				return NULL;
-			})
+                        for(i=0; i < sz; i++){
+                                if (pc[i] > 48)
+                                        break;
+                        }
 
-			/* Break rem into digits. */
-			assert(ntostore > 0);
-			do {
-				digit nextrem = (digit)(rem / base);
-				char c = (char)(rem - nextrem * base);
-				assert(p > PyUnicode_AS_UNICODE(str));
-				c += (c < 10) ? '0' : 'a'-10;
-				*--p = c;
-				rem = nextrem;
-				--ntostore;
-				/* Termination is a bit delicate:  must not
-				   store leading zeroes, so must get out if
-				   remaining quotient and rem are both 0. */
-			} while (ntostore && (size || rem));
-		} while (size != 0);
-		Py_DECREF(scratch);
-	}
-
-	if (base == 16) {
-		*--p = 'x';
-		*--p = '0';
-	}
-	else if (base == 8) {
-		*--p = 'o';
-		*--p = '0';
-	}
-	else if (base == 2) {
-		*--p = 'b';
-		*--p = '0';
-	}
-	else if (base != 10) {
-		*--p = '#';
-		*--p = '0' + base%10;
-		if (base > 10)
-			*--p = '0' + base/10;
-	}
-	if (sign)
-		*--p = sign;
-	if (p != PyUnicode_AS_UNICODE(str)) {
-		Py_UNICODE *q = PyUnicode_AS_UNICODE(str);
-		assert(p > q);
-		do {
-		} while ((*q++ = *p++) != '\0');
-		q--;
-		if (PyUnicode_Resize(&str, (Py_ssize_t) (q - PyUnicode_AS_UNICODE(str)))) {
-			Py_DECREF(str);
-			return NULL;
-		}
-	}
-	return (PyObject *)str;
+                        for(; i < sz; i++){
+                                pc[j++] = (pc[i] > 0)?pc[i]:'0';
+                        }
+                        pc[j] = '\0';
+                        len1 = strlen(pc);
+                        *p = '\0';
+                        for(i=len1-1; i >= 0; i--)
+                                *--p = pc[i];
+                        free(pc);
+                }
+                Py_DECREF(scratch);
+        }
+        if (base == 16) {
+                *--p = 'x';
+                *--p = '0';
+        }
+        else if (base == 8) {
+                *--p = 'o';
+                *--p = '0';
+        }
+        else if (base == 2) {
+                *--p = 'b';
+                *--p = '0';
+        }
+        else if (base != 10) {
+                *--p = '#';
+                *--p = '0' + base%10;
+                if (base > 10)
+                        *--p = '0' + base/10;
+        }
+        if (sign)
+                *--p = sign;
+        if (p != PyUnicode_AS_UNICODE(str)) {
+                Py_UNICODE *q = PyUnicode_AS_UNICODE(str);
+                assert(p > q);
+                do {
+                } while ((*q++ = *p++) != '\0');
+                q--;
+                if (PyUnicode_Resize(&str, (Py_ssize_t) (q - PyUnicode_AS_UNICODE(str)))) {
+                        Py_DECREF(str);
+                        return NULL;
+                }
+        }
+        return (PyObject *)str;
 }
 
+
 /* Table of digit values for 8-bit string -> integer conversion.
  * '0' maps to 0, ..., '9' maps to 9.
  * 'a' and 'A' map to 10, ..., 'z' and 'Z' map to 35.
@@ -2071,6 +2222,8 @@
 static PyLongObject *x_divrem
 	(PyLongObject *, PyLongObject *, PyLongObject **);
 static PyObject *long_long(PyObject *v);
+static PyLongObject * divmod_pos
+        (PyLongObject *, PyLongObject *, PyLongObject **);
 
 /* Long division with remainder, top-level routine */
 
@@ -2109,7 +2262,16 @@
 		}
 	}
 	else {
-		z = x_divrem(a, b, prem);
+                if ((size_b < 2*DIV_LIMIT) ||
+                ((size_b < 4*DIV_LIMIT) &&
+                (size_a < 0.897 * size_b + 44.97)) ||
+                  (size_a > 0.00343 * pow(size_b,2.9)))
+                {
+                        z = x_divrem(a, b, prem);
+                }
+                else {
+                        z = divmod_pos(a, b, prem);
+                }
 		if (z == NULL)
 			return -1;
 	}
@@ -2252,6 +2414,524 @@
 	return long_normalize(a);
 }
 
+/* utilities for the long division algorithm */
+// shift left by shiftby limbs 
+static PyLongObject *
+_long_int_lshift(PyLongObject *a, Py_ssize_t shiftby)
+{
+        PyLongObject *z = NULL;
+        Py_ssize_t oldsize, newsize, wordshift;
+
+        assert(shiftby >= 0);
+        wordshift = shiftby;
+        oldsize = Py_SIZE(a);
+        newsize = oldsize + wordshift;
+        z = _PyLong_New(newsize);
+        if (z == NULL)
+                goto lshift_error;
+        bzero(z->ob_digit, wordshift * sizeof(digit));
+        memcpy(z->ob_digit + wordshift, a->ob_digit, oldsize * sizeof(digit));
+        z = long_normalize(z);
+lshift_error:
+        return z;
+}
+
+// shift right by shiftby limbs 
+static PyLongObject *
+_long_int_rshift(PyLongObject *a, Py_ssize_t shiftby)
+{
+        PyLongObject *z = NULL;
+        Py_ssize_t newsize, wordshift;
+
+        assert(shiftby >= 0);
+        wordshift = shiftby;
+        newsize = Py_SIZE(a) - wordshift;
+        if (newsize <= 0) {
+                z = _PyLong_New(0);
+        return z;
+        }
+        z = _PyLong_New(newsize);
+        if (z == NULL)
+                goto rshift_error;
+        memcpy(z->ob_digit, a->ob_digit + wordshift, newsize * sizeof(digit));
+        z = long_normalize(z);
+rshift_error:
+        return z;
+}
+
+// mask for len limbs starting from start
+static PyLongObject *
+_long_int_mask(PyLongObject *a, Py_ssize_t len)
+{
+        PyLongObject *z = NULL;
+        Py_ssize_t newsize;
+        assert(len >= 0);
+        newsize = MIN(len, (long)Py_SIZE(a));
+        if (newsize == 0) {
+                z = _PyLong_New(0);
+                return z;
+        }
+        z = _PyLong_New(newsize);
+        if (z == NULL)
+                goto int_mask_error;
+        memcpy(z->ob_digit, a->ob_digit, newsize * sizeof(digit));
+        z = long_normalize(z);
+int_mask_error:
+        return z;
+}
+
+/* For long division a/b with with n = PySIZE(q), n > DIV_LIMIT use the 
+ * binary splitting algorithm by Burnikel and Ziegler
+ * http://cr.yp.to/bib/1998/burnikel.ps
+ *  n is required to be even.
+ */
+static PyObject * long_add(PyLongObject *, PyLongObject *);
+static PyObject * long_sub(PyLongObject *, PyLongObject *);
+static PyObject * long_mul(PyLongObject *, PyLongObject *);
+static PyObject * long_div(PyObject *, PyObject *);
+static int long_compare(PyLongObject *, PyLongObject *);
+static PyObject * long_rshift(PyLongObject *, PyLongObject *);
+static PyObject * long_lshift(PyObject *, PyObject *);
+static PyObject * long_or(PyObject *, PyObject *);
+static PyObject * long_and(PyObject *, PyObject *);
+static PyObject * long_lshift(PyObject *, PyObject *);
+static PyLongObject * div3n2n(PyLongObject *, digit *, PyLongObject *,
+             PyLongObject *, PyLongObject *, Py_ssize_t , PyLongObject **);
+
+static PyLongObject *
+div2n1n(PyLongObject *a, PyLongObject *b, Py_ssize_t n, PyLongObject **prem)
+{
+        PyLongObject *q = NULL, *r;
+        Py_ssize_t size_a = Py_SIZE(a);
+        if (n <= DIV_LIMIT || size_a < n){
+                if (size_a < n ||
+                   (size_a == n &&
+                     a->ob_digit[size_a-1] < b->ob_digit[n-1])) {
+                        /* |a| < |b|. */
+                        if ((q = _PyLong_New(0)) == NULL) {
+                                  return NULL;
+                        }
+                        Py_INCREF(a);
+                        *prem = (PyLongObject *) a;
+                        return q;
+                }
+                if ((q = x_divrem(a, b, prem)) == NULL) {
+                        return NULL;
+                }
+                return q;
+        }
+        PyLongObject *q1, *q2, *t1 = NULL, *a1 = NULL, *b1 = NULL, *b2 = NULL;
+        Py_ssize_t half_n = n >> 1;
+        if ((b1 = _long_int_rshift(b, half_n)) == NULL) return NULL;
+        if ((b2 = _long_int_mask(b, half_n)) == NULL) {
+                Py_XDECREF(b1);
+                return NULL;
+        }
+        if ((a1 = _long_int_rshift(a, n)) == NULL) {
+                Py_XDECREF(b1);
+                Py_XDECREF(b2);
+                return NULL;
+        }
+        if ((q1 = div3n2n(a1, a->ob_digit+half_n, b, b1, b2, half_n, &t1)) ==
+            NULL) {
+                Py_XDECREF(b1);
+                Py_XDECREF(b2);
+                Py_XDECREF(a1);
+                return NULL;
+        }
+        Py_DECREF(a1);
+        if ((q2 = div3n2n(t1, a->ob_digit, b, b1, b2, half_n, &r)) == NULL) {
+                Py_XDECREF(b1);
+                Py_XDECREF(b2);
+                return NULL;
+        }
+        Py_DECREF(b1);
+        Py_DECREF(b2);
+        Py_DECREF(t1);
+        if(Py_SIZE(q1)) {
+                if ((q = _long_int_lshift(q1, half_n)) == NULL) {
+                        return NULL;
+                }
+                memcpy(q->ob_digit, q2->ob_digit, Py_SIZE(q2) * sizeof(digit));
+        }
+        else {
+                q = q2;
+                Py_INCREF(q2);
+        }
+        Py_DECREF(q1);
+        Py_DECREF(q2);
+        *prem = r;
+        return q;
+}
+static PyLongObject * k_mul(PyLongObject *, PyLongObject *);
+static PyLongObject * x_add(PyLongObject *, PyLongObject *);
+static PyLongObject * x_sub(PyLongObject *, PyLongObject *);
+
+/* Helper function for div2n1n; not intended to be called directly. */
+static PyLongObject *
+div3n2n(PyLongObject *a12, digit *a3, PyLongObject *b,
+         PyLongObject *b1, PyLongObject *b2, Py_ssize_t n, PyLongObject **prem)
+{
+        PyLongObject *q = NULL, *r = NULL, *t1, *t2, *t3;
+        if ((t1 = _long_int_rshift(a12, n)) == NULL) {
+                return NULL;
+        }
+        if (long_compare(t1, b1) == 0){
+                PyLongObject * one;
+                if ((one = (PyLongObject*) PyLong_FromLong(1)) == NULL) {
+                        Py_DECREF(t1);
+                        return NULL;
+                }
+                if ((t2 = _long_int_lshift(one, n)) == NULL) {
+                        Py_DECREF(t1);
+                        Py_DECREF(one);
+                        return NULL;
+                }
+                if ((q = (PyLongObject *) long_sub(t2, one)) == NULL) {
+                        Py_DECREF(t1);
+                        Py_DECREF(t2);
+                        Py_DECREF(one);
+                        return NULL;
+                }
+                Py_DECREF(t2);
+                if ((t3 = _long_int_lshift(b1, n)) == NULL) {
+                        Py_DECREF(t1);
+                        Py_XDECREF(q);
+                        Py_DECREF(one);
+                        return NULL;
+                }
+                Py_DECREF(t1);
+                if ((t1 = (PyLongObject *) long_sub(a12, t3)) == NULL) {
+                        Py_XDECREF(q);
+                        Py_DECREF(t3);
+                        Py_DECREF(one);
+                        return NULL;
+                }
+                Py_DECREF(t3);
+                if ((r = (PyLongObject *) long_add(t1, b1)) == NULL) {
+                        Py_XDECREF(q);
+                        Py_DECREF(t1);
+                        Py_DECREF(one);
+                        return NULL;
+                }
+                Py_DECREF(one);
+        }
+        else {
+                if ((q = div2n1n(a12, b1, n, &r)) == NULL) {
+                        Py_DECREF(t1);
+                        return NULL;
+                }
+        }
+        Py_DECREF(t1);
+        if(Py_SIZE(r)) {
+                if ((t2 = _long_int_lshift(r, n)) == NULL) {
+                        Py_XDECREF(q);
+                        Py_XDECREF(r);
+                        return NULL;
+                }
+                memcpy(t2->ob_digit, a3, n * sizeof(digit));
+        }
+        else {
+                if ((t2 = _PyLong_New(n)) == NULL) {
+                        Py_XDECREF(q);
+                        Py_XDECREF(r);
+                        return NULL;
+                }
+                memcpy(t2->ob_digit, a3, n * sizeof(digit));
+        }
+        if ((t3 = (PyLongObject *) k_mul(q, b2)) == NULL) {
+                Py_XDECREF(q);
+                Py_XDECREF(r);
+                Py_DECREF(t2);
+                return NULL;
+        }
+        Py_DECREF(r);
+        if ((r = x_sub(t2, t3)) == NULL) {
+                Py_XDECREF(q);
+                Py_DECREF(t2);
+                Py_DECREF(t3);
+                return NULL;
+        }
+        Py_DECREF(t2);
+        Py_DECREF(t3);
+        while (Py_SIZE(r) < 0) {
+                if (q->ob_digit[0] > 0)
+                        q->ob_digit[0]--;
+                else {
+                        digit borrow = 1;
+                        q->ob_digit[0] = (-1) & PyLong_MASK;
+                        int i;
+                        for (i=1; (i < Py_SIZE(q)) && borrow; ++i) {
+                                borrow = q->ob_digit[i] - borrow;
+                                q->ob_digit[i] = borrow & PyLong_MASK;
+                                borrow >>= PyLong_SHIFT;
+                                borrow &= 1;
+                        }
+                        assert(borrow == 0);
+                }
+                t1 = r;
+                if ((r = (PyLongObject *) long_add(r, b)) == NULL) {
+                        Py_XDECREF(q);
+                        Py_DECREF(t1);
+                        return NULL;
+                }
+                Py_DECREF(t1);
+        }
+        *prem = r;
+        return q;
+}
+
+/* To perform long division a/b where a has more than 2*n, where n is 
+ * the number of bits of b, split a in chunks of n nits, then call the
+ * div2n1n algorithm. Since n is required to be even in div2n1n, 
+ * in case it is not pad a and b with zeros on the right till n is
+ * a multiple of 2**N, where N is the number of times n must be
+ * divided in the div2n1n algorithm.
+ */
+static PyLongObject *
+divmod_pos(PyLongObject *a, PyLongObject *b, PyLongObject **prem)
+{
+        int top = 0;
+        PyLongObject *a0, *a1, *b1, *q = NULL, *r = NULL,
+                     *q_digit = NULL, *t0, *t1, *t2;
+        int asign = (Py_SIZE(a) < 0);
+        int bsign = (Py_SIZE(b) < 0);
+        if (asign) Py_SIZE(a) = - Py_SIZE(a);
+        if (bsign) Py_SIZE(b) = - Py_SIZE(b);
+
+        int na = _PyLong_NumBits((PyObject *) a);
+        int n = _PyLong_NumBits((PyObject *) b);
+        // make n a multiple of PyLong_SHIFT
+        int nr = n%PyLong_SHIFT;
+        int pad;
+        if ((t0 = (PyLongObject*) PyLong_FromLong(PyLong_SHIFT - nr))
+            == NULL) {
+                return NULL;
+        }
+        if (nr > 0) {
+                pad = 1;
+                n = n + PyLong_SHIFT - n%PyLong_SHIFT;
+                na = na + PyLong_SHIFT - n%PyLong_SHIFT;
+                if ((a1 = (PyLongObject *) long_lshift((PyObject *) a,
+                               (PyObject *) t0)) == NULL) {
+                        Py_DECREF(t0);
+                        return NULL;
+                }
+                if ((b1 = (PyLongObject *) long_lshift((PyObject *) b,
+                               (PyObject *) t0)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(a1);
+                        return NULL;
+                }
+        }
+        else {
+                pad = 0;
+                a1 = a;
+                Py_INCREF(a);
+                b1 = b;
+                Py_INCREF(b);
+        }
+
+        /* estimates the number of times n must be divided by to by div2n1n
+         * before falling back to x_divrem; increase till it can be divided
+         * that number of times
+         */
+        int nab = na/n + 1;
+        int n_S = n/PyLong_SHIFT;
+        PyLongObject *lnn;
+        if ((lnn = (PyLongObject*) PyLong_FromLong(n_S/DIV_LIMIT)) == NULL) {
+                Py_DECREF(t0);
+                Py_DECREF(a1);
+                Py_DECREF(b1);
+                return NULL;
+        }
+        int nn = _PyLong_NumBits((PyObject *) lnn);
+        int mask_n = (1 << nn) - 1;
+        int n1 = n_S;
+        while(n1 & mask_n)
+                n1++;
+        int shift_n = n1 - n_S;
+        Py_DECREF(lnn);
+        n_S = n1;
+        t1 = a1;
+        if ((a1 = _long_int_lshift(a1, shift_n)) == NULL) {
+                Py_DECREF(t0);
+                Py_DECREF(t1);
+                Py_DECREF(b1);
+                return NULL;
+        }
+        Py_DECREF(t1);
+        t1 = b1;
+        if ((b1 = _long_int_lshift(b1, shift_n)) == NULL) {
+                Py_DECREF(t0);
+                Py_DECREF(t1);
+                Py_DECREF(a1);
+                return NULL;
+        }
+        Py_DECREF(t1);
+        // slit a in chunks sized n_S 
+        PyLongObject ** a_digits =
+          (PyLongObject **) calloc(nab, sizeof(PyLongObject *));
+        if (a_digits == NULL) {
+                Py_DECREF(t0);
+                Py_DECREF(a1);
+                Py_DECREF(b1);
+                return NULL;
+        }
+        if ((a0 = (PyLongObject *) _PyLong_Copy(a1)) == NULL) {
+                Py_DECREF(t0);
+                Py_DECREF(a1);
+                Py_DECREF(b1);
+                free(a_digits);
+                return NULL;
+        }
+
+        int i;
+        for(i=0; i < nab; i++) {
+                if ((a_digits[i] = _long_int_mask(a0, n_S)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(a0);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        int j;
+                        for(j=0; j < i; j++)
+                                Py_DECREF(a_digits[j]);
+                        free(a_digits);
+                        return NULL;
+                }
+                t1 = a0;
+                if ((a0 = _long_int_rshift(a0, n_S)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(t1);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        goto fail1;
+                }
+                Py_DECREF(t1);
+                if (Py_SIZE(a0) == 0) {
+                        break;
+                }
+        }
+        top = i;
+        Py_DECREF(a0);
+        if (long_compare(a_digits[i], b1) >= 0) {
+                if ((r = (PyLongObject*) PyLong_FromLong(0)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        goto fail1;
+                }
+        }
+        else {
+                if ((r = (PyLongObject *) _PyLong_Copy(a_digits[i--])) ==
+                    NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        goto fail1;
+                }
+        }
+        if ((q = (PyLongObject*) PyLong_FromLong(0)) == NULL) {
+                Py_DECREF(t0);
+                Py_DECREF(a1);
+                Py_DECREF(b1);
+                Py_XDECREF(r);
+                goto fail1;
+        }
+        while(i >= 0) {
+                if ((t1 = _long_int_lshift(r, n_S)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        Py_XDECREF(r);
+                        Py_XDECREF(q);
+                        goto fail1;
+                }
+                if ((t2 = (PyLongObject *) long_add(t1, a_digits[i--]))
+                    == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(t1);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        Py_XDECREF(r);
+                        Py_XDECREF(q);
+                        goto fail1;
+                }
+                Py_DECREF(t1);
+                Py_XDECREF(r);
+                if ((q_digit = div2n1n(t2, b1, n_S, &r)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(t2);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        Py_XDECREF(q);
+                        goto fail1;
+                }
+                Py_DECREF(t2);
+                if ((t1 = _long_int_lshift(q, n_S)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        Py_XDECREF(q);
+                        Py_XDECREF(q_digit);
+                        goto fail1;
+                }
+                Py_DECREF(q);
+                if ((q = (PyLongObject *) long_add(t1, q_digit)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(t1);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        Py_XDECREF(q_digit);
+                        goto fail1;
+                }
+                Py_DECREF(t1);
+                Py_DECREF(q_digit);
+        }
+        for(i=top; i >= 0; i--) {
+                Py_XDECREF(a_digits[i]);
+        }
+        free(a_digits);
+
+        if (pad) {
+                t1 = r;
+                if ((r = (PyLongObject *) long_rshift(r, t0)) == NULL) {
+                        Py_DECREF(t0);
+                        Py_DECREF(t1);
+                        Py_DECREF(a1);
+                        Py_DECREF(b1);
+                        Py_XDECREF(q);
+                        return NULL;
+                }
+                Py_DECREF(t1);
+        }
+        t1 = r;
+        if ((r = _long_int_rshift(r, shift_n)) == NULL) {
+                Py_DECREF(t0);
+                Py_DECREF(t1);
+                Py_DECREF(a1);
+                Py_DECREF(b1);
+                Py_XDECREF(q);
+                return NULL;
+        }
+        Py_DECREF(t1);
+
+        if (asign) Py_SIZE(a) = - Py_SIZE(a);
+        if (bsign) Py_SIZE(b) = - Py_SIZE(b);
+        Py_DECREF(a1);
+        Py_DECREF(b1);
+        Py_DECREF(t0);
+        *prem = r;
+        return q;
+fail1:
+        for(i=top; i >= 0; i--) {
+                Py_DECREF(a_digits[i]);
+        }
+        free(a_digits);
+        return NULL;
+}
+
+
 /* Methods */
 
 static void