Index: Python/marshal.c
===================================================================
--- Python/marshal.c	(revision 69715)
+++ Python/marshal.c	(working copy)
@@ -11,6 +11,8 @@
 #include "code.h"
 #include "marshal.h"
 
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+
 /* High water mark to determine when the marshalled object is dangerously deep
  * and risks coring the interpreter.  When the object stack gets this deep,
  * raise an exception instead of continuing.
@@ -122,7 +124,57 @@
 }
 #endif
 
+/* We assume that Python longs are stored internally in a base that is a
+   power of 2**15; for the sake of portability we always read and write
+   them in base exactly 2**15. */
+
+#define PyLong_MARSHAL_SHIFT 15
+#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
+#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
+#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
+#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
+#endif
+#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
+
 static void
+w_PyLong(const PyLongObject *ob, WFILE *p)
+{
+	Py_ssize_t i, j, n, l;
+	digit d;
+	/* Write ob as: TYPE_LONG tag, signed digit count, then the digits. */
+	w_byte(TYPE_LONG, p);
+	if (Py_SIZE(ob) == 0) {
+		w_long((long)0, p);
+		return;
+	}
+
+	/* set l to number of base PyLong_MARSHAL_BASE digits */
+	n = ABS(Py_SIZE(ob));
+	l = (n-1) * PyLong_MARSHAL_RATIO;
+	d = ob->ob_digit[n-1];
+	assert(d != 0); /* a PyLong is always normalized */
+	do {
+		d >>= PyLong_MARSHAL_SHIFT;
+		l++;
+	} while (d != 0);
+	w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p); /* sign of count = sign of ob */
+	/* all digits but the top one: exactly PyLong_MARSHAL_RATIO shorts each */
+	for (i=0; i < n-1; i++) {
+		d = ob->ob_digit[i];
+		for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
+			w_short(d & PyLong_MARSHAL_MASK, p);
+			d >>= PyLong_MARSHAL_SHIFT;
+		}
+		assert (d == 0);
+	}
+	d = ob->ob_digit[n-1]; /* top digit: stop once no bits remain */
+	do {
+		w_short(d & PyLong_MARSHAL_MASK, p);
+		d >>= PyLong_MARSHAL_SHIFT;
+	} while (d != 0);
+}
+
+static void
 w_object(PyObject *v, WFILE *p)
 {
 	Py_ssize_t i, n;
@@ -155,14 +207,8 @@
 		if ((x == -1)  && PyErr_Occurred()) {
 			PyLongObject *ob = (PyLongObject *)v;
 			PyErr_Clear();
-			w_byte(TYPE_LONG, p);
-			n = Py_SIZE(ob);
-			w_long((long)n, p);
-			if (n < 0)
-				n = -n;
-			for (i = 0; i < n; i++)
-				w_short(ob->ob_digit[i], p);
-		} 
+			w_PyLong(ob, p);
+		}
 		else {
 #if SIZEOF_LONG > 4
 			long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
@@ -481,6 +527,55 @@
 }
 
 static PyObject *
+r_PyLong(RFILE *p)
+{
+	PyLongObject *ob;
+	int size, i, j, md;
+	long n;
+	digit d;
+	/* n: signed count of base-2**15 digits that follow, low-order first */
+	n = r_long(p);
+	if (n == 0)
+		return (PyObject *)_PyLong_New(0);
+	if (n < -INT_MAX || n > INT_MAX)
+		goto bad_data;
+
+	size = 1 + (ABS(n)-1) / PyLong_MARSHAL_RATIO;
+	ob = _PyLong_New(size);
+	if (ob == NULL)
+		return NULL;
+	Py_SIZE(ob) = n > 0 ? size : -size;
+
+	for (i = 0; i < size-1; i++) {
+		d = 0;
+		for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
+			md = r_short(p);
+			if (md < 0 || md >= PyLong_MARSHAL_BASE)
+				goto bad_digit;
+			d += (digit)md << j*PyLong_MARSHAL_SHIFT;
+		}
+		ob->ob_digit[i] = d;
+	}
+	d = 0;
+	for (j=0; j < (ABS(n)-1)%PyLong_MARSHAL_RATIO + 1; j++) {
+		md = r_short(p);
+		if (md < 0 || md >= PyLong_MARSHAL_BASE)
+			goto bad_digit;
+		d += (digit)md << (j*PyLong_MARSHAL_SHIFT);
+	}
+	if (md == 0) /* most significant digit is zero: unnormalized data */
+		goto bad_digit;
+	ob->ob_digit[size-1] = d;
+	return (PyObject *)ob;
+  bad_digit:
+	Py_DECREF(ob);
+  bad_data:
+	PyErr_SetString(PyExc_ValueError, "bad marshal data");
+	return NULL;
+}
+
+
+static PyObject *
 r_object(RFILE *p)
 {
 	/* NULL is a valid return value, it does not necessarily means that
@@ -544,38 +639,8 @@
 		break;
 
 	case TYPE_LONG:
-		{
-			int size;
-			PyLongObject *ob;
-			n = r_long(p);
-			if (n < -INT_MAX || n > INT_MAX) {
-				PyErr_SetString(PyExc_ValueError,
-						"bad marshal data");
-				retval = NULL;
-				break;
-			}
-			size = n<0 ? -n : n;
-			ob = _PyLong_New(size);
-			if (ob == NULL) {
-				retval = NULL;
-				break;
-			}
-			Py_SIZE(ob) = n;
-			for (i = 0; i < size; i++) {
-				int digit = r_short(p);
-				if (digit < 0) {
-					Py_DECREF(ob);
-					PyErr_SetString(PyExc_ValueError,
-							"bad marshal data");
-					ob = NULL;
-					break;
-				}
-				if (ob != NULL)
-					ob->ob_digit[i] = digit;
-			}
-			retval = (PyObject *)ob;
-			break;
-		}
+		retval = r_PyLong(p);
+		break;
 
 	case TYPE_FLOAT:
 		{
Index: Python/sysmodule.c
===================================================================
--- Python/sysmodule.c	(revision 69715)
+++ Python/sysmodule.c	(working copy)
@@ -1012,6 +1012,7 @@
 Static objects:\n\
 \n\
 float_info -- a dict with information about the float implementation.\n\
+int_info -- a struct sequence with information about the int implementation.\n\
 maxsize -- the largest supported length of containers.\n\
 maxunicode -- the largest supported character\n\
 builtin_module_names -- tuple of module names built into this interpreter\n\
@@ -1375,6 +1376,8 @@
 			    PyLong_FromSsize_t(PY_SSIZE_T_MAX));
 	SET_SYS_FROM_STRING("float_info",
 			    PyFloat_GetInfo());
+	SET_SYS_FROM_STRING("int_info",
+			    PyLong_GetInfo());
 	SET_SYS_FROM_STRING("maxunicode",
 			    PyLong_FromLong(PyUnicode_GetMax()));
 	SET_SYS_FROM_STRING("builtin_module_names",
Index: Include/pyport.h
===================================================================
--- Include/pyport.h	(revision 69715)
+++ Include/pyport.h	(working copy)
@@ -69,6 +69,44 @@
 #endif
 #endif /* HAVE_LONG_LONG */
 
+/* a build with 30-bit digits for Python long integers needs an exact-width
+ * 32-bit unsigned integer type to store those digits.  (We could just use
+ * type 'long', but that would be wasteful on a system where longs are
+ * 64-bits.)  On Unix systems, the autoconf macro AC_TYPE_UINT32_T defines
+ * uint32_t to be such a type unless stdint.h or inttypes.h defines uint32_t.
+ * However, it doesn't set HAVE_UINT32_T, so we do that here.
+ */
+#if (defined UINT32_MAX || defined uint32_t)
+#ifndef PY_UINT32_T
+#define HAVE_UINT32_T
+#define PY_UINT32_T uint32_t
+#endif
+#endif
+
+/* Macros for a 64-bit unsigned integer type; used for type 'twodigits' in the
+ * long integer implementation, when 30-bit digits are enabled.
+ */
+#if (defined UINT64_MAX || defined uint64_t)
+#ifndef PY_UINT64_T
+#define HAVE_UINT64_T
+#define PY_UINT64_T uint64_t
+#endif
+#endif
+
+/* Signed variants of the above */
+#if (defined INT32_MAX || defined int32_t)
+#ifndef PY_INT32_T
+#define HAVE_INT32_T
+#define PY_INT32_T int32_t
+#endif
+#endif
+#if (defined INT64_MAX || defined int64_t)
+#ifndef PY_INT64_T
+#define HAVE_INT64_T
+#define PY_INT64_T int64_t
+#endif
+#endif
+
 /* uintptr_t is the C9X name for an unsigned integral type such that a
  * legitimate void* can be cast to uintptr_t and then back to void* again
  * without loss of information.  Similarly for intptr_t, wrt a signed
Index: Include/longintrepr.h
===================================================================
--- Include/longintrepr.h	(revision 69715)
+++ Include/longintrepr.h	(working copy)
@@ -18,13 +18,31 @@
    And, at some places it is assumed that MASK fits in an int, as well.
    long_pow() requires that SHIFT be divisible by 5. */
 
+/* Additional restrictions: PyLong_SHIFT should be greater than or equal to 8,
+   and strictly less than the number of bits in an unsigned long.  It should
+   also be strictly less than the number of bits in a Py_ssize_t.
+   Furthermore, NSMALLNEGINTS and NSMALLPOSINTS should fit in a digit. */
+
+
+#if HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#if (defined HAVE_UINT64_T && defined HAVE_UINT32_T &&          \
+     defined HAVE_INT64_T && defined HAVE_INT32_T &&            \
+     defined PYLONG_DIGIT_SIZE && (PYLONG_DIGIT_SIZE == 30))
+typedef PY_UINT32_T digit;
+typedef PY_INT32_T sdigit; /* signed variant of digit */
+typedef PY_UINT64_T twodigits;
+typedef PY_INT64_T stwodigits; /* signed variant of twodigits */
+#define PyLong_SHIFT	30
+#else
 typedef unsigned short digit;
-typedef short sdigit;                   /* signed variant of digit */
-#define BASE_TWODIGITS_TYPE long
-typedef unsigned BASE_TWODIGITS_TYPE twodigits;
-typedef BASE_TWODIGITS_TYPE stwodigits; /* signed variant of twodigits */
-
+typedef short sdigit; /* signed variant of digit */
+typedef unsigned long twodigits;
+typedef long stwodigits; /* signed variant of twodigits */
 #define PyLong_SHIFT	15
+#endif
 #define PyLong_BASE	((digit)1 << PyLong_SHIFT)
 #define PyLong_MASK	((digit)(PyLong_BASE - 1))
 
Index: Include/longobject.h
===================================================================
--- Include/longobject.h	(revision 69715)
+++ Include/longobject.h	(working copy)
@@ -26,6 +26,7 @@
 PyAPI_FUNC(size_t) PyLong_AsSize_t(PyObject *);
 PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLong(PyObject *);
 PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLongMask(PyObject *);
+PyAPI_FUNC(PyObject *) PyLong_GetInfo(void);
 
 /* It may be useful in the future. I've added it in the PyInt -> PyLong
    cleanup to keep the extra information. [CH] */
Index: configure.in
===================================================================
--- configure.in	(revision 69715)
+++ configure.in	(working copy)
@@ -1288,6 +1288,10 @@
 AC_TYPE_SIGNAL
 AC_TYPE_SIZE_T
 AC_TYPE_UID_T
+AC_TYPE_UINT32_T
+AC_TYPE_UINT64_T
+AC_TYPE_INT32_T
+AC_TYPE_INT64_T
 AC_CHECK_TYPE(ssize_t,
   AC_DEFINE(HAVE_SSIZE_T, 1, Define if your compiler provides ssize_t),,)
 
@@ -3125,6 +3129,30 @@
 
 LIBS=$LIBS_SAVE
 
+# determine what size digit to use for Python's longs
+AC_MSG_CHECKING([digit size for Python's longs])
+AC_ARG_ENABLE(big-digits,
+AC_HELP_STRING([--enable-big-digits@<:@=BITS@:>@],[use big digits for Python longs [[BITS=30]]]),
+[case $enableval in
+[yes|no|15|30])
+  ;;
+*)
+  AC_MSG_ERROR([bad value $enableval for --enable-big-digits; value should be 15 or 30]) ;;
+esac],
+[enable_big_digits=30])
+
+case $enable_big_digits in
+yes)
+  enable_big_digits=30 ;;
+no)
+  enable_big_digits=15 ;;
+*)
+  ;;
+esac
+
+AC_MSG_RESULT($enable_big_digits)
+AC_DEFINE_UNQUOTED(PYLONG_DIGIT_SIZE, $enable_big_digits, [Define as the size in bits of long digits])
+
 # check for wchar.h
 AC_CHECK_HEADER(wchar.h, [
   AC_DEFINE(HAVE_WCHAR_H, 1, 
Index: Objects/longobject.c
===================================================================
--- Objects/longobject.c	(revision 69715)
+++ Objects/longobject.c	(working copy)
@@ -4,6 +4,7 @@
 
 #include "Python.h"
 #include "longintrepr.h"
+#include "structseq.h"
 
 #include <ctype.h>
 #include <stddef.h>
@@ -204,19 +205,6 @@
 		return (PyObject*)v;
 	}
 
-	/* 2 digits */
-	if (!(abs_ival >> 2*PyLong_SHIFT)) {
-		v = _PyLong_New(2);
-		if (v) {
-			Py_SIZE(v) = 2*sign;
-			v->ob_digit[0] = Py_SAFE_DOWNCAST(
-				abs_ival & PyLong_MASK, unsigned long, digit);
-			v->ob_digit[1] = Py_SAFE_DOWNCAST(
-			      abs_ival >> PyLong_SHIFT, unsigned long, digit);
-		}
-		return (PyObject*)v;
-	}
-
 	/* Larger numbers: loop to determine number of digits */
 	t = abs_ival;
 	while (t) {
@@ -2097,6 +2085,7 @@
 	PyLongObject *w = mul1(w1, d);
 	PyLongObject *a;
 	Py_ssize_t j, k;
+	digit wm1, wm2, carry;
 
 	if (v == NULL || w == NULL) {
 		Py_XDECREF(v);
@@ -2104,71 +2093,75 @@
 		return NULL;
 	}
 
-	assert(size_v >= size_w && size_w > 1); /* Assert checks by div() */
+	assert(size_v >= size_w && size_w >= 2); /* Assert checks by div() */
 	assert(Py_REFCNT(v) == 1); /* Since v will be used as accumulator! */
 	assert(size_w == ABS(Py_SIZE(w))); /* That's how d was calculated */
 
 	size_v = ABS(Py_SIZE(v));
+	wm1 = w->ob_digit[size_w-1]; /* top digit of w */
+	wm2 = w->ob_digit[size_w-2]; /* and next one down */
+	/* we can often save an iteration */
+	if (v->ob_digit[size_v-1] < wm1)
+		size_v--;
+	else
+		assert(v->ob_digit[size_v] == 0);
+	assert(v->ob_digit[size_v] < wm1);
+
 	k = size_v - size_w;
 	a = _PyLong_New(k + 1);
 
+	carry = 1;
 	for (j = size_v; a != NULL && k >= 0; --j, --k) {
-		digit vj = (j >= size_v) ? 0 : v->ob_digit[j];
-		twodigits q;
-		stwodigits carry = 0;
+		digit q, r, zz, vj;
+		twodigits z;
 		Py_ssize_t i;
-
 		SIGCHECK({
 			Py_DECREF(a);
 			a = NULL;
 			break;
 		})
-		if (vj == w->ob_digit[size_w-1])
-			q = PyLong_MASK;
-		else
-			q = (((twodigits)vj << PyLong_SHIFT) + v->ob_digit[j-1]) /
-				w->ob_digit[size_w-1];
+		vj = v->ob_digit[j];
+		assert(vj <= wm1);
 
-		while (w->ob_digit[size_w-2]*q >
-				((
-					((twodigits)vj << PyLong_SHIFT)
-					+ v->ob_digit[j-1]
-					- q*w->ob_digit[size_w-1]
-								) << PyLong_SHIFT)
-				+ v->ob_digit[j-2])
+		/* estimate quotient q; may (rarely) overestimate by 1 */
+		z = ((twodigits)vj << PyLong_SHIFT) + v->ob_digit[j-1];
+		q = (digit)(z / wm1);
+		r = (digit)(z % wm1);
+		while (r <= PyLong_MASK && (twodigits)wm2 * q >
+		       ((twodigits)r << PyLong_SHIFT) + v->ob_digit[j-2]) {
 			--q;
-
-		for (i = 0; i < size_w && i+k < size_v; ++i) {
-			twodigits z = w->ob_digit[i] * q;
-			digit zz = (digit) (z >> PyLong_SHIFT);
-			carry += v->ob_digit[i+k] - z
-				+ ((twodigits)zz << PyLong_SHIFT);
-			v->ob_digit[i+k] = (digit)(carry & PyLong_MASK);
-			carry = Py_ARITHMETIC_RIGHT_SHIFT(stwodigits,
-							  carry, PyLong_SHIFT);
-			carry -= zz;
+			r += wm1;
 		}
 
-		if (i+k < size_v) {
-			carry += v->ob_digit[i+k];
-			v->ob_digit[i+k] = 0;
+		/* subtract q*w from remaining top limbs of v */
+		zz = 0;
+		for (i = 0; i < size_w; ++i) {
+			z = (twodigits)w->ob_digit[i] * q + zz;
+			zz = (digit)(z >> PyLong_SHIFT);
+			carry += v->ob_digit[i+k] + 
+				(digit)((z & PyLong_MASK) ^ PyLong_MASK);
+			v->ob_digit[i+k] = carry & PyLong_MASK;
+			carry >>= PyLong_SHIFT;
+			assert(carry == 0 || carry == 1);
 		}
+		carry += v->ob_digit[i+k] + (zz ^ PyLong_MASK);
+		assert(carry == PyLong_MASK || carry == PyLong_BASE);
+		carry >>= PyLong_SHIFT;
+		assert(carry == 0 || carry == 1);
 
-		if (carry == 0)
-			a->ob_digit[k] = (digit) q;
-		else {
-			assert(carry == -1);
-			a->ob_digit[k] = (digit) q-1;
-			carry = 0;
-			for (i = 0; i < size_w && i+k < size_v; ++i) {
+		if (carry == 0) {
+			/* this branch taken only if q was too large (rare) */
+			for (i = 0; i < size_w; ++i) {
 				carry += v->ob_digit[i+k] + w->ob_digit[i];
 				v->ob_digit[i+k] = (digit)(carry & PyLong_MASK);
-				carry = Py_ARITHMETIC_RIGHT_SHIFT(
-						stwodigits,
-						carry, PyLong_SHIFT);
+				carry >>= PyLong_SHIFT;
 			}
+			assert(carry == 1);
+			q--;
 		}
+		a->ob_digit[k] = q;
 	} /* for j, k */
+	Py_SIZE(v) = size_w;
 
 	if (a == NULL)
 		*prem = NULL;
@@ -2457,6 +2450,118 @@
 	return (PyObject *)z;
 }
 
+/* Here's a simple optimization for basecase multiplication that can achieve
+   speedups of up to 400% on some 64-bit platforms.  It uses the fact that
+   twodigits can represent values up to 16*PyLong_BASE*PyLong_BASE (assuming
+   that PyLong_SHIFT is 30), so up to 16 partial products can be accumulated
+   at once.  The innermost loop then generally contains fewer instructions
+   than in the usual long multiplication algorithm. */
+
+#if PyLong_SHIFT == 15
+#define MAX_PARTIALS 4
+#elif PyLong_SHIFT == 30
+#define MAX_PARTIALS 16
+#else
+#error "expected PyLong_SHIFT to be 15 or 30"
+#endif
+
+/* res[0:a_size+b_size] := a*b, assuming that b_size <= MAX_PARTIALS,
+   b_size <= a_size. */
+
+static void
+digits_multiply_init(digit *res, const digit *a, Py_ssize_t a_size,
+		     const digit *b, Py_ssize_t b_size)
+{
+	twodigits acc = 0;
+	Py_ssize_t j, k;
+	assert(b_size <= MAX_PARTIALS && b_size <= a_size);
+	for (k=0; k<b_size; k++) {
+		for (j=0; j<=k; j++)
+			acc += (twodigits)a[k-j]*b[j];
+		res[k] = (digit)(acc & PyLong_MASK);
+		acc >>= PyLong_SHIFT;
+	}
+	for (; k<a_size; k++) {
+		for (j=0; j<b_size; j++)
+			acc += (twodigits)a[k-j]*b[j];
+		res[k] = (digit)(acc & PyLong_MASK);
+		acc >>= PyLong_SHIFT;
+	}
+	for (; k<a_size+b_size; k++) {
+		for (j=k+1-a_size; j<b_size; j++)
+			acc += (twodigits)a[k-j]*b[j];
+		res[k] = (digit)(acc & PyLong_MASK);
+		acc >>= PyLong_SHIFT;
+	}
+	assert(acc == 0);
+}
+
+/* variant of the above: res[0:a_size+b_size] := a*b + res[0:a_size], assuming
+   that b_size <= MAX_PARTIALS, b_size <= a_size */
+
+static void
+digits_multiply_add(digit *res, const digit *a, Py_ssize_t a_size,
+		    const digit *b, Py_ssize_t b_size)
+{
+	twodigits acc = 0;
+	Py_ssize_t j, k;
+	assert(b_size <= MAX_PARTIALS && b_size <= a_size);
+	for (k=0; k<b_size; k++) {
+		acc += res[k];
+		for (j=0; j<=k; j++)
+			acc += (twodigits)a[k-j]*b[j];
+		res[k] = (digit)(acc & PyLong_MASK);
+		acc >>= PyLong_SHIFT;
+	}
+	for (; k<a_size; k++) {
+		acc += res[k];
+		for (j=0; j<b_size; j++)
+			acc += (twodigits)a[k-j]*b[j];
+		res[k] = (digit)(acc & PyLong_MASK);
+		acc >>= PyLong_SHIFT;
+	}
+	for (; k<a_size+b_size; k++) {
+		for (j=k+1-a_size; j<b_size; j++)
+			acc += (twodigits)a[k-j]*b[j];
+		res[k] = (digit)(acc & PyLong_MASK);
+		acc >>= PyLong_SHIFT;
+	}
+	assert(acc == 0);
+}
+
+/* res[0:a_size+b_size] := a * b */
+
+static void
+digits_multiply(digit *res, const digit *a, Py_ssize_t a_size,
+		const digit *b, Py_ssize_t b_size)
+{
+	if (a_size < b_size) { /* swap so that a is the longer operand */
+		const digit *temp;
+		Py_ssize_t temp_size;
+		temp = a; a = b; b = temp;
+		temp_size = a_size; a_size = b_size; b_size = temp_size;
+	}
+
+	/* Split b into pieces of at most MAX_PARTIALS limbs: the first goes
+	   through digits_multiply_init, the rest are accumulated with
+	   digits_multiply_add.  The final piece may be empty (b_size == 0). */
+	if (b_size < MAX_PARTIALS)
+		digits_multiply_init(res, a, a_size, b, b_size);
+	else {
+		digits_multiply_init(res, a, a_size, b, MAX_PARTIALS);
+		b_size -= MAX_PARTIALS; /* step past the piece just consumed */
+		b += MAX_PARTIALS;
+		res += MAX_PARTIALS;
+		while (b_size >= MAX_PARTIALS) {
+			digits_multiply_add(res, a, a_size, b, MAX_PARTIALS);
+			b_size -= MAX_PARTIALS;
+			b += MAX_PARTIALS;
+			res += MAX_PARTIALS;
+		}
+		digits_multiply_add(res, a, a_size, b, b_size);
+	}
+}
+
 /* Grade school multiplication, ignoring the signs.
  * Returns the absolute value of the product, or NULL if error.
  */
@@ -2472,8 +2577,8 @@
 	if (z == NULL)
 		return NULL;
 
-	memset(z->ob_digit, 0, Py_SIZE(z) * sizeof(digit));
 	if (a == b) {
+		memset(z->ob_digit, 0, Py_SIZE(z) * sizeof(digit));
 		/* Efficient squaring per HAC, Algorithm 14.16:
 		 * http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
 		 * Gives slightly less than a 2x speedup when a == b,
@@ -2518,28 +2623,8 @@
 		}
 	}
 	else {	/* a is not the same as b -- gradeschool long mult */
-		for (i = 0; i < size_a; ++i) {
-			twodigits carry = 0;
-			twodigits f = a->ob_digit[i];
-			digit *pz = z->ob_digit + i;
-			digit *pb = b->ob_digit;
-			digit *pbend = b->ob_digit + size_b;
-
-			SIGCHECK({
-				Py_DECREF(z);
-				return NULL;
-			})
-
-			while (pb < pbend) {
-				carry += *pz + *pb++ * f;
-				*pz++ = (digit)(carry & PyLong_MASK);
-				carry >>= PyLong_SHIFT;
-				assert(carry <= PyLong_MASK);
-			}
-			if (carry)
-				*pz += (digit)(carry & PyLong_MASK);
-			assert((carry >> PyLong_SHIFT) == 0);
-		}
+		digits_multiply(z->ob_digit, a->ob_digit, size_a,
+				b->ob_digit, size_b);
 	}
 	return long_normalize(z);
 }
@@ -2861,20 +2946,64 @@
 long_mul(PyLongObject *a, PyLongObject *b)
 {
 	PyLongObject *z;
+	Py_ssize_t a_size, b_size;
 
 	CHECK_BINOP(a, b);
 
+	/* fast path for single-digit multiplication */
 	if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) {
-		PyObject *r;
-		r = PyLong_FromLong(MEDIUM_VALUE(a)*MEDIUM_VALUE(b));
-		return r;
+		/* XXX benchmark this! Is it worth keeping? */
+		twodigits absz;
+		int sign;
+		sign = Py_SIZE(a) * Py_SIZE(b);
+		if (sign == 0)
+			return PyLong_FromLong(0L);
+		absz = (twodigits)a->ob_digit[0] * b->ob_digit[0];
+		if (absz < PyLong_BASE) {
+			CHECK_SMALL_INT((sdigit)(sign*absz));
+			z = _PyLong_New(1);
+			if (z != NULL) {
+				Py_SIZE(z) = sign;
+				z->ob_digit[0] = (digit)absz;
+			}
+		}
+		else {
+			z = _PyLong_New(2);
+			if (z != NULL) {
+				Py_SIZE(z) = 2*sign;
+				z->ob_digit[0] = (digit)(absz & PyLong_MASK);
+				assert((absz >> 2*PyLong_SHIFT) == 0);
+				z->ob_digit[1] = (digit)(absz >> PyLong_SHIFT);
+			}
+		}
+		return (PyObject *)z;
 	}
 
-	z = k_mul(a, b);
+	a_size = ABS(Py_SIZE(a));
+	b_size = ABS(Py_SIZE(b));
+
+	/* use fast basecase multiplication if the smaller of a and b has at
+	   most 480 bits (60 bits if PyLong_SHIFT = 15) */
+	if (b_size <= MAX_PARTIALS || a_size <= MAX_PARTIALS) {
+		z = _PyLong_New(a_size + b_size);
+		if (z == NULL)
+			return NULL;
+		if (b_size <= a_size)
+			digits_multiply_init(z->ob_digit,
+					     a->ob_digit, a_size,
+					     b->ob_digit, b_size);
+		else
+			digits_multiply_init(z->ob_digit,
+					     b->ob_digit, b_size,
+					     a->ob_digit, a_size);
+	}
+	else
+		z = k_mul(a, b);
+	/* z is NULL here if k_mul() failed; it must not be dereferenced then. */
 	/* Negate if exactly one of the inputs is negative. */
-	if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z)
+	if (z != NULL && ((Py_SIZE(a) < 0) ^ (Py_SIZE(b) < 0)))
 		NEGATE(z);
-	return (PyObject *)z;
+	return (PyObject *)(z != NULL ? long_normalize(z) : NULL);
 }
 
 /* The / and % operators are now defined in terms of divmod().
@@ -3991,6 +4120,45 @@
 	PyObject_Del,				/* tp_free */
 };
 
+static PyTypeObject Int_InfoType;
+
+PyDoc_STRVAR(int_info__doc__,
+"sys.int_info\n\
+\n\
+A struct sequence that holds information about Python's\n\
+internal representation of integers.  The attributes are read only.");
+
+static PyStructSequence_Field int_info_fields[] = {
+	{"bits_per_digit", "size of a digit in bits"},
+	{"sizeof_digit", "size in bytes of the C type used to "
+	                 "represent a digit"},
+	{NULL, NULL}
+};
+
+static PyStructSequence_Desc int_info_desc = {
+	"sys.int_info",   /* name */
+	int_info__doc__,  /* doc */
+	int_info_fields,  /* fields */
+	2                 /* number of fields */
+};
+
+PyObject *
+PyLong_GetInfo(void)
+{
+	PyObject* int_info;
+	int field = 0;
+	int_info = PyStructSequence_New(&Int_InfoType);
+	if (int_info == NULL)
+		return NULL;
+	PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(PyLong_SHIFT));
+	PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit)));
+	if (PyErr_Occurred()) {
+		Py_CLEAR(int_info);
+		return NULL;
+	}
+	return int_info;
+}
+
 int
 _PyLong_Init(void)
 {
@@ -4023,6 +4191,10 @@
 		v->ob_digit[0] = abs(ival);
 	}
 #endif
+	/* initialize int_info */
+	if (Int_InfoType.tp_name == 0)
+		PyStructSequence_InitType(&Int_InfoType, &int_info_desc);
+
 	return 1;
 }
 
Index: PC/pyconfig.h
===================================================================
--- PC/pyconfig.h	(revision 69715)
+++ PC/pyconfig.h	(working copy)
@@ -307,6 +307,27 @@
 #	define PY_ULLONG_MAX _UI64_MAX
 #endif
 
+#define PYLONG_DIGIT_SIZE 30
+
+/* define signed and unsigned exact-width 32-bit and 64-bit types, used in the
+   implementation of Python long integers */
+#define HAVE_UINT32_T 1
+#ifndef PY_UINT32_T
+#define PY_UINT32_T unsigned __int32
+#endif
+#define HAVE_UINT64_T 1
+#ifndef PY_UINT64_T
+#define PY_UINT64_T unsigned __int64
+#endif
+#define HAVE_INT32_T 1
+#ifndef PY_INT32_T
+#define PY_INT32_T __int32
+#endif
+#define HAVE_INT64_T 1
+#ifndef PY_INT64_T
+#define PY_INT64_T __int64
+#endif
+
 /* For Windows the Python core is in a DLL by default.  Test
 Py_NO_ENABLE_SHARED to find out.  Also support MS_NO_COREDLL for b/w compat */
 #if !defined(MS_NO_COREDLL) && !defined(Py_NO_ENABLE_SHARED)
Index: Doc/library/sys.rst
===================================================================
--- Doc/library/sys.rst	(revision 69715)
+++ Doc/library/sys.rst	(working copy)
@@ -413,6 +413,23 @@
    same information.
 
 
+.. data:: int_info
+
+   A struct sequence that holds information about Python's
+   internal representation of integers.  The attributes are read only.
+
+   +-------------------------+----------------------------------------------+
+   | attribute               | explanation                                  |
+   +=========================+==============================================+
+   | :const:`bits_per_digit` | number of bits held in each digit.  Python   |
+   |                         | integers are stored internally in base       |
+   |                         | ``2**int_info.bits_per_digit``               |
+   +-------------------------+----------------------------------------------+
+   | :const:`sizeof_digit`   | size in bytes of the C type used to          |
+   |                         | represent a digit                            |
+   +-------------------------+----------------------------------------------+
+
+
 .. function:: intern(string)
 
    Enter *string* in the table of "interned" strings and return the interned string
Index: Lib/test/test_long.py
===================================================================
--- Lib/test/test_long.py	(revision 69715)
+++ Lib/test/test_long.py	(working copy)
@@ -15,7 +15,7 @@
         return self.format % self.args
 
 # SHIFT should match the value in longintrepr.h for best testing.
-SHIFT = 15
+SHIFT = sys.int_info.bits_per_digit
 BASE = 2 ** SHIFT
 MASK = BASE - 1
 KARATSUBA_CUTOFF = 70   # from longobject.c
Index: Lib/test/test_sys.py
===================================================================
--- Lib/test/test_sys.py	(revision 69715)
+++ Lib/test/test_sys.py	(working copy)
@@ -291,6 +291,9 @@
         self.assert_(isinstance(sys.executable, str))
         self.assertEqual(len(sys.float_info), 11)
         self.assertEqual(sys.float_info.radix, 2)
+        self.assertEqual(len(sys.int_info), 2)
+        self.assert_(sys.int_info.bits_per_digit % 5 == 0)
+        self.assert_(sys.int_info.sizeof_digit >= 1)
         self.assert_(isinstance(sys.hexversion, int))
         self.assert_(isinstance(sys.maxsize, int))
         self.assert_(isinstance(sys.maxunicode, int))
@@ -395,6 +398,7 @@
         if hasattr(sys, "gettotalrefcount"):
             self.header += '2P'
             self.vheader += '2P'
+        self.longdigit = sys.int_info.sizeof_digit
         import _testcapi
         self.gc_headsize = _testcapi.SIZEOF_PYGC_HEAD
         self.file = open(test.support.TESTFN, 'wb')
@@ -429,7 +433,7 @@
         size = self.calcsize
         gc_header_size = self.gc_headsize
         # bool objects are not gc tracked
-        self.assertEqual(sys.getsizeof(True), size(vh) + self.H)
+        self.assertEqual(sys.getsizeof(True), size(vh) + self.longdigit)
         # but lists are
         self.assertEqual(sys.getsizeof([]), size(vh + 'PP') + gc_header_size)
 
@@ -437,8 +441,8 @@
         h = self.header
         vh = self.vheader
         size = self.calcsize
-        self.assertEqual(sys.getsizeof(True), size(vh) + self.H)
-        self.assertEqual(sys.getsizeof(True, -1), size(vh) + self.H)
+        self.assertEqual(sys.getsizeof(True), size(vh) + self.longdigit)
+        self.assertEqual(sys.getsizeof(True, -1), size(vh) + self.longdigit)
 
     def test_objecttypes(self):
         # check all types defined in Objects/
@@ -447,7 +451,7 @@
         size = self.calcsize
         check = self.check_sizeof
         # bool
-        check(True, size(vh) + self.H)
+        check(True, size(vh) + self.longdigit)
         # buffer
         # XXX
         # builtin_function_or_method
@@ -565,11 +569,12 @@
         check(reversed([]), size(h + 'lP'))
         # long
         check(0, size(vh))
-        check(1, size(vh) + self.H)
-        check(-1, size(vh) + self.H)
-        check(32768, size(vh) + 2*self.H)
-        check(32768*32768-1, size(vh) + 2*self.H)
-        check(32768*32768, size(vh) + 3*self.H)
+        check(1, size(vh) + self.longdigit)
+        check(-1, size(vh) + self.longdigit)
+        PyLong_BASE = 2**sys.int_info.bits_per_digit
+        check(PyLong_BASE, size(vh) + 2*self.longdigit)
+        check(PyLong_BASE**2-1, size(vh) + 2*self.longdigit)
+        check(PyLong_BASE**2, size(vh) + 3*self.longdigit)
         # memory
         check(memoryview(b''), size(h + 'P PP2P2i7P'))
         # module