Index: Objects/longobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/longobject.c,v
retrieving revision 1.165
diff -r1.165 longobject.c
24a25,32
> /* For modular exponentiation, use normal reduction unless the
>  * exponent is greater than MONTGOMERY_CUTOFF.  In that case, switch
>  * to Montgomery Reduction.  The potential drawback is that values
>  * need to be converted into Montgomery representation and then
>  * back again.
> */
> #define MONTGOMERY_CUTOFF 64
> 
2318a2327,2491
> /*
> Helper for long_pow
> --------------------
> Use the Extended Euclidean algorithm to calculate the inverse of a modulo b.
> I.e., calculate x such that (a * x) % b == 1.
> 
> This is translated from pseudocode in "Practical Cryptography" section
> 11.3.5, Ferguson & Schneier.
> 
> Below is the algorithm in Python:
> 
> def invMod(a, b):
>     c, d = a, b
>     uc, ud = 1, 0
>     while c != 0:
>         q = d // c
>         c, d = d - (q * c), c
>         uc, ud = ud - (q * uc), uc
>     if d == 1:
>         return ud % b
>     else:
>         raise ValueError("argument has no inverse")
> */
> PyLongObject* l_invmod(PyLongObject *a, PyLongObject *b)
> {
> 	PyLongObject *c = NULL;
> 	PyLongObject *d = NULL;
> 	PyLongObject *uc = NULL;
> 	PyLongObject *ud = NULL;
> 	PyLongObject *q = NULL;
> 	PyLongObject *temp = NULL;
> 	PyLongObject *temp2 = NULL;
> 	PyLongObject *retval = NULL;
> 
> 	c = (PyLongObject *)_PyLong_Copy(a);
> 	if (c == NULL)
> 		goto Error;
> 	d = (PyLongObject *)_PyLong_Copy(b);
> 	if (d == NULL)
> 		goto Error;
> 	uc = (PyLongObject *)PyLong_FromLong(1L);
> 	if (uc == NULL)
> 		goto Error;
> 	ud = (PyLongObject *)PyLong_FromLong(0L);
> 	if (ud == NULL)
> 		goto Error;
> 
> 	while (c->ob_size != 0) {
> 		Py_XDECREF(q);
> 		if (l_divmod(d, c, &q, NULL) < 0)
> 			goto Error;
> 
> 		Py_XDECREF(temp);
> 		temp = (PyLongObject *)long_mul(q, c);
> 		if (temp == NULL)
> 			goto Error;
> 		temp2 = (PyLongObject *)long_sub(d, temp);
> 		if (temp2 == NULL)
> 			goto Error;
> 		Py_DECREF(d);
> 		d = c;
> 		c = temp2;
> 		temp2 = NULL;
> 
> 		Py_XDECREF(temp);
> 		temp = (PyLongObject *)long_mul(q, uc);
> 		if (temp == NULL)
> 			goto Error;
> 		temp2 = (PyLongObject *)long_sub(ud, temp);
> 		if (temp2 == NULL)
> 			goto Error;
> 		Py_DECREF(ud);
> 		ud = uc;
> 		uc = temp2;
> 		temp2 = NULL;
> 	}
> 
> 	if ((d->ob_size == 1) && (d->ob_digit[0] == 1))
> 		l_divmod(ud, b, NULL, &retval);
> 	else
> 		PyErr_SetString(PyExc_ValueError, "argument has no inverse");
> 
> Error:
> 	Py_XDECREF(c);
> 	Py_XDECREF(d);
> 	Py_XDECREF(uc);
> 	Py_XDECREF(ud);
> 	Py_XDECREF(q);
> 	Py_XDECREF(temp);
> 	Py_XDECREF(temp2);
> 	return retval;
> }
> 
> /*
> Helper for long_pow
> --------------------
> Implement Montgomery Reduction per HAC, Algorithm 14.32:
> http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
> 
> INPUT:  T, m, m'
> 	BASE = 2**15
> 	n = len(m)   (# of digits in m)
> 	R = BASE**n  (next power of BASE greater than m)
> 
> OUTPUT: T*(R**-1) % m
> */
> PyLongObject* mont_reduce(PyLongObject *T, PyLongObject *m, digit mPrime)
> {
> 	int n, i;
> 	PyLongObject* A = NULL;
> 	PyLongObject* retval = NULL;
> 
> 	n = m->ob_size;
> 
> 	/* A = T */
> 	A = (PyLongObject *)_PyLong_New(2*n+1);
> 	if (A == NULL)
> 		return NULL;
> 	if (T->ob_size > 2*n)
> 		goto Error;
> 	memset(A->ob_digit, 0, A->ob_size * sizeof(digit));
> 	memcpy(A->ob_digit, T->ob_digit, T->ob_size * sizeof(digit));
> 
> 	for (i=0; i < n; i++) {
> 		digit *aptr, *mptr, *mendptr;
> 		twodigits carry = 0;
> 		digit u;
> 
> 		/* u = a[i] * m' mod base  */
> 		u = (digit) ((A->ob_digit[i] * mPrime) & MASK);
> 
> 		/* A += u * m * base**i
> 		   Multiply and add in place:
> 		   A[i+j] += (u * m[j]) + carry */
> 		aptr = A->ob_digit + i;
> 		mptr = m->ob_digit;
> 		mendptr = mptr + n;
> 		while (mptr < mendptr) {
> 			carry += (twodigits) ( (*aptr) + (u * (*mptr++)) );
> 			*aptr++ = (digit) (carry & MASK);
> 			carry >>= SHIFT;
> 		}
> 		while (carry) {
> 			carry += (twodigits) (*aptr);
> 			*aptr++ = (digit) (carry & MASK);
> 			carry >>= SHIFT;
> 		}
> 	}
> 
> 	/*  A = A/base**n  (i.e. A >>= n)  */
> 	memmove(A->ob_digit, A->ob_digit + n, (n+1) * sizeof(digit));
> 	A->ob_size = n+1;
> 	long_normalize(A);
> 
> 	/*  If A >= m then A = A - m  */
> 	if (long_compare(A, m) >= 0)
> 		retval = (PyLongObject *)long_sub(A, m);		
> 	else
> 		return A;
> 		
> Error:
> 	Py_DECREF(A);
> 	return retval;
> }
> 
2329a2503,2508
> 	/* Montgomery values */
> 	int doMontgomery = 0;
> 	PyLongObject *R = NULL;
> 	PyLongObject *base = NULL;
> 	digit cPrime = 0;
> 
2342c2521
< 	else if (PyInt_Check(x))
---
> 	else if (PyInt_Check(x)) {
2343a2523,2525
> 		if (c == NULL)
> 			goto Error;
> 	}
2393c2575,2580
< 		       return 0 */
---
> 		       return 0
> 		This check is not necessary when Montgomery Reduction
> 		is in the code.  However, if Montgomery Reduction is
> 		removed or disabled, we want to be able to set z=1
> 		and be sure that z is inside the group modulo c.		
> 		*/
2414,2416c2601,2655
< 	z = (PyLongObject *)PyLong_FromLong(1L);
< 	if (z == NULL)
< 		goto Error;
---
> 	/* If modulus exists and is odd, we can use Montgomery Reduction 
> 	   If exponent is greater than MONTGOMERY_CUTOFF, it's worth it */
> 	if (c && (c->ob_digit[0] & 1) && 
> 	   (b->ob_digit[0] > MONTGOMERY_CUTOFF)) {
> 		doMontgomery = 1;
> 
> 		/*First, calculate the Montgomery parameters (R, m')
> 		  (or in our case, (R, c')).
> 
> 		  Then set (a, z) to the Montgomery representations
> 		  of (a, 1), in preparation for the exponentiation loop.
> 		*/
> 
> 		/* R = BASE ** n for smallest n such that R > c, (n == c->ob_size) */
> 		R = _PyLong_New(c->ob_size + 1);
> 		if (R == NULL)
> 			goto Error;
> 		memset(R->ob_digit, 0, R->ob_size * sizeof(digit));
> 		R->ob_digit[R->ob_size-1] = 1;
> 
> 		/* c' = (-pow(c, -1, BASE)) % BASE */
> 		base = (PyLongObject *)PyLong_FromLong(BASE);
> 		if (base == NULL)
> 			goto Error;
> 		temp = (PyLongObject *)l_invmod(c, base);
> 		if (temp == NULL)
> 			goto Error;
> 		cPrime = BASE - temp->ob_digit[0];
> 		Py_DECREF(temp);
> 		temp = NULL;
> 
> 		/* a = a * R % c  # keep in mind a * R == (a << size_c) */
> 		temp = _PyLong_New(c->ob_size + a->ob_size);
> 		if (temp == NULL)
> 			goto Error;
> 		memset(temp->ob_digit, 0, c->ob_size * sizeof(digit));
> 		memcpy(temp->ob_digit + c->ob_size, a->ob_digit,
> 			a->ob_size * sizeof(digit));
> 
> 		Py_DECREF(a);
> 		a = NULL;
> 		if (l_divmod(temp, c, NULL, &a) < 0)
> 			goto Error;
> 		Py_DECREF(temp);
> 		temp = NULL;
> 
> 		/* z = 1 * R % c */
> 		if (l_divmod(R, c, NULL, &z) < 0)
> 			goto Error;
> 	}
> 	else {
> 		z = (PyLongObject *)PyLong_FromLong(1L);
> 		if (z == NULL)
> 			goto Error;
> 	}
2419c2658
< 	 * is NULL.
---
> 	 * is NULL.  Use Montgomery Reduction if applicable.
2421,2427c2660,2673
< #define REDUCE(X)					\
< 	if (c != NULL) {				\
< 		if (l_divmod(X, c, NULL, &temp) < 0)	\
< 			goto Error;			\
< 		Py_XDECREF(X);				\
< 		X = temp;				\
< 		temp = NULL;				\
---
> #define REDUCE(X)						\
> 	if (c != NULL) {					\
> 		if (doMontgomery) {				\
> 			temp = mont_reduce(X, c, cPrime);	\
> 			if (temp == NULL)			\
> 				goto Error;			\
> 		}						\
> 		else {						\
> 			if (l_divmod(X, c, NULL, &temp) < 0)	\
> 				goto Error;			\
> 		}						\
> 		Py_XDECREF(X);					\
> 		X = temp;					\
> 		temp = NULL;					\
2458c2704
< 		Py_INCREF(z);	/* still holds 1L */
---
> 		Py_INCREF(z);	/* still holds 1L (or the Montgomery representation thereof) */
2475a2722,2724
> 	if (doMontgomery)
> 		REDUCE(z)
> 
2496a2746,2747
> 	Py_XDECREF(R);
> 	Py_XDECREF(base);