Index: Objects/longobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/longobject.c,v
retrieving revision 1.164
diff -c -r1.164 longobject.c
*** Objects/longobject.c	30 Aug 2004 02:58:59 -0000	1.164
--- Objects/longobject.c	13 Sep 2004 08:03:12 -0000
***************
*** 2305,2310 ****
--- 2305,2473 ----
  	return z;
  }
  
+ /*
+ Helper for long_pow
+ --------------------
+ Use the Extended Euclidean algorithm to calculate the inverse of a modulo b.
+ I.e., calculate x such that (a * x) % b == 1.
+ 
+ This is translated from pseudocode in "Practical Cryptography" section
+ 11.3.5, Ferguson & Schneier.
+ 
+ Below is the algorithm in Python:
+ 
+ def invMod(a, b):
+     c, d = a, b
+     uc, ud = 1, 0
+     while c != 0:
+         q = d // c
+         c, d = d - (q * c), c
+         uc, ud = ud - (q * uc), uc
+     if d == 1:
+         return ud % b
+     else:
+         raise ValueError("argument has no inverse")
+ */
+ PyLongObject* l_invmod(PyLongObject *a, PyLongObject *b)
+ {
+ 	PyLongObject *c = NULL;
+ 	PyLongObject *d = NULL;
+ 	PyLongObject *uc = NULL;
+ 	PyLongObject *ud = NULL;
+ 	PyLongObject *q = NULL;
+ 	PyLongObject *temp = NULL;
+ 	PyLongObject *temp2 = NULL;
+ 	PyLongObject *retval = NULL;
+ 
+ 	c = (PyLongObject *)_PyLong_Copy(a);
+ 	if (c == NULL)
+ 		goto Error;
+ 	d = (PyLongObject *)_PyLong_Copy(b);
+ 	if (d == NULL)
+ 		goto Error;
+ 	uc = (PyLongObject *)PyLong_FromLong(1L);
+ 	if (uc == NULL)
+ 		goto Error;
+ 	ud = (PyLongObject *)PyLong_FromLong(0L);
+ 	if (ud == NULL)
+ 		goto Error;
+ 
+ 	while (c->ob_size != 0) {
+ 		Py_XDECREF(q);
+ 		if (l_divmod(d, c, &q, NULL) < 0)
+ 			goto Error;
+ 
+ 		Py_XDECREF(temp);
+ 		temp = (PyLongObject *)long_mul(q, c);
+ 		if (temp == NULL)
+ 			goto Error;
+ 		temp2 = (PyLongObject *)long_sub(d, temp);
+ 		if (temp2 == NULL)
+ 			goto Error;
+ 		Py_DECREF(d);
+ 		d = c;
+ 		c = temp2;
+ 		temp2 = NULL;
+ 
+ 		Py_XDECREF(temp);
+ 		temp = (PyLongObject *)long_mul(q, uc);
+ 		if (temp == NULL)
+ 			goto Error;
+ 		temp2 = (PyLongObject *)long_sub(ud, temp);
+ 		if (temp2 == NULL)
+ 			goto Error;
+ 		Py_DECREF(ud);
+ 		ud = uc;
+ 		uc = temp2;
+ 		temp2 = NULL;
+ 	}
+ 
+ 	if ((d->ob_size == 1) && (d->ob_digit[0] == 1))
+ 		l_divmod(ud, b, NULL, &retval);
+ 	else
+ 		PyErr_SetString(PyExc_ValueError, "argument has no inverse");
+ 
+ Error:
+ 	Py_XDECREF(c);
+ 	Py_XDECREF(d);
+ 	Py_XDECREF(uc);
+ 	Py_XDECREF(ud);
+ 	Py_XDECREF(q);
+ 	Py_XDECREF(temp);
+ 	Py_XDECREF(temp2);
+ 	return retval;
+ }
+ 
+ /*
+ Helper for long_pow
+ --------------------
+ Implement Montgomery Reduction per HAC, Algorithm 14.32:
+ http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
+ 
+ INPUT:  T, m, m'
+ 	BASE = 2**15
+ 	n = len(m)   (# of digits in m)
+ 	R = BASE**n  (next power of BASE greater than m)
+ 
+ OUTPUT: T*(R**-1) % m
+ */
+ PyLongObject* mont_reduce(PyLongObject *T, PyLongObject *m, int mPrime)
+ {
+ 	int n, i;
+ 	PyLongObject* A = NULL;
+ 	PyLongObject* retval = NULL;
+ 
+ 	n = m->ob_size;
+ 
+ 	/* A = T */
+ 	A = (PyLongObject *)_PyLong_New(2*n+1);
+ 	if (A == NULL)
+ 		return NULL;
+ 	if (T->ob_size > 2*n)
+ 		goto Error;
+ 	memset(A->ob_digit, 0, A->ob_size * sizeof(digit));
+ 	memcpy(A->ob_digit, T->ob_digit, T->ob_size * sizeof(digit));
+ 
+ 	for (i=0; i < n; i++) {
+ 		digit *aptr, *mptr, *mendptr;
+ 		twodigits carry = 0;
+ 		int u;
+ 
+ 		/* u = a[i] * m' mod base  */
+ 		u = (A->ob_digit[i] * mPrime) & MASK;
+ 
+ 		/* A += u * m * base**i
+ 		   Multiply and add in place:
+ 		   A[i+j] += (u * m[j]) + carry */
+ 		aptr = A->ob_digit + i;
+ 		mptr = m->ob_digit;
+ 		mendptr = mptr + n;
+ 		while (mptr < mendptr) {
+ 			carry += (twodigits)( (*aptr) + (u * (*mptr++)) );
+ 			*aptr++ = (digit) (carry & MASK);
+ 			carry >>= SHIFT;
+ 		}
+ 		if (carry)
+ 			*aptr += (digit)(carry & MASK);
+ 		assert((carry >> SHIFT) == 0);
+ 	}
+ 
+ 	/*  A = A/base**n  (i.e. A >>= n)  */
+ 	memmove(A->ob_digit, A->ob_digit + n, (n+1) * sizeof(digit));
+ 	A->ob_size = n+1;
+ 	long_normalize(A);
+ 
+ 	/*  If A >= m then A = A - m  */
+ 	if (long_compare(A, m) >= 0)
+ 		retval = (PyLongObject *)long_sub(A, m);		
+ 	else
+ 		return A;
+ 		
+ Error:
+ 	Py_DECREF(A);
+ 	return retval;
+ }
+ 
  /* pow(v, w, x) */
  static PyObject *
  long_pow(PyObject *v, PyObject *w, PyObject *x)
***************
*** 2316,2321 ****
--- 2479,2490 ----
  	int i, j, k;             /* counters */
  	PyLongObject *temp = NULL;
  
+ 	/* Montgomery values */
+ 	int doMontgomery = 0;
+ 	PyLongObject *R = NULL;
+ 	PyLongObject *base = NULL;
+ 	int cPrime = 0;
+ 
  	/* 5-ary values.  If the exponent is large enough, table is
  	 * precomputed so that table[i] == a**i % c for i in range(32).
  	 */
***************
*** 2328,2335 ****
  		c = (PyLongObject *)x;
  		Py_INCREF(x);
  	}
! 	else if (PyInt_Check(x))
  		c = (PyLongObject *)PyLong_FromLong(PyInt_AS_LONG(x));
  	else if (x == Py_None)
  		c = NULL;
  	else {
--- 2497,2507 ----
  		c = (PyLongObject *)x;
  		Py_INCREF(x);
  	}
! 	else if (PyInt_Check(x)) {
  		c = (PyLongObject *)PyLong_FromLong(PyInt_AS_LONG(x));
+ 		if (c == NULL)
+ 			goto Error;
+ 	}
  	else if (x == Py_None)
  		c = NULL;
  	else {
***************
*** 2379,2385 ****
  		}
  
  		/* if modulus == 1:
! 		       return 0 */
  		if ((c->ob_size == 1) && (c->ob_digit[0] == 1)) {
  			z = (PyLongObject *)PyLong_FromLong(0L);
  			goto Done;
--- 2551,2562 ----
  		}
  
  		/* if modulus == 1:
! 		       return 0
! 		This check is not necessary when Montgomery Reduction
! 		is in the code.  However, if Montgomery Reduction is
! 		removed or disabled, we want to be able to set z=1
! 		and be sure that z is inside the group modulo c.		
! 		*/
  		if ((c->ob_size == 1) && (c->ob_digit[0] == 1)) {
  			z = (PyLongObject *)PyLong_FromLong(0L);
  			goto Done;
***************
*** 2400,2419 ****
  	/* At this point a, b, and c are guaranteed non-negative UNLESS
  	   c is NULL, in which case a may be negative. */
  
! 	z = (PyLongObject *)PyLong_FromLong(1L);
! 	if (z == NULL)
! 		goto Error;
  
  	/* Perform a modular reduction, X = X % c, but leave X alone if c
! 	 * is NULL.
  	 */
! #define REDUCE(X)					\
! 	if (c != NULL) {				\
! 		if (l_divmod(X, c, NULL, &temp) < 0)	\
! 			goto Error;			\
! 		Py_XDECREF(X);				\
! 		X = temp;				\
! 		temp = NULL;				\
  	}
  
  	/* Multiply two values, then reduce the result:
--- 2577,2653 ----
  	/* At this point a, b, and c are guaranteed non-negative UNLESS
  	   c is NULL, in which case a may be negative. */
  
! 	/* If modulus exists and is odd, use Montgomery Reduction */
! 	if (c && (c->ob_digit[0] & 1)) {
! 		doMontgomery = 1;
! 
! 		/*First, calculate the Montgomery parameters (R, m')
! 		  (or in our case, (R, c')).
! 
! 		  Then set (a, z) to the Montgomery representations
! 		  of (a, 1), in preparation for the exponentiation loop.
! 		*/
! 
! 		/* R = BASE ** n for smallest n such that R > c, (n == c->ob_size) */
! 		R = _PyLong_New(c->ob_size + 1);
! 		if (R == NULL)
! 			goto Error;
! 		memset(R->ob_digit, 0, R->ob_size * sizeof(digit));
! 		R->ob_digit[R->ob_size-1] = 1;
! 
! 		/* c' = (-pow(c, -1, BASE)) % BASE */
! 		base = (PyLongObject *)PyLong_FromLong(BASE);
! 		if (base == NULL)
! 			goto Error;
! 		temp = (PyLongObject *)l_invmod(c, base);
! 		if (temp == NULL)
! 			goto Error;
! 		cPrime = BASE - temp->ob_digit[0];
! 		Py_DECREF(temp);
! 		temp = NULL;
! 
! 		/* a = a * R % c  # keep in mind a * R == (a << size_c) */
! 		temp = _PyLong_New(c->ob_size + a->ob_size);
! 		if (temp == NULL)
! 			goto Error;
! 		memset(temp->ob_digit, 0, c->ob_size * sizeof(digit));
! 		memcpy(temp->ob_digit + c->ob_size, a->ob_digit,
! 			a->ob_size * sizeof(digit));
! 
! 		Py_DECREF(a);
! 		a = NULL;
! 		if (l_divmod(temp, c, NULL, &a) < 0)
! 			goto Error;
! 		Py_DECREF(temp);
! 		temp = NULL;
! 
! 		/* z = 1 * R % c */
! 		if (l_divmod(R, c, NULL, &z) < 0)
! 			goto Error;
! 	}
! 	else {
! 		z = (PyLongObject *)PyLong_FromLong(1L);
! 		if (z == NULL)
! 			goto Error;
! 	}
  
  	/* Perform a modular reduction, X = X % c, but leave X alone if c
! 	 * is NULL.  Use Montgomery Reduction if applicable.
  	 */
! #define REDUCE(X)						\
! 	if (c != NULL) {					\
! 		if (doMontgomery) {				\
! 			temp = mont_reduce(X, c, cPrime);	\
! 			if (temp == NULL)			\
! 				goto Error;			\
! 		}						\
! 		else {						\
! 			if (l_divmod(X, c, NULL, &temp) < 0)	\
! 				goto Error;			\
! 		}						\
! 		Py_XDECREF(X);					\
! 		X = temp;					\
! 		temp = NULL;					\
  	}
  
  	/* Multiply two values, then reduce the result:
***************
*** 2444,2450 ****
  	}
  	else {
  		/* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */
! 		Py_INCREF(z);	/* still holds 1L */
  		table[0] = z;
  		for (i = 1; i < 32; ++i)
  			MULT(table[i-1], a, table[i])
--- 2678,2684 ----
  	}
  	else {
  		/* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */
! 		Py_INCREF(z);	/* still holds 1L (or the Montgomery representation thereof) */
  		table[0] = z;
  		for (i = 1; i < 32; ++i)
  			MULT(table[i-1], a, table[i])
***************
*** 2462,2467 ****
--- 2696,2704 ----
  		}
  	}
  
+ 	if (doMontgomery)
+ 		REDUCE(z)
+ 
  	if (negativeOutput && (z->ob_size != 0)) {
  		temp = (PyLongObject *)long_sub(z, c);
  		if (temp == NULL)
***************
*** 2483,2488 ****
--- 2720,2727 ----
  	Py_XDECREF(b);
  	Py_XDECREF(c);
  	Py_XDECREF(temp);
+ 	Py_XDECREF(R);
+ 	Py_XDECREF(base);
  	if (b->ob_size > FIVEARY_CUTOFF) {
  		for (i = 0; i < 32; ++i)
  			Py_XDECREF(table[i]);