? long_pow_2005_9_28.diff
? Mac/Python
Index: Objects/longobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/longobject.c,v
retrieving revision 1.169
diff -c -r1.169 longobject.c
*** Objects/longobject.c	17 Jul 2005 23:45:23 -0000	1.169
--- Objects/longobject.c	29 Sep 2005 06:12:28 -0000
***************
*** 22,27 ****
--- 22,36 ----
   */
  #define FIVEARY_CUTOFF 8
  
+ /* For modular exponentiation, use normal reduction unless the
+  * exponent is greater than MONTGOMERY_CUTOFF.  In that case, switch
+  * to Montgomery Reduction.  The potential drawback is that values
+  * need to be converted into Montgomery representation and then
+  * back again.
+  */
+ #define MONTGOMERY_CUTOFF 64
+ 
+ 
  #define ABS(x) ((x) < 0 ? -(x) : (x))
  
  #undef MIN
***************
*** 2337,2342 ****
--- 2346,2516 ----
  	return z;
  }
  
+ /*
+ Helper for long_pow
+ --------------------
+ Use the Extended Euclidean algorithm to calculate the inverse of a modulo b.
+ I.e., calculate x such that (a * x) % b == 1.
+ 
+ This is translated from pseudocode in "Practical Cryptography" section
+ 11.3.5, Ferguson & Schneier.
+ 
+ Below is the algorithm in Python:
+ 
+ def invMod(a, b):
+     c, d = a, b
+     uc, ud = 1, 0
+     while c != 0:
+         q = d // c
+         c, d = d - (q * c), c
+         uc, ud = ud - (q * uc), uc
+     if d == 1:
+         return ud % b
+     else:
+         raise ValueError("argument has no inverse")
+ */
+ PyLongObject* l_invmod(PyLongObject *a, PyLongObject *b)
+ {
+ 	PyLongObject *c = NULL;
+ 	PyLongObject *d = NULL;
+ 	PyLongObject *uc = NULL;
+ 	PyLongObject *ud = NULL;
+ 	PyLongObject *q = NULL;
+ 	PyLongObject *temp = NULL;
+ 	PyLongObject *temp2 = NULL;
+ 	PyLongObject *retval = NULL;
+ 
+ 	c = (PyLongObject *)_PyLong_Copy(a);
+ 	if (c == NULL)
+ 		goto Error;
+ 	d = (PyLongObject *)_PyLong_Copy(b);
+ 	if (d == NULL)
+ 		goto Error;
+ 	uc = (PyLongObject *)PyLong_FromLong(1L);
+ 	if (uc == NULL)
+ 		goto Error;
+ 	ud = (PyLongObject *)PyLong_FromLong(0L);
+ 	if (ud == NULL)
+ 		goto Error;
+ 
+ 	while (c->ob_size != 0) {
+ 		Py_XDECREF(q);
+ 		if (l_divmod(d, c, &q, NULL) < 0)
+ 			goto Error;
+ 
+ 		Py_XDECREF(temp);
+ 		temp = (PyLongObject *)long_mul(q, c);
+ 		if (temp == NULL)
+ 			goto Error;
+ 		temp2 = (PyLongObject *)long_sub(d, temp);
+ 		if (temp2 == NULL)
+ 			goto Error;
+ 		Py_DECREF(d);
+ 		d = c;
+ 		c = temp2;
+ 		temp2 = NULL;
+ 
+ 		Py_XDECREF(temp);
+ 		temp = (PyLongObject *)long_mul(q, uc);
+ 		if (temp == NULL)
+ 			goto Error;
+ 		temp2 = (PyLongObject *)long_sub(ud, temp);
+ 		if (temp2 == NULL)
+ 			goto Error;
+ 		Py_DECREF(ud);
+ 		ud = uc;
+ 		uc = temp2;
+ 		temp2 = NULL;
+ 	}
+ 
+ 	if ((d->ob_size == 1) && (d->ob_digit[0] == 1))
+ 		l_divmod(ud, b, NULL, &retval);
+ 	else
+ 		PyErr_SetString(PyExc_ValueError, "argument has no inverse");
+ 
+ Error:
+ 	Py_XDECREF(c);
+ 	Py_XDECREF(d);
+ 	Py_XDECREF(uc);
+ 	Py_XDECREF(ud);
+ 	Py_XDECREF(q);
+ 	Py_XDECREF(temp);
+ 	Py_XDECREF(temp2);
+ 	return retval;
+ }
+ 
+ /*
+ Helper for long_pow
+ --------------------
+ Implement Montgomery Reduction per HAC, Algorithm 14.32:
+ http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
+ 
+ INPUT:  T, m, m'
+ 	BASE = 2**15
+ 	n = len(m)   (# of digits in m)
+ 	R = BASE**n  (next power of BASE greater than m)
+ 
+ OUTPUT: T*(R**-1) % m
+ */
+ PyLongObject* mont_reduce(PyLongObject *T, PyLongObject *m, digit mPrime)
+ {
+ 	int n, i;
+ 	PyLongObject* A = NULL;
+ 	PyLongObject* retval = NULL;
+ 
+ 	n = m->ob_size;
+ 
+ 	/* A = T */
+ 	A = (PyLongObject *)_PyLong_New(2*n+1);
+ 	if (A == NULL)
+ 		return NULL;
+ 	if (T->ob_size > 2*n)
+ 		goto Error;
+ 	memset(A->ob_digit, 0, A->ob_size * sizeof(digit));
+ 	memcpy(A->ob_digit, T->ob_digit, T->ob_size * sizeof(digit));
+ 
+ 	for (i=0; i < n; i++) {
+ 		digit *aptr, *mptr, *mendptr;
+ 		twodigits carry = 0;
+ 		digit u;
+ 
+ 		/* u = a[i] * m' mod base  */
+ 		u = (digit) ((A->ob_digit[i] * mPrime) & MASK);
+ 
+ 		/* A += u * m * base**i
+ 		   Multiply and add in place:
+ 		   A[i+j] += (u * m[j]) + carry */
+ 		aptr = A->ob_digit + i;
+ 		mptr = m->ob_digit;
+ 		mendptr = mptr + n;
+ 		while (mptr < mendptr) {
+ 			carry += (twodigits) ( (*aptr) + (u * (*mptr++)) );
+ 			*aptr++ = (digit) (carry & MASK);
+ 			carry >>= SHIFT;
+ 		}
+ 		while (carry) {
+ 			carry += (twodigits) (*aptr);
+ 			*aptr++ = (digit) (carry & MASK);
+ 			carry >>= SHIFT;
+ 		}
+ 	}
+ 
+ 	/*  A = A/base**n  (i.e. A >>= n)  */
+ 	memmove(A->ob_digit, A->ob_digit + n, (n+1) * sizeof(digit));
+ 	A->ob_size = n+1;
+ 	long_normalize(A);
+ 
+ 	/*  If A >= m then A = A - m  */
+ 	if (long_compare(A, m) >= 0)
+ 		retval = (PyLongObject *)long_sub(A, m);		
+ 	else
+ 		return A;
+ 		
+ Error:
+ 	Py_DECREF(A);
+ 	return retval;
+ }
+ 
  /* pow(v, w, x) */
  static PyObject *
  long_pow(PyObject *v, PyObject *w, PyObject *x)
***************
*** 2354,2359 ****
--- 2528,2539 ----
  	PyLongObject *table[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  				   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
  
+ 	/* Montgomery values */
+ 	int doMontgomery = 0;
+ 	PyLongObject *R = NULL;
+ 	PyLongObject *base = NULL;
+ 	digit cPrime = 0;
+ 
  	/* a, b, c = v, w, x */
  	CONVERT_BINOP(v, w, &a, &b);
  	if (PyLong_Check(x)) {
***************
*** 2435,2454 ****
  	/* At this point a, b, and c are guaranteed non-negative UNLESS
  	   c is NULL, in which case a may be negative. */
  
! 	z = (PyLongObject *)PyLong_FromLong(1L);
! 	if (z == NULL)
! 		goto Error;
  
  	/* Perform a modular reduction, X = X % c, but leave X alone if c
! 	 * is NULL.
  	 */
! #define REDUCE(X)					\
! 	if (c != NULL) {				\
! 		if (l_divmod(X, c, NULL, &temp) < 0)	\
! 			goto Error;			\
! 		Py_XDECREF(X);				\
! 		X = temp;				\
! 		temp = NULL;				\
  	}
  
  	/* Multiply two values, then reduce the result:
--- 2615,2693 ----
  	/* At this point a, b, and c are guaranteed non-negative UNLESS
  	   c is NULL, in which case a may be negative. */
  
! 	/* If modulus exists and is odd, we can use Montgomery Reduction 
! 	   If exponent is greater than MONTGOMERY_CUTOFF, it's worth it */
! 	if (c && (c->ob_digit[0] & 1) && 
! 	   (b->ob_digit[0] > MONTGOMERY_CUTOFF)) {
! 		doMontgomery = 1;
! 
! 		/*First, calculate the Montgomery parameters (R, m')
! 		  (or in our case, (R, c')).
! 
! 		  Then set (a, z) to the Montgomery representations
! 		  of (a, 1), in preparation for the exponentiation loop.
! 		*/
! 
! 		/* R = BASE ** n for smallest n such that R > c, (n == c->ob_size) */
! 		R = _PyLong_New(c->ob_size + 1);
! 		if (R == NULL)
! 			goto Error;
! 		memset(R->ob_digit, 0, R->ob_size * sizeof(digit));
! 		R->ob_digit[R->ob_size-1] = 1;
! 
! 		/* c' = (-pow(c, -1, BASE)) % BASE */
! 		base = (PyLongObject *)PyLong_FromLong(BASE);
! 		if (base == NULL)
! 			goto Error;
! 		temp = (PyLongObject *)l_invmod(c, base);
! 		if (temp == NULL)
! 			goto Error;
! 		cPrime = BASE - temp->ob_digit[0];
! 		Py_DECREF(temp);
! 		temp = NULL;
! 
! 		/* a = a * R % c  # keep in mind a * R == (a << size_c) */
! 		temp = _PyLong_New(c->ob_size + a->ob_size);
! 		if (temp == NULL)
! 			goto Error;
! 		memset(temp->ob_digit, 0, c->ob_size * sizeof(digit));
! 		memcpy(temp->ob_digit + c->ob_size, a->ob_digit,
! 			a->ob_size * sizeof(digit));
! 
! 		Py_DECREF(a);
! 		a = NULL;
! 		if (l_divmod(temp, c, NULL, &a) < 0)
! 			goto Error;
! 		Py_DECREF(temp);
! 		temp = NULL;
! 
! 		/* z = 1 * R % c */
! 		if (l_divmod(R, c, NULL, &z) < 0)
! 			goto Error;
! 	}
! 	else {
! 		z = (PyLongObject *)PyLong_FromLong(1L);
! 		if (z == NULL)
! 			goto Error;
! 	}
  
  	/* Perform a modular reduction, X = X % c, but leave X alone if c
! 	 * is NULL.  Use Montgomery Reduction if applicable.
  	 */
! #define REDUCE(X)						\
! 	if (c != NULL) {					\
! 		if (doMontgomery) {				\
! 			temp = mont_reduce(X, c, cPrime);	\
! 			if (temp == NULL)			\
! 				goto Error;			\
! 		}						\
! 		else {						\
! 			if (l_divmod(X, c, NULL, &temp) < 0)	\
! 				goto Error;			\
! 		}						\
! 		Py_XDECREF(X);					\
! 		X = temp;					\
! 		temp = NULL;					\
  	}
  
  	/* Multiply two values, then reduce the result:
***************
*** 2479,2485 ****
  	}
  	else {
  		/* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */
! 		Py_INCREF(z);	/* still holds 1L */
  		table[0] = z;
  		for (i = 1; i < 32; ++i)
  			MULT(table[i-1], a, table[i])
--- 2718,2724 ----
  	}
  	else {
  		/* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */
! 		Py_INCREF(z);	/* still holds 1L (or Montgomery rep. of 1L) */
  		table[0] = z;
  		for (i = 1; i < 32; ++i)
  			MULT(table[i-1], a, table[i])
***************
*** 2497,2502 ****
--- 2736,2744 ----
  		}
  	}
  
+ 	if (doMontgomery)
+ 		REDUCE(z)
+ 
  	if (negativeOutput && (z->ob_size != 0)) {
  		temp = (PyLongObject *)long_sub(z, c);
  		if (temp == NULL)
***************
*** 2522,2527 ****
--- 2764,2771 ----
  	Py_DECREF(b);
  	Py_XDECREF(c);
  	Py_XDECREF(temp);
+ 	Py_XDECREF(R);
+ 	Py_XDECREF(base);
  	return (PyObject *)z;
  }