? long_pow_2005_9_28.diff ? Mac/Python Index: Objects/longobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/longobject.c,v retrieving revision 1.169 diff -c -r1.169 longobject.c *** Objects/longobject.c 17 Jul 2005 23:45:23 -0000 1.169 --- Objects/longobject.c 29 Sep 2005 06:12:28 -0000 *************** *** 22,27 **** --- 22,36 ---- */ #define FIVEARY_CUTOFF 8 + /* For modular exponentiation, use normal reduction unless the + * exponent is greater than MONTGOMERY_CUTOFF. In that case, switch + * to Montgomery Reduction. The potential drawback is that values + * need to be converted into Montgomery representation and then + * back again. + */ + #define MONTGOMERY_CUTOFF 64 + + #define ABS(x) ((x) < 0 ? -(x) : (x)) #undef MIN *************** *** 2337,2342 **** --- 2346,2516 ---- return z; } + /* + Helper for long_pow + -------------------- + Use the Extended Euclidean algorithm to calculate the inverse of a modulo b. + I.e., calculate x such that (a * x) % b == 1. + + This is translated from pseudocode in "Practical Cryptography" section + 11.3.5, Ferguson & Schneier. + + Below is the algorithm in Python: + + def invMod(a, b): + c, d = a, b + uc, ud = 1, 0 + while c != 0: + q = d // c + c, d = d - (q * c), c + uc, ud = ud - (q * uc), uc + if d == 1: + return ud % b + else: + raise ValueError("argument has no inverse") + */ + PyLongObject* l_invmod(PyLongObject *a, PyLongObject *b) + { + PyLongObject *c = NULL; + PyLongObject *d = NULL; + PyLongObject *uc = NULL; + PyLongObject *ud = NULL; + PyLongObject *q = NULL; + PyLongObject *temp = NULL; + PyLongObject *temp2 = NULL; + PyLongObject *retval = NULL; + + c = (PyLongObject *)_PyLong_Copy(a); + if (c == NULL) + goto Error; + d = (PyLongObject *)_PyLong_Copy(b); + if (d == NULL) + goto Error; + uc = (PyLongObject *)PyLong_FromLong(1L); + if (uc == NULL) + goto Error; + ud = (PyLongObject *)PyLong_FromLong(0L); + if (ud == NULL) + goto Error; + + while (c->ob_size != 0) { + Py_XDECREF(q); + if (l_divmod(d, c, &q, NULL) < 0) + goto Error; + + Py_XDECREF(temp); + temp = (PyLongObject *)long_mul(q, c); + if (temp == NULL) + goto Error; + temp2 = (PyLongObject *)long_sub(d, temp); + if (temp2 == NULL) + goto Error; + Py_DECREF(d); + d = c; + c = temp2; + temp2 = NULL; + + Py_XDECREF(temp); + temp = (PyLongObject *)long_mul(q, uc); + if (temp == NULL) + goto Error; + temp2 = (PyLongObject *)long_sub(ud, temp); + if (temp2 == NULL) + goto Error; + Py_DECREF(ud); + ud = uc; + uc = temp2; + temp2 = NULL; + } + + if ((d->ob_size == 1) && (d->ob_digit[0] == 1)) + l_divmod(ud, b, NULL, &retval); + else + PyErr_SetString(PyExc_ValueError, "argument has no inverse"); + + Error: + Py_XDECREF(c); + Py_XDECREF(d); + Py_XDECREF(uc); + Py_XDECREF(ud); + Py_XDECREF(q); + Py_XDECREF(temp); + Py_XDECREF(temp2); + return retval; + } + + /* + Helper for long_pow + -------------------- + Implement Montgomery Reduction per HAC, Algorithm 14.32: + http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf + + INPUT: T, m, m' + BASE = 2**15 + n = len(m) (# of digits in m) + R = BASE**n (next power of BASE greater than m) + + OUTPUT: T*(R**-1) % m + */ + PyLongObject* mont_reduce(PyLongObject *T, PyLongObject *m, digit mPrime) + { + int n, i; + PyLongObject* A = NULL; + PyLongObject* retval = NULL; + + n = m->ob_size; + + /* A = T */ + A = (PyLongObject *)_PyLong_New(2*n+1); + if (A == NULL) + return NULL; + if (T->ob_size > 2*n) + goto Error; + memset(A->ob_digit, 0, A->ob_size * sizeof(digit)); + memcpy(A->ob_digit, T->ob_digit, T->ob_size * sizeof(digit)); + + for (i=0; i < n; i++) { + digit *aptr, *mptr, *mendptr; + twodigits carry = 0; + digit u; + + /* u = a[i] * m' mod base */ + u = (digit) ((A->ob_digit[i] * mPrime) & MASK); + + /* A += u * m * base**i + Multiply and add in place: + A[i+j] += (u * m[j]) + carry */ + aptr = A->ob_digit + i; + mptr = m->ob_digit; + mendptr = mptr + n; + while (mptr < mendptr) { + carry += (twodigits) ( (*aptr) + (u * (*mptr++)) ); + *aptr++ = (digit) (carry & MASK); + carry >>= SHIFT; + } + while (carry) { + carry += (twodigits) (*aptr); + *aptr++ = (digit) (carry & MASK); + carry >>= SHIFT; + } + } + + /* A = A/base**n (i.e. A >>= n) */ + memmove(A->ob_digit, A->ob_digit + n, (n+1) * sizeof(digit)); + A->ob_size = n+1; + long_normalize(A); + + /* If A >= m then A = A - m */ + if (long_compare(A, m) >= 0) + retval = (PyLongObject *)long_sub(A, m); + else + return A; + + Error: + Py_DECREF(A); + return retval; + } + /* pow(v, w, x) */ static PyObject * long_pow(PyObject *v, PyObject *w, PyObject *x) *************** *** 2354,2359 **** --- 2528,2539 ---- PyLongObject *table[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + /* Montgomery values */ + int doMontgomery = 0; + PyLongObject *R = NULL; + PyLongObject *base = NULL; + digit cPrime = 0; + /* a, b, c = v, w, x */ CONVERT_BINOP(v, w, &a, &b); if (PyLong_Check(x)) { *************** *** 2435,2454 **** /* At this point a, b, and c are guaranteed non-negative UNLESS c is NULL, in which case a may be negative. */ ! z = (PyLongObject *)PyLong_FromLong(1L); ! if (z == NULL) ! goto Error; /* Perform a modular reduction, X = X % c, but leave X alone if c ! * is NULL. */ ! #define REDUCE(X) \ ! if (c != NULL) { \ ! if (l_divmod(X, c, NULL, &temp) < 0) \ ! goto Error; \ ! Py_XDECREF(X); \ ! X = temp; \ ! temp = NULL; \ } /* Multiply two values, then reduce the result: --- 2615,2693 ---- /* At this point a, b, and c are guaranteed non-negative UNLESS c is NULL, in which case a may be negative. */ ! /* If modulus exists and is odd, we can use Montgomery Reduction ! If exponent is greater than MONTGOMERY_CUTOFF, it's worth it */ ! if (c && (c->ob_digit[0] & 1) && ! (b->ob_digit[0] > MONTGOMERY_CUTOFF)) { ! doMontgomery = 1; ! ! /*First, calculate the Montgomery parameters (R, m') ! (or in our case, (R, c')). ! ! Then set (a, z) to the Montgomery representations ! of (a, 1), in preparation for the exponentiation loop. ! */ ! ! /* R = BASE ** n for smallest n such that R > c, (n == c->ob_size) */ ! R = _PyLong_New(c->ob_size + 1); ! if (R == NULL) ! goto Error; ! memset(R->ob_digit, 0, R->ob_size * sizeof(digit)); ! R->ob_digit[R->ob_size-1] = 1; ! ! /* c' = (-pow(c, -1, BASE)) % BASE */ ! base = (PyLongObject *)PyLong_FromLong(BASE); ! if (base == NULL) ! goto Error; ! temp = (PyLongObject *)l_invmod(c, base); ! if (temp == NULL) ! goto Error; ! cPrime = BASE - temp->ob_digit[0]; ! Py_DECREF(temp); ! temp = NULL; ! ! /* a = a * R % c # keep in mind a * R == (a << size_c) */ ! temp = _PyLong_New(c->ob_size + a->ob_size); ! if (temp == NULL) ! goto Error; ! memset(temp->ob_digit, 0, c->ob_size * sizeof(digit)); ! memcpy(temp->ob_digit + c->ob_size, a->ob_digit, ! a->ob_size * sizeof(digit)); ! ! Py_DECREF(a); ! a = NULL; ! if (l_divmod(temp, c, NULL, &a) < 0) ! goto Error; ! Py_DECREF(temp); ! temp = NULL; ! ! /* z = 1 * R % c */ ! if (l_divmod(R, c, NULL, &z) < 0) ! goto Error; ! } ! else { ! z = (PyLongObject *)PyLong_FromLong(1L); ! if (z == NULL) ! goto Error; ! } /* Perform a modular reduction, X = X % c, but leave X alone if c ! * is NULL. Use Montgomery Reduction if applicable. */ ! #define REDUCE(X) \ ! if (c != NULL) { \ ! if (doMontgomery) { \ ! temp = mont_reduce(X, c, cPrime); \ ! if (temp == NULL) \ ! goto Error; \ ! } \ ! else { \ ! if (l_divmod(X, c, NULL, &temp) < 0) \ ! goto Error; \ ! } \ ! Py_XDECREF(X); \ ! X = temp; \ ! temp = NULL; \ } /* Multiply two values, then reduce the result: *************** *** 2479,2485 **** } else { /* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */ ! Py_INCREF(z); /* still holds 1L */ table[0] = z; for (i = 1; i < 32; ++i) MULT(table[i-1], a, table[i]) --- 2718,2724 ---- } else { /* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */ ! Py_INCREF(z); /* still holds 1L (or Montgomery rep. of 1L) */ table[0] = z; for (i = 1; i < 32; ++i) MULT(table[i-1], a, table[i]) *************** *** 2497,2502 **** --- 2736,2744 ---- } } + if (doMontgomery) + REDUCE(z) + if (negativeOutput && (z->ob_size != 0)) { temp = (PyLongObject *)long_sub(z, c); if (temp == NULL) *************** *** 2522,2527 **** --- 2764,2771 ---- Py_DECREF(b); Py_XDECREF(c); Py_XDECREF(temp); + Py_XDECREF(R); + Py_XDECREF(base); return (PyObject *)z; }