Index: Objects/longobject.c =================================================================== --- Objects/longobject.c (revision 77142) +++ Objects/longobject.c (working copy) @@ -26,6 +26,15 @@ */ #define FIVEARY_CUTOFF 8 +/* For modular exponentiation, use normal reduction unless the + * exponent is greater than MONTGOMERY_CUTOFF. In that case, switch + * to Montgomery Reduction. The potential drawback is that values + * need to be converted into Montgomery representation and then + * back again. + */ +#define MONTGOMERY_CUTOFF 64 + + #define ABS(x) ((x) < 0 ? -(x) : (x)) #undef MIN @@ -3345,6 +3354,160 @@ return z; } +/* +Helper for long_pow +-------------------- +Use the Extended Euclidean algorithm to calculate the inverse of a modulo b. +I.e., calculate x such that (a * x) % b == 1. + +This is translated from pseudocode in "Practical Cryptography" section +11.3.5, Ferguson & Schneier. + +Below is the algorithm in Python: + +def invMod(a, b): + c, d = a, b + uc, ud = 1, 0 + while c != 0: + q = d // c + c, d = d - (q * c), c + uc, ud = ud - (q * uc), uc + if d == 1: + return ud % b + else: + raise ValueError("argument has no inverse") +*/ +PyLongObject* l_invmod(PyLongObject *a, PyLongObject *b) +{ + PyLongObject *c, *d, *uc, *ud; + PyLongObject *retval = NULL; + + uc = (PyLongObject *)PyLong_FromLong(1L); + if (uc == NULL) + return NULL; + ud = (PyLongObject *)PyLong_FromLong(0L); + if (ud == NULL) { + Py_DECREF(uc); + return NULL; + } + c = a; + Py_INCREF(c); + d = b; + Py_INCREF(d); + + while (c->ob_size != 0) { + PyLongObject *temp, *temp2, *q; + /* q = d // c; c, d = d % c, c */ + if (l_divmod(d, c, &q, &temp) < 0) + goto error; + Py_DECREF(d); + d = c; + c = temp; + + /* uc, ud = ud - q * uc, uc */ + temp = (PyLongObject *)long_mul(q, uc); + Py_DECREF(q); + if (temp == NULL) + goto error; + temp2 = (PyLongObject *)long_sub(ud, temp); + Py_DECREF(temp); + if (temp2 == NULL) + goto error; + Py_DECREF(ud); + ud = uc; + uc = temp2; + } + + if ((d->ob_size == 1) && (d->ob_digit[0] == 1)) + l_divmod(ud, b, NULL, &retval); + else + PyErr_SetString(PyExc_ValueError, "argument has no inverse"); + +error: + Py_DECREF(c); + Py_DECREF(d); + Py_DECREF(uc); + Py_DECREF(ud); + return retval; +} + +/* +Helper for long_pow +-------------------- +Implement Montgomery Reduction per HAC, Algorithm 14.32: +http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf + +INPUT: T, m, m' + PyLong_BASE = 2**15 + n = len(m) (# of digits in m) + R = PyLong_BASE**n (next power of PyLong_BASE greater than m) + +OUTPUT: T*(R**-1) % m +*/ +PyLongObject* mont_reduce(PyLongObject *T, PyLongObject *m, digit mPrime) +{ + int n, i; + PyLongObject* A = NULL; + PyLongObject* retval = NULL; + + n = m->ob_size; + + /* A = T */ + A = (PyLongObject *)_PyLong_New(2*n+1); + if (A == NULL) + return NULL; + if (T->ob_size > 2*n) + goto Error; + memset(A->ob_digit, 0, A->ob_size * sizeof(digit)); + memcpy(A->ob_digit, T->ob_digit, T->ob_size * sizeof(digit)); + + for (i=0; i < n; i++) { + digit *aptr, *mptr, *mendptr; + twodigits carry = 0; + digit u; + + /* u = a[i] * m' mod base */ + /* the * 1U forces the multiplication to be done using + unsigned types; else we could end up with both + arguments being promoted to int, and the + multplication overflowing to give undefined + behaviour. */ + u = (digit) ((A->ob_digit[i] * 1U * mPrime) & PyLong_MASK); + + /* A += u * m * base**i + Multiply and add in place: + A[i+j] += (u * m[j]) + carry */ + aptr = A->ob_digit + i; + mptr = m->ob_digit; + mendptr = mptr + n; + while (mptr < mendptr) { + carry += *aptr + (twodigits)u * *mptr++; + *aptr++ = (digit) (carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + } + while (carry) { + carry += (twodigits) (*aptr); + *aptr++ = (digit) (carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + } + } + + /* A = A/base**n (i.e. A >>= n) */ + memmove(A->ob_digit, A->ob_digit + n, (n+1) * sizeof(digit)); + A->ob_size = n+1; + long_normalize(A); + + /* If A >= m then A = A - m */ + if (long_compare(A, m) >= 0) + retval = (PyLongObject *)long_sub(A, m); + else + return A; + +Error: + Py_DECREF(A); + return retval; +} + /* pow(v, w, x) */ static PyObject * long_pow(PyObject *v, PyObject *w, PyObject *x) @@ -3362,6 +3525,12 @@ PyLongObject *table[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + /* Montgomery values */ + int doMontgomery = 0; + PyLongObject *R = NULL; + PyLongObject *base = NULL; + digit cPrime = 0; + /* a, b, c = v, w, x */ CONVERT_BINOP(v, w, &a, &b); if (PyLong_Check(x)) { @@ -3443,20 +3612,79 @@ /* At this point a, b, and c are guaranteed non-negative UNLESS c is NULL, in which case a may be negative. */ - z = (PyLongObject *)PyLong_FromLong(1L); - if (z == NULL) - goto Error; + /* If modulus exists and is odd, we can use Montgomery Reduction + If exponent is greater than MONTGOMERY_CUTOFF, it's worth it */ + if (c && (c->ob_digit[0] & 1) && + (b->ob_digit[0] > MONTGOMERY_CUTOFF)) { + doMontgomery = 1; + /*First, calculate the Montgomery parameters (R, m') + (or in our case, (R, c')). + + Then set (a, z) to the Montgomery representations + of (a, 1), in preparation for the exponentiation loop. + */ + + /* R = PyLong_BASE ** n for smallest n such that R > c, (n == c->ob_size) */ + R = _PyLong_New(c->ob_size + 1); + if (R == NULL) + goto Error; + memset(R->ob_digit, 0, R->ob_size * sizeof(digit)); + R->ob_digit[R->ob_size-1] = 1; + + /* c' = (-pow(c, -1, PyLong_BASE)) % PyLong_BASE */ + base = (PyLongObject *)PyLong_FromLong(PyLong_BASE); + if (base == NULL) + goto Error; + temp = (PyLongObject *)l_invmod(c, base); + if (temp == NULL) + goto Error; + cPrime = PyLong_BASE - temp->ob_digit[0]; + Py_DECREF(temp); + temp = NULL; + + /* a = a * R % c # keep in mind a * R == (a << size_c) */ + temp = _PyLong_New(c->ob_size + a->ob_size); + if (temp == NULL) + goto Error; + memset(temp->ob_digit, 0, c->ob_size * sizeof(digit)); + memcpy(temp->ob_digit + c->ob_size, a->ob_digit, + a->ob_size * sizeof(digit)); + + Py_DECREF(a); + a = NULL; + if (l_divmod(temp, c, NULL, &a) < 0) + goto Error; + Py_DECREF(temp); + temp = NULL; + + /* z = 1 * R % c */ + if (l_divmod(R, c, NULL, &z) < 0) + goto Error; + } + else { + z = (PyLongObject *)PyLong_FromLong(1L); + if (z == NULL) + goto Error; + } + /* Perform a modular reduction, X = X % c, but leave X alone if c - * is NULL. + * is NULL. Use Montgomery Reduction if applicable. */ -#define REDUCE(X) \ - if (c != NULL) { \ - if (l_divmod(X, c, NULL, &temp) < 0) \ - goto Error; \ - Py_XDECREF(X); \ - X = temp; \ - temp = NULL; \ +#define REDUCE(X) \ + if (c != NULL) { \ + if (doMontgomery) { \ + temp = mont_reduce(X, c, cPrime); \ + if (temp == NULL) \ + goto Error; \ + } \ + else { \ + if (l_divmod(X, c, NULL, &temp) < 0) \ + goto Error; \ + } \ + Py_XDECREF(X); \ + X = temp; \ + temp = NULL; \ } /* Multiply two values, then reduce the result: @@ -3487,7 +3715,7 @@ } else { /* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */ - Py_INCREF(z); /* still holds 1L */ + Py_INCREF(z); /* still holds 1L (or Montgomery rep. of 1L) */ table[0] = z; for (i = 1; i < 32; ++i) MULT(table[i-1], a, table[i]) @@ -3505,6 +3733,9 @@ } } + if (doMontgomery) + REDUCE(z); + if (negativeOutput && (Py_SIZE(z) != 0)) { temp = (PyLongObject *)long_sub(z, c); if (temp == NULL) @@ -3530,6 +3761,8 @@ Py_DECREF(b); Py_XDECREF(c); Py_XDECREF(temp); + Py_XDECREF(R); + Py_XDECREF(base); return (PyObject *)z; }