X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gmp%2Fmpz%2Fpowm.c;fp=gmp%2Fmpz%2Fpowm.c;h=8f3ce97cc4a1eaa70969612427535de74c1337d1;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gmp/mpz/powm.c b/gmp/mpz/powm.c new file mode 100644 index 00000000..8f3ce97c --- /dev/null +++ b/gmp/mpz/powm.c @@ -0,0 +1,435 @@ +/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. + + Contributed by Paul Zimmermann. + +Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2009 +Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" +#ifdef BERKELEY_MP +#include "mp.h" +#endif + +/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and + t is defined by (tp,mn). */ +static void +reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn) +{ + mp_ptr qp; + TMP_DECL; + + TMP_MARK; + qp = TMP_ALLOC_LIMBS (an - mn + 1); + + mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn); + + TMP_FREE; +} + +#if REDUCE_EXPONENT +/* Return the group order of the ring mod m. */ +static mp_limb_t +phi (mp_limb_t t) +{ + mp_limb_t d, m, go; + + go = 1; + + if (t % 2 == 0) + { + t = t / 2; + while (t % 2 == 0) + { + go *= 2; + t = t / 2; + } + } + for (d = 3;; d += 2) + { + m = d - 1; + for (;;) + { + unsigned long int q = t / d; + if (q < d) + { + if (t <= 1) + return go; + if (t == d) + return go * m; + return go * (t - 1); + } + if (t != q * d) + break; + go *= m; + m = d; + t = q; + } + } +} +#endif + +/* average number of calls to redc for an exponent of n bits + with the sliding window algorithm of base 2^k: the optimal is + obtained for the value of k which minimizes 2^(k-1)+n/(k+1): + + n\k 4 5 6 7 8 + 128 156* 159 171 200 261 + 256 309 307* 316 343 403 + 512 617 607* 610 632 688 + 1024 1231 1204 1195* 1207 1256 + 2048 2461 2399 2366 2360* 2396 + 4096 4918 4787 4707 4665* 4670 +*/ + + +/* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. In REDC + each modular multiplication costs about 2*n^2 limbs operations, whereas + using usual reduction it costs 3*K(n), where K(n) is the cost of a + multiplication using Karatsuba, and a division is assumed to cost 2*K(n), + for example using Burnikel-Ziegler's algorithm. This gives a theoretical + threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~ + 2.66. */ +/* For now, also disable REDC when MOD is even, as the inverse can't handle + that. At some point, we might want to make the code faster for that case, + perhaps using CRR. 
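   The ~2.66 constant above (and the 8/3 in the POWM_THRESHOLD default
   below) can be sketched as follows, assuming K(n) grows like
   n^(ln(3)/ln(2)) with K(T) ~ T^2 at T = SQR_KARATSUBA_THRESHOLD:
   equating the two per-multiplication costs, 2*n^2 = 3*K(n)
   = 3*T^2*(n/T)^(ln(3)/ln(2)), gives (n/T)^(2-ln(3)/ln(2)) = 3/2,
   hence n/T = (3/2)^(1/(2-ln(3)/ln(2))) ~ 2.66 ~ 8/3.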
*/ + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3) +#endif + +#define HANDLE_NEGATIVE_EXPONENT 1 +#undef REDUCE_EXPONENT + +void +#ifndef BERKELEY_MP +mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m) +#else /* BERKELEY_MP */ +pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r) +#endif /* BERKELEY_MP */ +{ + mp_ptr xp, tp, qp, gp, this_gp; + mp_srcptr bp, ep, mp; + mp_size_t bn, es, en, mn, xn; + mp_limb_t invm, c; + unsigned long int enb; + mp_size_t i, K, j, l, k; + int m_zero_cnt, e_zero_cnt; + int sh; + int use_redc; +#if HANDLE_NEGATIVE_EXPONENT + mpz_t new_b; +#endif +#if REDUCE_EXPONENT + mpz_t new_e; +#endif + TMP_DECL; + + mp = PTR(m); + mn = ABSIZ (m); + if (mn == 0) + DIVIDE_BY_ZERO; + + TMP_MARK; + + es = SIZ (e); + if (es <= 0) + { + if (es == 0) + { + /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if + m equals 1. */ + SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; + PTR(r)[0] = 1; + TMP_FREE; /* we haven't really allocated anything here */ + return; + } +#if HANDLE_NEGATIVE_EXPONENT + MPZ_TMP_INIT (new_b, mn + 1); + + if (! mpz_invert (new_b, b, m)) + DIVIDE_BY_ZERO; + b = new_b; + es = -es; +#else + DIVIDE_BY_ZERO; +#endif + } + en = es; + +#if REDUCE_EXPONENT + /* Reduce exponent by dividing it by phi(m) when m small. */ + if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150) + { + MPZ_TMP_INIT (new_e, 2); + mpz_mod_ui (new_e, e, phi (mp[0])); + e = new_e; + } +#endif + + use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0; + if (use_redc) + { + /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ + modlimb_invert (invm, mp[0]); + invm = -invm; + } + else + { + /* Normalize m (i.e. make its most significant bit set) as required by + division functions below. */ + count_leading_zeros (m_zero_cnt, mp[mn - 1]); + m_zero_cnt -= GMP_NAIL_BITS; + if (m_zero_cnt != 0) + { + mp_ptr new_mp; + new_mp = TMP_ALLOC_LIMBS (mn); + mpn_lshift (new_mp, mp, mn, m_zero_cnt); + mp = new_mp; + } + } + + /* Determine optimal value of k, the number of exponent bits we look at + at a time. */ + count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]); + e_zero_cnt -= GMP_NAIL_BITS; + enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */ + k = 1; + K = 2; + while (2 * enb > K * (2 + k * (3 + k))) + { + k++; + K *= 2; + if (k == 10) /* cap allocation */ + break; + } + + tp = TMP_ALLOC_LIMBS (2 * mn); + qp = TMP_ALLOC_LIMBS (mn + 1); + + gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn); + + /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */ + bn = ABSIZ (b); + bp = PTR(b); + /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary + for speed or correctness to do this when b and m have the same number of + limbs, perhaps remove mpn_cmp call. */ + if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0)) + { + /* Reduce possibly huge base while moving it to gp[0]. Use a function + call to reduce, since we don't want the quotient allocation to + live until function return. */ + if (use_redc) + { + reduce (tp + mn, bp, bn, mp, mn); /* b mod m */ + MPN_ZERO (tp, mn); + mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */ + } + else + { + reduce (gp, bp, bn, mp, mn); + } + } + else + { + /* |b| < m. We pad out operands to become mn limbs, which simplifies + the rest of the function, but slows things down when |b| << m. 
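   As a reasoning sketch for the use_redc branch below (with
   R = 2^BITS_PER_MP_LIMB, as in the comment above): placing the padded
   base in the high mn limbs of tp forms the 2*mn-limb value b*R^mn, so
   the remainder returned by mpn_tdiv_qr is b*R^mn mod m, the Montgomery
   residue that the later mpn_redc_1 calls operate on.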
*/ + if (use_redc) + { + MPN_ZERO (tp, mn); + MPN_COPY (tp + mn, bp, bn); + MPN_ZERO (tp + mn + bn, mn - bn); + mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); + } + else + { + MPN_COPY (gp, bp, bn); + MPN_ZERO (gp + bn, mn - bn); + } + } + + /* Compute xx^i for odd g < 2^i. */ + + xp = TMP_ALLOC_LIMBS (mn); + mpn_sqr_n (tp, gp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); /* xx = x^2*R^n */ + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + this_gp = gp; + for (i = 1; i < K / 2; i++) + { + mpn_mul_n (tp, this_gp, xp, mn); + this_gp += mn; + if (use_redc) + mpn_redc_1 (this_gp, tp, mp, mn, invm); /* g[i] = x^(2i+1)*R^n */ + else + mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn); + } + + /* Start the real stuff. */ + ep = PTR (e); + i = en - 1; /* current index */ + c = ep[i]; /* current limb */ + sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */ + sh -= k; /* index of lower bit of ep[i] to take into account */ + if (sh < 0) + { /* k-sh extra bits are needed */ + if (i > 0) + { + i--; + c <<= (-sh); + sh += GMP_NUMB_BITS; + c |= ep[i] >> sh; + } + } + else + c >>= sh; + + for (j = 0; c % 2 == 0; j++) + c >>= 1; + + MPN_COPY (xp, gp + mn * (c >> 1), mn); + while (--j >= 0) + { + mpn_sqr_n (tp, xp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + } + + while (i > 0 || sh > 0) + { + c = ep[i]; + l = k; /* number of bits treated */ + sh -= l; + if (sh < 0) + { + if (i > 0) + { + i--; + c <<= (-sh); + sh += GMP_NUMB_BITS; + c |= ep[i] >> sh; + } + else + { + l += sh; /* last chunk of bits from e; l < k */ + } + } + else + c >>= sh; + c &= ((mp_limb_t) 1 << l) - 1; + + /* This while loop implements the sliding window improvement--loop while + the most significant bit of c is zero, squaring xx as we go. */ + while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0)) + { + mpn_sqr_n (tp, xp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + if (sh != 0) + { + sh--; + c = (c << 1) + ((ep[i] >> sh) & 1); + } + else + { + i--; + sh = GMP_NUMB_BITS - 1; + c = (c << 1) + (ep[i] >> sh); + } + } + + /* Replace xx by xx^(2^l)*x^c. */ + if (c != 0) + { + for (j = 0; c % 2 == 0; j++) + c >>= 1; + + /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */ + l -= j; + while (--l >= 0) + { + mpn_sqr_n (tp, xp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + } + mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + } + else + j = l; /* case c=0 */ + while (--j >= 0) + { + mpn_sqr_n (tp, xp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + } + } + + if (use_redc) + { + /* Convert back xx to xx/R^n. 
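   That is: redc maps a 2*mn-limb t to t/R^mn mod m, so feeding it xx
   (== x*R^mn mod m, padded below with zero high limbs) recovers x mod m;
   the conditional subtraction right after brings the result into [0, m).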
*/ + MPN_COPY (tp, xp, mn); + MPN_ZERO (tp + mn, mn); + mpn_redc_1 (xp, tp, mp, mn, invm); + if (mpn_cmp (xp, mp, mn) >= 0) + mpn_sub_n (xp, xp, mp, mn); + } + else + { + if (m_zero_cnt != 0) + { + mp_limb_t cy; + cy = mpn_lshift (tp, xp, mn, m_zero_cnt); + tp[mn] = cy; + mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn); + mpn_rshift (xp, xp, mn, m_zero_cnt); + } + } + xn = mn; + MPN_NORMALIZE (xp, xn); + + if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0) + { + mp = PTR(m); /* want original, unnormalized m */ + mpn_sub (xp, mp, mn, xp, xn); + xn = mn; + MPN_NORMALIZE (xp, xn); + } + MPZ_REALLOC (r, xn); + SIZ (r) = xn; + MPN_COPY (PTR(r), xp, xn); + + __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn); + TMP_FREE; +}
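
For reference, here is a minimal caller of the mpz_powm entry point this file
provides (not part of the patch; the test values are arbitrary). It computes
7^560 mod 561, which is 1 because 561 = 3*11*17 is a Carmichael number and
gcd(7, 561) = 1:

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t r, b, e, m;

  mpz_init (r);
  mpz_init_set_ui (b, 7);     /* base */
  mpz_init_set_ui (e, 560);   /* exponent */
  mpz_init_set_ui (m, 561);   /* modulus */

  mpz_powm (r, b, e, m);      /* r = b^e mod m */
  gmp_printf ("7^560 mod 561 = %Zd\n", r);   /* prints 1 */

  mpz_clear (r);
  mpz_clear (b);
  mpz_clear (e);
  mpz_clear (m);
  return 0;
}

Compile with something like "gcc test_powm.c -lgmp"; test_powm.c is just a
placeholder file name.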