X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gmp%2Fmpz%2Fpowm.c;fp=gmp%2Fmpz%2Fpowm.c;h=8f3ce97cc4a1eaa70969612427535de74c1337d1;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gmp/mpz/powm.c b/gmp/mpz/powm.c new file mode 100644 index 00000000..8f3ce97c --- /dev/null +++ b/gmp/mpz/powm.c @@ -0,0 +1,435 @@ +/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. + + Contributed by Paul Zimmermann. + +Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2009 +Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" +#ifdef BERKELEY_MP +#include "mp.h" +#endif + +/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and + t is defined by (tp,mn). */ +static void +reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn) +{ + mp_ptr qp; + TMP_DECL; + + TMP_MARK; + qp = TMP_ALLOC_LIMBS (an - mn + 1); + + mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn); + + TMP_FREE; +} + +#if REDUCE_EXPONENT +/* Return the group order of the ring mod m. */ +static mp_limb_t +phi (mp_limb_t t) +{ + mp_limb_t d, m, go; + + go = 1; + + if (t % 2 == 0) + { + t = t / 2; + while (t % 2 == 0) + { + go *= 2; + t = t / 2; + } + } + for (d = 3;; d += 2) + { + m = d - 1; + for (;;) + { + unsigned long int q = t / d; + if (q < d) + { + if (t <= 1) + return go; + if (t == d) + return go * m; + return go * (t - 1); + } + if (t != q * d) + break; + go *= m; + m = d; + t = q; + } + } +} +#endif + +/* average number of calls to redc for an exponent of n bits + with the sliding window algorithm of base 2^k: the optimal is + obtained for the value of k which minimizes 2^(k-1)+n/(k+1): + + n\k 4 5 6 7 8 + 128 156* 159 171 200 261 + 256 309 307* 316 343 403 + 512 617 607* 610 632 688 + 1024 1231 1204 1195* 1207 1256 + 2048 2461 2399 2366 2360* 2396 + 4096 4918 4787 4707 4665* 4670 +*/ + + +/* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. In REDC + each modular multiplication costs about 2*n^2 limbs operations, whereas + using usual reduction it costs 3*K(n), where K(n) is the cost of a + multiplication using Karatsuba, and a division is assumed to cost 2*K(n), + for example using Burnikel-Ziegler's algorithm. This gives a theoretical + threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~ + 2.66. */ +/* For now, also disable REDC when MOD is even, as the inverse can't handle + that. At some point, we might want to make the code faster for that case, + perhaps using CRR. 
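   The ~2.66 constant above (and the 8/3 in the POWM_THRESHOLD default
   below) can be sketched as follows, assuming K(n) grows like
   n^(ln(3)/ln(2)) with K(T) ~ T^2 at T = SQR_KARATSUBA_THRESHOLD:
   equating the two per-multiplication costs, 2*n^2 = 3*K(n)
   = 3*T^2*(n/T)^(ln(3)/ln(2)), gives (n/T)^(2-ln(3)/ln(2)) = 3/2,
   hence n/T = (3/2)^(1/(2-ln(3)/ln(2))) ~ 2.66 ~ 8/3.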
*/ + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3) +#endif + +#define HANDLE_NEGATIVE_EXPONENT 1 +#undef REDUCE_EXPONENT + +void +#ifndef BERKELEY_MP +mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m) +#else /* BERKELEY_MP */ +pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r) +#endif /* BERKELEY_MP */ +{ + mp_ptr xp, tp, qp, gp, this_gp; + mp_srcptr bp, ep, mp; + mp_size_t bn, es, en, mn, xn; + mp_limb_t invm, c; + unsigned long int enb; + mp_size_t i, K, j, l, k; + int m_zero_cnt, e_zero_cnt; + int sh; + int use_redc; +#if HANDLE_NEGATIVE_EXPONENT + mpz_t new_b; +#endif +#if REDUCE_EXPONENT + mpz_t new_e; +#endif + TMP_DECL; + + mp = PTR(m); + mn = ABSIZ (m); + if (mn == 0) + DIVIDE_BY_ZERO; + + TMP_MARK; + + es = SIZ (e); + if (es <= 0) + { + if (es == 0) + { + /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if + m equals 1. */ + SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; + PTR(r)[0] = 1; + TMP_FREE; /* we haven't really allocated anything here */ + return; + } +#if HANDLE_NEGATIVE_EXPONENT + MPZ_TMP_INIT (new_b, mn + 1); + + if (! mpz_invert (new_b, b, m)) + DIVIDE_BY_ZERO; + b = new_b; + es = -es; +#else + DIVIDE_BY_ZERO; +#endif + } + en = es; + +#if REDUCE_EXPONENT + /* Reduce exponent by dividing it by phi(m) when m small. */ + if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150) + { + MPZ_TMP_INIT (new_e, 2); + mpz_mod_ui (new_e, e, phi (mp[0])); + e = new_e; + } +#endif + + use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0; + if (use_redc) + { + /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ + modlimb_invert (invm, mp[0]); + invm = -invm; + } + else + { + /* Normalize m (i.e. make its most significant bit set) as required by + division functions below. */ + count_leading_zeros (m_zero_cnt, mp[mn - 1]); + m_zero_cnt -= GMP_NAIL_BITS; + if (m_zero_cnt != 0) + { + mp_ptr new_mp; + new_mp = TMP_ALLOC_LIMBS (mn); + mpn_lshift (new_mp, mp, mn, m_zero_cnt); + mp = new_mp; + } + } + + /* Determine optimal value of k, the number of exponent bits we look at + at a time. */ + count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]); + e_zero_cnt -= GMP_NAIL_BITS; + enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */ + k = 1; + K = 2; + while (2 * enb > K * (2 + k * (3 + k))) + { + k++; + K *= 2; + if (k == 10) /* cap allocation */ + break; + } + + tp = TMP_ALLOC_LIMBS (2 * mn); + qp = TMP_ALLOC_LIMBS (mn + 1); + + gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn); + + /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */ + bn = ABSIZ (b); + bp = PTR(b); + /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary + for speed or correctness to do this when b and m have the same number of + limbs, perhaps remove mpn_cmp call. */ + if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0)) + { + /* Reduce possibly huge base while moving it to gp[0]. Use a function + call to reduce, since we don't want the quotient allocation to + live until function return. */ + if (use_redc) + { + reduce (tp + mn, bp, bn, mp, mn); /* b mod m */ + MPN_ZERO (tp, mn); + mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */ + } + else + { + reduce (gp, bp, bn, mp, mn); + } + } + else + { + /* |b| < m. We pad out operands to become mn limbs, which simplifies + the rest of the function, but slows things down when |b| << m. 
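   As a reasoning sketch for the use_redc branch below (with
   R = 2^BITS_PER_MP_LIMB, as in the comment above): placing the padded
   base in the high mn limbs of tp forms the 2*mn-limb value b*R^mn, so
   the remainder returned by mpn_tdiv_qr is b*R^mn mod m, the Montgomery
   residue that the later mpn_redc_1 calls operate on.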
*/ + if (use_redc) + { + MPN_ZERO (tp, mn); + MPN_COPY (tp + mn, bp, bn); + MPN_ZERO (tp + mn + bn, mn - bn); + mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); + } + else + { + MPN_COPY (gp, bp, bn); + MPN_ZERO (gp + bn, mn - bn); + } + } + + /* Compute xx^i for odd g < 2^i. */ + + xp = TMP_ALLOC_LIMBS (mn); + mpn_sqr_n (tp, gp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); /* xx = x^2*R^n */ + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + this_gp = gp; + for (i = 1; i < K / 2; i++) + { + mpn_mul_n (tp, this_gp, xp, mn); + this_gp += mn; + if (use_redc) + mpn_redc_1 (this_gp, tp, mp, mn, invm); /* g[i] = x^(2i+1)*R^n */ + else + mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn); + } + + /* Start the real stuff. */ + ep = PTR (e); + i = en - 1; /* current index */ + c = ep[i]; /* current limb */ + sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */ + sh -= k; /* index of lower bit of ep[i] to take into account */ + if (sh < 0) + { /* k-sh extra bits are needed */ + if (i > 0) + { + i--; + c <<= (-sh); + sh += GMP_NUMB_BITS; + c |= ep[i] >> sh; + } + } + else + c >>= sh; + + for (j = 0; c % 2 == 0; j++) + c >>= 1; + + MPN_COPY (xp, gp + mn * (c >> 1), mn); + while (--j >= 0) + { + mpn_sqr_n (tp, xp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + } + + while (i > 0 || sh > 0) + { + c = ep[i]; + l = k; /* number of bits treated */ + sh -= l; + if (sh < 0) + { + if (i > 0) + { + i--; + c <<= (-sh); + sh += GMP_NUMB_BITS; + c |= ep[i] >> sh; + } + else + { + l += sh; /* last chunk of bits from e; l < k */ + } + } + else + c >>= sh; + c &= ((mp_limb_t) 1 << l) - 1; + + /* This while loop implements the sliding window improvement--loop while + the most significant bit of c is zero, squaring xx as we go. */ + while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0)) + { + mpn_sqr_n (tp, xp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + if (sh != 0) + { + sh--; + c = (c << 1) + ((ep[i] >> sh) & 1); + } + else + { + i--; + sh = GMP_NUMB_BITS - 1; + c = (c << 1) + (ep[i] >> sh); + } + } + + /* Replace xx by xx^(2^l)*x^c. */ + if (c != 0) + { + for (j = 0; c % 2 == 0; j++) + c >>= 1; + + /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */ + l -= j; + while (--l >= 0) + { + mpn_sqr_n (tp, xp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + } + mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + } + else + j = l; /* case c=0 */ + while (--j >= 0) + { + mpn_sqr_n (tp, xp, mn); + if (use_redc) + mpn_redc_1 (xp, tp, mp, mn, invm); + else + mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); + } + } + + if (use_redc) + { + /* Convert back xx to xx/R^n. 
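   That is: redc maps a 2*mn-limb t to t/R^mn mod m, so feeding it xx
   (== x*R^mn mod m, padded below with zero high limbs) recovers x mod m;
   the conditional subtraction right after brings the result into [0, m).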
*/ + MPN_COPY (tp, xp, mn); + MPN_ZERO (tp + mn, mn); + mpn_redc_1 (xp, tp, mp, mn, invm); + if (mpn_cmp (xp, mp, mn) >= 0) + mpn_sub_n (xp, xp, mp, mn); + } + else + { + if (m_zero_cnt != 0) + { + mp_limb_t cy; + cy = mpn_lshift (tp, xp, mn, m_zero_cnt); + tp[mn] = cy; + mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn); + mpn_rshift (xp, xp, mn, m_zero_cnt); + } + } + xn = mn; + MPN_NORMALIZE (xp, xn); + + if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0) + { + mp = PTR(m); /* want original, unnormalized m */ + mpn_sub (xp, mp, mn, xp, xn); + xn = mn; + MPN_NORMALIZE (xp, xn); + } + MPZ_REALLOC (r, xn); + SIZ (r) = xn; + MPN_COPY (PTR(r), xp, xn); + + __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn); + TMP_FREE; +}
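
For reference, here is a minimal caller of the mpz_powm entry point this file
provides (not part of the patch; the test values are arbitrary). It computes
7^560 mod 561, which is 1 because 561 = 3*11*17 is a Carmichael number and
gcd(7, 561) = 1:

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t r, b, e, m;

  mpz_init (r);
  mpz_init_set_ui (b, 7);     /* base */
  mpz_init_set_ui (e, 560);   /* exponent */
  mpz_init_set_ui (m, 561);   /* modulus */

  mpz_powm (r, b, e, m);      /* r = b^e mod m */
  gmp_printf ("7^560 mod 561 = %Zd\n", r);   /* prints 1 */

  mpz_clear (r);
  mpz_clear (b);
  mpz_clear (e);
  mpz_clear (m);
  return 0;
}

Compile with something like "gcc test_powm.c -lgmp"; test_powm.c is just a
placeholder file name.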