diff --git a/gmp/mpn/generic/mod_1_4.c b/gmp/mpn/generic/mod_1_4.c
new file mode 100644
index 00000000..74893386
--- /dev/null
+++ b/gmp/mpn/generic/mod_1_4.c
@@ -0,0 +1,148 @@
+/* mpn_mod_1s_4p (ap, n, b, cps)
+   Divide {ap, n} by b.  Return the single-limb remainder.
+   Requires that b < B / 4.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  int cnt;
+
+  ASSERT (b <= GMP_NUMB_MAX / 4);
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;
+  invert_limb (bi, b);
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS - cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  udiv_rnd_preinv (B2modb, B1modb, b, bi);
+  udiv_rnd_preinv (B3modb, B2modb, b, bi);
+  udiv_rnd_preinv (B4modb, B3modb, b, bi);
+  udiv_rnd_preinv (B5modb, B4modb, b, bi);
+
+  cps[0] = bi;
+  cps[1] = cnt;
+  cps[2] = B1modb >> cnt;
+  cps[3] = B2modb >> cnt;
+  cps[4] = B3modb >> cnt;
+  cps[5] = B4modb >> cnt;
+  cps[6] = B5modb >> cnt;
+}
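+
+/* Editorial note on the cps table filled in above (comment added for this
+   review; not in the upstream file):
+     cps[0] = bi, the invert_limb reciprocal of the normalized divisor b << cnt;
+     cps[1] = cnt, the number of leading zeros of b;
+     cps[2] .. cps[6] = B^1 mod b .. B^5 mod b, where B = 2^GMP_LIMB_BITS
+       (each BKmodb is computed modulo b << cnt, so shifting right by cnt
+       yields the residue modulo the original b).
+   mpn_mod_1s_4p below uses these residues to fold four dividend limbs per
+   loop iteration.  */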
+
+mp_limb_t
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
+{
+  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  mp_size_t i;
+  int cnt;
+
+  B1modb = cps[2];
+  B2modb = cps[3];
+  B3modb = cps[4];
+  B4modb = cps[5];
+  B5modb = cps[6];
+
+  /* Fold the top four limbs into (rh,rl); requires n >= 4.  */
+  umul_ppmm (ph, pl, ap[n - 3], B1modb);
+  add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+
+  umul_ppmm (ch, cl, ap[n - 2], B2modb);
+  add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+  umul_ppmm (ch, cl, ap[n - 1], B3modb);
+  add_ssaaaa (rh, rl, ph, pl, ch, cl);
+
+  for (i = n - 8; i >= 0; i -= 4)
+    {
+      /* rr = ap[i]                     < B
+            + ap[i+1] * (B mod b)       <= (B-1)(b-1)
+            + ap[i+2] * (B^2 mod b)     <= (B-1)(b-1)
+            + ap[i+3] * (B^3 mod b)     <= (B-1)(b-1)
+            + LO(rr)  * (B^4 mod b)     <= (B-1)(b-1)
+            + HI(rr)  * (B^5 mod b)     <= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+      umul_ppmm (ch, cl, ap[i + 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, ap[i + 3], B3modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, rl, B4modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B5modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  /* Fold in the 1 to 3 lowest limbs left over when n is not a multiple
+     of 4; after the loop, i is in [-4, -1] and i + 4 limbs remain.  */
+  if (i >= -3)
+    {
+      umul_ppmm (ph, pl, rl, B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 3]);
+      umul_ppmm (rh, rl, rh, B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      if (i >= -2)
+        {
+          umul_ppmm (ph, pl, rl, B1modb);
+          add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 2]);
+          umul_ppmm (rh, rl, rh, B2modb);
+          add_ssaaaa (rh, rl, rh, rl, ph, pl);
+          if (i >= -1)
+            {
+              umul_ppmm (ph, pl, rl, B1modb);
+              add_ssaaaa (ph, pl, ph, pl, 0, ap[0]);
+              umul_ppmm (rh, rl, rh, B2modb);
+              add_ssaaaa (rh, rl, rh, rl, ph, pl);
+            }
+        }
+    }
+
+  bi = cps[0];
+  cnt = cps[1];
+
+#if 1
+  /* Fold rh into the low part with one more B mod b multiply, then form
+     the high limb of (rh,rl) << cnt for the final 2/1 division below.  */
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, 0, cl);
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+#else
+  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+                     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+  ASSERT (q <= 4);	/* optimize for small quotient? */
+#endif
+
+  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+
+  return r >> cnt;
+}
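
Editorial usage sketch (not part of the diff): as the header comment warns,
these are internal GMP entry points, normally reached only through documented
interfaces such as mpn_mod_1, which selects this path for suitable divisors
and operand sizes. Assuming a build where gmp-impl.h exposes them, the pair
would be used by precomputing the table once per divisor and then reducing
any number of dividends against it:

  #include "gmp.h"
  #include "gmp-impl.h"

  /* Hypothetical wrapper, for illustration only: remainder of {ap, n}
     mod b, with b < B/4 and n >= 4 as this file requires.  */
  static mp_limb_t
  mod_1_4_example (mp_srcptr ap, mp_size_t n, mp_limb_t b)
  {
    mp_limb_t cps[7];
    mpn_mod_1s_4p_cps (cps, b);        /* one-time setup for divisor b */
    return mpn_mod_1s_4p (ap, n, b, cps);
  }

The point of the split interface is amortization: for repeated reductions by
the same b, the invert_limb reciprocal and the B^k mod b residues are computed
once, and each call then costs only multiplies and adds per four limbs plus a
single preinverted 2/1 division at the end.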