X-Git-Url: https://oss.titaniummirror.com/gitweb/?a=blobdiff_plain;f=gmp%2Fmpn%2Fgeneric%2Fmu_bdiv_q.c;fp=gmp%2Fmpn%2Fgeneric%2Fmu_bdiv_q.c;h=3b5f56d08830841545b40b8bf9fe85167913c5cb;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git

diff --git a/gmp/mpn/generic/mu_bdiv_q.c b/gmp/mpn/generic/mu_bdiv_q.c
new file mode 100644
index 00000000..3b5f56d0
--- /dev/null
+++ b/gmp/mpn/generic/mu_bdiv_q.c
@@ -0,0 +1,255 @@
/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,scratch) -- Compute {np,nn} / {dp,dn} mod B^nn,
   storing the result in {qp,nn}.  Overlap allowed between Q and N; all other
   overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
   RELEASE.

Copyright 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


/* We use the "misunderstanding algorithm" (MU), discovered by Paul Zimmermann
   and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
   Jebelean's bidirectional exact division algorithm.

   The idea of this algorithm is to compute a smaller inverted value than used
   in the standard Barrett algorithm, and thus save time in the Newton
   iterations, paying just a small price when using the inverted value for
   developing quotient bits.

   Written by Torbjorn Granlund.  Paul Zimmermann suggested the use of the
   "wrap around" trick.  */

#include "gmp.h"
#include "gmp-impl.h"


/* N = {np,nn}
   D = {dp,dn}

   Requirements: N >= D
                 D >= 1
                 N mod D = 0
                 D odd
                 dn >= 2
                 nn >= 2
                 scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).

   Write the quotient to Q = {qp,nn}.

   FIXME: When iterating, perhaps do the small step before the loop, not after.
   FIXME: Try to avoid the scalar divisions when computing the inverse size.
   FIXME: Trim allocation for the (qn > dn) case; 3*dn might be possible.  In
          particular, when dn == in, tp and rp could use the same space.
   FIXME: Trim the final quotient calculation to qn limbs of precision.  */
void
mpn_mu_bdiv_q (mp_ptr qp,
               mp_srcptr np, mp_size_t nn,
               mp_srcptr dp, mp_size_t dn,
               mp_ptr scratch)
{
  mp_ptr ip;
  mp_ptr rp;
  mp_size_t qn;
  mp_size_t in;

  qn = nn;

  ASSERT (dn >= 2);
  ASSERT (qn >= 2);

  if (qn > dn)
    {
      mp_size_t b;
      mp_ptr tp;
      mp_limb_t cy;
      int k;
      mp_size_t m, wn;
      mp_size_t i;

      /* |_______________________|   dividend
                        |________|   divisor  */

      /* Compute an inverse size that is a nice partition of the quotient.  */
      b = (qn - 1) / dn + 1;            /* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;            /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
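
      /* Illustration (numbers not from the original source): with qn = 10
         and dn = 4 we get b = (10-1)/4 + 1 = 3 blocks and an inverse size
         in = (10-1)/3 + 1 = 4 <= dn, so the quotient below is produced as
         4 + 4 + 2 limbs: one mpn_mullow_n call before the loop, one loop
         iteration, and the final step after the loop.  */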

      /* Some notes on allocation:

         When in = dn, R dies when mpn_mullow returns; if in < dn, the low in
         limbs of R die at that point.  We could save memory by letting T live
         just under R, and letting the upper part of T expand into R.  These
         changes should reduce the itch (scratch requirement) to perhaps
         3*dn limbs.  */

      ip = scratch;                     /* in limbs */
      rp = scratch + in;                /* dn limbs */
      tp = scratch + in + dn;           /* dn + in limbs  FIXME: mpn_fft_next_size */
      scratch += in;                    /* roughly 2*in + 1 limbs */

      mpn_binvert (ip, dp, in, scratch);

      cy = 0;

      MPN_COPY (rp, np, dn);
      np += dn;
      mpn_mullow_n (qp, rp, ip, in);
      qn -= in;

      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
        {
          k = mpn_fft_best_k (dn, 0);
          m = mpn_fft_next_size (dn, k);
          wn = dn + in - m;             /* number of wrapped limbs */
          ASSERT_ALWAYS (wn >= 0);      /* could handle this below */
        }

      while (qn > in)
        {
#if WANT_FFT
          if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
            {
              /* The two multiplicands are dn and 'in' limbs, with dn >= in.
                 The relevant part of the result will typically partially wrap,
                 and that part will come out as subtracted to the right.  The
                 unwrapped part, m-in limbs at the high end of tp, is the lower
                 part of the sought product.  The wrapped part, at the low end
                 of tp, will be subtracted from the low part of the partial
                 remainder; we undo that operation with another subtraction.  */
              int c0;

              mpn_mul_fft (tp, m, dp, dn, qp, in, k);

              c0 = mpn_sub_n (tp + m, rp, tp, wn);

              for (i = wn; c0 != 0 && i < in; i++)
                c0 = tp[i] == GMP_NUMB_MASK;
              mpn_incr_u (tp + in, c0);
            }
          else
#endif
            mpn_mul (tp, dp, dn, qp, in);       /* mulhi, need tp[dn+in-1...in] */
          qp += in;
          if (dn != in)
            {
              /* Subtract tp[dn-1...in] from the partial remainder.  */
              cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
              if (cy == 2)
                {
                  mpn_incr_u (tp + dn, 1);
                  cy = 1;
                }
            }
          /* Subtract tp[dn+in-1...dn] from the dividend.  */
          cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
          np += in;
          mpn_mullow_n (qp, rp, ip, in);
          qn -= in;
        }

      /* Generate the last qn limbs.
         FIXME: It should be possible to limit precision here, since qn is
         typically somewhat smaller than dn.  No big gains expected.  */
#if WANT_FFT
      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
        {
          int c0;

          mpn_mul_fft (tp, m, dp, dn, qp, in, k);

          c0 = mpn_sub_n (tp + m, rp, tp, wn);

          for (i = wn; c0 != 0 && i < in; i++)
            c0 = tp[i] == GMP_NUMB_MASK;
          mpn_incr_u (tp + in, c0);
        }
      else
#endif
        mpn_mul (tp, dp, dn, qp, in);           /* mulhi, need tp[qn+in-1...in] */
      qp += in;
      if (dn != in)
        {
          cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
          if (cy == 2)
            {
              mpn_incr_u (tp + dn, 1);
              cy = 1;
            }
        }

      mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
      mpn_mullow_n (qp, rp, ip, qn);
    }
  else
    {
      /* |_______________________|   dividend
                |________________|   divisor  */

      /* Compute a half-sized inverse.  */
      in = qn - (qn >> 1);
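
      /* Illustration (numbers not from the original source): for qn = dn = 7,
         in = 7 - 3 = 4, so a 4-limb inverse delivers quotient limbs 0..3 with
         one mpn_mullow_n call, and the remaining qn - in = 3 limbs come from
         the single wrapped high product computed below.  */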

      ip = scratch;                     /* ceil(qn/2) limbs */
      rp = scratch + in;                /* ceil(qn/2)+qn limbs */
      scratch += in;                    /* 2*ceil(qn/2)+2 */

      mpn_binvert (ip, dp, in, scratch);

      mpn_mullow_n (qp, np, ip, in);            /* low `in' quotient limbs */
#if WANT_FFT
      if (ABOVE_THRESHOLD (qn, MUL_FFT_MODF_THRESHOLD))
        {
          int k;
          mp_size_t m;

          k = mpn_fft_best_k (qn, 0);
          m = mpn_fft_next_size (qn, k);
          mpn_mul_fft (rp, m, dp, qn, qp, in, k);
          if (mpn_cmp (np, rp, in) < 0)
            mpn_incr_u (rp + in, 1);
        }
      else
#endif
        mpn_mul (rp, dp, qn, qp, in);           /* mulhigh */

      mpn_sub_n (rp, np + in, rp + in, qn - in);
      mpn_mullow_n (qp + in, rp, ip, qn - in);  /* high qn-in quotient limbs */
    }
}

mp_size_t
mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
{
  mp_size_t qn;

  qn = nn;

  if (qn > dn)
    {
      return 4 * dn;            /* FIXME FIXME FIXME need mpn_fft_next_size */
    }
  else
    {
      return 2 * qn + 1 + 2;    /* FIXME FIXME FIXME need mpn_fft_next_size */
    }
}
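
For reference, a minimal usage sketch follows (not part of the patch above).  It
assumes the program is compiled inside the GMP source tree, since mpn_mu_bdiv_q,
mpn_mu_bdiv_q_itch and the TMP_* allocation macros are internal interfaces from
gmp-impl.h rather than part of the public gmp.h API, and the operand values are
made up for illustration.  The sketch builds an exactly divisible N = D*Q with
the public mpn_mul and then recovers Q mod B^nn:

#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"

int
main (void)
{
  mp_limb_t dp[2] = { 12345, 7 };       /* divisor D: two limbs, odd */
  mp_limb_t q0[2] = { 99, 3 };          /* known quotient Q */
  mp_limb_t np[4];                      /* dividend N = D * Q, nn limbs */
  mp_limb_t qp[4];                      /* recovered quotient mod B^nn */
  mp_size_t nn = 4, dn = 2;
  mp_ptr scratch;
  TMP_DECL;

  TMP_MARK;
  mpn_mul (np, q0, 2, dp, 2);           /* N is exactly divisible by D */
  scratch = TMP_ALLOC_LIMBS (mpn_mu_bdiv_q_itch (nn, dn));
  mpn_mu_bdiv_q (qp, np, nn, dp, dn, scratch);
  TMP_FREE;

  /* Expect qp = {99, 3, 0, 0}, i.e. Q zero-extended to nn limbs.  */
  printf ("%lu %lu %lu %lu\n", (unsigned long) qp[0], (unsigned long) qp[1],
          (unsigned long) qp[2], (unsigned long) qp[3]);
  return 0;
}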