Imported gcc-4.4.3

[msp430-gcc.git] / gmp / mpn / generic / dive_1.c
diff --git a/gmp/mpn/generic/dive_1.c b/gmp/mpn/generic/dive_1.c

new file mode 100644 (file)

index 0000000..27df57b
--- /dev/null
+++ b/gmp/mpn/generic/dive_1.c
@@ -0,0 +1,148 @@
+/* mpn_divexact_1 -- mpn by limb exact division.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+
+/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
+   q will only be correct if d divides a exactly.
+
+   A separate loop is used for shift==0 because n<<BITS_PER_MP_LIMB doesn't
+   give zero on all CPUs (for instance it doesn't on the x86s).  This
+   separate loop might run faster too, helping odd divisors.
+
+   Possibilities:
+
+   mpn_divexact_1c could be created, accepting and returning c.  This would
+   let a long calculation be done piece by piece.  Currently there's no
+   particular need for that, and not returning c means that a final umul can
+   be skipped.
+
+   Another use for returning c would be letting the caller know whether the
+   division was in fact exact.  It would work just to return the carry bit
+   "c=(l>s)" and let the caller do a final umul if interested.
+
+   When the divisor is even, the factors of two could be handled with a
+   separate mpn_rshift, instead of shifting on the fly.  That might be
+   faster on some CPUs and would mean just the shift==0 style loop would be
+   needed.
+
+   If n<<BITS_PER_MP_LIMB gives zero on a particular CPU then the separate
+   shift==0 loop is unnecessary, and could be eliminated if there's no great
+   speed difference.
+
+   It's not clear whether "/" is the best way to handle size==1.  Alpha gcc
+   2.95 for instance has a poor "/" and might prefer the modular method.
+   Perhaps a tuned parameter should control this.
+
+   If src[size-1] < divisor then dst[size-1] will be zero, and one divide
+   step could be skipped.  A test at last step for s<divisor (or ls in the
+   even case) might be a good way to do that.  But if this code is often
+   used with small divisors then it might not be worth bothering  */
+
+void
+mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+  mp_size_t  i;
+  mp_limb_t  c, h, l, ls, s, s_next, inverse, dummy;
+  unsigned   shift;
+
+  ASSERT (size >= 1);
+  ASSERT (divisor != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (divisor);
+
+  s = src[0];
+
+  if (size == 1)
+    {
+      dst[0] = s / divisor;
+      return;
+    }
+
+  if ((divisor & 1) == 0)
+    {
+      count_trailing_zeros (shift, divisor);
+      divisor >>= shift;
+    }
+  else
+    shift = 0;
+
+  binvert_limb (inverse, divisor);
+  divisor <<= GMP_NAIL_BITS;
+
+  if (shift != 0)
+    {
+      c = 0;
+      i = 0;
+      size--;
+
+      do
+       {
+         s_next = src[i+1];
+         ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
+         s = s_next;
+
+         SUBC_LIMB (c, l, ls, c);
+
+         l = (l * inverse) & GMP_NUMB_MASK;
+         dst[i] = l;
+
+         umul_ppmm (h, dummy, l, divisor);
+         c += h;
+
+         i++;
+       }
+      while (i < size);
+
+      ls = s >> shift;
+      l = ls - c;
+      l = (l * inverse) & GMP_NUMB_MASK;
+      dst[i] = l;
+    }
+  else
+    {
+      l = (s * inverse) & GMP_NUMB_MASK;
+      dst[0] = l;
+      i = 1;
+      c = 0;
+
+      do
+       {
+         umul_ppmm (h, dummy, l, divisor);
+         c += h;
+
+         s = src[i];
+         SUBC_LIMB (c, l, s, c);
+
+         l = (l * inverse) & GMP_NUMB_MASK;
+         dst[i] = l;
+         i++;
+       }
+      while (i < size);
+    }
+}