Imported gcc-4.4.3

[msp430-gcc.git] / gmp / mpn / generic / toom_interpolate_7pts.c
diff --git a/gmp/mpn/generic/toom_interpolate_7pts.c b/gmp/mpn/generic/toom_interpolate_7pts.c

new file mode 100644 (file)

index 0000000..872da26
--- /dev/null
+++ b/gmp/mpn/generic/toom_interpolate_7pts.c
@@ -0,0 +1,201 @@
+/* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
+
+   Contributed to the GNU project by Niels Möller.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Arithmetic right shift, requiring that the shifted out bits are zero. */
+static inline void
+divexact_2exp (mp_ptr rp, mp_srcptr sp, mp_size_t n, unsigned shift)
+{
+  mp_limb_t sign;
+  sign = LIMB_HIGHBIT_TO_MASK (sp[n-1] << GMP_NAIL_BITS) << (GMP_NUMB_BITS - shift);
+  ASSERT_NOCARRY (mpn_rshift (rp, sp, n, shift));
+  rp[n-1] |= sign & GMP_NUMB_MASK;
+}
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#ifndef mpn_divexact_by9
+#define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
+#endif
+#ifndef mpn_divexact_by15
+#define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
+#endif
+
+/* Interpolation for toom4, using the evaluation points infinity, 2,
+   1, -1, 1/2, -1/2. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
+   seven values
+
+     w0 = f(0),
+     w1 = 64 f(-1/2),
+     w2 = 64 f(1/2),
+     w3 = f(-1),
+     w4 = f(1)
+     w5 = f(2)
+     w6 = limit at infinity of f(x) / x^6,
+
+   The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
+   w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
+   w6n }. The other values are 2n + 1 limbs each (with most
+   significant limbs small). f(-1) and f(-1/2) may be negative, signs
+   determined by the flag bits. All intermediate results are
+   represented in two's complement. Inputs are destroyed.
+
+   Needs (2*n + 1) limbs of temporary storage.
+*/
+
+void
+mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom4_flags flags,
+                          mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
+                          mp_size_t w6n, mp_ptr tp)
+{
+  mp_size_t m = 2*n + 1;
+  mp_ptr w2 = rp + 2*n;
+  mp_ptr w6 = rp + 6*n;
+  mp_limb_t cy;
+
+  ASSERT (w6n > 0);
+  ASSERT (w6n <= 2*n);
+
+  /* Using Marco Bodrato's formulas
+
+     W5 = W5 + W2
+     W3 =(W3 + W4)/2
+     W1 = W1 + W2
+     W2 = W2 - W6 - W0*64
+     W2 =(W2*2 - W1)/8
+     W4 = W4 - W3
+
+     W5 = W5 - W4*65
+     W4 = W4 - W6 - W0
+     W5 = W5 + W4*45
+     W2 =(W2 - W4)/3
+     W4 = W4 - W2
+
+     W1 = W1 - W5
+     W5 =(W5 - W3*16)/ 18
+     W3 = W3 - W5
+     W1 =(W1/30 + W5)/ 2
+     W5 = W5 - W1
+
+     where W0 = f(0), W1 = 64 f(-1/2), W2 = 64 f(1/2), W3 = f(-1),
+          W4 = f(1), W5 = f(2), W6 = f(oo),
+  */
+
+  mpn_add_n (w5, w5, w2, m);
+  if (flags & toom4_w3_neg)
+    mpn_add_n (w3, w3, w4, m);
+  else
+    mpn_sub_n (w3, w4, w3, m);
+  divexact_2exp (w3, w3, m, 1);
+  if (flags & toom4_w1_neg)
+    mpn_add_n (w1, w1, w2, m);
+  else
+    mpn_sub_n (w1, w2, w1, m);
+  mpn_sub (w2, w2, m, w6, w6n);
+  tp[2*n] = mpn_lshift (tp, rp, 2*n, 6);
+  mpn_sub_n (w2, w2, tp, m);
+  mpn_lshift (w2, w2, m, 1);
+  mpn_sub_n (w2, w2, w1, m);
+  divexact_2exp (w2, w2, m, 3);
+  mpn_sub_n (w4, w4, w3, m);
+
+  mpn_submul_1 (w5, w4, m, 65);
+  mpn_sub (w4, w4, m, w6, w6n);
+  mpn_sub (w4, w4, m, rp, 2*n);
+  mpn_addmul_1 (w5, w4, m, 45);
+  mpn_sub_n (w2, w2, w4, m);
+  /* Rely on divexact working with two's complement */
+  mpn_divexact_by3 (w2, w2, m);
+  mpn_sub_n (w4, w4, w2, m);
+
+  mpn_sub_n (w1, w1, w5, m);
+  mpn_lshift (tp, w3, m, 4);
+  mpn_sub_n (w5, w5, tp, m);
+  divexact_2exp (w5, w5, m, 1);
+  mpn_divexact_by9 (w5, w5, m);
+  mpn_sub_n (w3, w3, w5, m);
+  divexact_2exp (w1, w1, m, 1);
+  mpn_divexact_by15 (w1, w1, m);
+  mpn_add_n (w1, w1, w5, m);
+  divexact_2exp (w1, w1, m, 1);
+  mpn_sub_n (w5, w5, w1, m);
+
+  /* Two's complement coefficients must be non-negative at the end of
+     this procedure. */
+  ASSERT ( !(w1[2*n] & GMP_LIMB_HIGHBIT));
+  ASSERT ( !(w2[2*n] & GMP_LIMB_HIGHBIT));
+  ASSERT ( !(w3[2*n] & GMP_LIMB_HIGHBIT));
+  ASSERT ( !(w4[2*n] & GMP_LIMB_HIGHBIT));
+  ASSERT ( !(w5[2*n] & GMP_LIMB_HIGHBIT));
+
+  /* Addition chain. Note carries and the 2n'th limbs that need to be
+   * added in.
+   *
+   * Special care is needed for w2[2n] and the corresponding carry,
+   * since the "simple" way of adding it all together would overwrite
+   * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
+   * the high half of w3 and the low half of w4.
+   *
+   *         7    6    5    4    3    2    1    0
+   *    |    |    |    |    |    |    |    |    |
+   *                  ||w3 (2n+1)|
+   *             ||w4 (2n+1)|
+   *        ||w5 (2n+1)|        ||w1 (2n+1)|
+   *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |  (share storage with r)
+   *  -----------------------------------------------
+   *  r |    |    |    |    |    |    |    |    |
+   *        c7   c6   c5   c4   c3                 Carries to propagate
+   */
+
+  cy = mpn_add_n (rp + n, rp + n, w1, 2*n);
+  MPN_INCR_U (w2 + n, n + 1, w1[2*n] + cy);
+  cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
+  MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
+  cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
+  MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
+  cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
+  MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
+  if (w6n > n + 1)
+    {
+      mp_limb_t c7 = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1);
+      MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, c7);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
+#if WANT_ASSERT
+      {
+       mp_size_t i;
+       for (i = w6n; i <= n; i++)
+         ASSERT (w5[n + i] == 0);
+      }
+#endif
+    }
+}