X-Git-Url: https://oss.titaniummirror.com/gitweb/?a=blobdiff_plain;f=gmp%2Fmpn%2Fgeneric%2Ftoom_interpolate_5pts.c;fp=gmp%2Fmpn%2Fgeneric%2Ftoom_interpolate_5pts.c;h=67260cc3d590e0ac51cfbffefd92448f302b92e5;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git

diff --git a/gmp/mpn/generic/toom_interpolate_5pts.c b/gmp/mpn/generic/toom_interpolate_5pts.c
new file mode 100644
index 00000000..67260cc3
--- /dev/null
+++ b/gmp/mpn/generic/toom_interpolate_5pts.c
@@ -0,0 +1,170 @@
+/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
+
+   Contributed to the GNU project by Robert Harley.
+   Improvements by Paul Zimmermann and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
+			   mp_size_t k, mp_size_t twor, int sa,
+			   mp_limb_t vinf0, mp_ptr ws)
+{
+  mp_limb_t cy, saved;
+  mp_size_t twok = k + k;
+  mp_size_t kk1 = twok + 1;
+  mp_ptr c1, v1, c3, vinf, c5;
+  mp_limb_t cout; /* final carry, should be zero at the end */
+
+  c1 = c  + k;
+  v1 = c1 + k;
+  c3 = v1 + k;
+  vinf = c3 + k;
+  c5 = vinf + k;
+
+#define v0 (c)
+  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
+     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
+  */
+  if (sa <= 0)
+    mpn_add_n (v2, v2, vm1, kk1);
+  else
+    mpn_sub_n (v2, v2, vm1, kk1);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
+
+  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
+						      /* (5 3 1 1 0)*/
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
+
+  /* (2) vm1 <- tm1 := (v1 - sa*vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
+     tm1 >= 0                                            (0  1 0  1 0)
+     No carry comes out from {v1, kk1} +/- {vm1, kk1},
+     and the division by two is exact */
+  if (sa <= 0)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (vm1, v1, vm1, kk1);
+#else
+      mpn_add_n (vm1, v1, vm1, kk1);
+      mpn_rshift (vm1, vm1, kk1, 1);
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
+#else
+      mpn_sub_n (vm1, v1, vm1, kk1);
+      mpn_rshift (vm1, vm1, kk1, 1);
+#endif
+    }
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
+
+  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
+     t1 >= 0
+  */
+  vinf[0] -= mpn_sub_n (v1, v1, c, twok);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
+
+  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
+     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
+  */
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (v2, v2, v1, kk1);
+#else
+  mpn_sub_n (v2, v2, v1, kk1);
+  mpn_rshift (v2, v2, kk1, 1);
+#endif
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
+
+  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
+     result is v1 >= 0
+  */
+  mpn_sub_n (v1, v1, vm1, kk1);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0   v1-v0-tm1      hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
+
+  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
+     result is v2 >= 0 */
+  saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
+  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
+#ifdef HAVE_NATIVE_mpn_sublsh1_n
+  cy = mpn_sublsh1_n (v2, v2, vinf, twor);
+#else
+  cy = mpn_lshift (ws, vinf, twor, 1);
+  cy += mpn_sub_n (v2, v2, ws, twor);
+#endif
+  MPN_DECR_U (v2 + twor, kk1 - twor, cy);
+
+  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
+     result is >= 0 */
+  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
+  vinf[0] = saved;
+  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
+  __GMPN_ADD_1 (cout, vinf, vinf, twor, vinf0); /* Add vinf0, propagate carry. */
+
+  /* (8) vm1 <- vm1-t2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
+     vm1 >= 0
+  */
+  mpn_sub_n (vm1, vm1, v2, kk1);            /* No overlapping here.        */
+
+  /********************* Beginning the final phase **********************/
+
+  /* {c,2k} {c+2k,2k  } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       t1      hi(t1)+vinf   tm1    (v2-vm1-3t1)/6    EMPTY */
+
+  /* (9) add t2 in {c+3k, ...} */
+  cy = mpn_add_n (c3, c3, v2, kk1);
+  __GMPN_ADD_1 (cout, c5 + 1, c5 + 1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
+
+  /* {c,2k} {c+2k,2k  } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       t1      hi(t1)+vinf   tm1    (v2-vm1-3t1)/6    EMPTY */
+  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
+     v0       t1         vinf      tm1  t2
+		    +t2 */
+
+  /* add vm1 in {c+k, ...} */
+  cy = mpn_add_n (c1, c1, vm1, kk1);
+  __GMPN_ADD_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
+
+  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
+     v0       t1         vinf      tm1  t2
+	  +tm1      +t2    */
+
+#undef v0
+#undef t2
+}