Imported gcc-4.4.3

[msp430-gcc.git] / gmp / mpn / generic / gcd.c
diff --git a/gmp/mpn/generic/gcd.c b/gmp/mpn/generic/gcd.c

new file mode 100644 (file)

index 0000000..542e0fe
--- /dev/null
+++ b/gmp/mpn/generic/gcd.c
@@ -0,0 +1,286 @@
+/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
+2004, 2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Uses the HGCD operation described in
+
+     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
+     computation, Math. Comp. 77 (2008), 589-607.
+
+  to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
+  then uses Lehmer's algorithm.
+*/
+
+/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
+ * 2)/3, which gives a balanced multiplication in
+ * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
+ * performance. The matrix-vector multiplication is then
+ * 4:1-unbalanced, with matrix elements of size n/6, and vector
+ * elements of size p = 2n/3. */
+
+/* From analysis of the theoretical running time, it appears that when
+ * multiplication takes time O(n^alpha), p should be chosen so that
+ * the ratio of the time for the mpn_hgcd call, and the time for the
+ * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
+ * 1). */
+#ifdef TUNE_GCD_P
+#define P_TABLE_SIZE 10000
+mp_size_t p_table[P_TABLE_SIZE];
+#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
+#else
+#define CHOOSE_P(n) (2*(n) / 3)
+#endif
+
+mp_size_t
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
+{
+  mp_size_t talloc;
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+
+  mp_size_t gn;
+  mp_ptr tp;
+  TMP_DECL;
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  talloc = MPN_GCD_LEHMER_N_ITCH(n);
+
+  /* For initial division */
+  scratch = usize - n + 1;
+  if (scratch > talloc)
+    talloc = scratch;
+
+#if TUNE_GCD_P
+  if (CHOOSE_P (n) > 0)
+#else
+  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t scratch;
+#if TUNE_GCD_P
+      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
+        is increasing */
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
+      hgcd_scratch = mpn_hgcd_itch (n);
+      update_scratch = 2*(n - 1);
+#else
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+#endif
+      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (scratch > talloc)
+       talloc = scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(talloc);
+
+  if (usize > n)
+    {
+      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
+
+      if (mpn_zero_p (up, n))
+       {
+         MPN_COPY (gp, vp, n);
+         TMP_FREE;
+         return n;
+       }
+    }
+
+#if TUNE_GCD_P
+  while (CHOOSE_P (n) > 0)
+#else
+  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
+      if (nn > 0)
+       {
+         ASSERT (M.n <= (n - p - 1)/2);
+         ASSERT (M.n + p <= (p + n - 1) / 2);
+         /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+         n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
+       }
+      else
+       {
+         /* Temporary storage n */
+         n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
+         if (n == 0)
+           {
+             TMP_FREE;
+             return gn;
+           }
+       }
+    }
+
+  gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
+  TMP_FREE;
+  return gn;
+}
+
+#ifdef TUNE_GCD_P
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include "speed.h"
+
+static int
+compare_double(const void *ap, const void *bp)
+{
+  double a = * (const double *) ap;
+  double b = * (const double *) bp;
+
+  if (a < b)
+    return -1;
+  else if (a > b)
+    return 1;
+  else
+    return 0;
+}
+
+static double
+median (double *v, size_t n)
+{
+  qsort(v, n, sizeof(*v), compare_double);
+
+  return v[n/2];
+}
+
+#define TIME(res, code) do {                           \
+  double time_measurement[5];                          \
+  unsigned time_i;                                     \
+                                                       \
+  for (time_i = 0; time_i < 5; time_i++)               \
+    {                                                  \
+      speed_starttime();                               \
+      code;                                            \
+      time_measurement[time_i] = speed_endtime();      \
+    }                                                  \
+  res = median(time_measurement, 5);                   \
+} while (0)
+
+int
+main(int argc, char *argv)
+{
+  gmp_randstate_t rands;
+  mp_size_t n;
+  mp_ptr ap;
+  mp_ptr bp;
+  mp_ptr up;
+  mp_ptr vp;
+  mp_ptr gp;
+  mp_ptr tp;
+  TMP_DECL;
+
+  /* Unbuffered so if output is redirected to a file it isn't lost if the
+     program is killed part way through.  */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  gmp_randinit_default (rands);
+
+  TMP_MARK;
+
+  ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
+
+  mpn_random (ap, P_TABLE_SIZE);
+  mpn_random (bp, P_TABLE_SIZE);
+
+  memset (p_table, 0, sizeof(p_table));
+
+  for (n = 100; n++; n < P_TABLE_SIZE)
+    {
+      mp_size_t p;
+      mp_size_t best_p;
+      double best_time;
+      double lehmer_time;
+
+      if (ap[n-1] == 0)
+       ap[n-1] = 1;
+
+      if (bp[n-1] == 0)
+       bp[n-1] = 1;
+
+      p_table[n] = 0;
+      TIME(lehmer_time, {
+         MPN_COPY (up, ap, n);
+         MPN_COPY (vp, bp, n);
+         mpn_gcd_lehmer_n (gp, up, vp, n, tp);
+       });
+
+      best_time = lehmer_time;
+      best_p = 0;
+
+      for (p = n * 0.48; p < n * 0.77; p++)
+       {
+         double t;
+
+         p_table[n] = p;
+
+         TIME(t, {
+             MPN_COPY (up, ap, n);
+             MPN_COPY (vp, bp, n);
+             mpn_gcd (gp, up, n, vp, n);
+           });
+
+         if (t < best_time)
+           {
+             best_time = t;
+             best_p = p;
+           }
+       }
+      printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
+      if (best_p > 0)
+       {
+         double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
+         printf(" %5.3g%%", speedup);
+         if (speedup < 1.0)
+           {
+             printf(" (ignored)");
+             best_p = 0;
+           }
+       }
+      printf("\n");
+
+      p_table[n] = best_p;
+    }
+  TMP_FREE;
+  gmp_randclear(rands);
+  return 0;
+}
+#endif /* TUNE_GCD_P */