Imported gcc-4.4.3

[msp430-gcc.git] / mpfr / atan.c
diff --git a/mpfr/atan.c b/mpfr/atan.c

new file mode 100644 (file)

index 0000000..17b65aa
--- /dev/null
+++ b/mpfr/atan.c
@@ -0,0 +1,385 @@
+/* mpfr_atan -- arc-tangent of a floating-point number
+
+Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+Contributed by the Arenaire and Cacao projects, INRIA.
+
+This file is part of the GNU MPFR Library, and was contributed by Mathieu Dutour.
+
+The GNU MPFR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MPFR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MPFR Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+MA 02110-1301, USA. */
+
+#define MPFR_NEED_LONGLONG_H
+#include "mpfr-impl.h"
+
+/* If x = p/2^r, put in y an approximation of atan(x)/x using 2^m terms
+   for the series expansion, with an error of at most 1 ulp.
+   Assumes |x| < 1.
+
+   If X=x^2, we want 1 - X/3 + X^2/5 - ... + (-1)^k*X^k/(2k+1) + ...
+*/
+static void
+mpfr_atan_aux (mpfr_ptr y, mpz_ptr p, long r, int m, mpz_t *tab)
+{
+  mpz_t *S, *Q, *ptoj;
+  unsigned long n, i, k, j, l;
+  mp_exp_t diff, expo;
+  int im, done;
+  mp_prec_t mult, *accu, *log2_nb_terms;
+  mp_prec_t precy = MPFR_PREC(y);
+
+  if (mpz_cmp_ui (p, 0) == 0)
+    {
+      mpfr_set_ui (y, 1, GMP_RNDN); /* limit(atan(x)/x, x=0) */
+      return;
+    }
+
+  accu = (mp_prec_t*) (*__gmp_allocate_func) ((2 * m + 2) * sizeof (mp_prec_t));
+  log2_nb_terms = accu + m + 1;
+
+  /* Set Tables */
+  S    = tab;           /* S */
+  ptoj = S + 1*(m+1);   /* p^2^j Precomputed table */
+  Q    = S + 2*(m+1);   /* Product of Odd integer  table  */
+
+  /* From p to p^2, and r to 2r */
+  mpz_mul (p, p, p);
+  MPFR_ASSERTD (2 * r > r);
+  r = 2 * r;
+
+  /* Normalize p */
+  n = mpz_scan1 (p, 0);
+  mpz_tdiv_q_2exp (p, p, n); /* exact */
+  MPFR_ASSERTD (r > n);
+  r -= n;
+  /* since |p/2^r| < 1, and p is a non-zero integer, necessarily r > 0 */
+
+  MPFR_ASSERTD (mpz_sgn (p) > 0);
+  MPFR_ASSERTD (m > 0);
+
+  /* check if p=1 (special case) */
+  l = 0;
+  /*
+    We compute by binary splitting, with X = x^2 = p/2^r:
+    P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
+    Q(a,b) = (2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
+    S(a,b) = p*(2a+1) if a+1=b, Q(c,b)*S(a,c)+Q(a,c)*P(a,c)*S(c,b) otherwise
+    Then atan(x)/x ~ S(0,i)/Q(0,i) for i so that (p/2^r)^i/i is small enough.
+    The factor 2^(r*(b-a)) in Q(a,b) is implicit, thus we have to take it
+    into account when we compute with Q.
+  */
+  accu[0] = 0; /* accu[k] = Mult[0] + ... + Mult[k], where Mult[j] is the
+                  number of bits of the corresponding term S[j]/Q[j] */
+  if (mpz_cmp_ui (p, 1) != 0)
+    {
+      /* p <> 1: precompute ptoj table */
+      mpz_set (ptoj[0], p);
+      for (im = 1 ; im <= m ; im ++)
+        mpz_mul (ptoj[im], ptoj[im - 1], ptoj[im - 1]);
+      /* main loop */
+      n = 1UL << m;
+      /* the ith term being X^i/(2i+1) with X=p/2^r, we can stop when
+         p^i/2^(r*i) < 2^(-precy), i.e. r*i > precy + log2(p^i) */
+      for (i = k = done = 0; (i < n) && (done == 0); i += 2, k ++)
+        {
+          /* initialize both S[k],Q[k] and S[k+1],Q[k+1] */
+          mpz_set_ui (Q[k+1], 2 * i + 3); /* Q(i+1,i+2) */
+          mpz_mul_ui (S[k+1], p, 2 * i + 1); /* S(i+1,i+2) */
+          mpz_mul_2exp (S[k], Q[k+1], r);
+          mpz_sub (S[k], S[k], S[k+1]); /* S(i,i+2) */
+          mpz_mul_ui (Q[k], Q[k+1], 2 * i + 1); /* Q(i,i+2) */
+          log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
+          for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l ++, j >>= 1, k --)
+            {
+              /* invariant: S[k-1]/Q[k-1] and S[k]/Q[k] correspond
+                 to 2^l terms each. We combine them into S[k-1]/Q[k-1] */
+              MPFR_ASSERTD (k > 0);
+              mpz_mul (S[k], S[k], Q[k-1]);
+              mpz_mul (S[k], S[k], ptoj[l]);
+              mpz_mul (S[k-1], S[k-1], Q[k]);
+              mpz_mul_2exp (S[k-1], S[k-1], r << l);
+              mpz_add (S[k-1], S[k-1], S[k]);
+              mpz_mul (Q[k-1], Q[k-1], Q[k]);
+              log2_nb_terms[k-1] = l + 1;
+              /* now S[k-1]/Q[k-1] corresponds to 2^(l+1) terms */
+              MPFR_MPZ_SIZEINBASE2(mult, ptoj[l+1]);
+              /* FIXME: precompute bits(ptoj[l+1]) outside the loop? */
+              mult = (r << (l + 1)) - mult - 1;
+              accu[k-1] = (k == 1) ? mult : accu[k-2] + mult;
+              if (accu[k-1] > precy)
+                done = 1;
+            }
+        }
+    }
+  else /* special case p=1: the ith term being X^i/(2i+1) with X=1/2^r,
+          we can stop when r*i > precy i.e. i > precy/r */
+    {
+      n = 1UL << m;
+      for (i = k = 0; (i < n) && (i <= precy / r); i += 2, k ++)
+        {
+          mpz_set_ui (Q[k + 1], 2 * i + 3);
+          mpz_mul_2exp (S[k], Q[k+1], r);
+          mpz_sub_ui (S[k], S[k], 1 + 2 * i);
+          mpz_mul_ui (Q[k], Q[k + 1], 1 + 2 * i);
+          log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
+          for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l++, j >>= 1, k --)
+            {
+              MPFR_ASSERTD (k > 0);
+              mpz_mul (S[k], S[k], Q[k-1]);
+              mpz_mul (S[k-1], S[k-1], Q[k]);
+              mpz_mul_2exp (S[k-1], S[k-1], r << l);
+              mpz_add (S[k-1], S[k-1], S[k]);
+              mpz_mul (Q[k-1], Q[k-1], Q[k]);
+              log2_nb_terms[k-1] = l + 1;
+            }
+        }
+    }
+
+  /* we need to combine S[0]/Q[0]...S[k-1]/Q[k-1] */
+  l = 0; /* number of terms accumulated in S[k]/Q[k] */
+  while (k > 1)
+    {
+      k --;
+      /* combine S[k-1]/Q[k-1] and S[k]/Q[k] */
+      j = log2_nb_terms[k-1];
+      mpz_mul (S[k], S[k], Q[k-1]);
+      if (mpz_cmp_ui (p, 1) != 0)
+        mpz_mul (S[k], S[k], ptoj[j]);
+      mpz_mul (S[k-1], S[k-1], Q[k]);
+      l += 1 << log2_nb_terms[k];
+      mpz_mul_2exp (S[k-1], S[k-1], r * l);
+      mpz_add (S[k-1], S[k-1], S[k]);
+      mpz_mul (Q[k-1], Q[k-1], Q[k]);
+    }
+  (*__gmp_free_func) (accu, (2 * m + 2) * sizeof (mp_prec_t));
+
+  MPFR_MPZ_SIZEINBASE2 (diff, S[0]);
+  diff -= 2 * precy;
+  expo = diff;
+  if (diff >= 0)
+    mpz_tdiv_q_2exp (S[0], S[0], diff);
+  else
+    mpz_mul_2exp (S[0], S[0], -diff);
+
+  MPFR_MPZ_SIZEINBASE2 (diff, Q[0]);
+  diff -= precy;
+  expo -= diff;
+  if (diff >= 0)
+    mpz_tdiv_q_2exp (Q[0], Q[0], diff);
+  else
+    mpz_mul_2exp (Q[0], Q[0], -diff);
+
+  mpz_tdiv_q (S[0], S[0], Q[0]);
+  mpfr_set_z (y, S[0], GMP_RNDD);
+  MPFR_SET_EXP (y, MPFR_EXP(y) + expo - r * (i - 1));
+}
+
+int
+mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
+{
+  mpfr_t xp, arctgt, sk, tmp, tmp2;
+  mpz_t  ukz;
+  mpz_t *tabz;
+  mp_exp_t exptol;
+  mp_prec_t prec, realprec;
+  unsigned long twopoweri;
+  int comparaison, inexact, inexact2;
+  int i, n0, oldn0;
+  MPFR_GROUP_DECL (group);
+  MPFR_SAVE_EXPO_DECL (expo);
+  MPFR_ZIV_DECL (loop);
+
+  MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
+                 ("atan[%#R]=%R inexact=%d", atan, atan, inexact));
+
+  /* Singular cases */
+  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
+    {
+      if (MPFR_IS_NAN (x))
+        {
+          MPFR_SET_NAN (atan);
+          MPFR_RET_NAN;
+        }
+      else if (MPFR_IS_INF (x))
+        {
+          if (MPFR_IS_POS (x))  /* arctan(+inf) = Pi/2 */
+            inexact = mpfr_const_pi (atan, rnd_mode);
+          else /* arctan(-inf) = -Pi/2 */
+            {
+              inexact = -mpfr_const_pi (atan,
+                                        MPFR_INVERT_RND (rnd_mode));
+              MPFR_CHANGE_SIGN (atan);
+            }
+          inexact2 = mpfr_div_2ui (atan, atan, 1, rnd_mode);
+          if (MPFR_UNLIKELY (inexact2))
+            inexact = inexact2; /* An underflow occurs */
+          MPFR_RET (inexact);
+        }
+      else /* x is necessarily 0 */
+        {
+          MPFR_ASSERTD (MPFR_IS_ZERO (x));
+          MPFR_SET_ZERO (atan);
+          MPFR_SET_SAME_SIGN (atan, x);
+          MPFR_RET (0);
+        }
+    }
+
+  /* atan(x) = x - x^3/3 + x^5/5...
+     so the error is < 2^(3*EXP(x)-1)
+     so `EXP(x)-(3*EXP(x)-1)` = -2*EXP(x)+1 */
+  MPFR_FAST_COMPUTE_IF_SMALL_INPUT (atan, x, -2 * MPFR_GET_EXP (x), 1, 0,
+                                    rnd_mode, {});
+
+  /* Set x_p=|x| */
+  MPFR_TMP_INIT_ABS (xp, x);
+
+  /* Other simple case arctan(-+1)=-+pi/4 */
+  comparaison = mpfr_cmp_ui (xp, 1);
+  if (MPFR_UNLIKELY (comparaison == 0))
+    {
+      int neg = MPFR_IS_NEG (x);
+      inexact = mpfr_const_pi (atan, MPFR_IS_POS (x) ? rnd_mode
+                               : MPFR_INVERT_RND (rnd_mode));
+      if (neg)
+        {
+          inexact = -inexact;
+          MPFR_CHANGE_SIGN (atan);
+        }
+      inexact2 = mpfr_div_2ui (atan, atan, 2, rnd_mode);
+      if (MPFR_UNLIKELY (inexact2))
+        inexact = inexact2; /* an underflow occurs */
+      return inexact;
+    }
+
+  realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4;
+  prec = realprec + BITS_PER_MP_LIMB;
+
+  MPFR_SAVE_EXPO_MARK (expo);
+
+  /* Initialisation    */
+  mpz_init (ukz);
+  MPFR_GROUP_INIT_4 (group, prec, sk, tmp, tmp2, arctgt);
+  oldn0 = 0;
+  tabz = (mpz_t *) 0;
+
+  MPFR_ZIV_INIT (loop, prec);
+  for (;;)
+    {
+      /* First, if |x| < 1, we need to have more prec to be able to round (sup)
+         n0 = ceil(log(prec_requested + 2 + 1+ln(2.4)/ln(2))/log(2)) */
+      mp_prec_t sup;
+#if 0
+      sup = 1;
+      if (MPFR_GET_EXP (xp) < 0
+          && (mpfr_uexp_t) (2-MPFR_GET_EXP (xp)) > realprec)
+        sup = (mpfr_uexp_t) (2-MPFR_GET_EXP (xp)) - realprec;
+#else
+      sup = MPFR_GET_EXP (xp) < 0 ? 2-MPFR_GET_EXP (xp) : 1;
+#endif
+      n0 = MPFR_INT_CEIL_LOG2 ((realprec + sup) + 3);
+      MPFR_ASSERTD (3*n0 > 2);
+      prec = (realprec + sup) + 1 + MPFR_INT_CEIL_LOG2 (3*n0-2);
+
+      /* Initialisation */
+      MPFR_GROUP_REPREC_4 (group, prec, sk, tmp, tmp2, arctgt);
+      if (MPFR_LIKELY (oldn0 == 0))
+        {
+          oldn0 = 3*(n0+1);
+          tabz = (mpz_t *) (*__gmp_allocate_func) (oldn0*sizeof (mpz_t));
+          for (i = 0; i < oldn0; i++)
+            mpz_init (tabz[i]);
+        }
+      else if (MPFR_UNLIKELY (oldn0 < 3*n0+1))
+        {
+          tabz = (mpz_t *) (*__gmp_reallocate_func)
+            (tabz, oldn0*sizeof (mpz_t), 3*(n0+1)*sizeof (mpz_t));
+          for (i = oldn0; i < 3*(n0+1); i++)
+            mpz_init (tabz[i]);
+          oldn0 = 3*(n0+1);
+        }
+
+      if (comparaison > 0)
+        mpfr_ui_div (sk, 1, xp, GMP_RNDN);
+      else
+        mpfr_set (sk, xp, GMP_RNDN);
+
+      /* sk is 1/|x| if |x| > 1, and |x| otherwise, i.e. min(|x|, 1/|x|) */
+
+      /* Assignation  */
+      MPFR_SET_ZERO (arctgt);
+      twopoweri = 1<<0;
+      MPFR_ASSERTD (n0 >= 4);
+      /* FIXME: further reduce the argument so that it is less than
+         1/n where n is the output precision. In such a way, the
+         first calls to mpfr_atan_aux will not be too expensive,
+         since the number of needed terms will be n/log(n), so the
+         factorial contribution will be O(n). */
+      for (i = 0 ; i < n0; i++)
+        {
+          if (MPFR_UNLIKELY (MPFR_IS_ZERO (sk)))
+            break;
+          /* Calculation of trunc(tmp) --> mpz */
+          mpfr_mul_2ui (tmp, sk, twopoweri, GMP_RNDN);
+          mpfr_trunc (tmp, tmp);
+          if (!MPFR_IS_ZERO (tmp))
+            {
+              exptol = mpfr_get_z_exp (ukz, tmp);
+              /* since the s_k are decreasing (see algorithms.tex),
+                 and s_0 = min(|x|, 1/|x|) < 1, we have sk < 1,
+                 thus exptol < 0 */
+              MPFR_ASSERTD (exptol < 0);
+              mpz_tdiv_q_2exp (ukz, ukz, (unsigned long int) (-exptol));
+              /* Calculation of arctan(Ak) */
+              mpfr_set_z (tmp, ukz, GMP_RNDN);
+              mpfr_div_2ui (tmp, tmp, twopoweri, GMP_RNDN);
+              mpfr_atan_aux (tmp2, ukz, twopoweri, n0 - i, tabz);
+              mpfr_mul (tmp2, tmp2, tmp, GMP_RNDN);
+              /* Addition */
+              mpfr_add (arctgt, arctgt, tmp2, GMP_RNDN);
+              /* Next iteration */
+              mpfr_sub (tmp2, sk, tmp, GMP_RNDN);
+              mpfr_mul (sk, sk, tmp, GMP_RNDN);
+              mpfr_add_ui (sk, sk, 1, GMP_RNDN);
+              mpfr_div (sk, tmp2, sk, GMP_RNDN);
+            }
+          twopoweri <<= 1;
+        }
+      /* Add last step (Arctan(sk) ~= sk */
+      mpfr_add (arctgt, arctgt, sk, GMP_RNDN);
+      if (comparaison > 0)
+        {
+          mpfr_const_pi (tmp, GMP_RNDN);
+          mpfr_div_2ui (tmp, tmp, 1, GMP_RNDN);
+          mpfr_sub (arctgt, tmp, arctgt, GMP_RNDN);
+        }
+      MPFR_SET_POS (arctgt);
+
+      if (MPFR_LIKELY (MPFR_CAN_ROUND (arctgt, realprec, MPFR_PREC (atan),
+                                       rnd_mode)))
+        break;
+      MPFR_ZIV_NEXT (loop, realprec);
+    }
+  MPFR_ZIV_FREE (loop);
+
+  inexact = mpfr_set4 (atan, arctgt, rnd_mode, MPFR_SIGN (x));
+
+  for (i = 0 ; i < oldn0 ; i++)
+    mpz_clear (tabz[i]);
+  mpz_clear (ukz);
+  (*__gmp_free_func) (tabz, oldn0*sizeof (mpz_t));
+  MPFR_GROUP_CLEAR (group);
+
+  MPFR_SAVE_EXPO_FREE (expo);
+  return mpfr_check_range (arctgt, inexact, rnd_mode);
+}