diff --git a/gmp/mpz/fac_ui.c b/gmp/mpz/fac_ui.c
new file mode 100644
index 00000000..7e394fcb
--- /dev/null
+++ b/gmp/mpz/fac_ui.c
@@ -0,0 +1,396 @@
/* mpz_fac_ui(result, n) -- Set RESULT to N!.

Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "fac_ui.h"


static void odd_product __GMP_PROTO ((unsigned long, unsigned long, mpz_t *));
static void ap_product_small __GMP_PROTO ((mpz_t, mp_limb_t, mp_limb_t, unsigned long, unsigned long));


/* must be >= 2 */
#define APCONST 5

/* set z to the single non-zero limb n */
#define MPZ_SET_1_NZ(z,n)  \
  do {                     \
    mpz_ptr __z = (z);     \
    ASSERT ((n) != 0);     \
    PTR(__z)[0] = (n);     \
    SIZ(__z) = 1;          \
  } while (0)

/* dst = src * n, for src > 0 and n > 0 */
#define MPZ_MUL_1_POS(dst,src,n)                       \
  do {                                                 \
    mpz_ptr    __dst = (dst);                          \
    mpz_srcptr __src = (src);                          \
    mp_size_t  __size = SIZ(__src);                    \
    mp_ptr     __dst_p;                                \
    mp_limb_t  __c;                                    \
                                                       \
    ASSERT (__size > 0);                               \
    ASSERT ((n) != 0);                                 \
                                                       \
    MPZ_REALLOC (__dst, __size+1);                     \
    __dst_p = PTR(__dst);                              \
                                                       \
    __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n);  \
    __dst_p[__size] = __c;                             \
    SIZ(__dst) = __size + (__c != 0);                  \
  } while (0)
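/* Illustrative aside (not part of the original file): the two macros above
   are internal fast paths for "set to a one-limb value" and "multiply by a
   one-limb value".  A minimal sketch of the same operations through the
   public GMP API, for comparison; the name demo_one_limb_ops is
   hypothetical.  */
#include <stdio.h>

static void
demo_one_limb_ops (void)
{
  mpz_t z;
  mpz_init_set_ui (z, 7);     /* public analogue of MPZ_SET_1_NZ (z, 7) */
  mpz_mul_ui (z, z, 11);      /* public analogue of MPZ_MUL_1_POS (z, z, 11) */
  gmp_printf ("%Zd\n", z);    /* prints 77 */
  mpz_clear (z);
}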
#if BITS_PER_ULONG == GMP_LIMB_BITS
#define BSWAP_ULONG(x,y)  BSWAP_LIMB(x,y)
#endif

/* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
   by a shift down to get the high part.  But it provoked incorrect code from
   "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode.  This case
   would have been nice for gcc ia64, where BSWAP_LIMB is a mux1, but we can
   get that directly by muxing a 4-byte ulong if it matters enough.  */

#if ! defined (BSWAP_ULONG)
#define BSWAP_ULONG(dst, src)                                        \
  do {                                                               \
    unsigned long __bswapl_src = (src);                              \
    unsigned long __bswapl_dst = 0;                                  \
    int __i;                                                         \
    for (__i = 0; __i < sizeof (unsigned long); __i++)               \
      {                                                              \
        __bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF);  \
        __bswapl_src >>= 8;                                          \
      }                                                              \
    (dst) = __bswapl_dst;                                            \
  } while (0)
#endif

/* x is the bit reversal of y.  Note the divides below are all exact.  */
#define BITREV_ULONG(x,y)                                                   \
  do {                                                                      \
    unsigned long __dst;                                                    \
    BSWAP_ULONG (__dst, y);                                                 \
    __dst = ((__dst>>4)&(ULONG_MAX/17)) | ((__dst<<4)&((ULONG_MAX/17)*16)); \
    __dst = ((__dst>>2)&(ULONG_MAX/5) ) | ((__dst<<2)&((ULONG_MAX/5)*4) );  \
    __dst = ((__dst>>1)&(ULONG_MAX/3) ) | ((__dst<<1)&((ULONG_MAX/3)*2) );  \
    (x) = __dst;                                                            \
  } while (0)
/* The above could be improved if the CPU has a nibble/bit swap or mux
   instruction.  The code is also serialized; it could be written as one
   big parallel expression.  */
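/* Illustrative aside (not part of the original file): BITREV_ULONG reverses
   a word by byte-swapping it and then exchanging nibbles, bit pairs and
   single bits.  A naive reference implementation and a spot check against
   the macro; the names demo_bitrev_naive and demo_bitrev_check are
   hypothetical.  */
#include <limits.h>

static unsigned long
demo_bitrev_naive (unsigned long y)
{
  unsigned long x = 0;
  unsigned int i;
  for (i = 0; i < 8 * sizeof (unsigned long); i++)
    {
      x = (x << 1) | (y & 1);   /* shift bits out of y into x, reversed */
      y >>= 1;
    }
  return x;
}

static int
demo_bitrev_check (unsigned long y)
{
  unsigned long x;
  BITREV_ULONG (x, y);
  return x == demo_bitrev_naive (y);    /* expect 1 for every y */
}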
void
mpz_fac_ui (mpz_ptr x, unsigned long n)
{
  unsigned long z, stt;
  int i, j;
  mpz_t t1, st[8 * sizeof (unsigned long) + 1 - APCONST];
  mp_limb_t d[4];

  static const mp_limb_t table[] = { ONE_LIMB_FACTORIAL_TABLE };

  if (n < numberof (table))
    {
      MPZ_SET_1_NZ (x, table[n]);
      return;
    }

  /* NOTE: MUST have n >= 3 here */
  ASSERT (n >= 3);

  /* For estimating the alloc sizes: the calculation of the formulas below is
     not exact, and the formulas themselves are only approximations; the few
     "side" calculations are also ignored.  Correct allocation seems to speed
     up the small sizes, while having very little effect on the large
     sizes.  */

  /* Estimate space for the stack entries (see below).  The number of bits
     in n! is

       (1 + log_2(2*pi)/2) - n*log_2(exp(1)) + (n + 1/2)*log_2(n)
         = 2.325748065 - n*1.442695041 + (n + 0.5)*log_2(n)  */

  umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) FAC2OVERE);
  /* d[1] is 2n/e, d[0] ignored */
  count_leading_zeros (z, d[1]);
  z = GMP_LIMB_BITS - z - 1;	/* z = floor(log_2(2n/e)) */
  umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) z);
  /* d = n*floor(log_2(2n/e)) */
  d[0] = (d[0] >> 2) | (d[1] << (GMP_LIMB_BITS - 2));
  d[1] >>= 2;
  /* d = n*floor(log_2(2n/e))/4 */
  z = d[0] + 1;			/* have to ignore any overflow */
  /* so z is the number of bits wanted for st[0] */

  if (n <= ((unsigned long) 1) << (APCONST))
    {
      mpz_realloc2 (x, 4 * z);
      ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n - 1, 4L);
      return;
    }
  if (n <= ((unsigned long) 1) << (APCONST + 1))
    {				/* use n! = odd(1,n) * (n/2)! * 2^(n/2) */
      mpz_init2 (t1, 2 * z);
      mpz_realloc2 (x, 4 * z);
      ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n / 2 - 1, 4L);
      ap_product_small (t1, CNST_LIMB(3), CNST_LIMB(2), (n - 1) / 2, 4L);
      mpz_mul (x, x, t1);
      mpz_clear (t1);
      mpz_mul_2exp (x, x, n / 2);
      return;
    }
  if (n <= ((unsigned long) 1) << (APCONST + 2))
    {
      /* use n! = C_2(1,n/2)^2 * C_2(n/2,n) * (n/4)! * 2^(n/2+n/4), all
	 integer divisions, so we need (BITS_IN_N - APCONST + 1) =
	 (APCONST + 3 - APCONST + 1) = 4 stack entries */
      mpz_init2 (t1, 2 * z);
      mpz_realloc2 (x, 4 * z);
      for (i = 0; i < 4; i++)
	{
	  mpz_init2 (st[i], z);
	  z >>= 1;
	}
      odd_product (1, n / 2, st);
      mpz_set (x, st[0]);
      odd_product (n / 2, n, st);
      mpz_mul (x, x, x);
      ASSERT (n / 4 <= FACMUL4 + 6);
      ap_product_small (t1, CNST_LIMB(2), CNST_LIMB(1), n / 4 - 1, 4L);
      /* must have 2^APCONST odd numbers max */
      mpz_mul (t1, t1, st[0]);
      for (i = 0; i < 4; i++)
	mpz_clear (st[i]);
      mpz_mul (x, x, t1);
      mpz_clear (t1);
      mpz_mul_2exp (x, x, n / 2 + n / 4);
      return;
    }

  count_leading_zeros (stt, (mp_limb_t) n);
  stt = GMP_LIMB_BITS - stt + 1 - APCONST;

  for (i = 0; i < (signed long) stt; i++)
    {
      mpz_init2 (st[i], z);
      z >>= 1;
    }

  count_leading_zeros (z, (mp_limb_t) (n / 3));
  /* find z such that 2^z > n/3; the range for z is
     1 <= z <= 8*sizeof(unsigned long) - 1 */
  z = GMP_LIMB_BITS - z;

  /*
     n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1}, n/2^i )^{i+1}
     where 2^e || n!, 3*2^z > n, and C_2(a,b) = PRODUCT of odd c
     such that a < c <= b
   */
  mpz_init2 (t1, 2 * z);
  mpz_realloc2 (x, 4 * z);
  MPZ_SET_1_NZ (t1, 1);
  for (j = 8 * sizeof (unsigned long) / 2; j != 0; j >>= 1)
    {
      MPZ_SET_1_NZ (x, 1);
      for (i = 8 * sizeof (unsigned long) - j; i >= j; i -= 2 * j)
	if ((signed long) z >= i)
	  {
	    odd_product (n >> i, n >> (i - 1), st);
	    /* the largest odd product is at j=i=1, where we have
	       odd_product(n/2,n,st), which is approx (2n/e)^(n/4);
	       so log_2(largest odd product) = n*log_2(2n/e)/4, and the
	       number of bits is n*log_2(2n/e)/4 + 1 */
	    if (i != j)
	      mpz_pow_ui (st[0], st[0], i / j);
	    mpz_mul (x, x, st[0]);
	  }
      if ((signed long) z >= j && j != 1)
	{
	  mpz_mul (t1, t1, x);
	  mpz_mul (t1, t1, t1);
	}
    }
  for (i = 0; i < (signed long) stt; i++)
    mpz_clear (st[i]);
  mpz_mul (x, x, t1);
  mpz_clear (t1);
  popc_limb (i, (mp_limb_t) n);
  mpz_mul_2exp (x, x, n - i);
  return;
}
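/* Illustrative aside (not part of the original file): the small-n branches
   above rest on the identity n! = odd(1,n) * (n/2)! * 2^(n/2), where
   odd(1,n) is the product of the odd integers in (1,n] -- the even factors
   2,4,...,2*(n/2) contribute (n/2)! and a shift of n/2.  A plain-C spot
   check for n <= 20, so everything fits in 64 bits; demo_fac_split is a
   hypothetical name.  */
static int
demo_fac_split (unsigned long n)
{
  unsigned long long fac = 1, half_fac = 1, odd = 1;
  unsigned long i;

  for (i = 2; i <= n; i++)
    fac *= i;			/* n! */
  for (i = 2; i <= n / 2; i++)
    half_fac *= i;		/* (n/2)! */
  for (i = 3; i <= n; i += 2)
    odd *= i;			/* odd(1,n) */
  /* expect 1: n! == odd(1,n) * (n/2)! * 2^(n/2) */
  return fac == odd * half_fac * (1ULL << (n / 2));
}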
/* start and step are mp_limb_t, although they will fit in an unsigned long */
static void
ap_product_small (mpz_t ret, mp_limb_t start, mp_limb_t step,
		  unsigned long count, unsigned long nm)
{
  unsigned long a;
  mp_limb_t b;

  ASSERT (count <= (((unsigned long) 1) << APCONST));
/* count can never be zero?  Check this and remove the test below.  */
  if (count == 0)
    {
      MPZ_SET_1_NZ (ret, 1);
      return;
    }
  if (count == 1)
    {
      MPZ_SET_1_NZ (ret, start);
      return;
    }
  switch (nm)
    {
    case 1:
      MPZ_SET_1_NZ (ret, start);
      b = start + step;
      for (a = 0; a < count - 1; b += step, a++)
	MPZ_MUL_1_POS (ret, ret, b);
      return;
    case 2:
      MPZ_SET_1_NZ (ret, start * (start + step));
      if (count == 2)
	return;
      for (b = start + 2 * step, a = count / 2 - 1; a != 0;
	   a--, b += 2 * step)
	MPZ_MUL_1_POS (ret, ret, b * (b + step));
      if (count % 2 == 1)
	MPZ_MUL_1_POS (ret, ret, b);
      return;
    case 3:
      if (count == 2)
	{
	  MPZ_SET_1_NZ (ret, start * (start + step));
	  return;
	}
      MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
      if (count == 3)
	return;
      for (b = start + 3 * step, a = count / 3 - 1; a != 0;
	   a--, b += 3 * step)
	MPZ_MUL_1_POS (ret, ret, b * (b + step) * (b + 2 * step));
      if (count % 3 == 2)
	b = b * (b + step);
      if (count % 3 != 0)
	MPZ_MUL_1_POS (ret, ret, b);
      return;
    default:			/* i.e. nm == 4 */
      if (count == 2)
	{
	  MPZ_SET_1_NZ (ret, start * (start + step));
	  return;
	}
      if (count == 3)
	{
	  MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
	  return;
	}
      MPZ_SET_1_NZ (ret,
		    start * (start + step) * (start + 2 * step)
		    * (start + 3 * step));
      if (count == 4)
	return;
      for (b = start + 4 * step, a = count / 4 - 1; a != 0;
	   a--, b += 4 * step)
	MPZ_MUL_1_POS (ret, ret,
		       b * (b + step) * (b + 2 * step) * (b + 3 * step));
      if (count % 4 == 2)
	b = b * (b + step);
      if (count % 4 == 3)
	b = b * (b + step) * (b + 2 * step);
      if (count % 4 != 0)
	MPZ_MUL_1_POS (ret, ret, b);
      return;
    }
}

/* The return value is left in st[0].  By Stirling's approximation (with
   e = exp(1)), odd_product(l,h) is about sqrt((h/e)^h / (l/e)^l), so st[0]
   needs enough bits for that, st[1] needs half as many, st[2] a quarter,
   and so on.  */
static void
odd_product (unsigned long low, unsigned long high, mpz_t * st)
{
  unsigned long stc = 1, stn = 0, n, y, mask, a, nm = 1;
  signed long z;

  low++;
  if (low % 2 == 0)
    low++;
  if (high == 0)
    high = 1;
  if (high % 2 == 0)
    high--;
/* must have high >= low?  Check this and remove the test below.  */
  if (high < low)
    {
      MPZ_SET_1_NZ (st[0], 1);
      return;
    }
  if (high == low)
    {
      MPZ_SET_1_NZ (st[0], low);
      return;
    }
  if (high <= FACMUL2 + 2)
    {
      nm = 2;
      if (high <= FACMUL3 + 4)
	{
	  nm = 3;
	  if (high <= FACMUL4 + 6)
	    nm = 4;
	}
    }
  high = (high - low) / 2 + 1;	/* high is now the count;
				   high <= 2^(BITS_PER_ULONG-1) */
  if (high <= (((unsigned long) 1) << APCONST))
    {
      ap_product_small (st[0], (mp_limb_t) low, CNST_LIMB(2), high, nm);
      return;
    }
  count_leading_zeros (n, (mp_limb_t) high);
/* assumes the count_leading_zeros above is limb-based, not numb-based */
  n = GMP_LIMB_BITS - n - APCONST;
  mask = (((unsigned long) 1) << n);
  a = mask << 1;
  mask--;
/* we have 2^(BITS_IN_N-APCONST) iterations, so we need
   (BITS_IN_N-APCONST+1) stack entries */
  for (z = mask; z >= 0; z--)
    {
      BITREV_ULONG (y, z);
      y >>= (BITS_PER_ULONG - n);
      ap_product_small (st[stn],
			(mp_limb_t) (low + 2 * ((~y) & mask)), (mp_limb_t) a,
			(high + y) >> n, nm);
      ASSERT (((high + y) >> n) <= (((unsigned long) 1) << APCONST));
      stn++;
      y = stc++;
      while ((y & 1) == 0)
	{
	  mpz_mul (st[stn - 2], st[stn - 2], st[stn - 1]);
	  stn--;
	  y >>= 1;
	}
    }
  ASSERT (stn == 1);
  return;
}
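/* Illustrative aside (not part of the original file): odd_product combines
   its 2^(BITS_IN_N-APCONST) sub-products with a "binary counter" stack --
   after the k-th push, the top two entries are merged once per trailing
   zero bit of k, which keeps the multiplications balanced like a product
   tree (so large operands meet large operands, where GMP's subquadratic
   multiplication pays off).  A plain-C sketch of that stack discipline over
   unsigned long long; demo_balanced_product and DEMO_BLOCKS are hypothetical
   names.  */
#define DEMO_BLOCKS 8
static unsigned long long
demo_balanced_product (const unsigned long long v[DEMO_BLOCKS])
{
  unsigned long long st[DEMO_BLOCKS];	/* worst-case stack depth */
  unsigned long stn = 0, stc = 1, k, y;

  for (k = 0; k < DEMO_BLOCKS; k++)
    {
      st[stn++] = v[k];			/* push the next sub-product */
      y = stc++;
      while ((y & 1) == 0)		/* one merge per trailing zero bit */
	{
	  st[stn - 2] *= st[stn - 1];	/* fold the top two entries */
	  stn--;
	  y >>= 1;
	}
    }
  while (stn > 1)			/* drain; a no-op when DEMO_BLOCKS
					   is a power of two, as here */
    {
      st[stn - 2] *= st[stn - 1];
      stn--;
    }
  return st[0];				/* product of all DEMO_BLOCKS values */
}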