X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gmp%2Fmpn%2Fgeneric%2Fperfsqr.c;fp=gmp%2Fmpn%2Fgeneric%2Fperfsqr.c;h=1995a944df9a5aff42fd4465718ae3f2586da9d6;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git

diff --git a/gmp/mpn/generic/perfsqr.c b/gmp/mpn/generic/perfsqr.c
new file mode 100644
index 00000000..1995a944
--- /dev/null
+++ b/gmp/mpn/generic/perfsqr.c
@@ -0,0 +1,229 @@
+/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
+   zero otherwise.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "perfsqr.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+
+/* PERFSQR_MOD_* detects non-squares using residue tests.
+
+   A macro PERFSQR_MOD_TEST is set up by gen-psqr.c in perfsqr.h.  It takes
+   {up,usize} modulo a selected modulus to get a remainder r.  For 32-bit or
+   64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,
+   or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP
+   used.  PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this
+   case too.
+
+   PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or
+   PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table
+   data indicating residues and non-residues modulo those divisors.  The
+   table data is in 1 or 2 limbs worth of bits respectively, per the size of
+   each d.
+
+   A "modexact" style remainder is taken to reduce r modulo d.
+   PERFSQR_MOD_IDX implements this, producing an index "idx" for use with
+   the table data.  Notice there's just one multiplication by a constant
+   "inv", for each d.
+
+   The modexact doesn't produce a true r%d remainder, instead idx satisfies
+   "-(idx<<PERFSQR_MOD_BITS) == r mod d".  Because d is odd,
+   2^PERFSQR_MOD_BITS is invertible mod d, so idx corresponds to the true
+   remainder under a fixed permutation, and the table data is stored in that
+   permuted order.  */
+
+/* PERFSQR_MOD_34 - take {up,usize} modulo 2^24-1 or 2^48-1 using
+   mpn_mod_34lsub1, folding the high bits back in to keep the result small.  */
+#define PERFSQR_MOD_34(r, up, usize)                            \
+  do {                                                          \
+    (r) = mpn_mod_34lsub1 (up, usize);                          \
+    (r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS);             \
+  } while (0)
+
+/* FIXME: The %= here isn't good, and might destroy any savings from keeping
+   the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
+   Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
+   and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our
+   normal case, so let's not worry too much about mod_1.  */
+#define PERFSQR_MOD_PP(r, up, usize)                                    \
+  do {                                                                  \
+    if (USE_PREINV_MOD_1)                                               \
+      {                                                                 \
+        (r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,             \
+                                PERFSQR_PP_INVERTED);                   \
+        (r) %= PERFSQR_PP;                                              \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        (r) = mpn_mod_1 (up, usize, PERFSQR_PP);                        \
+      }                                                                 \
+  } while (0)
+
+#define PERFSQR_MOD_IDX(idx, r, d, inv)                         \
+  do {                                                          \
+    mp_limb_t  q;                                               \
+    ASSERT ((r) <= PERFSQR_MOD_MASK);                           \
+    ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1);           \
+    ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK);           \
+                                                                \
+    q = ((r) * (inv)) & PERFSQR_MOD_MASK;                       \
+    ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK));               \
+    (idx) = (q * (d)) >> PERFSQR_MOD_BITS;                      \
+  } while (0)
+
+#define PERFSQR_MOD_1(r, d, inv, mask)                          \
+  do {                                                          \
+    unsigned   idx;                                             \
+    ASSERT ((d) <= GMP_LIMB_BITS);                              \
+    PERFSQR_MOD_IDX(idx, r, d, inv);                            \
+    TRACE (printf ("  PERFSQR_MOD_1 d=%u r=%lu idx=%u\n",       \
+                   d, r%d, idx));                               \
+    if ((((mask) >> idx) & 1) == 0)                             \
+      {                                                         \
+        TRACE (printf ("  non-square\n"));                      \
+        return 0;                                               \
+      }                                                         \
+  } while (0)
+
+/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
+   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch.  */
+#define PERFSQR_MOD_2(r, d, inv, mhi, mlo)                      \
+  do {                                                          \
+    mp_limb_t  m;                                               \
+    unsigned   idx;                                             \
+    ASSERT ((d) <= 2*GMP_LIMB_BITS);                            \
+                                                                \
+    PERFSQR_MOD_IDX (idx, r, d, inv);                           \
+    TRACE (printf ("  PERFSQR_MOD_2 d=%u r=%lu idx=%u\n",       \
+                   d, r%d, idx));                               \
+    m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi));        \
+    idx %= GMP_LIMB_BITS;                                       \
+    if (((m >> idx) & 1) == 0)                                  \
+      {                                                         \
+        TRACE (printf ("  non-square\n"));                      \
+        return 0;                                               \
+      }                                                         \
+  } while (0)
+
+
+int
+mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
+{
+  ASSERT (usize >= 1);
+
+  TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));
+
+  /* The first test excludes 212/256 (82.8%) of the perfect square candidates
+     in O(1) time.  */
+  {
+    unsigned  idx = up[0] % 0x100;
+    if (((sq_res_0x100[idx / GMP_LIMB_BITS]
+          >> (idx % GMP_LIMB_BITS)) & 1) == 0)
+      return 0;
+  }
+
+#if 0
+  /* Check that we have even multiplicity of 2, and then check that the rest
+     is a possible perfect square.  Leave disabled until we can determine
+     whether this really is an improvement.  If it is, it could completely
+     replace the simple probe above, since it should throw out more
+     non-squares, but at the expense of somewhat more cycles.  */
+  {
+    mp_limb_t lo;
+    int cnt;
+    lo = up[0];
+    while (lo == 0)
+      up++, lo = up[0], usize--;
+    count_trailing_zeros (cnt, lo);
+    if ((cnt & 1) != 0)
+      return 0;                 /* return if multiplicity of 2 is not even */
+    lo >>= cnt;                 /* shift down to align lowest non-zero bit */
+    lo >>= 1;                   /* shift away lowest non-zero bit */
+    if ((lo & 3) != 0)
+      return 0;
+  }
+#endif
+
+
+  /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares
+     according to their residues modulo small primes (or powers of
+     primes).  See perfsqr.h.  */
+  PERFSQR_MOD_TEST (up, usize);
+
+
+  /* For the third and last test, we finally compute the square root,
+     to make sure we've really got a perfect square.  */
+  {
+    mp_ptr root_ptr;
+    int res;
+    TMP_DECL;
+
+    TMP_MARK;
+    root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB);
+
+    /* Iff mpn_sqrtrem returns zero, the square is perfect.  */
+    res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
+    TMP_FREE;
+
+    return res;
+  }
+}
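
The residue tests in the file above hinge on the "modexact" index described in the
PERFSQR_MOD_* comment: for an odd divisor d with inv = d^-1 mod 2^PERFSQR_MOD_BITS,
the index idx = ((r*inv mod 2^B) * d) >> B satisfies -(idx<<B) == r mod d, so a bit
table stored in the correspondingly permuted order can be probed with idx instead of
a true r % d.  The following standalone sketch is not GMP code; the names B, MASK,
inv_2exp and the values d = 45 and the step 12345 are invented for illustration.  It
simply verifies that identity numerically for a 24-bit modulus.

#include <stdio.h>
#include <stdint.h>

#define B     24
#define MASK  ((1u << B) - 1)

/* Inverse of an odd d modulo 2^B by Hensel/Newton lifting:
   each step doubles the number of correct low bits.  */
static uint32_t
inv_2exp (uint32_t d)
{
  uint32_t inv = d;              /* d*d == 1 mod 8, so 3 bits correct */
  inv *= 2 - d * inv;            /* 6 bits */
  inv *= 2 - d * inv;            /* 12 bits */
  inv *= 2 - d * inv;            /* 24 bits */
  return inv & MASK;
}

int
main (void)
{
  uint32_t d = 45;               /* any odd divisor */
  uint32_t inv = inv_2exp (d);
  uint32_t r;

  for (r = 0; r <= MASK; r += 12345)
    {
      uint32_t q   = (r * inv) & MASK;                      /* r * d^-1 mod 2^B */
      uint32_t idx = (uint32_t) (((uint64_t) q * d) >> B);  /* high part of q*d */

      /* q*d == idx*2^B + r and q*d == 0 mod d, hence (idx<<B) + r == 0 mod d,
         i.e. -(idx<<B) == r mod d, the property the table lookup relies on.  */
      if ((((uint64_t) idx << B) + r) % d != 0)
        {
          printf ("mismatch at r=%u\n", r);
          return 1;
        }
    }
  printf ("modexact index consistent for d=%u\n", d);
  return 0;
}

Note that, as in PERFSQR_MOD_IDX, only one multiplication by the constant inv is
needed per divisor; the division by d never happens at run time.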
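
For completeness, here is a small usage sketch, assuming an ordinary GMP
installation linked with -lgmp.  mpn_perfect_square_p is the documented low-level
entry point for a limb array, and mpz_perfect_square_p ultimately reaches the same
three-stage test (low-byte table, residue tests, mpn_sqrtrem) for mpz_t operands.
The test values are arbitrary.

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  /* mpz level: square a large integer, then perturb it by one.  */
  mpz_t x;
  mpz_init_set_str (x, "12345678987654321", 10);
  mpz_mul (x, x, x);
  printf ("square:     %d\n", mpz_perfect_square_p (x));  /* non-zero */
  mpz_add_ui (x, x, 1);
  printf ("square + 1: %d\n", mpz_perfect_square_p (x));  /* zero */
  mpz_clear (x);

  /* mpn level: a one-limb operand {limbs, 1}.  */
  {
    mp_limb_t limbs[1];
    limbs[0] = 49;
    printf ("49:         %d\n", mpn_perfect_square_p (limbs, 1));  /* non-zero */
    limbs[0] = 50;
    printf ("50:         %d\n", mpn_perfect_square_p (limbs, 1));  /* zero */
  }
  return 0;
}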