X-Git-Url: https://oss.titaniummirror.com/gitweb/?a=blobdiff_plain;f=gmp%2Fmpn%2Fgeneric%2Ftoom2_sqr.c;fp=gmp%2Fmpn%2Fgeneric%2Ftoom2_sqr.c;h=445cff8f5d8e3a22fdfb9e66df15b0d7d6e1337a;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gmp/mpn/generic/toom2_sqr.c b/gmp/mpn/generic/toom2_sqr.c new file mode 100644 index 00000000..445cff8f --- /dev/null +++ b/gmp/mpn/generic/toom2_sqr.c @@ -0,0 +1,135 @@ +/* mpn_toom2_sqr -- Square {ap,an}. + + Contributed to the GNU project by Torbjorn Granlund. + + THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY + SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 2006, 2007, 2008 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + + +#include "gmp.h" +#include "gmp-impl.h" + +/* Evaluate in: -1, 0, +inf + + <-s--><--n--> + ____ ______ + |_a1_|___a0_| + |b1_|___b0_| + <-t-><--n--> + + v0 = a0 * b0 # A(0)*B(0) + vm1 = (a0- a1)*(b0- b1) # A(-1)*B(-1) + vinf= a1 * b1 # A(inf)*B(inf) +*/ + +#if TUNE_PROGRAM_BUILD +#define MAYBE_sqr_toom2 1 +#else +#define MAYBE_sqr_toom2 \ + (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD) +#endif + +#define TOOM2_SQR_N_REC(p, a, n, ws) \ + do { \ + if (! MAYBE_sqr_toom2 \ + || BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD)) \ + mpn_sqr_basecase (p, a, n); \ + else \ + mpn_toom2_sqr (p, a, n, ws); \ + } while (0) + +void +mpn_toom2_sqr (mp_ptr pp, + mp_srcptr ap, mp_size_t an, + mp_ptr scratch) +{ + mp_size_t n, s; + mp_limb_t cy, cy2; + mp_ptr asm1; + +#define a0 ap +#define a1 (ap + n) + + s = an >> 1; + n = an - s; + + ASSERT (0 < s && s <= n); + + asm1 = pp; + + /* Compute asm1. */ + if (s == n) + { + if (mpn_cmp (a0, a1, n) < 0) + { + mpn_sub_n (asm1, a1, a0, n); + } + else + { + mpn_sub_n (asm1, a0, a1, n); + } + } + else + { + if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0) + { + mpn_sub_n (asm1, a1, a0, s); + MPN_ZERO (asm1 + s, n - s); + } + else + { + mpn_sub (asm1, a0, n, a1, s); + } + } + +#define v0 pp /* 2n */ +#define vinf (pp + 2 * n) /* s+s */ +#define vm1 scratch /* 2n */ + + /* vm1, 2n limbs */ + TOOM2_SQR_N_REC (vm1, asm1, n, scratch); + + /* vinf, s+s limbs */ + TOOM2_SQR_N_REC (vinf, a1, s, scratch); + + /* v0, 2n limbs */ + TOOM2_SQR_N_REC (v0, ap, n, scratch); + + /* H(v0) + L(vinf) */ + cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n); + + /* L(v0) + H(v0) */ + cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n); + + /* L(vinf) + H(vinf) */ + cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n); + + cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n); + + ASSERT (cy + 1 <= 3); + ASSERT (cy2 <= 2); + + mpn_incr_u (pp + 2 * n, cy2); + if (LIKELY (cy <= 2)) + mpn_incr_u (pp + 3 * n, cy); + else + mpn_decr_u (pp + 3 * n, 1); +}