X-Git-Url: https://oss.titaniummirror.com/gitweb/?a=blobdiff_plain;f=gmp%2Fmpn%2Fgeneric%2Fmu_bdiv_q.c;fp=gmp%2Fmpn%2Fgeneric%2Fmu_bdiv_q.c;h=3b5f56d08830841545b40b8bf9fe85167913c5cb;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git

diff --git a/gmp/mpn/generic/mu_bdiv_q.c b/gmp/mpn/generic/mu_bdiv_q.c
new file mode 100644
index 00000000..3b5f56d0
--- /dev/null
+++ b/gmp/mpn/generic/mu_bdiv_q.c
@@ -0,0 +1,255 @@
/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,scratch) -- Compute {np,nn} / {dp,dn} mod B^nn,
   storing the result in {qp,nn}.  Overlap allowed between Q and N; all other
   overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
   RELEASE.

Copyright 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


/* We use the "misunderstanding algorithm" (MU), discovered by Paul Zimmermann
   and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
   Jebelean's bidirectional exact division algorithm.

   The idea of this algorithm is to compute a smaller inverted value than used
   in the standard Barrett algorithm, and thus save time in the Newton
   iterations, paying just a small price when using the inverted value for
   developing quotient bits.

   Written by Torbjorn Granlund.  Paul Zimmermann suggested the use of the
   "wrap around" trick.  */

#include "gmp.h"
#include "gmp-impl.h"


/* N = {np,nn}
   D = {dp,dn}

   Requirements: N >= D
                 D >= 1
                 N mod D = 0
                 D odd
                 dn >= 2
                 nn >= 2
                 scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).

   Write the quotient to Q = {qp,nn}.

   FIXME: When iterating, perhaps do the small step before the loop, not after.
   FIXME: Try to avoid the scalar divisions when computing the inverse size.
   FIXME: Trim allocation for the (qn > dn) case; 3*dn might be possible.  In
          particular, when dn == in, tp and rp could use the same space.
   FIXME: Trim the final quotient calculation to qn limbs of precision.  */
void
mpn_mu_bdiv_q (mp_ptr qp,
               mp_srcptr np, mp_size_t nn,
               mp_srcptr dp, mp_size_t dn,
               mp_ptr scratch)
{
  mp_ptr ip;
  mp_ptr rp;
  mp_size_t qn;
  mp_size_t in;

  qn = nn;

  ASSERT (dn >= 2);
  ASSERT (qn >= 2);

  if (qn > dn)
    {
      mp_size_t b;
      mp_ptr tp;
      mp_limb_t cy;
      int k;
      mp_size_t m, wn;
      mp_size_t i;

      /* |_______________________|   dividend
                        |________|   divisor  */

      /* Compute an inverse size that is a nice partition of the quotient.  */
      b = (qn - 1) / dn + 1;            /* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;            /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
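
      /* Illustration (numbers not from the original source): with qn = 10
         and dn = 4 we get b = (10-1)/4 + 1 = 3 blocks and an inverse size
         in = (10-1)/3 + 1 = 4 <= dn, so the quotient below is produced as
         4 + 4 + 2 limbs: one mpn_mullow_n call before the loop, one loop
         iteration, and the final step after the loop.  */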

      /* Some notes on allocation:

         When in = dn, R dies when mpn_mullow returns; if in < dn, the low in
         limbs of R die at that point.  We could save memory by letting T live
         just under R, and letting the upper part of T expand into R.  These
         changes should reduce the itch (scratch requirement) to perhaps
         3*dn limbs.  */

      ip = scratch;                     /* in limbs */
      rp = scratch + in;                /* dn limbs */
      tp = scratch + in + dn;           /* dn + in limbs  FIXME: mpn_fft_next_size */
      scratch += in;                    /* roughly 2*in + 1 limbs */

      mpn_binvert (ip, dp, in, scratch);

      cy = 0;

      MPN_COPY (rp, np, dn);
      np += dn;
      mpn_mullow_n (qp, rp, ip, in);
      qn -= in;

      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
        {
          k = mpn_fft_best_k (dn, 0);
          m = mpn_fft_next_size (dn, k);
          wn = dn + in - m;             /* number of wrapped limbs */
          ASSERT_ALWAYS (wn >= 0);      /* could handle this below */
        }

      while (qn > in)
        {
#if WANT_FFT
          if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
            {
              /* The two multiplicands are dn and 'in' limbs, with dn >= in.
                 The relevant part of the result will typically partially wrap,
                 and that part will come out as subtracted to the right.  The
                 unwrapped part, m-in limbs at the high end of tp, is the lower
                 part of the sought product.  The wrapped part, at the low end
                 of tp, will be subtracted from the low part of the partial
                 remainder; we undo that operation with another subtraction.  */
              int c0;

              mpn_mul_fft (tp, m, dp, dn, qp, in, k);

              c0 = mpn_sub_n (tp + m, rp, tp, wn);

              for (i = wn; c0 != 0 && i < in; i++)
                c0 = tp[i] == GMP_NUMB_MASK;
              mpn_incr_u (tp + in, c0);
            }
          else
#endif
            mpn_mul (tp, dp, dn, qp, in);       /* mulhi, need tp[dn+in-1...in] */
          qp += in;
          if (dn != in)
            {
              /* Subtract tp[dn-1...in] from the partial remainder.  */
              cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
              if (cy == 2)
                {
                  mpn_incr_u (tp + dn, 1);
                  cy = 1;
                }
            }
          /* Subtract tp[dn+in-1...dn] from the dividend.  */
          cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
          np += in;
          mpn_mullow_n (qp, rp, ip, in);
          qn -= in;
        }

      /* Generate the last qn limbs.
         FIXME: It should be possible to limit precision here, since qn is
         typically somewhat smaller than dn.  No big gains expected.  */
#if WANT_FFT
      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
        {
          int c0;

          mpn_mul_fft (tp, m, dp, dn, qp, in, k);

          c0 = mpn_sub_n (tp + m, rp, tp, wn);

          for (i = wn; c0 != 0 && i < in; i++)
            c0 = tp[i] == GMP_NUMB_MASK;
          mpn_incr_u (tp + in, c0);
        }
      else
#endif
        mpn_mul (tp, dp, dn, qp, in);           /* mulhi, need tp[qn+in-1...in] */
      qp += in;
      if (dn != in)
        {
          cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
          if (cy == 2)
            {
              mpn_incr_u (tp + dn, 1);
              cy = 1;
            }
        }

      mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
      mpn_mullow_n (qp, rp, ip, qn);
    }
  else
    {
      /* |_______________________|   dividend
                |________________|   divisor  */

      /* Compute a half-sized inverse.  */
      in = qn - (qn >> 1);
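
      /* Illustration (numbers not from the original source): for qn = dn = 7,
         in = 7 - 3 = 4, so a 4-limb inverse delivers quotient limbs 0..3 with
         one mpn_mullow_n call, and the remaining qn - in = 3 limbs come from
         the single wrapped high product computed below.  */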

      ip = scratch;                     /* ceil(qn/2) limbs */
      rp = scratch + in;                /* ceil(qn/2)+qn limbs */
      scratch += in;                    /* 2*ceil(qn/2)+2 */

      mpn_binvert (ip, dp, in, scratch);

      mpn_mullow_n (qp, np, ip, in);            /* low `in' quotient limbs */
#if WANT_FFT
      if (ABOVE_THRESHOLD (qn, MUL_FFT_MODF_THRESHOLD))
        {
          int k;
          mp_size_t m;

          k = mpn_fft_best_k (qn, 0);
          m = mpn_fft_next_size (qn, k);
          mpn_mul_fft (rp, m, dp, qn, qp, in, k);
          if (mpn_cmp (np, rp, in) < 0)
            mpn_incr_u (rp + in, 1);
        }
      else
#endif
        mpn_mul (rp, dp, qn, qp, in);           /* mulhigh */

      mpn_sub_n (rp, np + in, rp + in, qn - in);
      mpn_mullow_n (qp + in, rp, ip, qn - in);  /* high qn-in quotient limbs */
    }
}

mp_size_t
mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
{
  mp_size_t qn;

  qn = nn;

  if (qn > dn)
    {
      return 4 * dn;            /* FIXME FIXME FIXME need mpn_fft_next_size */
    }
  else
    {
      return 2 * qn + 1 + 2;    /* FIXME FIXME FIXME need mpn_fft_next_size */
    }
}
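
For reference, a minimal usage sketch follows (not part of the patch above).  It
assumes the program is compiled inside the GMP source tree, since mpn_mu_bdiv_q,
mpn_mu_bdiv_q_itch and the TMP_* allocation macros are internal interfaces from
gmp-impl.h rather than part of the public gmp.h API, and the operand values are
made up for illustration.  The sketch builds an exactly divisible N = D*Q with
the public mpn_mul and then recovers Q mod B^nn:

#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"

int
main (void)
{
  mp_limb_t dp[2] = { 12345, 7 };       /* divisor D: two limbs, odd */
  mp_limb_t q0[2] = { 99, 3 };          /* known quotient Q */
  mp_limb_t np[4];                      /* dividend N = D * Q, nn limbs */
  mp_limb_t qp[4];                      /* recovered quotient mod B^nn */
  mp_size_t nn = 4, dn = 2;
  mp_ptr scratch;
  TMP_DECL;

  TMP_MARK;
  mpn_mul (np, q0, 2, dp, 2);           /* N is exactly divisible by D */
  scratch = TMP_ALLOC_LIMBS (mpn_mu_bdiv_q_itch (nn, dn));
  mpn_mu_bdiv_q (qp, np, nn, dp, dn, scratch);
  TMP_FREE;

  /* Expect qp = {99, 3, 0, 0}, i.e. Q zero-extended to nn limbs.  */
  printf ("%lu %lu %lu %lu\n", (unsigned long) qp[0], (unsigned long) qp[1],
          (unsigned long) qp[2], (unsigned long) qp[3]);
  return 0;
}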