X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gmp%2Fmpn%2Fgeneric%2Ftdiv_qr.c;fp=gmp%2Fmpn%2Fgeneric%2Ftdiv_qr.c;h=8ac4d3881364cc783b0111607f0216d5c43355f1;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gmp/mpn/generic/tdiv_qr.c b/gmp/mpn/generic/tdiv_qr.c new file mode 100644 index 00000000..8ac4d388 --- /dev/null +++ b/gmp/mpn/generic/tdiv_qr.c @@ -0,0 +1,401 @@ +/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and + write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If + qxn is non-zero, generate that many fraction limbs and append them after the + other quotient limbs, and update the remainder accordningly. The input + operands are unaffected. + + Preconditions: + 1. The most significant limb of of the divisor must be non-zero. + 2. No argument overlap is permitted. (??? relax this ???) + 3. nn >= dn, even if qxn is non-zero. (??? relax this ???) + + The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time + complexity of multiplication. + +Copyright 1997, 2000, 2001, 2002, 2005 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +void +mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, + mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) +{ + /* FIXME: + 1. qxn + 2. pass allocated storage in additional parameter? + */ + ASSERT_ALWAYS (qxn == 0); + + ASSERT (qxn >= 0); + ASSERT (nn >= 0); + ASSERT (dn >= 0); + ASSERT (dn == 0 || dp[dn - 1] != 0); + ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn)); + ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn)); + + switch (dn) + { + case 0: + DIVIDE_BY_ZERO; + + case 1: + { + rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]); + return; + } + + case 2: + { + mp_ptr n2p, d2p; + mp_limb_t qhl, cy; + TMP_DECL; + TMP_MARK; + if ((dp[1] & GMP_NUMB_HIGHBIT) == 0) + { + int cnt; + mp_limb_t dtmp[2]; + count_leading_zeros (cnt, dp[1]); + cnt -= GMP_NAIL_BITS; + d2p = dtmp; + d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt)); + d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK; + n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np, nn, cnt); + n2p[nn] = cy; + qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); + if (cy == 0) + qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ + rp[0] = (n2p[0] >> cnt) + | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK); + rp[1] = (n2p[1] >> cnt); + } + else + { + d2p = (mp_ptr) dp; + n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np, nn); + qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p); + qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ + rp[0] = n2p[0]; + rp[1] = n2p[1]; + } + TMP_FREE; + return; + } + + default: + { + int adjust; + TMP_DECL; + TMP_MARK; + adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */ + if (nn + adjust >= 2 * dn) + { + mp_ptr n2p, d2p, q2p; + mp_limb_t cy; + int cnt; + + qp[nn - dn] = 0; /* zero high quotient limb */ + if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */ + { + count_leading_zeros (cnt, dp[dn - 1]); + cnt -= GMP_NAIL_BITS; + d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); + mpn_lshift (d2p, dp, dn, cnt); + n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np, nn, cnt); + n2p[nn] = cy; + nn += adjust; + } + else + { + cnt = 0; + d2p = (mp_ptr) dp; + n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np, nn); + n2p[nn] = 0; + nn += adjust; + } + + if (dn < DIV_DC_THRESHOLD) + mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn); + else + { + /* Divide 2*dn / dn limbs as long as the limbs in np last. */ + q2p = qp + nn - dn; + n2p += nn - dn; + do + { + q2p -= dn; n2p -= dn; + mpn_dc_divrem_n (q2p, n2p, d2p, dn); + nn -= dn; + } + while (nn >= 2 * dn); + + if (nn != dn) + { + mp_limb_t ql; + n2p -= nn - dn; + + /* We have now dn < nn - dn < 2dn. Make a recursive call, + since falling out to the code below isn't pretty. + Unfortunately, mpn_tdiv_qr returns nn-dn+1 quotient + limbs, which would overwrite one already generated + quotient limbs. Preserve it with an ugly hack. */ + /* FIXME: This suggests that we should have an + mpn_tdiv_qr_internal that instead returns the most + significant quotient limb and move the meat of this + function there. */ + /* FIXME: Perhaps call mpn_sb_divrem_mn here for certain + operand ranges, to decrease overhead for small + operands? */ + ql = qp[nn - dn]; /* preserve quotient limb... */ + mpn_tdiv_qr (qp, n2p, 0L, n2p, nn, d2p, dn); + qp[nn - dn] = ql; /* ...restore it again */ + } + } + + + if (cnt != 0) + mpn_rshift (rp, n2p, dn, cnt); + else + MPN_COPY (rp, n2p, dn); + TMP_FREE; + return; + } + + /* When we come here, the numerator/partial remainder is less + than twice the size of the denominator. */ + + { + /* Problem: + + Divide a numerator N with nn limbs by a denominator D with dn + limbs forming a quotient of qn=nn-dn+1 limbs. When qn is small + compared to dn, conventional division algorithms perform poorly. + We want an algorithm that has an expected running time that is + dependent only on qn. + + Algorithm (very informally stated): + + 1) Divide the 2 x qn most significant limbs from the numerator + by the qn most significant limbs from the denominator. Call + the result qest. This is either the correct quotient, but + might be 1 or 2 too large. Compute the remainder from the + division. (This step is implemented by a mpn_divrem call.) + + 2) Is the most significant limb from the remainder < p, where p + is the product of the most significant limb from the quotient + and the next(d)? (Next(d) denotes the next ignored limb from + the denominator.) If it is, decrement qest, and adjust the + remainder accordingly. + + 3) Is the remainder >= qest? If it is, qest is the desired + quotient. The algorithm terminates. + + 4) Subtract qest x next(d) from the remainder. If there is + borrow out, decrement qest, and adjust the remainder + accordingly. + + 5) Skip one word from the denominator (i.e., let next(d) denote + the next less significant limb. */ + + mp_size_t qn; + mp_ptr n2p, d2p; + mp_ptr tp; + mp_limb_t cy; + mp_size_t in, rn; + mp_limb_t quotient_too_large; + unsigned int cnt; + + qn = nn - dn; + qp[qn] = 0; /* zero high quotient limb */ + qn += adjust; /* qn cannot become bigger */ + + if (qn == 0) + { + MPN_COPY (rp, np, dn); + TMP_FREE; + return; + } + + in = dn - qn; /* (at least partially) ignored # of limbs in ops */ + /* Normalize denominator by shifting it to the left such that its + most significant bit is set. Then shift the numerator the same + amount, to mathematically preserve quotient. */ + if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) + { + count_leading_zeros (cnt, dp[dn - 1]); + cnt -= GMP_NAIL_BITS; + + d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB); + mpn_lshift (d2p, dp + in, qn, cnt); + d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt); + + n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt); + if (adjust) + { + n2p[2 * qn] = cy; + n2p++; + } + else + { + n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt); + } + } + else + { + cnt = 0; + d2p = (mp_ptr) dp + in; + + n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn); + if (adjust) + { + n2p[2 * qn] = 0; + n2p++; + } + } + + /* Get an approximate quotient using the extracted operands. */ + if (qn == 1) + { + mp_limb_t q0, r0; + mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0; + /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some + temps here. This doesn't hurt code quality on any machines + so we do it unconditionally. */ + gcc272bug_n1 = n2p[1]; + gcc272bug_n0 = n2p[0]; + gcc272bug_d0 = d2p[0]; + udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0 << GMP_NAIL_BITS, + gcc272bug_d0 << GMP_NAIL_BITS); + r0 >>= GMP_NAIL_BITS; + n2p[0] = r0; + qp[0] = q0; + } + else if (qn == 2) + mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); + else if (qn < DIV_DC_THRESHOLD) + mpn_sb_divrem_mn (qp, n2p, 2 * qn, d2p, qn); + else + mpn_dc_divrem_n (qp, n2p, d2p, qn); + + rn = qn; + /* Multiply the first ignored divisor limb by the most significant + quotient limb. If that product is > the partial remainder's + most significant limb, we know the quotient is too large. This + test quickly catches most cases where the quotient is too large; + it catches all cases where the quotient is 2 too large. */ + { + mp_limb_t dl, x; + mp_limb_t h, dummy; + + if (in - 2 < 0) + dl = 0; + else + dl = dp[in - 2]; + +#if GMP_NAIL_BITS == 0 + x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % BITS_PER_MP_LIMB)); +#else + x = (dp[in - 1] << cnt) & GMP_NUMB_MASK; + if (cnt != 0) + x |= dl >> (GMP_NUMB_BITS - cnt); +#endif + umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS); + + if (n2p[qn - 1] < h) + { + mp_limb_t cy; + + mpn_decr_u (qp, (mp_limb_t) 1); + cy = mpn_add_n (n2p, n2p, d2p, qn); + if (cy) + { + /* The partial remainder is safely large. */ + n2p[qn] = cy; + ++rn; + } + } + } + + quotient_too_large = 0; + if (cnt != 0) + { + mp_limb_t cy1, cy2; + + /* Append partially used numerator limb to partial remainder. */ + cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt); + n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt); + + /* Update partial remainder with partially used divisor limb. */ + cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt)); + if (qn != rn) + { + ASSERT_ALWAYS (n2p[qn] >= cy2); + n2p[qn] -= cy2; + } + else + { + n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */ + + quotient_too_large = (cy1 < cy2); + ++rn; + } + --in; + } + /* True: partial remainder now is neutral, i.e., it is not shifted up. */ + + tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); + + if (in < qn) + { + if (in == 0) + { + MPN_COPY (rp, n2p, rn); + ASSERT_ALWAYS (rn == dn); + goto foo; + } + mpn_mul (tp, qp, qn, dp, in); + } + else + mpn_mul (tp, dp, in, qp, qn); + + cy = mpn_sub (n2p, n2p, rn, tp + in, qn); + MPN_COPY (rp + in, n2p, dn - in); + quotient_too_large |= cy; + cy = mpn_sub_n (rp, np, tp, in); + cy = mpn_sub_1 (rp + in, rp + in, rn, cy); + quotient_too_large |= cy; + foo: + if (quotient_too_large) + { + mpn_decr_u (qp, (mp_limb_t) 1); + mpn_add_n (rp, rp, dp, dn); + } + } + TMP_FREE; + return; + } + } +}