X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gmp%2Fmpn%2Fgeneric%2Fmode1o.c;fp=gmp%2Fmpn%2Fgeneric%2Fmode1o.c;h=064becdadfdf9569fbf6576df4cdae521a30ad1a;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gmp/mpn/generic/mode1o.c b/gmp/mpn/generic/mode1o.c new file mode 100644 index 00000000..064becda --- /dev/null +++ b/gmp/mpn/generic/mode1o.c @@ -0,0 +1,225 @@ +/* mpn_modexact_1c_odd -- mpn by limb exact division style remainder. + + THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST + CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN + FUTURE GNU MP RELEASES. + +Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +/* Calculate an r satisfying + + r*B^k + a - c == q*d + + where B=2^BITS_PER_MP_LIMB, a is {src,size}, k is either size or size-1 + (the caller won't know which), and q is the quotient (discarded). d must + be odd, c can be any limb value. + + If c=d then 0<=r<=d. + + This slightly strange function suits the initial Nx1 reduction for GCDs + or Jacobi symbols since the factors of 2 in B^k can be ignored, leaving + -r == a mod d (by passing c=0). For a GCD the factor of -1 on r can be + ignored, or for the Jacobi symbol it can be accounted for. The function + also suits divisibility and congruence testing since if r=0 (or r=d) is + obtained then a==c mod d. + + + r is a bit like the remainder returned by mpn_divexact_by3c, and is the + sort of remainder mpn_divexact_1 might return. Like mpn_divexact_by3c, r + represents a borrow, since effectively quotient limbs are chosen so that + subtracting that multiple of d from src at each step will produce a zero + limb. + + A long calculation can be done piece by piece from low to high by passing + the return value from one part as the carry parameter to the next part. + The effective final k becomes anything between size and size-n, if n + pieces are used. + + + A similar sort of routine could be constructed based on adding multiples + of d at each limb, much like redc in mpz_powm does. Subtracting however + has a small advantage that when subtracting to cancel out l there's never + a borrow into h, whereas using an addition would put a carry into h + depending whether l==0 or l!=0. + + + In terms of efficiency, this function is similar to a mul-by-inverse + mpn_mod_1. Both are essentially two multiplies and are best suited to + CPUs with low latency multipliers (in comparison to a divide instruction + at least.) But modexact has a few less supplementary operations, only + needs low part and high part multiplies, and has fewer working quantities + (helping CPUs with few registers). + + + In the main loop it will be noted that the new carry (call it r) is the + sum of the high product h and any borrow from l=s-c. If c=B-d+1 and hence will + never have h=d-1 and so r=h+borrow <= d-1. + + When c>=d, on the other hand, h=d-1 can certainly occur together with a + borrow, thereby giving only r<=d, as per the function definition above. + + As a design decision it's left to the caller to check for r=d if it might + be passing c>=d. Several applications have c= 1); + ASSERT (d & 1); + ASSERT_MPN (src, size); + ASSERT_LIMB (d); + ASSERT_LIMB (c); + + if (size == 1) + { + s = src[0]; + if (s > c) + { + l = s-c; + h = l % d; + if (h != 0) + h = d - h; + } + else + { + l = c-s; + h = l % d; + } + return h; + } + + + binvert_limb (inverse, d); + dmul = d << GMP_NAIL_BITS; + + i = 0; + do + { + s = src[i]; + SUBC_LIMB (c, l, s, c); + l = (l * inverse) & GMP_NUMB_MASK; + umul_ppmm (h, dummy, l, dmul); + c += h; + } + while (++i < size-1); + + + s = src[i]; + if (s <= d) + { + /* With high<=d the final step can be a subtract and addback. If c==0 + then the addback will restore to l>=0. If c==d then will get l==d + if s==0, but that's ok per the function definition. */ + + l = c - s; + if (c < s) + l += d; + + ret = l; + } + else + { + /* Can't skip a divide, just do the loop code once more. */ + + SUBC_LIMB (c, l, s, c); + l = (l * inverse) & GMP_NUMB_MASK; + umul_ppmm (h, dummy, l, dmul); + c += h; + ret = c; + } + + ASSERT (orig_c < d ? ret < d : ret <= d); + return ret; +} + + + +#if 0 + +/* The following is an alternate form that might shave one cycle on a + superscalar processor since it takes c+=h off the dependent chain, + leaving just a low product, high product, and a subtract. + + This is for CPU specific implementations to consider. A special case for + highs) could become + c=(x==0xFF..FF) too, if that helped. */ + +mp_limb_t +mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h) +{ + mp_limb_t s, x, y, inverse, dummy, dmul, c1, c2; + mp_limb_t c = 0; + mp_size_t i; + + ASSERT (size >= 1); + ASSERT (d & 1); + + binvert_limb (inverse, d); + dmul = d << GMP_NAIL_BITS; + + for (i = 0; i < size; i++) + { + ASSERT (c==0 || c==1); + + s = src[i]; + SUBC_LIMB (c1, x, s, c); + + SUBC_LIMB (c2, y, x, h); + c = c1 + c2; + + y = (y * inverse) & GMP_NUMB_MASK; + umul_ppmm (h, dummy, y, dmul); + } + + h += c; + return h; +} + +#endif