dnl SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl store difference in a third limb vector.

dnl Copyright 2001, 2002, 2003 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.

dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.

dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.

dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                  cycles/limb
C UltraSPARC 1&2:     4
C UltraSPARC 3:       4.5

C Compute carry-out from the most significant bits of u,v, and r, where
C r=u-v-carry_in, using logic operations.

C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
C recurrence, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
C Therefore, it seems futile to try to optimize this any further...
C INPUT PARAMETERS
C   rp  destination limb vector, receives up[i] - vp[i] - borrow
C   up  minuend limb vector
C   vp  subtrahend limb vector
C   n   number of 64-bit limbs, must be > 0
C RETURN VALUE
C   The borrow out of the most significant limb (0 or 1) is left in %i0,
C   which the caller sees as %o0 once the register window is restored.
define(`rp',`%i0')
define(`up',`%i1')
define(`vp',`%i2')
define(`n',`%i3')

C Four limbs of each operand are kept in flight at a time.
define(`u0',`%l0')
define(`u1',`%l2')
define(`u2',`%l4')
define(`u3',`%l6')
define(`v0',`%l1')
define(`v1',`%l3')
define(`v2',`%l5')
define(`v3',`%l7')

C Running borrow, always 0 or 1.
define(`cy',`%i4')

define(`fanop',`fitod %f0,%f2')		dnl A quasi nop running in the FA pipe
define(`fmnop',`fmuld %f0,%f0,%f4')	dnl A quasi nop running in the FM pipe

C Borrow computation used for every limb, with r = u - v - cy:
C   %g2 = r | ~(u | ~v)  =  r | (~u & v)
C   %g3 = u & ~v
C   cy  = most significant bit of (%g2 & ~%g3)
C SPARC orn/andn complement their second source operand.
C
C All tests of n use %xcc.  The limb count lives in a 64-bit register, and
C %icc only reflects the low 32 bits, which would misdirect every branch
C once n reaches 2^31.  This assumes the 64-bit ABI, where the size
C argument is a full 64-bit integer.

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_sub_n)
	save	%sp,-160,%sp

	fitod	%f0,%f0		C make sure f0 contains small, quiet number
	subcc	n,4,%g0
	bl,pn	%xcc,.Loop0	C fewer than 4 limbs: one-limb loop only
	mov	0,cy		C delay slot, runs on both paths: borrow-in = 0

C Preload the first block of four limbs from each operand.
	ldx	[up+0],u0
	ldx	[vp+0],v0
	add	up,32,up
	ldx	[up-24],u1
	ldx	[vp+8],v1
	add	vp,32,vp
	ldx	[up-16],u2
	ldx	[vp-16],v2
	ldx	[up-8],u3
	ldx	[vp-8],v3
	subcc	n,8,n		C n = limbs left after this block and one more
	sub	u0,v0,%g1	C main sub
	sub	%g1,cy,%g4	C carry sub
	orn	u0,v0,%g2
	bl,pn	%xcc,.Lend4567	C no further full block: just drain this one
	fanop
	b,a	.Loop

	.align	16
C START MAIN LOOP
C Each pass finishes the four limbs already in registers while loading the
C next four.  Instructions are arranged in groups of four, separated by
C the "--" markers, with fanop/fmnop filling otherwise unused slots.
.Loop:	orn	%g4,%g2,%g2
	andn	u0,v0,%g3
	ldx	[up+0],u0
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp+0],v0
	add	up,32,up
	fanop
C --
	srlx	%g2,63,cy
	sub	u1,v1,%g1
	stx	%g4,[rp+0]
	fanop
C --
	sub	%g1,cy,%g4
	orn	u1,v1,%g2
	fmnop
	fanop
C --
	orn	%g4,%g2,%g2
	andn	u1,v1,%g3
	ldx	[up-24],u1
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp+8],v1
	add	vp,32,vp
	fanop
C --
	srlx	%g2,63,cy
	sub	u2,v2,%g1
	stx	%g4,[rp+8]
	fanop
C --
	sub	%g1,cy,%g4
	orn	u2,v2,%g2
	fmnop
	fanop
C --
	orn	%g4,%g2,%g2
	andn	u2,v2,%g3
	ldx	[up-16],u2
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp-16],v2
	add	rp,32,rp
	fanop
C --
	srlx	%g2,63,cy
	sub	u3,v3,%g1
	stx	%g4,[rp-16]
	fanop
C --
	sub	%g1,cy,%g4
	orn	u3,v3,%g2
	fmnop
	fanop
C --
	orn	%g4,%g2,%g2
	andn	u3,v3,%g3
	ldx	[up-8],u3
	fanop
C --
	andn	%g2,%g3,%g2
	subcc	n,4,n		C sets the flags consumed by bge below
	ldx	[vp-8],v3
	fanop
C --
	srlx	%g2,63,cy
	sub	u0,v0,%g1
	stx	%g4,[rp-8]
	fanop
C --
	sub	%g1,cy,%g4
	orn	u0,v0,%g2
	bge,pt	%xcc,.Loop
	fanop
C END MAIN LOOP

C Drain the last block of four limbs held in registers; nothing is loaded.
.Lend4567:
	orn	%g4,%g2,%g2
	andn	u0,v0,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	sub	u1,v1,%g1
	stx	%g4,[rp+0]
	sub	%g1,cy,%g4
	orn	u1,v1,%g2
	orn	%g4,%g2,%g2
	andn	u1,v1,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	sub	u2,v2,%g1
	stx	%g4,[rp+8]
	sub	%g1,cy,%g4
	orn	u2,v2,%g2
	orn	%g4,%g2,%g2
	andn	u2,v2,%g3
	andn	%g2,%g3,%g2
	add	rp,32,rp
	srlx	%g2,63,cy
	sub	u3,v3,%g1
	stx	%g4,[rp-16]
	sub	%g1,cy,%g4
	orn	u3,v3,%g2
	orn	%g4,%g2,%g2
	andn	u3,v3,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	stx	%g4,[rp-8]

	addcc	n,4,n		C n = leftover limbs, 0 to 3
	bz,pn	%xcc,.Lret
	fanop

C One limb per pass: handles the leftover limbs, or everything when n < 4.
.Loop0:	ldx	[up],u0
	add	up,8,up
	ldx	[vp],v0
	add	vp,8,vp
	add	rp,8,rp
	subcc	n,1,n
	sub	u0,v0,%g1
	orn	u0,v0,%g2
	sub	%g1,cy,%g4
	andn	u0,v0,%g3
	orn	%g4,%g2,%g2
	stx	%g4,[rp-8]
	andn	%g2,%g3,%g2
	bnz,pt	%xcc,.Loop0
	srlx	%g2,63,cy	C delay slot: borrow out of this limb

.Lret:	mov	cy,%i0		C return the final borrow
	ret
	restore
EPILOGUE(mpn_sub_n)