dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store dnl sum in a third limb vector. dnl Copyright 2001 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl The GNU MP Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published dnl by the Free Software Foundation; either version 3 of the License, or (at dnl your option) any later version. dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl You should have received a copy of the GNU Lesser General Public License dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C INPUT PARAMETERS define(rp,%o0) define(s1p,%o1) define(s2p,%o2) define(n,%o3) define(cy,%g1) C This code uses 64-bit operations on `o' and `g' registers. It doesn't C require that `o' registers' upper 32 bits are preserved by the operating C system, but if they are not, they must be zeroed. That is indeed what C happens at least on Slowaris 2.5 and 2.6. C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at C about 10 cycles/limb from the Ecache. ASM_START() PROLOGUE(mpn_add_n) lduw [s1p+0],%o4 lduw [s2p+0],%o5 addcc n,-2,n bl,pn %icc,L(end1) lduw [s1p+4],%g2 lduw [s2p+4],%g3 be,pn %icc,L(end2) mov 0,cy .align 16 L(loop): add %o4,%o5,%g4 add rp,8,rp lduw [s1p+8],%o4 fitod %f0,%f2 C --- add cy,%g4,%g4 addcc n,-1,n lduw [s2p+8],%o5 fitod %f0,%f2 C --- srlx %g4,32,cy add s2p,8,s2p stw %g4,[rp-8] be,pn %icc,L(exito)+4 C --- add %g2,%g3,%g4 addcc n,-1,n lduw [s1p+12],%g2 fitod %f0,%f2 C --- add cy,%g4,%g4 add s1p,8,s1p lduw [s2p+4],%g3 fitod %f0,%f2 C --- srlx %g4,32,cy bne,pt %icc,L(loop) stw %g4,[rp-4] C --- L(exite): add %o4,%o5,%g4 add cy,%g4,%g4 srlx %g4,32,cy stw %g4,[rp+0] add %g2,%g3,%g4 add cy,%g4,%g4 stw %g4,[rp+4] retl srlx %g4,32,%o0 L(exito): add %g2,%g3,%g4 add cy,%g4,%g4 srlx %g4,32,cy stw %g4,[rp-4] add %o4,%o5,%g4 add cy,%g4,%g4 stw %g4,[rp+0] retl srlx %g4,32,%o0 L(end1): add %o4,%o5,%g4 stw %g4,[rp+0] retl srlx %g4,32,%o0 L(end2): add %o4,%o5,%g4 srlx %g4,32,cy stw %g4,[rp+0] add %g2,%g3,%g4 add cy,%g4,%g4 stw %g4,[rp+4] retl srlx %g4,32,%o0 EPILOGUE(mpn_add_n)