X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gmp%2Fmpn%2Fsparc32%2Fv9%2Fsub_n.asm;fp=gmp%2Fmpn%2Fsparc32%2Fv9%2Fsub_n.asm;h=cea474326cc4d1c176f17d3353649601196f24e1;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gmp/mpn/sparc32/v9/sub_n.asm b/gmp/mpn/sparc32/v9/sub_n.asm new file mode 100644 index 00000000..cea47432 --- /dev/null +++ b/gmp/mpn/sparc32/v9/sub_n.asm @@ -0,0 +1,118 @@ +dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +dnl store difference in a third limb vector. + +dnl Copyright 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + + +include(`../config.m4') + +C INPUT PARAMETERS +define(rp,%o0) +define(s1p,%o1) +define(s2p,%o2) +define(n,%o3) +define(cy,%g1) + +C This code uses 64-bit operations on `o' and `g' registers. It doesn't +C require that `o' registers' upper 32 bits are preserved by the operating +C system, but if they are not, they must be zeroed. That is indeed what +C happens at least on Slowaris 2.5 and 2.6. + +C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at +C about 10 cycles/limb from the Ecache. 

C  mpn_sub_n(rp, s1p, s2p, n) -- rp[i] = s1p[i] - s2p[i] for i in [0, n),
C  propagating borrow; returns the final borrow (0 or 1) in %o0.  n > 0.
C
C  Strategy: each 32-bit limb is loaded with lduw, which zero-extends into a
C  64-bit register.  The subtraction is then done with full 64-bit `sub`; if
C  the true result is negative, bits 63..32 of the difference are all ones,
C  so `srlx diff,63` extracts the outgoing borrow.  The stored low 32 bits
C  (stw) are the correct limb difference either way.
C
C  The main loop handles two limbs per iteration, with the two half-
C  iterations software-pipelined across the %o4/%o5 and %g2/%g3 register
C  pairs.  The `C ---` markers separate the intended UltraSPARC issue
C  groups; the `fitod %f0,%f2` instructions compute nothing used --
C  presumably dummy FP-pipe pads to balance dual issue (NOTE(review):
C  inferred from the cycle-count comment above; confirm against UltraSPARC
C  pipeline docs).

ASM_START()
PROLOGUE(mpn_sub_n)
	lduw	[s1p+0],%o4
C  %o4/%o5 = limb 0 of each operand, zero-extended to 64 bits.
	lduw	[s2p+0],%o5
	addcc	n,-2,n
C  n was 1  ->  negative: single-limb case, no borrow chain needed.
	bl,pn	%icc,L(end1)
	lduw	[s1p+4],%g2
C  (delay slot above/below: %g2/%g3 = limb 1, harmless even if unused.)
	lduw	[s2p+4],%g3
C  n was exactly 2 -> two-limb case with cy initialized in the delay slot.
	be,pn	%icc,L(end2)
	mov	0,cy
C  cy (%g1) = incoming borrow, 0 or 1, for the loop below.

	.align	16
L(loop):
C  First half-iteration: consume the %o4/%o5 pair, prefetch two limbs ahead.
	sub	%o4,%o5,%g4
	add	rp,8,rp
	lduw	[s1p+8],%o4
	fitod	%f0,%f2
C ---
	sub	%g4,cy,%g4
C  apply incoming borrow
	addcc	n,-1,n
	lduw	[s2p+8],%o5
	fitod	%f0,%f2
C ---
	srlx	%g4,63,cy
C  borrow out = sign bit of the 64-bit difference
	add	s2p,8,s2p
	stw	%g4,[rp-8]
C  Odd count exhausted here: exit via L(exito).  The +4 skips L(exito)'s
C  first instruction (`sub %g2,%g3,%g4`) because the identical instruction
C  in this branch's delay slot has already executed.
	be,pn	%icc,L(exito)+4
C ---
	sub	%g2,%g3,%g4
C  Second half-iteration: consume the %g2/%g3 pair.
	addcc	n,-1,n
	lduw	[s1p+12],%g2
	fitod	%f0,%f2
C ---
	sub	%g4,cy,%g4
	add	s1p,8,s1p
	lduw	[s2p+4],%g3
C  (s2p was already advanced by 8 above, so this is original offset +12.)
	fitod	%f0,%f2
C ---
	srlx	%g4,63,cy
	bne,pt	%icc,L(loop)
	stw	%g4,[rp-4]
C  delay slot: store second limb of the pair
C ---
L(exite):
C  Even exit: two limbs (%o4/%o5 then %g2/%g3) remain to finish.
	sub	%o4,%o5,%g4
	sub	%g4,cy,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp+0]
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,63,%o0
C  delay slot: return value = final borrow

L(exito):
C  Odd exit: %g2/%g3 pair first (its `sub` ran in the branch delay slot
C  when entered at L(exito)+4), then the final %o4/%o5 pair.
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp-4]
	sub	%o4,%o5,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,63,%o0
C  delay slot: return value = final borrow

L(end1):
C  n == 1: one subtraction, no incoming borrow; borrow out returned directly.
	sub	%o4,%o5,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,63,%o0

L(end2):
C  n == 2: two subtractions with the borrow threaded between them.
	sub	%o4,%o5,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp+0]
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,63,%o0
EPILOGUE(mpn_sub_n)