Imported gcc-4.4.3

[msp430-gcc.git] / gmp / mpn / sparc32 / v9 / add_n.asm
diff --git a/gmp/mpn/sparc32/v9/add_n.asm b/gmp/mpn/sparc32/v9/add_n.asm

new file mode 100644 (file)

index 0000000..a21cf10
--- /dev/null
+++ b/gmp/mpn/sparc32/v9/add_n.asm
@@ -0,0 +1,118 @@
+dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(rp,%o0)          C destination pointer: the sum limbs are stored through it
+define(s1p,%o1)         C pointer to the limbs of the first source operand
+define(s2p,%o2)         C pointer to the limbs of the second source operand
+define(n,%o3)           C limb count (> 0); counted down as limbs are consumed
+define(cy,%g1)          C scratch, not an input: carry between limbs (previous 64-bit sum >> 32)
+
+C This code uses 64-bit operations on `o' and `g' registers.  It does not
+C require the operating system to preserve the upper 32 bits of the `o'
+C registers, but if they are not preserved, they must be zeroed.  That is
+C indeed what happens at least on Solaris 2.5 and 2.6.
+
+C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
+C about 10 cycles/limb from the Ecache.
+
+ASM_START()
+PROLOGUE(mpn_add_n)     C add the n 32-bit limbs at s1p and s2p, store the sums at rp, return the carry-out in %o0
+       lduw    [s1p+0],%o4     C %o4 = s1p[0]; lduw zero-extends, so 64-bit adds keep the carry in bit 32
+       lduw    [s2p+0],%o5     C %o5 = s2p[0]
+       addcc   n,-2,n          C n -= 2; sets the condition codes tested by both branches below
+       bl,pn   %icc,L(end1)    C n was 1: a single limb to add (predicted not taken)
+       lduw    [s1p+4],%g2     C delay slot, not annulled: s1p[1] is loaded even when n was 1
+       lduw    [s2p+4],%g3     C %g3 = s2p[1]
+       be,pn   %icc,L(end2)    C n was 2: both limb pairs are already loaded, no loop needed
+       mov     0,cy            C delay slot: no carry into the first limb
+
+       .align  16              C start the loop on a 16-byte boundary
+L(loop):                C here: %o4/%o5 and %g2/%g3 hold the next two limb pairs, cy the carry in, n = limbs not yet loaded (nonzero)
+       add     %o4,%o5,%g4     C 64-bit sum of the first pending pair
+       add     rp,8,rp         C advance rp by two limbs; the stores below use negative offsets
+       lduw    [s1p+8],%o4     C preload the s1p limb that follows the two pending pairs
+       fitod   %f0,%f2         C FP op whose result is never read in this routine; presumably a scheduling filler -- TODO confirm
+C ---
+       add     cy,%g4,%g4      C add the incoming carry
+       addcc   n,-1,n          C account for the limb just preloaded; flags are tested by the be below
+       lduw    [s2p+8],%o5     C preload the matching s2p limb
+       fitod   %f0,%f2         C result unused here; presumably a scheduling filler -- TODO confirm
+C ---
+       srlx    %g4,32,cy       C cy = carry out of this limb (bit 32 of the sum)
+       add     s2p,8,s2p       C advance s2p by two limbs
+       stw     %g4,[rp-8]      C store the low 32 bits as the first result limb of this pass
+       be,pn   %icc,L(exito)+4 C n reached 0: leave with three limbs still to finish; +4 skips the first instruction of L(exito), which the delay slot below duplicates
+C ---
+       add     %g2,%g3,%g4     C delay slot of the be above and start of the second limb: sum of the second pending pair
+       addcc   n,-1,n          C account for the limb preloaded next; flags are tested by the bne below
+       lduw    [s1p+12],%g2    C preload the next s1p limb (s1p is only advanced below, hence +12)
+       fitod   %f0,%f2         C result unused here; presumably a scheduling filler -- TODO confirm
+C ---
+       add     cy,%g4,%g4      C add the carry from the first limb of this pass
+       add     s1p,8,s1p       C advance s1p by two limbs
+       lduw    [s2p+4],%g3     C preload the matching s2p limb (s2p was already advanced, hence +4)
+       fitod   %f0,%f2         C result unused here; presumably a scheduling filler -- TODO confirm
+C ---
+       srlx    %g4,32,cy       C cy = carry out of the second limb
+       bne,pt  %icc,L(loop)    C more limbs to load: go round again (predicted taken)
+       stw     %g4,[rp-4]      C delay slot, executed on both paths: store the second result limb of this pass
+C ---
+L(exite):               C fell out of the loop with n == 0: two loaded pairs remain, %o4/%o5 then %g2/%g3
+       add     %o4,%o5,%g4     C sum of the first remaining pair
+       add     cy,%g4,%g4      C plus the carry from the loop
+       srlx    %g4,32,cy       C carry into the last limb
+       stw     %g4,[rp+0]      C store the second-to-last result limb
+       add     %g2,%g3,%g4     C sum of the last pair
+       add     cy,%g4,%g4      C plus carry
+       stw     %g4,[rp+4]      C store the last result limb
+       retl                    C leaf return
+       srlx    %g4,32,%o0      C delay slot: return value = carry out of the last limb
+
+L(exito):               C the only branch to this code in this routine targets L(exito)+4, with %g4 = %g2+%g3 already computed
+       add     %g2,%g3,%g4     C skipped by that branch; the same add sits in the branch's delay slot
+       add     cy,%g4,%g4      C add the carry from the limb stored just before the branch
+       srlx    %g4,32,cy       C carry into the last limb
+       stw     %g4,[rp-4]      C store the second result limb of the interrupted pass (rp was already advanced)
+       add     %o4,%o5,%g4     C sum of the last pair, preloaded in the loop
+       add     cy,%g4,%g4      C plus carry
+       stw     %g4,[rp+0]      C store the last result limb
+       retl                    C leaf return
+       srlx    %g4,32,%o0      C delay slot: return value = carry out of the last limb
+
+L(end1):                C n == 1: a single limb, no carry in
+       add     %o4,%o5,%g4     C s1p[0] + s2p[0]
+       stw     %g4,[rp+0]      C store the only result limb
+       retl                    C leaf return
+       srlx    %g4,32,%o0      C delay slot: return value = carry out
+
+L(end2):                C n == 2: two limbs, both pairs loaded at entry
+       add     %o4,%o5,%g4     C s1p[0] + s2p[0]; no carry in for the lowest limb
+       srlx    %g4,32,cy       C carry into limb 1
+       stw     %g4,[rp+0]      C store result limb 0
+       add     %g2,%g3,%g4     C s1p[1] + s2p[1]
+       add     cy,%g4,%g4      C plus carry
+       stw     %g4,[rp+4]      C store result limb 1
+       retl                    C leaf return
+       srlx    %g4,32,%o0      C delay slot: return value = carry out of limb 1
+EPILOGUE(mpn_add_n)