oss.titaniummirror.com Git - msp430-gcc.git/blobdiff - gmp/mpn/alpha/ev6/nails/addmul_2.asm
Imported gcc-4.4.3
[msp430-gcc.git] / gmp / mpn / alpha / ev6 / nails / addmul_2.asm
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_2.asm b/gmp/mpn/alpha/ev6/nails/addmul_2.asm
new file mode 100644 (file)
index 0000000..9edaed8
--- /dev/null
@@ -0,0 +1,135 @@
+dnl  Alpha ev6 nails mpn_addmul_2.
+
+dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Runs at 4.0 cycles/limb.
+
+C We could either go for 2-way unrolling over 11 cycles, or 2.75 c/l,
+C or 4-way unrolling over 20 cycles, for 2.5 c/l.
+
+
+C  INPUT PARAMETERS
+C  rp is both loaded from and stored to, up is only loaded from, n is the
+C  count of limbs read from up, and vp supplies two limbs at offsets 0 and 8.
+define(`rp',`r16')
+define(`up',`r17')
+define(`n',`r18')
+define(`vp',`r19')
+
+C  Useful register aliases
+C  numb_mask is all ones shifted right by NAIL_BITS, used to strip the nail
+C  field off a sum.  ulimb is the limb most recently loaded from up, rlimb
+C  the limb most recently loaded from rp.
+define(`numb_mask',`r24')
+define(`ulimb',`r25')
+define(`rlimb',`r27')
+
+C  m0a/m0b receive the mulq/umulh results of v0 times ulimb, m1a/m1b those
+C  of v1 times ulimb.  m0a is r0, which is also where the routine leaves its
+C  return value once the last product has been consumed.
+define(`m0a',`r0')
+define(`m0b',`r1')
+define(`m1a',`r2')
+define(`m1b',`r3')
+
+C  acc0 and acc1 carry partial sums forward from one result limb to the
+C  next two.
+define(`acc0',`r4')
+define(`acc1',`r5')
+
+C  The two limbs loaded from vp, held shifted left by NAIL_BITS.
+define(`v0',`r6')
+define(`v1',`r7')
+
+C Used for temps: r8 r19 r28
+C r19 is also vp.  It is zeroed and reused as the nail carry once v0 and v1
+C have been loaded, so vp is not available after the routine prologue.
+
+C  Short local names for the nail/numb bit counts used as shift amounts.
+define(`NAIL_BITS',`GMP_NAIL_BITS')
+define(`NUMB_BITS',`GMP_NUMB_BITS')
+
+C  This declaration is munged by configure
+NAILS_SUPPORT(3-63)
+
+ASM_START()
+C  mpn_addmul_2 -- multiply the n limbs at up by the two limbs at vp and add
+C  the product into the limbs at rp, keeping every stored limb nail-free.
+C
+C  In:    rp (r16), up (r17), n (r18), vp (r19)
+C  Out:   rp[0] ... rp[n] are stored, each masked with numb_mask.
+C         rp[0] ... rp[n-1] are read as addends; rp[n] is written without
+C         being read.  The part above rp[n] is returned in r0.
+C  Clobb: r0-r8, r19, r24, r25, r27, r28; rp, up and n are advanced/consumed.
+C
+C  n must be at least 1: the count is decremented before it is first tested
+C  and up[0] is loaded unconditionally, so n = 0 is not caught by the beq.
+C
+C  The code is software pipelined: the products for up[0] are started before
+C  the loop, and every loop pass folds the previous products into the result
+C  while starting the multiplies for the limb it has just loaded.
+C
+C  NOTE(review): the U0/U1/L0/L1 tags look like the intended EV6 issue slots
+C  for each instruction -- confirm before rescheduling anything.
+PROLOGUE(mpn_addmul_2)
+C  numb_mask = all ones with the top NAIL_BITS bits cleared.
+       lda     numb_mask,-1(r31)
+       srl     numb_mask,NAIL_BITS,numb_mask
+
+       ldq     v0,     0(vp)
+       ldq     v1,     8(vp)
+
+C  Pre-shift the multipliers left by NAIL_BITS.  The mulq result shifted
+C  right by NAIL_BITS is then the low part of the true product and the umulh
+C  result is the part above it (assuming NUMB_BITS + NAIL_BITS is the 64-bit
+C  register width).  r19 (vp) is dead after the two loads above and becomes
+C  the nail carry, starting at zero.
+       bis     r31,    r31,    acc0            C       zero acc0
+       sll     v0,NAIL_BITS,   v0
+       bis     r31,    r31,    acc1            C       zero acc1
+       sll     v1,NAIL_BITS,   v1
+       bis     r31,    r31,    r19
+
+C  Start the pipeline: products for up[0].  n becomes the number of limbs
+C  still to be loaded; if that is zero go straight to the wind-down code.
+       ldq     ulimb,  0(up)
+       lda     up,     8(up)
+       mulq    v0,     ulimb,  m0a             C U1
+       umulh   v0,     ulimb,  m0b             C U1
+       mulq    v1,     ulimb,  m1a             C U1
+       umulh   v1,     ulimb,  m1b             C U1
+       lda     n,      -1(n)
+       beq     n,      L(end)                  C U0
+
+C  Loop invariant at L(top): m0a/m0b and m1a/m1b hold the products for the
+C  previously loaded ulimb, acc0 and acc1 hold the sums pending for the next
+C  two result limbs, and r19 holds the nail carry out of the last stored limb.
+C  Each m register is read just before the multiply that overwrites it.
+       ALIGN(16)
+L(top):        bis     r31,    r31,    r31             C U1    nop
+       addq    r19,    acc0,   acc0            C U0    propagate nail
+       ldq     rlimb,  0(rp)                   C L0
+       ldq     ulimb,  0(up)                   C L1
+
+       lda     rp,     8(rp)                   C L1
+       srl     m0a,NAIL_BITS,  r8              C U0
+       lda     up,     8(up)                   C L0
+       mulq    v0,     ulimb,  m0a             C U1
+
+       addq    r8,     acc0,   r19             C U0
+       addq    m0b,    acc1,   acc0            C L1
+       umulh   v0,     ulimb,  m0b             C U1
+       bis     r31,    r31,    r31             C L0    nop
+
+       addq    rlimb,  r19,    r19             C L1    FINAL PROD-SUM
+       srl     m1a,NAIL_BITS,  r8              C U0
+       lda     n,      -1(n)                   C L0
+       mulq    v1,     ulimb,  m1a             C U1
+
+       addq    r8,     acc0,   acc0            C U0
+       bis     r31,    m1b,    acc1            C L1
+       umulh   v1,     ulimb,  m1b             C U1
+       and     r19,numb_mask,  r28             C L0    extract numb part
+
+C  rp was already advanced above, hence the -8 displacement on the store.
+       unop
+       srl     r19,NUMB_BITS,  r19             C U1    extract nail part
+       stq     r28,    -8(rp)                  C L1
+       bne     n,      L(top)                  C U0
+
+C  Wind-down: the same accumulate/mask/store sequence as the loop body, but
+C  with no further load from up and no new multiplies, for the products that
+C  are still pending.
+L(end):        ldq     rlimb,  0(rp)
+       addq    r19,    acc0,   acc0            C       propagate nail
+       lda     rp,     8(rp)
+       srl     m0a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc0,   r19
+       addq    m0b,    acc1,   acc0
+       addq    rlimb,  r19,    r19
+       srl     m1a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc0,   acc0
+       bis     r31,    m1b,    acc1
+       and     r19,numb_mask,  r28             C extract limb
+
+       srl     r19,NUMB_BITS,  r19             C extract nail
+       stq     r28,    -8(rp)
+
+C  One more limb is stored from acc0 without adding in a limb from rp, and
+C  what remains (acc1 plus the nail carry out of acc0) is returned in r0.
+C  r0 doubles as m0a, which is no longer needed at this point.
+       addq    r19,    acc0,   acc0            C propagate nail
+       and     acc0,numb_mask, r28
+       stq     r28,    0(rp)
+       srl     acc0,NUMB_BITS, r19
+       addq    r19,    acc1,   r0
+
+       ret     r31,    (r26),  1
+EPILOGUE()
+ASM_END()