--- /dev/null
+dnl PowerPC-64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl Copyright 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630: 13-19
+C POWER4/PPC970: 16
+C POWER5: 16
+
+C TODO
+C * Check if n=1 code is really an improvement. It probably isn't.
+C * Perhaps remove L(norm) code, it is currently unreachable.
+C * Make more similar to mode1o.asm.
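+
+C Method: multiply by the modular inverse of d instead of dividing.
+C For odd d an inverse i with d*i == 1 (mod 2^64) exists, and since
+C the division is exact, the low half of the product of i and each
+C borrow-adjusted dividend limb is the quotient limb. A rough C
+C sketch of the main loop, assuming d odd, inv = 1/d mod 2^64, and
+C a hypothetical umulhi() giving the high 64 bits of a full product:
+C
+C    c = 0;                        /* running borrow */
+C    for (j = 0; j < n; j++)
+C      {
+C        l = up[j] - c;            /* borrow-adjusted limb */
+C        b = up[j] < c;            /* borrow out of the subtract */
+C        q = l * inv;              /* exact quotient limb mod 2^64 */
+C        rp[j] = q;
+C        c = b + umulhi (q, d);    /* high half of q*d joins borrow */
+C      }
+C
+C Even d is handled by shifting out its trailing zeros and shifting
+C the dividend limbs right by the same amount on the fly.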
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`d', `r6')
+
+
+ASM_START()
+
+EXTERN(binvert_limb_table)
+
+PROLOGUE(mpn_divexact_1)
+ addic. n, n, -1 C n--; cr0 set from the result
+ ld r12, 0(up) C load first limb
+ bne cr0, L(2) C more than one limb?
+ divdu r0, r12, d C n = 1: division is exact, plain divide suffices
+ std r0, 0(rp)
+ blr
+L(2):
+ rldicl. r0, d, 0, 63 C extract low bit: is d odd?
+ li r10, 0 C shift count 0 for odd d
+ bne cr0, L(7)
+ neg r0, d
+ and r0, d, r0 C d & -d isolates the lowest set bit
+ cntlzd r0, r0
+ subfic r0, r0, 63 C 63 - clz = number of trailing zeros
+ rldicl r10, r0, 0, 32 C shift count, zero-extended
+ srd d, d, r0 C shift out trailing zeros, making d odd
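+
+C Compute r7 = 1/d mod 2^64: fetch an 8-bit inverse from
+C binvert_limb_table (indexed by (d/2) mod 128, d odd), then apply
+C three Newton steps i = 2*i - d*i*i, each doubling the number of
+C correct low bits, 8 -> 16 -> 32 -> 64.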
+L(7):
+ mtctr n C loop count for n-1 limbs
+ LEA( r5, binvert_limb_table)
+ rldicl r11, d, 63, 57 C table index = (d/2) mod 128
+C cmpdi cr7, r0, 0
+ lbzx r0, r5, r11 C 8-bit inverse from table
+ mulld r9, r0, r0
+ sldi r0, r0, 1
+ mulld r9, d, r9
+ subf r0, r9, r0 C 16-bit inverse
+ mulld r5, r0, r0
+ sldi r0, r0, 1
+ mulld r5, d, r5
+ subf r0, r5, r0 C 32-bit inverse
+ mulld r9, r0, r0
+ sldi r0, r0, 1
+ mulld r9, d, r9
+ subf r7, r9, r0 C r7 = 1/d mod 2^64
+C beq cr7, L(norm)
+ subfic r8, r10, 64 C r8 = 64-shift; sets CA = 1 (no borrow) as side effect
+ li r5, 0 C initial borrow = 0
+
+ ALIGN(16)
+L(loop0):
+ srd r11, r12, r10 C low bits of this limb
+ ld r12, 8(up) C fetch next limb
+ addi up, up, 8
+ sld r0, r12, r8 C high bits come from the next limb
+ or r11, r11, r0 C assembled (shifted) dividend limb
+ subfe r9, r5, r11 C subtract borrow, using CA from last round
+ mulld r0, r7, r9 C q = limb * inverse mod 2^64
+ std r0, 0(rp)
+ addi rp, rp, 8
+ mulhdu r5, r0, d C high(q*d) feeds the next borrow
+ bdnz L(loop0)
+
+ srd r0, r12, r10 C final limb, nothing above it
+ subfe r0, r5, r0 C subtract final borrow
+ mulld r0, r7, r0 C final quotient limb
+ std r0, 0(rp)
+ blr
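+
+C L(norm) is a no-shift variant for odd d. It is currently
+C unreachable (see the TODO above); the branch to it is disabled.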
+
+ ALIGN(16)
+L(norm):
+ mulld r11, r12, r7 C first quotient limb
+ std r11, 0(rp)
+ ALIGN(16)
+L(loop1):
+ mulhdu r5, r11, d C high(q*d) = borrow to subtract
+ ld r9, 8(up)
+ addi up, up, 8
+ subfe r5, r5, r9 C subtract borrow from next limb
+ mulld r11, r7, r5 C next quotient limb
+ std r11, 8(rp)
+ addi rp, rp, 8
+ bdnz L(loop1)
+ blr
+EPILOGUE()
+ASM_END()