Imported gcc-4.4.3

[msp430-gcc.git] / gmp / mpn / powerpc64 / mode64 / mode1o.asm
diff --git a/gmp/mpn/powerpc64/mode64/mode1o.asm b/gmp/mpn/powerpc64/mode64/mode1o.asm

new file mode 100644 (file)

index 0000000..95aa287
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/mode1o.asm
@@ -0,0 +1,104 @@
+dnl  PowerPC-64 mpn_modexact_1_odd -- mpn by limb exact remainder.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb
+C POWER3/PPC630:    13-19
+C POWER4/PPC970:     16
+C POWER5:            16
+
+C TODO
+C  * Check if n=1 code is really an improvment.  It probably isn't.
+C  * Make more similar to dive_1.asm..
+
+C INPUT PARAMETERS
+define(`up', `r3')
+define(`n',  `r4')
+define(`d',  `r5')
+define(`cy', `r6')
+
+
+ASM_START()
+
+EXTERN(binvert_limb_table)
+
+PROLOGUE(mpn_modexact_1c_odd)
+       addic.  n, n, -1                C set carry as side effect
+       ld      r8, 0(up)
+       bne     cr0, L(2)
+       cmpld   cr7, r6, r8
+       bge     cr7, L(4)
+       subf    r8, r6, r8
+       divdu   r3, r8, d
+       mulld   r3, r3, d
+       subf.   r3, r3, r8
+       beqlr   cr0
+       subf    r3, r3, d
+       blr
+
+L(4):  subf    r3, r8, r6
+       divdu   r8, r3, d
+       mulld   r8, r8, d
+       subf    r3, r8, r3
+       blr
+
+L(2):  LEA(    r7, binvert_limb_table)
+       rldicl  r9, d, 63, 57
+       mtctr   n
+       lbzx    r0, r7, r9
+       mulld   r7, r0, r0
+       sldi    r0, r0, 1
+       mulld   r7, d, r7
+       subf    r0, r7, r0
+       mulld   r9, r0, r0
+       sldi    r0, r0, 1
+       mulld   r9, d, r9
+       subf    r0, r9, r0
+       mulld   r7, r0, r0
+       sldi    r0, r0, 1
+       mulld   r7, d, r7
+       subf    r9, r7, r0
+
+       ALIGN(16)
+L(loop):
+       subfe   r0, r6, r8
+       ld      r8, 8(up)
+       addi    up, up, 8
+       mulld   r0, r9, r0
+       mulhdu  r6, r0, d
+       bdnz    L(loop)
+
+       cmpld   cr7, d, r8
+       blt     cr7, L(10)
+
+       subfe   r0, r0, r0
+       subf    r6, r0, r6
+       cmpld   cr7, r6, r8
+       subf    r3, r8, r6
+       bgelr   cr7
+       add     r3, d, r3
+       blr
+
+L(10): subfe   r0, r6, r8
+       mulld   r0, r9, r0
+       mulhdu  r3, r0, d
+       blr
+EPILOGUE()
+ASM_END()