Imported gcc-4.4.3

[msp430-gcc.git] / gmp / mpn / powerpc32 / 750 / rshift.asm
diff --git a/gmp/mpn/powerpc32/750/rshift.asm b/gmp/mpn/powerpc32/750/rshift.asm

new file mode 100644 (file)

index 0000000..944e869
--- /dev/null
+++ b/gmp/mpn/powerpc32/750/rshift.asm
@@ -0,0 +1,142 @@
+dnl  PowerPC 750 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C       cycles/limb
+C 750:     3.0
+C 7400:    3.0
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but
+C smaller and saving about 30 or so cycles of overhead.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+
+       C r3    dst
+       C r4    src
+       C r5    size
+       C r6    shift
+
+       mtctr   r5              C size
+       lwz     r8, 0(r4)       C src[0]
+
+       subfic  r7, r6, 32      C 32-shift
+       addi    r5, r3, -4      C dst-4
+
+       slw     r3, r8, r7      C return value
+       bdz     L(one)
+
+       lwzu    r9, 4(r4)       C src[1]
+       srw     r8, r8, r6      C src[0] >> shift
+       bdz     L(two)
+
+
+L(top):
+       C r3    return value
+       C r4    src, incrementing
+       C r5    dst, incrementing
+       C r6    shift
+       C r7    32-shift
+       C r8    src[i-1] >> shift
+       C r9    src[i]
+       C r10
+
+       lwzu    r10, 4(r4)
+       slw     r11, r9, r7
+
+       or      r8, r8, r11
+       stwu    r8, 4(r5)
+
+       srw     r8, r9, r6
+       bdz     L(odd)
+
+       C r8    src[i-1] >> shift
+       C r9
+       C r10   src[i]
+
+       lwzu    r9, 4(r4)
+       slw     r11, r10, r7
+
+       or      r8, r8, r11
+       stwu    r8, 4(r5)
+
+       srw     r8, r10, r6
+       bdnz    L(top)
+
+
+L(two):
+       C r3    return value
+       C r4
+       C r5    &dst[size-2]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[size-2] >> shift
+       C r9    src[size-1]
+       C r10
+
+       slw     r11, r9, r7
+       srw     r12, r9, r6     C src[size-1] >> shift
+
+       or      r8, r8, r11
+       stw     r12, 8(r5)      C dst[size-1]
+
+       stw     r8, 4(r5)       C dst[size-2]
+       blr
+
+
+L(odd):
+       C r3    return value
+       C r4
+       C r5    &dst[size-2]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[size-2] >> shift
+       C r9
+       C r10   src[size-1]
+
+       slw     r11, r10, r7
+       srw     r12, r10, r6
+
+       or      r8, r8, r11
+       stw     r12, 8(r5)      C dst[size-1]
+
+       stw     r8, 4(r5)       C dst[size-2]
+       blr
+
+
+L(one):
+       C r3    return value
+       C r4
+       C r5    dst-4
+       C r6    shift
+       C r7
+       C r8    src[0]
+
+       srw     r8, r8, r6
+
+       stw     r8, 4(r5)       C dst[0]
+       blr
+
+EPILOGUE(mpn_rshift)