]> oss.titaniummirror.com Git - msp430-gcc.git/blobdiff - gmp/mpn/powerpc32/750/lshift.asm
Imported gcc-4.4.3
[msp430-gcc.git] / gmp / mpn / powerpc32 / 750 / lshift.asm
diff --git a/gmp/mpn/powerpc32/750/lshift.asm b/gmp/mpn/powerpc32/750/lshift.asm
new file mode 100644 (file)
index 0000000..9298793
--- /dev/null
@@ -0,0 +1,144 @@
+dnl  PowerPC 750 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C       cycles/limb
+C 750:     3.0
+C 7400:    3.0
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but
+C smaller and saving about 30 or so cycles of overhead.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+
+       C r3    dst
+       C r4    src
+       C r5    size
+       C r6    shift
+
+       mtctr   r5              C size
+       slwi    r5, r5, 2       C 4*size
+
+       subfic  r7, r6, 32      C 32-shift
+       add     r4, r4, r5      C &src[size]
+
+       add     r5, r3, r5      C &dst[size]
+       lwz     r8, -4(r4)      C src[size-1]
+       bdz     L(one)
+
+       lwzu    r9, -8(r4)      C src[size-2]
+
+       srw     r3, r8, r7      C return value
+       slw     r8, r8, r6      C src[size-1] << shift
+       bdz     L(two)
+
+
+L(top):
+       C r3    return value
+       C r4    src, incrementing
+       C r5    dst, incrementing
+       C r6    lshift
+       C r7    32-shift
+       C r8    src[i+1] << shift
+       C r9    src[i]
+       C r10
+
+       lwzu    r10, -4(r4)
+       srw     r11, r9, r7
+
+       or      r8, r8, r11
+       stwu    r8, -4(r5)
+
+       slw     r8, r9, r6
+       bdz     L(odd)
+
+       C r8    src[i+1] << shift
+       C r9
+       C r10   src[i]
+
+       lwzu    r9, -4(r4)
+       srw     r11, r10, r7
+
+       or      r8, r8, r11
+       stwu    r8, -4(r5)
+
+       slw     r8, r10, r6
+       bdnz    L(top)
+
+
+L(two):
+       C r3    return value
+       C r4
+       C r5    &dst[2]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[1] << shift
+       C r9    src[0]
+       C r10
+
+       srw     r11, r9, r7
+       slw     r12, r9, r6     C src[0] << shift
+
+       or      r8, r8, r11
+       stw     r12, -8(r5)     C dst[0]
+
+       stw     r8, -4(r5)      C dst[1]
+       blr
+
+
+L(odd):
+       C r3    return value
+       C r4
+       C r5    &dst[2]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[1] << shift
+       C r9
+       C r10   src[0]
+
+       srw     r11, r10, r7
+       slw     r12, r10, r6
+
+       or      r8, r8, r11
+       stw     r12, -8(r5)     C dst[0]
+
+       stw     r8, -4(r5)      C dst[1]
+       blr
+
+
+L(one):
+       C r5    &dst[1]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[0]
+
+       srw     r3, r8, r7      C return value
+       slw     r8, r8, r6      C src[size-1] << shift
+
+       stw     r8, -4(r5)      C dst[0]
+       blr
+
+EPILOGUE(mpn_lshift)