X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gmp%2Fmpn%2Falpha%2Faorslsh1_n.asm;fp=gmp%2Fmpn%2Falpha%2Faorslsh1_n.asm;h=3694f787616e8abb44eb460013e44dc125877311;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gmp/mpn/alpha/aorslsh1_n.asm b/gmp/mpn/alpha/aorslsh1_n.asm new file mode 100644 index 00000000..3694f787 --- /dev/null +++ b/gmp/mpn/alpha/aorslsh1_n.asm @@ -0,0 +1,234 @@ +dnl Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1). + +dnl Copyright 2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C EV4: 12.5 +C EV5: 6.25 +C EV6: 4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875) + +C TODO +C * Write special version for ev6, as this is a slowdown for 100 < n < 2200 +C compared to separate mpn_lshift and mpn_add_n. +C * Use addq instead of sll for left shift, and similarly cmplt instead of srl +C for right shift. + +dnl INPUT PARAMETERS +define(`rp',`r16') +define(`up',`r17') +define(`vp',`r18') +define(`n', `r19') + +define(`u0', `r8') +define(`u1', `r1') +define(`u2', `r2') +define(`u3', `r3') +define(`v0', `r4') +define(`v1', `r5') +define(`v2', `r6') +define(`v3', `r7') + +define(`cy0', `r0') +define(`cy1', `r20') +define(`cy', `r22') +define(`rr', `r24') +define(`ps', `r25') +define(`sl', `r28') + +ifdef(`OPERATION_addlsh1_n',` + define(ADDSUB, addq) + define(CARRY, `cmpult $1,$2,$3') + define(func, mpn_addlsh1_n) +') +ifdef(`OPERATION_sublsh1_n',` + define(ADDSUB, subq) + define(CARRY, `cmpult $2,$1,$3') + define(func, mpn_sublsh1_n) +') + +MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n) + +ASM_START() +PROLOGUE(func) + lda n, -4(n) + bis r31, r31, cy1 + and n, 3, r1 + beq r1, $Lb00 + cmpeq r1, 1, r2 + bne r2, $Lb01 + cmpeq r1, 2, r2 + bne r2, $Lb10 +$Lb11: C n = 3, 7, 11, ... + ldq v0, 0(vp) + ldq u0, 0(up) + ldq v1, 8(vp) + ldq u1, 8(up) + ldq v2, 16(vp) + ldq u2, 16(up) + lda vp, 24(vp) + lda up, 24(up) + bge n, $Loop + br r31, $Lcj3 +$Lb10: C n = 2, 6, 10, ... + bis r31, r31, cy0 + ldq v1, 0(vp) + ldq u1, 0(up) + ldq v2, 8(vp) + ldq u2, 8(up) + lda rp, -8(rp) + blt n, $Lcj2 + ldq v3, 16(vp) + ldq u3, 16(up) + lda vp, 48(vp) + lda up, 16(up) + br r31, $LL10 +$Lb01: C n = 1, 5, 9, ... + ldq v2, 0(vp) + ldq u2, 0(up) + lda rp, -16(rp) + blt n, $Lcj1 + ldq v3, 8(vp) + ldq u3, 8(up) + ldq v0, 16(vp) + ldq u0, 16(up) + lda vp, 40(vp) + lda up, 8(up) + lda rp, 32(rp) + br r31, $LL01 +$Lb00: C n = 4, 8, 12, ... + bis r31, r31, cy0 + ldq v3, 0(vp) + ldq u3, 0(up) + ldq v0, 8(vp) + ldq u0, 8(up) + ldq v1, 16(vp) + ldq u1, 16(up) + lda vp, 32(vp) + lda rp, 8(rp) + br r31, $LL00x + ALIGN(16) +C 0 +$Loop: sll v0, 1, sl C left shift vlimb + ldq v3, 0(vp) +C 1 + ADDSUB u0, sl, ps C ulimb + (vlimb << 1) + ldq u3, 0(up) +C 2 + ADDSUB ps, cy1, rr C consume carry from previous operation + srl v0, 63, cy0 C carry out #1 +C 3 + CARRY( ps, u0, cy) C carry out #2 + stq rr, 0(rp) +C 4 + addq cy, cy0, cy0 C combine carry out #1 and #2 + CARRY( rr, ps, cy) C carry out #3 +C 5 + addq cy, cy0, cy0 C final carry out + lda vp, 32(vp) C bookkeeping +C 6 +$LL10: sll v1, 1, sl + ldq v0, -24(vp) +C 7 + ADDSUB u1, sl, ps + ldq u0, 8(up) +C 8 + ADDSUB ps, cy0, rr + srl v1, 63, cy1 +C 9 + CARRY( ps, u1, cy) + stq rr, 8(rp) +C 10 + addq cy, cy1, cy1 + CARRY( rr, ps, cy) +C 11 + addq cy, cy1, cy1 + lda rp, 32(rp) C bookkeeping +C 12 +$LL01: sll v2, 1, sl + ldq v1, -16(vp) +C 13 + ADDSUB u2, sl, ps + ldq u1, 16(up) +C 14 + ADDSUB ps, cy1, rr + srl v2, 63, cy0 +C 15 + CARRY( ps, u2, cy) + stq rr, -16(rp) +C 16 + addq cy, cy0, cy0 + CARRY( rr, ps, cy) +C 17 + addq cy, cy0, cy0 +$LL00x: lda up, 32(up) C bookkeeping +C 18 + sll v3, 1, sl + ldq v2, -8(vp) +C 19 + ADDSUB u3, sl, ps + ldq u2, -8(up) +C 20 + ADDSUB ps, cy0, rr + srl v3, 63, cy1 +C 21 + CARRY( ps, u3, cy) + stq rr, -8(rp) +C 22 + addq cy, cy1, cy1 + CARRY( rr, ps, cy) +C 23 + addq cy, cy1, cy1 + lda n, -4(n) C bookkeeping +C 24 + bge n, $Loop + +$Lcj3: sll v0, 1, sl + ADDSUB u0, sl, ps + ADDSUB ps, cy1, rr + srl v0, 63, cy0 + CARRY( ps, u0, cy) + stq rr, 0(rp) + addq cy, cy0, cy0 + CARRY( rr, ps, cy) + addq cy, cy0, cy0 + +$Lcj2: sll v1, 1, sl + ADDSUB u1, sl, ps + ADDSUB ps, cy0, rr + srl v1, 63, cy1 + CARRY( ps, u1, cy) + stq rr, 8(rp) + addq cy, cy1, cy1 + CARRY( rr, ps, cy) + addq cy, cy1, cy1 + +$Lcj1: sll v2, 1, sl + ADDSUB u2, sl, ps + ADDSUB ps, cy1, rr + srl v2, 63, cy0 + CARRY( ps, u2, cy) + stq rr, 16(rp) + addq cy, cy0, cy0 + CARRY( rr, ps, cy) + addq cy, cy0, cy0 + + ret r31,(r26),1 +EPILOGUE() +ASM_END()