dnl HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and dnl subtract the result from a second limb vector. dnl Copyright 1995, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl The GNU MP Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published dnl by the Free Software Foundation; either version 3 of the License, or (at dnl your option) any later version. dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl You should have received a copy of the GNU Lesser General Public License dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C INPUT PARAMETERS define(`res_ptr',`%r26') define(`s1_ptr',`%r25') define(`size_param',`%r24') define(`s2_limb',`%r23') define(`cylimb',`%r28') define(`s0',`%r19') define(`s1',`%r20') define(`s2',`%r3') define(`s3',`%r4') define(`lo0',`%r21') define(`lo1',`%r5') define(`lo2',`%r6') define(`lo3',`%r7') define(`hi0',`%r22') define(`hi1',`%r23') C safe to reuse define(`hi2',`%r29') define(`hi3',`%r1') ASM_START() PROLOGUE(mpn_submul_1) C .callinfo frame=128,no_calls ldo 128(%r30),%r30 stws s2_limb,-16(%r30) add %r0,%r0,cylimb C clear cy and cylimb addib,< -4,size_param,L(few_limbs) fldws -16(%r30),%fr31R ldo -112(%r30),%r31 stw %r3,-96(%r30) stw %r4,-92(%r30) stw %r5,-88(%r30) stw %r6,-84(%r30) stw %r7,-80(%r30) bb,>=,n s1_ptr,29,L(0) fldws,ma 4(s1_ptr),%fr4 ldws 0(res_ptr),s0 xmpyu %fr4,%fr31R,%fr5 fstds %fr5,-16(%r31) ldws -16(%r31),cylimb ldws -12(%r31),lo0 sub s0,lo0,s0 add s0,lo0,%r0 C invert cy addib,< -1,size_param,L(few_limbs) stws,ma s0,4(res_ptr) C start software pipeline ---------------------------------------------------- LDEF(0) fldds,ma 8(s1_ptr),%fr4 fldds,ma 8(s1_ptr),%fr8 xmpyu %fr4L,%fr31R,%fr5 xmpyu %fr4R,%fr31R,%fr6 xmpyu %fr8L,%fr31R,%fr9 xmpyu %fr8R,%fr31R,%fr10 fstds %fr5,-16(%r31) fstds %fr6,-8(%r31) fstds %fr9,0(%r31) fstds %fr10,8(%r31) ldws -16(%r31),hi0 ldws -12(%r31),lo0 ldws -8(%r31),hi1 ldws -4(%r31),lo1 ldws 0(%r31),hi2 ldws 4(%r31),lo2 ldws 8(%r31),hi3 ldws 12(%r31),lo3 addc lo0,cylimb,lo0 addc lo1,hi0,lo1 addc lo2,hi1,lo2 addc lo3,hi2,lo3 addib,< -4,size_param,L(end) addc %r0,hi3,cylimb C propagate carry into cylimb C main loop ------------------------------------------------------------------ LDEF(loop) fldds,ma 8(s1_ptr),%fr4 fldds,ma 8(s1_ptr),%fr8 ldws 0(res_ptr),s0 xmpyu %fr4L,%fr31R,%fr5 ldws 4(res_ptr),s1 xmpyu %fr4R,%fr31R,%fr6 ldws 8(res_ptr),s2 xmpyu %fr8L,%fr31R,%fr9 ldws 12(res_ptr),s3 xmpyu %fr8R,%fr31R,%fr10 fstds %fr5,-16(%r31) sub s0,lo0,s0 fstds %fr6,-8(%r31) subb s1,lo1,s1 fstds %fr9,0(%r31) subb s2,lo2,s2 fstds %fr10,8(%r31) subb s3,lo3,s3 subb %r0,%r0,lo0 C these two insns ... add lo0,lo0,%r0 C ... just invert cy ldws -16(%r31),hi0 ldws -12(%r31),lo0 ldws -8(%r31),hi1 ldws -4(%r31),lo1 ldws 0(%r31),hi2 ldws 4(%r31),lo2 ldws 8(%r31),hi3 ldws 12(%r31),lo3 addc lo0,cylimb,lo0 stws,ma s0,4(res_ptr) addc lo1,hi0,lo1 stws,ma s1,4(res_ptr) addc lo2,hi1,lo2 stws,ma s2,4(res_ptr) addc lo3,hi2,lo3 stws,ma s3,4(res_ptr) addib,>= -4,size_param,L(loop) addc %r0,hi3,cylimb C propagate carry into cylimb C finish software pipeline --------------------------------------------------- LDEF(end) ldws 0(res_ptr),s0 ldws 4(res_ptr),s1 ldws 8(res_ptr),s2 ldws 12(res_ptr),s3 sub s0,lo0,s0 stws,ma s0,4(res_ptr) subb s1,lo1,s1 stws,ma s1,4(res_ptr) subb s2,lo2,s2 stws,ma s2,4(res_ptr) subb s3,lo3,s3 stws,ma s3,4(res_ptr) subb %r0,%r0,lo0 C these two insns ... add lo0,lo0,%r0 C ... invert cy C restore callee-saves registers --------------------------------------------- ldw -96(%r30),%r3 ldw -92(%r30),%r4 ldw -88(%r30),%r5 ldw -84(%r30),%r6 ldw -80(%r30),%r7 LDEF(few_limbs) addib,=,n 4,size_param,L(ret) LDEF(loop2) fldws,ma 4(s1_ptr),%fr4 ldws 0(res_ptr),s0 xmpyu %fr4,%fr31R,%fr5 fstds %fr5,-16(%r30) ldws -16(%r30),hi0 ldws -12(%r30),lo0 addc lo0,cylimb,lo0 addc %r0,hi0,cylimb sub s0,lo0,s0 add s0,lo0,%r0 C invert cy stws,ma s0,4(res_ptr) addib,<> -1,size_param,L(loop2) nop LDEF(ret) addc %r0,cylimb,cylimb bv 0(%r2) ldo -128(%r30),%r30 EPILOGUE(mpn_submul_1)