X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gmp%2Fmpn%2Fpowerpc64%2Fmode64%2Fmode1o.asm;fp=gmp%2Fmpn%2Fpowerpc64%2Fmode64%2Fmode1o.asm;h=95aa2870da0f4fe5c47970091040ee57a57ec47f;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git

diff --git a/gmp/mpn/powerpc64/mode64/mode1o.asm b/gmp/mpn/powerpc64/mode64/mode1o.asm
new file mode 100644
index 00000000..95aa2870
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/mode1o.asm
@@ -0,0 +1,104 @@
+dnl  PowerPC-64 mpn_modexact_1_odd -- mpn by limb exact remainder.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630:       13-19
+C POWER4/PPC970:        16
+C POWER5:               16
+
+C TODO
+C  * Check if n=1 code is really an improvement.  It probably isn't.
+C  * Make more similar to dive_1.asm.
+
+C INPUT PARAMETERS
+define(`up', `r3')      C source pointer; limbs are read at 0(up), then 8(up) per loop pass
+define(`n', `r4')       C limb count; n-1 becomes the loop trip count
+define(`d', `r5')       C divisor limb; the table index ignores bit 0, so presumably odd - confirm
+define(`cy', `r6')      C carry-in limb; r6 also holds the running carry limb inside the loop
+C Result is returned in r3.  Also written: r0, r4, r6-r9, CTR, cr0, cr7, CA.
+
+ASM_START()
+
+EXTERN(binvert_limb_table)
+C n = 1 is reduced with divdu; n > 1 runs the mulld/mulhdu loop with a table-seeded multiplier.
+PROLOGUE(mpn_modexact_1c_odd)
+       addic.  n, n, -1                C set carry as side effect; cr0 reflects n-1
+       ld      r8, 0(up)               C r8 = first limb
+       bne     cr0, L(2)               C n-1 != 0: take the general path
+       cmpld   cr7, r6, r8             C n = 1: unsigned compare of cy with the limb
+       bge     cr7, L(4)               C cy >= limb
+       subf    r8, r6, r8              C r8 = limb - cy
+       divdu   r3, r8, d               C r3 = r8 / d, unsigned
+       mulld   r3, r3, d               C r3 = quotient * d
+       subf.   r3, r3, r8              C r3 = r8 mod d; cr0 reflects it
+       beqlr   cr0                     C remainder is zero: return 0
+       subf    r3, r3, d               C otherwise return d - remainder
+       blr
+
+L(4):  subf    r3, r8, r6              C r3 = cy - limb
+       divdu   r8, r3, d               C r8 = r3 / d, unsigned
+       mulld   r8, r8, d               C r8 = quotient * d
+       subf    r3, r8, r3              C return r3 mod d
+       blr
+
+L(2):  LEA(    r7, binvert_limb_table) C r7 = table address; LEA is defined outside this file
+       rldicl  r9, d, 63, 57           C r9 = (d >> 1) & 0x7f, the table index
+       mtctr   n                       C CTR = n-1 loop passes
+       lbzx    r0, r7, r9              C r0 = table byte, the starting value x
+       mulld   r7, r0, r0              C round 1 of x = 2*x - d*x*x: r7 = x*x
+       sldi    r0, r0, 1               C r0 = 2*x
+       mulld   r7, d, r7               C r7 = d*x*x
+       subf    r0, r7, r0              C r0 = 2*x - d*x*x
+       mulld   r9, r0, r0              C round 2, same recurrence with r9 as scratch
+       sldi    r0, r0, 1
+       mulld   r9, d, r9
+       subf    r0, r9, r0
+       mulld   r7, r0, r0              C round 3
+       sldi    r0, r0, 1
+       mulld   r7, d, r7
+       subf    r9, r7, r0              C r9 = final x; presumably the inverse of d mod 2^64 - confirm
+
+       ALIGN(16)
+L(loop):
+       subfe   r0, r6, r8              C r0 = r8 - r6 - (1 - CA); CA is from addic. on pass 1, then from the previous subfe
+       ld      r8, 8(up)               C fetch the next limb
+       addi    up, up, 8               C advance the source pointer by one limb
+       mulld   r0, r9, r0              C r0 = low 64 bits of r9 * r0
+       mulhdu  r6, r0, d               C r6 = high 64 bits of r0 * d, the next carry limb
+       bdnz    L(loop)                 C decrement CTR and repeat while it is nonzero
+
+       cmpld   cr7, d, r8              C r8 is the last limb loaded by the loop
+       blt     cr7, L(10)              C d < r8: run one more loop-style step
+
+       subfe   r0, r0, r0              C r0 = CA - 1: zero, or -1 when a borrow is pending
+       subf    r6, r0, r6              C r6 = r6 - r0, folding the pending borrow into the carry limb
+       cmpld   cr7, r6, r8             C unsigned compare of the carry limb with the last limb
+       subf    r3, r8, r6              C r3 = r6 - r8
+       bgelr   cr7                     C r6 >= r8: return the difference
+       add     r3, d, r3               C otherwise add d back and return
+       blr
+
+L(10): subfe   r0, r6, r8              C same step as the loop body, with no further load
+       mulld   r0, r9, r0              C r0 = low 64 bits of r9 * r0
+       mulhdu  r3, r0, d               C return the high 64 bits of r0 * d
+       blr
+EPILOGUE()
+ASM_END()