dnl  AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.

dnl  Copyright 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C K8:		 2.167
C P4:		12.0
C P6-15:	 4.0

C TODO
C  * Perhaps handle various n mod 3 sizes better.  The code now is too large.

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`ap',	`%rsi')
define(`bp_param', `%rdx')
define(`n',	`%rcx')
define(`u0',	`%r8')
define(`v0',	`%r9')

define(`bp',	`%rbp')

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_addaddmul_1msb0)
	push	%r12
	push	%rbp

	lea	(ap,n,8), ap
	lea	(bp_param,n,8), bp
	lea	(rp,n,8), rp
	neg	n

	mov	(ap,n,8), %rax
	mul	%r8
	mov	%rax, %r12
	mov	(bp,n,8), %rax
	mov	%rdx, %r10
	add	$3, n
	jns	L(end)

	ALIGN(16)
L(top):	mul	%r9
	add	%rax, %r12
	mov	-16(ap,n,8), %rax
	adc	%rdx, %r10
	mov	%r12, -24(rp,n,8)
	mul	%r8
	add	%rax, %r10
	mov	-16(bp,n,8), %rax
	mov	$0, %r11d
	adc	%rdx, %r11
	mul	%r9
	add	%rax, %r10
	mov	-8(ap,n,8), %rax
	adc	%rdx, %r11
	mov	%r10, -16(rp,n,8)
	mul	%r8
	add	%rax, %r11
	mov	-8(bp,n,8), %rax
	mov	$0, %r12d
	adc	%rdx, %r12
	mul	%r9
	add	%rax, %r11
	adc	%rdx, %r12
	mov	(ap,n,8), %rax
	mul	%r8
	add	%rax, %r12
	mov	%r11, -8(rp,n,8)
	mov	(bp,n,8), %rax
	mov	$0, %r10d
	adc	%rdx, %r10
	add	$3, n
	js	L(top)

L(end):	cmp	$1, R32(n)
	ja	2f
	jz	1f

	mul	%r9
	add	%rax, %r12
	mov	-16(ap), %rax
	adc	%rdx, %r10
	mov	%r12, -24(rp)
	mul	%r8
	add	%rax, %r10
	mov	-16(bp), %rax
	mov	$0, %r11d
	adc	%rdx, %r11
	mul	%r9
	add	%rax, %r10
	mov	-8(ap), %rax
	adc	%rdx, %r11
	mov	%r10, -16(rp)
	mul	%r8
	add	%rax, %r11
	mov	-8(bp), %rax
	mov	$0, %r12d
	adc	%rdx, %r12
	mul	%r9
	add	%rax, %r11
	adc	%rdx, %r12
	mov	%r11, -8(rp)
	mov	%r12, %rax
	pop	%rbp
	pop	%r12
	ret
1:	mul	%r9
	add	%rax, %r12
	mov	-8(ap), %rax
	adc	%rdx, %r10
	mov	%r12, -16(rp)
	mul	%r8
	add	%rax, %r10
	mov	-8(bp), %rax
	mov	$0, %r11d
	adc	%rdx, %r11
	mul	%r9
	add	%rax, %r10
	adc	%rdx, %r11
	mov	%r10, -8(rp)
	mov	%r11, %rax
	pop	%rbp
	pop	%r12
	ret
2:	mul	%r9
	add	%rax, %r12
	mov	%r12, -8(rp)
	adc	%rdx, %r10
	mov	%r10, %rax
	pop	%rbp
	pop	%r12
	ret
EPILOGUE()
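
C  Reference sketch (not part of the original file): the operation R = Au + Bv
C  can be expressed with the standard GMP entry points mpn_mul_1 and
C  mpn_addmul_1; the u,v < 2^63 restriction guarantees the two returned
C  carry limbs sum without overflow.  Illustrative only; the helper name
C  addaddmul_1msb0_ref is made up here.
C
C	mp_limb_t
C	addaddmul_1msb0_ref (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
C			     mp_size_t n, mp_limb_t u, mp_limb_t v)
C	{
C	  mp_limb_t cy;
C	  cy = mpn_mul_1 (rp, ap, n, u);	/* rp[] = ap[] * u       */
C	  cy += mpn_addmul_1 (rp, bp, n, v);	/* rp[] += bp[] * v      */
C	  return cy;				/* most significant limb */
C	}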