dnl  AMD K7 mpn_mul_1.

dnl  Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation,
dnl  Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C			    cycles/limb
C P5:
C P6 model 0-8,10-12:
C P6 model 9  (Banias):
C P6 model 13 (Dothan):
C P4 model 0  (Willamette):
C P4 model 1  (?):
C P4 model 2  (Northwood):
C P4 model 3  (Prescott):
C P4 model 4  (Nocona):
C K6:
C K7:				 3.25
C K8:

C TODO
C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
C    but we might be able to do even better.
C  * The feed-in code for mul_1c is crude.
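C The entry points implement the standard GMP prototypes
C
C   mp_limb_t mpn_mul_1  (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v);
C   mp_limb_t mpn_mul_1c (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v,
C                         mp_limb_t carry);
C
C As a reference for reading the feed-in paths and the unrolled loop below,
C here is a minimal C sketch of the operation (an illustration only, using
C the umul_ppmm macro from longlong.h; ref_mul_1c is a hypothetical name,
C not code this assembly was derived from):
C
C   mp_limb_t
C   ref_mul_1c (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v,
C               mp_limb_t cy)
C   {
C     mp_size_t i;
C     for (i = 0; i < n; i++)
C       {
C         mp_limb_t hi, lo;
C         umul_ppmm (hi, lo, up[i], v);   /* 32x32->64, like mul %ecx */
C         lo += cy;                       /* add incoming carry limb */
C         cy = hi + (lo < cy);            /* carry out; cannot overflow */
C         rp[i] = lo;
C       }
C     return cy;                          /* most significant product limb */
C   }
C
C mpn_mul_1 is the cy = 0 case.  Below, rp lives in %edi, up in %esi, v in
C %ecx, and floor(n/4) in 28(%esp); the and/cmp on n mod 4 dispatches to one
C of four feed-in paths into the 4-way unrolled loop.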
ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_mul_1c)
	add	$-16, %esp
	mov	%ebp, (%esp)
	mov	%ebx, 4(%esp)
	mov	%esi, 8(%esp)
	mov	%edi, 12(%esp)

	mov	20(%esp), %edi
	mov	24(%esp), %esi
	mov	28(%esp), %ebp
	mov	32(%esp), %ecx

	mov	%ebp, %ebx
	shr	$2, %ebp
	mov	%ebp, 28(%esp)
	mov	(%esi), %eax
	and	$3, %ebx
	jz	L(c0)
	cmp	$2, %ebx
	mov	36(%esp), %ebx
	jz	L(c2)
	jg	L(c3)

L(c1):	lea	-4(%edi), %edi
	mul	%ecx
	test	%ebp, %ebp
	jnz	1f
	add	%ebx, %eax
	mov	%eax, 4(%edi)
	mov	%edx, %eax
	adc	%ebp, %eax
	jmp	L(rt)
1:	add	%eax, %ebx
	mov	$0, %ebp
	adc	%edx, %ebp
	mov	4(%esi), %eax
	jmp	L(1)

L(c2):	lea	4(%esi), %esi
	mul	%ecx
	test	%ebp, %ebp
	mov	%ebx, %ebp
	jnz	2f
	add	%eax, %ebp
	mov	$0, %ebx
	adc	%edx, %ebx
	mov	(%esi), %eax
	jmp	L(cj2)
2:	add	%eax, %ebp
	mov	$0, %ebx
	adc	%edx, %ebx
	mov	(%esi), %eax
	jmp	L(2)

L(c3):	lea	8(%esi), %esi
	lea	-12(%edi), %edi
	mul	%ecx
	add	%eax, %ebx
	mov	$0, %ebp
	adc	%edx, %ebp
	mov	-4(%esi), %eax
	incl	28(%esp)
	jmp	L(3)

L(c0):	mov	36(%esp), %ebx
	lea	-4(%esi), %esi
	lea	-8(%edi), %edi
	mul	%ecx
	mov	%ebx, %ebp
	add	%eax, %ebp
	mov	$0, %ebx
	adc	%edx, %ebx
	mov	8(%esi), %eax
	jmp	L(0)
EPILOGUE()

	ALIGN(16)
PROLOGUE(mpn_mul_1)
	add	$-16, %esp
	mov	%ebp, (%esp)
	mov	%ebx, 4(%esp)
	mov	%esi, 8(%esp)
	mov	%edi, 12(%esp)

	mov	20(%esp), %edi
	mov	24(%esp), %esi
	mov	28(%esp), %ebp
	mov	32(%esp), %ecx

	mov	%ebp, %ebx
	shr	$2, %ebp
	mov	%ebp, 28(%esp)
	mov	(%esi), %eax
	and	$3, %ebx
	jz	L(b0)
	cmp	$2, %ebx
	jz	L(b2)
	jg	L(b3)

L(b1):	lea	-4(%edi), %edi
	mul	%ecx
	test	%ebp, %ebp
	jnz	L(gt1)
	mov	%eax, 4(%edi)
	mov	%edx, %eax
	jmp	L(rt)
L(gt1):	mov	%eax, %ebx
	mov	%edx, %ebp
	mov	4(%esi), %eax
	jmp	L(1)

L(b2):	lea	4(%esi), %esi
	mul	%ecx
	test	%ebp, %ebp
	mov	%eax, %ebp
	mov	%edx, %ebx
	mov	(%esi), %eax
	jnz	L(2)
	jmp	L(cj2)

L(b3):	lea	8(%esi), %esi
	lea	-12(%edi), %edi
	mul	%ecx
	mov	%eax, %ebx
	mov	%edx, %ebp
	mov	-4(%esi), %eax
	incl	28(%esp)
	jmp	L(3)

L(b0):	lea	-4(%esi), %esi
	lea	-8(%edi), %edi
	mul	%ecx
	mov	%eax, %ebp
	mov	%edx, %ebx
	mov	8(%esi), %eax
	jmp	L(0)

	ALIGN(16)
L(top):	mov	$0, %ebx
	adc	%edx, %ebx
L(2):	mul	%ecx
	add	%eax, %ebx
	mov	%ebp, 0(%edi)
	mov	4(%esi), %eax
	mov	$0, %ebp
	adc	%edx, %ebp
L(1):	mul	%ecx
	add	%eax, %ebp
	mov	8(%esi), %eax
	mov	%ebx, 4(%edi)
	mov	$0, %ebx
	adc	%edx, %ebx
L(0):	mov	%ebp, 8(%edi)
	mul	%ecx
	add	%eax, %ebx
	mov	12(%esi), %eax
	lea	16(%esi), %esi
	mov	$0, %ebp
	adc	%edx, %ebp
L(3):	mov	%ebx, 12(%edi)
	mul	%ecx
	lea	16(%edi), %edi
	add	%eax, %ebp
	decl	28(%esp)
	mov	0(%esi), %eax
	jnz	L(top)

L(end):	mov	$0, %ebx
	adc	%edx, %ebx
L(cj2):	mul	%ecx
	add	%eax, %ebx
	mov	%ebp, (%edi)
L(cj1):	mov	%ebx, 4(%edi)
	adc	$0, %edx
	mov	%edx, %eax

L(rt):	mov	(%esp), %ebp
	mov	4(%esp), %ebx
	mov	8(%esp), %esi
	mov	12(%esp), %edi
	add	$16, %esp
	ret
EPILOGUE()
ASM_END()