Imported gcc-4.4.3

[msp430-gcc.git] / gmp / mpn / x86 / divrem_2.asm
diff --git a/gmp/mpn/x86/divrem_2.asm b/gmp/mpn/x86/divrem_2.asm

new file mode 100644 (file)

index 0000000..bbadda9
--- /dev/null
+++ b/gmp/mpn/x86/divrem_2.asm
@@ -0,0 +1,188 @@
+dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C              norm    frac
+C 486
+C P5
+C P6-13                29.2
+C P6-15                *26
+C K6
+C K7           22
+C K8           *19
+C P4-f1
+C P4-f2                *65
+C P4-f3
+C P4-f4                *72
+
+C A star means numbers not updated for the latest version of the code.
+
+
+C TODO
+C  * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
+C  * The loop has not been carefully tuned.  We should at the very least do
+C    some local insn swapping.
+C  * The code outside the main loop is what gcc generated.  Clean up!
+C  * Clean up stack slot usage.
+
+C INPUT PARAMETERS
+C qp
+C fn
+C up_param
+C un_param
+C dp
+
+
+C eax ebx ecx edx esi edi ebp
+C         cnt         qp
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+C mpn_divrem_2 (qp, fn, up, un, dp)
+C
+C Stack arguments, at the offsets valid once the frame below is set up:
+C   56(%esp) qp   60(%esp) fn   64(%esp) up   68(%esp) un   72(%esp) dp
+C
+C Quotient limbs are stored at qp[fn+un-3] down to qp[0].  The two limb
+C remainder is written to up[0] and up[1].  The value returned in eax is
+C the most significant quotient limb (msl), which is 0 or 1.
+C
+C ebx, esi, edi and ebp are saved on entry and restored before returning.
+C Local stack slots: 0 q0 (also scratch while computing di), 4 -d1,
+C 12 pointer to the next dividend limb, 16 di, 20 d0, 24 d1, 32 msl.
+PROLOGUE(mpn_divrem_2)
+       push    %ebp
+       push    %edi
+       push    %esi
+       push    %ebx
+       sub     $36, %esp               C locals at 0..32(%esp), args from 56(%esp)
+       mov     68(%esp), %ecx          C un
+       mov     72(%esp), %esi          C dp
+       movl    $0, 32(%esp)            C msl = 0
+       lea     0(,%ecx,4), %edi
+       add     64(%esp), %edi          C up
+       mov     (%esi), %ebx
+       mov     4(%esi), %eax
+       mov     %ebx, 20(%esp)          C d0 = dp[0]
+       sub     $12, %edi               C edi = address of up[un-3]
+       mov     %eax, 24(%esp)          C d1 = dp[1]
+       mov     %edi, 12(%esp)          C saved for the loop, reloaded into esi
+       mov     8(%edi), %ebx           C n2 = up[un-1]
+       mov     4(%edi), %ebp           C n1 = up[un-2]
+       cmp     %eax, %ebx
+       jb      L(8)                    C n2 < d1, top two limbs are below d
+       seta    %dl                     C dl = (n2 > d1)
+       cmp     20(%esp), %ebp
+       setae   %al                     C al = (n1 >= d0)
+       or      %dl, %al
+       jne     L(35)                   C top two limbs >= d, subtract d once
+L(8):
+       mov     60(%esp), %esi          C fn
+       lea     -3(%esi,%ecx), %edi     C edi = fn + un - 3, top quotient index
+       test    %edi, %edi
+       js      L(9)                    C index negative, no limbs to produce
+       mov     24(%esp), %edx
+       mov     $-1, %esi
+       mov     %esi, %eax
+       mov     %esi, %ecx              C ecx = -1, collects the carries below
+       not     %edx
+       divl    24(%esp)                C eax = (~d1 * 2^32 + 2^32-1) / d1
+       mov     %eax, %esi              C esi = first estimate of di
+       imul    24(%esp), %eax
+       mov     %eax, (%esp)            C low limb of di * d1
+       mov     %esi, %eax
+       mull    20(%esp)                C edx:eax = di * d0
+       mov     (%esp), %eax
+       add     20(%esp), %eax
+       adc     $0, %ecx
+       add     %eax, %edx
+       adc     $0, %ecx
+       mov     %ecx, %eax              C mov leaves the flags from adc intact
+       js      L(32)                   C ecx still negative, neither add carried
+L(36): dec     %esi                    C di--
+       sub     24(%esp), %edx
+       sbb     $0, %eax
+       jns     L(36)                   C repeat until the carry word is negative
+L(32):
+       mov     %esi, 16(%esp)          C di
+       mov     %edi, %ecx              C un  (quotient index, starts at fn+un-3)
+       mov     12(%esp), %esi          C up  (address of up[un-3])
+       mov     24(%esp), %eax
+       neg     %eax
+       mov     %eax, 4(%esp)           C -d1
+C NOTE(review): the nop after ALIGN shifts the loop entry past the aligned
+C address; presumably a deliberate tuning choice, confirm before changing.
+       ALIGN(16)
+       nop
+
+C eax ebx ecx edx esi edi ebp  0    4   8   12  16  20  24  28  32   56  60
+C     n2  un      up      n1   q0  -d1          di  d0  d1      msl  qp  fn
+C
+C Loop state: ebx:ebp hold the two high limbs of the partial remainder (n2,
+C n1), esi points at the next dividend limb, and ecx is the index of the
+C quotient limb being produced.  Once ecx < fn a zero limb is used in place
+C of a dividend limb.  eax and edx are scratch; edi carries the quotient
+C limb q.
+
+L(loop):
+       mov     16(%esp), %eax          C di
+       mul     %ebx                    C edx:eax = di * n2
+       add     %ebp, %eax
+       mov     %eax, (%esp)            C q0
+       adc     %ebx, %edx              C edx:eax = di * n2 + n2:n1  (mod 2^64)
+       mov     %edx, %edi              C q
+       imul    4(%esp), %edx           C edx = -d1 * q  (low limb)
+       mov     20(%esp), %eax
+       lea     (%edx, %ebp), %ebx      C n1 -= ...
+       mul     %edi                    C edx:eax = d0 * q
+       xor     %ebp, %ebp              C next limb is 0 unless loaded below
+       cmp     60(%esp), %ecx
+       jl      L(19)                   C ecx < fn, no dividend limb to load
+       mov     (%esi), %ebp
+       sub     $4, %esi
+L(19): sub     20(%esp), %ebp
+       sbb     24(%esp), %ebx          C remainder -= d
+       sub     %eax, %ebp
+       sbb     %edx, %ebx              C remainder -= d0 * q
+       mov     20(%esp), %eax          C d0
+       inc     %edi                    C q++
+       xor     %edx, %edx
+       cmp     (%esp), %ebx
+       adc     $-1, %edx               C mask: -1 if ebx >= q0, else 0
+       add     %edx, %edi              C q--
+       and     %edx, %eax              C d0 or 0
+       and     24(%esp), %edx          C d1 or 0
+       add     %eax, %ebp
+       adc     %edx, %ebx              C add d back when the mask is set
+       cmp     24(%esp), %ebx
+       jae     L(fix)                  C high limb >= d1, check for one more step
+L(bck):        mov     56(%esp), %edx
+       mov     %edi, (%edx, %ecx, 4)   C qp[ecx] = q
+       dec     %ecx
+       jns     L(loop)
+
+L(9):  mov     64(%esp), %esi          C up
+       mov     %ebp, (%esi)            C up[0] = low remainder limb
+       mov     %ebx, 4(%esi)           C up[1] = high remainder limb
+       mov     32(%esp), %eax          C return msl
+       add     $36, %esp
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       pop     %ebp
+       ret
+
+C Reached from the loop when the high remainder limb is >= d1.  The flags
+C tested by seta are still those of the cmp that preceded the jae.  If the
+C remainder is >= d, q is incremented and d subtracted once more.
+L(fix):        seta    %dl             C dl = (high limb > d1)
+       cmp     20(%esp), %ebp
+       setae   %al                     C al = (low limb >= d0)
+       or      %dl, %al
+       je      L(bck)                  C remainder < d, q is final
+       inc     %edi
+       sub     20(%esp), %ebp
+       sbb     24(%esp), %ebx
+       jmp     L(bck)
+
+C Reached from the entry code when the top two dividend limbs are >= d.
+L(35): sub     20(%esp), %ebp
+       sbb     24(%esp), %ebx          C n2:n1 -= d
+       movl    $1, 32(%esp)            C msl = 1
+       jmp     L(8)
+EPILOGUE()