Imported gcc-4.4.3

[msp430-gcc.git] / gcc / config / mips / mips16.S
diff --git a/gcc/config/mips/mips16.S b/gcc/config/mips/mips16.S

index 2be57ba6d4bfaf1a110d916d3791748f300e7507..bab7b7942b52d4b061db8fd39e6c807b80d3a7bb 100644 (file)
--- a/gcc/config/mips/mips16.S
+++ b/gcc/config/mips/mips16.S
@@ -1,36 +1,25 @@
  /* mips16 floating point support code
-   Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc.
     Contributed by Cygnus Support
  
  This file is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
+Free Software Foundation; either version 3, or (at your option) any
  later version.
  
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file with other programs, and to distribute
-those programs without any restriction coming from the use of this
-file.  (The General Public License restrictions do apply in other
-respects; for example, they cover modification of the file, and
-distribution when not linked into another program.)
-
  This file is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.
  
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
  
-/* As a special exception, if you link this library with other files,
-   some of which are compiled with GCC, to produce an executable,
-   this library does not by itself cause the resulting executable
-   to be covered by the GNU General Public License.
-   This exception does not however invalidate any other reasons why
-   the executable file might be covered by the GNU General Public License.  */
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
  
  /* This file contains mips16 floating point support functions.  These
     functions are called by mips16 code to handle floating point when
@@ -38,7 +27,9 @@ Boston, MA 02111-1307, USA.  */
     values using the soft-float calling convention, but do the actual
     operation using the hard floating point instructions.  */
  
-/* This file contains 32 bit assembly code.  */
+#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
+
+/* This file contains 32-bit assembly code.  */
         .set nomips16
  
  /* Start a function.  */
@@ -49,69 +40,204 @@ Boston, MA 02111-1307, USA.  */
  
  #define ENDFN(NAME) .end NAME
  
-/* Single precision math.  */
+/* Symbolic names for the FPRs used by the helper functions below.
+
+   ARG1
+       The FPR that holds the first floating-point argument ($f12).
+
+   ARG2
+       The FPR that holds the second floating-point argument
+       ($f13 when __mips64 is defined, $f14 otherwise).
+
+   RET
+       The FPR that holds a floating-point return value ($f0).  */
+
+#define RET $f0
+#define ARG1 $f12
+#ifdef __mips64
+#define ARG2 $f13
+#else
+#define ARG2 $f14
+#endif
+
+/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR
+   and so that its low 32 bits contain LOW_FPR.
+
+   On a 64-bit target mfc1 sign-extends the 32-bit FPR word into the
+   destination GPR.  The low word is therefore zero-extended (shift
+   left then logical shift right by 32) before it is merged; without
+   this, a low word with bit 31 set would OR ones over the high word.
+
+   Clobbers $1 ($at), which is why the sequence is bracketed by
+   ".set noat" / ".set at".  */
+#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR)     \
+       .set    noat;                           \
+       mfc1    $1, LOW_FPR;                    \
+       mfc1    GPR, HIGH_FPR;                  \
+       dsll    $1, $1, 32;                     \
+       dsll    GPR, GPR, 32;                   \
+       dsrl    $1, $1, 32;                     \
+       or      GPR, GPR, $1;                   \
+       .set    at
+
+/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of
+   GPR to LOW_FPR.
+
+   GPR itself is only read.  The high word is extracted into $1 ($at)
+   with a logical right shift, so $1 is clobbered; ".set noat" is in
+   effect for the duration of the sequence.  */
+#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR)     \
+       .set    noat;                           \
+       dsrl    $1, GPR, 32;                    \
+       mtc1    GPR, LOW_FPR;                   \
+       mtc1    $1, HIGH_FPR;                   \
+       .set    at
+
+/* Jump to T, and use "OPCODE, OP2" to implement a delayed move.
+
+   The instruction "OPCODE, OP2" is emitted immediately after the jr
+   with reordering disabled, i.e. in the branch delay slot of the jump.
+   The instruction arrives as two macro arguments because the comma
+   between its operands splits it (see the MOVE_*_RET users).  */
+#define DELAYt(T, OPCODE, OP2)                 \
+       .set    noreorder;                      \
+       jr      T;                              \
+       OPCODE, OP2;                            \
+       .set    reorder
+
+/* Use "OPCODE, OP2" and jump to T.  Unlike DELAYt, the instruction is
+   emitted before the jump rather than after it, and no noreorder
+   region is opened here.  */
+#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T
+
+/* MOVE_SF_BYTE0(D)
+       Move the first single-precision floating-point argument between
+       GPRs and FPRs.
+
+   MOVE_SI_BYTE0(D)
+       Likewise the first single-precision integer argument.
+
+   MOVE_SF_BYTE4(D)
+       Move the second single-precision floating-point argument between
+       GPRs and FPRs, given that the first argument occupies 4 bytes.
+
+   MOVE_SF_BYTE8(D)
+       Move the second single-precision floating-point argument between
+       GPRs and FPRs, given that the first argument occupies 8 bytes.
+
+   MOVE_DF_BYTE0(D)
+       Move the first double-precision floating-point argument between
+       GPRs and FPRs.
+
+   MOVE_DF_BYTE8(D)
+       Likewise the second double-precision floating-point argument.
  
-/* This macro defines a function which loads two single precision
-   values, performs an operation, and returns the single precision
-   result.  */
+   MOVE_SF_RET(D, T)
+       Likewise a single-precision floating-point return value,
+       then jump to T.
+
+   MOVE_SC_RET(D, T)
+       Likewise a complex single-precision floating-point return value.
+
+   MOVE_DF_RET(D, T)
+       Likewise a double-precision floating-point return value.
+
+   MOVE_DC_RET(D, T)
+       Likewise a complex double-precision floating-point return value.
+
+   MOVE_SI_RET(D, T)
+       Likewise a single-precision integer return value.
+
+   The D argument is "t" to move to FPRs and "f" to move from FPRs.
+   The return macros may assume that the target of the jump does not
+   use a floating-point register.  */
+
+/* A 32-bit return value travels between $2 and $f0.  The move is
+   handed to DELAYt or DELAYf (selected by D), which also emits the
+   jump to T.  The integer variant uses the same register pair as the
+   single-precision float variant.  */
+#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0)
+#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0)
+
+/* MOVE_SC_RET: complex single-precision return value.
+   With __mips64 both 32-bit parts share the single GPR $2 and are
+   packed/unpacked with MERGE_GPRt/MERGE_GPRf; the FPRs involved are
+   $f0 and $f1.  Otherwise the parts use the GPR pair $2/$3.  */
+#if defined(__mips64) && defined(__MIPSEB__)
+/* Big-endian: $f0 maps to the high 32 bits of $2 and $f1 to the low
+   32 bits.  */
+#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T
+#elif defined(__mips64)
+/* The high 32 bits of $2 correspond to the second word in memory;
+   i.e. the imaginary part.  */
+#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T
+#elif __mips_fpr == 64
+/* 64-bit FPRs: $2 <-> $f0 and $3 <-> $f1.  */
+#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
+#else
+/* 32-bit FPRs: $2 <-> $f0 and $3 <-> $f2.  */
+#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2)
+#endif
+
+/* Single-precision argument moves.  The first argument always pairs
+   $4 with $f12.  With __mips64 the second argument pairs $5 with $f13
+   whether the first argument occupies 4 or 8 bytes.  Otherwise the
+   second argument lives in $f14 and pairs with $5 after a 4-byte
+   first argument or with $6 after an 8-byte first argument.  */
+#if defined(__mips64)
+#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
+#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13
+#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13
+#else
+#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
+#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14
+#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14
+#endif
+/* A first integer argument is moved exactly like a first float.  */
+#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D)
  
-#define SFOP(NAME, OPCODE)     \
+/* Double-precision argument and return moves.  Four layouts are
+   handled, chosen by __mips64, __mips_fpr and __MIPSEB__.  In every
+   layout MOVE_DC_RET first moves the second double of the complex
+   value and then expands MOVE_DF_RET for the first double and the
+   jump to T.  */
+#if defined(__mips64)
+/* 64-bit GPRs: one doubleword move (dmtc1/dmfc1) per value.
+   Arguments pair $4/$f12 and $5/$f13; results pair $2/$f0 and
+   $3/$f1.  */
+#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12
+#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13
+#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0)
+#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T)
+#elif __mips_fpr == 64 && defined(__MIPSEB__)
+/* 32-bit GPRs, 64-bit FPRs, big-endian: m?c1 moves the low half of
+   the FPR and m?hc1 the high half.  The lower-numbered GPR of each
+   pair is matched with the high half.  */
+#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T)
+#elif __mips_fpr == 64
+/* As above but little-endian: the lower-numbered GPR of each pair is
+   matched with the low half of the FPR.  */
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T)
+#elif defined(__MIPSEB__)
+/* FPRs are little-endian.  Each double occupies an even/odd pair of
+   32-bit FPRs, so on a big-endian target the lower-numbered GPR is
+   matched with the odd-numbered FPR.  */
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T)
+#else
+/* 32-bit FPR pairs, little-endian: the lower-numbered GPR is matched
+   with the even-numbered FPR.  */
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T)
+#endif
+
+/* Single-precision math.  */
+
+/* Define a function NAME that loads two single-precision values,
+   performs FPU operation OPCODE on them, and returns the single-
+   precision result.  */
+
+#define OPSF3(NAME, OPCODE)    \
  STARTFN (NAME);                        \
-       .set    noreorder;      \
-       mtc1    $4,$f0;         \
-       mtc1    $5,$f2;         \
-       nop;                    \
-       OPCODE  $f0,$f0,$f2;    \
-       mfc1    $2,$f0;         \
-       j       $31;            \
-       nop;                    \
-       .set    reorder;        \
+       MOVE_SF_BYTE0 (t);      \
+       MOVE_SF_BYTE4 (t);      \
+       OPCODE  RET,ARG1,ARG2;  \
+       MOVE_SF_RET (f, $31);   \
         ENDFN (NAME)
  
  #ifdef L_m16addsf3
-SFOP(__mips16_addsf3, add.s)
+OPSF3 (__mips16_addsf3, add.s)
  #endif
  #ifdef L_m16subsf3
-SFOP(__mips16_subsf3, sub.s)
+OPSF3 (__mips16_subsf3, sub.s)
  #endif
  #ifdef L_m16mulsf3
-SFOP(__mips16_mulsf3, mul.s)
+OPSF3 (__mips16_mulsf3, mul.s)
  #endif
  #ifdef L_m16divsf3
-SFOP(__mips16_divsf3, div.s)
+OPSF3 (__mips16_divsf3, div.s)
  #endif
  
-#define SFOP2(NAME, OPCODE)    \
+/* Define a function NAME that loads a single-precision value,
+   performs FPU operation OPCODE on it, and returns the single-
+   precision result.  */
+
+#define OPSF2(NAME, OPCODE)    \
  STARTFN (NAME);                        \
-       .set    noreorder;      \
-       mtc1    $4,$f0;         \
-       nop;                    \
-       OPCODE  $f0,$f0;        \
-       mfc1    $2,$f0;         \
-       j       $31;            \
-       nop;                    \
-       .set    reorder;        \
+       MOVE_SF_BYTE0 (t);      \
+       OPCODE  RET,ARG1;       \
+       MOVE_SF_RET (f, $31);   \
         ENDFN (NAME)
-       
+
  #ifdef L_m16negsf2
-SFOP2(__mips16_negsf2, neg.s)
+OPSF2 (__mips16_negsf2, neg.s)
  #endif
  #ifdef L_m16abssf2
-SFOP2(__mips16_abssf2, abs.s)
+OPSF2 (__mips16_abssf2, abs.s)
  #endif
-       
-/* Single precision comparisons.  */
  
-/* This macro defines a function which loads two single precision
-   values, performs a floating point comparison, and returns the
-   specified values according to whether the comparison is true or
-   false.  */
+/* Single-precision comparisons.  */
+
+/* Define a function NAME that loads two single-precision values,
+   performs floating point comparison OPCODE, and returns TRUE or
+   FALSE depending on the result.  */
  
-#define SFCMP(NAME, OPCODE, TRUE, FALSE)       \
+#define CMPSF(NAME, OPCODE, TRUE, FALSE)       \
  STARTFN (NAME);                                        \
-       mtc1    $4,$f0;                         \
-       mtc1    $5,$f2;                         \
-       OPCODE  $f0,$f2;                        \
+       MOVE_SF_BYTE0 (t);                      \
+       MOVE_SF_BYTE4 (t);                      \
+       OPCODE  ARG1,ARG2;                      \
         li      $2,TRUE;                        \
         bc1t    1f;                             \
         li      $2,FALSE;                       \
@@ -119,13 +245,13 @@ STARTFN (NAME);                                   \
         j       $31;                            \
         ENDFN (NAME)
  
-/* This macro is like SFCMP, but it reverses the comparison.  */
+/* Like CMPSF, but reverse the comparison operands.  */
  
-#define SFREVCMP(NAME, OPCODE, TRUE, FALSE)    \
+#define REVCMPSF(NAME, OPCODE, TRUE, FALSE)    \
  STARTFN (NAME);                                        \
-       mtc1    $4,$f0;                         \
-       mtc1    $5,$f2;                         \
-       OPCODE  $f2,$f0;                        \
+       MOVE_SF_BYTE0 (t);                      \
+       MOVE_SF_BYTE4 (t);                      \
+       OPCODE  ARG2,ARG1;                      \
         li      $2,TRUE;                        \
         bc1t    1f;                             \
         li      $2,FALSE;                       \
@@ -134,189 +260,141 @@ STARTFN (NAME);                                 \
         ENDFN (NAME)
  
  #ifdef L_m16eqsf2
-SFCMP(__mips16_eqsf2, c.eq.s, 0, 1)
+CMPSF (__mips16_eqsf2, c.eq.s, 0, 1)
  #endif
  #ifdef L_m16nesf2
-SFCMP(__mips16_nesf2, c.eq.s, 0, 1)
+CMPSF (__mips16_nesf2, c.eq.s, 0, 1)
  #endif
  #ifdef L_m16gtsf2
-SFREVCMP(__mips16_gtsf2, c.lt.s, 1, 0)
+REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0)
  #endif
  #ifdef L_m16gesf2
-SFREVCMP(__mips16_gesf2, c.le.s, 0, -1)
+REVCMPSF (__mips16_gesf2, c.le.s, 0, -1)
  #endif
  #ifdef L_m16lesf2
-SFCMP(__mips16_lesf2, c.le.s, 0, 1)
+CMPSF (__mips16_lesf2, c.le.s, 0, 1)
  #endif
  #ifdef L_m16ltsf2
-SFCMP(__mips16_ltsf2, c.lt.s, -1, 0)
+CMPSF (__mips16_ltsf2, c.lt.s, -1, 0)
+#endif
+#ifdef L_m16unordsf2
+CMPSF(__mips16_unordsf2, c.un.s, 1, 0)
  #endif
  
-/* Single precision conversions.  */
+
+/* Single-precision conversions.  */
  
  #ifdef L_m16fltsisf
  STARTFN (__mips16_floatsisf)
-       .set    noreorder
-       mtc1    $4,$f0
-       nop
-       cvt.s.w $f0,$f0
-       mfc1    $2,$f0
-       j       $31
-       nop
-       .set    reorder
+       MOVE_SF_BYTE0 (t)
+       cvt.s.w RET,ARG1
+       MOVE_SF_RET (f, $31)
         ENDFN (__mips16_floatsisf)
  #endif
  
-#ifdef L_m16fixsfsi
-STARTFN (__mips16_fixsfsi)
+#ifdef L_m16fltunsisf
+STARTFN (__mips16_floatunsisf)
         .set    noreorder
-       mtc1    $4,$f0
-       nop
-       trunc.w.s $f0,$f0,$4
-       mfc1    $2,$f0
-       j       $31
-       nop     
+       bltz    $4,1f
+       MOVE_SF_BYTE0 (t)
         .set    reorder
-       ENDFN (__mips16_fixsfsi)
+       cvt.s.w RET,ARG1
+       MOVE_SF_RET (f, $31)
+1:             
+       and     $2,$4,1
+       srl     $3,$4,1
+       or      $2,$2,$3
+       mtc1    $2,RET
+       cvt.s.w RET,RET
+       add.s   RET,RET,RET
+       MOVE_SF_RET (f, $31)
+       ENDFN (__mips16_floatunsisf)
  #endif
-
-#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
-               
-/* The double precision operations.  We need to use different code
-   based on the preprocessor symbol __mips64, because the way in which
-   double precision values will change.  Without __mips64, the value
-   is passed in two 32 bit registers.  With __mips64, the value is
-   passed in a single 64 bit register.  */
-
-/* Load the first double precision operand.  */
-
-#if defined(__mips64)
-#define LDDBL1 dmtc1 $4,$f12
-#elif defined(__mipsfp64)
-#define LDDBL1 sw $4,0($29); sw $5,4($29); l.d $f12,0($29)
-#elif defined(__MIPSEB__)      
-#define LDDBL1 mtc1 $4,$f13; mtc1 $5,$f12
-#else  
-#define LDDBL1 mtc1 $4,$f12; mtc1 $5,$f13
+       
+#ifdef L_m16fix_truncsfsi
+/* Convert the single-precision value passed in $4 to a 32-bit integer
+   with trunc.w.s and return it in $2.
+   NOTE(review): the third operand of trunc.w.s names $4, presumably as
+   a scratch GPR for the assembler's expansion -- confirm against the
+   assembler documentation.  $4 has already been copied to ARG1 by
+   that point.  */
+STARTFN (__mips16_fix_truncsfsi)
+       MOVE_SF_BYTE0 (t)
+       trunc.w.s RET,ARG1,$4
+       MOVE_SI_RET (f, $31)
+       ENDFN (__mips16_fix_truncsfsi)
  #endif
  
-/* Load the second double precision operand.  */
-
-#if defined(__mips64)
-/* XXX this should be $6 for Algo arg passing model */
-#define LDDBL2 dmtc1 $5,$f14
-#elif defined(__mipsfp64)
-#define LDDBL2 sw $6,8($29); sw $7,12($29); l.d $f14,8($29)
-#elif defined(__MIPSEB__)      
-#define LDDBL2 mtc1 $6,$f15; mtc1 $7,$f14
-#else  
-#define LDDBL2 mtc1 $6,$f14; mtc1 $7,$f15
-#endif
-       
-/* Move the double precision return value to the right place.  */
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
  
-#if defined(__mips64)
-#define RETDBL dmfc1 $2,$f0
-#elif defined(__mipsfp64)
-#define RETDBL s.d $f0,0($29); lw $2,0($29); lw $3,4($29)
-#elif defined(__MIPSEB__)      
-#define RETDBL mfc1 $2,$f1; mfc1 $3,$f0
-#else  
-#define RETDBL mfc1 $2,$f0; mfc1 $3,$f1
-#endif
-       
-/* Double precision math.  */
+/* Double-precision math.  */
  
-/* This macro defines a function which loads two double precision
-   values, performs an operation, and returns the double precision
-   result.  */
+/* Define a function NAME that loads two double-precision values,
+   performs FPU operation OPCODE on them, and returns the double-
+   precision result.  */
  
-#define DFOP(NAME, OPCODE)     \
+#define OPDF3(NAME, OPCODE)    \
  STARTFN (NAME);                        \
-       .set    noreorder;      \
-       LDDBL1;                 \
-       LDDBL2;                 \
-       nop;                    \
-       OPCODE  $f0,$f12,$f14;  \
-       RETDBL;                 \
-       j       $31;            \
-       nop;                    \
-       .set    reorder;        \
+       MOVE_DF_BYTE0 (t);      \
+       MOVE_DF_BYTE8 (t);      \
+       OPCODE RET,ARG1,ARG2;   \
+       MOVE_DF_RET (f, $31);   \
         ENDFN (NAME)
  
  #ifdef L_m16adddf3
-DFOP(__mips16_adddf3, add.d)
+OPDF3 (__mips16_adddf3, add.d)
  #endif
  #ifdef L_m16subdf3
-DFOP(__mips16_subdf3, sub.d)
+OPDF3 (__mips16_subdf3, sub.d)
  #endif
  #ifdef L_m16muldf3
-DFOP(__mips16_muldf3, mul.d)
+OPDF3 (__mips16_muldf3, mul.d)
  #endif
  #ifdef L_m16divdf3
-DFOP(__mips16_divdf3, div.d)
+OPDF3 (__mips16_divdf3, div.d)
  #endif
  
-#define DFOP2(NAME, OPCODE)    \
+/* Define a function NAME that loads a double-precision value,
+   performs FPU operation OPCODE on it, and returns the double-
+   precision result.  */
+
+#define OPDF2(NAME, OPCODE)    \
  STARTFN (NAME);                        \
-       .set    noreorder;      \
-       LDDBL1;                 \
-       nop;                    \
-       OPCODE  $f0,$f12;       \
-       RETDBL;                 \
-       j       $31;            \
-       nop;                    \
-       .set    reorder;        \
+       MOVE_DF_BYTE0 (t);      \
+       OPCODE RET,ARG1;        \
+       MOVE_DF_RET (f, $31);   \
         ENDFN (NAME)
-       
+
  #ifdef L_m16negdf2
-DFOP2(__mips16_negdf2, neg.d)
+OPDF2 (__mips16_negdf2, neg.d)
  #endif
  #ifdef L_m16absdf2
-DFOP2(__mips16_absdf2, abs.d)
+OPDF2 (__mips16_absdf2, abs.d)
  #endif
  
-       
  /* Conversions between single and double precision.  */
  
  #ifdef L_m16extsfdf2
  STARTFN (__mips16_extendsfdf2)
-       .set    noreorder
-       mtc1    $4,$f12
-       nop
-       cvt.d.s $f0,$f12
-       RETDBL
-       j       $31
-       nop
-       .set    reorder
+       MOVE_SF_BYTE0 (t)
+       cvt.d.s RET,ARG1
+       MOVE_DF_RET (f, $31)
         ENDFN (__mips16_extendsfdf2)
  #endif
  
  #ifdef L_m16trdfsf2
  STARTFN (__mips16_truncdfsf2)
-       .set    noreorder
-       LDDBL1
-       nop
-       cvt.s.d $f0,$f12
-       mfc1    $2,$f0
-       j       $31
-       nop
-       .set    reorder
+       MOVE_DF_BYTE0 (t)
+       cvt.s.d RET,ARG1
+       MOVE_SF_RET (f, $31)
         ENDFN (__mips16_truncdfsf2)
  #endif
  
-/* Double precision comparisons.  */
+/* Double-precision comparisons.  */
  
-/* This macro defines a function which loads two double precision
-   values, performs a floating point comparison, and returns the
-   specified values according to whether the comparison is true or
-   false.  */
+/* Define a function NAME that loads two double-precision values,
+   performs floating point comparison OPCODE, and returns TRUE or
+   FALSE depending on the result.  */
  
-#define DFCMP(NAME, OPCODE, TRUE, FALSE)       \
+#define CMPDF(NAME, OPCODE, TRUE, FALSE)       \
  STARTFN (NAME);                                        \
-       LDDBL1;                                 \
-       LDDBL2;                                 \
-       OPCODE  $f12,$f14;                      \
+       MOVE_DF_BYTE0 (t);                      \
+       MOVE_DF_BYTE8 (t);                      \
+       OPCODE  ARG1,ARG2;                      \
         li      $2,TRUE;                        \
         bc1t    1f;                             \
         li      $2,FALSE;                       \
@@ -324,13 +402,13 @@ STARTFN (NAME);                                   \
         j       $31;                            \
         ENDFN (NAME)
  
-/* This macro is like DFCMP, but it reverses the comparison.  */
+/* Like CMPDF, but reverse the comparison operands.  */
  
-#define DFREVCMP(NAME, OPCODE, TRUE, FALSE)    \
+#define REVCMPDF(NAME, OPCODE, TRUE, FALSE)    \
  STARTFN (NAME);                                        \
-       LDDBL1;                                 \
-       LDDBL2;                                 \
-       OPCODE  $f14,$f12;                      \
+       MOVE_DF_BYTE0 (t);                      \
+       MOVE_DF_BYTE8 (t);                      \
+       OPCODE  ARG2,ARG1;                      \
         li      $2,TRUE;                        \
         bc1t    1f;                             \
         li      $2,FALSE;                       \
@@ -339,402 +417,293 @@ STARTFN (NAME);                                 \
         ENDFN (NAME)
  
  #ifdef L_m16eqdf2
-DFCMP(__mips16_eqdf2, c.eq.d, 0, 1)
+CMPDF (__mips16_eqdf2, c.eq.d, 0, 1)
  #endif
  #ifdef L_m16nedf2
-DFCMP(__mips16_nedf2, c.eq.d, 0, 1)
+CMPDF (__mips16_nedf2, c.eq.d, 0, 1)
  #endif
  #ifdef L_m16gtdf2
-DFREVCMP(__mips16_gtdf2, c.lt.d, 1, 0)
+REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0)
  #endif
  #ifdef L_m16gedf2
-DFREVCMP(__mips16_gedf2, c.le.d, 0, -1)
+REVCMPDF (__mips16_gedf2, c.le.d, 0, -1)
  #endif
  #ifdef L_m16ledf2
-DFCMP(__mips16_ledf2, c.le.d, 0, 1)
+CMPDF (__mips16_ledf2, c.le.d, 0, 1)
  #endif
  #ifdef L_m16ltdf2
-DFCMP(__mips16_ltdf2, c.lt.d, -1, 0)
+CMPDF (__mips16_ltdf2, c.lt.d, -1, 0)
+#endif
+#ifdef L_m16unorddf2
+CMPDF(__mips16_unorddf2, c.un.d, 1, 0)
  #endif
  
-/* Double precision conversions.  */
+/* Double-precision conversions.  */
  
  #ifdef L_m16fltsidf
  STARTFN (__mips16_floatsidf)
-       .set    noreorder
-       mtc1    $4,$f12
-       nop
-       cvt.d.w $f0,$f12
-       RETDBL
-       j       $31
-       nop
-       .set    reorder
+       MOVE_SI_BYTE0 (t)
+       cvt.d.w RET,ARG1
+       MOVE_DF_RET (f, $31)
         ENDFN (__mips16_floatsidf)
  #endif
-
-#ifdef L_m16fixdfsi
-STARTFN (__mips16_fixdfsi)
-       .set    noreorder
-       LDDBL1
-       nop
-       trunc.w.d $f0,$f12,$4
-       mfc1    $2,$f0
-       j       $31
-       nop
-       .set    reorder
-       ENDFN (__mips16_fixdfsi)
+       
+#ifdef L_m16fltunsidf
+/* Convert the unsigned 32-bit integer in $4 to double precision.
+   The value is first converted as if it were signed (cvt.d.w).  If $4
+   is negative when read as signed, i.e. bit 31 is set, 2^32
+   (4.294967296e+9) is added to the result to correct it; otherwise the
+   correction is skipped.  ARG1 is reused to hold the constant.  */
+STARTFN (__mips16_floatunsidf)
+       MOVE_SI_BYTE0 (t)
+       cvt.d.w RET,ARG1
+       bgez    $4,1f
+       li.d    ARG1, 4.294967296e+9
+       add.d   RET, RET, ARG1
+1:     MOVE_DF_RET (f, $31)
+       ENDFN (__mips16_floatunsidf)
+#endif
+       
+#ifdef L_m16fix_truncdfsi
+/* Convert the double-precision first argument to a 32-bit integer
+   with trunc.w.d and return it in $2.
+   NOTE(review): the third operand of trunc.w.d names $4, presumably as
+   a scratch GPR for the assembler's expansion -- confirm against the
+   assembler documentation.  The argument has already been copied to
+   ARG1 by that point.  */
+STARTFN (__mips16_fix_truncdfsi)
+       MOVE_DF_BYTE0 (t)
+       trunc.w.d RET,ARG1,$4
+       MOVE_SI_RET (f, $31)
+       ENDFN (__mips16_fix_truncdfsi)
  #endif
  #endif /* !__mips_single_float */
  
-/* These functions are used to return floating point values from
-   mips16 functions which do not use -mentry.  In this case we can
-   put mtc1 in a jump delay slot, because we know that the next
-   instruction will not refer to a floating point register.  */
+/* Define a function NAME that moves a return value of mode MODE from
+   GPRs to FPRs (the "t" direction of MOVE_<MODE>_RET) and then
+   returns to $31.  */
+
+#define RET_FUNCTION(NAME, MODE)       \
+STARTFN (NAME);                                \
+       MOVE_##MODE##_RET (t, $31);     \
+       ENDFN (NAME)
  
  #ifdef L_m16retsf
-STARTFN (__mips16_ret_sf)
-       .set    noreorder
-       j       $31
-       mtc1    $2,$f0
-       .set    reorder
-       ENDFN (__mips16_ret_sf)
+RET_FUNCTION (__mips16_ret_sf, SF)
+#endif
+
+#ifdef L_m16retsc
+RET_FUNCTION (__mips16_ret_sc, SC)
  #endif
  
  #if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
  #ifdef L_m16retdf
-STARTFN (__mips16_ret_df)
-       .set    noreorder
-#if defined(__mips64)
-       j       $31
-       dmtc1   $2,$f0
-#elif defined(__mipsfp64)
-       sw      $2,0($29)
-       sw      $3,4($29)
-       l.d     $f0,0($29) 
-#elif defined(__MIPSEB__)      
-       mtc1    $2,$f1
-       j       $31
-       mtc1    $3,$f0
-#else  
-       mtc1    $2,$f0
-       j       $31
-       mtc1    $3,$f1
+RET_FUNCTION (__mips16_ret_df, DF)
  #endif
-       .set    reorder
-       ENDFN (__mips16_ret_df)
+
+#ifdef L_m16retdc
+RET_FUNCTION (__mips16_ret_dc, DC)
  #endif
  #endif /* !__mips_single_float */
  
-/* These functions are used by 16 bit code when calling via a function
-   pointer.  They must copy the floating point arguments from the gp
-   regs into the fp regs.  The function to call will be in $2.  The
-   exact set of floating point arguments to copy is encoded in the
-   function name; the final number is an fp_code, as described in
-   mips.h in the comment about CUMULATIVE_ARGS.  */
+/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument
+   code X.  X is calculated as CLASS1 + CLASS2 * 4, where CLASS1 and
+   CLASS2 classify the first and second arguments as follows:
+
+       1: a single-precision argument
+       2: a double-precision argument
+       0: no argument, or not one of the above.
+
+   (CLASS1 and CLASS2 are classification codes used only in this
+   description; they are unrelated to the ARG1 and ARG2 FPR macros.)
+   For example, (sf, df) is 1 + 2 * 4 = 9.  */
+
+#define STUB_ARGS_0                                            /* () */
+#define STUB_ARGS_1 MOVE_SF_BYTE0 (t)                          /* (sf) */
+#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t)       /* (sf, sf) */
+#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t)       /* (sf, df) */
+#define STUB_ARGS_2 MOVE_DF_BYTE0 (t)                          /* (df) */
+#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t)       /* (df, sf) */
+#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t)      /* (df, df) */
+
+/* These functions are used by 16-bit code when calling via a function
+   pointer.  They must copy the floating point arguments from the GPRs
+   to FPRs and then call function $2.
+
+   CALL_STUB_NO_RET (NAME, CODE) defines stub NAME for argument code
+   CODE (see STUB_ARGS_<CODE>).  The stub jumps to $2 with jr, so $31
+   is left untouched and the target returns directly to the stub's
+   caller.  The target address is also copied into $25 in the delay
+   slot of the jump.
+   NOTE(review): the $25 copy is presumably for callees that expect
+   their own address in $25 -- confirm against the ABI.  */
+
+#define CALL_STUB_NO_RET(NAME, CODE)   \
+STARTFN (NAME);                                \
+       STUB_ARGS_##CODE;               \
+       .set    noreorder;              \
+       jr      $2;                     \
+       move    $25,$2;                 \
+       .set    reorder;                \
+       ENDFN (NAME)
  
  #ifdef L_m16stub1
-/* (float) */
-STARTFN (__mips16_call_stub_1)
-       .set    noreorder
-       mtc1    $4,$f12
-       j       $2
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_1)
+CALL_STUB_NO_RET (__mips16_call_stub_1, 1)
  #endif
  
  #ifdef L_m16stub5
-/* (float, float) */
-STARTFN (__mips16_call_stub_5)
-       .set    noreorder
-       mtc1    $4,$f12
-       mtc1    $5,$f14
-       j       $2
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_5)
+CALL_STUB_NO_RET (__mips16_call_stub_5, 5)
  #endif
  
  #if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
  
  #ifdef L_m16stub2
-/* (double) */
-STARTFN (__mips16_call_stub_2)
-       .set    noreorder
-       LDDBL1
-       j       $2
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_2)
+CALL_STUB_NO_RET (__mips16_call_stub_2, 2)
  #endif
  
  #ifdef L_m16stub6
-/* (double, float) */
-STARTFN (__mips16_call_stub_6)
-       .set    noreorder
-       LDDBL1
-       mtc1    $6,$f14
-       j       $2
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_6)
+CALL_STUB_NO_RET (__mips16_call_stub_6, 6)
  #endif
  
  #ifdef L_m16stub9
-/* (float, double) */
-STARTFN (__mips16_call_stub_9)
-       .set    noreorder
-       mtc1    $4,$f12
-       LDDBL2
-       j       $2
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_9)
+CALL_STUB_NO_RET (__mips16_call_stub_9, 9)
  #endif
  
  #ifdef L_m16stub10
-/* (double, double) */
-STARTFN (__mips16_call_stub_10)
-       .set    noreorder
-       LDDBL1
-       LDDBL2
-       j       $2
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_10)
+CALL_STUB_NO_RET (__mips16_call_stub_10, 10)
  #endif
  #endif /* !__mips_single_float */
  
  /* Now we have the same set of functions, except that this time the
-   function being called returns an SFmode value.  The calling
+   function being called returns an SFmode, SCmode, DFmode or DCmode
+   value; we need to instantiate a set for each case.  The calling
     function will arrange to preserve $18, so these functions are free
     to use it to hold the return address.
  
     Note that we do not know whether the function we are calling is 16
-   bit or 32 bit.  However, it does not matter, because 16 bit
+   bit or 32 bit.  However, it does not matter, because 16-bit
     functions always return floating point values in both the gp and
     the fp regs.  It would be possible to check whether the function
     being called is 16 bits, in which case the copy is unnecessary;
     however, it's faster to always do the copy.  */
  
+/* CALL_STUB_RET (NAME, CODE, MODE) defines stub NAME, which:
+     1. saves the return address $31 in $18;
+     2. copies the arguments described by argument code CODE from
+        GPRs to FPRs (STUB_ARGS_<CODE>);
+     3. calls the function whose address is in $2 with jalr, copying
+        that address into $25 in the delay slot;
+     4. copies the MODE return value from FPRs to GPRs and jumps to
+        the saved return address in $18.  */
+#define CALL_STUB_RET(NAME, CODE, MODE)        \
+STARTFN (NAME);                                \
+       move    $18,$31;                \
+       STUB_ARGS_##CODE;               \
+       .set    noreorder;              \
+       jalr    $2;                     \
+       move    $25,$2;                 \
+       .set    reorder;                \
+       MOVE_##MODE##_RET (f, $18);     \
+       ENDFN (NAME)
+
+/* First, instantiate the single-float set.  */
+
  #ifdef L_m16stubsf0
-/* () */
-STARTFN (__mips16_call_stub_sf_0)
-       .set    noreorder
-       move    $18,$31
-       jal     $2
-       nop
-       mfc1    $2,$f0
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_sf_0)
+CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF)
  #endif
  
  #ifdef L_m16stubsf1
-/* (float) */
-STARTFN (__mips16_call_stub_sf_1)
-       .set    noreorder
-       mtc1    $4,$f12
-       move    $18,$31
-       jal     $2
-       nop
-       mfc1    $2,$f0
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_sf_1)
+CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF)
  #endif
  
  #ifdef L_m16stubsf5
-/* (float, float) */
-STARTFN (__mips16_call_stub_sf_5)
-       .set    noreorder
-       mtc1    $4,$f12
-       mtc1    $5,$f14
-       move    $18,$31
-       jal     $2
-       nop
-       mfc1    $2,$f0
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_sf_5)
+CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF)
  #endif
  
  #if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
  #ifdef L_m16stubsf2
-/* (double) */
-STARTFN (__mips16_call_stub_sf_2)
-       .set    noreorder
-       LDDBL1
-       move    $18,$31
-       jal     $2
-       nop
-       mfc1    $2,$f0
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_sf_2)
+CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF)
  #endif
  
  #ifdef L_m16stubsf6
-/* (double, float) */
-STARTFN (__mips16_call_stub_sf_6)
-       .set    noreorder
-       LDDBL1
-       mtc1    $6,$f14
-       move    $18,$31
-       jal     $2
-       nop
-       mfc1    $2,$f0
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_sf_6)
+CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF)
  #endif
  
  #ifdef L_m16stubsf9
-/* (float, double) */
-STARTFN (__mips16_call_stub_sf_9)
-       .set    noreorder
-       mtc1    $4,$f12
-       LDDBL2
-       move    $18,$31
-       jal     $2
-       nop
-       mfc1    $2,$f0
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_sf_9)
+CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF)
  #endif
  
  #ifdef L_m16stubsf10
-/* (double, double) */
-STARTFN (__mips16_call_stub_sf_10)
-       .set    noreorder
-       LDDBL1
-       LDDBL2
-       move    $18,$31
-       jal     $2
-       nop
-       mfc1    $2,$f0
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_sf_10)
+CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF)
  #endif
+#endif /* !__mips_single_float */
+
  
  /* Now we have the same set of functions again, except that this time
     the function being called returns a DFmode value.  */
  
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
  #ifdef L_m16stubdf0
-/* () */
-STARTFN (__mips16_call_stub_df_0)
-       .set    noreorder
-       move    $18,$31
-       jal     $2
-       nop
-       RETDBL
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_df_0)
+CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF)
  #endif
  
  #ifdef L_m16stubdf1
-/* (float) */
-STARTFN (__mips16_call_stub_df_1)
-       .set    noreorder
-       mtc1    $4,$f12
-       move    $18,$31
-       jal     $2
-       nop
-       RETDBL
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_df_1)
+CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF)
  #endif
  
-#ifdef L_m16stubdf2
-/* (double) */
-STARTFN (__mips16_call_stub_df_2)
-       .set    noreorder
-       LDDBL1
-       move    $18,$31
-       jal     $2
-       nop
-       RETDBL
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_df_2)
+#ifdef L_m16stubdf5
+CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF)
  #endif
  
-#ifdef L_m16stubdf5
-/* (float, float) */
-STARTFN (__mips16_call_stub_df_5)
-       .set    noreorder
-       mtc1    $4,$f12
-       mtc1    $5,$f14
-       move    $18,$31
-       jal     $2
-       nop
-       RETDBL
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_df_5)
+#ifdef L_m16stubdf2
+CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF)
  #endif
  
  #ifdef L_m16stubdf6
-/* (double, float) */
-STARTFN (__mips16_call_stub_df_6)
-       .set    noreorder
-       LDDBL1
-       mtc1    $6,$f14
-       move    $18,$31
-       jal     $2
-       nop
-       RETDBL
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_df_6)
+CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF)
  #endif
  
  #ifdef L_m16stubdf9
-/* (float, double) */
-STARTFN (__mips16_call_stub_df_9)
-       .set    noreorder
-       mtc1    $4,$f12
-       LDDBL2
-       move    $18,$31
-       jal     $2
-       nop
-       RETDBL
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_df_9)
+CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF)
  #endif
  
  #ifdef L_m16stubdf10
-/* (double, double) */
-STARTFN (__mips16_call_stub_df_10)
-       .set    noreorder
-       LDDBL1
-       LDDBL2
-       move    $18,$31
-       jal     $2
-       nop
-       RETDBL
-       j       $18
-       nop
-       .set    reorder
-       ENDFN (__mips16_call_stub_df_10)
+CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF)
  #endif
  #endif /* !__mips_single_float */
  
+
+/* Ho hum.  Here we have the same set of functions again, this time
+   for when the function being called returns an SCmode value.  */
+
+#ifdef L_m16stubsc0
+CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC)
+#endif
+
+#ifdef L_m16stubsc1
+CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC)
+#endif
+
+#ifdef L_m16stubsc5
+CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubsc2
+CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC)
+#endif
+
+#ifdef L_m16stubsc6
+CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC)
+#endif
+
+#ifdef L_m16stubsc9
+CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC)
+#endif
+
+#ifdef L_m16stubsc10
+CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Finally, another set of functions for DCmode.  */
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubdc0
+CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC)
+#endif
+
+#ifdef L_m16stubdc1
+CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC)
+#endif
+
+#ifdef L_m16stubdc5
+CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC)
+#endif
+
+#ifdef L_m16stubdc2
+CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC)
+#endif
+
+#ifdef L_m16stubdc6
+CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC)
+#endif
+
+#ifdef L_m16stubdc9
+CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC)
+#endif
+
+#ifdef L_m16stubdc10
+CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC)
+#endif
+#endif /* !__mips_single_float */
+#endif