X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fi386.md;h=b1868ac45d77f5d6b28a89afe6877fe396f2cfc2;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=70ce9598662d27b03a50a55f6b8eb8bc57c8801b;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 70ce9598..b1868ac4 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1,37 +1,31 @@ ;; GCC machine description for IA-32 and x86-64. -;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka ;; -;; This file is part of GNU CC. +;; This file is part of GCC. ;; -;; GNU CC is free software; you can redistribute it and/or modify +;; GCC is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) +;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; -;; GNU CC is distributed in the hope that it will be useful, +;; GCC is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License -;; along with GNU CC; see the file COPYING. If not, write to -;; the Free Software Foundation, 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. */ +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. */ ;; ;; The original PO technology requires these to be ordered by speed, ;; so that assigner will pick the fastest. 
;; ;; See file "rtl.def" for documentation on define_insn, match_*, et. al. ;; -;; Macro #define NOTICE_UPDATE_CC in file i386.h handles condition code -;; updates for most instructions. -;; -;; Macro REG_CLASS_FROM_LETTER in file i386.h defines the register -;; constraint letters. -;; ;; The special asm out single letter directives following a '%' are: ;; 'z' mov%z1 would be movl, movw, or movb depending on the mode of ;; operands[1]. @@ -49,55 +43,268 @@ ;; 'k' Likewise, print the SImode name of the register. ;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh. ;; 'y' Print "st(0)" instead of "st" as a register. -;; + ;; UNSPEC usage: -;; 0 This is a `scas' operation. The mode of the UNSPEC is always SImode. -;; operand 0 is the memory address to scan. -;; operand 1 is a register containing the value to scan for. The mode -;; of the scas opcode will be the same as the mode of this operand. -;; operand 2 is the known alignment of operand 0. -;; 1 This is a `sin' operation. The mode of the UNSPEC is MODE_FLOAT. -;; operand 0 is the argument for `sin'. -;; 2 This is a `cos' operation. The mode of the UNSPEC is MODE_FLOAT. -;; operand 0 is the argument for `cos'. -;; 3 This is part of a `stack probe' operation. The mode of the UNSPEC is -;; always SImode. operand 0 is the size of the stack allocation. -;; 4 This is the source of a fake SET of the frame pointer which is used to -;; prevent insns referencing it being scheduled across the initial -;; decrement of the stack pointer. -;; 5 This is a `bsf' operation. -;; 6 This is the @GOT offset of a PIC address. -;; 7 This is the @GOTOFF offset of a PIC address. -;; 8 This is a reference to a symbol's @PLT address. -;; 9 This is an `fnstsw' operation. -;; 10 This is a `sahf' operation. -;; 11 This is a `fstcw' operation -;; 12 This is behaviour of add when setting carry flag. -;; 13 This is a `eh_return' placeholder. - -;; For SSE/MMX support: -;; 30 This is `fix', guaranteed to be truncating. 
-;; 31 This is a `emms' operation. -;; 32 This is a `maskmov' operation. -;; 33 This is a `movmsk' operation. -;; 34 This is a `non-temporal' move. -;; 36 This is used to distinguish COMISS from UCOMISS. -;; 37 This is a `ldmxcsr' operation. -;; 38 This is a forced `movaps' instruction (rather than whatever movti does) -;; 39 This is a forced `movups' instruction (rather than whatever movti does) -;; 40 This is a `stmxcsr' operation. -;; 41 This is a `shuffle' operation. -;; 42 This is a `rcp' operation. -;; 43 This is a `rsqsrt' operation. -;; 44 This is a `sfence' operation. -;; 45 This is a noop to prevent excessive combiner cleverness. -;; 46 This is a `femms' operation. -;; 49 This is a 'pavgusb' operation. -;; 50 This is a `pfrcp' operation. -;; 51 This is a `pfrcpit1' operation. -;; 52 This is a `pfrcpit2' operation. -;; 53 This is a `pfrsqrt' operation. -;; 54 This is a `pfrsqrit1' operation. + +(define_constants + [; Relocation specifiers + (UNSPEC_GOT 0) + (UNSPEC_GOTOFF 1) + (UNSPEC_GOTPCREL 2) + (UNSPEC_GOTTPOFF 3) + (UNSPEC_TPOFF 4) + (UNSPEC_NTPOFF 5) + (UNSPEC_DTPOFF 6) + (UNSPEC_GOTNTPOFF 7) + (UNSPEC_INDNTPOFF 8) + (UNSPEC_PLTOFF 9) + (UNSPEC_MACHOPIC_OFFSET 10) + + ; Prologue support + (UNSPEC_STACK_ALLOC 11) + (UNSPEC_SET_GOT 12) + (UNSPEC_SSE_PROLOGUE_SAVE 13) + (UNSPEC_REG_SAVE 14) + (UNSPEC_DEF_CFA 15) + (UNSPEC_SET_RIP 16) + (UNSPEC_SET_GOT_OFFSET 17) + (UNSPEC_MEMORY_BLOCKAGE 18) + + ; TLS support + (UNSPEC_TP 20) + (UNSPEC_TLS_GD 21) + (UNSPEC_TLS_LD_BASE 22) + (UNSPEC_TLSDESC 23) + + ; Other random patterns + (UNSPEC_SCAS 30) + (UNSPEC_FNSTSW 31) + (UNSPEC_SAHF 32) + (UNSPEC_FSTCW 33) + (UNSPEC_ADD_CARRY 34) + (UNSPEC_FLDCW 35) + (UNSPEC_REP 36) + (UNSPEC_EH_RETURN 37) + (UNSPEC_LD_MPIC 38) ; load_macho_picbase + (UNSPEC_TRUNC_NOOP 39) + + ; For SSE/MMX support: + (UNSPEC_FIX_NOTRUNC 40) + (UNSPEC_MASKMOV 41) + (UNSPEC_MOVMSK 42) + (UNSPEC_MOVNT 43) + (UNSPEC_MOVU 44) + (UNSPEC_RCP 45) + (UNSPEC_RSQRT 46) + (UNSPEC_SFENCE 47) + 
(UNSPEC_PFRCP 49) + (UNSPEC_PFRCPIT1 40) + (UNSPEC_PFRCPIT2 41) + (UNSPEC_PFRSQRT 42) + (UNSPEC_PFRSQIT1 43) + (UNSPEC_MFENCE 44) + (UNSPEC_LFENCE 45) + (UNSPEC_PSADBW 46) + (UNSPEC_LDDQU 47) + (UNSPEC_MS_TO_SYSV_CALL 48) + + ; Generic math support + (UNSPEC_COPYSIGN 50) + (UNSPEC_IEEE_MIN 51) ; not commutative + (UNSPEC_IEEE_MAX 52) ; not commutative + + ; x87 Floating point + (UNSPEC_SIN 60) + (UNSPEC_COS 61) + (UNSPEC_FPATAN 62) + (UNSPEC_FYL2X 63) + (UNSPEC_FYL2XP1 64) + (UNSPEC_FRNDINT 65) + (UNSPEC_FIST 66) + (UNSPEC_F2XM1 67) + (UNSPEC_TAN 68) + (UNSPEC_FXAM 69) + + ; x87 Rounding + (UNSPEC_FRNDINT_FLOOR 70) + (UNSPEC_FRNDINT_CEIL 71) + (UNSPEC_FRNDINT_TRUNC 72) + (UNSPEC_FRNDINT_MASK_PM 73) + (UNSPEC_FIST_FLOOR 74) + (UNSPEC_FIST_CEIL 75) + + ; x87 Double output FP + (UNSPEC_SINCOS_COS 80) + (UNSPEC_SINCOS_SIN 81) + (UNSPEC_XTRACT_FRACT 84) + (UNSPEC_XTRACT_EXP 85) + (UNSPEC_FSCALE_FRACT 86) + (UNSPEC_FSCALE_EXP 87) + (UNSPEC_FPREM_F 88) + (UNSPEC_FPREM_U 89) + (UNSPEC_FPREM1_F 90) + (UNSPEC_FPREM1_U 91) + + (UNSPEC_C2_FLAG 95) + (UNSPEC_FXAM_MEM 96) + + ; SSP patterns + (UNSPEC_SP_SET 100) + (UNSPEC_SP_TEST 101) + (UNSPEC_SP_TLS_SET 102) + (UNSPEC_SP_TLS_TEST 103) + + ; SSSE3 + (UNSPEC_PSHUFB 120) + (UNSPEC_PSIGN 121) + (UNSPEC_PALIGNR 122) + + ; For SSE4A support + (UNSPEC_EXTRQI 130) + (UNSPEC_EXTRQ 131) + (UNSPEC_INSERTQI 132) + (UNSPEC_INSERTQ 133) + + ; For SSE4.1 support + (UNSPEC_BLENDV 134) + (UNSPEC_INSERTPS 135) + (UNSPEC_DP 136) + (UNSPEC_MOVNTDQA 137) + (UNSPEC_MPSADBW 138) + (UNSPEC_PHMINPOSUW 139) + (UNSPEC_PTEST 140) + (UNSPEC_ROUND 141) + + ; For SSE4.2 support + (UNSPEC_CRC32 143) + (UNSPEC_PCMPESTR 144) + (UNSPEC_PCMPISTR 145) + + ;; For SSE5 + (UNSPEC_SSE5_INTRINSIC 150) + (UNSPEC_SSE5_UNSIGNED_CMP 151) + (UNSPEC_SSE5_TRUEFALSE 152) + (UNSPEC_SSE5_PERMUTE 153) + (UNSPEC_FRCZ 154) + (UNSPEC_CVTPH2PS 155) + (UNSPEC_CVTPS2PH 156) + + ; For AES support + (UNSPEC_AESENC 159) + (UNSPEC_AESENCLAST 160) + (UNSPEC_AESDEC 161) + (UNSPEC_AESDECLAST 
162) + (UNSPEC_AESIMC 163) + (UNSPEC_AESKEYGENASSIST 164) + + ; For PCLMUL support + (UNSPEC_PCLMUL 165) + + ; For AVX support + (UNSPEC_PCMP 166) + (UNSPEC_VPERMIL 167) + (UNSPEC_VPERMIL2F128 168) + (UNSPEC_MASKLOAD 169) + (UNSPEC_MASKSTORE 170) + (UNSPEC_CAST 171) + (UNSPEC_VTESTP 172) + ]) + +(define_constants + [(UNSPECV_BLOCKAGE 0) + (UNSPECV_STACK_PROBE 1) + (UNSPECV_EMMS 2) + (UNSPECV_LDMXCSR 3) + (UNSPECV_STMXCSR 4) + (UNSPECV_FEMMS 5) + (UNSPECV_CLFLUSH 6) + (UNSPECV_ALIGN 7) + (UNSPECV_MONITOR 8) + (UNSPECV_MWAIT 9) + (UNSPECV_CMPXCHG 10) + (UNSPECV_XCHG 12) + (UNSPECV_LOCK 13) + (UNSPECV_PROLOGUE_USE 14) + (UNSPECV_CLD 15) + (UNSPECV_VZEROALL 16) + (UNSPECV_VZEROUPPER 17) + ]) + +;; Constants to represent pcomtrue/pcomfalse variants +(define_constants + [(PCOM_FALSE 0) + (PCOM_TRUE 1) + (COM_FALSE_S 2) + (COM_FALSE_P 3) + (COM_TRUE_S 4) + (COM_TRUE_P 5) + ]) + +;; Constants used in the SSE5 pperm instruction +(define_constants + [(PPERM_SRC 0x00) /* copy source */ + (PPERM_INVERT 0x20) /* invert source */ + (PPERM_REVERSE 0x40) /* bit reverse source */ + (PPERM_REV_INV 0x60) /* bit reverse & invert src */ + (PPERM_ZERO 0x80) /* all 0's */ + (PPERM_ONES 0xa0) /* all 1's */ + (PPERM_SIGN 0xc0) /* propagate sign bit */ + (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */ + (PPERM_SRC1 0x00) /* use first source byte */ + (PPERM_SRC2 0x10) /* use second source byte */ + ]) + +;; Registers by name. 
+(define_constants + [(AX_REG 0) + (DX_REG 1) + (CX_REG 2) + (BX_REG 3) + (SI_REG 4) + (DI_REG 5) + (BP_REG 6) + (SP_REG 7) + (ST0_REG 8) + (ST1_REG 9) + (ST2_REG 10) + (ST3_REG 11) + (ST4_REG 12) + (ST5_REG 13) + (ST6_REG 14) + (ST7_REG 15) + (FLAGS_REG 17) + (FPSR_REG 18) + (FPCR_REG 19) + (XMM0_REG 21) + (XMM1_REG 22) + (XMM2_REG 23) + (XMM3_REG 24) + (XMM4_REG 25) + (XMM5_REG 26) + (XMM6_REG 27) + (XMM7_REG 28) + (MM0_REG 29) + (MM1_REG 30) + (MM2_REG 31) + (MM3_REG 32) + (MM4_REG 33) + (MM5_REG 34) + (MM6_REG 35) + (MM7_REG 36) + (R8_REG 37) + (R9_REG 38) + (R10_REG 39) + (R11_REG 40) + (R13_REG 42) + (XMM8_REG 45) + (XMM9_REG 46) + (XMM10_REG 47) + (XMM11_REG 48) + (XMM12_REG 49) + (XMM13_REG 50) + (XMM14_REG 51) + (XMM15_REG 52) + ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls ;; from i386.c. @@ -108,34 +315,55 @@ ;; "reload_completed && TARGET_64BIT". -;; Processor type. This attribute must exactly match the processor_type -;; enumeration in i386.h. -(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4" - (const (symbol_ref "ix86_cpu"))) +;; Processor type. +(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2, + generic64,amdfam10" + (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. Refinements due to arguments to be ;; provided in other attributes. 
(define_attr "type" - "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx,fistp" + "other,multi, + alu,alu1,negnot,imov,imovx,lea, + incdec,ishift,ishift1,rotate,rotate1,imul,idiv, + icmp,test,ibr,setcc,icmov, + push,pop,call,callv,leave, + str,bitmanip, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, + sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, + ssemuladd,sse4arg, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) ;; Main data type used by the insn -(define_attr "mode" "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI" +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF" (const_string "unknown")) -;; Set for i387 operations. -(define_attr "i387" "" - (if_then_else (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp") - (const_int 1) - (const_int 0))) +;; The CPU unit operations uses. +(define_attr "unit" "integer,i387,sse,mmx,unknown" + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") + (const_string "i387") + (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt, + ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg") + (const_string "sse") + (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (const_string "mmx") + (eq_attr "type" "other") + (const_string "unknown")] + (const_string "integer"))) ;; The (bounding maximum) length of an instruction immediate. 
(define_attr "length_immediate" "" - (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,sse,mmx") + (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave, + bitmanip") (const_int 0) - (eq_attr "i387" "1") + (eq_attr "unit" "i387,sse,mmx") (const_int 0) - (eq_attr "type" "alu1,negnot,alu,icmp,imovx,ishift,imul,push,pop") + (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1, + imul,icmp,push,pop") (symbol_ref "ix86_attr_length_immediate_default(insn,1)") (eq_attr "type" "imov,test") (symbol_ref "ix86_attr_length_immediate_default(insn,0)") @@ -152,11 +380,12 @@ (eq_attr "type" "ibr") (const_int 1) ] - (symbol_ref "/* Update immediate_length and other attributes! */ abort(),1"))) + (symbol_ref "/* Update immediate_length and other attributes! */ + gcc_unreachable (),1"))) ;; The (bounding maximum) length of an instruction address. (define_attr "length_address" "" - (cond [(eq_attr "type" "str,cld,other,multi,fxch") + (cond [(eq_attr "type" "str,other,multi,fxch") (const_int 0) (and (eq_attr "type" "call") (match_operand 0 "constant_call_address_operand" "")) @@ -169,24 +398,70 @@ ;; Set when length prefix is used. (define_attr "prefix_data16" "" - (if_then_else (eq_attr "mode" "HI") + (if_then_else (ior (eq_attr "mode" "HI") + (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF"))) (const_int 1) (const_int 0))) ;; Set when string REP prefix is used. -(define_attr "prefix_rep" "" (const_int 0)) +(define_attr "prefix_rep" "" + (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF")) + (const_int 1) + (const_int 0))) ;; Set when 0f opcode prefix is used. (define_attr "prefix_0f" "" - (if_then_else (eq_attr "type" "imovx,setcc,icmov,sse,mmx") + (if_then_else + (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip") + (eq_attr "unit" "sse,mmx")) (const_int 1) (const_int 0))) +;; Set when REX opcode prefix is used. 
+(define_attr "prefix_rex" "" + (cond [(and (eq_attr "mode" "DI") + (eq_attr "type" "!push,pop,call,callv,leave,ibr")) + (const_int 1) + (and (eq_attr "mode" "QI") + (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)") + (const_int 0))) + (const_int 1) + (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)") + (const_int 0)) + (const_int 1) + ] + (const_int 0))) + +;; There are also additional prefixes in SSSE3. +(define_attr "prefix_extra" "" (const_int 0)) + +;; Prefix used: original, VEX or maybe VEX. +(define_attr "prefix" "orig,vex,maybe_vex" + (if_then_else (eq_attr "mode" "OI,V8SF,V4DF") + (const_string "vex") + (const_string "orig"))) + +;; There is a 8bit immediate for VEX. +(define_attr "prefix_vex_imm8" "" (const_int 0)) + +;; VEX W bit is used. +(define_attr "prefix_vex_w" "" (const_int 0)) + +;; The length of VEX prefix +(define_attr "length_vex" "" + (if_then_else (eq_attr "prefix_0f" "1") + (if_then_else (eq_attr "prefix_vex_w" "1") + (symbol_ref "ix86_attr_length_vex_default (insn, 1, 1)") + (symbol_ref "ix86_attr_length_vex_default (insn, 1, 0)")) + (if_then_else (eq_attr "prefix_vex_w" "1") + (symbol_ref "ix86_attr_length_vex_default (insn, 0, 1)") + (symbol_ref "ix86_attr_length_vex_default (insn, 0, 0)")))) + ;; Set when modrm byte is used. 
(define_attr "modrm" "" - (cond [(eq_attr "type" "str,cld") + (cond [(eq_attr "type" "str,leave") (const_int 0) - (eq_attr "i387" "1") + (eq_attr "unit" "i387") (const_int 0) (and (eq_attr "type" "incdec") (ior (match_operand:SI 1 "register_operand" "") @@ -199,8 +474,12 @@ (not (match_operand 0 "memory_operand" ""))) (const_int 0) (and (eq_attr "type" "imov") - (and (match_operand 0 "register_operand" "") - (match_operand 1 "immediate_operand" ""))) + (ior (and (match_operand 0 "register_operand" "") + (match_operand 1 "immediate_operand" "")) + (ior (and (match_operand 0 "ax_reg_operand" "") + (match_operand 1 "memory_displacement_only_operand" "")) + (and (match_operand 0 "memory_displacement_only_operand" "") + (match_operand 1 "ax_reg_operand" ""))))) (const_int 0) (and (eq_attr "type" "call") (match_operand 0 "constant_call_address_operand" "")) @@ -212,16 +491,30 @@ (const_int 1))) ;; The (bounding maximum) length of an instruction in bytes. -;; ??? fistp is in fact fldcw/fistp/fldcw sequence. Later we may want -;; to split it and compute proper length as for other insns. +;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences. +;; Later we may want to split them and compute proper length as for +;; other insns. 
(define_attr "length" "" - (cond [(eq_attr "type" "other,multi,fistp") + (cond [(eq_attr "type" "other,multi,fistp,frndint") (const_int 16) - ] + (eq_attr "type" "fcmp") + (const_int 4) + (eq_attr "unit" "i387") + (plus (const_int 2) + (plus (attr "prefix_data16") + (attr "length_address"))) + (ior (eq_attr "prefix" "vex") + (and (eq_attr "prefix" "maybe_vex") + (ne (symbol_ref "TARGET_AVX") (const_int 0)))) + (plus (attr "length_vex") + (plus (attr "prefix_vex_imm8") + (plus (attr "modrm") + (attr "length_address"))))] (plus (plus (attr "modrm") (plus (attr "prefix_0f") - (plus (attr "i387") - (const_int 1)))) + (plus (attr "prefix_rex") + (plus (attr "prefix_extra") + (const_int 1))))) (plus (attr "prefix_rep") (plus (attr "prefix_data16") (plus (attr "length_immediate") @@ -234,19 +527,25 @@ (define_attr "memory" "none,load,store,both,unknown" (cond [(eq_attr "type" "other,multi,str") (const_string "unknown") - (eq_attr "type" "lea,fcmov,fpspc,cld") + (eq_attr "type" "lea,fcmov,fpspc") (const_string "none") - (eq_attr "type" "fistp") + (eq_attr "type" "fistp,leave") (const_string "both") + (eq_attr "type" "frndint") + (const_string "load") (eq_attr "type" "push") (if_then_else (match_operand 1 "memory_operand" "") (const_string "both") (const_string "store")) - (eq_attr "type" "pop,setcc") + (eq_attr "type" "pop") (if_then_else (match_operand 0 "memory_operand" "") (const_string "both") (const_string "load")) - (eq_attr "type" "icmp,test") + (eq_attr "type" "setcc") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "store") + (const_string "none")) + (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp") (if_then_else (ior (match_operand 0 "memory_operand" "") (match_operand 1 "memory_operand" "")) (const_string "load") @@ -263,7 +562,7 @@ (if_then_else (match_operand 1 "constant_call_address_operand" "") (const_string "none") (const_string "load")) - (and (eq_attr "type" "alu1,negnot") + (and (eq_attr "type" "alu1,negnot,ishift1,sselog1") 
(match_operand 1 "memory_operand" "")) (const_string "both") (and (match_operand 0 "memory_operand" "") @@ -273,10 +572,15 @@ (const_string "store") (match_operand 1 "memory_operand" "") (const_string "load") - (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx") + (and (eq_attr "type" + "!alu1,negnot,ishift1, + imov,imovx,icmp,test,bitmanip, + fmov,fcmp,fsgn, + sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1, + sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt") (match_operand 2 "memory_operand" "")) (const_string "load") - (and (eq_attr "type" "icmov") + (and (eq_attr "type" "icmov,ssemuladd,sse4arg") (match_operand 3 "memory_operand" "")) (const_string "load") ] @@ -287,11 +591,11 @@ (define_attr "imm_disp" "false,true,unknown" (cond [(eq_attr "type" "other,multi") (const_string "unknown") - (and (eq_attr "type" "icmp,test,imov") + (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1") (and (match_operand 0 "memory_displacement_operand" "") (match_operand 1 "immediate_operand" ""))) (const_string "true") - (and (eq_attr "type" "alu,ishift,imul,idiv") + (and (eq_attr "type" "alu,ishift,rotate,imul,idiv") (and (match_operand 0 "memory_displacement_operand" "") (match_operand 2 "immediate_operand" ""))) (const_string "true") @@ -303,714 +607,132 @@ (define_attr "fp_int_src" "false,true" (const_string "false")) +;; Defines rounding mode of an FP operation. + +(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" + (const_string "any")) + ;; Describe a user's asm statement. (define_asm_attributes [(set_attr "length" "128") (set_attr "type" "multi")]) - -;; Pentium Scheduling -;; -;; The Pentium is an in-order core with two integer pipelines. - -;; True for insns that behave like prefixed insns on the Pentium. 
-(define_attr "pent_prefix" "false,true" - (if_then_else (ior (eq_attr "prefix_0f" "1") - (ior (eq_attr "prefix_data16" "1") - (eq_attr "prefix_rep" "1"))) - (const_string "true") - (const_string "false"))) - -;; Categorize how an instruction slots. - -;; The non-MMX Pentium slots an instruction with prefixes on U pipe only, -;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium -;; rules, because it results in noticeably better code on non-MMX Pentium -;; and doesn't hurt much on MMX. (Prefixed instructions are not very -;; common, so the scheduler usualy has a non-prefixed insn to pair). - -(define_attr "pent_pair" "uv,pu,pv,np" - (cond [(eq_attr "imm_disp" "true") - (const_string "np") - (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec") - (and (eq_attr "type" "pop,push") - (eq_attr "memory" "!both"))) - (if_then_else (eq_attr "pent_prefix" "true") - (const_string "pu") - (const_string "uv")) - (eq_attr "type" "ibr") - (const_string "pv") - (and (eq_attr "type" "ishift") - (match_operand 2 "const_int_operand" "")) - (const_string "pu") - (and (eq_attr "type" "call") - (match_operand 0 "constant_call_address_operand" "")) - (const_string "pv") - (and (eq_attr "type" "callv") - (match_operand 1 "constant_call_address_operand" "")) - (const_string "pv") - ] - (const_string "np"))) -;; Rough readiness numbers. Fine tuning happens in i386.c. -;; -;; u describes pipe U -;; v describes pipe V -;; uv describes either pipe U or V for those that can issue to either -;; np describes not paring -;; fpu describes fpu -;; fpm describes fp insns of different types are not pipelined. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "imul")) - 11 11) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "imul")) - 11 11) - -;; Rep movs takes minimally 12 cycles. 
-(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "str")) - 12 12) - -; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22 -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "idiv")) - 46 46) - -; Fp reg-reg moves takes 1 cycle. Loads takes 1 cycle for SF/DF mode, -; 3 cycles for XFmode. Stores takes 2 cycles for SF/DF and 3 for XF. -; fldz and fld1 takes 2 cycles. Only reg-reg moves are pairable. -; The integer <-> fp conversion is not modeled correctly. Fild behaves -; like normal fp operation and fist takes 6 cycles. - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF")))) - 3 3) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF")))) - 3 3) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (ior (match_operand 1 "immediate_operand" "") - (eq_attr "memory" "store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (ior (match_operand 1 "immediate_operand" "") - (eq_attr "memory" "store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "cld")) - 2 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (eq_attr "memory" "none,load"))) - 1 1) - -; Read/Modify/Write instructions usually take 3 cycles. 
-(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "pu") - (eq_attr "memory" "both")))) - 3 3) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "both")))) - 3 3) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,negnot,ishift") - (and (eq_attr "pent_pair" "np") - (eq_attr "memory" "both")))) - 3 3) - -; Read/Modify or Modify/Write instructions usually take 2 cycles. -(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "pu") - (eq_attr "memory" "load,store")))) - 2 2) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "load,store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "np") - (eq_attr "memory" "load,store")))) - 2 2) - -; Insns w/o memory operands and move instructions usually take one cycle. -(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "pu")) - 1 1) - -(define_function_unit "pent_v" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "pv")) - 1 1) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "!np")) - 1 1) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "np")) - 1 1) - -; Pairable insns only conflict with other non-pairable insns. 
-(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "both")))) - 3 3 - [(eq_attr "pent_pair" "np")]) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "load,store")))) - 2 2 - [(eq_attr "pent_pair" "np")]) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "!np")) - 1 1 - [(eq_attr "pent_pair" "np")]) - -; Floating point instructions usually blocks cycle longer when combined with -; integer instructions, because of the inpaired fxch instruction. -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")) - 2 2 - [(eq_attr "type" "!fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")]) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fcmp,fxch,fsgn")) - 1 1) - -; Addition takes 3 cycles; assume other random cruft does as well. -; ??? Trivial fp operations such as fabs or fchs takes only one cycle. -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fop,fop1,fistp")) - 3 1) - -; Multiplication takes 3 cycles and is only half pipelined. -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmul")) - 3 1) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmul")) - 2 2) - -; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles. -; They can overlap with integer insns. Only the last two cycles can overlap -; with other fp insns. Only fsin/fcos can overlap with multiplies. -; Only last two cycles of fsin/fcos can overlap with other instructions. 
-(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fdiv")) - 39 37) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fdiv")) - 39 39) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fpspc")) - 70 68) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fpspc")) - 70 70) - -;; Pentium Pro/PII Scheduling -;; -;; The PPro has an out-of-order core, but the instruction decoders are -;; naturally in-order and asymmetric. We get best performance by scheduling -;; for the decoders, for in doing so we give the oo execution unit the -;; most choices. - -;; Categorize how many uops an ia32 instruction evaluates to: -;; one -- an instruction with 1 uop can be decoded by any of the -;; three decoders. -;; few -- an instruction with 1 to 4 uops can be decoded only by -;; decoder 0. -;; many -- a complex instruction may take an unspecified number of -;; cycles to decode in decoder 0. - -(define_attr "ppro_uops" "one,few,many" - (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str") - (const_string "many") - (eq_attr "type" "icmov,fcmov,str,cld") - (const_string "few") - (eq_attr "type" "imov") - (if_then_else (eq_attr "memory" "store,both") - (const_string "few") - (const_string "one")) - (eq_attr "memory" "!none") - (const_string "few") - ] - (const_string "one"))) +;; All integer comparison codes. +(define_code_iterator int_cond [ne eq ge gt le lt geu gtu leu ltu ]) + +;; All floating-point comparison codes. +(define_code_iterator fp_cond [unordered ordered + uneq unge ungt unle unlt ltgt ]) + +(define_code_iterator plusminus [plus minus]) + +(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus]) + +;; Base name for define_insn +(define_code_attr plusminus_insn + [(plus "add") (ss_plus "ssadd") (us_plus "usadd") + (minus "sub") (ss_minus "sssub") (us_minus "ussub")]) + +;; Base name for insn mnemonic. 
+(define_code_attr plusminus_mnemonic + [(plus "add") (ss_plus "adds") (us_plus "addus") + (minus "sub") (ss_minus "subs") (us_minus "subus")]) + +;; Mark commutative operators as such in constraints. +(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%") + (minus "") (ss_minus "") (us_minus "")]) + +;; Mapping of signed max and min +(define_code_iterator smaxmin [smax smin]) + +;; Mapping of unsigned max and min +(define_code_iterator umaxmin [umax umin]) + +;; Mapping of signed/unsigned max and min +(define_code_iterator maxmin [smax smin umax umin]) + +;; Base name for integer and FP insn mnemonic +(define_code_attr maxminiprefix [(smax "maxs") (smin "mins") + (umax "maxu") (umin "minu")]) +(define_code_attr maxminfprefix [(smax "max") (smin "min")]) + +;; Mapping of parallel logic operators +(define_code_iterator plogic [and ior xor]) + +;; Base name for insn mnemonic. +(define_code_attr plogicprefix [(and "and") (ior "or") (xor "xor")]) + +;; Mapping of abs neg operators +(define_code_iterator absneg [abs neg]) + +;; Base name for x87 insn mnemonic. +(define_code_attr absnegprefix [(abs "abs") (neg "chs")]) + +;; All single word integer modes. +(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")]) + +;; Single word integer modes without QImode. +(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")]) + +;; Instruction suffix for integer modes. +(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) + +;; Register class for integer modes. +(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")]) + +;; Immediate operand constraint for integer modes. +(define_mode_attr i [(QI "n") (HI "n") (SI "i") (DI "e")]) + +;; General operand predicate for integer modes. 
+(define_mode_attr general_operand + [(QI "general_operand") + (HI "general_operand") + (SI "general_operand") + (DI "x86_64_general_operand")]) + +;; SSE and x87 SFmode and DFmode floating point modes +(define_mode_iterator MODEF [SF DF]) + +;; All x87 floating point modes +(define_mode_iterator X87MODEF [SF DF XF]) + +;; All integer modes handled by x87 fisttp operator. +(define_mode_iterator X87MODEI [HI SI DI]) + +;; All integer modes handled by integer x87 operators. +(define_mode_iterator X87MODEI12 [HI SI]) + +;; All integer modes handled by SSE cvtts?2si* operators. +(define_mode_iterator SSEMODEI24 [SI DI]) + +;; SSE asm suffix for floating point modes +(define_mode_attr ssemodefsuffix [(SF "s") (DF "d")]) + +;; SSE vector mode corresponding to a scalar mode +(define_mode_attr ssevecmode + [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")]) + +;; Instruction suffix for REX 64bit operators. +(define_mode_attr rex64suffix [(SI "") (DI "{q}")]) + +;; This mode iterator allows :P to be used for patterns that operate on +;; pointer-sized quantities. Exactly one of the two alternatives will match. +(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) -;; Rough readiness numbers. Fine tuning happens in i386.c. -;; -;; p0 describes port 0. -;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either. -;; p2 describes port 2 for loads. -;; p34 describes ports 3 and 4 for stores. -;; fpu describes the fpu accessed via port 0. -;; ??? It is less than clear if there are separate fadd and fmul units -;; that could operate in parallel. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "ishift,lea,ibr,cld")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imul")) - 4 1) - -;; ??? 
Does the divider lock out the pipe while it works, -;; or is there a disconnected unit? -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "idiv")) - 17 17) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fop,fop1,fsgn,fistp")) - 3 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fcmov")) - 2 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fcmp")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmov")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmul")) - 5 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fdiv,fpspc")) - 56 1) - -(define_function_unit "ppro_p01" 2 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "!imov,fmov")) - 1 1) - -(define_function_unit "ppro_p01" 2 0 - (and (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imov,fmov")) - (eq_attr "memory" "none")) - 1 1) - -(define_function_unit "ppro_p2" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (ior (eq_attr "type" "pop") - (eq_attr "memory" "load,both"))) - 3 1) - -(define_function_unit "ppro_p34" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (ior (eq_attr "type" "push") - (eq_attr "memory" "store,both"))) - 1 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fop,fop1,fsgn,fmov,fcmp,fcmov,fistp")) - 1 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmul")) - 5 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fdiv,fpspc")) - 56 56) - -;; imul uses the fpu. ??? does it have the same throughput as fmul? 
-(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imul")) - 4 1) -;; AMD K6/K6-2 Scheduling -;; -;; The K6 has similar architecture to PPro. Important difference is, that -;; there are only two decoders and they seems to be much slower than execution -;; units. So we have to pay much more attention to proper decoding for -;; schedulers. We share most of scheduler code for PPro in i386.c -;; -;; The fp unit is not pipelined and do one operation per two cycles including -;; the FXCH. -;; -;; alu describes both ALU units (ALU-X and ALU-Y). -;; alux describes X alu unit -;; fpu describes FPU unit -;; load describes load unit. -;; branch describes branch unit. -;; store decsribes store unit. This unit is not modelled completely and only -;; used to model lea operation. Otherwise it lie outside of the critical -;; path. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - -;; The decoder specification is in the PPro section above! - -;; Shift instructions and certain arithmetic are issued only to X pipe. -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "ishift,alu1,negnot,cld")) - 1 1) - -;; The QI mode arithmetic is issued to X pipe only. 
-(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec") - (match_operand:QI 0 "general_operand" ""))) - 1 1) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "ishift,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea")) - 1 1) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "imov") - (eq_attr "memory" "none"))) - 1 1) - -(define_function_unit "k6_branch" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "call,callv,ibr")) - 1 1) - -;; Load unit have two cycle latency, but we take care for it in adjust_cost -(define_function_unit "k6_load" 1 0 - (and (eq_attr "cpu" "k6") - (ior (eq_attr "type" "pop") - (eq_attr "memory" "load,both"))) - 1 1) - -(define_function_unit "k6_load" 1 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "str") - (eq_attr "memory" "load,both"))) - 10 10) - -;; Lea have two instructions, so latency is probably 2 -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "lea")) - 2 1) - -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "str")) - 10 10) - -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (ior (eq_attr "type" "push") - (eq_attr "memory" "store,both"))) - 1 1) - -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fop,fop1,fmov,fcmp,fistp")) - 2 2) - -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fmul")) - 2 2) - -;; ??? Guess -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fdiv,fpspc")) - 56 56) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "imul")) - 2 2) - -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "imul")) - 2 2) - -;; ??? 
Guess -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "idiv")) - 17 17) - -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "idiv")) - 17 17) +;; Scheduling descriptions + +(include "pentium.md") +(include "ppro.md") +(include "k6.md") +(include "athlon.md") +(include "geode.md") + -;; AMD Athlon Scheduling -;; -;; The Athlon does contain three pipelined FP units, three integer units and -;; three address generation units. -;; -;; The predecode logic is determining boundaries of instructions in the 64 -;; byte cache line. So the cache line straddling problem of K6 might be issue -;; here as well, but it is not noted in the documentation. -;; -;; Three DirectPath instructions decoders and only one VectorPath decoder -;; is available. They can decode three DirectPath instructions or one VectorPath -;; instruction per cycle. -;; Decoded macro instructions are then passed to 72 entry instruction control -;; unit, that passes -;; it to the specialized integer (18 entry) and fp (36 entry) schedulers. -;; -;; The load/store queue unit is not attached to the schedulers but -;; communicates with all the execution units separately instead. 
- -(define_attr "athlon_decode" "direct,vector" - (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov") - (const_string "vector") - (and (eq_attr "type" "push") - (match_operand 1 "memory_operand" "")) - (const_string "vector") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF"))) - (const_string "vector")] - (const_string "direct"))) - -(define_function_unit "athlon_vectordec" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "vector")) - 1 1) - -(define_function_unit "athlon_directdec" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "direct")) - 1 1) - -(define_function_unit "athlon_vectordec" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "direct")) - 1 1 [(eq_attr "athlon_decode" "vector")]) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ibr,call,callv,icmov,cld,pop,setcc,push,pop")) - 1 1) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "str")) - 15 15) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "imul")) - 5 0) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "idiv")) - 42 0) - -(define_function_unit "athlon_muldiv" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "imul")) - 5 0) - -(define_function_unit "athlon_muldiv" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "idiv")) - 42 42) - -(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any" - (cond [(eq_attr "type" "fop,fop1,fcmp,fistp") - (const_string "add") - (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov") - (const_string "mul") - (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both")) - (const_string "store") - (and (eq_attr "type" "fmov") (eq_attr "memory" "load")) - (const_string "any") - (and (eq_attr "type" "fmov") - (ior 
(match_operand:SI 1 "register_operand" "") - (match_operand 1 "immediate_operand" ""))) - (const_string "store") - (eq_attr "type" "fmov") - (const_string "muladd")] - (const_string "none"))) +;; Operand and operator predicates and constraints -;; We use latencies 1 for definitions. This is OK to model colisions -;; in execution units. The real latencies are modeled in the "fp" pipeline. - -;; fsin, fcos: 96-192 -;; fsincos: 107-211 -;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode. -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fpspc")) - 100 1) - -;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode. -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fdiv")) - 24 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fop,fop1,fmul,fistp")) - 4 1) - -;; XFmode loads are slow. -;; XFmode store is slow too (8 cycles), but we don't need to model it, because -;; there are no dependent instructions. - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load") - (eq_attr "mode" "XF")))) - 10 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fmov,fsgn")) - 2 1) - -;; fcmp and ftst instructions -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fcmp") - (eq_attr "athlon_decode" "direct"))) - 3 1) - -;; fcmpi instructions. 
-(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fcmp") - (eq_attr "athlon_decode" "vector"))) - 3 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fcmov")) - 7 1) - -(define_function_unit "athlon_fp_mul" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "mul")) - 1 1) - -(define_function_unit "athlon_fp_add" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "add")) - 1 1) - -(define_function_unit "athlon_fp_muladd" 2 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "muladd,mul,add")) - 1 1) - -(define_function_unit "athlon_fp_store" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "store")) - 1 1) - -;; We don't need to model the Address Generation Unit, since we don't model -;; the re-order buffer yet and thus we never schedule more than three operations -;; at time. Later we may want to experiment with MD_SCHED macros modeling the -;; decoders independently on the functional units. - -;(define_function_unit "athlon_agu" 3 0 -; (and (eq_attr "cpu" "athlon") -; (and (eq_attr "memory" "!none") -; (eq_attr "athlon_fpunits" "none"))) -; 1 1) - -;; Model load unit to avoid too long sequences of loads. We don't need to -;; model store queue, since it is hardly going to be bottleneck. - -(define_function_unit "athlon_load" 2 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "memory" "load,both")) - 1 1) +(include "predicates.md") +(include "constraints.md") ;; Compare instructions. @@ -1019,13 +741,26 @@ ;; actually generating RTL. The bCOND or sCOND (emitted immediately ;; after the cmp) will actually emit the cmpM. 
+(define_expand "cmpti" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "x86_64_general_operand" "")))] + "TARGET_64BIT" +{ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[0] = force_reg (TImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + (define_expand "cmpdi" - [(set (reg:CC 17) + [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") (match_operand:DI 1 "x86_64_general_operand" "")))] "" { - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + if (MEM_P (operands[0]) && MEM_P (operands[1])) operands[0] = force_reg (DImode, operands[0]); ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -1033,12 +768,12 @@ }) (define_expand "cmpsi" - [(set (reg:CC 17) + [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:SI 0 "cmpsi_operand" "") (match_operand:SI 1 "general_operand" "")))] "" { - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + if (MEM_P (operands[0]) && MEM_P (operands[1])) operands[0] = force_reg (SImode, operands[0]); ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -1046,12 +781,12 @@ }) (define_expand "cmphi" - [(set (reg:CC 17) + [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:HI 0 "nonimmediate_operand" "") (match_operand:HI 1 "general_operand" "")))] "" { - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + if (MEM_P (operands[0]) && MEM_P (operands[1])) operands[0] = force_reg (HImode, operands[0]); ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -1059,12 +794,12 @@ }) (define_expand "cmpqi" - [(set (reg:CC 17) + [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:QI 0 "nonimmediate_operand" "") (match_operand:QI 1 "general_operand" "")))] "TARGET_QIMODE_MATH" { - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + if (MEM_P (operands[0]) && MEM_P (operands[1])) 
operands[0] = force_reg (QImode, operands[0]); ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -1072,19 +807,19 @@ }) (define_insn "cmpdi_ccno_1_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:DI 0 "nonimmediate_operand" "r,?mr") - (match_operand:DI 1 "const0_operand" "n,n")))] + (match_operand:DI 1 "const0_operand" "")))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" "@ - test{q}\t{%0, %0|%0, %0} + test{q}\t%0, %0 cmp{q}\t{%1, %0|%0, %1}" [(set_attr "type" "test,icmp") (set_attr "length_immediate" "0,1") (set_attr "mode" "DI")]) (define_insn "*cmpdi_minus_1_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:DI (match_operand:DI 0 "nonimmediate_operand" "rm,r") (match_operand:DI 1 "x86_64_general_operand" "re,mr")) (const_int 0)))] @@ -1094,14 +829,14 @@ (set_attr "mode" "DI")]) (define_expand "cmpdi_1_rex64" - [(set (reg:CC 17) + [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") (match_operand:DI 1 "general_operand" "")))] "TARGET_64BIT" "") (define_insn "cmpdi_1_insn_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:DI 0 "nonimmediate_operand" "mr,r") (match_operand:DI 1 "x86_64_general_operand" "re,mr")))] "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" @@ -1111,19 +846,19 @@ (define_insn "*cmpsi_ccno_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:SI 0 "nonimmediate_operand" "r,?mr") - (match_operand:SI 1 "const0_operand" "n,n")))] + (match_operand:SI 1 "const0_operand" "")))] "ix86_match_ccmode (insn, CCNOmode)" "@ - test{l}\t{%0, %0|%0, %0} + test{l}\t%0, %0 cmp{l}\t{%1, %0|%0, %1}" [(set_attr "type" "test,icmp") (set_attr "length_immediate" "0,1") (set_attr "mode" "SI")]) (define_insn "*cmpsi_minus_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:SI (match_operand:SI 0 "nonimmediate_operand" "rm,r") (match_operand:SI 1 "general_operand" "ri,mr")) (const_int 0)))] @@ -1133,38 +868,38 @@ (set_attr "mode" 
"SI")]) (define_expand "cmpsi_1" - [(set (reg:CC 17) - (compare:CC (match_operand:SI 0 "nonimmediate_operand" "rm,r") - (match_operand:SI 1 "general_operand" "ri,mr")))] + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" "")))] "" "") (define_insn "*cmpsi_1_insn" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:SI 0 "nonimmediate_operand" "rm,r") (match_operand:SI 1 "general_operand" "ri,mr")))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_match_ccmode (insn, CCmode)" "cmp{l}\t{%1, %0|%0, %1}" [(set_attr "type" "icmp") (set_attr "mode" "SI")]) (define_insn "*cmphi_ccno_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:HI 0 "nonimmediate_operand" "r,?mr") - (match_operand:HI 1 "const0_operand" "n,n")))] + (match_operand:HI 1 "const0_operand" "")))] "ix86_match_ccmode (insn, CCNOmode)" "@ - test{w}\t{%0, %0|%0, %0} + test{w}\t%0, %0 cmp{w}\t{%1, %0|%0, %1}" [(set_attr "type" "test,icmp") (set_attr "length_immediate" "0,1") (set_attr "mode" "HI")]) (define_insn "*cmphi_minus_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:HI (match_operand:HI 0 "nonimmediate_operand" "rm,r") - (match_operand:HI 1 "general_operand" "ri,mr")) + (match_operand:HI 1 "general_operand" "rn,mr")) (const_int 0)))] "ix86_match_ccmode (insn, CCGOCmode)" "cmp{w}\t{%1, %0|%0, %1}" @@ -1172,41 +907,41 @@ (set_attr "mode" "HI")]) (define_insn "*cmphi_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:HI 0 "nonimmediate_operand" "rm,r") - (match_operand:HI 1 "general_operand" "ri,mr")))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + (match_operand:HI 1 "general_operand" "rn,mr")))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_match_ccmode (insn, CCmode)" "cmp{w}\t{%1, %0|%0, %1}" [(set_attr "type" "icmp") (set_attr "mode" "HI")]) (define_insn 
"*cmpqi_ccno_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:QI 0 "nonimmediate_operand" "q,?mq") - (match_operand:QI 1 "const0_operand" "n,n")))] + (match_operand:QI 1 "const0_operand" "")))] "ix86_match_ccmode (insn, CCNOmode)" "@ - test{b}\t{%0, %0|%0, %0} + test{b}\t%0, %0 cmp{b}\t{$0, %0|%0, 0}" [(set_attr "type" "test,icmp") (set_attr "length_immediate" "0,1") (set_attr "mode" "QI")]) (define_insn "*cmpqi_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:QI 0 "nonimmediate_operand" "qm,q") - (match_operand:QI 1 "general_operand" "qi,mq")))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + (match_operand:QI 1 "general_operand" "qn,mq")))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%1, %0|%0, %1}" [(set_attr "type" "icmp") (set_attr "mode" "QI")]) (define_insn "*cmpqi_minus_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:QI (match_operand:QI 0 "nonimmediate_operand" "qm,q") - (match_operand:QI 1 "general_operand" "qi,mq")) + (match_operand:QI 1 "general_operand" "qn,mq")) (const_int 0)))] "ix86_match_ccmode (insn, CCGOCmode)" "cmp{b}\t{%1, %0|%0, %1}" @@ -1214,7 +949,7 @@ (set_attr "mode" "QI")]) (define_insn "*cmpqi_ext_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:QI 0 "general_operand" "Qm") (subreg:QI @@ -1228,7 +963,7 @@ (set_attr "mode" "QI")]) (define_insn "*cmpqi_ext_1_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:QI 0 "register_operand" "Q") (subreg:QI @@ -1242,14 +977,14 @@ (set_attr "mode" "QI")]) (define_insn "*cmpqi_ext_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (subreg:QI (zero_extract:SI (match_operand 0 "ext_register_operand" "Q") (const_int 8) (const_int 8)) 0) - (match_operand:QI 1 "const0_operand" "n")))] + (match_operand:QI 1 "const0_operand" "")))] "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t%h0, %h0" [(set_attr "type" "test") @@ -1257,7 +992,7 @@ (set_attr 
"mode" "QI")]) (define_expand "cmpqi_ext_3" - [(set (reg:CC 17) + [(set (reg:CC FLAGS_REG) (compare:CC (subreg:QI (zero_extract:SI @@ -1269,7 +1004,7 @@ "") (define_insn "cmpqi_ext_3_insn" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (subreg:QI (zero_extract:SI @@ -1283,7 +1018,7 @@ (set_attr "mode" "QI")]) (define_insn "cmpqi_ext_3_insn_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (subreg:QI (zero_extract:SI @@ -1297,7 +1032,7 @@ (set_attr "mode" "QI")]) (define_insn "*cmpqi_ext_4" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (subreg:QI (zero_extract:SI @@ -1320,20 +1055,9 @@ ;; the old patterns did, but with many more of them. (define_expand "cmpxf" - [(set (reg:CC 17) - (compare:CC (match_operand:XF 0 "cmp_fp_expander_operand" "") - (match_operand:XF 1 "cmp_fp_expander_operand" "")))] - "!TARGET_64BIT && TARGET_80387" -{ - ix86_compare_op0 = operands[0]; - ix86_compare_op1 = operands[1]; - DONE; -}) - -(define_expand "cmptf" - [(set (reg:CC 17) - (compare:CC (match_operand:TF 0 "cmp_fp_expander_operand" "") - (match_operand:TF 1 "cmp_fp_expander_operand" "")))] + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:XF 0 "nonmemory_operand" "") + (match_operand:XF 1 "nonmemory_operand" "")))] "TARGET_80387" { ix86_compare_op0 = operands[0]; @@ -1341,22 +1065,11 @@ DONE; }) -(define_expand "cmpdf" - [(set (reg:CC 17) - (compare:CC (match_operand:DF 0 "cmp_fp_expander_operand" "") - (match_operand:DF 1 "cmp_fp_expander_operand" "")))] - "TARGET_80387 || TARGET_SSE2" -{ - ix86_compare_op0 = operands[0]; - ix86_compare_op1 = operands[1]; - DONE; -}) - -(define_expand "cmpsf" - [(set (reg:CC 17) - (compare:CC (match_operand:SF 0 "cmp_fp_expander_operand" "") - (match_operand:SF 1 "cmp_fp_expander_operand" "")))] - "TARGET_80387 || TARGET_SSE" +(define_expand "cmp<mode>" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:MODEF 0 "cmp_fp_expander_operand" "") + (match_operand:MODEF 1 "cmp_fp_expander_operand" "")))] + "TARGET_80387 || 
(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" { ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -1369,281 +1082,358 @@ ;; CCFPmode compare with exceptions ;; CCFPUmode compare with no exceptions -;; %%% It is an unfortunate fact that ftst has no non-popping variant, -;; and that fp moves clobber the condition codes, and that there is -;; currently no way to describe this fact to reg-stack. So there are -;; no splitters yet for this. - -;; %%% YIKES! This scheme does not retain a strong connection between -;; the real compare and the ultimate cc0 user, so CC_REVERSE does not -;; work! Only allow tos/mem with tos in op 0. -;; -;; Hmm, of course, this is what the actual _hardware_ does. Perhaps -;; things aren't as bad as they sound... +;; We may not use "#" to split and emit these, since the REG_DEAD notes +;; used to manage the reg stack popping would not be preserved. (define_insn "*cmpfp_0" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI - [(compare:CCFP (match_operand 1 "register_operand" "f") - (match_operand 2 "const0_operand" "X"))] 9))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + [(compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" ""))] + UNSPEC_FNSTSW))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "ftst\;fnstsw\t%0\;fstp\t%y0"; - else - return "ftst\;fnstsw\t%0"; -} + "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") - (set_attr "mode" "unknownfp")]) - -;; We may not use "#" to split and emit these, since the REG_DEAD notes -;; used to manage the reg stack popping would not be preserved. 
+ (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) -(define_insn "*cmpfp_2_sf" - [(set (reg:CCFP 18) +(define_insn_and_split "*cmpfp_0_cc" + [(set (reg:CCFP FLAGS_REG) (compare:CCFP - (match_operand:SF 0 "register_operand" "f") - (match_operand:SF 1 "nonimmediate_operand" "fm")))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, 0, 0);" - [(set_attr "type" "fcmp") - (set_attr "mode" "SF")]) - -(define_insn "*cmpfp_2_sf_1" - [(set (match_operand:HI 0 "register_operand" "=a") + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" ""))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:HI - [(compare:CCFP - (match_operand:SF 1 "register_operand" "f") - (match_operand:SF 2 "nonimmediate_operand" "fm"))] 9))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, 2, 0);" - [(set_attr "type" "fcmp") - (set_attr "mode" "SF")]) - -(define_insn "*cmpfp_2_df" - [(set (reg:CCFP 18) - (compare:CCFP - (match_operand:DF 0 "register_operand" "f") - (match_operand:DF 1 "nonimmediate_operand" "fm")))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, 0, 0);" - [(set_attr "type" "fcmp") - (set_attr "mode" "DF")]) + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) -(define_insn "*cmpfp_2_df_1" +(define_insn "*cmpfp_xf" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI 
[(compare:CCFP - (match_operand:DF 1 "register_operand" "f") - (match_operand:DF 2 "nonimmediate_operand" "fm"))] 9))] + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] "TARGET_80387" - "* return output_fp_compare (insn, operands, 2, 0);" - [(set_attr "type" "multi") - (set_attr "mode" "DF")]) - -(define_insn "*cmpfp_2_xf" - [(set (reg:CCFP 18) - (compare:CCFP - (match_operand:XF 0 "register_operand" "f") - (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" "* return output_fp_compare (insn, operands, 0, 0);" - [(set_attr "type" "fcmp") + [(set_attr "type" "multi") + (set_attr "unit" "i387") (set_attr "mode" "XF")]) -(define_insn "*cmpfp_2_tf" - [(set (reg:CCFP 18) +(define_insn_and_split "*cmpfp_xf_cc" + [(set (reg:CCFP FLAGS_REG) (compare:CCFP - (match_operand:TF 0 "register_operand" "f") - (match_operand:TF 1 "register_operand" "f")))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, 0, 0);" - [(set_attr "type" "fcmp") - (set_attr "mode" "XF")]) - -(define_insn "*cmpfp_2_xf_1" - [(set (match_operand:HI 0 "register_operand" "=a") + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:HI - [(compare:CCFP - (match_operand:XF 1 "register_operand" "f") - (match_operand:XF 2 "register_operand" "f"))] 9))] - "!TARGET_64BIT && TARGET_80387" - "* return output_fp_compare (insn, operands, 2, 0);" + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" [(set_attr "type" "multi") + (set_attr "unit" "i387") (set_attr "mode" "XF")]) -(define_insn "*cmpfp_2_tf_1" +(define_insn "*cmpfp_<mode>" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP - (match_operand:TF 1 
"register_operand" "f") - (match_operand:TF 2 "register_operand" "f"))] 9))] + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] "TARGET_80387" - "* return output_fp_compare (insn, operands, 2, 0);" + "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") - (set_attr "mode" "XF")]) + (set_attr "unit" "i387") + (set_attr "mode" "<MODE>")]) -(define_insn "*cmpfp_2u" - [(set (reg:CCFPU 18) - (compare:CCFPU - (match_operand 0 "register_operand" "f") - (match_operand 1 "register_operand" "f")))] +(define_insn_and_split "*cmpfp_<mode>_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[0])) - && GET_MODE (operands[0]) == GET_MODE (operands[1])" - "* return output_fp_compare (insn, operands, 0, 1);" - [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp")]) + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "<MODE>")]) -(define_insn "*cmpfp_2u_1" +(define_insn "*cmpfp_u" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFPU (match_operand 1 "register_operand" "f") - (match_operand 2 "register_operand" "f"))] 9))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + (match_operand 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" - "* return output_fp_compare (insn, operands, 2, 1);" + "* return output_fp_compare (insn, operands, 0, 1);" [(set_attr "type" "multi") - (set_attr "mode" "unknownfp")]) - 
-;; Patterns to match the SImode-in-memory ficom instructions. -;; -;; %%% Play games with accepting gp registers, as otherwise we have to -;; force them to memory during rtl generation, which is no good. We -;; can get rid of this once we teach reload to do memory input reloads -;; via pushes. + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) -(define_insn "*ficom_1" - [(set (reg:CCFP 18) - (compare:CCFP - (match_operand 0 "register_operand" "f,f") - (float (match_operand:SI 1 "nonimmediate_operand" "m,?r"))))] - "0 && TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[0])) - && GET_MODE (XEXP (SET_SRC (PATTERN (insn)), 1)) == GET_MODE (operands[0])" - "#") +(define_insn_and_split "*cmpfp_u_cc" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU + (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFPU (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) -;; Split the not-really-implemented gp register case into a -;; push-op-pop sequence. -;; -;; %%% This is most efficient, but am I gonna get in trouble -;; for separating cc0_setter and cc0_user? 
+(define_insn "*cmpfp_<mode>" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operator 3 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "m")]))] + UNSPEC_FNSTSW))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun)) + && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) -(define_split - [(set (reg:CCFP 18) +(define_insn_and_split "*cmpfp_<mode>_cc" + [(set (reg:CCFP FLAGS_REG) (compare:CCFP - (match_operand:SF 0 "register_operand" "") - (float (match_operand:SI 1 "register_operand" ""))))] - "0 && TARGET_80387 && reload_completed" - [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 1)) - (set (reg:CCFP 18) (compare:CCFP (match_dup 0) (match_dup 2))) - (parallel [(set (match_dup 1) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] - "operands[2] = gen_rtx_MEM (Pmode, stack_pointer_rtx); - operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]);") + (match_operand 1 "register_operand" "f") + (match_operator 3 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "m")]))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun)) + && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP + (match_dup 1) + (match_op_dup 3 [(match_dup 2)]))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) ;; FP compares, step 2 ;; 
Move the 
fpsw to ax. (define_insn "x86_fnstsw_1" [(set (match_operand:HI 0 "register_operand" "=a") - (unspec:HI [(reg 18)] 9))] + (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))] "TARGET_80387" "fnstsw\t%0" [(set_attr "length" "2") (set_attr "mode" "SI") - (set_attr "i387" "1") - (set_attr "ppro_uops" "few")]) + (set_attr "unit" "i387")]) ;; FP compares, step 3 ;; Get ax into flags, general case. (define_insn "x86_sahf_1" - [(set (reg:CC 17) - (unspec:CC [(match_operand:HI 0 "register_operand" "a")] 10))] - "!TARGET_64BIT" - "sahf" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:HI 0 "register_operand" "a")] + UNSPEC_SAHF))] + "TARGET_SAHF" +{ +#ifdef HAVE_AS_IX86_SAHF + return "sahf"; +#else + return ".byte\t0x9e"; +#endif +} [(set_attr "length" "1") (set_attr "athlon_decode" "vector") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "one")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "mode" "SI")]) ;; Pentium Pro can do steps 1 through 3 in one go. - -(define_insn "*cmpfp_i" - [(set (reg:CCFP 17) - (compare:CCFP (match_operand 0 "register_operand" "f") - (match_operand 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE - && !SSE_FLOAT_MODE_P (GET_MODE (operands[0])) - && FLOAT_MODE_P (GET_MODE (operands[0])) - && GET_MODE (operands[0]) == GET_MODE (operands[0])" - "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp") - (set_attr "athlon_decode" "vector")]) - -(define_insn "*cmpfp_i_sse" - [(set (reg:CCFP 17) - (compare:CCFP (match_operand 0 "register_operand" "f#x,x#f") - (match_operand 1 "nonimmediate_operand" "f#x,xm#f")))] - "TARGET_80387 +;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) +(define_insn "*cmpfp_i_mixed" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) - && GET_MODE (operands[0]) 
== GET_MODE (operands[0])" + && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "fcmp,sse") - (set_attr "mode" "unknownfp") - (set_attr "athlon_decode" "vector")]) + [(set_attr "type" "fcmp,ssecomi") + (set_attr "prefix" "orig,maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) -(define_insn "*cmpfp_i_sse_only" - [(set (reg:CCFP 17) +(define_insn "*cmpfp_i_sse" + [(set (reg:CCFP FLAGS_REG) (compare:CCFP (match_operand 0 "register_operand" "x") (match_operand 1 "nonimmediate_operand" "xm")))] - "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) - && GET_MODE (operands[0]) == GET_MODE (operands[0])" + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "sse") - (set_attr "mode" "unknownfp") - (set_attr "athlon_decode" "vector")]) + [(set_attr "type" "ssecomi") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) -(define_insn "*cmpfp_iu" - [(set (reg:CCFPU 17) - (compare:CCFPU (match_operand 0 "register_operand" "f") - (match_operand 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE - && !SSE_FLOAT_MODE_P (GET_MODE (operands[0])) - && FLOAT_MODE_P (GET_MODE (operands[0])) +(define_insn "*cmpfp_i_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) && GET_MODE (operands[0]) == GET_MODE (operands[1])" - "* return 
output_fp_compare (insn, operands, 1, 1);" + "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp") - (set_attr "athlon_decode" "vector")]) + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) -(define_insn "*cmpfp_iu_sse" - [(set (reg:CCFPU 17) - (compare:CCFPU (match_operand 0 "register_operand" "f#x,x#f") - (match_operand 1 "nonimmediate_operand" "f#x,xm#f")))] - "TARGET_80387 +(define_insn "*cmpfp_iu_mixed" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "fcmp,sse") - (set_attr "mode" "unknownfp") - (set_attr "athlon_decode" "vector")]) + [(set_attr "type" "fcmp,ssecomi") + (set_attr "prefix" "orig,maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) -(define_insn "*cmpfp_iu_sse_only" - [(set (reg:CCFPU 17) +(define_insn "*cmpfp_iu_sse" + [(set (reg:CCFPU FLAGS_REG) (compare:CCFPU (match_operand 0 "register_operand" "x") (match_operand 1 "nonimmediate_operand" "xm")))] - "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "sse") - (set_attr "mode" "unknownfp") - (set_attr "athlon_decode" "vector")]) + [(set_attr "type" "ssecomi") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (if_then_else 
(match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "*cmpfp_iu_387" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) ;; Move instructions. @@ -1658,7 +1448,7 @@ ;; Push/pop instructions. They are separate since autoinc/dec is not a ;; general_operand. ;; -;; %%% We don't use a post-inc memory reference because x86 is not a +;; %%% We don't use a post-inc memory reference because x86 is not a ;; general AUTO_INC_DEC host, which impacts how it is treated in flow. 
;; Changing this impacts compiler performance on other non-AUTO_INC_DEC ;; targets without our curiosities, and it is just as easy to represent @@ -1692,9 +1482,9 @@ (define_insn "*popsi1_epilogue" [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") - (mem:SI (reg:SI 7))) - (set (reg:SI 7) - (plus:SI (reg:SI 7) (const_int 4))) + (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (mem:BLK (scratch)))] "!TARGET_64BIT" "pop{l}\t%0" @@ -1703,9 +1493,9 @@ (define_insn "popsi1" [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") - (mem:SI (reg:SI 7))) - (set (reg:SI 7) - (plus:SI (reg:SI 7) (const_int 4)))] + (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) (const_int 4)))] "!TARGET_64BIT" "pop{l}\t%0" [(set_attr "type" "pop") @@ -1713,10 +1503,10 @@ (define_insn "*movsi_xor" [(set (match_operand:SI 0 "register_operand" "=r") - (match_operand:SI 1 "const0_operand" "i")) - (clobber (reg:CC 17))] - "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" - "xor{l}\t{%0, %0|%0, %0}" + (match_operand:SI 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{l}\t%0, %0" [(set_attr "type" "alu1") (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) @@ -1724,10 +1514,9 @@ (define_insn "*movsi_or" [(set (match_operand:SI 0 "register_operand" "=r") (match_operand:SI 1 "immediate_operand" "i")) - (clobber (reg:CC 17))] - "reload_completed && GET_CODE (operands[1]) == CONST_INT - && INTVAL (operands[1]) == -1 - && (TARGET_PENTIUM || optimize_size)" + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && operands[1] == constm1_rtx" { operands[1] = constm1_rtx; return "or{l}\t{%1, %0|%0, %1}"; @@ -1737,19 +1526,39 @@ (set_attr "length_immediate" "1")]) (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!rm,!*Y") - (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,rm,*Y,*Y"))] - "GET_CODE (operands[0]) != MEM 
|| GET_CODE (operands[1]) != MEM" + [(set (match_operand:SI 0 "nonimmediate_operand" + "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x") + (match_operand:SI 1 "general_operand" + "g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { - case TYPE_SSE: - if (get_attr_mode (insn) == TImode) - return "movdqa\t{%1, %0|%0, %1}"; - return "movd\t{%1, %0|%0, %1}"; + case TYPE_SSELOG1: + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; + return "%vxorps\t%0, %d0"; + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_SI: + return "%vmovd\t{%1, %0|%0, %1}"; + case MODE_SF: + return "%vmovss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } case TYPE_MMX: - if (get_attr_mode (insn) == DImode) + return "pxor\t%0, %0"; + + case TYPE_MMXMOV: + if (get_attr_mode (insn) == MODE_DI) return "movq\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; @@ -1757,45 +1566,62 @@ return "lea{l}\t{%1, %0|%0, %1}"; default: - if (flag_pic && SYMBOLIC_CONST (operands[1])) - abort(); + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); return "mov{l}\t{%1, %0|%0, %1}"; } } [(set (attr "type") - (cond [(eq_attr "alternative" "2,3,4") + (cond [(eq_attr "alternative" "2") (const_string "mmx") - (eq_attr "alternative" "5,6,7") - (const_string "sse") - (and (ne (symbol_ref "flag_pic") (const_int 0)) - (match_operand:SI 1 "symbolic_operand" "")) + (eq_attr "alternative" "3,4,5") + (const_string "mmxmov") + (eq_attr "alternative" "6") + (const_string "sselog1") + (eq_attr "alternative" "7,8,9,10,11") + (const_string "ssemov") + (match_operand:DI 1 "pic_32bit_operand" "") (const_string "lea") ] (const_string "imov"))) - (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")]) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4,5") + (const_string "orig") + 
(const_string "maybe_vex"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (const_string "DI") + (eq_attr "alternative" "6,7") + (if_then_else + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (and (eq_attr "alternative" "8,9,10,11") + (eq (symbol_ref "TARGET_SSE2") (const_int 0))) + (const_string "SF") + ] + (const_string "SI")))]) -;; Stores and loads of ax to arbitary constant address. +;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabssi_1_rex64" - [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:SI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{l}\t{%1, %P0|%P0, %1} - mov{l}\t{%1, %a0|%a0, %1} - movabs{l}\t{%1, %a0|%a0, %1}" + mov{l}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "SI")]) (define_insn "*movabssi_2_rex64" [(set (match_operand:SI 0 "register_operand" "=a,r") (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{l}\t{%P1, %0|%0, %P1} mov{l}\t{%a1, %0|%0, %a1}" @@ -1814,11 +1640,10 @@ "" "xchg{l}\t%1, %0" [(set_attr "type" "imov") + (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "mode" "SI") - (set_attr "modrm" "0") - (set_attr "ppro_uops" "few")]) + (set_attr "amdfam10_decode" "double")]) (define_expand "movhi" [(set (match_operand:HI 0 
"nonimmediate_operand" "") @@ -1827,28 +1652,26 @@ "ix86_expand_move (HImode, operands); DONE;") (define_insn "*pushhi2" - [(set (match_operand:HI 0 "push_operand" "=<,<") - (match_operand:HI 1 "general_no_elim_operand" "n,r*m"))] + [(set (match_operand:HI 0 "push_operand" "=X") + (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))] "!TARGET_64BIT" - "@ - push{w}\t{|WORD PTR }%1 - push{w}\t%1" + "push{l}\t%k1" [(set_attr "type" "push") - (set_attr "mode" "HI")]) + (set_attr "mode" "SI")]) ;; For 64BIT abi we always round up to 8 bytes. (define_insn "*pushhi2_rex64" [(set (match_operand:HI 0 "push_operand" "=X") - (match_operand:HI 1 "nonmemory_no_elim_operand" "ri"))] + (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))] "TARGET_64BIT" "push{q}\t%q1" [(set_attr "type" "push") - (set_attr "mode" "QI")]) + (set_attr "mode" "DI")]) (define_insn "*movhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] - "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { @@ -1864,7 +1687,9 @@ } } [(set (attr "type") - (cond [(and (eq_attr "alternative" "0") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") @@ -1894,28 +1719,27 @@ ] (const_string "HI")))]) -;; Stores and loads of ax to arbitary constant address. +;; Stores and loads of ax to arbitrary constant address. 
;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabshi_1_rex64" - [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:HI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:HI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{w}\t{%1, %P0|%P0, %1} - mov{w}\t{%1, %a0|%a0, %1} - movabs{w}\t{%1, %a0|%a0, %1}" + mov{w}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "HI")]) (define_insn "*movabshi_2_rex64" [(set (match_operand:HI 0 "register_operand" "=a,r") (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{w}\t{%P1, %0|%0, %P1} mov{w}\t{%a1, %0|%0, %a1}" @@ -1931,53 +1755,54 @@ (match_operand:HI 1 "register_operand" "+r")) (set (match_dup 1) (match_dup 0))] - "TARGET_PARTIAL_REG_STALL" - "xchg{w}\t%1, %0" + "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" + "xchg{l}\t%k1, %k0" [(set_attr "type" "imov") + (set_attr "mode" "SI") (set_attr "pent_pair" "np") - (set_attr "mode" "HI") - (set_attr "modrm" "0") - (set_attr "ppro_uops" "few")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double")]) +;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 (define_insn "*swaphi_2" [(set (match_operand:HI 0 "register_operand" "+r") (match_operand:HI 1 "register_operand" "+r")) (set (match_dup 1) (match_dup 0))] - "! 
TARGET_PARTIAL_REG_STALL" - "xchg{l}\t%k1, %k0" + "TARGET_PARTIAL_REG_STALL" + "xchg{w}\t%1, %0" [(set_attr "type" "imov") + (set_attr "mode" "HI") (set_attr "pent_pair" "np") - (set_attr "mode" "SI") - (set_attr "modrm" "0") - (set_attr "ppro_uops" "few")]) + (set_attr "athlon_decode" "vector")]) (define_expand "movstricthi" [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "")) (match_operand:HI 1 "general_operand" ""))] - "! TARGET_PARTIAL_REG_STALL || optimize_size" + "" { + if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun)) + FAIL; /* Don't generate memory->memory moves, go through a register */ - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + if (MEM_P (operands[0]) && MEM_P (operands[1])) operands[1] = force_reg (HImode, operands[1]); }) (define_insn "*movstricthi_1" [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r")) (match_operand:HI 1 "general_operand" "rn,m"))] - "(! TARGET_PARTIAL_REG_STALL || optimize_size) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "mov{w}\t{%1, %0|%0, %1}" [(set_attr "type" "imov") (set_attr "mode" "HI")]) (define_insn "*movstricthi_xor" [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) - (match_operand:HI 1 "const0_operand" "i")) - (clobber (reg:CC 17))] - "reload_completed - && ((!TARGET_USE_MOV0 && !TARGET_PARTIAL_REG_STALL) || optimize_size)" - "xor{w}\t{%0, %0|%0, %0}" + (match_operand:HI 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{w}\t%0, %0" [(set_attr "type" "alu1") (set_attr "mode" "HI") (set_attr "length_immediate" "0")]) @@ -1989,27 +1814,25 @@ "ix86_expand_move (QImode, operands); DONE;") ;; emit_push_insn when it calls move_by_pieces requires an insn to -;; "push a byte". 
But actually we use pushw, which has the effect -;; of rounding the amount pushed up to a halfword. +;; "push a byte". But actually we use pushl, which has the effect +;; of rounding the amount pushed up to a word. (define_insn "*pushqi2" - [(set (match_operand:QI 0 "push_operand" "=X,X") - (match_operand:QI 1 "nonmemory_no_elim_operand" "n,r"))] + [(set (match_operand:QI 0 "push_operand" "=X") + (match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))] "!TARGET_64BIT" - "@ - push{w}\t{|word ptr }%1 - push{w}\t%w1" + "push{l}\t%k1" [(set_attr "type" "push") - (set_attr "mode" "HI")]) + (set_attr "mode" "SI")]) ;; For 64BIT abi we always round up to 8 bytes. (define_insn "*pushqi2_rex64" [(set (match_operand:QI 0 "push_operand" "=X") - (match_operand:QI 1 "nonmemory_no_elim_operand" "qi"))] + (match_operand:QI 1 "nonmemory_no_elim_operand" "qn"))] "TARGET_64BIT" "push{q}\t%q1" [(set_attr "type" "push") - (set_attr "mode" "QI")]) + (set_attr "mode" "DI")]) ;; Situation is quite tricky about when to choose full sized (SImode) move ;; over QImode moves. 
For Q_REG -> Q_REG move we use full size only for @@ -2024,13 +1847,12 @@ (define_insn "*movqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))] - "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { case TYPE_IMOVX: - if (!ANY_QI_REG_P (operands[1]) && GET_CODE (operands[1]) != MEM) - abort (); + gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])); return "movz{bl|x}\t{%1, %k0|%k0, %1}"; default: if (get_attr_mode (insn) == MODE_SI) @@ -2040,7 +1862,12 @@ } } [(set (attr "type") - (cond [(and (eq_attr "alternative" "3") + (cond [(and (eq_attr "alternative" "5") + (not (match_operand:QI 1 "aligned_operand" ""))) + (const_string "imovx") + (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "3") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_QIMODE_MATH") @@ -2062,13 +1889,17 @@ (eq_attr "type" "imovx") (const_string "SI") (and (eq_attr "type" "imov") - (and (eq_attr "alternative" "0,1,2") - (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") - (const_int 0)))) + (and (eq_attr "alternative" "0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (and (eq (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)))))) (const_string "SI") ;; Avoid partial register stalls when not using QImode arithmetic (and (eq_attr "type" "imov") - (and (eq_attr "alternative" "0,1,2") + (and (eq_attr "alternative" "0,1") (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_QIMODE_MATH") @@ -2077,64 +1908,59 @@ ] (const_string "QI")))]) -(define_expand "reload_outqi" - [(parallel [(match_operand:QI 0 "" "=m") - (match_operand:QI 1 "register_operand" "r") - 
(match_operand:QI 2 "register_operand" "=&q")])] - "" -{ - rtx op0, op1, op2; - op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; - - if (reg_overlap_mentioned_p (op2, op0)) - abort (); - if (! q_regs_operand (op1, QImode)) - { - emit_insn (gen_movqi (op2, op1)); - op1 = op2; - } - emit_insn (gen_movqi (op0, op1)); - DONE; -}) - -(define_insn "*swapqi" +(define_insn "*swapqi_1" [(set (match_operand:QI 0 "register_operand" "+r") (match_operand:QI 1 "register_operand" "+r")) (set (match_dup 1) (match_dup 0))] - "" - "xchg{b}\t%1, %0" + "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" + "xchg{l}\t%k1, %k0" [(set_attr "type" "imov") + (set_attr "mode" "SI") (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) + +;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 +(define_insn "*swapqi_2" + [(set (match_operand:QI 0 "register_operand" "+q") + (match_operand:QI 1 "register_operand" "+q")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_PARTIAL_REG_STALL" + "xchg{b}\t%1, %0" + [(set_attr "type" "imov") (set_attr "mode" "QI") - (set_attr "modrm" "0") - (set_attr "ppro_uops" "few")]) + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) (define_expand "movstrictqi" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (match_operand:QI 1 "general_operand" ""))] - "! TARGET_PARTIAL_REG_STALL || optimize_size" + "" { + if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun)) + FAIL; /* Don't generate memory->memory moves, go through a register. */ - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + if (MEM_P (operands[0]) && MEM_P (operands[1])) operands[1] = force_reg (QImode, operands[1]); }) (define_insn "*movstrictqi_1" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) (match_operand:QI 1 "general_operand" "*qn,m"))] - "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "mov{b}\t{%1, %0|%0, %1}" [(set_attr "type" "imov") (set_attr "mode" "QI")]) (define_insn "*movstrictqi_xor" [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q")) - (match_operand:QI 1 "const0_operand" "i")) - (clobber (reg:CC 17))] - "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" - "xor{b}\t{%0, %0|%0, %0}" + (match_operand:QI 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{b}\t%0, %0" [(set_attr "type" "alu1") (set_attr "mode" "QI") (set_attr "length_immediate" "0")]) @@ -2213,28 +2039,27 @@ (const_string "SI") (const_string "QI")))]) -;; Stores and loads of ax to arbitary constant address. +;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsqi_1_rex64" - [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:QI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:QI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{b}\t{%1, %P0|%P0, %1} - mov{b}\t{%1, %a0|%a0, %1} - movabs{b}\t{%1, %a0|%a0, %1}" + mov{b}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "QI")]) (define_insn "*movabsqi_2_rex64" [(set (match_operand:QI 0 "register_operand" "=a,r") (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + 
"TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{b}\t{%P1, %0|%0, %P1} mov{b}\t{%a1, %0|%0, %a1}" @@ -2245,6 +2070,16 @@ (set_attr "memory" "load") (set_attr "mode" "QI")]) +(define_insn "*movdi_extzv_1" + [(set (match_operand:DI 0 "register_operand" "=R") + (zero_extract:DI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" + "movz{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + (define_insn "*movsi_extzv_1" [(set (match_operand:SI 0 "register_operand" "=R") (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") @@ -2328,13 +2163,22 @@ [(set_attr "type" "imov") (set_attr "mode" "QI")]) +(define_insn "movdi_insv_1_rex64" + [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:DI 1 "nonmemory_operand" "Qn"))] + "TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + (define_insn "*movqi_insv_2" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") (const_int 8) (const_int 8)) - (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") - (const_int 8)) - (const_int 255)))] + (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") + (const_int 8)))] "" "mov{b}\t{%h1, %h0|%h0, %h1}" [(set_attr "type" "imov") @@ -2352,7 +2196,7 @@ "!TARGET_64BIT" "#") -(define_insn "pushdi2_rex64" +(define_insn "*pushdi2_rex64" [(set (match_operand:DI 0 "push_operand" "=<,!<") (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] "TARGET_64BIT" @@ -2378,6 +2222,7 @@ ;; We need to define this as both peepholer and splitter for case ;; peephole2 pass is not run. +;; "&& 1" is needed to keep it from matching the previous pattern. 
(define_peephole2 [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "immediate_operand" ""))] @@ -2385,7 +2230,7 @@ && !x86_64_immediate_operand (operands[1], DImode) && 1" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] - "split_di (operands + 1, 1, operands + 2, operands + 3); + "split_di (&operands[1], 1, &operands[2], &operands[3]); operands[1] = gen_lowpart (DImode, operands[2]); operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, GEN_INT (4))); @@ -2394,12 +2239,13 @@ (define_split [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "immediate_operand" ""))] - "TARGET_64BIT && (flow2_completed || (reload_completed && !flag_peephole2)) + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? epilogue_completed : reload_completed) && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] - "split_di (operands + 1, 1, operands + 2, operands + 3); + "split_di (&operands[1], 1, &operands[2], &operands[3]); operands[1] = gen_lowpart (DImode, operands[2]); operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, GEN_INT (4))); @@ -2416,9 +2262,9 @@ (define_insn "*popdi1_epilogue_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") - (mem:DI (reg:DI 7))) - (set (reg:DI 7) - (plus:DI (reg:DI 7) (const_int 8))) + (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) + (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (mem:BLK (scratch)))] "TARGET_64BIT" "pop{q}\t%0" @@ -2427,9 +2273,9 @@ (define_insn "popdi1" [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") - (mem:DI (reg:DI 7))) - (set (reg:DI 7) - (plus:DI (reg:DI 7) (const_int 8)))] + (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) + (plus:DI (reg:DI SP_REG) (const_int 8)))] "TARGET_64BIT" "pop{q}\t%0" [(set_attr "type" "pop") @@ -2437,11 +2283,11 @@ (define_insn "*movdi_xor_rex64" [(set (match_operand:DI 
0 "register_operand" "=r") - (match_operand:DI 1 "const0_operand" "i")) - (clobber (reg:CC 17))] - "TARGET_64BIT && (!TARGET_USE_MOV0 || optimize_size) + (match_operand:DI 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" - "xor{l}\t{%k0, %k0|%k0, %k0}" + "xor{l}\t%k0, %k0"; [(set_attr "type" "alu1") (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) @@ -2449,11 +2295,10 @@ (define_insn "*movdi_or_rex64" [(set (match_operand:DI 0 "register_operand" "=r") (match_operand:DI 1 "const_int_operand" "i")) - (clobber (reg:CC 17))] - "TARGET_64BIT && (TARGET_PENTIUM || optimize_size) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed - && GET_CODE (operands[1]) == CONST_INT - && INTVAL (operands[1]) == -1" + && operands[1] == constm1_rtx" { operands[1] = constm1_rtx; return "or{q}\t{%1, %0|%0, %1}"; @@ -2463,20 +2308,31 @@ (set_attr "length_immediate" "1")]) (define_insn "*movdi_2" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y,!m,!*Y,!*Y") - (match_operand:DI 1 "general_operand" "riFo,riF,*y,m,*Y,*Y,m"))] - "!TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r ,o ,*y,m*y,*y,*Y2,m ,*Y2,*Y2,*x,m ,*x,*x") + (match_operand:DI 1 "general_operand" + "riFo,riF,C ,*y ,m ,C ,*Y2,*Y2,m ,C ,*x,*x,m "))] + "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ # # + pxor\t%0, %0 movq\t{%1, %0|%0, %1} movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdqa\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,mmx,mmx,sse,sse,sse") - (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI")]) + %vpxor\t%0, %d0 + %vmovq\t{%1, %0|%0, %1} + %vmovdqa\t{%1, %0|%0, %1} + %vmovq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") 
+ (if_then_else (eq_attr "alternative" "5,6,7,8") + (const_string "vex") + (const_string "orig"))) + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")]) (define_split [(set (match_operand:DI 0 "push_operand" "") @@ -2497,27 +2353,54 @@ "ix86_split_long_move (operands); DONE;") (define_insn "*movdi_1_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!m*y,!*y,!*Y,!m,!*Y") - (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,m,*Y,*Y,*m"))] - "TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") + (match_operand:DI 1 "general_operand" + "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { - case TYPE_SSE: - if (register_operand (operands[0], DImode) - && register_operand (operands[1], DImode)) - return "movdqa\t{%1, %0|%0, %1}"; + case TYPE_SSECVT: + if (SSE_REG_P (operands[0])) + return "movq2dq\t{%1, %0|%0, %1}"; + else + return "movdq2q\t{%1, %0|%0, %1}"; + + case TYPE_SSEMOV: + if (TARGET_AVX) + { + if (get_attr_mode (insn) == MODE_TI) + return "vmovdqa\t{%1, %0|%0, %1}"; + else + return "vmovq\t{%1, %0|%0, %1}"; + } + + if (get_attr_mode (insn) == MODE_TI) + return "movdqa\t{%1, %0|%0, %1}"; /* FALLTHRU */ - case TYPE_MMX: + + case TYPE_MMXMOV: + /* Moves from and into integer register is done using movd + opcode with REX prefix. 
*/ + if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])) + return "movd\t{%1, %0|%0, %1}"; return "movq\t{%1, %0|%0, %1}"; + + case TYPE_SSELOG1: + return "%vpxor\t%0, %d0"; + + case TYPE_MMX: + return "pxor\t%0, %0"; + case TYPE_MULTI: return "#"; + case TYPE_LEA: return "lea{q}\t{%a1, %0|%0, %a1}"; + default: - if (flag_pic && SYMBOLIC_CONST (operands[1])) - abort (); + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); if (get_attr_mode (insn) == MODE_SI) return "mov{l}\t{%k1, %k0|%k0, %k1}"; else if (which_alternative == 2) @@ -2527,28 +2410,37 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "5,6") + (cond [(eq_attr "alternative" "5") (const_string "mmx") - (eq_attr "alternative" "7,8") - (const_string "sse") + (eq_attr "alternative" "6,7,8,9,10") + (const_string "mmxmov") + (eq_attr "alternative" "11") + (const_string "sselog1") + (eq_attr "alternative" "12,13,14,15,16") + (const_string "ssemov") + (eq_attr "alternative" "17,18") + (const_string "ssecvt") (eq_attr "alternative" "4") (const_string "multi") - (and (ne (symbol_ref "flag_pic") (const_int 0)) - (match_operand:DI 1 "symbolic_operand" "")) + (match_operand:DI 1 "pic_32bit_operand" "") (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*") - (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*") - (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI")]) - -;; Stores and loads of ax to arbitary constant address. + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "11,12,13,14,15,16") + (const_string "maybe_vex") + (const_string "orig"))) + (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")]) + +;; Stores and loads of ax to arbitrary constant address. 
;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsdi_1_rex64" [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) (match_operand:DI 1 "nonmemory_operand" "a,er"))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{q}\t{%1, %P0|%P0, %1} mov{q}\t{%1, %a0|%a0, %1}" @@ -2562,7 +2454,7 @@ (define_insn "*movabsdi_2_rex64" [(set (match_operand:DI 0 "register_operand" "=a,r") (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{q}\t{%P1, %0|%0, %P1} mov{q}\t{%a1, %0|%0, %a1}" @@ -2588,6 +2480,7 @@ ;; We need to define this as both peepholer and splitter for case ;; peephole2 pass is not run. +;; "&& 1" is needed to keep it from matching the previous pattern. (define_peephole2 [(set (match_operand:DI 0 "memory_operand" "") (match_operand:DI 1 "immediate_operand" ""))] @@ -2595,17 +2488,18 @@ && !x86_64_immediate_operand (operands[1], DImode) && 1" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "split_di (operands, 2, operands + 2, operands + 4);") + "split_di (&operands[0], 2, &operands[2], &operands[4]);") (define_split [(set (match_operand:DI 0 "memory_operand" "") (match_operand:DI 1 "immediate_operand" ""))] - "TARGET_64BIT && (flow2_completed || (reload_completed && !flag_peephole2)) + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed) && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "split_di (operands, 2, operands + 2, operands + 4);") + "split_di (&operands[0], 2, &operands[2], &operands[4]);") (define_insn "*swapdi_rex64" [(set (match_operand:DI 0 "register_operand" "+r") @@ -2615,178 +2509,345 @@ "TARGET_64BIT" "xchg{q}\t%1, %0" [(set_attr "type" "imov") + (set_attr "mode" "DI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "mode" "DI") - (set_attr "modrm" "0") - (set_attr "ppro_uops" "few")]) + (set_attr "amdfam10_decode" "double")]) + +(define_expand "movoi" + [(set (match_operand:OI 0 "nonimmediate_operand" "") + (match_operand:OI 1 "general_operand" ""))] + "TARGET_AVX" + "ix86_expand_move (OImode, operands); DONE;") + +(define_insn "*movoi_internal" + [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:OI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_AVX + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return "vxorps\t%0, %0, %0"; + case 1: + case 2: + if (misaligned_operand (operands[0], OImode) + || misaligned_operand (operands[1], OImode)) + return "vmovdqu\t{%1, %0|%0, %1}"; + else + return "vmovdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) - -(define_expand "movsf" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (match_operand:SF 1 "general_operand" ""))] - "" - "ix86_expand_move (SFmode, operands); DONE;") +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] + "TARGET_SSE || TARGET_64BIT" +{ + if (TARGET_64BIT) + ix86_expand_move (TImode, operands); + else if (push_operand (operands[0], TImode)) + ix86_expand_push (TImode, 
operands[1]); + else + ix86_expand_vector_move (TImode, operands); + DONE; +}) -(define_insn "*pushsf" - [(set (match_operand:SF 0 "push_operand" "=<,<,<") - (match_operand:SF 1 "general_no_elim_operand" "f#rx,rFm#fx,x#rf"))] - "!TARGET_64BIT" +(define_insn "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (which_alternative) { case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (4); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - + return "%vpxor\t%0, %d0"; case 1: - return "push{l}\t%1"; case 2: - return "#"; - + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. 
*/ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } default: - abort (); + gcc_unreachable (); } } - [(set_attr "type" "multi,push,multi") - (set_attr "mode" "SF,SI,SF")]) + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))) + (const_string "V4SF") + (and (eq_attr "alternative" "2") + (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0))) + (const_string "V4SF")] + (const_string "TI")))]) -(define_insn "*pushsf_rex64" - [(set (match_operand:SF 0 "push_operand" "=X,X,X") - (match_operand:SF 1 "nonmemory_no_elim_operand" "f#rx,rF#fx,x#rf"))] - "TARGET_64BIT" +(define_insn "*movti_rex64" + [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (which_alternative) { case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - case 1: - return "push{q}\t%q1"; - - case 2: return "#"; - + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 3: + case 4: + /* TDmode values are passed as TImode on the stack. 
Moving them + to stack may result in unaligned memory access. */ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } default: - abort (); + gcc_unreachable (); } +} + [(set_attr "type" "*,*,sselog1,ssemov,ssemov") + (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; This expands to what emit_move_complex would generate if we didn't +;; have a movti pattern. Having this avoids problems with reload on +;; 32-bit targets when SSE is present, but doesn't seem to be harmful +;; to have around all the time. 
+(define_expand "movcdi" + [(set (match_operand:CDI 0 "nonimmediate_operand" "") + (match_operand:CDI 1 "general_operand" ""))] + "" +{ + if (push_operand (operands[0], CDImode)) + emit_move_complex_push (CDImode, operands[0], operands[1]); + else + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "ix86_expand_move (SFmode, operands); DONE;") + +(define_insn "*pushsf" + [(set (match_operand:SF 0 "push_operand" "=<,<,<") + (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))] + "!TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{l}\t%1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,SI,SF")]) + +(define_insn "*pushsf_rex64" + [(set (match_operand:SF 0 "push_operand" "=X,X,X") + (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] + "TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{q}\t%q1"; } [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") (set_attr "mode" "SF,DI,SF")]) (define_split [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "memory_operand" ""))] "reload_completed - && GET_CODE (operands[1]) == MEM - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))" + && MEM_P (operands[1]) + && (operands[2] = find_constant_src (insn))" [(set (match_dup 0) - (match_dup 1))] - "operands[1] = get_pool_constant (XEXP (operands[1], 0));") + (match_dup 2))]) ;; %%% Kill this when call knows how to work this out. 
(define_split [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) - (set (mem:SF (reg:SI 7)) (match_dup 1))]) + (match_operand:SF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" + [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) + (set (mem:SF (reg:SI SP_REG)) (match_dup 1))]) (define_split [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "register_operand" ""))] - "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) - (set (mem:SF (reg:DI 7)) (match_dup 1))]) + (match_operand:SF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT" + [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (set (mem:SF (reg:DI SP_REG)) (match_dup 1))]) (define_insn "*movsf_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y") - (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,H,x,xm#rf,x#rf,rm,*y,*y"))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + [(set (match_operand:SF 0 "nonimmediate_operand" + "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") + (match_operand:SF 1 "general_operand" + "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], SFmode))" + || memory_operand (operands[0], SFmode))" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return 
"fst\t%y0"; - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - abort(); + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "mov{l}\t{%1, %0|%0, %1}"; case 5: - if (TARGET_SSE2) - return "pxor\t%0, %0"; + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; else - return "xorps\t%0, %0"; + return "%vxorps\t%0, %d0"; case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movaps\t{%1, %0|%0, %1}"; + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; else - return "movss\t{%1, %0|%0, %1}"; + return "%vmovss\t{%1, %d0|%d0, %1}"; case 7: + if (TARGET_AVX) + return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}" + : "vmovss\t{%1, %0|%0, %1}"; + else + return "movss\t{%1, %0|%0, %1}"; case 8: - return "movss\t{%1, %0|%0, %1}"; + return "%vmovss\t{%1, %0|%0, %1}"; - case 9: - case 10: + case 9: case 10: case 14: case 15: return "movd\t{%1, %0|%0, %1}"; + case 12: case 13: + return "%vmovd\t{%1, %0|%0, %1}"; case 11: return "movq\t{%1, %0|%0, %1}"; default: - abort(); + gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,sse,sse,sse,sse,mmx,mmx,mmx") - (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")]) + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref 
"optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. */ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) (define_insn "*swapsf" - [(set (match_operand:SF 0 "register_operand" "+f") - (match_operand:SF 1 "register_operand" "+f")) + [(set (match_operand:SF 0 "fp_register_operand" "+f") + (match_operand:SF 1 "fp_register_operand" "+f")) (set (match_dup 1) (match_dup 0))] - "reload_completed || !TARGET_SSE" + "reload_completed || TARGET_80387" { if (STACK_TOP_P (operands[0])) return "fxch\t%1"; @@ -2803,89 +2864,41 @@ "ix86_expand_move (DFmode, operands); DONE;") ;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. -;; Size of pushdf using integer insturctions is 2+2*memory operand size +;; Size of pushdf using integer instructions is 2+2*memory operand size ;; On the average, pushdf using integers can be still shorter. Allow this ;; pattern for optimize_size too. 
(define_insn "*pushdf_nointeger" [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f#Y,Fo#fY,*r#fY,Y#f"))] + (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))] "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - case 3: - return "#"; - - default: - abort (); - } + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); } [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*,*") (set_attr "mode" "DF,SI,SI,DF")]) (define_insn "*pushdf_integer" [(set (match_operand:DF 0 "push_operand" "=<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f#rY,rFo#fY,Y#rf"))] + (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))] "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (TARGET_64BIT) - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - else - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - - case 1: - case 2: - return "#"; - - default: - abort (); - } + /* This insn should be already split before reg-stack. 
*/ + gcc_unreachable (); } [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") (set_attr "mode" "DF,SI,DF")]) ;; %%% Kill this when call knows how to work this out. (define_split [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "register_operand" ""))] - "!TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:DF (reg:SI 7)) (match_dup 1))] - "") - -(define_split - [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "register_operand" ""))] - "TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) - (set (mem:DF (reg:DI 7)) (match_dup 1))] + (match_operand:DF 1 "any_fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) + (set (mem:DF (reg:P SP_REG)) (match_dup 1))] "") (define_split @@ -2900,142 +2913,430 @@ ;; when optimizing for size. 
(define_insn "*movdf_nointeger" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m") - (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,H,Y#f,YHm#f,Y#f"))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES) + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ((optimize_function_for_size_p (cfun) + || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && !memory_operand (operands[0], DFmode) + && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], DFmode))" + || ((optimize_function_for_size_p (cfun) + || !TARGET_MEMORY_MISMATCH_STALL + || reload_in_progress || reload_completed) + && memory_operand (operands[0], DFmode)))" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + case MODE_V2DF: + return "%vxorpd\t%0, %d0"; + case MODE_TI: + return "%vpxor\t%0, %d0"; + default: + gcc_unreachable (); + } + 
case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "%vmovapd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_DF: + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlpd\t{%1, %0|%0, %1}"; + } + else + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlps\t{%1, %0|%0, %1}"; + } + else + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); } - abort(); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. 
+ + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +(define_insn "*movdf_integer_rex64" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,r ,m ,Y2*x,Y2*x,Y2*x,m ,Yi,r ") + (match_operand:DF 1 "general_operand" + "fm,f,G,rmF,Fr,C ,Y2*x,m ,Y2*x,r ,Yi"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1])) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "#"; + case 5: - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + case MODE_V2DF: + return "%vxorpd\t%0, %d0"; + case MODE_TI: + return "%vpxor\t%0, %d0"; + default: + gcc_unreachable (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; - else - return "movsd\t{%1, %0|%0, %1}"; case 7: case 8: - return "movsd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "%vmovapd\t{%1, %0|%0, %1}"; + case MODE_TI: + 
return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_DF: + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + return "%vmovlpd\t{%1, %d0|%d0, %1}"; + case MODE_V2SF: + return "%vmovlps\t{%1, %d0|%d0, %1}"; + default: + gcc_unreachable (); + } + + case 9: + case 10: + return "%vmovd\t{%1, %0|%0, %1}"; default: - abort(); + gcc_unreachable(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4,9,10") + (const_string "DI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. 
*/ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) (define_insn "*movdf_integer" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") - (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,H,Y#rf,Ym#rf,Y#rf"))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && !optimize_size && TARGET_INTEGER_DFMODE_MOVES + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && optimize_function_for_speed_p (cfun) + && TARGET_INTEGER_DFMODE_MOVES && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], DFmode))" + || memory_operand (operands[0], DFmode))" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: - switch 
(standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - abort(); + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "#"; case 5: - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + gcc_unreachable (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; - else - return "movsd\t{%1, %0|%0, %1}"; case 7: case 8: - return "movsd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "movdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "movq\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } default: - abort(); + gcc_unreachable(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. 
*/ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) (define_split [(set (match_operand:DF 0 "nonimmediate_operand" "") (match_operand:DF 1 "general_operand" ""))] "reload_completed - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && ! (ANY_FP_REG_P (operands[0]) || + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && ! (ANY_FP_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG && ANY_FP_REG_P (SUBREG_REG (operands[0])))) - && ! (ANY_FP_REG_P (operands[1]) || + && ! 
(ANY_FP_REG_P (operands[1]) || (GET_CODE (operands[1]) == SUBREG && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" [(const_int 0)] "ix86_split_long_move (operands); DONE;") (define_insn "*swapdf" - [(set (match_operand:DF 0 "register_operand" "+f") - (match_operand:DF 1 "register_operand" "+f")) + [(set (match_operand:DF 0 "fp_register_operand" "+f") + (match_operand:DF 1 "fp_register_operand" "+f")) (set (match_dup 1) (match_dup 0))] - "reload_completed || !TARGET_SSE2" + "reload_completed || TARGET_80387" { if (STACK_TOP_P (operands[0])) return "fxch\t%1"; @@ -3048,17 +3349,11 @@ (define_expand "movxf" [(set (match_operand:XF 0 "nonimmediate_operand" "") (match_operand:XF 1 "general_operand" ""))] - "!TARGET_64BIT" - "ix86_expand_move (XFmode, operands); DONE;") - -(define_expand "movtf" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "general_operand" ""))] "" - "ix86_expand_move (TFmode, operands); DONE;") + "ix86_expand_move (XFmode, operands); DONE;") ;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. -;; Size of pushdf using integer insturctions is 3+3*memory operand size +;; Size of pushdf using integer instructions is 3+3*memory operand size ;; Pushing using integer instructions is longer except for constants ;; and direct memory references. ;; (assuming that any given constant is pushed only once, but this ought to be @@ -3067,117 +3362,25 @@ (define_insn "*pushxf_nointeger" [(set (match_operand:XF 0 "push_operand" "=X,X,X") (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] - "!TARGET_64BIT && optimize_size" -{ - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. 
*/ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (12); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - return "#"; - - default: - abort (); - } -} - [(set_attr "type" "multi") - (set_attr "mode" "XF,SI,SI")]) - -(define_insn "*pushtf_nointeger" - [(set (match_operand:TF 0 "push_operand" "=<,<,<") - (match_operand:TF 1 "general_no_elim_operand" "f,Fo,*r"))] - "optimize_size" + "optimize_function_for_size_p (cfun)" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (16); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - return "#"; - - default: - abort (); - } + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); } [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") (set_attr "mode" "XF,SI,SI")]) (define_insn "*pushxf_integer" [(set (match_operand:XF 0 "push_operand" "=<,<") - (match_operand:XF 1 "general_no_elim_operand" "f#r,ro#f"))] - "!TARGET_64BIT && !optimize_size" -{ - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. 
*/ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (12); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - return "#"; - - default: - abort (); - } -} - [(set_attr "type" "multi") - (set_attr "mode" "XF,SI")]) - -(define_insn "*pushtf_integer" - [(set (match_operand:TF 0 "push_operand" "=<,<") - (match_operand:TF 1 "general_no_elim_operand" "f#r,rFo#f"))] - "!optimize_size" + (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] + "optimize_function_for_speed_p (cfun)" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (16); - if (TARGET_64BIT) - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - else - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - return "#"; - - default: - abort (); - } + /* This insn should be already split before reg-stack. 
*/ + gcc_unreachable (); } [(set_attr "type" "multi") + (set_attr "unit" "i387,*") (set_attr "mode" "XF,SI")]) (define_split @@ -3185,232 +3388,168 @@ (match_operand 1 "general_operand" ""))] "reload_completed && (GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == DFmode) - && (!REG_P (operands[1]) || !ANY_FP_REGNO_P (REGNO (operands[1])))" + && !ANY_FP_REG_P (operands[1])" [(const_int 0)] "ix86_split_long_move (operands); DONE;") (define_split [(set (match_operand:XF 0 "push_operand" "") - (match_operand:XF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) - (set (mem:XF (reg:SI 7)) (match_dup 1))]) - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:TF (reg:SI 7)) (match_dup 1))]) - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "register_operand" ""))] - "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) - (set (mem:TF (reg:DI 7)) (match_dup 1))]) + (match_operand:XF 1 "any_fp_register_operand" ""))] + "" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:XF (reg:P SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? 
-16 : -12);") ;; Do not use integer registers when optimizing for size (define_insn "*movxf_nointeger" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] - "!TARGET_64BIT - && optimize_size - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + "optimize_function_for_size_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed + || standard_80387_constant_p (operands[1]) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], XFmode))" + || memory_operand (operands[0], XFmode))" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - break; + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "#"; + default: + gcc_unreachable (); } - abort(); } [(set_attr "type" "fmov,fmov,fmov,multi,multi") (set_attr "mode" "XF,XF,XF,SI,SI")]) -(define_insn "*movtf_nointeger" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m,f,*r,o") - (match_operand:TF 1 "general_operand" "fm,f,G,*roF,F*r"))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && optimize_size +(define_insn "*movxf_integer" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] + "optimize_function_for_speed_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed || GET_CODE (operands[1]) != CONST_DOUBLE - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || memory_operand (operands[0], TFmode))" + || memory_operand (operands[0], XFmode))" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - break; + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "#"; + + default: + gcc_unreachable (); } - abort(); } [(set_attr "type" "fmov,fmov,fmov,multi,multi") (set_attr "mode" "XF,XF,XF,SI,SI")]) -(define_insn "*movxf_integer" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") - (match_operand:XF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] - "!TARGET_64BIT - && !optimize_size - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && (reload_in_progress || reload_completed - || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], XFmode))" +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") + (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] + "TARGET_SSE2 + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; else - return "fstp%z0\t%y0"; - + return "%vmovdqa\t{%1, %0|%0, %1}"; case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - break; - - case 3: case 4: - return "#"; + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 3: + case 4: + return "#"; + default: + gcc_unreachable (); } - abort(); } - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) + [(set_attr "type" "ssemov,ssemov,sselog1,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,2") + (if_then_else + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) -(define_insn "*movtf_integer" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") - (match_operand:TF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && !optimize_size - && (reload_in_progress || reload_completed - || GET_CODE (operands[1]) != CONST_DOUBLE - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || memory_operand (operands[0], TFmode))" +(define_insn "*pushtf_sse" + [(set (match_operand:TF 0 "push_operand" "=<,<,<") + (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))] + "TARGET_SSE2" { - switch (which_alternative) - { - case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return 
"fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "sse,*,*") + (set_attr "mode" "TF,SI,SI")]) - case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - break; +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "TARGET_SSE2 && reload_completed + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") - case 3: case 4: - return "#"; - } - abort(); -} - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "any_fp_register_operand" ""))] + "TARGET_SSE2" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) + (set (mem:TF (reg:P SP_REG)) (match_dup 1))] + "") (define_split [(set (match_operand 0 "nonimmediate_operand" "") (match_operand 1 "general_operand" ""))] "reload_completed - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == TFmode) - && ! (ANY_FP_REG_P (operands[0]) || + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && GET_MODE (operands[0]) == XFmode + && ! (ANY_FP_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG && ANY_FP_REG_P (SUBREG_REG (operands[0])))) - && ! (ANY_FP_REG_P (operands[1]) || + && ! 
(ANY_FP_REG_P (operands[1]) || (GET_CODE (operands[1]) == SUBREG && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" [(const_int 0)] @@ -3420,44 +3559,72 @@ [(set (match_operand 0 "register_operand" "") (match_operand 1 "memory_operand" ""))] "reload_completed - && GET_CODE (operands[1]) == MEM - && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == TFmode - || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)) - && (!(SSE_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0])))) - || standard_sse_constant_p (get_pool_constant (XEXP (operands[1], 0)))) - && (!(FP_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && FP_REG_P (SUBREG_REG (operands[0])))) - || standard_80387_constant_p (get_pool_constant (XEXP (operands[1], 0))))" - [(set (match_dup 0) - (match_dup 1))] - "operands[1] = get_pool_constant (XEXP (operands[1], 0));") - -(define_insn "swapxf" - [(set (match_operand:XF 0 "register_operand" "+f") - (match_operand:XF 1 "register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "" + && MEM_P (operands[1]) + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == SFmode + || GET_MODE (operands[0]) == DFmode) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] { - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "XF")]) + rtx c = operands[2]; + rtx r = operands[0]; + + if (GET_CODE (r) == SUBREG) + r = SUBREG_REG (r); + + if (SSE_REG_P (r)) + { + if (!standard_sse_constant_p (c)) + FAIL; + } + else if (FP_REG_P (r)) + { + if (!standard_80387_constant_p (c)) + FAIL; + } + else if (MMX_REG_P (r)) + FAIL; +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (float_extend (match_operand 1 
"memory_operand" "")))] + "reload_completed + && MEM_P (operands[1]) + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == SFmode + || GET_MODE (operands[0]) == DFmode) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] +{ + rtx c = operands[2]; + rtx r = operands[0]; + + if (GET_CODE (r) == SUBREG) + r = SUBREG_REG (r); + + if (SSE_REG_P (r)) + { + if (!standard_sse_constant_p (c)) + FAIL; + } + else if (FP_REG_P (r)) + { + if (!standard_80387_constant_p (c)) + FAIL; + } + else if (MMX_REG_P (r)) + FAIL; +}) -(define_insn "swaptf" - [(set (match_operand:TF 0 "register_operand" "+f") - (match_operand:TF 1 "register_operand" "+f")) +(define_insn "swapxf" + [(set (match_operand:XF 0 "register_operand" "+f") + (match_operand:XF 1 "register_operand" "+f")) (set (match_dup 1) (match_dup 0))] - "" + "TARGET_80387" { if (STACK_TOP_P (operands[0])) return "fxch\t%1"; @@ -3466,6 +3633,34 @@ } [(set_attr "type" "fxch") (set_attr "mode" "XF")]) + +;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (match_operand:X87MODEF 1 "immediate_operand" ""))] + "reload_completed && FP_REGNO_P (REGNO (operands[0])) + && (standard_80387_constant_p (operands[1]) == 8 + || standard_80387_constant_p (operands[1]) == 9)" + [(set (match_dup 0)(match_dup 1)) + (set (match_dup 0) + (neg:X87MODEF (match_dup 0)))] +{ + REAL_VALUE_TYPE r; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + if (real_isnegzero (&r)) + operands[1] = CONST0_RTX (mode); + else + operands[1] = CONST1_RTX (mode); +}) + +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "reload_completed + && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") ;; Zero extension instructions @@ -3474,7 +3669,7 @@ (zero_extend:SI 
(match_operand:HI 1 "nonimmediate_operand" "")))] "" { - if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) + if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) { operands[1] = force_reg (HImode, operands[1]); emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); @@ -3485,8 +3680,8 @@ (define_insn "zero_extendhisi2_and" [(set (match_operand:SI 0 "register_operand" "=r") (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))) - (clobber (reg:CC 17))] - "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" "#" [(set_attr "type" "alu1") (set_attr "mode" "SI")]) @@ -3494,16 +3689,18 @@ (define_split [(set (match_operand:SI 0 "register_operand" "") (zero_extend:SI (match_operand:HI 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "reload_completed && TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + (clobber (reg:CC FLAGS_REG))] + "reload_completed && TARGET_ZERO_EXTEND_WITH_AND + && optimize_function_for_speed_p (cfun)" [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_insn "*zero_extendhisi2_movzwl" [(set (match_operand:SI 0 "register_operand" "=r") (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] - "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + "!TARGET_ZERO_EXTEND_WITH_AND + || optimize_function_for_size_p (cfun)" "movz{wl|x}\t{%1, %0|%0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "SI")]) @@ -3512,15 +3709,15 @@ [(parallel [(set (match_operand:HI 0 "register_operand" "") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "" "") (define_insn "*zero_extendqihi2_and" [(set (match_operand:HI 0 "register_operand" "=r,?&q") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) - (clobber (reg:CC 17))] - 
"TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" "#" [(set_attr "type" "alu1") (set_attr "mode" "HI")]) @@ -3528,27 +3725,30 @@ (define_insn "*zero_extendqihi2_movzbw_and" [(set (match_operand:HI 0 "register_operand" "=r,r") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) - (clobber (reg:CC 17))] - "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + (clobber (reg:CC FLAGS_REG))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)" "#" [(set_attr "type" "imovx,alu1") (set_attr "mode" "HI")]) -(define_insn "*zero_extendqihi2_movzbw" +; zero extend to SImode here to avoid partial register stalls +(define_insn "*zero_extendqihi2_movzbl" [(set (match_operand:HI 0 "register_operand" "=r") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] - "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" - "movz{bw|x}\t{%1, %0|%0, %1}" + "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) + && reload_completed" + "movz{bl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") - (set_attr "mode" "HI")]) + (set_attr "mode" "SI")]) ;; For the movzbw case strip only the clobber (define_split [(set (match_operand:HI 0 "register_operand" "") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))] - "reload_completed - && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND + || optimize_function_for_size_p (cfun)) && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" [(set (match_operand:HI 0 "register_operand" "") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]) @@ -3558,10 +3758,11 @@ (define_split [(set (match_operand:HI 0 "register_operand" "") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] 
"reload_completed && ANY_QI_REG_P (operands[0]) - && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) + && (TARGET_ZERO_EXTEND_WITH_AND + && optimize_function_for_speed_p (cfun)) && !reg_overlap_mentioned_p (operands[0], operands[1])" [(set (match_dup 0) (const_int 0)) (set (strict_low_part (match_dup 2)) (match_dup 1))] @@ -3571,26 +3772,26 @@ (define_split [(set (match_operand:HI 0 "register_operand" "") (zero_extend:HI (match_operand:QI 1 "register_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "reload_completed && true_regnum (operands[0]) == true_regnum (operands[1])" [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_expand "zero_extendqisi2" [(parallel [(set (match_operand:SI 0 "register_operand" "") (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "" "") (define_insn "*zero_extendqisi2_and" [(set (match_operand:SI 0 "register_operand" "=r,?&q") (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) - (clobber (reg:CC 17))] - "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" "#" [(set_attr "type" "alu1") (set_attr "mode" "SI")]) @@ -3598,8 +3799,8 @@ (define_insn "*zero_extendqisi2_movzbw_and" [(set (match_operand:SI 0 "register_operand" "=r,r") (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) - (clobber (reg:CC 17))] - "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + (clobber (reg:CC FLAGS_REG))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)" "#" [(set_attr "type" "imovx,alu1") (set_attr "mode" "SI")]) @@ -3607,7 +3808,8 @@ (define_insn "*zero_extendqisi2_movzbw" [(set (match_operand:SI 0 "register_operand" "=r") (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] - 
"(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" + "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) + && reload_completed" "movz{bl|x}\t{%1, %0|%0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "SI")]) @@ -3616,9 +3818,9 @@ (define_split [(set (match_operand:SI 0 "register_operand" "") (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))] - "reload_completed - && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" [(set (match_dup 0) (zero_extend:SI (match_dup 1)))]) @@ -3628,11 +3830,11 @@ (define_split [(set (match_operand:SI 0 "register_operand" "") (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "reload_completed && ANY_QI_REG_P (operands[0]) - && (ANY_QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM) - && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) + && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])) + && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) && !reg_overlap_mentioned_p (operands[0], operands[1])" [(set (match_dup 0) (const_int 0)) (set (strict_low_part (match_dup 2)) (match_dup 1))] @@ -3642,42 +3844,59 @@ (define_split [(set (match_operand:SI 0 "register_operand" "") (zero_extend:SI (match_operand:QI 1 "register_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "reload_completed && true_regnum (operands[0]) == true_regnum (operands[1])" [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") ;; %%% Kill me once multi-word ops are sane. 
(define_expand "zero_extendsidi2" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))] + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))] "" - "if (!TARGET_64BIT) - { - emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1])); - DONE; - } - ") +{ + if (!TARGET_64BIT) + { + emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1])); + DONE; + } +}) (define_insn "zero_extendsidi2_32" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r"))) - (clobber (reg:CC 17))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2") + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m"))) + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - "#" - [(set_attr "mode" "SI")]) + "@ + # + # + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1}" + [(set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov") + (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex") + (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")]) (define_insn "zero_extendsidi2_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0")))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2") + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))] "TARGET_64BIT" "@ mov\t{%k1, %k0|%k0, %k1} - #" - [(set_attr "type" "imovx,imov") - (set_attr "mode" "SI,DI")]) + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov") + (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex") + (set_attr "mode" "SI,DI,DI,DI,TI,TI")]) (define_split [(set (match_operand:DI 0 
"memory_operand" "") @@ -3686,50 +3905,47 @@ [(set (match_dup 4) (const_int 0))] "split_di (&operands[0], 1, &operands[3], &operands[4]);") -(define_split +(define_split [(set (match_operand:DI 0 "register_operand" "") (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && reload_completed && true_regnum (operands[0]) == true_regnum (operands[1])" [(set (match_dup 4) (const_int 0))] "split_di (&operands[0], 1, &operands[3], &operands[4]);") -(define_split +(define_split [(set (match_operand:DI 0 "nonimmediate_operand" "") (zero_extend:DI (match_operand:SI 1 "general_operand" ""))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && reload_completed" + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed + && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])" [(set (match_dup 3) (match_dup 1)) (set (match_dup 4) (const_int 0))] "split_di (&operands[0], 1, &operands[3], &operands[4]);") (define_insn "zero_extendhidi2" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))] "TARGET_64BIT" - "@ - movz{wl|x}\t{%1, %k0|%k0, %1} - movz{wq|x}\t{%1, %0|%0, %1}" + "movz{wl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") - (set_attr "mode" "SI,DI")]) + (set_attr "mode" "DI")]) (define_insn "zero_extendqidi2" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "Q,m")))] + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))] "TARGET_64BIT" - "@ - movz{bl|x}\t{%1, %k0|%k0, %1} - movz{bq|x}\t{%1, %0|%0, %1}" + "movz{bl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") - (set_attr "mode" "SI,DI")]) + (set_attr "mode" "DI")]) ;; Sign extension instructions (define_expand 
"extendsidi2" [(parallel [(set (match_operand:DI 0 "register_operand" "") (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) - (clobber (reg:CC 17)) + (clobber (reg:CC FLAGS_REG)) (clobber (match_scratch:SI 2 ""))])] "" { @@ -3743,7 +3959,7 @@ (define_insn "*extendsidi2_1" [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o") (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r"))) - (clobber (reg:CC 17)) + (clobber (reg:CC FLAGS_REG)) (clobber (match_scratch:SI 2 "=X,X,X,&r"))] "!TARGET_64BIT" "#") @@ -3777,25 +3993,25 @@ (set_attr "mode" "DI")]) ;; Extend to memory case when source register does die. -(define_split +(define_split [(set (match_operand:DI 0 "memory_operand" "") (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) - (clobber (reg:CC 17)) + (clobber (reg:CC FLAGS_REG)) (clobber (match_operand:SI 2 "register_operand" ""))] "(reload_completed && dead_or_set_p (insn, operands[1]) && !reg_mentioned_p (operands[1], operands[0]))" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (set (match_dup 4) (match_dup 1))] "split_di (&operands[0], 1, &operands[3], &operands[4]);") ;; Extend to memory case when source register does not die. -(define_split +(define_split [(set (match_operand:DI 0 "memory_operand" "") (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) - (clobber (reg:CC 17)) + (clobber (reg:CC FLAGS_REG)) (clobber (match_operand:SI 2 "register_operand" ""))] "reload_completed" [(const_int 0)] @@ -3805,9 +4021,9 @@ emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. 
*/ - if (true_regnum (operands[1]) == 0 - && true_regnum (operands[2]) == 1 - && (optimize_size || TARGET_USE_CLTD)) + if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + && true_regnum (operands[1]) == AX_REG + && true_regnum (operands[2]) == DX_REG) { emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31))); } @@ -3822,10 +4038,10 @@ ;; Extend to register case. Optimize case where source and destination ;; registers match and cases where we can use cltd. -(define_split +(define_split [(set (match_operand:DI 0 "register_operand" "") (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) - (clobber (reg:CC 17)) + (clobber (reg:CC FLAGS_REG)) (clobber (match_scratch:SI 2 ""))] "reload_completed" [(const_int 0)] @@ -3836,8 +4052,8 @@ emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. */ - if (true_regnum (operands[3]) == 0 - && (optimize_size || TARGET_USE_CLTD)) + if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + && true_regnum (operands[3]) == AX_REG) { emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31))); DONE; @@ -3951,26 +4167,19 @@ ;; These are all no-ops in the model used for the 80387. So just ;; emit moves. -;; %%% Kill these when call knows how to work out a DFmode push earlier. +;; %%% Kill these when call knows how to work out a DFmode push earlier. 
(define_insn "*dummy_extendsfdf2" [(set (match_operand:DF 0 "push_operand" "=<") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY")))] + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))] "0" "#") (define_split [(set (match_operand:DF 0 "push_operand" "") - (float_extend:DF (match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:DF (reg:SI 7)) (float_extend:DF (match_dup 1)))]) - -(define_split - [(set (match_operand:DF 0 "push_operand" "") - (float_extend:DF (match_operand:SF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) - (set (mem:DF (reg:DI 7)) (float_extend:DF (match_dup 1)))]) + (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))] + "" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) + (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))]) (define_insn "*dummy_extendsfxf2" [(set (match_operand:XF 0 "push_operand" "=<") @@ -3980,771 +4189,772 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") - (float_extend:XF (match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) - (set (mem:XF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) - -(define_insn "*dummy_extendsftf2" - [(set (match_operand:TF 0 "push_operand" "=<") - (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "f")))] - "0" - "#") - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:TF (reg:SI 7)) (float_extend:TF (match_dup 1)))]) - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF 
(match_operand:SF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) - (set (mem:DF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) - -(define_insn "*dummy_extenddfxf2" - [(set (match_operand:XF 0 "push_operand" "=<") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "f")))] - "0" - "#") + (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] + "" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") (define_split [(set (match_operand:XF 0 "push_operand" "") - (float_extend:XF (match_operand:DF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) - (set (mem:DF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) - -(define_insn "*dummy_extenddftf2" - [(set (match_operand:TF 0 "push_operand" "=<") - (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "f")))] - "0" - "#") - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:DF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:TF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:DF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) - (set (mem:TF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) + (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] + "" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:DF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? 
-16 : -12);") (define_expand "extendsfdf2" [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE2" + (float_extend:DF (match_operand:SF 1 "general_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" { - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (SFmode, operands[1]); + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387) + && standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, DFmode, operands[1], SFmode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); + } }) -(define_insn "*extendsfdf2_1" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,mf#Y,Y#f") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm#Y,f#Y,mY#f")))] - "(TARGET_80387 || TARGET_SSE2) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +/* For converting SF(xmm2) to DF(xmm1), use the following code instead of + cvtss2sd: + unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs + cvtps2pd xmm2,xmm1 + We do the conversion post reload to avoid producing of 128bit spills + that might lead to ICE on 32bit target. The sequence unlikely combine + anyway. 
*/ +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "")))] + "TARGET_USE_VECTOR_FP_CONVERTS + && optimize_insn_for_speed_p () + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 2) + (float_extend:V2DF + (vec_select:V2SF + (match_dup 3) + (parallel [(const_int 0) (const_int 1)]))))] { - switch (which_alternative) + operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0); + /* Use movss for loading from memory, unpcklps reg, reg for registers. + Try to avoid move when unpacking can be done in source. */ + if (REG_P (operands[1])) { - case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; + /* If it is unsafe to overwrite upper half of source, we need + to move to destination and unpack there. 
*/ + if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER + || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4) + && true_regnum (operands[0]) != true_regnum (operands[1])) + { + rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0])); + emit_move_insn (tmp, operands[1]); + } else - return "fst\t%y0"; + operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3])); + } + else + emit_insn (gen_vec_setv4sf_0 (operands[3], + CONST0_RTX (V4SFmode), operands[1])); +}) +(define_insn "*extendsfdf2_mixed" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") + (float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387" +{ + switch (which_alternative) + { + case 0: case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; + return output_387_reg_move (insn, operands); - else - return "fst%z0\t%y0"; case 2: - return "cvtss2sd\t{%1, %0|%0, %1}"; + return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; default: - abort (); + gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,sse") + [(set_attr "type" "fmov,fmov,ssecvt") + (set_attr "prefix" "orig,orig,maybe_vex") (set_attr "mode" "SF,XF,DF")]) -(define_insn "*extendsfdf2_1_sse_only" - [(set (match_operand:DF 0 "register_operand" "=Y") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "mY")))] - "!TARGET_80387 && TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "cvtss2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") +(define_insn "*extendsfdf2_sse" + [(set (match_operand:DF 0 "nonimmediate_operand" "=x") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "%vcvtss2sd\t{%1, %d0|%d0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "maybe_vex") (set_attr "mode" "DF")]) -(define_expand "extendsfxf2" +(define_insn "*extendsfdf2_i387" + [(set 
(match_operand:DF 0 "nonimmediate_operand" "=f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "SF,XF")]) + +(define_expand "extendxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))] + "TARGET_80387" { - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (SFmode, operands[1]); + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + if (standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, XFmode, operands[1], mode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (mode, operands[1])); + } }) -(define_insn "*extendsfxf2_1" +(define_insn "*extendxf2_i387" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") - (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "!TARGET_64BIT && TARGET_80387 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; + (float_extend:XF + (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" ",XF")]) - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fstp%z0\t%y0"; +;; %%% This seems bad bad news. +;; This cannot output into an f-reg because there is no way to be sure +;; of truncating in that case. Otherwise this is just like a simple move +;; insn. So we pretend we can output to a reg in order to get better +;; register preferencing, but we really use a stack slot. - default: - abort (); +;; Conversion from DFmode to SFmode. + +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) + ; + else if (flag_unsafe_math_optimizations) + ; + else + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + rtx temp = assign_386_stack_local (SFmode, slot); + emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp)); + DONE; } -} - [(set_attr "type" "fmov") - (set_attr "mode" "SF,XF")]) +}) -(define_expand "extendsftf2" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "")))] - "TARGET_80387" +/* For converting DF(xmm2) to SF(xmm1), use the following code instead of + cvtsd2ss: + unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs + cvtpd2ps xmm2,xmm1 + We do the conversion post reload to avoid producing of 128bit spills + that might lead to ICE on 32bit target. The sequence unlikely combine + anyway. 
*/ +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_USE_VECTOR_FP_CONVERTS + && optimize_insn_for_speed_p () + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 2) + (vec_concat:V4SF + (float_truncate:V2SF + (match_dup 4)) + (match_dup 3)))] { - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (SFmode, operands[1]); + operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + operands[3] = CONST0_RTX (V2SFmode); + operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0); + /* Use movsd for loading from memory, unpcklpd for registers. + Try to avoid move when unpacking can be done in source, or SSE3 + movddup is available. */ + if (REG_P (operands[1])) + { + if (!TARGET_SSE3 + && true_regnum (operands[0]) != true_regnum (operands[1]) + && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER + || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8)) + { + rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } + else if (!TARGET_SSE3) + operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + emit_insn (gen_vec_dupv2df (operands[4], operands[1])); + } + else + emit_insn (gen_sse2_loadlpd (operands[4], + CONST0_RTX (V2DFmode), operands[1])); }) -(define_insn "*extendsftf2_1" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m") - (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +(define_expand "truncdfsf2_with_temp" + [(parallel [(set (match_operand:SF 0 "" "") + (float_truncate:SF (match_operand:DF 1 "" ""))) + (clobber (match_operand:SF 2 "" ""))])] + "") + +(define_insn "*truncdfsf_fast_mixed" + [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,x") + (float_truncate:SF + 
(match_operand:DF 1 "nonimmediate_operand" "f ,xm")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - + return output_387_reg_move (insn, operands); case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fstp%z0\t%y0"; - + return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; default: - abort (); + gcc_unreachable (); } } - [(set_attr "type" "fmov") - (set_attr "mode" "SF,XF")]) + [(set_attr "type" "fmov,ssecvt") + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "SF")]) -(define_expand "extenddfxf2" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "")))] - "!TARGET_64BIT && TARGET_80387" -{ - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (DFmode, operands[1]); -}) +;; Yes, this one doesn't depend on flag_unsafe_math_optimizations, +;; because nothing we do here is unsafe. 
+(define_insn "*truncdfsf_fast_sse" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "%vcvtsd2ss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) -(define_insn "*extenddfxf2_1" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] - "!TARGET_64BIT && TARGET_80387 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +(define_insn "*truncdfsf_fast_i387" + [(set (match_operand:SF 0 "nonimmediate_operand" "=fm") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f")))] + "TARGET_80387 && flag_unsafe_math_optimizations" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf_mixed" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,Y2 ,?f,?x,?*r") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f ,Y2m,f ,f ,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,X ,m ,m ,m"))] + "TARGET_MIX_SSE_I387" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - + return output_387_reg_move (insn, operands); case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; default: - abort (); + return "#"; } } - [(set_attr "type" "fmov") - (set_attr "mode" "DF,XF")]) + [(set_attr "type" "fmov,ssecvt,multi,multi,multi") + (set_attr "unit" "*,*,i387,i387,i387") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig") + (set_attr "mode" "SF")]) -(define_expand "extenddftf2" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "")))] +(define_insn "*truncdfsf_i387" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))] "TARGET_80387" -{ - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (DFmode, operands[1]); -}) - -(define_insn "*extenddftf2_1" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m") - (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (which_alternative) { case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + return output_387_reg_move (insn, operands); default: - abort (); + return "#"; } } + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf2_i387_1" + [(set (match_operand:SF 0 "memory_operand" "=m") + (float_truncate:SF + (match_operand:DF 1 "register_operand" "f")))] + "TARGET_80387 + && !(TARGET_SSE2 && TARGET_SSE_MATH) + && !TARGET_MIX_SSE_I387" + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" "DF,XF")]) + (set_attr "mode" "SF")]) -;; %%% This seems bad bad news. -;; This cannot output into an f-reg because there is no way to be sure -;; of truncating in that case. Otherwise this is just like a simple move -;; insn. So we pretend we can output to a reg in order to get better -;; register preferencing, but we really use a stack slot. 
+(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "fp_register_operand" ""))) + (clobber (match_operand 2 "" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] +{ + operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1])); +}) -(define_expand "truncdfsf2" - [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") - (float_truncate:SF - (match_operand:DF 1 "register_operand" ""))) +;; Conversion from XFmode to {SF,DF}mode + +(define_expand "truncxf2" + [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) (clobber (match_dup 2))])] - "TARGET_80387 || TARGET_SSE2" - " - if (TARGET_80387) - operands[2] = assign_386_stack_local (SFmode, 0); - else - { - emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1])); - DONE; - } -") - -(define_insn "*truncdfsf2_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") - (float_truncate:SF - (match_operand:DF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] - "TARGET_80387 && !TARGET_SSE2" + "TARGET_80387" { - switch (which_alternative) + if (flag_unsafe_math_optimizations) { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort (); + rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (mode); + emit_insn (gen_truncxf2_i387_noop (reg, operands[1])); + if (reg != operands[0]) + emit_move_insn (operands[0], reg); + DONE; } -} - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "mode" "SF,SF,SF,SF")]) + else + { + int slot = virtuals_instantiated ? 
SLOT_TEMP : SLOT_VIRTUAL; + operands[2] = assign_386_stack_local (mode, slot); + } +}) -(define_insn "*truncdfsf2_1_sse" - [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,Y") +(define_insn "*truncxfsf2_mixed" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] - "TARGET_80387 && TARGET_SSE2" + (match_operand:XF 1 "register_operand" "f ,f ,f ,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))] + "TARGET_80387" { - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - case 4: - return "cvtsd2ss\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "fmov,multi,multi,multi,sse") - (set_attr "mode" "SF,SF,SF,SF,DF")]) - -(define_insn "*truncdfsf2_2" - [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m") - (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] - "TARGET_80387 && TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - return "cvtsd2ss\t{%1, %0|%0, %1}"; - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort (); - } -} - [(set_attr "type" "sse,fmov") - (set_attr "mode" "DF,SF")]) - -(define_insn "truncdfsf2_3" - [(set (match_operand:SF 0 "memory_operand" "=m") - (float_truncate:SF - (match_operand:DF 1 "register_operand" "f")))] - "TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} - [(set_attr "type" "fmov") - (set_attr "mode" "SF")]) - -(define_insn "truncdfsf2_sse_only" - [(set (match_operand:SF 0 "register_operand" "=Y") - (float_truncate:SF - (match_operand:DF 1 
"nonimmediate_operand" "mY")))] - "!TARGET_80387 && TARGET_SSE2" - "cvtsd2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "DF")]) - -(define_split - [(set (match_operand:SF 0 "memory_operand" "") - (float_truncate:SF - (match_operand:DF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" ""))) - (clobber (match_operand 2 "" ""))] - "TARGET_80387 && reload_completed - && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float_truncate:SF - (match_operand:DF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed - && FP_REG_P (operands[1])" - [(set (match_dup 2) (float_truncate:SF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] - "") - -(define_expand "truncxfsf2" - [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_dup 2))])] - "!TARGET_64BIT && TARGET_80387" - "operands[2] = assign_386_stack_local (SFmode, 0);") - -(define_insn "*truncxfsf2_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") - (float_truncate:SF - (match_operand:XF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] - "!TARGET_64BIT && TARGET_80387" -{ - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort(); - } -} - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "mode" "SF")]) - -(define_insn "*truncxfsf2_2" - [(set (match_operand:SF 0 
"memory_operand" "=m") - (float_truncate:SF - (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} - [(set_attr "type" "fmov") - (set_attr "mode" "SF")]) - -(define_split - [(set (match_operand:SF 0 "memory_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:SF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] - "") - -(define_expand "trunctfsf2" - [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") - (float_truncate:SF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (SFmode, 0);") - -(define_insn "*trunctfsf2_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") - (float_truncate:SF - (match_operand:TF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] - "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort(); - } + gcc_assert (!which_alternative); + return output_387_reg_move (insn, operands); } [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") (set_attr "mode" "SF")]) -(define_insn "*trunctfsf2_2" - [(set (match_operand:SF 0 "memory_operand" "=m") - (float_truncate:SF - (match_operand:TF 1 "register_operand" "f")))] - "TARGET_80387" -{ - if 
(find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} - [(set_attr "type" "fmov") - (set_attr "mode" "SF")]) - -(define_split - [(set (match_operand:SF 0 "memory_operand" "") - (float_truncate:SF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float_truncate:SF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:SF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] - "") - - -(define_expand "truncxfdf2" - [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_dup 2))])] - "!TARGET_64BIT && TARGET_80387" - "operands[2] = assign_386_stack_local (DFmode, 0);") - -(define_insn "*truncxfdf2_1" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#rY,?r#fY,?Y#rf") +(define_insn "*truncxfdf2_mixed" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?Y2,?*r") (float_truncate:DF - (match_operand:XF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] - "!TARGET_64BIT && TARGET_80387" + (match_operand:XF 1 "register_operand" "f ,f ,f ,f"))) + (clobber (match_operand:DF 2 "memory_operand" "=X,m ,m ,m"))] + "TARGET_80387" { - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort(); - } - abort (); + gcc_assert (!which_alternative); + return output_387_reg_move (insn, operands); } [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") (set_attr "mode" "DF")]) -(define_insn "*truncxfdf2_2" - [(set 
(match_operand:DF 0 "memory_operand" "=m") - (float_truncate:DF +(define_insn "truncxf2_i387_noop" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (float_truncate:MODEF (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} + "TARGET_80387 && flag_unsafe_math_optimizations" + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) -(define_split - [(set (match_operand:DF 0 "memory_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] +(define_insn "*truncxf2_i387" + [(set (match_operand:MODEF 0 "memory_operand" "=m") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" "f")))] "TARGET_80387" - [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] - "") + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "")]) (define_split - [(set (match_operand:DF 0 "register_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] + [(set (match_operand:MODEF 0 "register_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:MODEF 2 "memory_operand" ""))] "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:DF (match_dup 1))) + [(set (match_dup 2) (float_truncate:MODEF (match_dup 1))) (set (match_dup 0) (match_dup 2))] "") -(define_expand "trunctfdf2" - [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_truncate:DF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (DFmode, 0);") - -(define_insn "*trunctfdf2_1" - [(set (match_operand:DF 0 "nonimmediate_operand" 
"=m,?f#rY,?r#fY,?Y#rf") - (float_truncate:DF - (match_operand:TF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] - "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort(); - } - abort (); -} - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "mode" "DF")]) - - (define_insn "*trunctfdf2_2" - [(set (match_operand:DF 0 "memory_operand" "=m") - (float_truncate:DF - (match_operand:TF 1 "register_operand" "f")))] - "TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} - [(set_attr "type" "fmov") - (set_attr "mode" "DF")]) - (define_split - [(set (match_operand:DF 0 "memory_operand" "") - (float_truncate:DF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] + [(set (match_operand:MODEF 0 "memory_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:MODEF 2 "memory_operand" ""))] "TARGET_80387" - [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (float_truncate:DF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:DF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] + [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))] "") - -;; %%% Break up all these bad boys. - ;; Signed conversion to DImode. 
(define_expand "fix_truncxfdi2" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "fix_trunctfdi2" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:TF 1 "register_operand" "")))] + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] "TARGET_80387" - "") +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) -(define_expand "fix_truncdfdi2" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:DF 1 "register_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_64BIT)" +(define_expand "fix_truncdi2" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (mode))" { - if (TARGET_64BIT && TARGET_SSE2) + if (TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (TARGET_64BIT && SSE_FLOAT_MODE_P (mode)) { rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode); - emit_insn (gen_fix_truncdfdi_sse (out, operands[1])); + emit_insn (gen_fix_truncdi_sse (out, operands[1])); if (out != operands[0]) emit_move_insn (operands[0], out); DONE; } }) -(define_expand "fix_truncsfdi2" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:SF 1 "register_operand" "")))] - "TARGET_80387 || (TARGET_SSE && TARGET_64BIT)" +;; Signed conversion to SImode. 
+ +(define_expand "fix_truncxfsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387" { - if (TARGET_SSE && TARGET_64BIT) + if (TARGET_FISTTP) { - rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode); - emit_insn (gen_fix_truncsfdi_sse (out, operands[1])); + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "fix_truncsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || SSE_FLOAT_MODE_P (mode)" +{ + if (TARGET_FISTTP + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (SSE_FLOAT_MODE_P (mode)) + { + rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); + emit_insn (gen_fix_truncsi_sse (out, operands[1])); if (out != operands[0]) emit_move_insn (operands[0], out); DONE; } }) -;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. -(define_insn_and_split "*fix_truncdi_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (fix:DI (match_operand 1 "register_operand" "f,f")))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" +;; Signed conversion to HImode. 
+ +(define_expand "fix_trunchi2" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:X87MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 + && !(SSE_FLOAT_MODE_P (mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))" +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +;; Unsigned conversion to SImode. + +(define_expand "fixuns_truncsi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (unsigned_fix:SI + (match_operand:MODEF 1 "nonimmediate_operand" ""))) + (use (match_dup 2)) + (clobber (match_scratch: 3 "")) + (clobber (match_scratch: 4 ""))])] + "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" +{ + enum machine_mode mode = mode; + enum machine_mode vecmode = mode; + REAL_VALUE_TYPE TWO31r; + rtx two31; + + if (optimize_insn_for_size_p ()) + FAIL; + + real_ldexp (&TWO31r, &dconst1, 31); + two31 = const_double_from_real_value (TWO31r, mode); + two31 = ix86_build_const_vector (mode, true, two31); + operands[2] = force_reg (vecmode, two31); +}) + +(define_insn_and_split "*fixuns_trunc_1" + [(set (match_operand:SI 0 "register_operand" "=&x,&x") + (unsigned_fix:SI + (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm"))) + (use (match_operand: 4 "nonimmediate_operand" "m,x")) + (clobber (match_scratch: 1 "=x,&x")) + (clobber (match_scratch: 2 "=x,x"))] + "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_convert_uns_si_sse (operands); + DONE; +}) + +;; Unsigned conversion to HImode. +;; Without these patterns, we'll try the unsigned SI conversion which +;; is complex for SSE, rather than the signed SI conversion, which isn't. 
+ +(define_expand "fixuns_trunchi2" + [(set (match_dup 2) + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" ""))) + (set (match_operand:HI 0 "nonimmediate_operand" "") + (subreg:HI (match_dup 2) 0))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "operands[2] = gen_reg_rtx (SImode);") + +;; When SSE is available, it is always faster to use it! +(define_insn "fix_truncdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "TARGET_64BIT && SSE_FLOAT_MODE_P (mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "%vcvtts2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double")]) + +(define_insn "fix_truncsi_sse" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "SSE_FLOAT_MODE_P (mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "%vcvtts2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double")]) + +;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns. +(define_peephole2 + [(set (match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "memory_operand" "")) + (set (match_operand:SSEMODEI24 2 "register_operand" "") + (fix:SSEMODEI24 (match_dup 0)))] + "TARGET_SHORTEN_X87_SSE + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))] + "") + +;; Avoid vector decoded forms of the instruction. 
+(define_peephole2 + [(match_scratch:DF 2 "Y2") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] + "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] + "") + +(define_peephole2 + [(match_scratch:SF 2 "x") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] + "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] + "") + +(define_insn_and_split "fix_trunc_fisttp_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" "")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH) + && !(reload_completed || reload_in_progress)" "#" "&& 1" [(const_int 0)] { - operands[2] = assign_386_stack_local (HImode, 1); - operands[3] = assign_386_stack_local (HImode, 2); if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_truncdi_memory (operands[0], operands[1], - operands[2], operands[3])); + emit_insn (gen_fix_trunc_i387_fisttp (operands[0], operands[1])); else { - operands[4] = assign_386_stack_local (DImode, 0); - emit_insn (gen_fix_truncdi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fix_trunc_i387_fisttp_with_temp (operands[0], + operands[1], + operands[2])); } DONE; } - [(set_attr "type" "fistp")]) + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_fisttp" + [(set (match_operand:X87MODEI 0 "memory_operand" "=m") + (fix:X87MODEI (match_operand 1 "register_operand" "f"))) + (clobber (match_scratch:XF 2 
"=&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "* return output_fix_trunc (insn, operands, 1);" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_fisttp_with_temp" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "#" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) -(define_insn "fix_truncdi_nomemory" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (fix:DI (match_operand 1 "register_operand" "f,f"))) - (use (match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "=m,m")) - (clobber (match_scratch:DF 5 "=&1f,&1f"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" +(define_split + [(set (match_operand:X87MODEI 0 "register_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:X87MODEI 0 "memory_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1))) + (clobber 
(match_dup 3))])] + "") + +;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description +;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control +;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG +;; clobbering insns can be used. Look at emit_i387_cw_initialization () +;; function in i386.c. +(define_insn_and_split "*fix_trunc_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && !(reload_completed || reload_in_progress)" "#" - [(set_attr "type" "fistp")]) + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fix_trunc_i387_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) -(define_insn "fix_truncdi_memory" +(define_insn "fix_truncdi_i387" [(set (match_operand:DI 0 "memory_operand" "=m") (fix:DI (match_operand 1 "register_operand" "f"))) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m")) - (clobber (match_scratch:DF 4 "=&1f"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" - "* operands[5] = operands[4]; return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp")]) + (clobber (match_scratch:XF 4 "=&1f"))] + 
"X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "DI")]) + +(define_insn "fix_truncdi_i387_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (fix:DI (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "DI")]) -(define_split +(define_split [(set (match_operand:DI 0 "register_operand" "") (fix:DI (match_operand 1 "register_operand" ""))) (use (match_operand:HI 2 "memory_operand" "")) @@ -4759,7 +4969,7 @@ (set (match_dup 0) (match_dup 4))] "") -(define_split +(define_split [(set (match_operand:DI 0 "memory_operand" "") (fix:DI (match_operand 1 "register_operand" ""))) (use (match_operand:HI 2 "memory_operand" "")) @@ -4773,517 +4983,768 @@ (clobber (match_dup 5))])] "") -;; When SSE available, it is always faster to use it! -(define_insn "fix_truncsfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r") - (fix:DI (match_operand:SF 1 "nonimmediate_operand" "xm")))] - "TARGET_64BIT && TARGET_SSE" - "cvttss2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "fix_truncdfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r") - (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] - "TARGET_64BIT && TARGET_SSE2" - "cvttsd2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -;; Signed conversion to SImode. 
- -(define_expand "fix_truncxfsi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "fix_trunctfsi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:TF 1 "register_operand" "")))] - "TARGET_80387" - "") - -(define_expand "fix_truncdfsi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:DF 1 "register_operand" "")))] - "TARGET_80387 || TARGET_SSE2" -{ - if (TARGET_SSE2) - { - rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); - emit_insn (gen_fix_truncdfsi_sse (out, operands[1])); - if (out != operands[0]) - emit_move_insn (operands[0], out); - DONE; - } -}) - -(define_expand "fix_truncsfsi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:SF 1 "register_operand" "")))] - "TARGET_80387 || TARGET_SSE" -{ - if (TARGET_SSE) - { - rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); - emit_insn (gen_fix_truncsfsi_sse (out, operands[1])); - if (out != operands[0]) - emit_move_insn (operands[0], out); - DONE; - } -}) - -;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. 
-(define_insn_and_split "*fix_truncsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") - (fix:SI (match_operand 1 "register_operand" "f,f")))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress +(define_insn "fix_trunc_i387" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - "&& 1" - [(const_int 0)] -{ - operands[2] = assign_386_stack_local (HImode, 1); - operands[3] = assign_386_stack_local (HImode, 2); - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_truncsi_memory (operands[0], operands[1], - operands[2], operands[3])); - else - { - operands[4] = assign_386_stack_local (SImode, 0); - emit_insn (gen_fix_truncsi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); - } - DONE; -} - [(set_attr "type" "fistp")]) - -(define_insn "fix_truncsi_nomemory" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") - (fix:SI (match_operand 1 "register_operand" "f,f"))) + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f"))) (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:SI 4 "memory_operand" "=m,m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "#" - [(set_attr 
"type" "fistp")]) - -(define_insn "fix_truncsi_memory" - [(set (match_operand:SI 0 "memory_operand" "=m") - (fix:SI (match_operand 1 "register_operand" "f"))) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp")]) - -;; When SSE available, it is always faster to use it! -(define_insn "fix_truncsfsi_sse" - [(set (match_operand:SI 0 "register_operand" "=r") - (fix:SI (match_operand:SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "fix_truncdfsi_sse" - [(set (match_operand:SI 0 "register_operand" "=r") - (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "cvttsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (fix:SI (match_operand 1 "register_operand" ""))) +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) (use (match_operand:HI 2 "memory_operand" "")) (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:SI 4 "memory_operand" ""))] + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] "reload_completed" - [(parallel [(set (match_dup 4) (fix:SI (match_dup 1))) + [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1))) (use (match_dup 2)) (use (match_dup 3))]) (set (match_dup 0) (match_dup 4))] "") -(define_split - [(set (match_operand:SI 0 "memory_operand" "") - (fix:SI (match_operand 1 "register_operand" ""))) - (use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:SI 4 "memory_operand" ""))] - 
"reload_completed" - [(parallel [(set (match_dup 0) (fix:SI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3))])] - "") - -;; Signed conversion to HImode. - -(define_expand "fix_truncxfhi2" - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "fix_trunctfhi2" - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:TF 1 "register_operand" "")))] - "TARGET_80387" - "") - -(define_expand "fix_truncdfhi2" - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:DF 1 "register_operand" "")))] - "TARGET_80387 && !TARGET_SSE2" - "") - -(define_expand "fix_truncsfhi2" - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:SF 1 "register_operand" "")))] - "TARGET_80387 && !TARGET_SSE" - "") - -;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. -(define_insn_and_split "*fix_trunchi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") - (fix:HI (match_operand 1 "register_operand" "f,f")))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - "" - [(const_int 0)] -{ - operands[2] = assign_386_stack_local (HImode, 1); - operands[3] = assign_386_stack_local (HImode, 2); - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_trunchi_memory (operands[0], operands[1], - operands[2], operands[3])); - else - { - operands[4] = assign_386_stack_local (HImode, 0); - emit_insn (gen_fix_trunchi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); - } - DONE; -} - [(set_attr "type" "fistp")]) - -(define_insn "fix_trunchi_nomemory" - [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") - (fix:HI (match_operand 1 "register_operand" "f,f"))) - (use (match_operand:HI 2 "memory_operand" 
"m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:HI 4 "memory_operand" "=m,m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - [(set_attr "type" "fistp")]) - -(define_insn "fix_trunchi_memory" - [(set (match_operand:HI 0 "memory_operand" "=m") - (fix:HI (match_operand 1 "register_operand" "f"))) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp")]) - -(define_split - [(set (match_operand:HI 0 "memory_operand" "") - (fix:HI (match_operand 1 "register_operand" ""))) +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) (use (match_operand:HI 2 "memory_operand" "")) (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:HI 4 "memory_operand" ""))] + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] "reload_completed" - [(parallel [(set (match_dup 0) (fix:HI (match_dup 1))) + [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1))) (use (match_dup 2)) (use (match_dup 3))])] "") -(define_split - [(set (match_operand:HI 0 "register_operand" "") - (fix:HI (match_operand 1 "register_operand" ""))) - (use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:HI 4 "memory_operand" ""))] - "reload_completed" - [(parallel [(set (match_dup 4) (fix:HI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 4))]) - (set (match_dup 0) (match_dup 4))] - "") - -;; %% Not used yet. 
(define_insn "x86_fnstcw_1" [(set (match_operand:HI 0 "memory_operand" "=m") - (unspec:HI [(reg:HI 18)] 11))] + (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))] "TARGET_80387" "fnstcw\t%0" [(set_attr "length" "2") (set_attr "mode" "HI") - (set_attr "i387" "1") - (set_attr "ppro_uops" "few")]) + (set_attr "unit" "i387")]) (define_insn "x86_fldcw_1" - [(set (reg:HI 18) - (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] 12))] + [(set (reg:HI FPCR_REG) + (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))] "TARGET_80387" "fldcw\t%0" [(set_attr "length" "2") (set_attr "mode" "HI") - (set_attr "i387" "1") + (set_attr "unit" "i387") (set_attr "athlon_decode" "vector") - (set_attr "ppro_uops" "few")]) + (set_attr "amdfam10_decode" "vector")]) ;; Conversion between fixed point and floating point. ;; Even though we only accept memory inputs, the backend _really_ ;; wants to be able to do this between registers. -(define_insn "floathisf2" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] - "TARGET_80387 && !TARGET_SSE" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "SF") - (set_attr "fp_int_src" "true")]) - -(define_expand "floatsisf2" - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] - "TARGET_SSE || TARGET_80387" +(define_expand "floathi2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" "") -(define_insn "*floatsisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))] - "TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)" - "@ - fild%z1\t%1 - # - cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") - (set_attr "mode" "SF") +;; 
Pre-reload splitter to add memory clobber to the pattern. +(define_insn_and_split "*floathi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] + "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);") + +(define_insn "*floathi2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:HI 2 "memory_operand" "=m,m"))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "#" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) -(define_insn "*floatsisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "mr")))] - "TARGET_SSE" - "cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF") +(define_insn "*floathi2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "fild%z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") (set_attr "fp_int_src" "true")]) -(define_expand "floatdisf2" - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] - "(TARGET_64BIT && TARGET_SSE) || TARGET_80387" +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && 
(!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] "") -(define_insn "*floatdisf2_i387_only" - [(set (match_operand:SF 0 "register_operand" "=f,?f") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] - "TARGET_80387 && (!TARGET_SSE || !TARGET_64BIT || TARGET_MIX_SSE_I387)" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "SF") - (set_attr "fp_int_src" "true")]) +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "memory_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") -(define_insn "*floatdisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))] - "TARGET_64BIT && TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)" - "@ - fild%z1\t%1 - # - cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") - (set_attr "mode" "SF") - (set_attr "fp_int_src" "true")]) +(define_expand "float2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))] + "TARGET_80387 + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "") -(define_insn "*floatdisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "mr")))] - "TARGET_64BIT && TARGET_SSE" - "cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF") +;; Pre-reload splitter to add memory clobber to the pattern. 
+(define_insn_and_split "*float2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))] + "((TARGET_80387 + && (!((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)) + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode == SImode + && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS + && optimize_function_for_speed_p (cfun) + && flag_trapping_math) + || !(TARGET_INTER_UNIT_CONVERSIONS + || optimize_function_for_size_p (cfun))))) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] +{ + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + + /* Avoid store forwarding (partial memory) stall penalty + by passing DImode value through XMM registers. */ + if (mode == DImode && !TARGET_64BIT + && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && optimize_function_for_speed_p (cfun)) + { + emit_insn (gen_floatdi2_i387_with_xmm (operands[0], + operands[1], + operands[2])); + DONE; + } +}) + +(define_insn "*floatsi2_vector_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=X,m,m,X,m"))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt") + (set_attr "mode" ",,,,") + (set_attr "unit" "*,i387,*,*,*") + (set_attr "athlon_decode" "*,*,double,direct,double") + (set_attr "amdfam10_decode" "*,*,vector,double,double") (set_attr "fp_int_src" "true")]) -(define_insn "floathidf2" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] - "TARGET_80387 && 
!TARGET_SSE2" +(define_insn "*floatsi2_vector_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" "@ fild%z1\t%1 #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "DF") + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" ",") + (set_attr "unit" "i387,*") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") (set_attr "fp_int_src" "true")]) -(define_expand "floatsidf2" - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE2" - "") - -(define_insn "*floatsidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,?f,Y") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))] - "TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)" - "@ - fild%z1\t%1 - # - cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") - (set_attr "mode" "DF") +(define_insn "*float2_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m,m,X"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387" + "#" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") + (set_attr "mode" "") + (set_attr "unit" "*,i387,*,*") + (set_attr "athlon_decode" "*,*,double,direct") + (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatsidf2_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "mr")))] - "TARGET_SSE2" - "cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "DF") - (set_attr "fp_int_src" "true")]) +(define_split + [(set (match_operand:MODEF 0 
"register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && TARGET_INTER_UNIT_CONVERSIONS + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") -(define_expand "floatdidf2" - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))] - "(TARGET_64BIT && TARGET_SSE2) || TARGET_80387" +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))] "") -(define_insn "*floatdidf2_i387_only" - [(set (match_operand:DF 0 "register_operand" "=f,?f") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] - "TARGET_80387 && (!TARGET_SSE2 || !TARGET_64BIT)" +(define_insn "*float2_mixed_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" "@ fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "DF") + %vcvtsi2s\t{%1, %d0|%d0, %1} + %vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "fmov,sseicvt,sseicvt") + (set_attr "prefix" 
"orig,maybe_vex,maybe_vex") + (set_attr "mode" "") + (set_attr "unit" "i387,*,*") + (set_attr "athlon_decode" "*,double,direct") + (set_attr "amdfam10_decode" "*,vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,?f,Y") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))] - "TARGET_64BIT && TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)" +(define_insn "*float2_mixed_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" "@ fild%z1\t%1 - # - cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") - (set_attr "mode" "DF") + %vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdidf2_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "mr")))] - "TARGET_SSE2" - "cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "DF") +(define_insn "*floatsi2_vector_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=m,X,m"))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" ",,") + (set_attr "athlon_decode" "double,direct,double") + (set_attr "amdfam10_decode" "vector,double,double") (set_attr "fp_int_src" "true")]) -(define_insn "floathixf2" - [(set (match_operand:XF 0 
"register_operand" "=f,f") - (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") +(define_insn "*floatsi2_vector_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") (set_attr "fp_int_src" "true")]) -(define_insn "floathitf2" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (float:TF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") - (set_attr "fp_int_src" "true")]) +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + rtx op1 = operands[1]; -(define_insn "floatsixf2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") - (set_attr "fp_int_src" "true")]) + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); -(define_insn "floatsitf2" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (float:TF (match_operand:SI 1 "nonimmediate_operand" "m,r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - 
[(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. */ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[2])); + } + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if 
(GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. */ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + gcc_unreachable (); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_insn "*float2_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,X"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "floatdixf2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" - "@ - fild%z1\t%1 - #" - 
[(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") +(define_insn "*float2_sse_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "%vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_insn "*float2_sse_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "%vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") (set_attr "fp_int_src" "true")]) -(define_insn "floatditf2" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (float:TF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF 
(match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_insn "*float2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m"))] "TARGET_80387" "@ fild%z1\t%1 #" [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") + (set_attr "mode" "") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "TARGET_80387" + "fild%z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") (set_attr "fp_int_src" "true")]) -;; %%% Kill these when reload knows how to do it. 
(define_split - [(set (match_operand 0 "register_operand" "") - (float (match_operand 1 "register_operand" "")))] - "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0])) + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && reload_completed && FP_REG_P (operands[0])" - [(const_int 0)] + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") + +;; Avoid store forwarding (partial memory) stall penalty +;; by passing DImode value through XMM registers. */ + +(define_insn "floatdi2_i387_with_xmm" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF + (match_operand:DI 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_scratch:V4SI 3 "=X,x")) + (clobber (match_scratch:V4SI 4 "=X,x")) + (clobber (match_operand:DI 2 "memory_operand" "=X,m"))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:DI 1 "register_operand" ""))) + (clobber (match_scratch:V4SI 3 "")) + (clobber (match_scratch:V4SI 4 "")) + (clobber (match_operand:DI 2 "memory_operand" ""))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && optimize_function_for_speed_p (cfun) + && reload_completed + && FP_REG_P (operands[0])" + [(set 
(match_dup 2) (match_dup 3)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] { - operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); - operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); - ix86_free_from_memory (GET_MODE (operands[1])); + /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). + Assemble the 64-bit DImode value in an xmm register. */ + emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, operands[1], 0))); + emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, operands[1], 4))); + emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4])); + + operands[3] = gen_rtx_REG (DImode, REGNO (operands[3])); +}) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:DI 1 "memory_operand" ""))) + (clobber (match_scratch:V4SI 3 "")) + (clobber (match_scratch:V4SI 4 "")) + (clobber (match_operand:DI 2 "memory_operand" ""))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && optimize_function_for_speed_p (cfun) + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") + +;; Avoid store forwarding (partial memory) stall penalty by extending +;; SImode value to DImode through XMM register instead of pushing two +;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES +;; targets benefit from this optimization. Also note that fild +;; loads from memory only. 
+ +(define_insn "*floatunssi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" "x,m"))) + (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:SI 3 "=X,x"))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] + "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] +{ + emit_move_insn (operands[3], operands[1]); + operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0); +}) + +(define_expand "floatunssi2" + [(parallel + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_dup 2)) + (clobber (match_scratch:SI 3 ""))])] + "!TARGET_64BIT + && ((TARGET_80387 && TARGET_SSE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + { + ix86_expand_convert_uns_si_sse (operands[0], operands[1]); + DONE; + } + else + { + int slot = virtuals_instantiated ? 
SLOT_TEMP : SLOT_VIRTUAL; + operands[2] = assign_386_stack_local (DImode, slot); + } +}) + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:DI 1 "nonimmediate_operand" ""))] + "TARGET_64BIT && TARGET_SSE_MATH" + "x86_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DI 1 "nonimmediate_operand" ""))] + "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK) + && TARGET_SSE2 && TARGET_SSE_MATH" +{ + if (TARGET_64BIT) + x86_emit_floatuns (operands); + else + ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); DONE; }) ;; Add instructions +;; %%% splits for addditi3 + +(define_expand "addti3" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;") + +(define_insn "*addti3_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0") + (match_operand:TI 2 "x86_64_general_operand" "roe,re"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) + (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (&operands[0], 3, &operands[0], &operands[3]);") + ;; %%% splits for addsidi3 ; [(set 
(match_operand:DI 0 "nonimmediate_operand" "") ; (plus:DI (match_operand:DI 1 "general_operand" "") @@ -5292,8 +5753,7 @@ (define_expand "adddi3" [(set (match_operand:DI 0 "nonimmediate_operand" "") (plus:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "x86_64_general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:DI 2 "x86_64_general_operand" "")))] "" "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;") @@ -5301,7 +5761,7 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") (match_operand:DI 2 "general_operand" "roiF,riF"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" "#") @@ -5309,35 +5769,35 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "") (plus:DI (match_operand:DI 1 "nonimmediate_operand" "") (match_operand:DI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && reload_completed" - [(parallel [(set (reg:CC 17) (unspec:CC [(match_dup 1) (match_dup 2)] 12)) + [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) - (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) (match_dup 4)) (match_dup 5))) - (clobber (reg:CC 17))])] - "split_di (operands+0, 1, operands+0, operands+3); - split_di (operands+1, 1, operands+1, operands+4); - split_di (operands+2, 1, operands+2, operands+5);") + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 3, &operands[0], &operands[3]);") -(define_insn "*adddi3_carry_rex64" +(define_insn "adddi3_carry_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") - (plus:DI (plus:DI (ltu:DI (reg:CC 17) (const_int 0)) + (plus:DI (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "") 
(match_operand:DI 1 "nonimmediate_operand" "%0,0")) (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" "adc{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "pent_pair" "pu") - (set_attr "mode" "DI") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "DI")]) (define_insn "*adddi3_cc_rex64" - [(set (reg:CC 17) (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") - (match_operand:DI 2 "x86_64_general_operand" "re,rm")] 12)) + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")] + UNSPEC_ADD_CARRY)) (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") (plus:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" @@ -5345,36 +5805,110 @@ [(set_attr "type" "alu") (set_attr "mode" "DI")]) -(define_insn "*addsi3_carry" +(define_insn "*3_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "" ",m")) + (match_dup 1))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (plusminus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (, mode, operands)" + "{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*add3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0") + (match_operand:SWI 2 "" "m")) + (match_dup 1))) + (clobber (match_scratch:SWI 0 "="))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "add{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*sub3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "m,") + (match_operand:SWI 1 "" ",m")) + (match_dup 0)))] + 
"" + "cmp{}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "")]) + +(define_insn "*si3_zext_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "g")) + (match_dup 1))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" + "{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "addqi3_carry" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") + (match_operand:QI 1 "nonimmediate_operand" "%0,0")) + (match_operand:QI 2 "general_operand" "qn,qm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, QImode, operands)" + "adc{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "QI")]) + +(define_insn "addhi3_carry" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "") + (match_operand:HI 1 "nonimmediate_operand" "%0,0")) + (match_operand:HI 2 "general_operand" "rn,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, HImode, operands)" + "adc{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "HI")]) + +(define_insn "addsi3_carry" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") - (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 1 "nonimmediate_operand" "%0,0")) (match_operand:SI 2 "general_operand" "ri,rm"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "pent_pair" "pu") - (set_attr 
"mode" "SI") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "SI")]) (define_insn "*addsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI - (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (zero_extend:DI + (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 1 "nonimmediate_operand" "%0")) - (match_operand:SI 2 "general_operand" "rim")))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") (set_attr "pent_pair" "pu") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "SI")]) (define_insn "*addsi3_cc" - [(set (reg:CC 17) (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rm")] 12)) + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rm")] + UNSPEC_ADD_CARRY)) (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (plus:SI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, SImode, operands)" @@ -5383,8 +5917,10 @@ (set_attr "mode" "SI")]) (define_insn "addqi3_cc" - [(set (reg:CC 17) (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qi,qm")] 12)) + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qn,qm")] + UNSPEC_ADD_CARRY)) (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (plus:QI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, QImode, operands)" @@ -5393,16 +5929,15 @@ (set_attr "mode" "QI")]) (define_expand "addsi3" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand" ""))) - 
(clobber (reg:CC 17))])] + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] "" "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;") (define_insn "*lea_1" [(set (match_operand:SI 0 "register_operand" "=r") - (match_operand:SI 1 "address_operand" "p"))] + (match_operand:SI 1 "no_seg_address_operand" "p"))] "!TARGET_64BIT" "lea{l}\t{%a1, %0|%0, %a1}" [(set_attr "type" "lea") @@ -5410,7 +5945,7 @@ (define_insn "*lea_1_rex64" [(set (match_operand:SI 0 "register_operand" "=r") - (subreg:SI (match_operand:DI 1 "address_operand" "p") 0))] + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))] "TARGET_64BIT" "lea{l}\t{%a1, %0|%0, %a1}" [(set_attr "type" "lea") @@ -5418,7 +5953,8 @@ (define_insn "*lea_1_zext" [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (subreg:SI (match_operand:DI 1 "address_operand" "p") 0)))] + (zero_extend:DI + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))] "TARGET_64BIT" "lea{l}\t{%a1, %k0|%k0, %a1}" [(set_attr "type" "lea") @@ -5426,7 +5962,7 @@ (define_insn "*lea_2_rex64" [(set (match_operand:DI 0 "register_operand" "=r") - (match_operand:DI 1 "address_operand" "p"))] + (match_operand:DI 1 "no_seg_address_operand" "p"))] "TARGET_64BIT" "lea{q}\t{%a1, %0|%0, %a1}" [(set_attr "type" "lea") @@ -5437,12 +5973,12 @@ (define_insn_and_split "*lea_general_1" [(set (match_operand 0 "register_operand" "=r") - (plus (plus (match_operand 1 "register_operand" "r") + (plus (plus (match_operand 1 "index_register_operand" "l") (match_operand 2 "register_operand" "r")) (match_operand 3 "immediate_operand" "i")))] "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) - && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && GET_MODE (operands[0]) == GET_MODE 
(operands[1]) && GET_MODE (operands[0]) == GET_MODE (operands[2]) && (GET_MODE (operands[0]) == GET_MODE (operands[3]) @@ -5469,7 +6005,7 @@ (define_insn_and_split "*lea_general_1_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "l") (match_operand:SI 2 "register_operand" "r")) (match_operand:SI 3 "immediate_operand" "i"))))] "TARGET_64BIT" @@ -5489,12 +6025,12 @@ (define_insn_and_split "*lea_general_2" [(set (match_operand 0 "register_operand" "=r") - (plus (mult (match_operand 1 "register_operand" "r") + (plus (mult (match_operand 1 "index_register_operand" "l") (match_operand 2 "const248_operand" "i")) (match_operand 3 "nonmemory_operand" "ri")))] "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) - && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && GET_MODE (operands[0]) == GET_MODE (operands[1]) && (GET_MODE (operands[0]) == GET_MODE (operands[3]) || GET_MODE (operands[3]) == VOIDmode)" @@ -5519,7 +6055,7 @@ (define_insn_and_split "*lea_general_2_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "l") (match_operand:SI 2 "const248_operand" "n")) (match_operand:SI 3 "nonmemory_operand" "ri"))))] "TARGET_64BIT" @@ -5538,13 +6074,13 @@ (define_insn_and_split "*lea_general_3" [(set (match_operand 0 "register_operand" "=r") - (plus (plus (mult (match_operand 1 "register_operand" "r") + (plus (plus (mult (match_operand 1 "index_register_operand" "l") (match_operand 2 "const248_operand" "i")) (match_operand 3 "register_operand" "r")) (match_operand 4 "immediate_operand" "i")))] "(GET_MODE (operands[0]) == QImode || GET_MODE 
(operands[0]) == HImode || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) - && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && GET_MODE (operands[0]) == GET_MODE (operands[1]) && GET_MODE (operands[0]) == GET_MODE (operands[3])" "#" @@ -5572,8 +6108,9 @@ (define_insn_and_split "*lea_general_3_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "const248_operand" "n")) + (plus:SI (plus:SI (mult:SI + (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "const248_operand" "n")) (match_operand:SI 3 "register_operand" "r")) (match_operand:SI 4 "immediate_operand" "i"))))] "TARGET_64BIT" @@ -5595,8 +6132,8 @@ (define_insn "*adddi_1_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:DI 2 "x86_64_general_operand" "rme,re,re"))) - (clobber (reg:CC 17))] + (match_operand:DI 2 "x86_64_general_operand" "rme,re,le"))) + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" { switch (get_attr_type (insn)) @@ -5606,22 +6143,21 @@ return "lea{q}\t{%a2, %0|%0, %a2}"; case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{q}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{q}\t%0"; else - abort (); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) /* Avoid overflows. */ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) && (INTVAL (operands[2]) == 128 @@ -5652,7 +6188,7 @@ [(set (match_operand:DI 0 "register_operand" "") (plus:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "x86_64_nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" [(set (match_dup 0) @@ -5661,11 +6197,11 @@ "") (define_insn "*adddi_2_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") (match_operand:DI 2 "x86_64_general_operand" "rme,re")) - (const_int 0))) + (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") (plus:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) @@ -5677,23 +6213,22 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{q}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{q}\t%0"; else - abort (); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* ???? We ought to handle there the 32bit case too - - do we need new constrant? */ + - do we need new constraint? */ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) /* Avoid overflows. 
*/ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) && (INTVAL (operands[2]) == 128 @@ -5713,13 +6248,13 @@ (set_attr "mode" "DI")]) (define_insn "*adddi_3_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (neg:DI (match_operand:DI 2 "x86_64_general_operand" "rme")) (match_operand:DI 1 "x86_64_general_operand" "%0"))) (clobber (match_scratch:DI 0 "=r"))] "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + && !(MEM_P (operands[1]) && MEM_P (operands[2])) /* Current assemblers are broken and do not allow @GOTOFF in ought but a memory context. */ && ! pic_symbolic_operand (operands[2], VOIDmode)" @@ -5727,23 +6262,22 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{q}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{q}\t%0"; else - abort (); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* ???? We ought to handle there the 32bit case too - - do we need new constrant? */ + - do we need new constraint? */ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) /* Avoid overflows. */ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) && (INTVAL (operands[2]) == 128 @@ -5771,7 +6305,7 @@ ; Also carry flag is reversed compared to cmp, so this conversion is valid ; only for comparisons not depending on it. 
(define_insn "*adddi_4_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:DI 2 "x86_64_immediate_operand" "e"))) (clobber (match_scratch:DI 0 "=rm"))] @@ -5783,14 +6317,14 @@ case TYPE_INCDEC: if (operands[2] == constm1_rtx) return "inc{q}\t%0"; - else if (operands[2] == const1_rtx) - return "dec{q}\t%0"; else - abort(); + { + gcc_assert (operands[2] == const1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if ((INTVAL (operands[2]) == -128 @@ -5810,15 +6344,15 @@ (set_attr "mode" "DI")]) (define_insn "*adddi_5_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0") (match_operand:DI 2 "x86_64_general_operand" "rme")) - (const_int 0))) + (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + && !(MEM_P (operands[1]) && MEM_P (operands[2])) /* Current assemblers are broken and do not allow @GOTOFF in ought but a memory context. */ && ! pic_symbolic_operand (operands[2], VOIDmode)" @@ -5826,21 +6360,20 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{q}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{q}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. 
Exceptions: -128 encodes smaller than 128, so swap sign and op. */ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) /* Avoid overflows. */ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) && (INTVAL (operands[2]) == 128 @@ -5863,8 +6396,8 @@ (define_insn "*addsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r") (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:SI 2 "general_operand" "rmni,rni,rni"))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "g,ri,li"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, SImode, operands)" { switch (get_attr_type (insn)) @@ -5874,22 +6407,21 @@ return "lea{l}\t{%a2, %0|%0, %a2}"; case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{l}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -5918,7 +6450,7 @@ [(set (match_operand 0 "register_operand" "") (plus (match_operand 1 "register_operand" "") (match_operand 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" [(const_int 0)] @@ -5948,28 +6480,29 @@ [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") - (match_operand:SI 2 "general_operand" "rmni,rni")))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "g,li")))) + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" { switch (get_attr_type (insn)) { case TYPE_LEA: - operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + operands[2] = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0); return "lea{l}\t{%a2, %k0|%k0, %a2}"; case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{l}\t%k0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%k0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -5999,8 +6532,8 @@ (zero_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")))) - (clobber (reg:CC 17))] - "reload_completed + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" [(set (match_dup 0) (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] @@ -6010,11 +6543,11 @@ }) (define_insn "*addsi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rmni,rni")) - (const_int 0))) + (match_operand:SI 2 "general_operand" "g,ri")) + (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (plus:SI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) @@ -6026,21 +6559,20 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{l}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6059,11 +6591,11 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*addsi_2_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rmni")) - (const_int 0))) + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) @@ -6077,15 +6609,16 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{l}\t%k0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%k0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6103,12 +6636,12 @@ (set_attr "mode" "SI")]) (define_insn "*addsi_3" - [(set (reg 17) - (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni")) + [(set (reg FLAGS_REG) + (compare (neg:SI (match_operand:SI 2 "general_operand" "g")) (match_operand:SI 1 "nonimmediate_operand" "%0"))) (clobber (match_scratch:SI 0 "=r"))] "ix86_match_ccmode (insn, CCZmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + && !(MEM_P (operands[1]) && MEM_P (operands[2])) /* Current assemblers are broken and do not allow @GOTOFF in ought but a memory context. */ && ! pic_symbolic_operand (operands[2], VOIDmode)" @@ -6116,21 +6649,20 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! 
rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{l}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6149,8 +6681,8 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*addsi_3_zext" - [(set (reg 17) - (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni")) + [(set (reg FLAGS_REG) + (compare (neg:SI (match_operand:SI 2 "general_operand" "g")) (match_operand:SI 1 "nonimmediate_operand" "%0"))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] @@ -6165,15 +6697,16 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{l}\t%k0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%k0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6190,7 +6723,7 @@ (const_string "alu"))) (set_attr "mode" "SI")]) -; For comparisons agains 1, -1 and 128, we may generate better code +; For comparisons against 1, -1 and 128, we may generate better code ; by converting cmp to add, inc or dec as done by peephole2. This pattern ; is matched then. We can't accept general immediate, because for ; case of overflows, the result is messed up. @@ -6199,7 +6732,7 @@ ; Also carry flag is reversed compared to cmp, so this conversion is valid ; only for comparisons not depending on it. (define_insn "*addsi_4" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:SI 2 "const_int_operand" "n"))) (clobber (match_scratch:SI 0 "=rm"))] @@ -6211,14 +6744,14 @@ case TYPE_INCDEC: if (operands[2] == constm1_rtx) return "inc{l}\t%0"; - else if (operands[2] == const1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == const1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ if ((INTVAL (operands[2]) == -128 @@ -6236,14 +6769,14 @@ (set_attr "mode" "SI")]) (define_insn "*addsi_5" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rmni")) - (const_int 0))) + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] "ix86_match_ccmode (insn, CCGOCmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + && !(MEM_P (operands[1]) && MEM_P (operands[2])) /* Current assemblers are broken and do not allow @GOTOFF in ought but a memory context. */ && ! pic_symbolic_operand (operands[2], VOIDmode)" @@ -6251,21 +6784,20 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{l}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6283,10 +6815,9 @@ (set_attr "mode" "SI")]) (define_expand "addhi3" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (plus:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "general_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;") @@ -6297,8 +6828,8 @@ (define_insn "*addhi_1_lea" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:HI 2 "general_operand" "ri,rm,rni"))) - (clobber (reg:CC 17))] + (match_operand:HI 2 "general_operand" "rn,rm,ln"))) + (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (PLUS, HImode, operands)" { @@ -6309,16 +6840,16 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6340,8 +6871,8 @@ (define_insn "*addhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "ri,rm"))) - (clobber (reg:CC 17))] + (match_operand:HI 2 "general_operand" "rn,rm"))) + (clobber (reg:CC FLAGS_REG))] "TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (PLUS, HImode, operands)" { @@ -6350,16 +6881,16 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6377,11 +6908,11 @@ (set_attr "mode" "HI")]) (define_insn "*addhi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmni,rni")) - (const_int 0))) + (match_operand:HI 2 "general_operand" "rmn,rn")) + (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (plus:HI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) @@ -6392,16 +6923,16 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6419,28 +6950,28 @@ (set_attr "mode" "HI")]) (define_insn "*addhi_3" - [(set (reg 17) - (compare (neg:HI (match_operand:HI 2 "general_operand" "rmni")) + [(set (reg FLAGS_REG) + (compare (neg:HI (match_operand:HI 2 "general_operand" "rmn")) (match_operand:HI 1 "nonimmediate_operand" "%0"))) (clobber (match_scratch:HI 0 "=r"))] "ix86_match_ccmode (insn, CCZmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6457,9 +6988,9 @@ (const_string "alu"))) (set_attr "mode" "HI")]) -; See comments above addsi_3_imm for details. +; See comments above addsi_4 for details. 
(define_insn "*addhi_4" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:HI 2 "const_int_operand" "n"))) (clobber (match_scratch:HI 0 "=rm"))] @@ -6469,18 +7000,16 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + if (operands[2] == constm1_rtx) return "inc{w}\t%0"; - else if (operands[2] == const1_rtx) - return "dec{w}\t%0"; else - abort(); + { + gcc_assert (operands[2] == const1_rtx); + return "dec{w}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if ((INTVAL (operands[2]) == -128 @@ -6499,30 +7028,30 @@ (define_insn "*addhi_5" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0") - (match_operand:HI 2 "general_operand" "rmni")) - (const_int 0))) + (match_operand:HI 2 "general_operand" "rmn")) + (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] "ix86_match_ccmode (insn, CCGOCmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6540,10 +7069,9 @@ (set_attr "mode" "HI")]) (define_expand "addqi3" - [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") - (plus:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "general_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;") @@ -6551,8 +7079,8 @@ (define_insn "*addqi_1_lea" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r") (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r") - (match_operand:QI 2 "general_operand" "qn,qmn,rn,rn"))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln"))) + (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (PLUS, QImode, operands)" { @@ -6564,16 +7092,16 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6602,7 +7130,7 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (PLUS, QImode, operands)" { @@ -6612,16 +7140,16 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -6644,32 +7172,74 @@ (const_string "alu"))) (set_attr "mode" "QI,QI,SI")]) -(define_insn "*addqi_2" - [(set (reg 17) - (compare - (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qmni,qni")) - (const_int 0))) - (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") - (plus:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (PLUS, QImode, operands)" +(define_insn "*addqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (plus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qnm"))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (operands[2] == const1_rtx) + if (operands[1] == const1_rtx) return "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[1] == constm1_rtx); + return "dec{b}\t%0"; + } default: - /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ - if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) < 0) + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. */ + if (CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < 0) + { + operands[1] = GEN_INT (-INTVAL (operands[1])); + return "sub{b}\t{%1, %0|%0, %1}"; + } + return "add{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 1 "incdec_operand" "") + (const_string "incdec") + (const_string "alu1"))) + (set (attr "memory") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "load") + (const_string "none"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_2" + [(set (reg FLAGS_REG) + (compare + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qmn,qn")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (plus:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. 
*/ + if (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) < 0) { operands[2] = GEN_INT (-INTVAL (operands[2])); return "sub{b}\t{%2, %0|%0, %2}"; @@ -6684,27 +7254,29 @@ (set_attr "mode" "QI")]) (define_insn "*addqi_3" - [(set (reg 17) - (compare (neg:QI (match_operand:QI 2 "general_operand" "qmni")) + [(set (reg FLAGS_REG) + (compare (neg:QI (match_operand:QI 2 "general_operand" "qmn")) (match_operand:QI 1 "nonimmediate_operand" "%0"))) (clobber (match_scratch:QI 0 "=q"))] "ix86_match_ccmode (insn, CCZmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } default: /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) { operands[2] = GEN_INT (-INTVAL (operands[2])); @@ -6719,9 +7291,9 @@ (const_string "alu"))) (set_attr "mode" "QI")]) -; See comments above addsi_3_imm for details. +; See comments above addsi_4 for details. 
(define_insn "*addqi_4" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const_int_operand" "n"))) (clobber (match_scratch:QI 0 "=qm"))] @@ -6732,17 +7304,17 @@ { case TYPE_INCDEC: if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT + || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 255)) return "inc{b}\t%0"; - else if (operands[2] == const1_rtx) - return "dec{b}\t%0"; else - abort(); + { + gcc_assert (operands[2] == const1_rtx); + return "dec{b}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (INTVAL (operands[2]) < 0) { operands[2] = GEN_INT (-INTVAL (operands[2])); @@ -6759,29 +7331,31 @@ (define_insn "*addqi_5" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0") - (match_operand:QI 2 "general_operand" "qmni")) + (match_operand:QI 2 "general_operand" "qmn")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] "ix86_match_ccmode (insn, CCGOCmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } default: /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. 
*/ - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) { operands[2] = GEN_INT (-INTVAL (operands[2])); @@ -6807,7 +7381,7 @@ (const_int 8) (const_int 8)) (match_operand:QI 2 "general_operand" "Qmn"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" { switch (get_attr_type (insn)) @@ -6815,11 +7389,13 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%h0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%h0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%h0"; + } default: return "add{b}\t{%2, %h0|%h0, %2}"; @@ -6841,7 +7417,7 @@ (const_int 8) (const_int 8)) (match_operand:QI 2 "nonmemory_operand" "Qn"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" { switch (get_attr_type (insn)) @@ -6849,11 +7425,13 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%h0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%h0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%h0"; + } default: return "add{b}\t{%2, %h0|%h0, %2}"; @@ -6878,7 +7456,7 @@ (match_operand 2 "ext_register_operand" "Q") (const_int 8) (const_int 8)))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" "add{b}\t{%h2, %h0|%h0, %h2}" [(set_attr "type" "alu") @@ -6890,39 +7468,56 @@ [(set (match_operand:XF 0 "register_operand" "") (plus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "addtf3" - [(set (match_operand:TF 0 "register_operand" "") - (plus:TF (match_operand:TF 1 "register_operand" "") - 
(match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") -(define_expand "adddf3" - [(set (match_operand:DF 0 "register_operand" "") - (plus:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") - -(define_expand "addsf3" - [(set (match_operand:SF 0 "register_operand" "") - (plus:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" +(define_expand "add3" + [(set (match_operand:MODEF 0 "register_operand" "") + (plus:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") ;; Subtract instructions +;; %%% splits for subditi3 + +(define_expand "subti3" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;") + +(define_insn "*subti3_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0") + (match_operand:TI 2 "x86_64_general_operand" "roe,re"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:DI (match_dup 4) + (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + 
"split_ti (&operands[0], 3, &operands[0], &operands[3]);") + ;; %%% splits for subsidi3 (define_expand "subdi3" - [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "x86_64_general_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" "")))] "" "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;") @@ -6930,7 +7525,7 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:DI 2 "general_operand" "roiF,riF"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" "#") @@ -6938,44 +7533,41 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "") (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") (match_operand:DI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && reload_completed" - [(parallel [(set (reg:CC 17) (compare:CC (match_dup 1) (match_dup 2))) + [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (minus:SI (match_dup 4) - (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) (match_dup 5)))) - (clobber (reg:CC 17))])] - "split_di (operands+0, 1, operands+0, operands+3); - split_di (operands+1, 1, operands+1, operands+4); - split_di (operands+2, 1, operands+2, operands+5);") + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 3, &operands[0], &operands[3]);") (define_insn "subdi3_carry_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") - (plus:DI (ltu:DI (reg:CC 17) 
(const_int 0)) + (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "") (match_operand:DI 2 "x86_64_general_operand" "re,rm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" "sbb{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "pent_pair" "pu") - (set_attr "ppro_uops" "few") (set_attr "mode" "DI")]) (define_insn "*subdi_1_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" "sub{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "DI")]) (define_insn "*subdi_2_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:DI 2 "x86_64_general_operand" "re,rm")) @@ -6989,7 +7581,7 @@ (set_attr "mode" "DI")]) (define_insn "*subdi_3_rex63" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") @@ -7000,39 +7592,59 @@ [(set_attr "type" "alu") (set_attr "mode" "DI")]) +(define_insn "subqi3_carry" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") + (match_operand:QI 2 "general_operand" "qn,qm")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, QImode, operands)" + "sbb{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "QI")]) + +(define_insn "subhi3_carry" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (plus:HI 
(match_operand:HI 3 "ix86_carry_flag_operator" "") + (match_operand:HI 2 "general_operand" "rn,rm")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, HImode, operands)" + "sbb{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "HI")]) (define_insn "subsi3_carry" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") - (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 2 "general_operand" "ri,rm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (MINUS, SImode, operands)" "sbb{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "pent_pair" "pu") - (set_attr "ppro_uops" "few") (set_attr "mode" "SI")]) (define_insn "subsi3_carry_zext" - [(set (match_operand:DI 0 "register_operand" "=rm,r") + [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "0,0") - (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) - (match_operand:SI 2 "general_operand" "ri,rm"))))) - (clobber (reg:CC 17))] + (minus:SI (match_operand:SI 1 "register_operand" "0") + (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") + (match_operand:SI 2 "general_operand" "g"))))) + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" "sbb{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") (set_attr "pent_pair" "pu") - (set_attr "ppro_uops" "few") (set_attr "mode" "SI")]) (define_expand "subsi3" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (minus:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] "" 
"ix86_expand_binary_operator (MINUS, SImode, operands); DONE;") @@ -7040,7 +7652,7 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") (match_operand:SI 2 "general_operand" "ri,rm"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (MINUS, SImode, operands)" "sub{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") @@ -7050,15 +7662,15 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "general_operand" "rim")))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" "sub{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn "*subsi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") (match_operand:SI 2 "general_operand" "ri,rm")) @@ -7072,10 +7684,10 @@ (set_attr "mode" "SI")]) (define_insn "*subsi_2_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -7088,7 +7700,7 @@ (set_attr "mode" "SI")]) (define_insn "*subsi_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:SI 1 "nonimmediate_operand" "0,0") (match_operand:SI 2 "general_operand" "ri,rm"))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") @@ -7100,42 +7712,41 @@ (set_attr "mode" "SI")]) (define_insn "*subsi_3_zext" - [(set (reg 17) - (compare (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:SI 2 "general_operand" "rim"))) + [(set (reg FLAGS_REG) + (compare (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" 
"g"))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, SImode, operands)" - "sub{q}\t{%2, %0|%0, %2}" + "sub{l}\t{%2, %1|%1, %2}" [(set_attr "type" "alu") (set_attr "mode" "DI")]) (define_expand "subhi3" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (minus:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "general_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (MINUS, HImode, operands); DONE;") (define_insn "*subhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") - (match_operand:HI 2 "general_operand" "ri,rm"))) - (clobber (reg:CC 17))] + (match_operand:HI 2 "general_operand" "rn,rm"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (MINUS, HImode, operands)" "sub{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "HI")]) (define_insn "*subhi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") - (match_operand:HI 2 "general_operand" "ri,rm")) + (match_operand:HI 2 "general_operand" "rn,rm")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") (minus:HI (match_dup 1) (match_dup 2)))] @@ -7146,9 +7757,9 @@ (set_attr "mode" "HI")]) (define_insn "*subhi_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:HI 1 "nonimmediate_operand" "0,0") - (match_operand:HI 2 "general_operand" "ri,rm"))) + (match_operand:HI 2 "general_operand" "rn,rm"))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") (minus:HI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCmode) @@ -7158,31 +7769,41 @@ 
(set_attr "mode" "HI")]) (define_expand "subqi3" - [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") - (minus:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "general_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;") (define_insn "*subqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") - (match_operand:QI 2 "general_operand" "qn,qmn"))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "general_operand" "qn,qm"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (MINUS, QImode, operands)" "sub{b}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*subqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (minus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qm"))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "sub{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + (define_insn "*subqi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") - (match_operand:QI 2 "general_operand" "qi,qm")) + (match_operand:QI 2 "general_operand" "qn,qm")) (const_int 0))) - (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q") - (minus:HI (match_dup 1) (match_dup 2)))] + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (MINUS, QImode, operands)" "sub{b}\t{%2, %0|%0, %2}" @@ -7190,11 +7811,11 @@ (set_attr "mode" "QI")]) (define_insn "*subqi_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (match_operand:QI 1 "nonimmediate_operand" "0,0") - (match_operand:QI 2 "general_operand" "qi,qm"))) - (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q") - (minus:HI (match_dup 1) (match_dup 2)))] + (match_operand:QI 2 "general_operand" "qn,qm"))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, QImode, operands)" "sub{b}\t{%2, %0|%0, %2}" @@ -7207,28 +7828,14 @@ [(set (match_operand:XF 0 "register_operand" "") (minus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "subtf3" - [(set (match_operand:TF 0 "register_operand" "") - (minus:TF (match_operand:TF 1 "register_operand" "") - (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") -(define_expand "subdf3" - [(set (match_operand:DF 0 "register_operand" "") - (minus:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && 
TARGET_SSE_MATH)" - "") - -(define_expand "subsf3" - [(set (match_operand:SF 0 "register_operand" "") - (minus:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" +(define_expand "sub3" + [(set (match_operand:MODEF 0 "register_operand" "") + (minus:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") ;; Multiply instructions @@ -7237,129 +7844,187 @@ [(parallel [(set (match_operand:DI 0 "register_operand" "") (mult:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "x86_64_general_operand" ""))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT" "") +;; On AMDFAM10 +;; IMUL reg64, reg64, imm8 Direct +;; IMUL reg64, mem64, imm8 VectorPath +;; IMUL reg64, reg64, imm32 Direct +;; IMUL reg64, mem64, imm32 VectorPath +;; IMUL reg64, reg64 Direct +;; IMUL reg64, mem64 Direct + (define_insn "*muldi3_1_rex64" [(set (match_operand:DI 0 "register_operand" "=r,r,r") - (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,0,0") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0") (match_operand:DI 2 "x86_64_general_operand" "K,e,mr"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ imul{q}\t{%2, %1, %0|%0, %1, %2} imul{q}\t{%2, %1, %0|%0, %1, %2} imul{q}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") 
+ (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "DI")]) (define_expand "mulsi3" [(parallel [(set (match_operand:SI 0 "register_operand" "") (mult:SI (match_operand:SI 1 "register_operand" "") (match_operand:SI 2 "general_operand" ""))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "" "") +;; On AMDFAM10 +;; IMUL reg32, reg32, imm8 Direct +;; IMUL reg32, mem32, imm8 VectorPath +;; IMUL reg32, reg32, imm32 Direct +;; IMUL reg32, mem32, imm32 VectorPath +;; IMUL reg32, reg32 Direct +;; IMUL reg32, mem32 Direct + (define_insn "*mulsi3_1" [(set (match_operand:SI 0 "register_operand" "=r,r,r") - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,0,0") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") (match_operand:SI 2 "general_operand" "K,i,mr"))) - (clobber (reg:CC 17))] - "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" - ; For the {r,0,i} alternative (i.e., register <- register * immediate), - ; there are two ways of writing the exact same machine instruction - ; in assembly language. One, for example, is: - ; - ; imul $12, %eax - ; - ; while the other is: - ; - ; imul $12, %eax, %eax - ; - ; The first is simply short-hand for the latter. But, some assemblers, - ; like the SCO OSR5 COFF assembler, don't handle the first form. 
+ (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ imul{l}\t{%2, %1, %0|%0, %1, %2} imul{l}\t{%2, %1, %0|%0, %1, %2} imul{l}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "SI")]) (define_insn "*mulsi3_1_zext" [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,0,0") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") (match_operand:SI 2 "general_operand" "K,i,mr")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - ; For the {r,0,i} alternative (i.e., register <- register * immediate), - ; there are two ways of writing the exact same machine instruction - ; in assembly language. One, for example, is: - ; - ; imul $12, %eax - ; - ; while the other is: - ; - ; imul $12, %eax, %eax - ; - ; The first is simply short-hand for the latter. But, some assemblers, - ; like the SCO OSR5 COFF assembler, don't handle the first form. 
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ imul{l}\t{%2, %1, %k0|%k0, %1, %2} imul{l}\t{%2, %1, %k0|%k0, %1, %2} imul{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "SI")]) (define_expand "mulhi3" [(parallel [(set (match_operand:HI 0 "register_operand" "") (mult:HI (match_operand:HI 1 "register_operand" "") (match_operand:HI 2 "general_operand" ""))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_HIMODE_MATH" "") +;; On AMDFAM10 +;; IMUL reg16, reg16, imm8 VectorPath +;; IMUL reg16, mem16, imm8 VectorPath +;; IMUL reg16, reg16, imm16 VectorPath +;; IMUL reg16, mem16, imm16 VectorPath +;; IMUL reg16, reg16 Direct +;; IMUL reg16, mem16 Direct (define_insn "*mulhi3_1" [(set (match_operand:HI 0 "register_operand" "=r,r,r") - (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,0,0") - (match_operand:HI 2 "general_operand" "K,i,mr"))) - (clobber (reg:CC 17))] - "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" - ; %%% There was a note about "Assembler has weird restrictions", - ; concerning alternative 1 when op1 == op0. True? 
+ (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:HI 2 "general_operand" "K,n,mr"))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ imul{w}\t{%2, %1, %0|%0, %1, %2} imul{w}\t{%2, %1, %0|%0, %1, %2} imul{w}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1,2") + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(eq_attr "alternative" "0,1") + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "HI")]) (define_expand "mulqi3" [(parallel [(set (match_operand:QI 0 "register_operand" "") (mult:QI (match_operand:QI 1 "nonimmediate_operand" "") (match_operand:QI 2 "register_operand" ""))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_QIMODE_MATH" "") +;;On AMDFAM10 +;; MUL reg8 Direct +;; MUL mem8 Direct + (define_insn "*mulqi3_1" [(set (match_operand:QI 0 "register_operand" "=a") (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") (match_operand:QI 2 "nonimmediate_operand" "qm"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_QIMODE_MATH - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "mul{b}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "umulqihi3" @@ -7368,7 +8033,7 @@ (match_operand:QI 1 "nonimmediate_operand" "")) (zero_extend:HI (match_operand:QI 2 "register_operand" "")))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_QIMODE_MATH" "") @@ -7376,19 +8041,24 @@ [(set (match_operand:HI 0 "register_operand" "=a") (mult:HI 
(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0")) (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_QIMODE_MATH - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "mul{b}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "mulqihi3" [(parallel [(set (match_operand:HI 0 "register_operand" "") (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")) (sign_extend:HI (match_operand:QI 2 "register_operand" "")))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_QIMODE_MATH" "") @@ -7396,12 +8066,17 @@ [(set (match_operand:HI 0 "register_operand" "=a") (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0")) (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_QIMODE_MATH - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "imul{b}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "umulditi3" @@ -7410,7 +8085,7 @@ (match_operand:DI 1 "nonimmediate_operand" "")) (zero_extend:TI (match_operand:DI 2 "register_operand" "")))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT" "") @@ -7418,13 +8093,17 @@ [(set (match_operand:TI 0 "register_operand" "=A") (mult:TI (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0")) (zero_extend:TI 
(match_operand:DI 2 "nonimmediate_operand" "rm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "mul{q}\t%2" [(set_attr "type" "imul") - (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) ;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers @@ -7434,7 +8113,7 @@ (match_operand:SI 1 "nonimmediate_operand" "")) (zero_extend:DI (match_operand:SI 2 "register_operand" "")))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT" "") @@ -7442,13 +8121,17 @@ [(set (match_operand:DI 0 "register_operand" "=A") (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0")) (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "mul{l}\t%2" [(set_attr "type" "imul") - (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "mulditi3" @@ -7457,7 +8140,7 @@ (match_operand:DI 1 "nonimmediate_operand" "")) (sign_extend:TI (match_operand:DI 2 "register_operand" "")))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT" "") @@ -7465,12 +8148,17 @@ [(set (match_operand:TI 0 "register_operand" "=A") (mult:TI (sign_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0")) (sign_extend:TI (match_operand:DI 2 "nonimmediate_operand" 
"rm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "imul{q}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) (define_expand "mulsidi3" @@ -7479,7 +8167,7 @@ (match_operand:SI 1 "nonimmediate_operand" "")) (sign_extend:DI (match_operand:SI 2 "register_operand" "")))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT" "") @@ -7487,12 +8175,17 @@ [(set (match_operand:DI 0 "register_operand" "=A") (mult:DI (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0")) (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "imul{l}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "umuldi3_highpart" @@ -7505,7 +8198,7 @@ (match_operand:DI 2 "register_operand" ""))) (const_int 64)))) (clobber (match_scratch:DI 3 "")) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT" "") @@ -7519,13 +8212,17 @@ (match_operand:DI 2 "nonimmediate_operand" "rm"))) (const_int 64)))) (clobber (match_scratch:DI 3 "=1")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "mul{q}\t%2" [(set_attr "type" "imul") - (set_attr 
"ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) (define_expand "umulsi3_highpart" @@ -7538,7 +8235,7 @@ (match_operand:SI 2 "register_operand" ""))) (const_int 32)))) (clobber (match_scratch:SI 3 "")) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "" "") @@ -7552,12 +8249,16 @@ (match_operand:SI 2 "nonimmediate_operand" "rm"))) (const_int 32)))) (clobber (match_scratch:SI 3 "=1")) - (clobber (reg:CC 17))] - "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" "mul{l}\t%2" [(set_attr "type" "imul") - (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_insn "*umulsi3_highpart_zext" @@ -7570,17 +8271,21 @@ (match_operand:SI 2 "nonimmediate_operand" "rm"))) (const_int 32))))) (clobber (match_scratch:SI 3 "=1")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "mul{l}\t%2" [(set_attr "type" "imul") - (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "smuldi3_highpart" - [(parallel [(set (match_operand:DI 0 "register_operand" "=d") + [(parallel [(set (match_operand:DI 0 "register_operand" "") (truncate:DI (lshiftrt:TI (mult:TI (sign_extend:TI @@ -7589,7 +8294,7 @@ (match_operand:DI 2 "register_operand" ""))) 
(const_int 64)))) (clobber (match_scratch:DI 3 "")) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT" "") @@ -7603,12 +8308,16 @@ (match_operand:DI 2 "nonimmediate_operand" "rm"))) (const_int 64)))) (clobber (match_scratch:DI 3 "=1")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "imul{q}\t%2" [(set_attr "type" "imul") - (set_attr "ppro_uops" "few") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) (define_expand "smulsi3_highpart" @@ -7621,7 +8330,7 @@ (match_operand:SI 2 "register_operand" ""))) (const_int 32)))) (clobber (match_scratch:SI 3 "")) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "" "") @@ -7635,11 +8344,15 @@ (match_operand:SI 2 "nonimmediate_operand" "rm"))) (const_int 32)))) (clobber (match_scratch:SI 3 "=1")) - (clobber (reg:CC 17))] - "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" "imul{l}\t%2" [(set_attr "type" "imul") - (set_attr "ppro_uops" "few") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_insn "*smulsi3_highpart_zext" @@ -7652,12 +8365,16 @@ (match_operand:SI 2 "nonimmediate_operand" "rm"))) (const_int 32))))) (clobber (match_scratch:SI 3 "=1")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "imul{l}\t%2" [(set_attr "type" "imul") - (set_attr "ppro_uops" "few") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" 
"athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) ;; The patterns that match these are at the end of this file. @@ -7666,29 +8383,18 @@ [(set (match_operand:XF 0 "register_operand" "") (mult:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "multf3" - [(set (match_operand:TF 0 "register_operand" "") - (mult:TF (match_operand:TF 1 "register_operand" "") - (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") -(define_expand "muldf3" - [(set (match_operand:DF 0 "register_operand" "") - (mult:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +(define_expand "mul3" + [(set (match_operand:MODEF 0 "register_operand" "") + (mult:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") -(define_expand "mulsf3" - [(set (match_operand:SF 0 "register_operand" "") - (mult:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "") +;; SSE5 scalar multiply/add instructions are defined in sse.md. 
+ ;; Divide instructions @@ -7696,23 +8402,21 @@ [(set (match_operand:QI 0 "register_operand" "=a") (div:QI (match_operand:HI 1 "register_operand" "0") (match_operand:QI 2 "nonimmediate_operand" "qm"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_QIMODE_MATH" "idiv{b}\t%2" [(set_attr "type" "idiv") - (set_attr "mode" "QI") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "QI")]) (define_insn "udivqi3" [(set (match_operand:QI 0 "register_operand" "=a") (udiv:QI (match_operand:HI 1 "register_operand" "0") (match_operand:QI 2 "nonimmediate_operand" "qm"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_QIMODE_MATH" "div{b}\t%2" [(set_attr "type" "idiv") - (set_attr "mode" "QI") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "QI")]) ;; The patterns that match these are at the end of this file. @@ -7720,13 +8424,6 @@ [(set (match_operand:XF 0 "register_operand" "") (div:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "divtf3" - [(set (match_operand:TF 0 "register_operand" "") - (div:TF (match_operand:TF 1 "register_operand" "") - (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") @@ -7736,13 +8433,22 @@ (match_operand:DF 2 "nonimmediate_operand" "")))] "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") - + (define_expand "divsf3" [(set (match_operand:SF 0 "register_operand" "") (div:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] "TARGET_80387 || TARGET_SSE_MATH" - "") +{ + if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p () + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swdivsf (operands[0], operands[1], + operands[2], SFmode); + DONE; + } +}) ;; Remainder instructions. 
@@ -7752,12 +8458,12 @@ (match_operand:DI 2 "nonimmediate_operand" ""))) (set (match_operand:DI 3 "register_operand" "") (mod:DI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT" "") ;; Allow to come the parameter in eax or edx to avoid extra moves. -;; Penalize eax case sligthly because it results in worse scheduling +;; Penalize eax case slightly because it results in worse scheduling ;; of code. (define_insn "*divmoddi4_nocltd_rex64" [(set (match_operand:DI 0 "register_operand" "=&a,?a") @@ -7765,8 +8471,8 @@ (match_operand:DI 3 "nonimmediate_operand" "rm,rm"))) (set (match_operand:DI 1 "register_operand" "=&d,&d") (mod:DI (match_dup 2) (match_dup 3))) - (clobber (reg:CC 17))] - "TARGET_64BIT && !optimize_size && !TARGET_USE_CLTD" + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD" "#" [(set_attr "type" "multi")]) @@ -7776,8 +8482,8 @@ (match_operand:DI 3 "nonimmediate_operand" "rm"))) (set (match_operand:DI 1 "register_operand" "=&d") (mod:DI (match_dup 2) (match_dup 3))) - (clobber (reg:CC 17))] - "TARGET_64BIT && (optimize_size || TARGET_USE_CLTD)" + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)" "#" [(set_attr "type" "multi")]) @@ -7788,12 +8494,11 @@ (set (match_operand:DI 3 "register_operand" "=d") (mod:DI (match_dup 1) (match_dup 2))) (use (match_operand:DI 4 "register_operand" "3")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "idiv{q}\t%2" [(set_attr "type" "idiv") - (set_attr "mode" "DI") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "DI")]) (define_split [(set (match_operand:DI 0 "register_operand" "") @@ -7801,20 +8506,20 @@ (match_operand:DI 2 "nonimmediate_operand" ""))) (set (match_operand:DI 3 "register_operand" "") (mod:DI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed" 
[(parallel [(set (match_dup 3) (ashiftrt:DI (match_dup 4) (const_int 63))) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (parallel [(set (match_dup 0) (div:DI (reg:DI 0) (match_dup 2))) (set (match_dup 3) (mod:DI (reg:DI 0) (match_dup 2))) (use (match_dup 3)) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] { - /* Avoid use of cltd in favour of a mov+shift. */ - if (!TARGET_USE_CLTD && !optimize_size) + /* Avoid use of cltd in favor of a mov+shift. */ + if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun)) { if (true_regnum (operands[1])) emit_move_insn (operands[0], operands[1]); @@ -7824,8 +8529,7 @@ } else { - if (true_regnum (operands[1])) - abort(); + gcc_assert (!true_regnum (operands[1])); operands[4] = operands[1]; } }) @@ -7837,12 +8541,12 @@ (match_operand:SI 2 "nonimmediate_operand" ""))) (set (match_operand:SI 3 "register_operand" "") (mod:SI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "" "") ;; Allow to come the parameter in eax or edx to avoid extra moves. -;; Penalize eax case sligthly because it results in worse scheduling +;; Penalize eax case slightly because it results in worse scheduling ;; of code. 
(define_insn "*divmodsi4_nocltd" [(set (match_operand:SI 0 "register_operand" "=&a,?a") @@ -7850,8 +8554,8 @@ (match_operand:SI 3 "nonimmediate_operand" "rm,rm"))) (set (match_operand:SI 1 "register_operand" "=&d,&d") (mod:SI (match_dup 2) (match_dup 3))) - (clobber (reg:CC 17))] - "!optimize_size && !TARGET_USE_CLTD" + (clobber (reg:CC FLAGS_REG))] + "optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD" "#" [(set_attr "type" "multi")]) @@ -7861,8 +8565,8 @@ (match_operand:SI 3 "nonimmediate_operand" "rm"))) (set (match_operand:SI 1 "register_operand" "=&d") (mod:SI (match_dup 2) (match_dup 3))) - (clobber (reg:CC 17))] - "optimize_size || TARGET_USE_CLTD" + (clobber (reg:CC FLAGS_REG))] + "optimize_function_for_size_p (cfun) || TARGET_USE_CLTD" "#" [(set_attr "type" "multi")]) @@ -7873,12 +8577,11 @@ (set (match_operand:SI 3 "register_operand" "=d") (mod:SI (match_dup 1) (match_dup 2))) (use (match_operand:SI 4 "register_operand" "3")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" "idiv{l}\t%2" [(set_attr "type" "idiv") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "SI")]) (define_split [(set (match_operand:SI 0 "register_operand" "") @@ -7886,20 +8589,20 @@ (match_operand:SI 2 "nonimmediate_operand" ""))) (set (match_operand:SI 3 "register_operand" "") (mod:SI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "reload_completed" [(parallel [(set (match_dup 3) (ashiftrt:SI (match_dup 4) (const_int 31))) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (parallel [(set (match_dup 0) (div:SI (reg:SI 0) (match_dup 2))) (set (match_dup 3) (mod:SI (reg:SI 0) (match_dup 2))) (use (match_dup 3)) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] { - /* Avoid use of cltd in favour of a mov+shift. */ - if (!TARGET_USE_CLTD && !optimize_size) + /* Avoid use of cltd in favor of a mov+shift. 
*/ + if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun)) { if (true_regnum (operands[1])) emit_move_insn (operands[0], operands[1]); @@ -7909,8 +8612,7 @@ } else { - if (true_regnum (operands[1])) - abort(); + gcc_assert (!true_regnum (operands[1])); operands[4] = operands[1]; } }) @@ -7921,7 +8623,7 @@ (match_operand:HI 2 "nonimmediate_operand" "rm"))) (set (match_operand:HI 3 "register_operand" "=&d") (mod:HI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_HIMODE_MATH" "cwtd\;idiv{w}\t%2" [(set_attr "type" "multi") @@ -7934,7 +8636,7 @@ (match_operand:DI 2 "nonimmediate_operand" "rm"))) (set (match_operand:DI 3 "register_operand" "=&d") (umod:DI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "xor{q}\t%3, %3\;div{q}\t%2" [(set_attr "type" "multi") @@ -7948,11 +8650,10 @@ (set (match_operand:DI 3 "register_operand" "=d") (umod:DI (match_dup 1) (match_dup 2))) (use (match_dup 3)) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "div{q}\t%2" [(set_attr "type" "idiv") - (set_attr "ppro_uops" "few") (set_attr "mode" "DI")]) (define_split @@ -7961,7 +8662,7 @@ (match_operand:DI 2 "nonimmediate_operand" ""))) (set (match_operand:DI 3 "register_operand" "") (umod:DI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed" [(set (match_dup 3) (const_int 0)) (parallel [(set (match_dup 0) @@ -7969,7 +8670,7 @@ (set (match_dup 3) (umod:DI (match_dup 1) (match_dup 2))) (use (match_dup 3)) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_insn "udivmodsi4" @@ -7978,7 +8679,7 @@ (match_operand:SI 2 "nonimmediate_operand" "rm"))) (set (match_operand:SI 3 "register_operand" "=&d") (umod:SI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" "xor{l}\t%3, %3\;div{l}\t%2" [(set_attr "type" "multi") @@ -7992,11 +8693,10 @@ (set 
(match_operand:SI 3 "register_operand" "=d") (umod:SI (match_dup 1) (match_dup 2))) (use (match_dup 3)) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" "div{l}\t%2" [(set_attr "type" "idiv") - (set_attr "ppro_uops" "few") (set_attr "mode" "SI")]) (define_split @@ -8005,7 +8705,7 @@ (match_operand:SI 2 "nonimmediate_operand" ""))) (set (match_operand:SI 3 "register_operand" "") (umod:SI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "reload_completed" [(set (match_dup 3) (const_int 0)) (parallel [(set (match_dup 0) @@ -8013,7 +8713,7 @@ (set (match_dup 3) (umod:SI (match_dup 1) (match_dup 2))) (use (match_dup 3)) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_expand "udivmodhi4" @@ -8024,7 +8724,7 @@ (set (match_operand:HI 3 "register_operand" "") (umod:HI (match_dup 1) (match_dup 2))) (use (match_dup 4)) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "TARGET_HIMODE_MATH" "operands[4] = gen_reg_rtx (HImode);") @@ -8035,14 +8735,13 @@ (set (match_operand:HI 3 "register_operand" "=d") (umod:HI (match_dup 1) (match_dup 2))) (use (match_operand:HI 4 "register_operand" "3")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" "div{w}\t%2" [(set_attr "type" "idiv") - (set_attr "mode" "HI") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "HI")]) -;; We can not use div/idiv for double division, because it causes +;; We cannot use div/idiv for double division, because it causes ;; "division by zero" on the overflow and that's not what we expect ;; from truncate. Because true (non truncating) double division is ;; never generated, we can't create this insn anyway. 
@@ -8056,11 +8755,10 @@ ; (set (match_operand:SI 3 "register_operand" "=d") ; (truncate:SI ; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2))))) -; (clobber (reg:CC 17))] +; (clobber (reg:CC FLAGS_REG))] ; "" ; "div{l}\t{%2, %0|%0, %2}" -; [(set_attr "type" "idiv") -; (set_attr "ppro_uops" "few")]) +; [(set_attr "type" "idiv")]) ;;- Logical AND instructions @@ -8068,17 +8766,18 @@ ;; Note that this excludes ah. (define_insn "*testdi_1_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare - (and:DI (match_operand:DI 0 "nonimmediate_operand" "%*a,r,*a,r,rm") - (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,Z,e,e,re")) + (and:DI (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm") + (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re")) (const_int 0)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ - test{l}\t{%k1, %k0|%k0, %k1} - test{l}\t{%k1, %k0|%k0, %k1} - test{q}\t{%1, %0|%0, %1} - test{q}\t{%1, %0|%0, %1} + test{l}\t{%k1, %k0|%k0, %k1} + test{l}\t{%k1, %k0|%k0, %k1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1} test{q}\t{%1, %0|%0, %1}" [(set_attr "type" "test") (set_attr "modrm" "0,1,0,1,1") @@ -8086,12 +8785,13 @@ (set_attr "pent_pair" "uv,np,uv,np,uv")]) (define_insn "testsi_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare - (and:SI (match_operand:SI 0 "nonimmediate_operand" "%*a,r,rm") - (match_operand:SI 1 "nonmemory_operand" "in,in,rin")) + (and:SI (match_operand:SI 0 "nonimmediate_operand" "%!*a,r,rm") + (match_operand:SI 1 "general_operand" "i,i,ri")) (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "test{l}\t{%1, %0|%0, %1}" [(set_attr "type" "test") (set_attr "modrm" "0,1,1") @@ -8099,7 +8799,7 @@ (set_attr "pent_pair" "uv,np,uv")]) (define_expand "testsi_ccno_1" - [(set (reg:CCNO 17) + [(set 
(reg:CCNO FLAGS_REG) (compare:CCNO (and:SI (match_operand:SI 0 "nonimmediate_operand" "") (match_operand:SI 1 "nonmemory_operand" "")) @@ -8108,11 +8808,12 @@ "") (define_insn "*testhi_1" - [(set (reg 17) - (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%*a,r,rm") - (match_operand:HI 1 "nonmemory_operand" "n,n,rn")) + [(set (reg FLAGS_REG) + (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%!*a,r,rm") + (match_operand:HI 1 "general_operand" "n,n,rn")) (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "test{w}\t{%1, %0|%0, %1}" [(set_attr "type" "test") (set_attr "modrm" "0,1,1") @@ -8120,24 +8821,28 @@ (set_attr "pent_pair" "uv,np,uv")]) (define_expand "testqi_ccz_1" - [(set (reg:CCZ 17) + [(set (reg:CCZ FLAGS_REG) (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "") (match_operand:QI 1 "nonmemory_operand" "")) (const_int 0)))] "" "") -(define_insn "*testqi_1" - [(set (reg 17) - (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%*a,q,qm,r") - (match_operand:QI 1 "nonmemory_operand" "n,n,qn,n")) - (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" +(define_insn "*testqi_1_maybe_si" + [(set (reg FLAGS_REG) + (compare + (and:QI + (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r") + (match_operand:QI 1 "general_operand" "n,n,qn,n")) + (const_int 0)))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, + CONST_INT_P (operands[1]) + && INTVAL (operands[1]) >= 0 ? 
CCNOmode : CCZmode)" { if (which_alternative == 3) { - if (GET_CODE (operands[1]) == CONST_INT - && (INTVAL (operands[1]) & 0xffffff00)) + if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0) operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); return "test{l}\t{%1, %k0|%k0, %1}"; } @@ -8148,8 +8853,23 @@ (set_attr "mode" "QI,QI,QI,SI") (set_attr "pent_pair" "uv,np,uv,np")]) +(define_insn "*testqi_1" + [(set (reg FLAGS_REG) + (compare + (and:QI + (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm") + (match_operand:QI 1 "general_operand" "n,n,qn")) + (const_int 0)))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "QI") + (set_attr "pent_pair" "uv,np,uv")]) + (define_expand "testqi_ext_ccno_0" - [(set (reg:CCNO 17) + [(set (reg:CCNO FLAGS_REG) (compare:CCNO (and:SI (zero_extract:SI @@ -8162,7 +8882,7 @@ "") (define_insn "*testqi_ext_0" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:SI (zero_extract:SI @@ -8171,8 +8891,7 @@ (const_int 8)) (match_operand 1 "const_int_operand" "n")) (const_int 0)))] - "(unsigned HOST_WIDE_INT) INTVAL (operands[1]) <= 0xff - && ix86_match_ccmode (insn, CCNOmode)" + "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t{%1, %h0|%h0, %1}" [(set_attr "type" "test") (set_attr "mode" "QI") @@ -8180,7 +8899,7 @@ (set_attr "pent_pair" "np")]) (define_insn "*testqi_ext_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:SI (zero_extract:SI @@ -8188,15 +8907,16 @@ (const_int 8) (const_int 8)) (zero_extend:SI - (match_operand:QI 1 "nonimmediate_operand" "Qm"))) + (match_operand:QI 1 "general_operand" "Qm"))) (const_int 0)))] - "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "test{b}\t{%1, %h0|%h0, %1}" [(set_attr "type" "test") (set_attr "mode" "QI")]) (define_insn 
"*testqi_ext_1_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:SI (zero_extract:SI @@ -8212,7 +8932,7 @@ (set_attr "mode" "QI")]) (define_insn "*testqi_ext_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:SI (zero_extract:SI @@ -8231,13 +8951,16 @@ ;; Combine likes to form bit extractions for some tests. Humor it. (define_insn "*testqi_ext_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (zero_extract:SI (match_operand 0 "nonimmediate_operand" "rm") (match_operand:SI 1 "const_int_operand" "") (match_operand:SI 2 "const_int_operand" "")) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode) + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 && (GET_MODE (operands[0]) == SImode || (TARGET_64BIT && GET_MODE (operands[0]) == DImode) || GET_MODE (operands[0]) == HImode @@ -8245,7 +8968,7 @@ "#") (define_insn "*testqi_ext_3_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (zero_extract:DI (match_operand 0 "nonimmediate_operand" "rm") (match_operand:DI 1 "const_int_operand" "") @@ -8253,8 +8976,8 @@ (const_int 0)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) - /* The code below cannot deal with constants outside HOST_WIDE_INT. */ - && INTVAL (operands[1]) + INTVAL (operands[2]) < HOST_BITS_PER_WIDE_INT + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 /* Ensure that resulting mask is zero or sign extended operand. 
*/ && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64 @@ -8266,54 +8989,105 @@ "#") (define_split - [(set (reg 17) - (compare (zero_extract - (match_operand 0 "nonimmediate_operand" "") - (match_operand 1 "const_int_operand" "") - (match_operand 2 "const_int_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(zero_extract + (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode)" - [(set (reg:CCNO 17) (compare:CCNO (match_dup 3) (const_int 0)))] + [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))] { - HOST_WIDE_INT len = INTVAL (operands[1]); - HOST_WIDE_INT pos = INTVAL (operands[2]); + rtx val = operands[2]; + HOST_WIDE_INT len = INTVAL (operands[3]); + HOST_WIDE_INT pos = INTVAL (operands[4]); HOST_WIDE_INT mask; enum machine_mode mode, submode; - mode = GET_MODE (operands[0]); - if (GET_CODE (operands[0]) == MEM) + mode = GET_MODE (val); + if (MEM_P (val)) { /* ??? Combine likes to put non-volatile mem extractions in QImode no matter the size of the test. So find a mode that works. */ - if (! MEM_VOLATILE_P (operands[0])) + if (! MEM_VOLATILE_P (val)) { mode = smallest_mode_for_size (pos + len, MODE_INT); - operands[0] = adjust_address (operands[0], mode, 0); + val = adjust_address (val, mode, 0); } } - else if (GET_CODE (operands[0]) == SUBREG - && (submode = GET_MODE (SUBREG_REG (operands[0])), + else if (GET_CODE (val) == SUBREG + && (submode = GET_MODE (SUBREG_REG (val)), GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)) - && pos + len <= GET_MODE_BITSIZE (submode)) + && pos + len <= GET_MODE_BITSIZE (submode) + && GET_MODE_CLASS (submode) == MODE_INT) { /* Narrow a paradoxical subreg to prevent partial register stalls. 
*/ mode = submode; - operands[0] = SUBREG_REG (operands[0]); + val = SUBREG_REG (val); } else if (mode == HImode && pos + len <= 8) { /* Small HImode tests can be converted to QImode. */ mode = QImode; - operands[0] = gen_lowpart (QImode, operands[0]); + val = gen_lowpart (QImode, val); } - mask = ((HOST_WIDE_INT)1 << (pos + len)) - 1; - mask &= ~(((HOST_WIDE_INT)1 << pos) - 1); + if (len == HOST_BITS_PER_WIDE_INT) + mask = -1; + else + mask = ((HOST_WIDE_INT)1 << len) - 1; + mask <<= pos; - operands[3] = gen_rtx_AND (mode, operands[0], - GEN_INT (trunc_int_for_mode (mask, mode))); + operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode)); }) +;; Convert HImode/SImode test instructions with immediate to QImode ones. +;; i386 does not allow to encode test with 8bit sign extended immediate, so +;; this is relatively important trick. +;; Do the conversion only post-reload to avoid limiting of the register class +;; to QI regs. +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "reload_completed + && QI_REG_P (operands[2]) + && GET_MODE (operands[2]) != QImode + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[3]) & ~(255 << 8))) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[3]) & ~(127 << 8))))" + [(set (match_dup 0) + (match_op_dup 1 + [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8)) + (match_dup 3)) + (const_int 0)]))] + "operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);") + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "reload_completed + && GET_MODE (operands[2]) != QImode + && (!REG_P (operands[2]) || 
ANY_QI_REG_P (operands[2])) + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[3]) & ~255)) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[3]) & ~127)))" + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)]))] + "operands[2] = gen_lowpart (QImode, operands[2]); + operands[3] = gen_lowpart (QImode, operands[3]);") + + ;; %%% This used to optimize known byte-wide and operations to memory, ;; and sometimes to QImode registers. If this is considered useful, ;; it should be done with splitters. @@ -8321,8 +9095,7 @@ (define_expand "anddi3" [(set (match_operand:DI 0 "nonimmediate_operand" "") (and:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "x86_64_szext_general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:DI 2 "x86_64_szext_general_operand" "")))] "TARGET_64BIT" "ix86_expand_binary_operator (AND, DImode, operands); DONE;") @@ -8330,7 +9103,7 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" { switch (get_attr_type (insn)) @@ -8339,15 +9112,15 @@ { enum machine_mode mode; - if (GET_CODE (operands[2]) != CONST_INT) - abort (); + gcc_assert (CONST_INT_P (operands[2])); if (INTVAL (operands[2]) == 0xff) mode = QImode; - else if (INTVAL (operands[2]) == 0xffff) - mode = HImode; else - abort (); - + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } + operands[1] = gen_lowpart (mode, operands[1]); if (mode == QImode) return "movz{bq|x}\t{%1,%0|%0, %1}"; @@ -8356,8 +9129,7 @@ } default: - if (! 
rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (get_attr_mode (insn) == MODE_SI) return "and{l}\t{%k2, %k0|%k0, %k2}"; else @@ -8369,7 +9141,7 @@ (set_attr "mode" "SI,DI,DI,DI")]) (define_insn "*anddi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re")) (const_int 0))) @@ -8378,8 +9150,8 @@ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) && ix86_binary_operator_ok (AND, DImode, operands)" "@ - and{l}\t{%k2, %k0|%k0, %k2} - and{q}\t{%2, %0|%0, %2} + and{l}\t{%k2, %k0|%k0, %k2} + and{q}\t{%2, %0|%0, %2} and{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "SI,DI,DI")]) @@ -8387,8 +9159,7 @@ (define_expand "andsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") (and:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "")))] "" "ix86_expand_binary_operator (AND, SImode, operands); DONE;") @@ -8396,7 +9167,7 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") (match_operand:SI 2 "general_operand" "ri,rm,L"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, SImode, operands)" { switch (get_attr_type (insn)) @@ -8405,15 +9176,15 @@ { enum machine_mode mode; - if (GET_CODE (operands[2]) != CONST_INT) - abort (); + gcc_assert (CONST_INT_P (operands[2])); if (INTVAL (operands[2]) == 0xff) mode = QImode; - else if (INTVAL (operands[2]) == 0xffff) - mode = HImode; else - abort (); - + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } + operands[1] = gen_lowpart (mode, operands[1]); if (mode == QImode) return "movz{bl|x}\t{%1,%0|%0, %1}"; @@ -8422,8 +9193,7 @@ } default: - if (! 
rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); return "and{l}\t{%2, %0|%0, %2}"; } } @@ -8435,8 +9205,8 @@ [(set (match_operand 0 "register_operand" "") (and (match_dup 0) (const_int -65536))) - (clobber (reg:CC 17))] - "optimize_size" + (clobber (reg:CC FLAGS_REG))] + "optimize_function_for_size_p (cfun) || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)" [(set (strict_low_part (match_dup 1)) (const_int 0))] "operands[1] = gen_lowpart (HImode, operands[0]);") @@ -8444,8 +9214,8 @@ [(set (match_operand 0 "ext_register_operand" "") (and (match_dup 0) (const_int -256))) - (clobber (reg:CC 17))] - "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed" + (clobber (reg:CC FLAGS_REG))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed" [(set (strict_low_part (match_dup 1)) (const_int 0))] "operands[1] = gen_lowpart (QImode, operands[0]);") @@ -8453,19 +9223,19 @@ [(set (match_operand 0 "ext_register_operand" "") (and (match_dup 0) (const_int -65281))) - (clobber (reg:CC 17))] - "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed" + (clobber (reg:CC FLAGS_REG))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed" [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) - (xor:SI + (xor:SI (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "operands[0] = gen_lowpart (SImode, operands[0]);") ;; See comment for addsi_1_zext why we do use nonimmediate_operand @@ -8473,17 +9243,17 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] 
"TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" "and{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn "*andsi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (and:SI (match_dup 1) (match_dup 2)))] @@ -8495,9 +9265,9 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*andsi_2_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] @@ -8510,30 +9280,26 @@ (define_expand "andhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (and:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:HI 2 "general_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (AND, HImode, operands); DONE;") (define_insn "*andhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") - (match_operand:HI 2 "general_operand" "ri,rm,L"))) - (clobber (reg:CC 17))] + (match_operand:HI 2 "general_operand" "rn,rm,L"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, HImode, operands)" { switch (get_attr_type (insn)) { case TYPE_IMOVX: - if (GET_CODE (operands[2]) != CONST_INT) - abort (); - if (INTVAL (operands[2]) == 0xff) - return "movz{bl|x}\t{%b1, %k0|%k0, %b1}"; - abort (); + gcc_assert (CONST_INT_P (operands[2])); + gcc_assert (INTVAL (operands[2]) == 0xff); + return "movz{bl|x}\t{%b1, %k0|%k0, %b1}"; default: - if (! 
rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); return "and{w}\t{%2, %0|%0, %2}"; } @@ -8543,9 +9309,9 @@ (set_attr "mode" "HI,HI,SI")]) (define_insn "*andhi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "rmn,rn")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (and:HI (match_dup 1) (match_dup 2)))] @@ -8558,8 +9324,7 @@ (define_expand "andqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (and:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "general_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (AND, QImode, operands); DONE;") @@ -8567,8 +9332,8 @@ (define_insn "*andqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qi,qmi,ri"))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, QImode, operands)" "@ and{b}\t{%2, %0|%0, %2} @@ -8580,28 +9345,30 @@ (define_insn "*andqi_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) (and:QI (match_dup 0) - (match_operand:QI 1 "general_operand" "qi,qmi"))) - (clobber (reg:CC 17))] - "" + (match_operand:QI 1 "general_operand" "qn,qmn"))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "and{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) -(define_insn "*andqi_2" - [(set (reg 17) +(define_insn "*andqi_2_maybe_si" + [(set (reg FLAGS_REG) (compare (and:QI - (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qim,qi,i")) + (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmn,qn,n")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r") (and:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCNOmode) - && ix86_binary_operator_ok (AND, QImode, operands)" + "ix86_binary_operator_ok (AND, QImode, operands) + && ix86_match_ccmode (insn, + CONST_INT_P (operands[2]) + && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)" { if (which_alternative == 2) { - if (GET_CODE (operands[2]) == CONST_INT - && (INTVAL (operands[2]) & 0xffffff00)) + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); return "and{l}\t{%2, %k0|%k0, %2}"; } @@ -8610,15 +9377,31 @@ [(set_attr "type" "alu") (set_attr "mode" "QI,QI,SI")]) +(define_insn "*andqi_2" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qmn,qn")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (and:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, QImode, operands)" + "and{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + (define_insn "*andqi_2_slp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") - (match_operand:QI 1 "nonimmediate_operand" "qmi,qi")) + (match_operand:QI 1 "nonimmediate_operand" "qmn,qn")) (const_int 0))) (set (strict_low_part 
(match_dup 0)) (and:QI (match_dup 0) (match_dup 1)))] - "ix86_match_ccmode (insn, CCNOmode)" + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "and{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8631,14 +9414,14 @@ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (and:SI + (and:SI (zero_extract:SI (match_operand 1 "ext_register_operand" "0") (const_int 8) (const_int 8)) (match_operand 2 "const_int_operand" "n"))) - (clobber (reg:CC 17))] - "(unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff" + (clobber (reg:CC FLAGS_REG))] + "" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") (set_attr "length_immediate" "1") @@ -8648,7 +9431,7 @@ ;; often in fp comparisons. (define_insn "*andqi_ext_0_cc" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (and:SI (zero_extract:SI @@ -8660,14 +9443,13 @@ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (and:SI + (and:SI (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) (match_dup 2)))] - "ix86_match_ccmode (insn, CCNOmode) - && (unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff" + "ix86_match_ccmode (insn, CCNOmode)" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") (set_attr "length_immediate" "1") @@ -8677,14 +9459,14 @@ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (and:SI + (and:SI (zero_extract:SI (match_operand 1 "ext_register_operand" "0") (const_int 8) (const_int 8)) (zero_extend:SI (match_operand:QI 2 "general_operand" "Qm")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") @@ -8695,14 +9477,14 @@ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (and:SI + (and:SI (zero_extract:SI 
(match_operand 1 "ext_register_operand" "0") (const_int 8) (const_int 8)) (zero_extend:SI (match_operand 2 "ext_register_operand" "Q")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") @@ -8722,12 +9504,57 @@ (match_operand 2 "ext_register_operand" "Q") (const_int 8) (const_int 8)))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" "and{b}\t{%h2, %h0|%h0, %h2}" [(set_attr "type" "alu") (set_attr "length_immediate" "0") (set_attr "mode" "QI")]) + +;; Convert wide AND instructions with immediate operand to shorter QImode +;; equivalents when possible. +;; Don't do the splitting with memory operands, since it introduces risk +;; of memory mismatch stalls. We may want to do the splitting for optimizing +;; for size, but that can (should?) be handled by generic code instead. +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(~INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (and:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since AND can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is not set. 
+(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(~INTVAL (operands[2]) & ~255) + && !(INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (and:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") ;; Logical inclusive OR instructions @@ -8737,8 +9564,7 @@ (define_expand "iordi3" [(set (match_operand:DI 0 "nonimmediate_operand" "") (ior:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "x86_64_general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:DI 2 "x86_64_general_operand" "")))] "TARGET_64BIT" "ix86_expand_binary_operator (IOR, DImode, operands); DONE;") @@ -8746,7 +9572,7 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") (match_operand:DI 2 "x86_64_general_operand" "re,rme"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (IOR, DImode, operands)" "or{q}\t{%2, %0|%0, %2}" @@ -8754,7 +9580,7 @@ (set_attr "mode" "DI")]) (define_insn "*iordi_2_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") (match_operand:DI 2 "x86_64_general_operand" "rem,re")) (const_int 0))) @@ -8768,7 +9594,7 @@ (set_attr "mode" "DI")]) (define_insn "*iordi_3_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0") (match_operand:DI 2 "x86_64_general_operand" "rem")) (const_int 0))) @@ -8784,16 +9610,15 @@ (define_expand 
"iorsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") (ior:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "")))] "" "ix86_expand_binary_operator (IOR, SImode, operands); DONE;") (define_insn "*iorsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rmi"))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "ri,g"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (IOR, SImode, operands)" "or{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") @@ -8801,30 +9626,30 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*iorsi_1_zext" - [(set (match_operand:DI 0 "register_operand" "=rm") + [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)" "or{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn "*iorsi_1_zext_imm" - [(set (match_operand:DI 0 "register_operand" "=rm") + [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "or{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn "*iorsi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 
"nonimmediate_operand" "=r,rm") (ior:SI (match_dup 1) (match_dup 2)))] @@ -8837,9 +9662,9 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand ;; ??? Special case for immediate operand is missing - it is tricky. (define_insn "*iorsi_2_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))] @@ -8850,7 +9675,7 @@ (set_attr "mode" "SI")]) (define_insn "*iorsi_2_zext_imm" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") (match_operand 2 "x86_64_zext_immediate_operand" "Z")) (const_int 0))) @@ -8863,13 +9688,13 @@ (set_attr "mode" "SI")]) (define_insn "*iorsi_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "or{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "SI")]) @@ -8877,25 +9702,24 @@ (define_expand "iorhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (ior:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:HI 2 "general_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (IOR, HImode, operands); DONE;") (define_insn "*iorhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmi,ri"))) - (clobber (reg:CC 17))] + (match_operand:HI 2 
"general_operand" "rmn,rn"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (IOR, HImode, operands)" "or{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "HI")]) (define_insn "*iorhi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "rmn,rn")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (ior:HI (match_dup 1) (match_dup 2)))] @@ -8906,13 +9730,13 @@ (set_attr "mode" "HI")]) (define_insn "*iorhi_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0") - (match_operand:HI 2 "general_operand" "rim")) + (match_operand:HI 2 "general_operand" "rmn")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "or{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "HI")]) @@ -8920,8 +9744,7 @@ (define_expand "iorqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (ior:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "general_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (IOR, QImode, operands); DONE;") @@ -8929,8 +9752,8 @@ (define_insn "*iorqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qmi,qi,ri"))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "general_operand" "qmn,qn,rn"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (IOR, QImode, operands)" "@ or{b}\t{%2, %0|%0, %2} @@ -8942,17 +9765,18 @@ (define_insn "*iorqi_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m")) (ior:QI 
(match_dup 0) - (match_operand:QI 1 "general_operand" "qmi,qi"))) - (clobber (reg:CC 17))] - "" + (match_operand:QI 1 "general_operand" "qmn,qn"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "or{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) (define_insn "*iorqi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qim,qi")) + (match_operand:QI 2 "general_operand" "qmn,qn")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") (ior:QI (match_dup 1) (match_dup 2)))] @@ -8963,74 +9787,183 @@ (set_attr "mode" "QI")]) (define_insn "*iorqi_2_slp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") - (match_operand:QI 1 "general_operand" "qim,qi")) + (match_operand:QI 1 "general_operand" "qmn,qn")) (const_int 0))) (set (strict_low_part (match_dup 0)) (ior:QI (match_dup 0) (match_dup 1)))] - "ix86_match_ccmode (insn, CCNOmode)" + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "or{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) (define_insn "*iorqi_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0") - (match_operand:QI 2 "general_operand" "qim")) + (match_operand:QI 2 "general_operand" "qmn")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] "ix86_match_ccmode (insn, CCNOmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "or{b}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "QI")]) - -;; Logical XOR instructions - -;; %%% This used to optimize known byte-wide and operations to memory. -;; If this is considered useful, it should be done with splitters. - -(define_expand "xordi3" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (xor:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "x86_64_general_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_64BIT" - "ix86_expand_binary_operator (XOR, DImode, operands); DONE;") +(define_insn "*iorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) -(define_insn "*xordi_1_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") - (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") - (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) - (clobber (reg:CC 17))] - "TARGET_64BIT - && 
ix86_binary_operator_ok (XOR, DImode, operands)" - "@ - xor{q}\t{%2, %0|%0, %2} - xor{q}\t{%2, %0|%0, %2}" +(define_insn "*iorqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "or{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") - (set_attr "mode" "DI,DI")]) + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) -(define_insn "*xordi_2_rex64" - [(set (reg 17) - (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") - (match_operand:DI 2 "x86_64_general_operand" "rem,re")) - (const_int 0))) - (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") - (xor:DI (match_dup 1) (match_dup 2)))] +(define_insn "*iorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "or{b}\t{%h2, 
%h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (ior (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (ior:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since OR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. +(define_split + [(set (match_operand 0 "register_operand" "") + (ior (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (ior:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") + +;; Logical XOR instructions + +;; %%% This used to optimize known byte-wide and operations to memory. +;; If this is considered useful, it should be done with splitters. 
+ +(define_expand "xordi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (xor:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (XOR, DImode, operands); DONE;") + +(define_insn "*xordi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && ix86_binary_operator_ok (XOR, DImode, operands)" + "xor{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*xordi_2_rex64" + [(set (reg FLAGS_REG) + (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (xor:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) && ix86_binary_operator_ok (XOR, DImode, operands)" - "@ - xor{q}\t{%2, %0|%0, %2} - xor{q}\t{%2, %0|%0, %2}" + "xor{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") - (set_attr "mode" "DI,DI")]) + (set_attr "mode" "DI")]) (define_insn "*xordi_3_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0") (match_operand:DI 2 "x86_64_general_operand" "rem")) (const_int 0))) @@ -9045,8 +9978,7 @@ (define_expand "xorsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") (xor:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "")))] "" "ix86_expand_binary_operator (XOR, SImode, operands); DONE;") @@ -9054,7 +9986,7 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") (match_operand:SI 2 "general_operand" "ri,rm"))) - (clobber (reg:CC 
17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (XOR, SImode, operands)" "xor{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") @@ -9066,8 +9998,8 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) - (clobber (reg:CC 17))] + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" "xor{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") @@ -9077,16 +10009,16 @@ [(set (match_operand:DI 0 "register_operand" "=r") (xor:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" "xor{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn "*xorsi_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (xor:SI (match_dup 1) (match_dup 2)))] @@ -9099,9 +10031,9 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand ;; ??? Special case for immediate operand is missing - it is tricky. 
(define_insn "*xorsi_2_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))] @@ -9112,7 +10044,7 @@ (set_attr "mode" "SI")]) (define_insn "*xorsi_2_zext_imm" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") (match_operand 2 "x86_64_zext_immediate_operand" "Z")) (const_int 0))) @@ -9125,13 +10057,13 @@ (set_attr "mode" "SI")]) (define_insn "*xorsi_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "xor{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "SI")]) @@ -9139,25 +10071,24 @@ (define_expand "xorhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (xor:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:HI 2 "general_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (XOR, HImode, operands); DONE;") (define_insn "*xorhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmi,ri"))) - (clobber (reg:CC 17))] + (match_operand:HI 2 "general_operand" "rmn,rn"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (XOR, HImode, operands)" "xor{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "HI")]) (define_insn "*xorhi_2" - [(set (reg 17) 
+ [(set (reg FLAGS_REG) (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "rmn,rn")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (xor:HI (match_dup 1) (match_dup 2)))] @@ -9168,13 +10099,13 @@ (set_attr "mode" "HI")]) (define_insn "*xorhi_3" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0") - (match_operand:HI 2 "general_operand" "rim")) + (match_operand:HI 2 "general_operand" "rmn")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "xor{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "HI")]) @@ -9182,8 +10113,7 @@ (define_expand "xorqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (xor:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "general_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "general_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (XOR, QImode, operands); DONE;") @@ -9191,8 +10121,8 @@ (define_insn "*xorqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qmi,qi,ri"))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "general_operand" "qmn,qn,rn"))) + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (XOR, QImode, operands)" "@ xor{b}\t{%2, %0|%0, %2} @@ -9201,29 +10131,95 @@ [(set_attr "type" "alu") (set_attr "mode" "QI,QI,SI")]) +(define_insn "*xorqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (xor:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qmn"))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + (define_insn "*xorqi_ext_1" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (xor:SI + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI (zero_extract:SI 
(match_operand 1 "ext_register_operand" "0") (const_int 8) (const_int 8)) (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") (const_int 8) (const_int 8)))) - (clobber (reg:CC 17))] - "" + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" "xor{b}\t{%h2, %h0|%h0, %h2}" [(set_attr "type" "alu") (set_attr "length_immediate" "0") (set_attr "mode" "QI")]) (define_insn "*xorqi_cc_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qim,qi")) + (match_operand:QI 2 "general_operand" "qmn,qn")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") (xor:QI (match_dup 1) (match_dup 2)))] @@ -9233,21 +10229,35 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*xorqi_2_slp" + [(set (reg FLAGS_REG) + (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "general_operand" "qmn,qn")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 0) (match_dup 1)))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + (define_insn "*xorqi_cc_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0") - (match_operand:QI 2 "general_operand" "qim")) + (match_operand:QI 2 "general_operand" "qmn")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] "ix86_match_ccmode (insn, CCNOmode) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "xor{b}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "QI")]) (define_insn "*xorqi_cc_ext_1" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:SI (zero_extract:SI @@ -9259,7 +10269,7 @@ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q") (const_int 8) (const_int 8)) - (xor:SI + (xor:SI (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) (match_dup 2)))] "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" @@ -9268,7 +10278,7 @@ (set_attr "mode" "QI")]) (define_insn "*xorqi_cc_ext_1_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (xor:SI (zero_extract:SI @@ -9280,7 +10290,7 @@ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (xor:SI + (xor:SI (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" @@ -9290,7 +10300,7 @@ (define_expand "xorqi_cc_ext_1" [(parallel [ - (set (reg:CCNO 17) + (set (reg:CCNO FLAGS_REG) (compare:CCNO (xor:SI (zero_extract:SI @@ -9302,25 +10312,99 @@ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "") (const_int 8) (const_int 8)) - (xor:SI + (xor:SI (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) (match_dup 2)))])] "" "") + +(define_split + [(set (match_operand 0 
"register_operand" "") + (xor (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (xor:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since XOR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. +(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") ;; Negation instructions +(define_expand "negti2" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))] + "TARGET_64BIT" + "ix86_expand_unary_operator (NEG, TImode, operands); DONE;") + +(define_insn "*negti2_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=ro") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && 
ix86_unary_operator_ok (NEG, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:DI (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:DI (match_dup 1)))]) + (parallel + [(set (match_dup 2) + (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 2) + (neg:DI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (&operands[0], 2, &operands[0], &operands[2]);") + (define_expand "negdi2" - [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (neg:DI (match_operand:DI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (neg:DI (match_operand:DI 1 "nonimmediate_operand" "")))] "" "ix86_expand_unary_operator (NEG, DImode, operands); DONE;") (define_insn "*negdi2_1" [(set (match_operand:DI 0 "nonimmediate_operand" "=ro") (neg:DI (match_operand:DI 1 "general_operand" "0"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)" "#") @@ -9328,29 +10412,28 @@ (define_split [(set (match_operand:DI 0 "nonimmediate_operand" "") (neg:DI (match_operand:DI 1 "general_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && reload_completed" [(parallel - [(set (reg:CCZ 17) - (compare:CCZ (neg:SI (match_dup 2)) (const_int 0))) - (set (match_dup 0) (neg:SI (match_dup 2)))]) + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:SI (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:SI (match_dup 1)))]) (parallel - [(set (match_dup 1) - (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + [(set (match_dup 2) + (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) 
(const_int 0)) (match_dup 3)) (const_int 0))) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (parallel - [(set (match_dup 1) - (neg:SI (match_dup 1))) - (clobber (reg:CC 17))])] - "split_di (operands+1, 1, operands+2, operands+3); - split_di (operands+0, 1, operands+0, operands+1);") + [(set (match_dup 2) + (neg:SI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 2, &operands[0], &operands[2]);"); (define_insn "*negdi2_1_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)" "neg{q}\t%0" [(set_attr "type" "negnot") @@ -9361,7 +10444,7 @@ ;; flag being the only useful item. (define_insn "*negdi2_cmpz_rex64" - [(set (reg:CCZ 17) + [(set (reg:CCZ FLAGS_REG) (compare:CCZ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") @@ -9373,16 +10456,15 @@ (define_expand "negsi2" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (neg:SI (match_operand:SI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (neg:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "" "ix86_expand_unary_operator (NEG, SImode, operands); DONE;") (define_insn "*negsi2_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_unary_operator_ok (NEG, SImode, operands)" "neg{l}\t%0" [(set_attr "type" "negnot") @@ -9394,7 +10476,7 @@ (lshiftrt:DI (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0") (const_int 32))) (const_int 32))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" "neg{l}\t%k0" [(set_attr "type" "negnot") @@ 
-9405,7 +10487,7 @@ ;; flag being the only useful item. (define_insn "*negsi2_cmpz" - [(set (reg:CCZ 17) + [(set (reg:CCZ FLAGS_REG) (compare:CCZ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") @@ -9416,7 +10498,7 @@ (set_attr "mode" "SI")]) (define_insn "*negsi2_cmpz_zext" - [(set (reg:CCZ 17) + [(set (reg:CCZ FLAGS_REG) (compare:CCZ (lshiftrt:DI (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0") @@ -9433,23 +10515,22 @@ (set_attr "mode" "SI")]) (define_expand "neghi2" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (neg:HI (match_operand:HI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (neg:HI (match_operand:HI 1 "nonimmediate_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_unary_operator (NEG, HImode, operands); DONE;") (define_insn "*neghi2_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_unary_operator_ok (NEG, HImode, operands)" "neg{w}\t%0" [(set_attr "type" "negnot") (set_attr "mode" "HI")]) (define_insn "*neghi2_cmpz" - [(set (reg:CCZ 17) + [(set (reg:CCZ FLAGS_REG) (compare:CCZ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") @@ -9460,23 +10541,22 @@ (set_attr "mode" "HI")]) (define_expand "negqi2" - [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") - (neg:QI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (neg:QI (match_operand:QI 1 "nonimmediate_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_unary_operator (NEG, QImode, operands); DONE;") (define_insn "*negqi2_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (neg:QI (match_operand:QI 1 "nonimmediate_operand" 
"0"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_unary_operator_ok (NEG, QImode, operands)" "neg{b}\t%0" [(set_attr "type" "negnot") (set_attr "mode" "QI")]) (define_insn "*negqi2_cmpz" - [(set (reg:CCZ 17) + [(set (reg:CCZ FLAGS_REG) (compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") @@ -9488,881 +10568,283 @@ ;; Changing of sign for FP values is doable using integer unit too. -(define_expand "negsf2" - [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") - (neg:SF (match_operand:SF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] - "TARGET_80387" - "if (TARGET_SSE) - { - /* In case operand is in memory, we will not use SSE. */ - if (memory_operand (operands[0], VOIDmode) - && rtx_equal_p (operands[0], operands[1])) - emit_insn (gen_negsf2_memory (operands[0], operands[1])); - else - { - /* Using SSE is tricky, since we need bitwise negation of -0 - in register. 
*/ - rtx reg = gen_reg_rtx (SFmode); - rtx dest = operands[0]; - - operands[1] = force_reg (SFmode, operands[1]); - operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - GEN_INT (trunc_int_for_mode (0x80000000, - SImode)))); - emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg)); - if (dest != operands[0]) - emit_move_insn (dest, operands[0]); - } - DONE; - } - ix86_expand_unary_operator (NEG, SFmode, operands); DONE;") +(define_expand "2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "ix86_expand_fp_absneg_operator (, mode, operands); DONE;") + +(define_insn "*absneg2_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r") + (match_operator:MODEF 3 "absneg_operator" + [(match_operand:MODEF 1 "register_operand" "0,x,0,0")])) + (use (match_operand: 2 "nonimmediate_operand" "xm,0,X,X")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)" + "#") -(define_insn "negsf2_memory" - [(set (match_operand:SF 0 "memory_operand" "=m") - (neg:SF (match_operand:SF 1 "memory_operand" "0"))) - (clobber (reg:CC 17))] - "ix86_unary_operator_ok (NEG, SFmode, operands)" +(define_insn "*absneg2_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r") + (match_operator:MODEF 3 "absneg_operator" + [(match_operand:MODEF 1 "register_operand" "0 ,x,0")])) + (use (match_operand: 2 "register_operand" "xm,0,X")) + (clobber (reg:CC FLAGS_REG))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "#") -(define_insn "negsf2_ifs" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") - (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x")) - (clobber (reg:CC 17))] - "TARGET_SSE - && (reload_in_progress || reload_completed - || (register_operand (operands[0], 
VOIDmode) - && register_operand (operands[1], VOIDmode)))" +(define_insn "*absneg2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r") + (match_operator:X87MODEF 3 "absneg_operator" + [(match_operand:X87MODEF 1 "register_operand" "0,0")])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "#") -(define_split - [(set (match_operand:SF 0 "memory_operand" "") - (neg:SF (match_operand:SF 1 "memory_operand" ""))) - (use (match_operand:SF 2 "" "")) - (clobber (reg:CC 17))] - "" - [(parallel [(set (match_dup 0) - (neg:SF (match_dup 1))) - (clobber (reg:CC 17))])]) +(define_expand "tf2" + [(set (match_operand:TF 0 "register_operand" "") + (absneg:TF (match_operand:TF 1 "register_operand" "")))] + "TARGET_SSE2" + "ix86_expand_fp_absneg_operator (, TFmode, operands); DONE;") + +(define_insn "*absnegtf2_sse" + [(set (match_operand:TF 0 "register_operand" "=x,x") + (match_operator:TF 3 "absneg_operator" + [(match_operand:TF 1 "register_operand" "0,x")])) + (use (match_operand:TF 2 "nonimmediate_operand" "xm,0")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_SSE2" + "#") + +;; Splitters for fp abs and neg. 
(define_split - [(set (match_operand:SF 0 "register_operand" "") - (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) - (clobber (reg:CC 17))] - "reload_completed && !SSE_REG_P (operands[0])" - [(parallel [(set (match_dup 0) - (neg:SF (match_dup 1))) - (clobber (reg:CC 17))])]) + [(set (match_operand 0 "fp_register_operand" "") + (match_operator 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))]) (define_split - [(set (match_operand:SF 0 "register_operand" "") - (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) - (clobber (reg:CC 17))] + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "absneg_operator" + [(match_operand 1 "register_operand" "")])) + (use (match_operand 2 "nonimmediate_operand" "")) + (clobber (reg:CC FLAGS_REG))] "reload_completed && SSE_REG_P (operands[0])" - [(set (subreg:TI (match_dup 0) 0) - (xor:TI (subreg:TI (match_dup 1) 0) - (subreg:TI (match_dup 2) 0)))] + [(set (match_dup 0) (match_dup 3))] { + enum machine_mode mode = GET_MODE (operands[0]); + enum machine_mode vmode = GET_MODE (operands[2]); + rtx tmp; + + operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0); + operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0); if (operands_match_p (operands[0], operands[2])) { - rtx tmp; tmp = operands[1]; operands[1] = operands[2]; operands[2] = tmp; } + if (GET_CODE (operands[3]) == ABS) + tmp = gen_rtx_AND (vmode, operands[1], operands[2]); + else + tmp = gen_rtx_XOR (vmode, operands[1], operands[2]); + operands[3] = tmp; }) - -;; Keep 'f' and 'r' in separate alternatives to avoid reload problems -;; because of secondary memory needed to reload from class FLOAT_INT_REGS -;; to itself. 
-(define_insn "*negsf2_if" - [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f") - (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "TARGET_80387 && !TARGET_SSE - && ix86_unary_operator_ok (NEG, SFmode, operands)" - "#") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (neg:SF (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" - [(set (match_dup 0) - (neg:SF (match_dup 1)))] - "") - (define_split [(set (match_operand:SF 0 "register_operand" "") - (neg:SF (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" - [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (trunc_int_for_mode (0x80000000, SImode)); - operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") - -(define_split - [(set (match_operand 0 "memory_operand" "") - (neg (match_operand 1 "memory_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))" - [(parallel [(set (match_dup 0) (xor:QI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] -{ - int size = GET_MODE_SIZE (GET_MODE (operands[1])); - - /* XFmode's size is 12, TFmode 16, but only 10 bytes are used. 
*/ - if (size >= 12) - size = 10; - operands[0] = adjust_address (operands[0], QImode, size - 1); - operands[1] = GEN_INT (trunc_int_for_mode (0x80, QImode)); + (match_operator:SF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand:V4SF 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + operands[0] = gen_lowpart (SImode, operands[0]); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; }) -(define_expand "negdf2" - [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] - "TARGET_80387" - "if (TARGET_SSE2) - { - /* In case operand is in memory, we will not use SSE. */ - if (memory_operand (operands[0], VOIDmode) - && rtx_equal_p (operands[0], operands[1])) - emit_insn (gen_negdf2_memory (operands[0], operands[1])); - else - { - /* Using SSE is tricky, since we need bitwise negation of -0 - in register. 
*/ - rtx reg = gen_reg_rtx (DFmode); -#if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63, - DImode)); -#else - rtx imm = immed_double_const (0, 0x80000000, DImode); -#endif - rtx dest = operands[0]; - - operands[1] = force_reg (DFmode, operands[1]); - operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); - emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg)); - if (dest != operands[0]) - emit_move_insn (dest, operands[0]); - } - DONE; - } - ix86_expand_unary_operator (NEG, DFmode, operands); DONE;") - -(define_insn "negdf2_memory" - [(set (match_operand:DF 0 "memory_operand" "=m") - (neg:DF (match_operand:DF 1 "memory_operand" "0"))) - (clobber (reg:CC 17))] - "ix86_unary_operator_ok (NEG, DFmode, operands)" - "#") - -(define_insn "negdf2_ifs" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_SSE2 - && (reload_in_progress || reload_completed - || (register_operand (operands[0], VOIDmode) - && register_operand (operands[1], VOIDmode)))" - "#") - -(define_insn "*negdf2_ifs_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,fm#Yr,r#Yf") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) - (use (match_operand:DF 2 "general_operand" "Y,0,*g#Yr,*rm")) - (clobber (reg:CC 17))] - "TARGET_64BIT && TARGET_SSE2 - && (reload_in_progress || reload_completed - || (register_operand (operands[0], VOIDmode) - && register_operand (operands[1], VOIDmode)))" - "#") - -(define_split - [(set (match_operand:DF 0 "memory_operand" "") - (neg:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) - (clobber (reg:CC 17))] - "" - [(parallel [(set (match_dup 0) - (neg:DF (match_dup 1))) - (clobber (reg:CC 17))])]) - 
(define_split [(set (match_operand:DF 0 "register_operand" "") - (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) - (clobber (reg:CC 17))] - "reload_completed && !SSE_REG_P (operands[0]) - && (!TARGET_64BIT || FP_REG_P (operands[0]))" - [(parallel [(set (match_dup 0) - (neg:DF (match_dup 1))) - (clobber (reg:CC 17))])]) + (match_operator:DF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + if (TARGET_64BIT) + { + tmp = gen_lowpart (DImode, operands[0]); + tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63)); + operands[0] = tmp; -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) - (clobber (reg:CC 17))] - "TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])" - [(parallel [(set (match_dup 0) - (xor:DI (match_dup 1) (match_dup 2))) - (clobber (reg:CC 17))])] - "operands[0] = gen_lowpart (DImode, operands[0]); - operands[1] = gen_lowpart (DImode, operands[1]); - operands[2] = gen_lowpart (DImode, operands[2]);") + if (GET_CODE (operands[1]) == ABS) + tmp = const0_rtx; + else + tmp = gen_rtx_NOT (DImode, tmp); + } + else + { + operands[0] = gen_highpart (SImode, operands[0]); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + } + operands[1] = tmp; +}) (define_split - [(set (match_operand:DF 0 "register_operand" "") - (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) - (clobber (reg:CC 17))] - "reload_completed && SSE_REG_P (operands[0])" - [(set (subreg:TI (match_dup 0) 0) - (xor:TI 
(subreg:TI (match_dup 1) 0) - (subreg:TI (match_dup 2) 0)))] + [(set (match_operand:XF 0 "register_operand" "") + (match_operator:XF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] { - if (operands_match_p (operands[0], operands[2])) + rtx tmp; + operands[0] = gen_rtx_REG (SImode, + true_regnum (operands[0]) + + (TARGET_64BIT ? 1 : 2)); + if (GET_CODE (operands[1]) == ABS) { - rtx tmp; - tmp = operands[1]; - operands[1] = operands[2]; - operands[2] = tmp; + tmp = GEN_INT (0x7fff); + tmp = gen_rtx_AND (SImode, operands[0], tmp); } + else + { + tmp = GEN_INT (0x8000); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; }) -;; Keep 'f' and 'r' in separate alternatives to avoid reload problems -;; because of secondary memory needed to reload from class FLOAT_INT_REGS -;; to itself. -(define_insn "*negdf2_if" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 - && ix86_unary_operator_ok (NEG, DFmode, operands)" - "#") +;; Conditionalize these after reload. If they match before reload, we +;; lose the clobber and ability to use integer instructions. -;; FIXME: We should to allow integer registers here. Problem is that -;; we need another scratch register to get constant from. -;; Forcing constant to mem if no register available in peep2 should be -;; safe even for PIC mode, because of RIP relative addressing. 
-(define_insn "*negdf2_if_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,mf") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "TARGET_64BIT && TARGET_80387 - && ix86_unary_operator_ok (NEG, DFmode, operands)" - "#") +(define_insn "*2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))] + "TARGET_80387 + && (reload_completed + || !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "")]) -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (neg:DF (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" - [(set (match_dup 0) - (neg:DF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (neg:DF (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 && reload_completed - && !FP_REGNO_P (REGNO (operands[0]))" - [(parallel [(set (match_dup 3) (xor:SI (match_dup 3) (match_dup 4))) - (clobber (reg:CC 17))])] - "operands[4] = GEN_INT (trunc_int_for_mode (0x80000000, SImode)); - split_di (operands+0, 1, operands+2, operands+3);") - -(define_expand "negxf2" - [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") - (neg:XF (match_operand:XF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] - "!TARGET_64BIT && TARGET_80387" - "ix86_expand_unary_operator (NEG, XFmode, operands); DONE;") - -(define_expand "negtf2" - [(parallel [(set (match_operand:TF 0 "nonimmediate_operand" "") - (neg:TF (match_operand:TF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] - "TARGET_80387" - "ix86_expand_unary_operator (NEG, TFmode, operands); DONE;") - -;; Keep 'f' and 'r' in separate alternatives to avoid reload problems -;; because of secondary memory needed to reload from class FLOAT_INT_REGS -;; to 
itself. -(define_insn "*negxf2_if" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") - (neg:XF (match_operand:XF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 - && ix86_unary_operator_ok (NEG, XFmode, operands)" - "#") - -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (neg:XF (match_operand:XF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" - [(set (match_dup 0) - (neg:XF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (neg:XF (match_operand:XF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" - [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (0x8000); - operands[0] = gen_rtx_REG (SImode, - true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") - -;; Keep 'f' and 'r' in separate alternatives to avoid reload problems -;; because of secondary memory needed to reload from class FLOAT_INT_REGS -;; to itself. 
-(define_insn "*negtf2_if" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,rm#f") - (neg:TF (match_operand:TF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "TARGET_80387 && ix86_unary_operator_ok (NEG, TFmode, operands)" - "#") - -(define_split - [(set (match_operand:TF 0 "register_operand" "") - (neg:TF (match_operand:TF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" - [(set (match_dup 0) - (neg:TF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:TF 0 "register_operand" "") - (neg:TF (match_operand:TF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" - [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (0x8000); - operands[0] = gen_rtx_REG (SImode, - true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") - -;; Conditionize these after reload. If they matches before reload, we -;; lose the clobber and ability to use integer instructions. 
- -(define_insn "*negsf2_1" - [(set (match_operand:SF 0 "register_operand" "=f") - (neg:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "SF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negdf2_1" - [(set (match_operand:DF 0 "register_operand" "=f") - (neg:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negextendsfdf2" +(define_insn "*extendsfdf2" [(set (match_operand:DF 0 "register_operand" "=f") - (neg:DF (float_extend:DF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387" - "fchs" + (absneg:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" + "f" [(set_attr "type" "fsgn") - (set_attr "mode" "DF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negxf2_1" - [(set (match_operand:XF 0 "register_operand" "=f") - (neg:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && reload_completed" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negextenddfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (neg:XF (float_extend:XF - (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "DF")]) -(define_insn "*negextendsfxf2" +(define_insn "*extendsfxf2" [(set (match_operand:XF 0 "register_operand" "=f") - (neg:XF (float_extend:XF - (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negtf2_1" - [(set (match_operand:TF 0 "register_operand" "=f") - 
(neg:TF (match_operand:TF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negextenddftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (neg:TF (float_extend:TF - (match_operand:DF 1 "register_operand" "0"))))] + (absneg:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] "TARGET_80387" - "fchs" + "f" [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) + (set_attr "mode" "XF")]) -(define_insn "*negextendsftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (neg:TF (float_extend:TF - (match_operand:SF 1 "register_operand" "0"))))] +(define_insn "*extenddfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (absneg:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] "TARGET_80387" - "fchs" + "f" [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) - -;; Absolute value instructions - -(define_expand "abssf2" - [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") - (neg:SF (match_operand:SF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] - "TARGET_80387" - "if (TARGET_SSE) - { - /* In case operand is in memory, we will not use SSE. */ - if (memory_operand (operands[0], VOIDmode) - && rtx_equal_p (operands[0], operands[1])) - emit_insn (gen_abssf2_memory (operands[0], operands[1])); - else - { - /* Using SSE is tricky, since we need bitwise negation of -0 - in register. 
*/ - rtx reg = gen_reg_rtx (SFmode); - rtx dest = operands[0]; - - operands[1] = force_reg (SFmode, operands[1]); - operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - GEN_INT (trunc_int_for_mode (0x80000000, - SImode)))); - emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg)); - if (dest != operands[0]) - emit_move_insn (dest, operands[0]); - } - DONE; - } - ix86_expand_unary_operator (ABS, SFmode, operands); DONE;") - -(define_insn "abssf2_memory" - [(set (match_operand:SF 0 "memory_operand" "=m") - (abs:SF (match_operand:SF 1 "memory_operand" "0"))) - (clobber (reg:CC 17))] - "ix86_unary_operator_ok (ABS, SFmode, operands)" - "#") - -(define_insn "abssf2_ifs" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf") - (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x")) - (clobber (reg:CC 17))] - "TARGET_SSE - && (reload_in_progress || reload_completed - || (register_operand (operands[0], VOIDmode) - && register_operand (operands[1], VOIDmode)))" - "#") - -(define_split - [(set (match_operand:SF 0 "memory_operand" "") - (abs:SF (match_operand:SF 1 "memory_operand" ""))) - (use (match_operand:SF 2 "" "")) - (clobber (reg:CC 17))] - "" - [(parallel [(set (match_dup 0) - (abs:SF (match_dup 1))) - (clobber (reg:CC 17))])]) - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) - (clobber (reg:CC 17))] - "reload_completed && !SSE_REG_P (operands[0])" - [(parallel [(set (match_dup 0) - (abs:SF (match_dup 1))) - (clobber (reg:CC 17))])]) - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) - (clobber (reg:CC 17))] - "reload_completed && SSE_REG_P (operands[0])" - [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI 
(subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) - -;; Keep 'f' and 'r' in separate alternatives to avoid reload problems -;; because of secondary memory needed to reload from class FLOAT_INT_REGS -;; to itself. -(define_insn "*abssf2_if" - [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f") - (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "TARGET_80387 && ix86_unary_operator_ok (ABS, SFmode, operands) && !TARGET_SSE" - "#") + (set_attr "mode" "XF")]) -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (abs:SF (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0]))" - [(set (match_dup 0) - (abs:SF (match_dup 1)))] - "") +;; Copysign instructions -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (abs:SF (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" - [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode)); - operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") +(define_mode_iterator CSGNMODE [SF DF TF]) +(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")]) -(define_split - [(set (match_operand 0 "memory_operand" "") - (abs (match_operand 1 "memory_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))" - [(parallel [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] -{ - int size = GET_MODE_SIZE (GET_MODE (operands[1])); - - /* XFmode's size is 12, TFmode 16, but only 10 bytes are used. 
*/ - if (size >= 12) - size = 10; - operands[0] = adjust_address (operands[0], QImode, size - 1); - operands[1] = GEN_INT (trunc_int_for_mode (~0x80, QImode)); +(define_expand "copysign3" + [(match_operand:CSGNMODE 0 "register_operand" "") + (match_operand:CSGNMODE 1 "nonmemory_operand" "") + (match_operand:CSGNMODE 2 "register_operand" "")] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" +{ + ix86_expand_copysign (operands); + DONE; }) -(define_expand "absdf2" - [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] - "TARGET_80387" - "if (TARGET_SSE2) - { - /* In case operand is in memory, we will not use SSE. */ - if (memory_operand (operands[0], VOIDmode) - && rtx_equal_p (operands[0], operands[1])) - emit_insn (gen_absdf2_memory (operands[0], operands[1])); - else - { - /* Using SSE is tricky, since we need bitwise negation of -0 - in register. */ - rtx reg = gen_reg_rtx (DFmode); -#if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63, - DImode)); -#else - rtx imm = immed_double_const (0, 0x80000000, DImode); -#endif - rtx dest = operands[0]; - - operands[1] = force_reg (DFmode, operands[1]); - operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); - emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg)); - if (dest != operands[0]) - emit_move_insn (dest, operands[0]); - } - DONE; - } - ix86_expand_unary_operator (ABS, DFmode, operands); DONE;") - -(define_insn "absdf2_memory" - [(set (match_operand:DF 0 "memory_operand" "=m") - (abs:DF (match_operand:DF 1 "memory_operand" "0"))) - (clobber (reg:CC 17))] - "ix86_unary_operator_ok (ABS, DFmode, operands)" - "#") - -(define_insn "absdf2_ifs" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0"))) - (use 
(match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_SSE2 - && (reload_in_progress || reload_completed - || (register_operand (operands[0], VOIDmode) - && register_operand (operands[1], VOIDmode)))" - "#") - -(define_insn "*absdf2_ifs_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y")) - (clobber (reg:CC 17))] - "TARGET_64BIT && TARGET_SSE2 - && (reload_in_progress || reload_completed - || (register_operand (operands[0], VOIDmode) - && register_operand (operands[1], VOIDmode)))" - "#") - -(define_split - [(set (match_operand:DF 0 "memory_operand" "") - (abs:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) - (clobber (reg:CC 17))] - "" - [(parallel [(set (match_dup 0) - (abs:DF (match_dup 1))) - (clobber (reg:CC 17))])]) - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) - (clobber (reg:CC 17))] - "reload_completed && !SSE_REG_P (operands[0])" - [(parallel [(set (match_dup 0) - (abs:DF (match_dup 1))) - (clobber (reg:CC 17))])]) - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) - (clobber (reg:CC 17))] - "reload_completed && SSE_REG_P (operands[0])" - [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) - - -;; Keep 'f' and 'r' in separate alternatives to avoid reload problems -;; because of secondary memory needed to reload from class FLOAT_INT_REGS -;; to itself. 
-(define_insn "*absdf2_if" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 - && ix86_unary_operator_ok (ABS, DFmode, operands)" - "#") - -;; FIXME: We should to allow integer registers here. Problem is that -;; we need another scratch register to get constant from. -;; Forcing constant to mem if no register available in peep2 should be -;; safe even for PIC mode, because of RIP relative addressing. -(define_insn "*absdf2_if_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,mf") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "TARGET_64BIT && TARGET_80387 - && ix86_unary_operator_ok (ABS, DFmode, operands)" - "#") - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (abs:DF (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" - [(set (match_dup 0) - (abs:DF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (abs:DF (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 && reload_completed && - !FP_REGNO_P (REGNO (operands[0]))" - [(parallel [(set (match_dup 3) (and:SI (match_dup 3) (match_dup 4))) - (clobber (reg:CC 17))])] - "operands[4] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode)); - split_di (operands+0, 1, operands+2, operands+3);") - -(define_expand "absxf2" - [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") - (neg:XF (match_operand:XF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] - "!TARGET_64BIT && TARGET_80387" - "ix86_expand_unary_operator (ABS, XFmode, operands); DONE;") - -(define_expand "abstf2" - [(parallel [(set (match_operand:TF 0 "nonimmediate_operand" "") - (neg:TF (match_operand:TF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 
17))])] - "TARGET_80387" - "ix86_expand_unary_operator (ABS, TFmode, operands); DONE;") - -;; Keep 'f' and 'r' in separate alternatives to avoid reload problems -;; because of secondary memory needed to reload from class FLOAT_INT_REGS -;; to itself. -(define_insn "*absxf2_if" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") - (abs:XF (match_operand:XF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 - && ix86_unary_operator_ok (ABS, XFmode, operands)" - "#") - -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (abs:XF (match_operand:XF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" - [(set (match_dup 0) - (abs:XF (match_dup 1)))] - "") +(define_insn_and_split "copysign3_const" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x") + (unspec:CSGNMODE + [(match_operand: 1 "vector_move_operand" "xmC") + (match_operand:CSGNMODE 2 "register_operand" "0") + (match_operand: 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_const (operands); + DONE; +}) -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (abs:XF (match_operand:XF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" - [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (~0x8000); - operands[0] = gen_rtx_REG (SImode, - true_regnum (operands[0]) + (TARGET_64BIT ? 
1 : 2));") - -(define_insn "*abstf2_if" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,rm#f") - (abs:TF (match_operand:TF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "TARGET_80387 && ix86_unary_operator_ok (ABS, TFmode, operands)" +(define_insn "copysign3_var" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x") + (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x") + (match_operand: 4 "nonimmediate_operand" "X,xm,xm,0,0") + (match_operand: 5 "nonimmediate_operand" "0,xm,1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch: 1 "=x,x,x,x,x"))] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" "#") (define_split - [(set (match_operand:TF 0 "register_operand" "") - (abs:TF (match_operand:TF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" - [(set (match_dup 0) - (abs:TF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:TF 0 "register_operand" "") - (abs:TF (match_operand:TF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" - [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (~0x8000); - operands[0] = gen_rtx_REG (SImode, - true_regnum (operands[0]) + (TARGET_64BIT ? 
1 : 2));") - -(define_insn "*abssf2_1" - [(set (match_operand:SF 0 "register_operand" "=f") - (abs:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "SF")]) - -(define_insn "*absdf2_1" - [(set (match_operand:DF 0 "register_operand" "=f") - (abs:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) - -(define_insn "*absextendsfdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (abs:DF (float_extend:DF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) - -(define_insn "*absxf2_1" - [(set (match_operand:XF 0 "register_operand" "=f") - (abs:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && reload_completed" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) - -(define_insn "*absextenddfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (abs:XF (float_extend:XF - (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) - -(define_insn "*absextendsfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (abs:XF (float_extend:XF - (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) - -(define_insn "*abstf2_1" - [(set (match_operand:TF 0 "register_operand" "=f") - (abs:TF (match_operand:TF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) - -(define_insn "*absextenddftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (abs:TF (float_extend:TF - (match_operand:DF 1 "register_operand" "0"))))] - "TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) - 
-(define_insn "*absextendsftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (abs:TF (float_extend:TF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) + [(set (match_operand:CSGNMODE 0 "register_operand" "") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "") + (match_operand:CSGNMODE 3 "register_operand" "") + (match_operand: 4 "" "") + (match_operand: 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch: 1 ""))] + "((SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))) + && reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_var (operands); + DONE; +}) ;; One complement instructions @@ -10381,7 +10863,7 @@ (set_attr "mode" "DI")]) (define_insn "*one_cmpldi2_2_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") @@ -10393,17 +10875,19 @@ (set_attr "mode" "DI")]) (define_split - [(set (reg 17) - (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "")) - (const_int 0))) - (set (match_operand:DI 0 "nonimmediate_operand" "") - (not:DI (match_dup 1)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:DI (match_operand:DI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "nonimmediate_operand" "") + (not:DI (match_dup 3)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:DI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (xor:DI (match_dup 1) (const_int -1)))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 + [(xor:DI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:DI (match_dup 3) (const_int -1)))])] "") (define_expand "one_cmplsi2" @@ -10430,7 +10914,7 @@ (set_attr "mode" "SI")]) (define_insn 
"*one_cmplsi2_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") @@ -10442,22 +10926,23 @@ (set_attr "mode" "SI")]) (define_split - [(set (reg 17) - (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "")) - (const_int 0))) - (set (match_operand:SI 0 "nonimmediate_operand" "") - (not:SI (match_dup 1)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:SI 1 "nonimmediate_operand" "") + (not:SI (match_dup 3)))] "ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:SI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (xor:SI (match_dup 1) (const_int -1)))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:SI (match_dup 3) (const_int -1)))])] "") ;; ??? Currently never generated - xor is used instead. 
(define_insn "*one_cmplsi2_2_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (not:SI (match_operand:SI 1 "register_operand" "0")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") @@ -10469,17 +10954,18 @@ (set_attr "mode" "SI")]) (define_split - [(set (reg 17) - (compare (not:SI (match_operand:SI 1 "register_operand" "")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "") - (zero_extend:DI (not:SI (match_dup 1))))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "register_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "register_operand" "") + (zero_extend:DI (not:SI (match_dup 3))))] "ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:SI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (zero_extend:DI (xor:SI (match_dup 1) (const_int -1))))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])] "") (define_expand "one_cmplhi2" @@ -10497,7 +10983,7 @@ (set_attr "mode" "HI")]) (define_insn "*one_cmplhi2_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") @@ -10509,17 +10995,18 @@ (set_attr "mode" "HI")]) (define_split - [(set (reg 17) - (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "")) - (const_int 0))) - (set (match_operand:HI 0 "nonimmediate_operand" "") - (not:HI (match_dup 1)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:HI (match_operand:HI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:HI 1 "nonimmediate_operand" "") + (not:HI (match_dup 3)))] "ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - 
(compare:CCNO (xor:HI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (xor:HI (match_dup 1) (const_int -1)))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:HI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:HI (match_dup 3) (const_int -1)))])] "") ;; %%% Potential partial reg stall on alternative 1. What to do? @@ -10540,7 +11027,7 @@ (set_attr "mode" "QI,SI")]) (define_insn "*one_cmplqi2_2" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "0")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") @@ -10552,17 +11039,18 @@ (set_attr "mode" "QI")]) (define_split - [(set (reg 17) - (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "")) - (const_int 0))) - (set (match_operand:QI 0 "nonimmediate_operand" "") - (not:QI (match_dup 1)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:QI (match_operand:QI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:QI 1 "nonimmediate_operand" "") + (not:QI (match_dup 3)))] "ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:QI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (xor:QI (match_dup 1) (const_int -1)))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:QI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:QI (match_dup 3) (const_int -1)))])] "") ;; Arithmetic shift instructions @@ -10589,110 +11077,225 @@ ;; shift pair, instead using moves and sign extension for counts greater ;; than 31. -(define_expand "ashldi3" - [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "") - (ashift:DI (match_operand:DI 1 "shiftdi_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))])] - "" -{ - if (!TARGET_64BIT && TARGET_CMOVE && ! 
immediate_operand (operands[2], QImode)) - { - emit_insn (gen_ashldi3_1 (operands[0], operands[1], operands[2])); - DONE; - } - ix86_expand_binary_operator (ASHIFT, DImode, operands); - DONE; -}) +(define_expand "ashlti3" + [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (ASHIFT, TImode, operands); DONE;") -(define_insn "*ashldi3_1_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") - (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,r") - (match_operand:QI 2 "nonmemory_operand" "cJ,M"))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)" -{ - switch (get_attr_type (insn)) - { - case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - if (!rtx_equal_p (operands[0], operands[1])) - abort (); - return "add{q}\t{%0, %0|%0, %0}"; +;; This pattern must be defined before *ashlti3_1 to prevent +;; combine pass from converting sse2_ashlti3 to *ashlti3_1. 
- case TYPE_LEA: - if (GET_CODE (operands[2]) != CONST_INT - || (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 3) - abort (); - operands[1] = gen_rtx_MULT (DImode, operands[1], - GEN_INT (1 << INTVAL (operands[2]))); - return "lea{q}\t{%a1, %0|%0, %a1}"; +(define_insn "*avx_ashlti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ashift:TI (match_operand:TI 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix" "vex") + (set_attr "mode" "TI")]) - default: - if (REG_P (operands[2])) - return "sal{q}\t{%b2, %0|%0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) - return "sal{q}\t%0"; - else - return "sal{q}\t{%2, %0|%0, %2}"; - } +(define_insn "sse2_ashlti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "pslldq\t{%2, %0|%0, %2}"; } - [(set (attr "type") - (cond [(eq_attr "alternative" "1") - (const_string "lea") - (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") - (const_int 0)) - (match_operand 0 "register_operand" "")) - (match_operand 2 "const1_operand" "")) - (const_string "alu") - ] - (const_string "ishift"))) - (set_attr "mode" "DI")]) + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) -;; Convert lea to the lea pattern to avoid flags dependency. 
-(define_split +(define_insn "*ashlti3_1" + [(set (match_operand:TI 0 "register_operand" "=&r,r") + (ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "n,0") + (match_operand:QI 2 "nonmemory_operand" "Oc,Oc"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "TARGET_64BIT" + [(const_int 0)] + "ix86_split_ashl (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;") + +(define_insn "x86_64_shld" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") + (ior:DI (ashift:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "Jc")) + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "shld{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) + +(define_expand "x86_64_shift_adj_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") + (const_int 64)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:DI 1 "register_operand" "") + (match_dup 0))) + (set (match_dup 1) + (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:DI 
3 "register_operand" "r") + (match_dup 1)))] + "TARGET_64BIT" + "") + +(define_expand "x86_64_shift_adj_2" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "TARGET_64BIT" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + ix86_expand_clear (operands[1]); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;") + +(define_insn "*ashldi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cJ,M"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "add{q}\t%0, %0"; + + case TYPE_LEA: + gcc_assert (CONST_INT_P (operands[2])); + gcc_assert ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) <= 3); + operands[1] = gen_rtx_MULT (DImode, operands[1], + GEN_INT (1 << INTVAL (operands[2]))); + return "lea{q}\t{%a1, %0|%0, %a1}"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + 
return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split [(set (match_operand:DI 0 "register_operand" "") - (ashift:DI (match_operand:DI 1 "register_operand" "") + (ashift:DI (match_operand:DI 1 "index_register_operand" "") (match_operand:QI 2 "immediate_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" [(set (match_dup 0) (mult:DI (match_dup 1) (match_dup 2)))] - "operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - DImode));") + "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);") ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*ashldi3_cmp_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "immediate_operand" "e")) + (match_operand:QI 2 "const_1_to_63_operand" "J")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashift:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFT, DImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - return "add{q}\t{%0, %0|%0, %0}"; + gcc_assert (operands[2] == const1_rtx); + return "add{q}\t%0, %0"; default: if (REG_P (operands[2])) return "sal{q}\t{%b2, %0|%0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "sal{q}\t%0"; else return "sal{q}\t{%2, %0|%0, %2}"; @@ -10708,73 +11311,108 @@ (const_string "ishift"))) (set_attr "mode" "DI")]) -(define_insn "ashldi3_1" - [(set (match_operand:DI 0 "register_operand" "=r") - (ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "Jc"))) - (clobber (match_scratch:SI 3 "=&r")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_CMOVE" - "#" - [(set_attr "type" "multi")]) +(define_insn "*ashldi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || 
(operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{q}\t%0, %0"; -(define_insn "*ashldi3_2" - [(set (match_operand:DI 0 "register_operand" "=r") - (ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "Jc"))) - (clobber (reg:CC 17))] + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +(define_insn "*ashldi3_1" + [(set (match_operand:DI 0 "register_operand" "=&r,r") + (ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0") + (match_operand:QI 2 "nonmemory_operand" "Jc,Jc"))) + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" "#" [(set_attr "type" "multi")]) -(define_split - [(set (match_operand:DI 0 "register_operand" "") - (ashift:DI (match_operand:DI 1 "register_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (match_scratch:SI 3 "")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_CMOVE && reload_completed" +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. 
+(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" [(const_int 0)] - "ix86_split_ashldi (operands, operands[3]); DONE;") + "ix86_split_ashl (operands, operands[3], DImode); DONE;") (define_split [(set (match_operand:DI 0 "register_operand" "") - (ashift:DI (match_operand:DI 1 "register_operand" "") + (ashift:DI (match_operand:DI 1 "nonmemory_operand" "") (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && reload_completed" + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? epilogue_completed : reload_completed)" [(const_int 0)] - "ix86_split_ashldi (operands, NULL_RTX); DONE;") + "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;") -(define_insn "x86_shld_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m") +(define_insn "x86_shld" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) - (match_operand:QI 2 "nonmemory_operand" "I,c")) - (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:QI 2 "nonmemory_operand" "Ic")) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") (minus:QI (const_int 32) (match_dup 2))))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" - "@ - shld{l}\t{%2, %1, %0|%0, %1, %2} - shld{l}\t{%s2%1, %0|%0, %1, %2}" + "shld{l}\t{%s2%1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "ppro_uops" "few")]) + (set_attr "amdfam10_decode" "vector")]) (define_expand "x86_shift_adj_1" - [(set (reg:CCZ 17) + [(set (reg:CCZ FLAGS_REG) (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") (const_int 32)) (const_int 
0))) (set (match_operand:SI 0 "register_operand" "") - (if_then_else:SI (ne (reg:CCZ 17) (const_int 0)) + (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0)) (match_operand:SI 1 "register_operand" "") (match_dup 0))) (set (match_dup 1) - (if_then_else:SI (ne (reg:CCZ 17) (const_int 0)) + (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0)) (match_operand:SI 3 "register_operand" "r") (match_dup 1)))] "TARGET_CMOVE" @@ -10800,7 +11438,7 @@ JUMP_LABEL (tmp) = label; emit_move_insn (operands[0], operands[1]); - emit_move_insn (operands[1], const0_rtx); + ix86_expand_clear (operands[1]); emit_label (label); LABEL_NUSES (label) = 1; @@ -10811,26 +11449,23 @@ (define_expand "ashlsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "" "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;") (define_insn "*ashlsi3_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") - (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,r") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l") (match_operand:QI 2 "nonmemory_operand" "cI,M"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - if (!rtx_equal_p (operands[0], operands[1])) - abort (); - return "add{l}\t{%0, %0|%0, %0}"; + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "add{l}\t%0, %0"; case TYPE_LEA: return "#"; @@ -10838,9 +11473,8 @@ default: if (REG_P (operands[2])) return "sal{l}\t{%b2, %0|%0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || 
optimize_function_for_size_p (cfun))) return "sal{l}\t%0"; else return "sal{l}\t{%2, %0|%0, %2}"; @@ -10861,18 +11495,23 @@ ;; Convert lea to the lea pattern to avoid flags dependency. (define_split [(set (match_operand 0 "register_operand" "") - (ashift (match_operand 1 "register_operand" "") + (ashift (match_operand 1 "index_register_operand" "") (match_operand:QI 2 "const_int_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + && true_regnum (operands[0]) != true_regnum (operands[1]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 4" [(const_int 0)] { rtx pat; - operands[0] = gen_lowpart (SImode, operands[0]); - operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - Pmode)); + enum machine_mode mode = GET_MODE (operands[0]); + + if (GET_MODE_SIZE (mode) < 4) + operands[0] = gen_lowpart (SImode, operands[0]); + if (mode != Pmode) + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); + pat = gen_rtx_MULT (Pmode, operands[1], operands[2]); if (Pmode != SImode) pat = gen_rtx_SUBREG (SImode, pat, 0); @@ -10880,19 +11519,38 @@ DONE; }) +;; Rare case of shifting RSP is handled by generating move and shift +(define_split + [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat, clob; + emit_move_insn (operands[0], operands[1]); + pat = gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_ASHIFT (GET_MODE (operands[0]), + operands[0], operands[2])); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob))); + DONE; +}) + (define_insn 
"*ashlsi3_1_zext" [(set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,r") + (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,l") (match_operand:QI 2 "nonmemory_operand" "cI,M")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - return "add{l}\t{%k0, %k0|%k0, %k0}"; + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t%k0, %k0"; case TYPE_LEA: return "#"; @@ -10900,9 +11558,8 @@ default: if (REG_P (operands[2])) return "sal{l}\t{%b2, %k0|%k0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "sal{l}\t%k0"; else return "sal{l}\t{%2, %k0|%k0, %2}"; @@ -10924,43 +11581,88 @@ [(set (match_operand:DI 0 "register_operand" "") (zero_extend:DI (ashift (match_operand 1 "register_operand" "") (match_operand:QI 2 "const_int_operand" "")))) - (clobber (reg:CC 17))] - "reload_completed + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" - [(set (match_dup 0) (zero_extend:DI (subreg:SI (mult:SI (match_dup 1) (match_dup 2)) 0)))] + [(set (match_dup 0) (zero_extend:DI + (subreg:SI (mult:SI (match_dup 1) + (match_dup 2)) 0)))] { operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - Pmode)); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); }) ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*ashlsi3_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashift:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +(define_insn "*ashlsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - return "add{l}\t{%0, %0|%0, %0}"; + gcc_assert (operands[2] == const1_rtx); + return 
"add{l}\t%0, %0"; default: if (REG_P (operands[2])) return "sal{l}\t{%b2, %0|%0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "sal{l}\t%0"; else return "sal{l}\t{%2, %0|%0, %2}"; @@ -10977,29 +11679,33 @@ (set_attr "mode" "SI")]) (define_insn "*ashlsi3_cmp_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashift:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - return "add{l}\t{%k0, %k0|%k0, %k0}"; + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t%k0, %k0"; default: if (REG_P (operands[2])) return "sal{l}\t{%b2, %k0|%k0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "sal{l}\t%k0"; else return "sal{l}\t{%2, %k0|%k0, %2}"; @@ -11017,16 +11723,15 @@ (define_expand "ashlhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_HIMODE_MATH" 
"ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;") (define_insn "*ashlhi3_1_lea" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,r") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l") (match_operand:QI 2 "nonmemory_operand" "cI,M"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (ASHIFT, HImode, operands)" { @@ -11035,16 +11740,14 @@ case TYPE_LEA: return "#"; case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - return "add{w}\t{%0, %0|%0, %0}"; + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; default: if (REG_P (operands[2])) return "sal{w}\t{%b2, %0|%0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "sal{w}\t%0"; else return "sal{w}\t{%2, %0|%0, %2}"; @@ -11066,23 +11769,21 @@ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "nonmemory_operand" "cI"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (ASHIFT, HImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - return "add{w}\t{%0, %0|%0, %0}"; + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; default: if (REG_P (operands[2])) return "sal{w}\t{%b2, %0|%0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "sal{w}\t%0"; else return "sal{w}\t{%2, %0|%0, %2}"; @@ -11102,29 +11803,73 @@ ;; zero don't affect the flags. 
We assume that shifts by constant ;; zero are optimized away. (define_insn "*ashlhi3_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashift:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFT, HImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - return "add{w}\t{%0, %0|%0, %0}"; + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; default: if (REG_P (operands[2])) return "sal{w}\t{%b2, %0|%0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +(define_insn "*ashlhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, 
CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "sal{w}\t%0"; else return "sal{w}\t{%2, %0|%0, %2}"; @@ -11143,8 +11888,7 @@ (define_expand "ashlqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;") @@ -11152,9 +11896,9 @@ (define_insn "*ashlqi3_1_lea" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l") (match_operand:QI 2 "nonmemory_operand" "cI,cI,M"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (ASHIFT, QImode, operands)" { @@ -11163,12 +11907,11 @@ case TYPE_LEA: return "#"; case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) - return "add{l}\t{%k0, %k0|%k0, %k0}"; + return "add{l}\t%k0, %k0"; else - return "add{b}\t{%0, %0|%0, %0}"; + return "add{b}\t%0, %0"; default: if (REG_P (operands[2])) @@ -11178,9 +11921,8 @@ else return "sal{b}\t{%b2, %0|%0, %b2}"; } - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%0"; @@ -11212,19 +11954,18 @@ [(set 
(match_operand:QI 0 "nonimmediate_operand" "=qm,r") (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "cI,cI"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (ASHIFT, QImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) - return "add{l}\t{%k0, %k0|%k0, %k0}"; + return "add{l}\t%k0, %k0"; else - return "add{b}\t{%0, %0|%0, %0}"; + return "add{b}\t%0, %0"; default: if (REG_P (operands[2])) @@ -11234,9 +11975,8 @@ else return "sal{b}\t{%b2, %0|%0, %b2}"; } - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%0"; @@ -11266,29 +12006,32 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*ashlqi3_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashift:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFT, QImode, operands)" { switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - return "add{b}\t{%0, %0|%0, %0}"; + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t%0, %0"; default: if (REG_P (operands[2])) return "sal{b}\t{%b2, %0|%0, %b2}"; - else if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "sal{b}\t%0"; else return "sal{b}\t{%2, %0|%0, %2}"; @@ -11304,30 +12047,143 @@ (const_string "ishift"))) (set_attr "mode" "QI")]) -;; See comment above `ashldi3' about how this works. 
- -(define_expand "ashrdi3" - [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "") - (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))])] - "" +(define_insn "*ashlqi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" { - if (!TARGET_64BIT && TARGET_CMOVE && ! immediate_operand (operands[2], QImode)) + switch (get_attr_type (insn)) { - emit_insn (gen_ashrdi3_1 (operands[0], operands[1], operands[2])); - DONE; + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{b}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{b}\t%0"; + else + return "sal{b}\t{%2, %0|%0, %2}"; } - ix86_expand_binary_operator (ASHIFTRT, DImode, operands); - DONE; -}) +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI")]) + +;; See comment above `ashldi3' about how this works. 
+ +(define_expand "ashrti3" + [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (ASHIFTRT, TImode, operands); DONE;") + +(define_insn "*ashrti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Oc"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "TARGET_64BIT" + [(const_int 0)] + "ix86_split_ashr (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;") + +(define_insn "x86_64_shrd" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") + (ior:DI (ashiftrt:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "Jc")) + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "shrd{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;") + +(define_expand "x86_64_shift_adj_3" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + emit_insn (gen_ashrdi3_63_rex64 (operands[1], operands[1], GEN_INT (63))); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) (define_insn "ashrdi3_63_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm") (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0") (match_operand:DI 2 "const_int_operand" "i,i"))) - (clobber (reg:CC 17))] - "TARGET_64BIT && INTVAL (operands[2]) == 63 && (TARGET_USE_CLTD || optimize_size) + (clobber 
(reg:CC FLAGS_REG))] + "TARGET_64BIT && INTVAL (operands[2]) == 63 + && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "@ {cqto|cqo} @@ -11341,14 +12197,15 @@ (define_insn "*ashrdi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:DI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -11356,7 +12213,7 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "J,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "@ sar{q}\t{%2, %0|%0, %2} @@ -11368,94 +12225,118 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*ashrdi3_one_bit_cmp_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:DI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*ashrdi3_one_bit_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*ashrdi3_cmp_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_operand" "n")) + (match_operand:QI 2 "const_1_to_63_operand" "J")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) +(define_insn "*ashrdi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) -(define_insn "ashrdi3_1" - [(set (match_operand:DI 0 "register_operand" "=r") - (ashiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "Jc"))) - (clobber (match_scratch:SI 3 "=&r")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_CMOVE" - "#" - [(set_attr "type" "multi")]) - -(define_insn "*ashrdi3_2" +(define_insn "*ashrdi3_1" [(set (match_operand:DI 0 "register_operand" "=r") (ashiftrt:DI (match_operand:DI 1 "register_operand" "0") (match_operand:QI 2 "nonmemory_operand" "Jc"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" "#" [(set_attr "type" "multi")]) -(define_split - [(set (match_operand:DI 0 "register_operand" "") - (ashiftrt:DI (match_operand:DI 1 "register_operand" "") - (match_operand:QI 2 
"nonmemory_operand" ""))) - (clobber (match_scratch:SI 3 "")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_CMOVE && reload_completed" +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" [(const_int 0)] - "ix86_split_ashrdi (operands, operands[3]); DONE;") + "ix86_split_ashr (operands, operands[3], DImode); DONE;") (define_split [(set (match_operand:DI 0 "register_operand" "") (ashiftrt:DI (match_operand:DI 1 "register_operand" "") (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && reload_completed" + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed)" [(const_int 0)] - "ix86_split_ashrdi (operands, NULL_RTX); DONE;") + "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;") -(define_insn "x86_shrd_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m") +(define_insn "x86_shrd" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashiftrt:SI (match_dup 0) - (match_operand:QI 2 "nonmemory_operand" "I,c")) - (ashift:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:QI 2 "nonmemory_operand" "Ic")) + (ashift:SI (match_operand:SI 1 "register_operand" "r") (minus:QI (const_int 32) (match_dup 2))))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" - "@ - shrd{l}\t{%2, %1, %0|%0, %1, %2} - shrd{l}\t{%s2%1, %0|%0, %1, %2}" + "shrd{l}\t{%s2%1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "pent_pair" "np") - (set_attr "ppro_uops" "few") (set_attr "mode" "SI")]) (define_expand "x86_shift_adj_3" @@ -11486,12 +12367,20 @@ DONE; }) -(define_insn "ashrsi3_31" +(define_expand "ashrsi3_31" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0") + (match_operand:SI 2 "const_int_operand" "i,i"))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_insn "*ashrsi3_31" [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm") (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0") (match_operand:SI 2 "const_int_operand" "i,i"))) - (clobber (reg:CC 17))] - "INTVAL (operands[2]) == 31 && (TARGET_USE_CLTD || optimize_size) + (clobber (reg:CC FLAGS_REG))] + "INTVAL (operands[2]) == 31 + && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "@ {cltd|cdq} @@ -11506,8 +12395,8 @@ [(set (match_operand:DI 0 "register_operand" "=*d,r") (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0") (match_operand:SI 2 "const_int_operand" "i,i")))) - (clobber 
(reg:CC 17))] - "TARGET_64BIT && (TARGET_USE_CLTD || optimize_size) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) && INTVAL (operands[2]) == 31 && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "@ @@ -11522,32 +12411,32 @@ (define_expand "ashrsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "" "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;") (define_insn "*ashrsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) (define_insn "*ashrsi3_1_one_bit_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%k0" [(set_attr "type" "ishift") 
(set_attr "length" "2")]) @@ -11556,7 +12445,7 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "@ sar{l}\t{%2, %0|%0, %2} @@ -11568,7 +12457,7 @@ [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "@ sar{l}\t{%2, %k0|%k0, %2} @@ -11580,33 +12469,48 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. (define_insn "*ashrsi3_one_bit_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*ashrsi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode 
(insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + (define_insn "*ashrsi3_one_bit_cmp_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%k0" [(set_attr "type" "ishift") @@ -11616,28 +12520,45 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. (define_insn "*ashrsi3_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok 
(ASHIFTRT, SImode, operands)" "sar{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) (define_insn "*ashrsi3_cmp_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "ishift") @@ -11646,22 +12567,21 @@ (define_expand "ashrhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;") (define_insn "*ashrhi3_1_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -11669,7 +12589,7 @@ [(set (match_operand:HI 0 "nonimmediate_operand" 
"=rm,rm") (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "@ sar{w}\t{%2, %0|%0, %2} @@ -11681,35 +12601,64 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. (define_insn "*ashrhi3_one_bit_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*ashrhi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*ashrhi3_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_insn "*ashrhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") @@ -11718,22 +12667,36 @@ (define_expand "ashrqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;") (define_insn "*ashrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok 
(ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -11741,7 +12704,7 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "@ sar{b}\t{%2, %0|%0, %2} @@ -11749,75 +12712,176 @@ [(set_attr "type" "ishift") (set_attr "mode" "QI")]) +(define_insn "*ashrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + sar{b}\t{%1, %0|%0, %1} + sar{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*ashrqi3_one_bit_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "I")) + (match_operand:QI 2 "const1_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*ashrqi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*ashrqi3_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*ashrqi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) + ;; Logical shift instructions ;; See comment above `ashldi3' about how this works. -(define_expand "lshrdi3" - [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "") - (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") +(define_expand "lshrti3" + [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (LSHIFTRT, TImode, operands); DONE;") + +;; This pattern must be defined before *lshrti3_1 to prevent +;; combine pass from converting sse2_lshrti3 to *lshrti3_1. 
+ +(define_insn "*avx_lshrti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix" "vex") + (set_attr "mode" "TI")]) + +(define_insn "sse2_lshrti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "psrldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + +(define_insn "*lshrti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Oc"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "TARGET_64BIT" + [(const_int 0)] + "ix86_split_lshr (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;") + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] "" -{ - if (!TARGET_64BIT && TARGET_CMOVE && ! immediate_operand (operands[2], QImode)) - { - emit_insn (gen_lshrdi3_1 (operands[0], operands[1], operands[2])); - DONE; - } - ix86_expand_binary_operator (LSHIFTRT, DImode, operands); - DONE; -}) + "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;") (define_insn "*lshrdi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:DI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -11825,7 +12889,7 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "J,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "@ shr{q}\t{%2, %0|%0, %2} @@ -11837,107 +12901,135 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*lshrdi3_cmp_one_bit_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:DI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*lshrdi3_cconly_one_bit_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*lshrdi3_cmp_rex64" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_operand" "e")) + (match_operand:QI 2 "const_1_to_63_operand" "J")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) -(define_insn "lshrdi3_1" - [(set (match_operand:DI 0 "register_operand" "=r") - (lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "Jc"))) - (clobber (match_scratch:SI 3 "=&r")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_CMOVE" - "#" - [(set_attr "type" "multi")]) +(define_insn "*lshrdi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) -(define_insn "*lshrdi3_2" +(define_insn "*lshrdi3_1" [(set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (match_operand:DI 1 "register_operand" "0") (match_operand:QI 2 "nonmemory_operand" "Jc"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" "#" [(set_attr "type" "multi")]) -(define_split - [(set (match_operand:DI 0 "register_operand" "") - (lshiftrt:DI (match_operand:DI 1 "register_operand" "") - (match_operand:QI 2 
"nonmemory_operand" ""))) - (clobber (match_scratch:SI 3 "")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_CMOVE && reload_completed" +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" [(const_int 0)] - "ix86_split_lshrdi (operands, operands[3]); DONE;") + "ix86_split_lshr (operands, operands[3], DImode); DONE;") -(define_split +(define_split [(set (match_operand:DI 0 "register_operand" "") (lshiftrt:DI (match_operand:DI 1 "register_operand" "") (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] - "!TARGET_64BIT && reload_completed" + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed)" [(const_int 0)] - "ix86_split_lshrdi (operands, NULL_RTX); DONE;") + "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;") (define_expand "lshrsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "" "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;") (define_insn "*lshrsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) (define_insn "*lshrsi3_1_one_bit_zext" [(set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0")) - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -11946,7 +13038,7 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") (lshiftrt:SI 
(match_operand:SI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "@ shr{l}\t{%2, %0|%0, %2} @@ -11959,7 +13051,7 @@ (zero_extend:DI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "@ shr{l}\t{%2, %k0|%k0, %2} @@ -11971,33 +13063,48 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. (define_insn "*lshrsi3_one_bit_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*lshrsi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + (define_insn 
"*lshrsi3_cmp_one_bit_zext" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%k0" [(set_attr "type" "ishift") @@ -12007,28 +13114,45 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. (define_insn "*lshrsi3_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*lshrsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) (define_insn "*lshrsi3_cmp_zext" - [(set (reg 17) + [(set 
(reg FLAGS_REG) (compare (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "ishift") @@ -12037,22 +13161,21 @@ (define_expand "lshrhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;") (define_insn "*lshrhi3_1_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12060,7 +13183,7 @@ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC 
FLAGS_REG))] "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "@ shr{w}\t{%2, %0|%0, %2} @@ -12072,35 +13195,64 @@ ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. (define_insn "*lshrhi3_one_bit_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*lshrhi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*lshrhi3_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_insn "*lshrhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") @@ -12109,22 +13261,35 @@ (define_expand "lshrqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;") (define_insn "*lshrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok 
(LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))" + "shr{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12132,7 +13297,7 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "@ shr{b}\t{%2, %0|%0, %2} @@ -12140,39 +13305,81 @@ [(set_attr "type" "ishift") (set_attr "mode" "QI")]) +(define_insn "*lshrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + shr{b}\t{%1, %0|%0, %1} + shr{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*lshrqi2_one_bit_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*lshrqi2_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
(define_insn "*lshrqi2_cmp" - [(set (reg 17) + [(set (reg FLAGS_REG) (compare (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*lshrqi2_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") @@ -12181,24 +13388,60 @@ ;; Rotate instructions (define_expand "rotldi3" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_64BIT" - "ix86_expand_binary_operator (ROTATE, DImode, operands); DONE;") + [(set (match_operand:DI 0 "shiftdi_operand" "") + (rotate:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" +{ + if (TARGET_64BIT) + { + ix86_expand_binary_operator (ROTATE, DImode, operands); + DONE; + } + if (!const_1_to_31_operand (operands[2], VOIDmode)) + FAIL; + emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2])); + DONE; +}) + +;; Implement rotation using two double-precision shift instructions +;; and a scratch register. 
+(define_insn_and_split "ix86_rotldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotate:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 3 "=&r"))] + "!TARGET_64BIT" + "" + "&& reload_completed" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (match_dup 4) + (ior:SI (ashift:SI (match_dup 4) (match_dup 2)) + (lshiftrt:SI (match_dup 5) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 5) + (ior:SI (ashift:SI (match_dup 5) (match_dup 2)) + (lshiftrt:SI (match_dup 3) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 1, &operands[4], &operands[5]);") (define_insn "*rotlsi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, DImode, operands)" "rol{q}\t%0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:DI 0 "register_operand" "") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12206,33 +13449,32 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "e,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)" "@ rol{q}\t{%2, %0|%0, %2} rol{q}\t{%b2, 
%0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "DI")]) (define_expand "rotlsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "" "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;") (define_insn "*rotlsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, SImode, operands)" "rol{l}\t%0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12240,24 +13482,25 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (rotate:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, SImode, operands)" "rol{l}\t%k0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "length" "2")]) (define_insn "*rotlsi3_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") (rotate:SI (match_operand:SI 1 
"nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ROTATE, SImode, operands)" "@ rol{l}\t{%2, %0|%0, %2} rol{l}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_insn "*rotlsi3_1_zext" @@ -12265,33 +13508,32 @@ (zero_extend:DI (rotate:SI (match_operand:SI 1 "register_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)" "@ rol{l}\t{%2, %k0|%k0, %2} rol{l}\t{%b2, %k0|%k0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_expand "rotlhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;") (define_insn "*rotlhi3_1_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ROTATE, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, HImode, operands)" "rol{w}\t%0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12299,67 +13541,139 @@ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") 
(rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ROTATE, HImode, operands)" "@ rol{w}\t{%2, %0|%0, %2} rol{w}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "HI")]) +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (rotate:HI (match_dup 0) (const_int 8))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (strict_low_part (match_dup 0)) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))])] + "") + (define_expand "rotlqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;") +(define_insn "*rotlqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))" + "rol{b}\t%0" + [(set_attr "type" "rotate1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + (define_insn "*rotlqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ROTATE, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, QImode, operands)" "rol{b}\t%0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*rotlqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + rol{b}\t{%1, %0|%0, %1} + rol{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") + (set_attr "mode" "QI")]) + (define_insn "*rotlqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ROTATE, QImode, operands)" "@ rol{b}\t{%2, %0|%0, %2} rol{b}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "QI")]) (define_expand "rotrdi3" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_64BIT" - "ix86_expand_binary_operator (ROTATERT, DImode, operands); DONE;") + [(set (match_operand:DI 0 "shiftdi_operand" "") + (rotate:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" +{ + if (TARGET_64BIT) + { + ix86_expand_binary_operator (ROTATERT, DImode, operands); + DONE; + } + if (!const_1_to_31_operand (operands[2], VOIDmode)) + FAIL; + emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2])); + DONE; +}) + +;; Implement rotation using two double-precision shift instructions +;; and a scratch register. 
+(define_insn_and_split "ix86_rotrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotatert:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 3 "=&r"))] + "!TARGET_64BIT" + "" + "&& reload_completed" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (match_dup 4) + (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2)) + (ashift:SI (match_dup 5) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 5) + (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2)) + (ashift:SI (match_dup 3) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 1, &operands[4], &operands[5]);") (define_insn "*rotrdi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, DImode, operands)" "ror{q}\t%0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:DI 0 "register_operand" "") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12367,33 +13681,32 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "J,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)" "@ ror{q}\t{%2, %0|%0, %2} 
ror{q}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "DI")]) (define_expand "rotrsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "" "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;") (define_insn "*rotrsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, SImode, operands)" "ror{l}\t%0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12401,14 +13714,15 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (rotatert:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")))) - (clobber (reg:CC 17))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, SImode, operands)" "ror{l}\t%k0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand:SI 0 "register_operand" "") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else 
(match_operand:SI 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12416,12 +13730,12 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ROTATERT, SImode, operands)" "@ ror{l}\t{%2, %0|%0, %2} ror{l}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_insn "*rotrsi3_1_zext" @@ -12429,67 +13743,89 @@ (zero_extend:DI (rotatert:SI (match_operand:SI 1 "register_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c")))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)" "@ ror{l}\t{%2, %k0|%k0, %2} ror{l}\t{%b2, %k0|%k0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_expand "rotrhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_HIMODE_MATH" "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;") (define_insn "*rotrhi3_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ROTATERT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, HImode, operands)" "ror{w}\t%0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + [(set_attr "type" 
"rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) -(define_insn "*rotrhi3" +(define_insn "*rotrhi3_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ROTATERT, HImode, operands)" "@ ror{w}\t{%2, %0|%0, %2} ror{w}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "HI")]) +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (rotatert:HI (match_dup 0) (const_int 8))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (strict_low_part (match_dup 0)) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))])] + "") + (define_expand "rotrqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) - (clobber (reg:CC 17))] + (match_operand:QI 2 "nonmemory_operand" "")))] "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;") (define_insn "*rotrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) - (clobber (reg:CC 17))] - "ix86_binary_operator_ok (ROTATERT, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, QImode, operands)" "ror{b}\t%0" - [(set_attr "type" "ishift") - (set (attr "length") - (if_then_else (match_operand 0 "register_operand" "") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + 
(const_string "2") + (const_string "*")))]) + +(define_insn "*rotrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))" + "ror{b}\t%0" + [(set_attr "type" "rotate1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) @@ -12497,12 +13833,25 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ROTATERT, QImode, operands)" "@ ror{b}\t{%2, %0|%0, %2} ror{b}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") + (set_attr "mode" "QI")]) + +(define_insn "*rotrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + ror{b}\t{%1, %0|%0, %1} + ror{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") (set_attr "mode" "QI")]) ;; Bit set / bit test instructions @@ -12510,8 +13859,8 @@ (define_expand "extv" [(set (match_operand:SI 0 "register_operand" "") (sign_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "immediate_operand" "") - (match_operand:SI 3 "immediate_operand" "")))] + (match_operand:SI 2 "const8_operand" "") + (match_operand:SI 3 "const8_operand" "")))] "" { /* Handle extractions from %ah et al. 
*/ @@ -12520,15 +13869,15 @@ /* From mips.md: extract_bit_field doesn't verify that our source matches the predicate, so check it again here. */ - if (! register_operand (operands[1], VOIDmode)) + if (! ext_register_operand (operands[1], VOIDmode)) FAIL; }) (define_expand "extzv" [(set (match_operand:SI 0 "register_operand" "") (zero_extract:SI (match_operand 1 "ext_register_operand" "") - (match_operand:SI 2 "immediate_operand" "") - (match_operand:SI 3 "immediate_operand" "")))] + (match_operand:SI 2 "const8_operand" "") + (match_operand:SI 3 "const8_operand" "")))] "" { /* Handle extractions from %ah et al. */ @@ -12537,176 +13886,251 @@ /* From mips.md: extract_bit_field doesn't verify that our source matches the predicate, so check it again here. */ - if (! register_operand (operands[1], VOIDmode)) + if (! ext_register_operand (operands[1], VOIDmode)) FAIL; }) (define_expand "insv" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "") - (match_operand:SI 1 "immediate_operand" "") - (match_operand:SI 2 "immediate_operand" "")) - (match_operand:SI 3 "register_operand" ""))] + [(set (zero_extract (match_operand 0 "ext_register_operand" "") + (match_operand 1 "const8_operand" "") + (match_operand 2 "const8_operand" "")) + (match_operand 3 "register_operand" ""))] "" { - /* Handle extractions from %ah et al. */ + /* Handle insertions to %ah et al. */ if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8) FAIL; /* From mips.md: insert_bit_field doesn't verify that our source matches the predicate, so check it again here. */ - if (! register_operand (operands[0], VOIDmode)) + if (! ext_register_operand (operands[0], VOIDmode)) FAIL; + + if (TARGET_64BIT) + emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3])); + else + emit_insn (gen_movsi_insv_1 (operands[0], operands[3])); + + DONE; }) ;; %%% bts, btr, btc, bt. - -;; Store-flag instructions. 
+;; In general these instructions are *slow* when applied to memory, +;; since they enforce atomic operation. When applied to registers, +;; it depends on the cpu implementation. They're never faster than +;; the corresponding and/ior/xor operations, so with 32-bit there's +;; no point. But in 64-bit, we can't hold the relevant immediates +;; within the instruction itself, so operating on bits in the high +;; 32-bits of a register becomes easier. +;; +;; These are slow on Nocona, but fast on Athlon64. We do require the use +;; of btrq and btcq for corner cases of post-reload expansion of absdf and +;; negdf respectively, so they can never be disabled entirely. + +(define_insn "*btsq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 1)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "bts{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1")]) + +(define_insn "*btrq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "btr{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1")]) + +(define_insn "*btcq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "btc{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1")]) + +;; Allow Nocona to avoid these instructions if a register is available. -;; For all sCOND expanders, also expand the compare or test insn that -;; generates cc0. Generate an equality comparison if `seq' or `sne'. 
+(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 1)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx op1; -;; %%% Do the expansion to SImode. If PII, do things the xor+setcc way -;; to avoid partial register stalls. Otherwise do things the setcc+movzx -;; way, which can later delete the movzx if only QImode is needed. + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); -(define_expand "seq" - [(set (match_operand:QI 0 "register_operand" "") - (eq:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (EQ, operands[0])) DONE; else FAIL;") + op1 = immed_double_const (lo, hi, DImode); + if (i >= 31) + { + emit_move_insn (operands[2], op1); + op1 = operands[2]; + } -(define_expand "sne" - [(set (match_operand:QI 0 "register_operand" "") - (ne:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (NE, operands[0])) DONE; else FAIL;") + emit_insn (gen_iordi3 (operands[0], operands[0], op1)); + DONE; +}) -(define_expand "sgt" - [(set (match_operand:QI 0 "register_operand" "") - (gt:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (GT, operands[0])) DONE; else FAIL;") +(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 0)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx op1; -(define_expand "sgtu" - [(set (match_operand:QI 0 "register_operand" "") - (gtu:QI (reg:CC 17) 
(const_int 0)))] - "" - "if (ix86_expand_setcc (GTU, operands[0])) DONE; else FAIL;") + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); -(define_expand "slt" - [(set (match_operand:QI 0 "register_operand" "") - (lt:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (LT, operands[0])) DONE; else FAIL;") + op1 = immed_double_const (~lo, ~hi, DImode); + if (i >= 32) + { + emit_move_insn (operands[2], op1); + op1 = operands[2]; + } -(define_expand "sltu" - [(set (match_operand:QI 0 "register_operand" "") - (ltu:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (LTU, operands[0])) DONE; else FAIL;") + emit_insn (gen_anddi3 (operands[0], operands[0], op1)); + DONE; +}) -(define_expand "sge" - [(set (match_operand:QI 0 "register_operand" "") - (ge:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (GE, operands[0])) DONE; else FAIL;") - -(define_expand "sgeu" - [(set (match_operand:QI 0 "register_operand" "") - (geu:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (GEU, operands[0])) DONE; else FAIL;") - -(define_expand "sle" - [(set (match_operand:QI 0 "register_operand" "") - (le:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (LE, operands[0])) DONE; else FAIL;") +(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (not:DI (zero_extract:DI + (match_dup 0) (const_int 1) (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx op1; -(define_expand "sleu" - [(set (match_operand:QI 0 "register_operand" "") - (leu:QI (reg:CC 17) (const_int 0)))] - "" - "if (ix86_expand_setcc (LEU, operands[0])) DONE; 
else FAIL;") + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); -(define_expand "sunordered" - [(set (match_operand:QI 0 "register_operand" "") - (unordered:QI (reg:CC 17) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNORDERED, operands[0])) DONE; else FAIL;") + op1 = immed_double_const (lo, hi, DImode); + if (i >= 31) + { + emit_move_insn (operands[2], op1); + op1 = operands[2]; + } -(define_expand "sordered" - [(set (match_operand:QI 0 "register_operand" "") - (ordered:QI (reg:CC 17) (const_int 0)))] - "TARGET_80387" - "if (ix86_expand_setcc (ORDERED, operands[0])) DONE; else FAIL;") + emit_insn (gen_xordi3 (operands[0], operands[0], op1)); + DONE; +}) -(define_expand "suneq" - [(set (match_operand:QI 0 "register_operand" "") - (uneq:QI (reg:CC 17) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNEQ, operands[0])) DONE; else FAIL;") +(define_insn "*btdi_rex64" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:DI + (match_operand:DI 0 "register_operand" "r") + (const_int 1) + (match_operand:DI 1 "nonmemory_operand" "rN")) + (const_int 0)))] + "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))" + "bt{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1")]) -(define_expand "sunge" - [(set (match_operand:QI 0 "register_operand" "") - (unge:QI (reg:CC 17) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNGE, operands[0])) DONE; else FAIL;") +(define_insn "*btsi" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_operand:SI 0 "register_operand" "r") + (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "rN")) + (const_int 0)))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "bt{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1")]) + +;; Store-flag instructions. 
-(define_expand "sungt" - [(set (match_operand:QI 0 "register_operand" "") - (ungt:QI (reg:CC 17) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNGT, operands[0])) DONE; else FAIL;") +;; For all sCOND expanders, also expand the compare or test insn that +;; generates cc0. Generate an equality comparison if `seq' or `sne'. -(define_expand "sunle" - [(set (match_operand:QI 0 "register_operand" "") - (unle:QI (reg:CC 17) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNLE, operands[0])) DONE; else FAIL;") +;; %%% Do the expansion to SImode. If PII, do things the xor+setcc way +;; to avoid partial register stalls. Otherwise do things the setcc+movzx +;; way, which can later delete the movzx if only QImode is needed. -(define_expand "sunlt" +(define_expand "s" [(set (match_operand:QI 0 "register_operand" "") - (unlt:QI (reg:CC 17) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNLT, operands[0])) DONE; else FAIL;") + (int_cond:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (, operands[0])) DONE; else FAIL;") -(define_expand "sltgt" +(define_expand "s" [(set (match_operand:QI 0 "register_operand" "") - (ltgt:QI (reg:CC 17) (const_int 0)))] + (fp_cond:QI (reg:CC FLAGS_REG) (const_int 0)))] "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (LTGT, operands[0])) DONE; else FAIL;") + "if (ix86_expand_setcc (, operands[0])) DONE; else FAIL;") (define_insn "*setcc_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (match_operator:QI 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]))] + [(reg FLAGS_REG) (const_int 0)]))] "" "set%C1\t%0" [(set_attr "type" "setcc") (set_attr "mode" "QI")]) -(define_insn "setcc_2" +(define_insn "*setcc_2" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) (match_operator:QI 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]))] + [(reg FLAGS_REG) (const_int 0)]))] "" "set%C1\t%0" [(set_attr "type" 
"setcc") (set_attr "mode" "QI")]) ;; In general it is not safe to assume too much about CCmode registers, -;; so simplify-rtx stops when it sees a second one. Under certain +;; so simplify-rtx stops when it sees a second one. Under certain ;; conditions this is safe on x86, so help combine not create ;; ;; seta %al ;; testb %al, %al ;; sete %al -(define_split +(define_split [(set (match_operand:QI 0 "nonimmediate_operand" "") (ne:QI (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + [(reg FLAGS_REG) (const_int 0)]) (const_int 0)))] "" [(set (match_dup 0) (match_dup 1))] @@ -12714,10 +14138,10 @@ PUT_MODE (operands[1], QImode); }) -(define_split +(define_split [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (ne:QI (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + [(reg FLAGS_REG) (const_int 0)]) (const_int 0)))] "" [(set (match_dup 0) (match_dup 1))] @@ -12725,10 +14149,10 @@ PUT_MODE (operands[1], QImode); }) -(define_split +(define_split [(set (match_operand:QI 0 "nonimmediate_operand" "") (eq:QI (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + [(reg FLAGS_REG) (const_int 0)]) (const_int 0)))] "" [(set (match_dup 0) (match_dup 1))] @@ -12736,8 +14160,8 @@ rtx new_op1 = copy_rtx (operands[1]); operands[1] = new_op1; PUT_MODE (new_op1, QImode); - PUT_CODE (new_op1, REVERSE_CONDITION (GET_CODE (new_op1), - GET_MODE (XEXP (new_op1, 0)))); + PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1), + GET_MODE (XEXP (new_op1, 0)))); /* Make sure that (a) the CCmode we have for the flags is strong enough for the reversed compare or (b) we have a valid FP compare. 
*/ @@ -12745,10 +14169,10 @@ FAIL; }) -(define_split +(define_split [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (eq:QI (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + [(reg FLAGS_REG) (const_int 0)]) (const_int 0)))] "" [(set (match_dup 0) (match_dup 1))] @@ -12756,8 +14180,8 @@ rtx new_op1 = copy_rtx (operands[1]); operands[1] = new_op1; PUT_MODE (new_op1, QImode); - PUT_CODE (new_op1, REVERSE_CONDITION (GET_CODE (new_op1), - GET_MODE (XEXP (new_op1, 0)))); + PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1), + GET_MODE (XEXP (new_op1, 0)))); /* Make sure that (a) the CCmode we have for the flags is strong enough for the reversed compare or (b) we have a valid FP compare. */ @@ -12768,185 +14192,68 @@ ;; The SSE store flag instructions saves 0 or 0xffffffff to the result. ;; subsequent logical operations are used to imitate conditional moves. ;; 0xffffffff is NaN, but not in normalized form, so we can't represent -;; it directly. Futher holding this value in pseudo register might bring -;; problem in implicit normalization in spill code. -;; So we don't define FLOAT_STORE_FLAG_VALUE and create these -;; instructions after reload by splitting the conditional move patterns. - -(define_insn "*sse_setccsf" - [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 1 "sse_comparison_operator" - [(match_operand:SF 2 "register_operand" "0") - (match_operand:SF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE && reload_completed" - "cmp%D1ss\t{%3, %0|%0, %3}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) +;; it directly. 
+ +(define_insn "*avx_setcc" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 1 "avx_comparison_float_operator" + [(match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] + "TARGET_AVX" + "vcmp%D1s\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*sse_setcc" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 1 "sse_comparison_operator" + [(match_operand:MODEF 2 "register_operand" "0") + (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && !TARGET_SSE5" + "cmp%D1s\t{%3, %0|%0, %3}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "")]) + +(define_insn "*sse5_setcc" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 1 "sse5_comparison_float_operator" + [(match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] + "TARGET_SSE5" + "com%Y1s\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "")]) -(define_insn "*sse_setccdf" - [(set (match_operand:DF 0 "register_operand" "=Y") - (match_operator:DF 1 "sse_comparison_operator" - [(match_operand:DF 2 "register_operand" "0") - (match_operand:DF 3 "nonimmediate_operand" "Ym")]))] - "TARGET_SSE2 && reload_completed" - "cmp%D1sd\t{%3, %0|%0, %3}" - [(set_attr "type" "sse") - (set_attr "mode" "DF")]) ;; Basic conditional jump instructions. ;; We ignore the overflow flag for signed branch instructions. ;; For all bCOND expanders, also expand the compare or test insn that -;; generates reg 17. Generate an equality comparison if `beq' or `bne'. 
- -(define_expand "beq" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (EQ, operands[0]); DONE;") - -(define_expand "bne" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (NE, operands[0]); DONE;") - -(define_expand "bgt" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (GT, operands[0]); DONE;") - -(define_expand "bgtu" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (GTU, operands[0]); DONE;") - -(define_expand "blt" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (LT, operands[0]); DONE;") - -(define_expand "bltu" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (LTU, operands[0]); DONE;") - -(define_expand "bge" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (GE, operands[0]); DONE;") - -(define_expand "bgeu" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (GEU, operands[0]); DONE;") - -(define_expand "ble" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (LE, operands[0]); DONE;") +;; generates reg FLAGS_REG. Generate an equality comparison if `beq' or `bne'. 
-(define_expand "bleu" +(define_expand "b" [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) + (if_then_else (int_cond:CC (reg:CC FLAGS_REG) + (const_int 0)) + (label_ref (match_operand 0 "")) (pc)))] "" - "ix86_expand_branch (LEU, operands[0]); DONE;") - -(define_expand "bunordered" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE" - "ix86_expand_branch (UNORDERED, operands[0]); DONE;") - -(define_expand "bordered" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE" - "ix86_expand_branch (ORDERED, operands[0]); DONE;") - -(define_expand "buneq" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE" - "ix86_expand_branch (UNEQ, operands[0]); DONE;") - -(define_expand "bunge" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE" - "ix86_expand_branch (UNGE, operands[0]); DONE;") - -(define_expand "bungt" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE" - "ix86_expand_branch (UNGT, operands[0]); DONE;") - -(define_expand "bunle" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE" - "ix86_expand_branch (UNLE, operands[0]); DONE;") + "ix86_expand_branch (, operands[0]); DONE;") -(define_expand "bunlt" +(define_expand "b" [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE" - "ix86_expand_branch (UNLT, operands[0]); DONE;") - -(define_expand "bltgt" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) + (if_then_else (fp_cond:CC (reg:CC FLAGS_REG) + (const_int 0)) + (label_ref (match_operand 0 "")) (pc)))] - "TARGET_80387 || TARGET_SSE" 
- "ix86_expand_branch (LTGT, operands[0]); DONE;") + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (, operands[0]); DONE;") (define_insn "*jcc_1" [(set (pc) (if_then_else (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + [(reg FLAGS_REG) (const_int 0)]) (label_ref (match_operand 0 "" "")) (pc)))] "" @@ -12964,7 +14271,7 @@ (define_insn "*jcc_2" [(set (pc) (if_then_else (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + [(reg FLAGS_REG) (const_int 0)]) (pc) (label_ref (match_operand 0 "" ""))))] "" @@ -12980,17 +14287,17 @@ (const_int 6)))]) ;; In general it is not safe to assume too much about CCmode registers, -;; so simplify-rtx stops when it sees a second one. Under certain +;; so simplify-rtx stops when it sees a second one. Under certain ;; conditions this is safe on x86, so help combine not create ;; ;; seta %al ;; testb %al, %al ;; je Lfoo -(define_split +(define_split [(set (pc) (if_then_else (ne (match_operator 0 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + [(reg FLAGS_REG) (const_int 0)]) (const_int 0)) (label_ref (match_operand 1 "" "")) (pc)))] @@ -13002,11 +14309,11 @@ { PUT_MODE (operands[0], VOIDmode); }) - -(define_split + +(define_split [(set (pc) (if_then_else (eq (match_operator 0 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + [(reg FLAGS_REG) (const_int 0)]) (const_int 0)) (label_ref (match_operand 1 "" "")) (pc)))] @@ -13019,8 +14326,8 @@ rtx new_op0 = copy_rtx (operands[0]); operands[0] = new_op0; PUT_MODE (new_op0, VOIDmode); - PUT_CODE (new_op0, REVERSE_CONDITION (GET_CODE (new_op0), - GET_MODE (XEXP (new_op0, 0)))); + PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0), + GET_MODE (XEXP (new_op0, 0)))); /* Make sure that (a) the CCmode we have for the flags is strong enough for the reversed compare or (b) we have a valid FP compare. 
*/ @@ -13028,22 +14335,220 @@ FAIL; }) +;; zero_extend in SImode is correct, since this is what combine pass +;; generates from shift insn with QImode operand. Actually, the mode of +;; operand 2 (bit offset operand) doesn't matter since bt insn takes +;; appropriate modulo of the bit offset value. + +(define_insn_and_split "*jcc_btdi_rex64" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:DI + (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (zero_extend:SI + (match_operand:QI 2 "register_operand" "r"))) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:DI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (DImode, operands[2], QImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; avoid useless masking of bit offset operand +(define_insn_and_split "*jcc_btdi_mask_rex64" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:DI + (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")))]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & 0x3f) == 0x3f" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:DI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] +{ + operands[2] = 
simplify_gen_subreg (DImode, operands[2], SImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +(define_insn_and_split "*jcc_btsi" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SI + (match_operand:SI 1 "register_operand" "r") + (const_int 1) + (zero_extend:SI + (match_operand:QI 2 "register_operand" "r"))) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; avoid useless masking of bit offset operand +(define_insn_and_split "*jcc_btsi_mask" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SI + (match_operand:SI 1 "register_operand" "r") + (const_int 1) + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")))]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & 0x1f) == 0x1f" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] + "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));") + +(define_insn_and_split "*jcc_btsi_1" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(and:SI + (lshiftrt:SI + (match_operand:SI 1 
"register_operand" "r") + (match_operand:QI 2 "register_operand" "r")) + (const_int 1)) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; avoid useless masking of bit offset operand +(define_insn_and_split "*jcc_btsi_mask_1" + [(set (pc) + (if_then_else + (match_operator 0 "bt_comparison_operator" + [(and:SI + (lshiftrt:SI + (match_operand:SI 1 "register_operand" "r") + (subreg:QI + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")) 0)) + (const_int 1)) + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & 0x1f) == 0x1f" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] + "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));") + ;; Define combination compare-and-branch fp compare instructions to use ;; during early optimization. Splitting the operation apart early makes ;; for bad code when we want to reverse the operation. 
-(define_insn "*fp_jcc_1" +(define_insn "*fp_jcc_1_mixed" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "f") - (match_operand 2 "register_operand" "f")]) + [(match_operand 1 "register_operand" "f,x") + (match_operand 2 "nonimmediate_operand" "f,xm")]) (label_ref (match_operand 3 "" "")) (pc))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17))] - "TARGET_CMOVE && TARGET_80387 - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1])) - && FLOAT_MODE_P (GET_MODE (operands[1])) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -13051,44 +14556,44 @@ (define_insn "*fp_jcc_1_sse" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "f#x,x#f") - (match_operand 2 "nonimmediate_operand" "f#x,xm#f")]) + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) (label_ref (match_operand 3 "" "")) (pc))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17))] - "TARGET_80387 + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_1_sse_only" +(define_insn "*fp_jcc_1_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "x") - (match_operand 2 "nonimmediate_operand" "xm")]) + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) (label_ref (match_operand 3 "" "")) (pc))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17))] - "SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "X87_FLOAT_MODE_P 
(GET_MODE (operands[1])) + && TARGET_CMOVE && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_2" +(define_insn "*fp_jcc_2_mixed" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "f") - (match_operand 2 "register_operand" "f")]) + [(match_operand 1 "register_operand" "f,x") + (match_operand 2 "nonimmediate_operand" "f,xm")]) (pc) (label_ref (match_operand 3 "" "")))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17))] - "TARGET_CMOVE && TARGET_80387 - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1])) - && FLOAT_MODE_P (GET_MODE (operands[1])) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -13096,41 +14601,42 @@ (define_insn "*fp_jcc_2_sse" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "f#x,x#f") - (match_operand 2 "nonimmediate_operand" "f#x,xm#f")]) + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) (pc) (label_ref (match_operand 3 "" "")))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17))] - "TARGET_80387 + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_2_sse_only" +(define_insn "*fp_jcc_2_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "x") - (match_operand 2 "nonimmediate_operand" "xm")]) + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) (pc) (label_ref (match_operand 3 "" "")))) - (clobber (reg:CCFP 18)) - (clobber 
(reg:CCFP 17))] - "SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_CMOVE && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_3" +(define_insn "*fp_jcc_3_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") (match_operand 2 "nonimmediate_operand" "fm")]) (label_ref (match_operand 3 "" "")) (pc))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17)) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] "TARGET_80387 && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) @@ -13141,15 +14647,15 @@ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_4" +(define_insn "*fp_jcc_4_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") (match_operand 2 "nonimmediate_operand" "fm")]) (pc) (label_ref (match_operand 3 "" "")))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17)) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] "TARGET_80387 && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) @@ -13160,38 +14666,78 @@ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_5" +(define_insn "*fp_jcc_5_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") (match_operand 2 "register_operand" "f")]) (label_ref (match_operand 3 "" "")) (pc))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17)) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) 
== GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_6" +(define_insn "*fp_jcc_6_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") (match_operand 2 "register_operand" "f")]) (pc) (label_ref (match_operand 3 "" "")))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17)) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_7_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") +;; The order of operands in *fp_jcc_8_387 is forced by combine in +;; simplify_comparison () function. Float operator is treated as RTX_OBJ +;; with a precedence over other operators and is always put in the first +;; place. Swap condition and operands to match ficom instruction. 
+ +(define_insn "*fp_jcc_8_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")]) + (match_operand 3 "register_operand" "f,f")]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a,a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[3])) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[1]) == GET_MODE (operands[3]) + && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0]))) + && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode + && ix86_fp_jump_nontrivial_p (swap_condition (GET_CODE (operands[0])))" + "#") + (define_split [(set (pc) (if_then_else (match_operator 0 "comparison_operator" @@ -13199,13 +14745,13 @@ (match_operand 2 "nonimmediate_operand" "")]) (match_operand 3 "" "") (match_operand 4 "" ""))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17))] + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] "reload_completed" [(const_int 0)] { ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], - operands[3], operands[4], NULL_RTX); + operands[3], operands[4], NULL_RTX, NULL_RTX); DONE; }) @@ -13213,29 +14759,70 @@ [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "") - (match_operand 2 "nonimmediate_operand" "")]) + (match_operand 2 "general_operand" "")]) (match_operand 3 "" "") (match_operand 4 "" ""))) - (clobber (reg:CCFP 18)) - (clobber (reg:CCFP 17)) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 5 "=a"))] "reload_completed" - [(set (pc) - (if_then_else (match_dup 6) - (match_dup 3) - (match_dup 4)))] + [(const_int 0)] { ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], - operands[3], operands[4], operands[5]); + 
operands[3], operands[4], operands[5], NULL_RTX); DONE; }) - -;; Unconditional and other jump instructions -(define_insn "jump" +(define_split [(set (pc) - (label_ref (match_operand 0 "" "")))] - "" + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "")]) + (match_operand 3 "register_operand" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6 "=a"))] + "reload_completed" + [(const_int 0)] +{ + operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]); + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), + operands[3], operands[7], + operands[4], operands[5], operands[6], NULL_RTX); + DONE; +}) + +;; %%% Kill this when reload knows how to do it. +(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "register_operand" "")]) + (match_operand 3 "register_operand" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6 "=a"))] + "reload_completed" + [(const_int 0)] +{ + operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]); + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), + operands[3], operands[7], + operands[4], operands[5], operands[6], operands[2]); + DONE; +}) + +;; Unconditional and other jump instructions + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" "jmp\t%l0" [(set_attr "type" "ibr") (set (attr "length") @@ -13248,184 +14835,70 @@ (set_attr "modrm" "0")]) (define_expand "indirect_jump" - [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))] + [(set (pc) (match_operand 0 "nonimmediate_operand" ""))] "" "") 
(define_insn "*indirect_jump" - [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))] - "!TARGET_64BIT" - "jmp\t%A0" - [(set_attr "type" "ibr") - (set_attr "length_immediate" "0")]) - -(define_insn "*indirect_jump_rtx64" - [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm"))] - "TARGET_64BIT" + [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))] + "" "jmp\t%A0" [(set_attr "type" "ibr") (set_attr "length_immediate" "0")]) (define_expand "tablejump" - [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "rm")) + [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "")) (use (label_ref (match_operand 1 "" "")))])] "" { - /* In PIC mode, the table entries are stored GOT-relative. Convert - the relative address to an absolute address. */ + /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit) + relative. Convert the relative address to an absolute address. */ if (flag_pic) { - if (TARGET_64BIT) - operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], - gen_rtx_LABEL_REF (Pmode, operands[1]), - NULL_RTX, 0, - OPTAB_DIRECT); - else if (HAVE_AS_GOTOFF_IN_DATA) + rtx op0, op1; + enum rtx_code code; + + /* We can't use @GOTOFF for text labels on VxWorks; + see gotoff_operand. 
*/ + if (TARGET_64BIT || TARGET_VXWORKS_RTP) + { + code = PLUS; + op0 = operands[0]; + op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + } + else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA) { - operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], - pic_offset_table_rtx, NULL_RTX, - 1, OPTAB_DIRECT); - current_function_uses_pic_offset_table = 1; + code = PLUS; + op0 = operands[0]; + op1 = pic_offset_table_rtx; } else { - operands[0] = expand_simple_binop (Pmode, MINUS, pic_offset_table_rtx, - operands[0], NULL_RTX, 1, - OPTAB_DIRECT); - current_function_uses_pic_offset_table = 1; + code = MINUS; + op0 = pic_offset_table_rtx; + op1 = operands[0]; } + + operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0, + OPTAB_DIRECT); } }) (define_insn "*tablejump_1" - [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm")) - (use (label_ref (match_operand 1 "" "")))] - "!TARGET_64BIT" - "jmp\t%A0" - [(set_attr "type" "ibr") - (set_attr "length_immediate" "0")]) - -(define_insn "*tablejump_1_rtx64" - [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm")) + [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm")) (use (label_ref (match_operand 1 "" "")))] - "TARGET_64BIT" + "" "jmp\t%A0" [(set_attr "type" "ibr") (set_attr "length_immediate" "0")]) -;; Loop instruction -;; -;; This is all complicated by the fact that since this is a jump insn -;; we must handle our own reloads. - -(define_expand "doloop_end" - [(use (match_operand 0 "" "")) ; loop pseudo - (use (match_operand 1 "" "")) ; iterations; zero if unknown - (use (match_operand 2 "" "")) ; max iterations - (use (match_operand 3 "" "")) ; loop level - (use (match_operand 4 "" ""))] ; label - "!TARGET_64BIT && TARGET_USE_LOOP" - " -{ - /* Only use cloop on innermost loops. 
*/ - if (INTVAL (operands[3]) > 1) - FAIL; - if (GET_MODE (operands[0]) != SImode) - FAIL; - emit_jump_insn (gen_doloop_end_internal (operands[4], operands[0], - operands[0])); - DONE; -}") - -(define_insn "doloop_end_internal" - [(set (pc) - (if_then_else (ne (match_operand:SI 1 "register_operand" "c,?*r,?*r") - (const_int 1)) - (label_ref (match_operand 0 "" "")) - (pc))) - (set (match_operand:SI 2 "register_operand" "=1,1,*m*r") - (plus:SI (match_dup 1) - (const_int -1))) - (clobber (match_scratch:SI 3 "=X,X,r")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_USE_LOOP" -{ - if (which_alternative != 0) - return "#"; - if (get_attr_length (insn) == 2) - return "%+loop\t%l0"; - else - return "dec{l}\t%1\;%+jne\t%l0"; -} - [(set_attr "ppro_uops" "many") - (set (attr "length") - (if_then_else (and (eq_attr "alternative" "0") - (and (ge (minus (match_dup 0) (pc)) - (const_int -126)) - (lt (minus (match_dup 0) (pc)) - (const_int 128)))) - (const_int 2) - (const_int 16))) - ;; We don't know the type before shorten branches. Optimistically expect - ;; the loop instruction to match. 
- (set (attr "type") (const_string "ibr"))]) - -(define_split - [(set (pc) - (if_then_else (ne (match_operand:SI 1 "register_operand" "") - (const_int 1)) - (match_operand 0 "" "") - (pc))) - (set (match_dup 1) - (plus:SI (match_dup 1) - (const_int -1))) - (clobber (match_scratch:SI 2 "")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_USE_LOOP - && reload_completed - && REGNO (operands[1]) != 2" - [(parallel [(set (reg:CCZ 17) - (compare:CCZ (plus:SI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 1) (plus:SI (match_dup 1) (const_int -1)))]) - (set (pc) (if_then_else (ne (reg:CCZ 17) (const_int 0)) - (match_dup 0) - (pc)))] - "") - -(define_split - [(set (pc) - (if_then_else (ne (match_operand:SI 1 "register_operand" "") - (const_int 1)) - (match_operand 0 "" "") - (pc))) - (set (match_operand:SI 2 "nonimmediate_operand" "") - (plus:SI (match_dup 1) - (const_int -1))) - (clobber (match_scratch:SI 3 "")) - (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_USE_LOOP - && reload_completed - && (! REG_P (operands[2]) - || ! rtx_equal_p (operands[1], operands[2]))" - [(set (match_dup 3) (match_dup 1)) - (parallel [(set (reg:CCZ 17) - (compare:CCZ (plus:SI (match_dup 3) (const_int -1)) - (const_int 0))) - (set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))]) - (set (match_dup 2) (match_dup 3)) - (set (pc) (if_then_else (ne (reg:CCZ 17) (const_int 0)) - (match_dup 0) - (pc)))] - "") - ;; Convert setcc + movzbl to xor + setcc if operands don't overlap. 
(define_peephole2 - [(set (reg 17) (match_operand 0 "" "")) + [(set (reg FLAGS_REG) (match_operand 0 "" "")) (set (match_operand:QI 1 "register_operand" "") (match_operator:QI 2 "ix86_comparison_operator" - [(reg 17) (const_int 0)])) + [(reg FLAGS_REG) (const_int 0)])) (set (match_operand 3 "q_regs_operand" "") (zero_extend (match_dup 1)))] "(peep2_reg_dead_p (3, operands[1]) @@ -13435,21 +14908,21 @@ (set (strict_low_part (match_dup 5)) (match_dup 2))] { - operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17); - operands[5] = gen_rtx_REG (QImode, REGNO (operands[3])); + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) ;; Similar, but match zero_extendhisi2_and, which adds a clobber. (define_peephole2 - [(set (reg 17) (match_operand 0 "" "")) + [(set (reg FLAGS_REG) (match_operand 0 "" "")) (set (match_operand:QI 1 "register_operand" "") (match_operator:QI 2 "ix86_comparison_operator" - [(reg 17) (const_int 0)])) + [(reg FLAGS_REG) (const_int 0)])) (parallel [(set (match_operand 3 "q_regs_operand" "") (zero_extend (match_dup 1))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0])" @@ -13457,8 +14930,8 @@ (set (strict_low_part (match_dup 5)) (match_dup 2))] { - operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17); - operands[5] = gen_rtx_REG (QImode, REGNO (operands[3])); + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) @@ -13468,37 +14941,28 @@ ;; checked for calls. This is a bug in the generic code, but it isn't that ;; easy to fix. Ignore it for now and be prepared to fix things up. 
+;; P6 processors will jump to the address after the decrement when %esp +;; is used as a call operand, so they will execute return address as a code. +;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17. + ;; Call subroutine returning no value. (define_expand "call_pop" [(parallel [(call (match_operand:QI 0 "" "") (match_operand:SI 1 "" "")) - (set (reg:SI 7) - (plus:SI (reg:SI 7) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) (match_operand:SI 3 "" "")))])] "!TARGET_64BIT" { - if (operands[3] == const0_rtx) - { - emit_insn (gen_call (operands[0], operands[1], constm1_rtx)); - DONE; - } - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0))) - current_function_uses_pic_offset_table = 1; - if (! call_insn_operand (XEXP (operands[0], 0), Pmode)) - XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); - if (TARGET_64BIT) - abort(); + ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0); + DONE; }) (define_insn "*call_pop_0" [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) (match_operand:SI 1 "" "")) - (set (reg:SI 7) (plus:SI (reg:SI 7) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_operand:SI 2 "immediate_operand" "")))] "!TARGET_64BIT" { @@ -13508,62 +14972,50 @@ return "call\t%P0"; } [(set_attr "type" "call")]) - + (define_insn "*call_pop_1" - [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm")) + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) (match_operand:SI 1 "" "")) - (set (reg:SI 7) (plus:SI (reg:SI 7) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_operand:SI 2 "immediate_operand" "i")))] - "!TARGET_64BIT" + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" { if (constant_call_address_operand (operands[0], Pmode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P0"; - else - 
return "call\t%P0"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A0"; - else - return "call\t%A0"; + return "call\t%P0"; + return "call\t%A0"; } [(set_attr "type" "call")]) +(define_insn "*sibcall_pop_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i,i")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P0 + jmp\t%A0" + [(set_attr "type" "call")]) + (define_expand "call" [(call (match_operand:QI 0 "" "") (match_operand 1 "" "")) (use (match_operand 2 "" ""))] - ;; Operand 1 not used on the i386. "" { - rtx insn; - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0))) - current_function_uses_pic_offset_table = 1; - - if (! call_insn_operand (XEXP (operands[0], 0), Pmode)) - XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); - if (TARGET_64BIT && INTVAL (operands[2]) >= 0) - { - rtx reg = gen_rtx_REG (QImode, 0); - emit_move_insn (reg, operands[2]); - insn = emit_call_insn (gen_call_exp (operands[0], operands[1])); - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); - DONE; - } - insn = emit_call_insn (gen_call_exp (operands[0], operands[1])); - DONE; + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0); + DONE; }) -(define_expand "call_exp" +(define_expand "sibcall" [(call (match_operand:QI 0 "" "") - (match_operand 1 "" ""))] + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] "" - "") +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1); + DONE; +}) (define_insn "*call_0" [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) @@ -13578,69 +15030,90 @@ [(set_attr "type" "call")]) (define_insn "*call_1" - [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm")) + 
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) (match_operand 1 "" ""))] - "!TARGET_64BIT" + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" { - if (constant_call_address_operand (operands[0], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P0"; - else - return "call\t%P0"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A0"; - else - return "call\t%A0"; + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; } [(set_attr "type" "call")]) +(define_insn "*sibcall_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + (match_operand 1 "" ""))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P0 + jmp\t%A0" + [(set_attr "type" "call")]) + (define_insn "*call_1_rex64" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) (match_operand 1 "" ""))] - "TARGET_64BIT" + "TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" { - if (constant_call_address_operand (operands[0], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P0"; - else - return "call\t%P0"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A0"; - else - return "call\t%A0"; + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; } [(set_attr "type" "call")]) -;; Call subroutine, returning value in operand 0 -;; (which must be a hard register). 
+(define_insn "*call_1_rex64_ms_sysv" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" "")) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64_large" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) + (match_operand 1 "" ""))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + "call\t%A0" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1_rex64" + [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U")) + (match_operand 1 "" ""))] + "TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P0 + jmp\t%A0" + [(set_attr "type" "call")]) +;; Call subroutine, returning value in operand 0 (define_expand "call_value_pop" [(parallel [(set (match_operand 0 "" "") (call (match_operand:QI 1 "" "") (match_operand:SI 2 "" ""))) - (set (reg:SI 7) - (plus:SI (reg:SI 7) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) (match_operand:SI 4 "" "")))])] "!TARGET_64BIT" { - if (operands[4] == const0_rtx) - { - emit_insn (gen_call_value (operands[0], operands[1], operands[2], - constm1_rtx)); - DONE; - } - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0))) - current_function_uses_pic_offset_table = 1; - if (! 
call_insn_operand (XEXP (operands[1], 0), Pmode)) - XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], operands[4], 0); + DONE; }) (define_expand "call_value" @@ -13651,35 +15124,21 @@ ;; Operand 2 not used on the i386. "" { - rtx insn; - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0))) - current_function_uses_pic_offset_table = 1; - if (! call_insn_operand (XEXP (operands[1], 0), Pmode)) - XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); - if (TARGET_64BIT && INTVAL (operands[3]) >= 0) - { - rtx reg = gen_rtx_REG (QImode, 0); - emit_move_insn (reg, operands[3]); - insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1], - operands[2])); - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); - DONE; - } - insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1], - operands[2])); + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0); DONE; }) -(define_expand "call_value_exp" +(define_expand "sibcall_value" [(set (match_operand 0 "" "") (call (match_operand:QI 1 "" "") - (match_operand:SI 2 "" "")))] + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 2 not used on the i386. "" - "") +{ + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1); + DONE; +}) ;; Call subroutine returning any type. @@ -13695,14 +15154,21 @@ /* In order to give reg-stack an easier job in validating two coprocessor registers as containing a possible return value, simply pretend the untyped call returns a complex long double - value. */ - - emit_call_insn (TARGET_FLOAT_RETURNS_IN_80387 - ? 
gen_call_value (gen_rtx_REG (XCmode, FIRST_FLOAT_REG), - operands[0], const0_rtx, - GEN_INT (SSE_REGPARM_MAX - 1)) - : gen_call (operands[0], const0_rtx, - GEN_INT (SSE_REGPARM_MAX - 1))); + value. + + We can't use SSE_REGPARM_MAX here since callee is unprototyped + and should have the default ABI. */ + + ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 + ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), + operands[0], const0_rtx, + GEN_INT ((TARGET_64BIT + ? (DEFAULT_ABI == SYSV_ABI + ? X86_64_SSE_REGPARM_MAX + : X64_SSE_REGPARM_MAX) + : X86_32_SSE_REGPARM_MAX) + - 1), + NULL, 0); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -13725,7 +15191,33 @@ ;; all of memory. This blocks insns from being moved across this point. (define_insn "blockage" - [(unspec_volatile [(const_int 0)] 0)] + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; Do not schedule instructions accessing memory across this point. + +(define_expand "memory_blockage" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_blockage" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))] + "" + "" + [(set_attr "length" "0")]) + +;; As USE insns aren't meaningful after reload, this is used instead +;; to prevent deleting instructions setting registers for PIC code +(define_insn "prologue_use" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)] "" "" [(set_attr "length" "0")]) @@ -13738,9 +15230,9 @@ [(return)] "ix86_can_use_return_insn_p ()" { - if (current_function_pops_args) + if (crtl->args.pops_args) { - rtx popc = GEN_INT (current_function_pops_args); + rtx popc = GEN_INT (crtl->args.pops_args); emit_jump_insn (gen_return_pop_internal (popc)); DONE; } @@ -13754,6 +15246,19 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) +;; 
Used by x86_machine_dependent_reorg to avoid penalty on single byte RET +;; instruction Athlon and K8 have. + +(define_insn "return_internal_long" + [(return) + (unspec [(const_int 0)] UNSPEC_REP)] + "reload_completed" + "rep\;ret" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "prefix_rep" "1") + (set_attr "modrm" "0")]) + (define_insn "return_pop_internal" [(return) (use (match_operand:SI 0 "const_int_operand" ""))] @@ -13777,70 +15282,93 @@ "nop" [(set_attr "length" "1") (set_attr "length_immediate" "0") - (set_attr "modrm" "0") - (set_attr "ppro_uops" "one")]) + (set_attr "modrm" "0")]) + +;; Align to 16-byte boundary, max skip in op0. Used to avoid +;; branch prediction penalty for the third jump in a 16-byte +;; block on K8. + +(define_insn "align" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)] + "" +{ +#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN + ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0])); +#else + /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that. + The align insn is used to avoid 3 jump instructions in the row to improve + branch prediction and the benefits hardly outweigh the cost of extra 8 + nops on the average inserted by full alignment pseudo operation. 
*/ +#endif + return ""; +} + [(set_attr "length" "16")]) (define_expand "prologue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_prologue (); DONE;") -(define_insn "prologue_set_got" +(define_insn "set_got" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI - [(plus:SI (match_dup 0) - (plus:SI (match_operand:SI 1 "symbolic_operand" "") - (minus:SI (pc) (match_operand 2 "" ""))))] 1)) - (clobber (reg:CC 17))] + (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" -{ - if (GET_CODE (operands[2]) == LABEL_REF) - operands[2] = XEXP (operands[2], 0); - if (TARGET_DEEP_BRANCH_PREDICTION) - return "add{l}\t{%1, %0|%0, %1}"; - else - return "add{l}\t{%1+[.-%X2], %0|%0, %a1+(.-%X2)}"; -} - [(set_attr "type" "alu") - ; Since this insn may have two constant operands, we must set the - ; length manually. - (set_attr "length_immediate" "4") - (set_attr "mode" "SI")]) + { return output_set_got (operands[0], NULL_RTX); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) -(define_insn "prologue_get_pc" +(define_insn "set_got_labelled" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI [(plus:SI (pc) (match_operand 1 "" ""))] 2))] + (unspec:SI [(label_ref (match_operand 1 "" ""))] + UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" -{ - if (GET_CODE (operands[1]) == LABEL_REF) - operands[1] = XEXP (operands[1], 0); - output_asm_insn ("call\t%X1", operands); - if (! 
TARGET_DEEP_BRANCH_PREDICTION) - { - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (operands[1])); - return "pop{l}\t%0"; - } - RET; -} - [(set_attr "type" "multi")]) + { return output_set_got (operands[0], operands[1]); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))] + "TARGET_64BIT" + "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}" + [(set_attr "type" "lea") + (set_attr "length" "6")]) + +(define_insn "set_rip_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(label_ref (match_operand 1 "" ""))] UNSPEC_SET_RIP))] + "TARGET_64BIT" + "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}" + [(set_attr "type" "lea") + (set_attr "length" "6")]) + +(define_insn "set_got_offset_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(label_ref (match_operand 1 "" ""))] + UNSPEC_SET_GOT_OFFSET))] + "TARGET_64BIT" + "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}" + [(set_attr "type" "imov") + (set_attr "length" "11")]) (define_expand "epilogue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_epilogue (1); DONE;") (define_expand "sibcall_epilogue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_epilogue (0); DONE;") (define_expand "eh_return" - [(use (match_operand 0 "register_operand" "")) - (use (match_operand 1 "register_operand" ""))] + [(use (match_operand 0 "register_operand" ""))] "" { - rtx tmp, sa = operands[0], ra = operands[1]; + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; /* Tricky bit: we write the address of the handler to which we will be returning into someone else's stack frame, one word below the @@ -13851,1036 +15379,3594 @@ emit_move_insn (tmp, ra); if (Pmode == SImode) - emit_insn (gen_eh_return_si (sa)); + emit_jump_insn (gen_eh_return_si (sa)); else - emit_insn 
(gen_eh_return_di (sa)); + emit_jump_insn (gen_eh_return_di (sa)); emit_barrier (); DONE; }) -(define_insn_and_split "eh_return_si" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")] 13)] - "!TARGET_64BIT" - "#" - "reload_completed" - [(const_int 1)] - "ix86_expand_epilogue (2); DONE;") - -(define_insn_and_split "eh_return_di" - [(unspec_volatile [(match_operand:DI 0 "register_operand" "c")] 13)] - "TARGET_64BIT" +(define_insn_and_split "eh_return_" + [(set (pc) + (unspec [(match_operand:P 0 "register_operand" "c")] + UNSPEC_EH_RETURN))] + "" "#" "reload_completed" - [(const_int 1)] + [(const_int 0)] "ix86_expand_epilogue (2); DONE;") (define_insn "leave" - [(set (reg:SI 7) (plus:SI (reg:SI 6) (const_int 4))) - (set (reg:SI 6) (mem:SI (reg:SI 6))) + [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4))) + (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG))) (clobber (mem:BLK (scratch)))] "!TARGET_64BIT" "leave" - [(set_attr "length_immediate" "0") - (set_attr "length" "1") - (set_attr "modrm" "0") - (set_attr "athlon_decode" "vector") - (set_attr "ppro_uops" "few")]) + [(set_attr "type" "leave")]) (define_insn "leave_rex64" - [(set (reg:DI 7) (plus:DI (reg:DI 6) (const_int 8))) - (set (reg:DI 6) (mem:DI (reg:DI 6))) + [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8))) + (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG))) (clobber (mem:BLK (scratch)))] "TARGET_64BIT" "leave" - [(set_attr "length_immediate" "0") - (set_attr "length" "1") - (set_attr "modrm" "0") - (set_attr "athlon_decode" "vector") - (set_attr "ppro_uops" "few")]) + [(set_attr "type" "leave")]) (define_expand "ffssi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_scratch:SI 2 "")) + (clobber (reg:CC FLAGS_REG))])] "" { - rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx 
(SImode); - rtx in = operands[1]; - if (TARGET_CMOVE) { - emit_move_insn (tmp, constm1_rtx); - emit_insn (gen_ffssi_1 (out, in)); - emit_insn (gen_rtx_SET (VOIDmode, out, - gen_rtx_IF_THEN_ELSE (SImode, - gen_rtx_EQ (VOIDmode, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx), - tmp, - out))); - emit_insn (gen_addsi3 (out, out, const1_rtx)); - emit_move_insn (operands[0], out); + emit_insn (gen_ffs_cmove (operands[0], operands[1])); + DONE; } +}) - /* Pentium bsf instruction is extremly slow. The following code is - recommended by the Intel Optimizing Manual as a reasonable replacement: - TEST EAX,EAX - JZ SHORT BS2 - XOR ECX,ECX - MOV DWORD PTR [TEMP+4],ECX - SUB ECX,EAX - AND EAX,ECX - MOV DWORD PTR [TEMP],EAX - FILD QWORD PTR [TEMP] - FSTP QWORD PTR [TEMP] - WAIT ; WAIT only needed for compatibility with - ; earlier processors - MOV ECX, DWORD PTR [TEMP+4] - SHR ECX,20 - SUB ECX,3FFH - TEST EAX,EAX ; clear zero flag - BS2: - Following piece of code expand ffs to similar beast. - */ - - else if (TARGET_PENTIUM && !optimize_size && TARGET_80387) - { - rtx label = gen_label_rtx (); - rtx lo, hi; - rtx mem = assign_386_stack_local (DImode, 0); - rtx fptmp = gen_reg_rtx (DFmode); - split_di (&mem, 1, &lo, &hi); - - emit_move_insn (out, const0_rtx); - - emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, label); - - emit_move_insn (hi, out); - emit_insn (gen_subsi3 (out, out, in)); - emit_insn (gen_andsi3 (out, out, in)); - emit_move_insn (lo, out); - emit_insn (gen_floatdidf2 (fptmp,mem)); - emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp); - emit_move_insn (out, hi); - emit_insn (gen_lshrsi3 (out, out, GEN_INT (20))); - emit_insn (gen_subsi3 (out, out, GEN_INT (0x3ff - 1))); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operands[0], out); - } - else - { - emit_move_insn (tmp, const0_rtx); - emit_insn (gen_ffssi_1 (out, in)); - emit_insn (gen_rtx_SET (VOIDmode, - gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (QImode, tmp)), - 
gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx))); - emit_insn (gen_negsi2 (tmp, tmp)); - emit_insn (gen_iorsi3 (out, out, tmp)); - emit_insn (gen_addsi3 (out, out, const1_rtx)); - emit_move_insn (operands[0], out); - } - DONE; +(define_expand "ffs_cmove" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "") + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_dup 1)))]) + (set (match_dup 0) (if_then_else:SI + (eq (reg:CCZ FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_CMOVE" + "operands[2] = gen_reg_rtx (SImode);") + +(define_insn_and_split "*ffs_no_cmove" + [(set (match_operand:SI 0 "register_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&q")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_CMOVE" + "#" + "&& reload_completed" + [(parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (strict_low_part (match_dup 3)) + (eq:QI (reg:CCZ FLAGS_REG) (const_int 0))) + (parallel [(set (match_dup 2) (neg:SI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[3] = gen_lowpart (QImode, operands[2]); + ix86_expand_clear (operands[2]); }) -(define_insn "ffssi_1" - [(set (reg:CCZ 17) - (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") +(define_insn "*ffssi_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_dup 1)] 5))] + 
(ctz:SI (match_dup 1)))] "" "bsf{l}\t{%1, %0|%0, %1}" - [(set_attr "prefix_0f" "1") - (set_attr "ppro_uops" "few")]) - -;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger -;; and slower than the two-byte movzx insn needed to do the work in SImode. - -;; These patterns match the binary 387 instructions for addM3, subM3, -;; mulM3 and divM3. There are three patterns for each of DFmode and -;; SFmode. The first is the normal insn, the second the same insn but -;; with one operand a conversion, and the third the same insn but with -;; the other operand a conversion. The conversion may be SFmode or -;; SImode if the target mode DFmode, but only SImode if the target mode -;; is SFmode. + [(set_attr "prefix_0f" "1")]) -;; Gcc is slightly more smart about handling normal two address instructions -;; so use special patterns for add and mull. -(define_insn "*fop_sf_comm_nosse" - [(set (match_operand:SF 0 "register_operand" "=f") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "fm")]))] - "TARGET_80387 && !TARGET_SSE_MATH - && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop"))) - (set_attr "mode" "SF")]) +(define_expand "ffsdi2" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "") + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (ctz:DI (match_dup 1)))]) + (set (match_dup 0) (if_then_else:DI + (eq (reg:CCZ FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "operands[2] = gen_reg_rtx 
(DImode);") -(define_insn "*fop_sf_comm" - [(set (match_operand:SF 0 "register_operand" "=f#x,x#f") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "%0,0") - (match_operand:SF 2 "nonimmediate_operand" "fm#x,xm#f")]))] - "TARGET_80387 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 - && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (eq_attr "alternative" "1") - (const_string "sse") - (if_then_else (match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop")))) - (set_attr "mode" "SF")]) +(define_insn "*ffsdi_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_dup 1)))] + "TARGET_64BIT" + "bsf{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) -(define_insn "*fop_sf_comm_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) +(define_insn "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsf{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) -(define_insn "*fop_df_comm_nosse" - [(set (match_operand:DF 0 "register_operand" "=f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "fm")]))] - "TARGET_80387 && (!TARGET_SSE2 || 
!TARGET_SSE_MATH) - && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop"))) - (set_attr "mode" "DF")]) +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "bsf{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) -(define_insn "*fop_df_comm" - [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "%0,0") - (match_operand:DF 2 "nonimmediate_operand" "fm#Y,Ym#f")]))] - "TARGET_80387 && TARGET_SSE_MATH && TARGET_SSE2 && TARGET_MIX_SSE_I387 - && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (eq_attr "alternative" "1") - (const_string "sse") - (if_then_else (match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop")))) - (set_attr "mode" "DF")]) +(define_expand "clzsi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_ABM) + { + emit_insn (gen_clzsi2_abm (operands[0], operands[1])); + DONE; + } +}) -(define_insn "*fop_df_comm_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "Ym")]))] - "TARGET_SSE2 && TARGET_SSE_MATH 
- && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") - (set_attr "mode" "DF")]) +(define_insn "clzsi2_abm" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM" + "lzcnt{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) -(define_insn "*fop_xf_comm" - [(set (match_operand:XF 0 "register_operand" "=f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "%0") - (match_operand:XF 2 "register_operand" "f")]))] - "!TARGET_64BIT && TARGET_80387 - && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop"))) - (set_attr "mode" "XF")]) +(define_insn "*bsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "mode" "SI")]) -(define_insn "*fop_tf_comm" - [(set (match_operand:TF 0 "register_operand" "=f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "%0") - (match_operand:TF 2 "register_operand" "f")]))] - "TARGET_80387 && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop"))) - (set_attr "mode" "XF")]) +(define_insn "popcount2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + 
(clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) -(define_insn "*fop_sf_1_nosse" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0,fm") - (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))] - "TARGET_80387 && !TARGET_SSE_MATH - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:SF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) +(define_insn "*popcount2_cmp" + [(set (reg FLAGS_REG) + (compare + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 (match_dup 1)))] + "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*popcountsi2_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI(popcount:SI (match_dup 1))))] + "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) -(define_insn "*fop_sf_1" - [(set 
(match_operand:SF 0 "register_operand" "=f,f,x") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0") - (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm#f")]))] - "TARGET_80387 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(eq_attr "alternative" "2") - (const_string "sse") - (match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:SF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] + "" +{ + if (!TARGET_BSWAP) + { + rtx x = operands[0]; -(define_insn "*fop_sf_1_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE_MATH - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" - "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) + emit_move_insn (x, operands[1]); + emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + emit_insn (gen_rotlsi3 (x, x, GEN_INT (16))); + emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + DONE; + } +}) -;; ??? Add SSE splitters for these! -(define_insn "*fop_sf_2" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (match_operator:SF 3 "binary_fp_operator" - [(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) - (match_operand:SF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH" - "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:SF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "ppro_uops" "many") - (set_attr "mode" "SI")]) +(define_insn "*bswapsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "0")))] + "TARGET_BSWAP" + "bswap\t%0" + [(set_attr "prefix_0f" "1") + (set_attr "length" "2")]) -(define_insn "*fop_sf_3" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "register_operand" "0,0") - (float:SF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH" - "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:SF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "ppro_uops" "many") - (set_attr "mode" "SI")]) +(define_insn "*bswaphi_lowpart_1" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)" + "@ + xchg{b}\t{%h0, %b0|%b0, %h0} + rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "2,4") + (set_attr "mode" "QI,HI")]) -(define_insn "*fop_df_1_nosse" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0,fm") - (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))] - "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH) - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return 
output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:DF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) +(define_insn "bswaphi_lowpart" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "4") + (set_attr "mode" "HI")]) +(define_insn "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (bswap:DI (match_operand:DI 1 "register_operand" "0")))] + "TARGET_64BIT" + "bswap\t%0" + [(set_attr "prefix_0f" "1") + (set_attr "length" "3")]) -(define_insn "*fop_df_1" - [(set (match_operand:DF 0 "register_operand" "=f#Y,f#Y,Y#f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0") - (match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym#f")]))] - "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(eq_attr "alternative" "2") - (const_string "sse") - (match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:DF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) +(define_expand "clzdi2" + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (const_int 63) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" +{ + if (TARGET_ABM) + { + emit_insn (gen_clzdi2_abm (operands[0], operands[1])); + DONE; + } +}) -(define_insn "*fop_df_1_sse" - [(set (match_operand:DF 0 
"register_operand" "=Y") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "Ym")]))] - "TARGET_SSE2 && TARGET_SSE_MATH - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" - "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse")]) +(define_insn "clzdi2_abm" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_ABM" + "lzcnt{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "DI")]) -;; ??? Add SSE splitters for these! -(define_insn "*fop_df_2" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_fp_operator" - [(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) - (match_operand:DF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)" - "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:DF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "ppro_uops" "many") - (set_attr "mode" "SI")]) +(define_insn "*bsr_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (const_int 63) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "bsr{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "mode" "DI")]) -(define_insn "*fop_df_3" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "register_operand" "0,0") - (float:DF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)" - "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:DF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "ppro_uops" "many") - (set_attr "mode" "SI")]) +(define_expand "clzhi2" + [(parallel + [(set (match_operand:HI 0 "register_operand" "") + (minus:HI (const_int 15) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_ABM) + { + emit_insn (gen_clzhi2_abm (operands[0], operands[1])); + DONE; + } +}) -(define_insn "*fop_df_4" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_fp_operator" - [(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) - (match_operand:DF 2 "register_operand" "0,f")]))] - "TARGET_80387 && 
(!TARGET_SSE2 || !TARGET_SSE_MATH) - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:DF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) +(define_insn "clzhi2_abm" + [(set (match_operand:HI 0 "register_operand" "=r") + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM" + "lzcnt{w}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "HI")]) -(define_insn "*fop_df_5" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "register_operand" "0,f") - (float_extend:DF - (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:DF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) +(define_insn "*bsrhi" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (const_int 15) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{w}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "mode" "HI")]) -(define_insn "*fop_xf_1" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,f") - (match_operand:XF 2 "register_operand" "f,0")]))] - "!TARGET_64BIT && TARGET_80387 - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") 
- (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "XF")]) +(define_expand "paritydi2" + [(set (match_operand:DI 0 "register_operand" "") + (parity:DI (match_operand:DI 1 "register_operand" "")))] + "! TARGET_POPCNT" +{ + rtx scratch = gen_reg_rtx (QImode); + rtx cond; -(define_insn "*fop_tf_1" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,f") - (match_operand:TF 2 "register_operand" "f,0")]))] - "TARGET_80387 - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "XF")]) + emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX, + NULL_RTX, operands[1])); -(define_insn "*fop_xf_2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) - (match_operand:XF 2 "register_operand" "0,0")]))] - "!TARGET_64BIT && TARGET_80387 && TARGET_USE_FIOP" - "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "many")]) - -(define_insn "*fop_tf_2" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(float:TF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) - (match_operand:TF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP" - "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "many")]) + cond = gen_rtx_fmt_ee (ORDERED, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, scratch, cond)); + + if (TARGET_64BIT) + emit_insn (gen_zero_extendqidi2 (operands[0], scratch)); + else + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (tmp, scratch)); + emit_insn (gen_zero_extendsidi2 (operands[0], tmp)); + } + DONE; +}) + +(define_insn_and_split "paritydi2_cmp" + [(set (reg:CC FLAGS_REG) + (parity:CC (match_operand:DI 3 "register_operand" "0"))) + (clobber (match_scratch:DI 0 "=r")) + (clobber (match_scratch:SI 1 "=&r")) + (clobber (match_scratch:HI 2 "=Q"))] + "! TARGET_POPCNT" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 1) + (xor:SI (match_dup 1) (match_dup 4))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CC FLAGS_REG) + (parity:CC (match_dup 1))) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] +{ + operands[4] = gen_lowpart (SImode, operands[3]); + + if (TARGET_64BIT) + { + emit_move_insn (operands[1], gen_lowpart (SImode, operands[3])); + emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32))); + } + else + operands[1] = gen_highpart (SImode, operands[3]); +}) + +(define_expand "paritysi2" + [(set (match_operand:SI 0 "register_operand" "") + (parity:SI (match_operand:SI 1 "register_operand" "")))] + "! 
TARGET_POPCNT" +{ + rtx scratch = gen_reg_rtx (QImode); + rtx cond; + + emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1])); + + cond = gen_rtx_fmt_ee (ORDERED, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, scratch, cond)); + + emit_insn (gen_zero_extendqisi2 (operands[0], scratch)); + DONE; +}) + +(define_insn_and_split "paritysi2_cmp" + [(set (reg:CC FLAGS_REG) + (parity:CC (match_operand:SI 2 "register_operand" "0"))) + (clobber (match_scratch:SI 0 "=r")) + (clobber (match_scratch:HI 1 "=&Q"))] + "! TARGET_POPCNT" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 1) + (xor:HI (match_dup 1) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CC FLAGS_REG) + (parity:CC (match_dup 1))) + (clobber (match_dup 1))])] +{ + operands[3] = gen_lowpart (HImode, operands[2]); + + emit_move_insn (operands[1], gen_lowpart (HImode, operands[2])); + emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16))); +}) + +(define_insn "*parityhi2_cmp" + [(set (reg:CC FLAGS_REG) + (parity:CC (match_operand:HI 1 "register_operand" "0"))) + (clobber (match_scratch:HI 0 "=Q"))] + "! TARGET_POPCNT" + "xor{b}\t{%h0, %b0|%b0, %h0}" + [(set_attr "length" "2") + (set_attr "mode" "HI")]) + +(define_insn "*parityqi2_cmp" + [(set (reg:CC FLAGS_REG) + (parity:CC (match_operand:QI 0 "register_operand" "q")))] + "! TARGET_POPCNT" + "test{b}\t%0, %0" + [(set_attr "length" "2") + (set_attr "mode" "QI")]) + +;; Thread-local storage patterns for ELF. +;; +;; Note that these code sequences must appear exactly as shown +;; in order to allow linker relaxation. 
+ +(define_insn "*tls_global_dynamic_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "*tls_global_dynamic_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]} + push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop" + [(set_attr "type" "multi") + (set_attr "length" "14")]) + +(define_expand "tls_global_dynamic_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(match_dup 2) + (match_operand:SI 1 "tls_symbolic_operand" "") + (match_dup 3)] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "")) + (clobber (match_scratch:SI 5 "")) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (flag_pic) + operands[2] = pic_offset_table_rtx; + else + { + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[2])); + } + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], operands[1], operands[2])); + DONE; + } + operands[3] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_global_dynamic_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" "")) + (match_operand:DI 3 "" ""))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" 
"")] + UNSPEC_TLS_GD)] + "TARGET_64BIT" + { return ".byte\t0x66\n\tlea{q}\t{%a1@TLSGD(%%rip), %%rdi|rdi, %a1@TLSGD[rip]}\n" ASM_SHORT "0x6666\n\trex64\n\tcall\t%P2"; } + [(set_attr "type" "multi") + (set_attr "length" "16")]) + +(define_expand "tls_global_dynamic_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call:DI (mem:QI (match_dup 2)) (const_int 0))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)])] + "" +{ + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], operands[1])); + DONE; + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "11")]) + +(define_insn "*tls_local_dynamic_base_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]} + push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3" + [(set_attr "type" "multi") + (set_attr "length" "13")]) + +(define_expand "tls_local_dynamic_base_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 2)] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (flag_pic) + operands[1] = pic_offset_table_rtx; + else + { + 
operands[1] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[1])); + } + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], ix86_tls_module_base (), operands[1])); + DONE; + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" "")) + (match_operand:DI 2 "" ""))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT" + "lea{q}\t{%&@TLSLD(%%rip), %%rdi|rdi, %&@TLSLD[rip]}\;call\t%P1" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_expand "tls_local_dynamic_base_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call:DI (mem:QI (match_dup 1)) (const_int 0))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])] + "" +{ + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], ix86_tls_module_base ())); + DONE; + } + operands[1] = ix86_tls_get_addr (); +}) + +;; Local dynamic of a single variable is a lose. Show combine how +;; to convert that back to global dynamic. + +(define_insn_and_split "*tls_local_dynamic_32_once" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE) + (const:SI (unspec:SI + [(match_operand:SI 3 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "" + [(parallel [(set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)] + UNSPEC_TLS_GD)) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (reg:CC FLAGS_REG))])] + "") + +;; Load and add the thread base pointer from %gs:0. 
+ +(define_insn "*load_tp_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TP))] + "!TARGET_64BIT" + "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) + (match_operand:SI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*load_tp_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "TARGET_64BIT" + "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP) + (match_operand:DI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +;; GNU2 TLS patterns can be split. 
+ +(define_expand "tls_dynamic_gnu2_32" + [(set (match_dup 3) + (plus:SI (match_operand:SI 2 "register_operand" "") + (const:SI + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)))) + (parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 3) + (match_dup 2) (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +(define_insn "*tls_dynamic_lea_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "b") + (const:SI + (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC))))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}" + [(set_attr "type" "lea") + (set_attr "mode" "SI") + (set_attr "length" "6") + (set_attr "length_address" "4")]) + +(define_insn "*tls_dynamic_call_32" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "") + (match_operand:SI 2 "register_operand" "0") + ;; we have to make sure %ebx still points to the GOT + (match_operand:SI 3 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +(define_insn_and_split "*tls_dynamic_gnu2_combine_32" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI + (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "") + (match_operand:SI 4 "" "") + (match_operand:SI 2 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC) + (const:SI (unspec:SI + [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + 
"!TARGET_64BIT && TARGET_GNU2_TLS" + "#" + "" + [(set (match_dup 0) (match_dup 5))] +{ + operands[5] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2])); +}) + +(define_expand "tls_dynamic_gnu2_64" + [(set (match_dup 2) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)) + (parallel + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +(define_insn "*tls_dynamic_lea_64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}" + [(set_attr "type" "lea") + (set_attr "mode" "DI") + (set_attr "length" "7") + (set_attr "length_address" "4")]) + +(define_insn "*tls_dynamic_call_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "") + (match_operand:DI 2 "register_operand" "0") + (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +(define_insn_and_split "*tls_dynamic_gnu2_combine_64" + [(set (match_operand:DI 0 "register_operand" "=&a") + (plus:DI + (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "") + (match_operand:DI 3 "" "") + (reg:DI SP_REG)] + UNSPEC_TLSDESC) + (const:DI (unspec:DI + [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "#" + "" + 
[(set (match_dup 0) (match_dup 4))] +{ + operands[4] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1])); +}) + +;; + +;; These patterns match the binary 387 instructions for addM3, subM3, +;; mulM3 and divM3. There are three patterns for each of DFmode and +;; SFmode. The first is the normal insn, the second the same insn but +;; with one operand a conversion, and the third the same insn but with +;; the other operand a conversion. The conversion may be SFmode or +;; SImode if the target mode DFmode, but only SImode if the target mode +;; is SFmode. + +;; Gcc is slightly more smart about handling normal two address instructions +;; so use special patterns for add and mull. + +(define_insn "*fop__comm_mixed_avx" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "")]) + +(define_insn "*fop__comm_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op 
(insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "mode" "")]) + +(define_insn "*fop__comm_avx" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*fop__comm_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "mode" "")]) + +(define_insn "*fop__comm_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + 
(if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*fop__1_mixed_avx" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") + (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "prefix" "orig,orig,maybe_vex") + (set_attr "mode" "")]) + +(define_insn "*fop__1_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") + (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + 
(const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*rcpsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP))] + "TARGET_SSE_MATH" + "%vrcpss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) + +(define_insn "*fop__1_avx" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*fop__1_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) + (set_attr "mode" "")]) + +;; This pattern is not fully shadowed by the pattern above. 
+(define_insn "*fop__1_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +;; ??? Add SSE splitters for these! +(define_insn "*fop__2_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(float:MODEF + (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:MODEF 2 "register_operand" "0,0")]))] + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop__3_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0,0") + (float:MODEF + (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop_df_4_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + (match_operand:DF 2 "register_operand" "0,f")]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_df_5_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0,f") + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_df_6_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "register_operand" "0,f")) + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + 
(match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_xf_comm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "%0") + (match_operand:XF 2 "register_operand" "f")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_xf_1_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (match_operand:XF 2 "register_operand" "f,0")]))] + "TARGET_80387 + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_xf_2_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float:XF + (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:XF 2 "register_operand" "0,0")]))] + "TARGET_80387 && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop_xf_3_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,0") + (float:XF + (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop_xf_4_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF + (match_operand:MODEF 1 "nonimmediate_operand" "fm,0")) + (match_operand:XF 2 "register_operand" "0,f")]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*fop_xf_5_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (float_extend:XF + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + 
] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*fop_xf_6_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0,f")) + (float_extend:XF + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "binary_fp_operator" + [(float (match_operand:X87MODEI12 1 "register_operand" "")) + (match_operand 2 "register_operand" "")]))] + "reload_completed + && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] +{ + operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[4], + operands[2]))); + ix86_free_from_memory (GET_MODE (operands[1])); + DONE; +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "binary_fp_operator" + [(match_operand 1 "register_operand" "") + (float (match_operand:X87MODEI12 2 "register_operand" ""))]))] + "reload_completed + && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] +{ + operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[1], + operands[4]))); + ix86_free_from_memory (GET_MODE (operands[2])); + DONE; +}) + +;; FPU special functions. 
+ +;; This pattern implements a no-op XFmode truncation for +;; all fancy i386 XFmode math functions. + +(define_insn "truncxf2_i387_noop_unspec" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_TRUNC_NOOP))] + "TARGET_USE_FANCY_MATH_387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "")]) + +(define_insn "sqrtxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "sqrt_extendxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF + (float_extend:XF + (match_operand:MODEF 1 "register_operand" "0"))))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "*rsqrtsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" + "%vrsqrtss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) + +(define_expand "rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" +{ + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1); + DONE; +}) + +(define_insn "*sqrt2_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "%vsqrts\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "*") + 
(set_attr "amdfam10_decode" "*")]) + +(define_expand "sqrt2" + [(set (match_operand:MODEF 0 "register_operand" "") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "")))] + "TARGET_USE_FANCY_MATH_387 + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + if (mode == SFmode + && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun) + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0); + DONE; + } + + if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = force_reg (mode, operands[1]); + + emit_insn (gen_sqrt_extendxf2_i387 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0)); + DONE; + } +}) + +(define_insn "fpremxf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FPREM_F)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FPREM_U)) + (set (reg:CCFP FPSR_REG) + (unspec:CCFP [(match_dup 2) (match_dup 3)] + UNSPEC_C2_FLAG))] + "TARGET_USE_FANCY_MATH_387" + "fprem" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "fmodxf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "general_operand" "")) + (use (match_operand:XF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_move_insn (op2, operands[2]); + emit_move_insn (op1, operands[1]); + + emit_label (label); + emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_expand "fmod3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use 
(match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op2, operands[2])); + emit_insn (gen_extendxf2 (op1, operands[1])); + + emit_label (label); + emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + /* Truncate the result properly for strict SSE math. */ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !TARGET_MIX_SSE_I387) + emit_insn (gen_truncxf2 (operands[0], op1)); + else + emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op1)); + + DONE; +}) + +(define_insn "fprem1xf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FPREM1_F)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FPREM1_U)) + (set (reg:CCFP FPSR_REG) + (unspec:CCFP [(match_dup 2) (match_dup 3)] + UNSPEC_C2_FLAG))] + "TARGET_USE_FANCY_MATH_387" + "fprem1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "remainderxf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "general_operand" "")) + (use (match_operand:XF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_move_insn (op2, operands[2]); + emit_move_insn (op1, operands[1]); + + emit_label (label); + emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_expand "remainder3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use 
(match_operand:MODEF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op2, operands[2])); + emit_insn (gen_extendxf2 (op1, operands[1])); + + emit_label (label); + + emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + /* Truncate the result properly for strict SSE math. */ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !TARGET_MIX_SSE_I387) + emit_insn (gen_truncxf2 (operands[0], op1)); + else + emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op1)); + + DONE; +}) + +(define_insn "*sinxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "*sin_extendxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0"))] + UNSPEC_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "*cosxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fcos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "*cos_extendxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0"))] + UNSPEC_COS))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && 
flag_unsafe_math_optimizations" + "fcos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +;; When sincos pattern is defined, sin and cos builtin functions will be +;; expanded to sincos pattern with one of its outputs left unused. +;; CSE pass will figure out if two sincos patterns can be combined, +;; otherwise sincos pattern will be split back to sin or cos pattern, +;; depending on the unused output. + +(define_insn "sincosxf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))] + "") + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))] + "") + +(define_insn "sincos_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" "0"))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + 
"TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" ""))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))] + "") + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" ""))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))] + "") + +(define_expand "sincos3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_sincos_extendxf3_i387 (op0, op1, operands[2])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[1], op1)); + DONE; +}) + +(define_insn "fptanxf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operand:XF 3 "const_double_operand" "F")) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_operand:XF 2 
"register_operand" "0")] + UNSPEC_TAN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && standard_80387_constant_p (operands[3]) == 2" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fptan_extendxf4_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operand:MODEF 3 "const_double_operand" "F")) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" "0"))] + UNSPEC_TAN))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations + && standard_80387_constant_p (operands[3]) == 2" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "tanxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx one = gen_reg_rtx (XFmode); + rtx op2 = CONST1_RTX (XFmode); /* fld1 */ + + emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "tan2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx one = gen_reg_rtx (mode); + rtx op2 = CONST1_RTX (mode); /* fld1 */ + + emit_insn (gen_fptan_extendxf4_i387 (one, op0, + operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "*fpatanxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0") + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + 
"fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fpatan_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0")) + (float_extend:XF + (match_operand:MODEF 2 "register_operand" "u"))] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "atan2xf3" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "") + (match_operand:XF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "") + +(define_expand "atan23" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + emit_insn (gen_fpatan_extendxf3_i387 (op0, operands[2], operands[1])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "atanxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 2) + (match_operand:XF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "atan2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && 
(!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx op2 = gen_reg_rtx (mode); + emit_move_insn (op2, CONST1_RTX (mode)); /* fld1 */ + + emit_insn (gen_fpatan_extendxf3_i387 (op0, op2, operands[1])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "asinxf2" + [(set (match_dup 2) + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 1))) + (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2))) + (set (match_dup 5) (sqrt:XF (match_dup 4))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 5) (match_dup 1)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 6 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 2; i < 6; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "asin2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + if (optimize_insn_for_size_p ()) + FAIL; + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_asinxf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "acosxf2" + [(set (match_dup 2) + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 1))) + (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2))) + (set (match_dup 5) (sqrt:XF (match_dup 4))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 1) (match_dup 5)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 6 ""))])] + "TARGET_USE_FANCY_MATH_387 + && 
flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 2; i < 6; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "acos2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + if (optimize_insn_for_size_p ()) + FAIL; + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_acosxf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fyl2xxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0") + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fyl2x" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fyl2x_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0")) + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fyl2x" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "logxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], 
standard_80387_constant_rtx (4)); /* fldln2 */ +}) + +(define_expand "log2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */ + + emit_insn (gen_fyl2x_extendxf3_i387 (op0, operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "log10xf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */ +}) + +(define_expand "log102" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */ + + emit_insn (gen_fyl2x_extendxf3_i387 (op0, operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "log2xf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "log22" + 
[(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */ + + emit_insn (gen_fyl2x_extendxf3_i387 (op0, operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fyl2xp1xf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0") + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FYL2XP1)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fyl2xp1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fyl2xp1_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0")) + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FYL2XP1)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fyl2xp1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "log1pxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + ix86_emit_i387_log1p (operands[0], operands[1]); + DONE; +}) + +(define_expand "log1p2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0; + 
+ if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + + operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]); + + ix86_emit_i387_log1p (op0, operands[1]); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fxtractxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_XTRACT_FRACT)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fxtract" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fxtract_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" "0"))] + UNSPEC_XTRACT_FRACT)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fxtract" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "logbxf2" + [(parallel [(set (match_dup 2) + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_XTRACT_FRACT)) + (set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); +}) + +(define_expand "logb2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtract_extendxf3_i387 (op0, op1, operands[1])); + emit_insn (gen_truncxf2_i387_noop 
(operands[0], op1)); + DONE; +}) + +(define_expand "ilogbxf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1])); + emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); + DONE; +}) + +(define_expand "ilogb2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtract_extendxf3_i387 (op0, op1, operands[1])); + emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); + DONE; +}) + +(define_insn "*f2xm1xf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_F2XM1))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "f2xm1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "*fscalexf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FSCALE_FRACT)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FSCALE_EXP))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fscale" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "expNcorexf3" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" ""))) + (set (match_dup 4) (unspec:XF [(match_dup 3)] 
UNSPEC_FRNDINT)) + (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) + (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 8) (match_dup 4)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 9) + (unspec:XF [(match_dup 8) (match_dup 4)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 3; i < 10; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "expxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "exp2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_expxf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "exp10xf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, 
standard_80387_constant_rtx (6)); /* fldl2t */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "exp102" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_exp10xf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "exp2xf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "exp22" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_exp2xf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "expm1xf2" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 2))) + (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) + (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 9) (float_extend:XF (match_dup 13))) + (set (match_dup 6) 
(unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) + (parallel [(set (match_dup 7) + (unspec:XF [(match_dup 6) (match_dup 4)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 8) + (unspec:XF [(match_dup 6) (match_dup 4)] + UNSPEC_FSCALE_EXP))]) + (parallel [(set (match_dup 10) + (unspec:XF [(match_dup 9) (match_dup 8)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 11) + (unspec:XF [(match_dup 9) (match_dup 8)] + UNSPEC_FSCALE_EXP))]) + (set (match_dup 12) (minus:XF (match_dup 10) + (float_extend:XF (match_dup 13)))) + (set (match_operand:XF 0 "register_operand" "") + (plus:XF (match_dup 12) (match_dup 7)))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 2; i < 13; i++) + operands[i] = gen_reg_rtx (XFmode); + + operands[13] + = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode))); /* fld1 */ + + emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */ +}) + +(define_expand "expm12" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_expm1xf2 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "ldexpxf3" + [(set (match_dup 3) + (float:XF (match_operand:SI 2 "register_operand" ""))) + (parallel [(set (match_operand:XF 0 " register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 3)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 4) + (unspec:XF [(match_dup 1) (match_dup 3)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + 
operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); +}) + +(define_expand "ldexp3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:SI 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_ldexpxf3 (op0, op1, operands[2])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "scalbxf3" + [(parallel [(set (match_operand:XF 0 " register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 3) + (unspec:XF [(match_dup 1) (match_dup 2)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + operands[3] = gen_reg_rtx (XFmode); +}) + +(define_expand "scalb3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1, op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op2, operands[2])); + emit_insn (gen_scalbxf3 (op0, op1, op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + + +(define_insn "sse4_1_round2" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF 
[(match_operand:MODEF 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_15_operand" "n")] + UNSPEC_ROUND))] + "TARGET_ROUND" + "%vrounds\t{%2, %1, %d0|%d0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +(define_insn "rintxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "frndint" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "rint2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math) + { + if (!TARGET_ROUND && optimize_insn_for_size_p ()) + FAIL; + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x04))); + else + ix86_expand_rint (operand0, operand1); + } + else + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_rintxf2 (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; +}) + +(define_expand "round2" + [(match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "nonimmediate_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math && !flag_rounding_math" +{ + if (optimize_insn_for_size_p ()) + FAIL; + if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_round (operand0, operand1); + else + ix86_expand_rounddf_32 (operand0, operand1); + DONE; +}) + +(define_insn_and_split "*fistdi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + 
(unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fistdi2 (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (DImode, SLOT_TEMP); + emit_insn (gen_fistdi2_with_temp (operands[0], operands[1], + operands[2])); + } + DONE; +} + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST)) + (clobber (match_scratch:XF 2 "=&1f"))] + "TARGET_USE_FANCY_MATH_387" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387" + "#" + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) + (clobber (match_dup 3))])] + "") + +(define_insn_and_split "*fist2_1" + 
[(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_with_temp (operands[0], operands[1], + operands[2])); + DONE; +} + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) + +(define_insn "fist2" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) + +(define_insn "fist2_with_temp" + [(set (match_operand:X87MODEI12 0 "register_operand" "=r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))] + "TARGET_USE_FANCY_MATH_387" + "#" + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] + "reload_completed" + [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] + "reload_completed" + [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))] + "") + +(define_expand "lrintxf2" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387" + "") + +(define_expand "lrint2" + 
[(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "") + (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")] + UNSPEC_FIX_NOTRUNC))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode != DImode) || TARGET_64BIT)" + "") + +(define_expand "lround2" + [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode != DImode) || TARGET_64BIT) + && !flag_trapping_math && !flag_rounding_math" +{ + if (optimize_insn_for_size_p ()) + FAIL; + ix86_expand_lround (operand0, operand1); + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. +(define_insn_and_split "frndintxf2_floor" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_FLOOR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_FLOOR] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR); + + emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "floor") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_floor_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "floor") + (set_attr "mode" "XF")]) + +(define_expand "floorxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 
1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_frndintxf2_floor (operands[0], operands[1])); + DONE; +}) + +(define_expand "floor2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || optimize_insn_for_speed_p ())) + { + if (!TARGET_ROUND && optimize_insn_for_size_p ()) + FAIL; + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x01))); + else if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_floorceil (operand0, operand1, true); + else + ix86_expand_floorceildf_32 (operand0, operand1, true); + } + else + { + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_floor (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; +}) + +(define_insn_and_split "*fist2_floor_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_FLOOR] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fist2_floor (operands[0], operands[1], + operands[2], 
operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_floor_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "")]) + +(define_insn "fistdi2_floor" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_floor_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI 
[(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])] + "") + +(define_insn "fist2_floor" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "")]) + +(define_insn "fist2_floor_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set 
(match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3))])] + "") + +(define_expand "lfloorxf2" + [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "") + +(define_expand "lfloordi2" + [(match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && TARGET_64BIT + && !flag_trapping_math" +{ + if (optimize_insn_for_size_p ()) + FAIL; + ix86_expand_lfloorceil (operand0, operand1, true); + DONE; +}) + +(define_expand "lfloorsi2" + [(match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math" +{ + if (optimize_insn_for_size_p () && TARGET_64BIT) + FAIL; + ix86_expand_lfloorceil (operand0, operand1, true); + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_ceil" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_CEIL)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_CEIL] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL); + + emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_ceil_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "XF")]) + +(define_expand "ceilxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_frndintxf2_ceil (operands[0], operands[1])); + DONE; +}) + +(define_expand "ceil2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || optimize_insn_for_speed_p 
())) + { + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x02))); + else if (optimize_insn_for_size_p ()) + FAIL; + else if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_floorceil (operand0, operand1, false); + else + ix86_expand_floorceildf_32 (operand0, operand1, false); + } + else + { + rtx op0, op1; -(define_insn "*fop_xf_3" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,0") - (float:XF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "!TARGET_64BIT && TARGET_80387 && TARGET_USE_FIOP" - "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "many")]) - -(define_insn "*fop_tf_3" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,0") - (float:TF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP" - "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "many")]) + if (optimize_insn_for_size_p ()) + FAIL; -(define_insn "*fop_xf_4" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) - (match_operand:XF 2 "register_operand" "0,f")]))] - "!TARGET_64BIT && TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_ceil (op0, op1)); -(define_insn "*fop_tf_4" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) - (match_operand:TF 2 "register_operand" "0,f")]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; +}) -(define_insn "*fop_xf_5" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,f") - (float_extend:XF - (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] - "!TARGET_64BIT && TARGET_80387" - "* return 
output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) +(define_insn_and_split "*fist2_ceil_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_CEIL] = 1; -(define_insn "*fop_tf_5" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,f") - (float_extend:TF - (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fist2_ceil (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_ceil_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) -(define_insn "*fop_xf_6" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) - (match_operand:XF 2 "register_operand" "0,f")]))] - 
"!TARGET_64BIT && TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) +(define_insn "fistdi2_ceil" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "DI")]) -(define_insn "*fop_tf_6" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) - (match_operand:TF 2 "register_operand" "0,f")]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) +(define_insn "fistdi2_ceil_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "DI")]) -(define_insn "*fop_xf_7" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 
"binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,f") - (float_extend:XF - (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] - "!TARGET_64BIT && TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") -(define_insn "*fop_tf_7" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,f") - (float_extend:TF - (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 
3)) + (clobber (match_dup 5))])] + "") + +(define_insn "fist2_ceil" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) + +(define_insn "fist2_ceil_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) (define_split - [(set (match_operand 0 "register_operand" "") - (match_operator 3 "binary_fp_operator" - [(float (match_operand:SI 1 "register_operand" "")) - (match_operand 2 "register_operand" "")]))] - "TARGET_80387 && reload_completed - && FLOAT_MODE_P (GET_MODE (operands[0]))" - [(const_int 0)] -{ - operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); - operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_fmt_ee (GET_CODE (operands[3]), - GET_MODE (operands[3]), - operands[4], - operands[2]))); - ix86_free_from_memory (GET_MODE (operands[1])); + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + 
"reload_completed" + [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3))])] + "") + +(define_expand "lceilxf2" + [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "") + +(define_expand "lceildi2" + [(match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && TARGET_64BIT + && !flag_trapping_math" +{ + ix86_expand_lfloorceil (operand0, operand1, false); DONE; }) -(define_split - [(set (match_operand 0 "register_operand" "") - (match_operator 3 "binary_fp_operator" - [(match_operand 1 "register_operand" "") - (float (match_operand:SI 2 "register_operand" ""))]))] - "TARGET_80387 && reload_completed - && FLOAT_MODE_P (GET_MODE (operands[0]))" +(define_expand "lceilsi2" + [(match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math" +{ + ix86_expand_lfloorceil (operand0, operand1, false); + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_trunc" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_TRUNC)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" [(const_int 0)] { - operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); - operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_fmt_ee (GET_CODE (operands[3]), - GET_MODE (operands[3]), - operands[1], - operands[4]))); - ix86_free_from_memory (GET_MODE (operands[2])); + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); + + emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_trunc_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_TRUNC)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) + +(define_expand "btruncxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_frndintxf2_trunc (operands[0], operands[1])); DONE; }) - -;; FPU special functions. 
-(define_expand "sqrtsf2" - [(set (match_operand:SF 0 "register_operand" "") - (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "")))] - "(! TARGET_NO_FANCY_MATH_387 && TARGET_80387) || TARGET_SSE_MATH" +(define_expand "btrunc2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" { - if (!TARGET_SSE_MATH) - operands[1] = force_reg (SFmode, operands[1]); + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || optimize_insn_for_speed_p ())) + { + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x03))); + else if (optimize_insn_for_size_p ()) + FAIL; + else if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_trunc (operand0, operand1); + else + ix86_expand_truncdf_32 (operand0, operand1); + } + else + { + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_trunc (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; }) -(define_insn "sqrtsf2_1" - [(set (match_operand:SF 0 "register_operand" "=f#x,x#f") - (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0#x,xm#f")))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && (TARGET_SSE_MATH && TARGET_MIX_SSE_I387)" - "@ - fsqrt - sqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "fpspc,sse") - (set_attr "mode" "SF,SF") - (set_attr "athlon_decode" "direct,*")]) +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_mask_pm" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_MASK_PM)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_MASK_PM] = 1; -(define_insn "sqrtsf2_1_sse_only" - [(set (match_operand:SF 0 "register_operand" "=x") - (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE_MATH && (!TARGET_80387 || !TARGET_MIX_SSE_I387)" - "sqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "*")]) - -(define_insn "sqrtsf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f") - (sqrt:SF (match_operand:SF 1 "register_operand" "0")))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && !TARGET_SSE_MATH" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "direct")]) + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM); -(define_expand "sqrtdf2" - [(set (match_operand:DF 0 "register_operand" "") - (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "")))] - "(! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387) - || (TARGET_SSE2 && TARGET_SSE_MATH)" + emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_mask_pm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_MASK_PM)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_expand "nearbyintxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" { - if (!TARGET_SSE2 || !TARGET_SSE_MATH) - operands[1] = force_reg (DFmode, operands[1]); + emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1])); + + DONE; }) -(define_insn "sqrtdf2_1" - [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f") - (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0#Y,Ym#f")))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && (TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387)" - "@ - fsqrt - sqrtsd\t{%1, %0|%0, %1}" - [(set_attr "type" "fpspc,sse") - (set_attr "mode" "DF,DF") - (set_attr "athlon_decode" "direct,*")]) - -(define_insn "sqrtdf2_1_sse_only" - [(set (match_operand:DF 0 "register_operand" "=Y") - (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2 && TARGET_SSE_MATH && (!TARGET_80387 || !TARGET_MIX_SSE_I387)" - "sqrtsd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "*")]) +(define_expand "nearbyint2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); -(define_insn "sqrtdf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f") - (sqrt:DF (match_operand:DF 1 "register_operand" "0")))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && (!TARGET_SSE2 || !TARGET_SSE_MATH)" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "direct")]) + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_mask_pm (op0, op1)); -(define_insn "*sqrtextendsfdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (sqrt:DF (float_extend:DF - (match_operand:SF 1 "register_operand" "0"))))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && !(TARGET_SSE2 && TARGET_SSE_MATH)" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "direct")]) + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fxam2_i387" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(match_operand:X87MODEF 1 "register_operand" "f")] + UNSPEC_FXAM))] + "TARGET_USE_FANCY_MATH_387" + "fxam\n\tfnstsw\t%0" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) -(define_insn "sqrtxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 - && (TARGET_IEEE_FP || flag_unsafe_math_optimizations) " - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) +(define_insn_and_split "fxam2_i387_with_temp" + [(set (match_operand:HI 0 "register_operand" "") + (unspec:HI + [(match_operand:MODEF 1 "memory_operand" "")] + UNSPEC_FXAM_MEM))] + "TARGET_USE_FANCY_MATH_387 + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(set (match_dup 2)(match_dup 1)) + (set (match_dup 0) + (unspec:HI [(match_dup 2)] UNSPEC_FXAM))] +{ + operands[2] = gen_reg_rtx (mode); -(define_insn "sqrttf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (sqrt:TF (match_operand:TF 1 "register_operand" "0")))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && (TARGET_IEEE_FP || flag_unsafe_math_optimizations) " - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) + MEM_VOLATILE_P (operands[1]) = 1; +} + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) -(define_insn "*sqrtextenddfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (sqrt:XF (float_extend:XF - (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) +(define_expand "isinfxf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && TARGET_C99_FUNCTIONS" +{ + rtx mask = GEN_INT (0x45); + rtx val = GEN_INT (0x05); -(define_insn "*sqrtextenddftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (sqrt:TF (float_extend:TF - (match_operand:DF 1 "register_operand" "0"))))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) + rtx cond; -(define_insn "*sqrtextendsfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (sqrt:XF (float_extend:XF - (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) + rtx scratch = gen_reg_rtx (HImode); + rtx res = gen_reg_rtx (QImode); -(define_insn "*sqrtextendsftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (sqrt:TF (float_extend:TF - (match_operand:SF 1 "register_operand" "0"))))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) + emit_insn (gen_fxamxf2_i387 (scratch, operands[1])); -(define_insn "sindf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 1))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fsin" - [(set_attr "type" "fpspc") - (set_attr "mode" "DF")]) + emit_insn (gen_andqi_ext_0 (scratch, scratch, mask)); + emit_insn (gen_cmpqi_ext_3 (scratch, val)); + cond = gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, res, cond)); + emit_insn (gen_zero_extendqisi2 (operands[0], res)); + DONE; +}) -(define_insn "sinsf2" - [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 1))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fsin" - [(set_attr "type" "fpspc") - (set_attr "mode" "SF")]) +(define_expand "isinf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:MODEF 1 "nonimmediate_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && TARGET_C99_FUNCTIONS + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + rtx mask = GEN_INT (0x45); + rtx val = GEN_INT (0x05); -(define_insn "*sinextendsfdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(float_extend:DF - (match_operand:SF 1 "register_operand" "0"))] 1))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fsin" - [(set_attr "type" "fpspc") - (set_attr "mode" "DF")]) + rtx cond; -(define_insn "sinxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 1))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 - && flag_unsafe_math_optimizations" - "fsin" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF")]) + rtx scratch = gen_reg_rtx (HImode); + rtx res = gen_reg_rtx (QImode); -(define_insn "sintf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 1))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fsin" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF")]) + /* Remove excess precision by forcing value through memory. */ + if (memory_operand (operands[1], VOIDmode)) + emit_insn (gen_fxam2_i387_with_temp (scratch, operands[1])); + else + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + rtx temp = assign_386_stack_local (mode, slot); -(define_insn "cosdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fcos" - [(set_attr "type" "fpspc") - (set_attr "mode" "DF")]) + emit_move_insn (temp, operands[1]); + emit_insn (gen_fxam2_i387_with_temp (scratch, temp)); + } -(define_insn "cossf2" - [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 2))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fcos" - [(set_attr "type" "fpspc") - (set_attr "mode" "SF")]) + emit_insn (gen_andqi_ext_0 (scratch, scratch, mask)); + emit_insn (gen_cmpqi_ext_3 (scratch, val)); + cond = gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, res, cond)); + emit_insn (gen_zero_extendqisi2 (operands[0], res)); + DONE; +}) -(define_insn "*cosextendsfdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(float_extend:DF - (match_operand:SF 1 "register_operand" "0"))] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fcos" - [(set_attr "type" "fpspc") - (set_attr "mode" "DF")]) +(define_expand "signbit2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:X87MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + rtx mask = GEN_INT (0x0200); -(define_insn "cosxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fcos" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF")]) + rtx scratch = gen_reg_rtx (HImode); -(define_insn "costf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 2))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && flag_unsafe_math_optimizations" - "fcos" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF")]) + emit_insn (gen_fxam2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask)); + DONE; +}) ;; Block operation instructions (define_insn "cld" - [(set (reg:SI 19) (const_int 0))] - "" - "cld" - [(set_attr "type" "cld")]) + [(unspec_volatile [(const_int 0)] UNSPECV_CLD)] + "" + "cld" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) -(define_expand "movstrsi" +(define_expand "movmemsi" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:BLK 1 "memory_operand" "")) (use (match_operand:SI 2 "nonmemory_operand" "")) - (use (match_operand:SI 3 "const_int_operand" ""))] + (use (match_operand:SI 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] "" { - if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3])) + if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], + operands[4], operands[5])) DONE; else FAIL; }) -(define_expand "movstrdi" +(define_expand "movmemdi" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:BLK 1 "memory_operand" "")) (use (match_operand:DI 2 "nonmemory_operand" "")) - (use (match_operand:DI 3 "const_int_operand" ""))] + (use (match_operand:DI 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] "TARGET_64BIT" { - if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3])) + if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], + operands[4], operands[5])) DONE; else FAIL; @@ -14889,170 +18975,46 @@ ;; Most CPUs don't like single string operations ;; Handle this case here to simplify previous expander. 
-(define_expand "strmovdi_rex64" - [(set (match_dup 2) - (mem:DI (match_operand:DI 1 "register_operand" ""))) - (set (mem:DI (match_operand:DI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 8))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovdi_rex_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (DImode); -}) - - -(define_expand "strmovsi" - [(set (match_dup 2) - (mem:SI (match_operand:SI 1 "register_operand" ""))) - (set (mem:SI (match_operand:SI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 4))) - (clobber (reg:CC 17))])] +(define_expand "strmov" + [(set (match_dup 4) (match_operand 3 "memory_operand" "")) + (set (match_operand 1 "memory_operand" "") (match_dup 4)) + (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5)) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6)) + (clobber (reg:CC FLAGS_REG))])] "" { - if (TARGET_64BIT) - { - emit_insn (gen_strmovsi_rex64 (operands[0], operands[1])); - DONE; - } - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovsi_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (SImode); -}) + rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1]))); -(define_expand "strmovsi_rex64" - [(set (match_dup 2) - (mem:SI (match_operand:DI 1 "register_operand" ""))) - (set (mem:SI (match_operand:DI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4))) - (clobber (reg:CC 17))]) - (parallel 
[(set (match_dup 1) (plus:DI (match_dup 1) (const_int 4))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovsi_rex_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (SImode); -}) + /* If .md ever supports :P for Pmode, these can be directly + in the pattern above. */ + operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust); + operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust); -(define_expand "strmovhi" - [(set (match_dup 2) - (mem:HI (match_operand:SI 1 "register_operand" ""))) - (set (mem:HI (match_operand:SI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 2))) - (clobber (reg:CC 17))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_strmovhi_rex64 (operands[0], operands[1])); - DONE; - } - if (TARGET_SINGLE_STRINGOP || optimize_size) + /* Can't use this if the user has appropriated esi or edi. 
*/ + if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) { - emit_insn (gen_strmovhi_1 (operands[0], operands[1], operands[0], - operands[1])); + emit_insn (gen_strmov_singleop (operands[0], operands[1], + operands[2], operands[3], + operands[5], operands[6])); DONE; } - else - operands[2] = gen_reg_rtx (HImode); -}) -(define_expand "strmovhi_rex64" - [(set (match_dup 2) - (mem:HI (match_operand:DI 1 "register_operand" ""))) - (set (mem:HI (match_operand:DI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 2))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovhi_rex_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (HImode); + operands[4] = gen_reg_rtx (GET_MODE (operands[1])); }) -(define_expand "strmovqi" - [(set (match_dup 2) - (mem:QI (match_operand:SI 1 "register_operand" ""))) - (set (mem:QI (match_operand:SI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 1))) - (clobber (reg:CC 17))])] +(define_expand "strmov_singleop" + [(parallel [(set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 5 "" ""))])] "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_strmovqi_rex64 (operands[0], operands[1])); - DONE; - } - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovqi_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (QImode); -}) - 
-(define_expand "strmovqi_rex64" - [(set (match_dup 2) - (mem:QI (match_operand:DI 1 "register_operand" ""))) - (set (mem:QI (match_operand:DI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 1))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovqi_rex_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (QImode); -}) + "ix86_current_function_needs_cld = 1;") -(define_insn "strmovdi_rex_1" +(define_insn "*strmovdi_rex_1" [(set (mem:DI (match_operand:DI 2 "register_operand" "0")) (mem:DI (match_operand:DI 3 "register_operand" "1"))) (set (match_operand:DI 0 "register_operand" "=D") @@ -15060,15 +19022,14 @@ (const_int 8))) (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) - (const_int 8))) - (use (reg:SI 19))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 8)))] + "TARGET_64BIT" "movsq" [(set_attr "type" "str") (set_attr "mode" "DI") (set_attr "memory" "both")]) -(define_insn "strmovsi_1" +(define_insn "*strmovsi_1" [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) (mem:SI (match_operand:SI 3 "register_operand" "1"))) (set (match_operand:SI 0 "register_operand" "=D") @@ -15076,15 +19037,14 @@ (const_int 4))) (set (match_operand:SI 1 "register_operand" "=S") (plus:SI (match_dup 3) - (const_int 4))) - (use (reg:SI 19))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" - "{movsl|movsd}" + (const_int 4)))] + "!TARGET_64BIT" + "movs{l|d}" [(set_attr "type" "str") (set_attr "mode" "SI") (set_attr "memory" "both")]) -(define_insn "strmovsi_rex_1" +(define_insn "*strmovsi_rex_1" [(set (mem:SI (match_operand:DI 2 "register_operand" "0")) (mem:SI (match_operand:DI 3 "register_operand" "1"))) (set (match_operand:DI 0 
"register_operand" "=D") @@ -15092,15 +19052,14 @@ (const_int 4))) (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) - (const_int 4))) - (use (reg:SI 19))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" - "{movsl|movsd}" + (const_int 4)))] + "TARGET_64BIT" + "movs{l|d}" [(set_attr "type" "str") (set_attr "mode" "SI") (set_attr "memory" "both")]) -(define_insn "strmovhi_1" +(define_insn "*strmovhi_1" [(set (mem:HI (match_operand:SI 2 "register_operand" "0")) (mem:HI (match_operand:SI 3 "register_operand" "1"))) (set (match_operand:SI 0 "register_operand" "=D") @@ -15108,15 +19067,14 @@ (const_int 2))) (set (match_operand:SI 1 "register_operand" "=S") (plus:SI (match_dup 3) - (const_int 2))) - (use (reg:SI 19))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 2)))] + "!TARGET_64BIT" "movsw" [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "HI")]) -(define_insn "strmovhi_rex_1" +(define_insn "*strmovhi_rex_1" [(set (mem:HI (match_operand:DI 2 "register_operand" "0")) (mem:HI (match_operand:DI 3 "register_operand" "1"))) (set (match_operand:DI 0 "register_operand" "=D") @@ -15124,15 +19082,14 @@ (const_int 2))) (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) - (const_int 2))) - (use (reg:SI 19))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 2)))] + "TARGET_64BIT" "movsw" [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "HI")]) -(define_insn "strmovqi_1" +(define_insn "*strmovqi_1" [(set (mem:QI (match_operand:SI 2 "register_operand" "0")) (mem:QI (match_operand:SI 3 "register_operand" "1"))) (set (match_operand:SI 0 "register_operand" "=D") @@ -15140,15 +19097,14 @@ (const_int 1))) (set (match_operand:SI 1 "register_operand" "=S") (plus:SI (match_dup 3) - (const_int 1))) - (use (reg:SI 19))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 1)))] + "!TARGET_64BIT" "movsb" [(set_attr 
"type" "str") (set_attr "memory" "both") (set_attr "mode" "QI")]) -(define_insn "strmovqi_rex_1" +(define_insn "*strmovqi_rex_1" [(set (mem:QI (match_operand:DI 2 "register_operand" "0")) (mem:QI (match_operand:DI 3 "register_operand" "1"))) (set (match_operand:DI 0 "register_operand" "=D") @@ -15156,129 +19112,145 @@ (const_int 1))) (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) - (const_int 1))) - (use (reg:SI 19))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 1)))] + "TARGET_64BIT" "movsb" [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "QI")]) -(define_insn "rep_movdi_rex64" +(define_expand "rep_mov" + [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 5 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 6 "" "")) + (set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (use (match_dup 4))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*rep_movdi_rex64" [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") + (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") (const_int 3)) (match_operand:DI 3 "register_operand" "0"))) - (set (match_operand:DI 1 "register_operand" "=S") + (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (ashift:DI (match_dup 5) (const_int 3)) (match_operand:DI 4 "register_operand" "1"))) (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) - (use (match_dup 5)) - (use (reg:SI 19))] + (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsq|rep movsq}" + "rep movsq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") (set_attr "mode" "DI")]) -(define_insn "rep_movsi" +(define_insn "*rep_movsi" [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) - 
(set (match_operand:SI 0 "register_operand" "=D") + (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2") (const_int 2)) (match_operand:SI 3 "register_operand" "0"))) - (set (match_operand:SI 1 "register_operand" "=S") + (set (match_operand:SI 1 "register_operand" "=S") (plus:SI (ashift:SI (match_dup 5) (const_int 2)) (match_operand:SI 4 "register_operand" "1"))) (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) - (use (match_dup 5)) - (use (reg:SI 19))] + (use (match_dup 5))] "!TARGET_64BIT" - "{rep\;movsl|rep movsd}" + "rep movs{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") (set_attr "mode" "SI")]) -(define_insn "rep_movsi_rex64" +(define_insn "*rep_movsi_rex64" [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") + (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") (const_int 2)) (match_operand:DI 3 "register_operand" "0"))) - (set (match_operand:DI 1 "register_operand" "=S") + (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (ashift:DI (match_dup 5) (const_int 2)) (match_operand:DI 4 "register_operand" "1"))) (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) - (use (match_dup 5)) - (use (reg:SI 19))] + (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsl|rep movsd}" + "rep movs{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") (set_attr "mode" "SI")]) -(define_insn "rep_movqi" +(define_insn "*rep_movqi" [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:SI 0 "register_operand" "=D") + (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_operand:SI 3 "register_operand" "0") (match_operand:SI 5 "register_operand" "2"))) - (set (match_operand:SI 1 "register_operand" "=S") + (set (match_operand:SI 1 "register_operand" "=S") (plus:SI 
(match_operand:SI 4 "register_operand" "1") (match_dup 5))) (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) - (use (match_dup 5)) - (use (reg:SI 19))] + (use (match_dup 5))] "!TARGET_64BIT" - "{rep\;movsb|rep movsb}" + "rep movsb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") (set_attr "mode" "SI")]) -(define_insn "rep_movqi_rex64" +(define_insn "*rep_movqi_rex64" [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") + (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_operand:DI 3 "register_operand" "0") (match_operand:DI 5 "register_operand" "2"))) - (set (match_operand:DI 1 "register_operand" "=S") + (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5))) (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) - (use (match_dup 5)) - (use (reg:SI 19))] + (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsb|rep movsb}" + "rep movsb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") (set_attr "mode" "SI")]) -(define_expand "clrstrsi" +(define_expand "setmemsi" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:SI 1 "nonmemory_operand" "")) - (use (match_operand 2 "const_int_operand" ""))] + (use (match_operand 2 "const_int_operand" "")) + (use (match_operand 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] "" { - if (ix86_expand_clrstr (operands[0], operands[1], operands[2])) + if (ix86_expand_setmem (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])) DONE; else FAIL; }) -(define_expand "clrstrdi" +(define_expand "setmemdi" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:DI 1 "nonmemory_operand" "")) - (use (match_operand 2 "const_int_operand" ""))] + (use (match_operand 2 "const_int_operand" "")) + (use 
(match_operand 3 "const_int_operand" "")) + (use (match_operand 4 "const_int_operand" "")) + (use (match_operand 5 "const_int_operand" ""))] "TARGET_64BIT" { - if (ix86_expand_clrstr (operands[0], operands[1], operands[2])) + if (ix86_expand_setmem (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])) DONE; else FAIL; @@ -15287,299 +19259,216 @@ ;; Most CPUs don't like single string operations ;; Handle this case here to simplify previous expander. -(define_expand "strsetdi_rex64" - [(set (mem:DI (match_operand:DI 0 "register_operand" "")) - (match_operand:DI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetdi_rex_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand "strsetsi" - [(set (mem:SI (match_operand:SI 0 "register_operand" "")) - (match_operand:SI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4))) - (clobber (reg:CC 17))])] +(define_expand "strset" + [(set (match_operand 1 "memory_operand" "") + (match_operand 2 "register_operand" "")) + (parallel [(set (match_operand 0 "register_operand" "") + (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] "" { - if (TARGET_64BIT) - { - emit_insn (gen_strsetsi_rex64 (operands[0], operands[1])); - DONE; - } - else if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetsi_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand "strsetsi_rex64" - [(set (mem:SI (match_operand:DI 0 "register_operand" "")) - (match_operand:SI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetsi_rex_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand 
"strsethi" - [(set (mem:HI (match_operand:SI 0 "register_operand" "")) - (match_operand:HI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2))) - (clobber (reg:CC 17))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_strsethi_rex64 (operands[0], operands[1])); - DONE; - } - else if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand "strsethi_rex64" - [(set (mem:HI (match_operand:DI 0 "register_operand" "")) - (match_operand:HI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) + if (GET_MODE (operands[1]) != GET_MODE (operands[2])) + operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0); + + /* If .md ever supports :P for Pmode, this can be directly + in the pattern above. */ + operands[3] = gen_rtx_PLUS (Pmode, operands[0], + GEN_INT (GET_MODE_SIZE (GET_MODE + (operands[2])))); + if (TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) { - emit_insn (gen_strsethi_rex_1 (operands[0], operands[0], operands[1])); + emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2], + operands[3])); DONE; } }) -(define_expand "strsetqi" - [(set (mem:QI (match_operand:SI 0 "register_operand" "")) - (match_operand:QI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) - (clobber (reg:CC 17))])] +(define_expand "strset_singleop" + [(parallel [(set (match_operand 1 "memory_operand" "") + (match_operand 2 "register_operand" "")) + (set (match_operand 0 "register_operand" "") + (match_operand 3 "" ""))])] "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_strsetqi_rex64 (operands[0], operands[1])); - DONE; - } - else if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetqi_1 (operands[0], operands[0], 
operands[1])); - DONE; - } -}) - -(define_expand "strsetqi_rex64" - [(set (mem:QI (match_operand:DI 0 "register_operand" "")) - (match_operand:QI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetqi_rex_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) + "ix86_current_function_needs_cld = 1;") -(define_insn "strsetdi_rex_1" - [(set (mem:SI (match_operand:DI 1 "register_operand" "0")) - (match_operand:SI 2 "register_operand" "a")) +(define_insn "*strsetdi_rex_1" + [(set (mem:DI (match_operand:DI 1 "register_operand" "0")) + (match_operand:DI 2 "register_operand" "a")) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) - (const_int 8))) - (use (reg:SI 19))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 8)))] + "TARGET_64BIT" "stosq" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "DI")]) -(define_insn "strsetsi_1" +(define_insn "*strsetsi_1" [(set (mem:SI (match_operand:SI 1 "register_operand" "0")) (match_operand:SI 2 "register_operand" "a")) (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_dup 1) - (const_int 4))) - (use (reg:SI 19))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" - "{stosl|stosd}" + (const_int 4)))] + "!TARGET_64BIT" + "stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "SI")]) -(define_insn "strsetsi_rex_1" +(define_insn "*strsetsi_rex_1" [(set (mem:SI (match_operand:DI 1 "register_operand" "0")) (match_operand:SI 2 "register_operand" "a")) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) - (const_int 4))) - (use (reg:SI 19))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" - "{stosl|stosd}" + (const_int 4)))] + "TARGET_64BIT" + "stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") 
(set_attr "mode" "SI")]) -(define_insn "strsethi_1" +(define_insn "*strsethi_1" [(set (mem:HI (match_operand:SI 1 "register_operand" "0")) (match_operand:HI 2 "register_operand" "a")) (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_dup 1) - (const_int 2))) - (use (reg:SI 19))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 2)))] + "!TARGET_64BIT" "stosw" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "HI")]) -(define_insn "strsethi_rex_1" +(define_insn "*strsethi_rex_1" [(set (mem:HI (match_operand:DI 1 "register_operand" "0")) (match_operand:HI 2 "register_operand" "a")) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) - (const_int 2))) - (use (reg:SI 19))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 2)))] + "TARGET_64BIT" "stosw" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "HI")]) -(define_insn "strsetqi_1" +(define_insn "*strsetqi_1" [(set (mem:QI (match_operand:SI 1 "register_operand" "0")) (match_operand:QI 2 "register_operand" "a")) (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_dup 1) - (const_int 1))) - (use (reg:SI 19))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 1)))] + "!TARGET_64BIT" "stosb" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "QI")]) -(define_insn "strsetqi_rex_1" +(define_insn "*strsetqi_rex_1" [(set (mem:QI (match_operand:DI 1 "register_operand" "0")) (match_operand:QI 2 "register_operand" "a")) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) - (const_int 1))) - (use (reg:SI 19))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + (const_int 1)))] + "TARGET_64BIT" "stosb" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "QI")]) -(define_insn "rep_stosdi_rex64" +(define_expand "rep_stos" + [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0)) 
+ (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "memory_operand" "") (const_int 0)) + (use (match_operand 3 "register_operand" "")) + (use (match_dup 1))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*rep_stosdi_rex64" [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") + (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") (const_int 3)) (match_operand:DI 3 "register_operand" "0"))) (set (mem:BLK (match_dup 3)) (const_int 0)) (use (match_operand:DI 2 "register_operand" "a")) - (use (match_dup 4)) - (use (reg:SI 19))] + (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosq|rep stosq}" + "rep stosq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") (set_attr "mode" "DI")]) -(define_insn "rep_stossi" +(define_insn "*rep_stossi" [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:SI 0 "register_operand" "=D") + (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1") (const_int 2)) (match_operand:SI 3 "register_operand" "0"))) (set (mem:BLK (match_dup 3)) (const_int 0)) (use (match_operand:SI 2 "register_operand" "a")) - (use (match_dup 4)) - (use (reg:SI 19))] + (use (match_dup 4))] "!TARGET_64BIT" - "{rep\;stosl|rep stosd}" + "rep stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") (set_attr "mode" "SI")]) -(define_insn "rep_stossi_rex64" +(define_insn "*rep_stossi_rex64" [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") + (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") (const_int 2)) (match_operand:DI 3 "register_operand" "0"))) (set (mem:BLK (match_dup 3)) (const_int 0)) (use 
(match_operand:SI 2 "register_operand" "a")) - (use (match_dup 4)) - (use (reg:SI 19))] + (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosl|rep stosd}" + "rep stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") (set_attr "mode" "SI")]) -(define_insn "rep_stosqi" +(define_insn "*rep_stosqi" [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:SI 0 "register_operand" "=D") + (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_operand:SI 3 "register_operand" "0") (match_operand:SI 4 "register_operand" "1"))) (set (mem:BLK (match_dup 3)) (const_int 0)) (use (match_operand:QI 2 "register_operand" "a")) - (use (match_dup 4)) - (use (reg:SI 19))] + (use (match_dup 4))] "!TARGET_64BIT" - "{rep\;stosb|rep stosb}" + "rep stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") (set_attr "mode" "QI")]) -(define_insn "rep_stosqi_rex64" +(define_insn "*rep_stosqi_rex64" [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") + (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_operand:DI 3 "register_operand" "0") (match_operand:DI 4 "register_operand" "1"))) (set (mem:BLK (match_dup 3)) (const_int 0)) (use (match_operand:QI 2 "register_operand" "a")) - (use (match_dup 4)) - (use (reg:DI 19))] + (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosb|rep stosb}" + "rep stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") (set_attr "mode" "QI")]) -(define_expand "cmpstrsi" +(define_expand "cmpstrnsi" [(set (match_operand:SI 0 "register_operand" "") (compare:SI (match_operand:BLK 1 "general_operand" "") (match_operand:BLK 2 "general_operand" ""))) @@ -15589,13 +19478,24 @@ { rtx addr1, addr2, out, outlow, count, countreg, align; + if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS) + FAIL; + + /* Can't use this if the user has appropriated esi 
or edi. */ + if (fixed_regs[SI_REG] || fixed_regs[DI_REG]) + FAIL; + out = operands[0]; - if (GET_CODE (out) != REG) + if (!REG_P (out)) out = gen_reg_rtx (SImode); addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0)); - + if (addr1 != XEXP (operands[1], 0)) + operands[1] = replace_equiv_address_nv (operands[1], addr1); + if (addr2 != XEXP (operands[2], 0)) + operands[2] = replace_equiv_address_nv (operands[2], addr2); + count = operands[3]; countreg = ix86_zero_extend_to_Pmode (count); @@ -15604,35 +19504,24 @@ once cc0 is dead. */ align = operands[4]; - emit_insn (gen_cld ()); - if (GET_CODE (count) == CONST_INT) + if (CONST_INT_P (count)) { if (INTVAL (count) == 0) { emit_move_insn (operands[0], const0_rtx); DONE; } - if (TARGET_64BIT) - emit_insn (gen_cmpstrqi_nz_rex_1 (addr1, addr2, countreg, align, - addr1, addr2, countreg)); - else - emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align, - addr1, addr2, countreg)); + emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); } else { if (TARGET_64BIT) - { - emit_insn (gen_cmpdi_1_rex64 (countreg, countreg)); - emit_insn (gen_cmpstrqi_rex_1 (addr1, addr2, countreg, align, - addr1, addr2, countreg)); - } + emit_insn (gen_cmpdi_1_rex64 (countreg, countreg)); else - { - emit_insn (gen_cmpsi_1 (countreg, countreg)); - emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align, - addr1, addr2, countreg)); - } + emit_insn (gen_cmpsi_1 (countreg, countreg)); + emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); } outlow = gen_lowpart (QImode, out); @@ -15649,13 +19538,13 @@ (define_expand "cmpintqi" [(set (match_dup 1) - (gtu:QI (reg:CC 17) (const_int 0))) + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) (set (match_dup 2) - (ltu:QI (reg:CC 17) (const_int 0))) + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) (parallel [(set (match_operand:QI 0 "register_operand" "") (minus:QI (match_dup 1) 
(match_dup 2))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "" "operands[1] = gen_reg_rtx (QImode); operands[2] = gen_reg_rtx (QImode);") @@ -15663,74 +19552,97 @@ ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is ;; zero. Emit extra code to make sure that a zero-length compare is EQ. -(define_insn "cmpstrqi_nz_1" - [(set (reg:CC 17) +(define_expand "cmpstrnqi_nz_1" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" ""))) + (use (match_operand 2 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*cmpstrnqi_nz_1" + [(set (reg:CC FLAGS_REG) (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) (mem:BLK (match_operand:SI 5 "register_operand" "1")))) (use (match_operand:SI 6 "register_operand" "2")) (use (match_operand:SI 3 "immediate_operand" "i")) - (use (reg:SI 19)) (clobber (match_operand:SI 0 "register_operand" "=S")) (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (match_operand:SI 2 "register_operand" "=c"))] "!TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -(define_insn "cmpstrqi_nz_rex_1" - [(set (reg:CC 17) +(define_insn "*cmpstrnqi_nz_rex_1" + [(set (reg:CC FLAGS_REG) (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) (mem:BLK (match_operand:DI 5 "register_operand" "1")))) (use (match_operand:DI 6 "register_operand" "2")) (use (match_operand:SI 3 "immediate_operand" "i")) - (use (reg:SI 19)) (clobber (match_operand:DI 0 "register_operand" "=S")) (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (match_operand:DI 2 "register_operand" "=c"))] "TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" 
[(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) ;; The same, but the count is not known to not be zero. -(define_insn "cmpstrqi_1" - [(set (reg:CC 17) +(define_expand "cmpstrnqi_1" + [(parallel [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand 2 "register_operand" "") + (const_int 0)) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" "")) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*cmpstrnqi_1" + [(set (reg:CC FLAGS_REG) (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2") (const_int 0)) (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) (mem:BLK (match_operand:SI 5 "register_operand" "1"))) (const_int 0))) (use (match_operand:SI 3 "immediate_operand" "i")) - (use (reg:CC 17)) - (use (reg:SI 19)) + (use (reg:CC FLAGS_REG)) (clobber (match_operand:SI 0 "register_operand" "=S")) (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (match_operand:SI 2 "register_operand" "=c"))] "!TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -(define_insn "cmpstrqi_rex_1" - [(set (reg:CC 17) +(define_insn "*cmpstrnqi_rex_1" + [(set (reg:CC FLAGS_REG) (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2") (const_int 0)) (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) (mem:BLK (match_operand:DI 5 "register_operand" "1"))) (const_int 0))) (use (match_operand:SI 3 "immediate_operand" "i")) - (use (reg:CC 17)) - (use (reg:SI 19)) + (use (reg:CC FLAGS_REG)) (clobber (match_operand:DI 0 "register_operand" "=S")) (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (match_operand:DI 2 "register_operand" "=c"))] 
"TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -15739,7 +19651,7 @@ [(set (match_operand:SI 0 "register_operand" "") (unspec:SI [(match_operand:BLK 1 "general_operand" "") (match_operand:QI 2 "immediate_operand" "") - (match_operand 3 "immediate_operand" "")] 0))] + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] "" { if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) @@ -15752,7 +19664,7 @@ [(set (match_operand:DI 0 "register_operand" "") (unspec:DI [(match_operand:BLK 1 "general_operand" "") (match_operand:QI 2 "immediate_operand" "") - (match_operand 3 "immediate_operand" "")] 0))] + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] "" { if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) @@ -15761,39 +19673,44 @@ FAIL; }) -(define_insn "strlenqi_1" +(define_expand "strlenqi_1" + [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*strlenqi_1" [(set (match_operand:SI 0 "register_operand" "=&c") (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1")) (match_operand:QI 2 "register_operand" "a") (match_operand:SI 3 "immediate_operand" "i") - (match_operand:SI 4 "register_operand" "0")] 0)) - (use (reg:SI 19)) + (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS)) (clobber (match_operand:SI 1 "register_operand" "=D")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - "repnz{\;| }scasb" + "repnz scasb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -(define_insn "strlenqi_rex_1" +(define_insn "*strlenqi_rex_1" [(set (match_operand:DI 0 "register_operand" "=&c") (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1")) (match_operand:QI 2 "register_operand" "a") 
(match_operand:DI 3 "immediate_operand" "i") - (match_operand:DI 4 "register_operand" "0")] 0)) - (use (reg:SI 19)) + (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS)) (clobber (match_operand:DI 1 "register_operand" "=D")) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" - "repnz{\;| }scasb" + "repnz scasb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -;; Peephole optimizations to clean up after cmpstr*. This should be +;; Peephole optimizations to clean up after cmpstrn*. This should be ;; handled in combine, but it is not currently up to the task. -;; When used for their truth value, the cmpstr* expanders generate +;; When used for their truth value, the cmpstrn* expanders generate ;; code like this: ;; ;; repz cmpsb @@ -15804,71 +19721,67 @@ ;; ;; The intermediate three instructions are unnecessary. -;; This one handles cmpstr*_nz_1... +;; This one handles cmpstrn*_nz_1... (define_peephole2 [(parallel[ - (set (reg:CC 17) + (set (reg:CC FLAGS_REG) (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) (mem:BLK (match_operand 5 "register_operand" "")))) (use (match_operand 6 "register_operand" "")) (use (match_operand:SI 3 "immediate_operand" "")) - (use (reg:SI 19)) (clobber (match_operand 0 "register_operand" "")) (clobber (match_operand 1 "register_operand" "")) (clobber (match_operand 2 "register_operand" ""))]) (set (match_operand:QI 7 "register_operand" "") - (gtu:QI (reg:CC 17) (const_int 0))) + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) (set (match_operand:QI 8 "register_operand" "") - (ltu:QI (reg:CC 17) (const_int 0))) - (set (reg 17) + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (reg FLAGS_REG) (compare (match_dup 7) (match_dup 8))) ] "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" [(parallel[ - (set (reg:CC 17) + (set (reg:CC FLAGS_REG) (compare:CC (mem:BLK (match_dup 4)) (mem:BLK (match_dup 5)))) (use (match_dup 6)) (use (match_dup 3)) - (use (reg:SI 
19)) (clobber (match_dup 0)) (clobber (match_dup 1)) (clobber (match_dup 2))])] "") -;; ...and this one handles cmpstr*_1. +;; ...and this one handles cmpstrn*_1. (define_peephole2 [(parallel[ - (set (reg:CC 17) + (set (reg:CC FLAGS_REG) (if_then_else:CC (ne (match_operand 6 "register_operand" "") (const_int 0)) (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) (mem:BLK (match_operand 5 "register_operand" ""))) (const_int 0))) (use (match_operand:SI 3 "immediate_operand" "")) - (use (reg:CC 17)) - (use (reg:SI 19)) + (use (reg:CC FLAGS_REG)) (clobber (match_operand 0 "register_operand" "")) (clobber (match_operand 1 "register_operand" "")) (clobber (match_operand 2 "register_operand" ""))]) (set (match_operand:QI 7 "register_operand" "") - (gtu:QI (reg:CC 17) (const_int 0))) + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) (set (match_operand:QI 8 "register_operand" "") - (ltu:QI (reg:CC 17) (const_int 0))) - (set (reg 17) + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (reg FLAGS_REG) (compare (match_dup 7) (match_dup 8))) ] "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" [(parallel[ - (set (reg:CC 17) + (set (reg:CC FLAGS_REG) (if_then_else:CC (ne (match_dup 6) (const_int 0)) (compare:CC (mem:BLK (match_dup 4)) (mem:BLK (match_dup 5))) (const_int 0))) (use (match_dup 3)) - (use (reg:CC 17)) - (use (reg:SI 19)) + (use (reg:CC FLAGS_REG)) (clobber (match_dup 0)) (clobber (match_dup 1)) (clobber (match_dup 2))])] @@ -15884,19 +19797,35 @@ (match_operand:DI 2 "general_operand" "") (match_operand:DI 3 "general_operand" "")))] "TARGET_64BIT" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn "x86_movdicc_0_m1_rex64" [(set (match_operand:DI 0 "register_operand" "=r") - (if_then_else:DI (ltu (reg:CC 17) (const_int 0)) + (if_then_else:DI (match_operand 1 "ix86_carry_flag_operator" "") (const_int -1) (const_int 0))) - (clobber (reg:CC 17))] + (clobber (reg:CC 
FLAGS_REG))] "TARGET_64BIT" "sbb{q}\t%0, %0" ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "DI") + (set_attr "length_immediate" "0")]) + +(define_insn "*x86_movdicc_0_m1_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extract:DI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{q}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") (set_attr "mode" "DI") @@ -15904,12 +19833,12 @@ (define_insn "*movdicc_c_rex64" [(set (match_operand:DI 0 "register_operand" "=r,r") - (if_then_else:DI (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + (if_then_else:DI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) (match_operand:DI 2 "nonimmediate_operand" "rm,0") (match_operand:DI 3 "nonimmediate_operand" "0,rm")))] "TARGET_64BIT && TARGET_CMOVE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ cmov%O2%C1\t{%2, %0|%0, %2} cmov%O2%c1\t{%3, %0|%0, %3}" @@ -15922,7 +19851,7 @@ (match_operand:SI 2 "general_operand" "") (match_operand:SI 3 "general_operand" "")))] "" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing ;; the register first winds up with `sbbl $0,reg', which is also weird. 
@@ -15930,15 +19859,31 @@ (define_insn "x86_movsicc_0_m1" [(set (match_operand:SI 0 "register_operand" "=r") - (if_then_else:SI (ltu (reg:CC 17) (const_int 0)) + (if_then_else:SI (match_operand 1 "ix86_carry_flag_operator" "") (const_int -1) (const_int 0))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "" "sbb{l}\t%0, %0" ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*x86_movsicc_0_m1_se" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{l}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") (set_attr "mode" "SI") @@ -15946,12 +19891,12 @@ (define_insn "*movsicc_noc" [(set (match_operand:SI 0 "register_operand" "=r,r") - (if_then_else:SI (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + (if_then_else:SI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) (match_operand:SI 2 "nonimmediate_operand" "rm,0") (match_operand:SI 3 "nonimmediate_operand" "0,rm")))] "TARGET_CMOVE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ cmov%O2%C1\t{%2, %0|%0, %2} cmov%O2%c1\t{%3, %0|%0, %3}" @@ -15961,41 +19906,71 @@ (define_expand "movhicc" [(set (match_operand:HI 0 "register_operand" "") (if_then_else:HI (match_operand 1 "comparison_operator" "") - (match_operand:HI 2 "nonimmediate_operand" "") - (match_operand:HI 3 "nonimmediate_operand" "")))] - "TARGET_CMOVE && TARGET_HIMODE_MATH" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + (match_operand:HI 2 "general_operand" "") + 
(match_operand:HI 3 "general_operand" "")))] + "TARGET_HIMODE_MATH" + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn "*movhicc_noc" [(set (match_operand:HI 0 "register_operand" "=r,r") - (if_then_else:HI (match_operator 1 "ix86_comparison_operator" - [(reg 17) (const_int 0)]) + (if_then_else:HI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) (match_operand:HI 2 "nonimmediate_operand" "rm,0") (match_operand:HI 3 "nonimmediate_operand" "0,rm")))] "TARGET_CMOVE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ cmov%O2%C1\t{%2, %0|%0, %2} cmov%O2%c1\t{%3, %0|%0, %3}" [(set_attr "type" "icmov") (set_attr "mode" "HI")]) -(define_expand "movsfcc" - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (match_operand 1 "comparison_operator" "") - (match_operand:SF 2 "register_operand" "") - (match_operand:SF 3 "register_operand" "")))] - "TARGET_CMOVE" - "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") +(define_expand "movqicc" + [(set (match_operand:QI 0 "register_operand" "") + (if_then_else:QI (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "general_operand" "") + (match_operand:QI 3 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") + +(define_insn_and_split "*movqicc_noc" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (if_then_else:QI (match_operator 1 "ix86_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) + (match_operand:QI 2 "register_operand" "r,0") + (match_operand:QI 3 "register_operand" "0,r")))] + "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" + "#" + "&& reload_completed" + [(set (match_dup 0) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 2) + (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) -(define_insn "*movsfcc_1" +(define_expand "movcc" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (if_then_else:X87MODEF + (match_operand 1 "comparison_operator" "") + (match_operand:X87MODEF 2 "register_operand" "") + (match_operand:X87MODEF 3 "register_operand" "")))] + "(TARGET_80387 && TARGET_CMOVE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") + +(define_insn "*movsfcc_1_387" [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") - (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" - [(reg 17) (const_int 0)]) + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] - "TARGET_CMOVE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != 
MEM)" + "TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ fcmov%F1\t{%2, %0|%0, %2} fcmov%f1\t{%3, %0|%0, %3} @@ -16004,22 +19979,14 @@ [(set_attr "type" "fcmov,fcmov,icmov,icmov") (set_attr "mode" "SF,SF,SI,SI")]) -(define_expand "movdfcc" - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operand 1 "comparison_operator" "") - (match_operand:DF 2 "register_operand" "") - (match_operand:DF 3 "register_operand" "")))] - "TARGET_CMOVE" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - (define_insn "*movdfcc_1" [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") - (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(reg 17) (const_int 0)]) + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] - "!TARGET_64BIT && TARGET_CMOVE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ fcmov%F1\t{%2, %0|%0, %2} fcmov%f1\t{%3, %0|%0, %3} @@ -16029,13 +19996,13 @@ (set_attr "mode" "DF")]) (define_insn "*movdfcc_1_rex64" - [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") - (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(reg 17) (const_int 0)]) + [(set (match_operand:DF 0 "register_operand" "=f,f,r,r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] - "TARGET_64BIT && TARGET_CMOVE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ fcmov%F1\t{%2, %0|%0, %2} fcmov%f1\t{%3, %0|%0, %3} @@ -16045,425 +20012,181 
@@ (set_attr "mode" "DF")]) (define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(match_operand 4 "" "") (const_int 0)]) + [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand" "") (match_operand:DF 3 "nonimmediate_operand" "")))] - "!TARGET_64BIT && !ANY_FP_REG_P (operands[0]) && reload_completed" + "!TARGET_64BIT && reload_completed" [(set (match_dup 2) (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) (match_dup 5) - (match_dup 7))) + (match_dup 6))) (set (match_dup 3) (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) - (match_dup 6) + (match_dup 7) (match_dup 8)))] - "split_di (operands+2, 1, operands+5, operands+6); - split_di (operands+3, 1, operands+7, operands+8); - split_di (operands, 1, operands+2, operands+3);") - -(define_expand "movxfcc" - [(set (match_operand:XF 0 "register_operand" "") - (if_then_else:XF (match_operand 1 "comparison_operator" "") - (match_operand:XF 2 "register_operand" "") - (match_operand:XF 3 "register_operand" "")))] - "!TARGET_64BIT && TARGET_CMOVE" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - -(define_expand "movtfcc" - [(set (match_operand:TF 0 "register_operand" "") - (if_then_else:TF (match_operand 1 "comparison_operator" "") - (match_operand:TF 2 "register_operand" "") - (match_operand:TF 3 "register_operand" "")))] - "TARGET_CMOVE" - "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") + "split_di (&operands[2], 2, &operands[5], &operands[7]); + split_di (&operands[0], 1, &operands[2], &operands[3]);") (define_insn "*movxfcc_1" [(set (match_operand:XF 0 "register_operand" "=f,f") - (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" - [(reg 17) (const_int 0)]) + (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) (match_operand:XF 2 "register_operand" "f,0") (match_operand:XF 3 "register_operand" "0,f")))] - "!TARGET_64BIT && TARGET_CMOVE" + "TARGET_80387 && TARGET_CMOVE" "@ fcmov%F1\t{%2, %0|%0, %2} fcmov%f1\t{%3, %0|%0, %3}" [(set_attr "type" "fcmov") (set_attr "mode" "XF")]) -(define_insn "*movtfcc_1" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (if_then_else:TF (match_operator 1 "fcmov_comparison_operator" - [(reg 17) (const_int 0)]) - (match_operand:TF 2 "register_operand" "f,0") - (match_operand:TF 3 "register_operand" "0,f")))] - "TARGET_CMOVE" - "@ - fcmov%F1\t{%2, %0|%0, %2} - fcmov%f1\t{%3, %0|%0, %3}" - [(set_attr "type" "fcmov") - (set_attr "mode" "XF")]) - -(define_expand "minsf3" - [(parallel [ - (set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))])] - "TARGET_SSE" - "") - -(define_insn "*minsf" - [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0,0,f#x") - (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))] - "TARGET_SSE && TARGET_IEEE_FP" - "#") - -(define_insn "*minsf_nonieee" - [(set (match_operand:SF 0 "register_operand" "=x#f,f#x") - (if_then_else:SF (lt (match_operand:SF 1 "nonimmediate_operand" "%0,0") - (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x")) - (match_dup 1) - (match_dup 2))) - (clobber 
(reg:CC 17))] - "TARGET_SSE && !TARGET_IEEE_FP - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "#") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))] - "SSE_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (match_dup 0) - (if_then_else:SF (lt (match_dup 1) - (match_dup 2)) - (match_dup 1) - (match_dup 2)))]) - -;; We can't represent the LT test directly. Do this by swapping the operands. - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "register_operand" "")) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "register_operand" ""))) - (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (reg:CCFP 17) - (compare:CCFP (match_dup 2) - (match_dup 1))) - (set (match_dup 0) - (if_then_else:SF (ge (reg:CCFP 17) (const_int 0)) - (match_dup 1) - (match_dup 2)))]) - -(define_insn "*minsf_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE && reload_completed" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_expand "mindf3" - [(parallel [ - (set 
(match_operand:DF 0 "register_operand" "") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))])] - "TARGET_SSE2 && TARGET_SSE_MATH" - "#") - -(define_insn "*mindf" - [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0,0,f#Y") - (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))] - "TARGET_SSE2 && TARGET_IEEE_FP && TARGET_SSE_MATH" - "#") - -(define_insn "*mindf_nonieee" - [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y") - (if_then_else:DF (lt (match_operand:DF 1 "nonimmediate_operand" "%0,0") - (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))] - "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "#") - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))] - "SSE_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (match_dup 0) - (if_then_else:DF (lt (match_dup 1) - (match_dup 2)) - (match_dup 1) - (match_dup 2)))]) - -;; We can't represent the LT test directly. Do this by swapping the operands. 
-(define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "register_operand" "")) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "register_operand" ""))) - (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (reg:CCFP 17) - (compare:CCFP (match_dup 2) - (match_dup 2))) - (set (match_dup 0) - (if_then_else:DF (ge (reg:CCFP 17) (const_int 0)) - (match_dup 1) - (match_dup 2)))]) - -(define_insn "*mindf_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "Ym")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" - "minsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "DF")]) - -(define_expand "maxsf3" - [(parallel [ - (set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))])] - "TARGET_SSE" - "#") - -(define_insn "*maxsf" - [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0,0,f#x") - (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))] - "TARGET_SSE && TARGET_IEEE_FP" - "#") - -(define_insn "*maxsf_nonieee" - [(set (match_operand:SF 0 "register_operand" "=x#f,f#x") - (if_then_else:SF (gt (match_operand:SF 1 "nonimmediate_operand" "%0,0") - (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))] - 
"TARGET_SSE && !TARGET_IEEE_FP - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "#") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))] - "SSE_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (match_dup 0) - (if_then_else:SF (gt (match_dup 1) - (match_dup 2)) - (match_dup 1) - (match_dup 2)))]) - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "register_operand" "")) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "register_operand" ""))) - (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (reg:CCFP 17) - (compare:CCFP (match_dup 1) - (match_dup 2))) +;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. 
+ +;; SSE5 conditional move +(define_insn "*sse5_pcmov_" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x,0") + (match_operand:MODEF 2 "register_operand" "0,x") + (match_operand:MODEF 3 "register_operand" "x,x")))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" + "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + +;; These versions of the min/max patterns are intentionally ignorant of +;; their behavior wrt -0.0 and NaN (via the commutative operand mark). +;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +;; are undefined in this condition, we're certain this is correct. + +(define_insn "*avx_3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smaxmin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vs\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smaxmin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "s\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. 
+ +(define_insn "*avx_ieee_smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vmins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*ieee_smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "mins\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +(define_insn "*avx_ieee_smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vmaxs\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*ieee_smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "maxs\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +;; Make two stack loads independent: +;; fld aa fld aa +;; fld %st(0) -> fld bb +;; fmul bb fmul %st(1), %st +;; +;; Actually we only match the last two instructions for simplicity. 
+(define_peephole2 + [(set (match_operand 0 "fp_register_operand" "") + (match_operand 1 "fp_register_operand" "")) (set (match_dup 0) - (if_then_else:SF (gt (reg:CCFP 17) (const_int 0)) - (match_dup 1) - (match_dup 2)))]) - -(define_insn "*maxsf_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE && reload_completed" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_expand "maxdf3" - [(parallel [ - (set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))])] - "TARGET_SSE2 && TARGET_SSE_MATH" - "#") - -(define_insn "*maxdf" - [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0,0,f#Y") - (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))] - "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_IEEE_FP" - "#") - -(define_insn "*maxdf_nonieee" - [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y") - (if_then_else:DF (gt (match_operand:DF 1 "nonimmediate_operand" "%0,0") - (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC 17))] - "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "#") + (match_operator 2 "binary_fp_operator" + [(match_dup 0) + (match_operand 3 "memory_operand" "")]))] + "REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (match_dup 4))] -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") - 
(match_operand:DF 2 "nonimmediate_operand" "")) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))] - "SSE_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (match_dup 0) - (if_then_else:DF (gt (match_dup 1) - (match_dup 2)) - (match_dup 1) - (match_dup 2)))]) + ;; The % modifier is not operational anymore in peephole2's, so we have to + ;; swap the operands manually in the case of addition and multiplication. + "if (COMMUTATIVE_ARITH_P (operands[2])) + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + operands[0], operands[1]); + else + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + operands[1], operands[0]);") + +;; Conditional addition patterns +(define_expand "addcc" + [(match_operand:SWI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:SWI 2 "register_operand" "") + (match_operand:SWI 3 "const_int_operand" "")] + "" + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "register_operand" "")) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "register_operand" ""))) - (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (reg:CCFP 17) - (compare:CCFP (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (if_then_else:DF (gt (reg:CCFP 17) (const_int 0)) - (match_dup 1) - (match_dup 2)))]) - 
-(define_insn "*maxdf_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "Ym")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" - "maxsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "DF")]) ;; Misc patterns (?) ;; This pattern exists to put a dependency on all ebp-based memory accesses. ;; Otherwise there will be nothing to keep -;; +;; ;; [(set (reg ebp) (reg esp))] ;; [(set (reg esp) (plus (reg esp) (const_int -160000))) ;; (clobber (eflags)] ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))] ;; ;; in proper program order. -(define_expand "pro_epilogue_adjust_stack" - [(parallel [(set (match_operand:SI 0 "register_operand" "=r,r") - (plus:SI (match_operand:SI 1 "register_operand" "0,r") - (match_operand:SI 2 "immediate_operand" "i,i"))) - (clobber (reg:CC 17)) - (clobber (mem:BLK (scratch)))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_pro_epilogue_adjust_stack_rex64 - (operands[0], operands[1], operands[2])); - DONE; - } -}) - -(define_insn "*pro_epilogue_adjust_stack_1" +(define_insn "pro_epilogue_adjust_stack_1" [(set (match_operand:SI 0 "register_operand" "=r,r") (plus:SI (match_operand:SI 1 "register_operand" "0,r") (match_operand:SI 2 "immediate_operand" "i,i"))) - (clobber (reg:CC 17)) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))] "!TARGET_64BIT" { @@ -16473,7 +20196,7 @@ return "mov{l}\t{%1, %0|%0, %1}"; case TYPE_ALU: - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -16488,7 +20211,7 @@ return "lea{l}\t{%a2, %0|%0, %a2}"; default: - abort (); + gcc_unreachable (); } } [(set (attr "type") @@ -16504,7 +20227,7 @@ [(set (match_operand:DI 0 "register_operand" "=r,r") (plus:DI (match_operand:DI 1 "register_operand" "0,r") 
(match_operand:DI 2 "x86_64_immediate_operand" "e,e"))) - (clobber (reg:CC 17)) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))] "TARGET_64BIT" { @@ -16514,7 +20237,9 @@ return "mov{q}\t{%1, %0|%0, %1}"; case TYPE_ALU: - if (GET_CODE (operands[2]) == CONST_INT + if (CONST_INT_P (operands[2]) + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -16529,7 +20254,7 @@ return "lea{q}\t{%a2, %0|%0, %a2}"; default: - abort (); + gcc_unreachable (); } } [(set (attr "type") @@ -16541,293 +20266,83 @@ (const_string "lea"))) (set_attr "mode" "DI")]) - -;; Placeholder for the conditional moves. This one is split either to SSE -;; based moves emulation or to usual cmove sequence. Little bit unfortunate -;; fact is that compares supported by the cmp??ss instructions are exactly -;; swapped of those supported by cmove sequence. -;; The EQ/NE comparisons also needs bit care, since they are not directly -;; supported by i387 comparisons and we do need to emit two conditional moves -;; in tandem. 
- -(define_insn "sse_movsfcc" - [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf") - (if_then_else:SF (match_operator 1 "sse_comparison_operator" - [(match_operand:SF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f") - (match_operand:SF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")]) - (match_operand:SF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx") - (match_operand:SF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx"))) - (clobber (match_scratch:SF 6 "=2,&4,X,X,X,X,X,X,X,X")) - (clobber (reg:CC 17))] - "TARGET_SSE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) - && (!TARGET_IEEE_FP - || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" - "#") - -(define_insn "sse_movsfcc_eq" - [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?r#xf,?r#xf") - (if_then_else:SF (eq (match_operand:SF 3 "nonimmediate_operand" "%0#fx,x#fx,f#x,xm#f,f#x,xm#f") - (match_operand:SF 4 "nonimmediate_operand" "xm#f,xm#f,f#x,x#f,f#x,x#f")) - (match_operand:SF 1 "nonimmediate_operand" "x#fr,0#fr,0#fx,0#fx,0#rx,0#rx") - (match_operand:SF 2 "nonimmediate_operand" "x#fr,x#fr,f#fx,f#fx,rm#rx,rm#rx"))) - (clobber (match_scratch:SF 5 "=1,&3,X,X,X,X")) - (clobber (reg:CC 17))] - "TARGET_SSE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" - "#") - -(define_insn "sse_movdfcc" - [(set (match_operand:DF 0 "register_operand" "=&Y#rf,Y#rf,?f#Yr,?f#Yr,?f#Yr,?f#Yr,?r#Yf,?r#Yf,?r#Yf,?r#Yf") - (if_then_else:DF (match_operator 1 "sse_comparison_operator" - [(match_operand:DF 4 "nonimmediate_operand" "0#fY,Y#fY,f#Y,f#Y,Ym#f,Ym#f,f#Y,f#Y,Ym#f,Ym#f") - (match_operand:DF 5 "nonimmediate_operand" "Ym#f,Ym#f,f#Y,f#Y,Y#f,Y#f,f#Y,f#Y,Y#f,Y#f")]) - (match_operand:DF 2 "nonimmediate_operand" "Y#fr,0#fr,f#fY,0#fY,f#fY,0#fY,rm#rY,0#rY,rm#rY,0#rY") - (match_operand:DF 3 
"nonimmediate_operand" "Y#fr,Y#fr,0#fY,f#fY,0#fY,f#fY,0#fY,rm#rY,0#rY,rm#rY"))) - (clobber (match_scratch:DF 6 "=2,&4,X,X,X,X,X,X,X,X")) - (clobber (reg:CC 17))] - "TARGET_SSE2 - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) - && (!TARGET_IEEE_FP - || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" - "#") - -(define_insn "sse_movdfcc_eq" - [(set (match_operand:DF 0 "register_operand" "=&Y#rf,Y#rf,?f#Yr,?f#Yr,?r#Yf,?r#Yf") - (if_then_else:DF (eq (match_operand:DF 3 "nonimmediate_operand" "%0#fY,Y#fY,f#Y,Ym#f,f#Y,Ym#f") - (match_operand:DF 4 "nonimmediate_operand" "Ym#f,Ym#f,f#Y,Y#f,f#Y,Y#f")) - (match_operand:DF 1 "nonimmediate_operand" "Y#fr,0#fr,0#fY,0#fY,0#rY,0#rY") - (match_operand:DF 2 "nonimmediate_operand" "Y#fr,Y#fr,f#fY,f#fY,rm#rY,rm#rY"))) - (clobber (match_scratch:DF 5 "=1,&3,X,X,X,X")) - (clobber (reg:CC 17))] - "TARGET_SSE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" - "#") - -;; For non-sse moves just expand the usual cmove sequence. 
-(define_split - [(set (match_operand 0 "register_operand" "") - (if_then_else (match_operator 1 "comparison_operator" - [(match_operand 4 "nonimmediate_operand" "") - (match_operand 5 "register_operand" "")]) - (match_operand 2 "nonimmediate_operand" "") - (match_operand 3 "nonimmediate_operand" ""))) - (clobber (match_operand 6 "" "")) - (clobber (reg:CC 17))] - "!SSE_REG_P (operands[0]) && reload_completed - && VALID_SSE_REG_MODE (GET_MODE (operands[0]))" - [(const_int 0)] +(define_insn "pro_epilogue_adjust_stack_rex64_2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand:DI 3 "immediate_operand" "i,i"))) + (use (match_operand:DI 2 "register_operand" "r,r")) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" { - ix86_compare_op0 = operands[5]; - ix86_compare_op1 = operands[4]; - operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])), - VOIDmode, operands[5], operands[4]); - ix86_expand_fp_movcc (operands); - DONE; -}) - -;; Split SSE based conditional move into seqence: -;; cmpCC op0, op4 - set op0 to 0 or ffffffff depending on the comparison -;; and op2, op0 - zero op2 if comparison was false -;; nand op0, op3 - load op3 to op0 if comparison was false -;; or op2, op0 - get the non-zero one into the result. 
-(define_split - [(set (match_operand 0 "register_operand" "") - (if_then_else (match_operator 1 "sse_comparison_operator" - [(match_operand 4 "register_operand" "") - (match_operand 5 "nonimmediate_operand" "")]) - (match_operand 2 "register_operand" "") - (match_operand 3 "register_operand" ""))) - (clobber (match_operand 6 "" "")) - (clobber (reg:CC 17))] - "SSE_REG_P (operands[0]) && reload_completed" - [(set (match_dup 4) (match_op_dup 1 [(match_dup 4) (match_dup 5)])) - (set (subreg:TI (match_dup 2) 0) (and:TI (subreg:TI (match_dup 2) 0) - (subreg:TI (match_dup 4) 0))) - (set (subreg:TI (match_dup 4) 0) (and:TI (not:TI (subreg:TI (match_dup 4) 0)) - (subreg:TI (match_dup 3) 0))) - (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0) - (subreg:TI (match_dup 7) 0)))] -{ - /* If op2 == op3, op3 will be clobbered before it is used. - This should be optimized out though. */ - if (operands_match_p (operands[2], operands[3])) - abort (); - PUT_MODE (operands[1], GET_MODE (operands[0])); - if (operands_match_p (operands[0], operands[4])) - operands[6] = operands[4], operands[7] = operands[2]; - else - operands[6] = operands[2], operands[7] = operands[4]; -}) - -;; Special case of conditional move we can handle effectivly. -;; Do not brother with the integer/floating point case, since these are -;; bot considerably slower, unlike in the generic case. 
-(define_insn "*sse_movsfcc_const0_1" - [(set (match_operand:SF 0 "register_operand" "=&x") - (if_then_else:SF (match_operator 1 "sse_comparison_operator" - [(match_operand:SF 4 "register_operand" "0") - (match_operand:SF 5 "nonimmediate_operand" "xm")]) - (match_operand:SF 2 "register_operand" "x") - (match_operand:SF 3 "const0_operand" "X")))] - "TARGET_SSE" - "#") - -(define_insn "*sse_movsfcc_const0_2" - [(set (match_operand:SF 0 "register_operand" "=&x") - (if_then_else:SF (match_operator 1 "sse_comparison_operator" - [(match_operand:SF 4 "register_operand" "0") - (match_operand:SF 5 "nonimmediate_operand" "xm")]) - (match_operand:SF 2 "const0_operand" "X") - (match_operand:SF 3 "register_operand" "x")))] - "TARGET_SSE" - "#") - -(define_insn "*sse_movsfcc_const0_3" - [(set (match_operand:SF 0 "register_operand" "=&x") - (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" - [(match_operand:SF 4 "nonimmediate_operand" "xm") - (match_operand:SF 5 "register_operand" "0")]) - (match_operand:SF 2 "register_operand" "x") - (match_operand:SF 3 "const0_operand" "X")))] - "TARGET_SSE" - "#") - -(define_insn "*sse_movsfcc_const0_4" - [(set (match_operand:SF 0 "register_operand" "=&x") - (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" - [(match_operand:SF 4 "nonimmediate_operand" "xm") - (match_operand:SF 5 "register_operand" "0")]) - (match_operand:SF 2 "const0_operand" "X") - (match_operand:SF 3 "register_operand" "x")))] - "TARGET_SSE" - "#") - -(define_insn "*sse_movdfcc_const0_1" - [(set (match_operand:DF 0 "register_operand" "=&Y") - (if_then_else:DF (match_operator 1 "sse_comparison_operator" - [(match_operand:DF 4 "register_operand" "0") - (match_operand:DF 5 "nonimmediate_operand" "Ym")]) - (match_operand:DF 2 "register_operand" "Y") - (match_operand:DF 3 "const0_operand" "X")))] - "TARGET_SSE2" - "#") - -(define_insn "*sse_movdfcc_const0_2" - [(set (match_operand:DF 0 "register_operand" "=&Y") - (if_then_else:DF (match_operator 1 
"sse_comparison_operator" - [(match_operand:DF 4 "register_operand" "0") - (match_operand:DF 5 "nonimmediate_operand" "Ym")]) - (match_operand:DF 2 "const0_operand" "X") - (match_operand:DF 3 "register_operand" "Y")))] - "TARGET_SSE2" - "#") - -(define_insn "*sse_movdfcc_const0_3" - [(set (match_operand:DF 0 "register_operand" "=&Y") - (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(match_operand:DF 4 "nonimmediate_operand" "Ym") - (match_operand:DF 5 "register_operand" "0")]) - (match_operand:DF 2 "register_operand" "Y") - (match_operand:DF 3 "const0_operand" "X")))] - "TARGET_SSE2" - "#") + switch (get_attr_type (insn)) + { + case TYPE_ALU: + return "add{q}\t{%2, %0|%0, %2}"; -(define_insn "*sse_movdfcc_const0_4" - [(set (match_operand:DF 0 "register_operand" "=&Y") - (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(match_operand:DF 4 "nonimmediate_operand" "Ym") - (match_operand:DF 5 "register_operand" "0")]) - (match_operand:DF 2 "const0_operand" "X") - (match_operand:DF 3 "register_operand" "Y")))] - "TARGET_SSE2" - "#") + case TYPE_LEA: + operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]); + return "lea{q}\t{%a2, %0|%0, %a2}"; -(define_split - [(set (match_operand 0 "register_operand" "") - (if_then_else (match_operator 1 "comparison_operator" - [(match_operand 4 "register_operand" "") - (match_operand 5 "nonimmediate_operand" "")]) - (match_operand 2 "nonmemory_operand" "") - (match_operand 3 "nonmemory_operand" "")))] - "SSE_REG_P (operands[0]) && reload_completed - && (const0_operand (operands[2], GET_MODE (operands[0])) - || const0_operand (operands[3], GET_MODE (operands[0])))" - [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)])) - (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6) - (subreg:TI (match_dup 7) 0)))] -{ - PUT_MODE (operands[1], GET_MODE (operands[0])); - if (!sse_comparison_operator (operands[1], VOIDmode)) - { - rtx tmp = operands[5]; - operands[5] = operands[4]; - 
operands[4] = tmp; - PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); - } - if (const0_operand (operands[2], GET_MODE (operands[0]))) - { - operands[7] = operands[3]; - operands[6] = gen_rtx_NOT (TImode, gen_rtx_SUBREG (TImode, operands[0], - 0)); - } - else - { - operands[7] = operands[2]; - operands[6] = gen_rtx_SUBREG (TImode, operands[0], 0); + default: + gcc_unreachable (); } -}) - -(define_expand "allocate_stack_worker" - [(match_operand:SI 0 "register_operand" "")] - "TARGET_STACK_PROBE" -{ - if (TARGET_64BIT) - emit_insn (gen_allocate_stack_worker_rex64 (operands[0])); - else - emit_insn (gen_allocate_stack_worker_1 (operands[0])); - DONE; -}) +} + [(set_attr "type" "alu,lea") + (set_attr "mode" "DI")]) -(define_insn "allocate_stack_worker_1" - [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3) - (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0))) - (clobber (match_dup 0)) - (clobber (reg:CC 17))] +(define_insn "allocate_stack_worker_32" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")] + UNSPECV_STACK_PROBE)) + (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_STACK_PROBE" - "call\t__alloca" + "call\t___chkstk" [(set_attr "type" "multi") (set_attr "length" "5")]) -(define_insn "allocate_stack_worker_rex64" - [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] 3) - (set (reg:DI 7) (minus:DI (reg:DI 7) (match_dup 0))) - (clobber (match_dup 0)) - (clobber (reg:CC 17))] +(define_insn "allocate_stack_worker_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")] + UNSPECV_STACK_PROBE)) + (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 1))) + (clobber (reg:DI R10_REG)) + (clobber (reg:DI R11_REG)) + (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && TARGET_STACK_PROBE" - "call\t__alloca" + "call\t___chkstk" [(set_attr 
"type" "multi") (set_attr "length" "5")]) (define_expand "allocate_stack" - [(parallel [(set (match_operand:SI 0 "register_operand" "=r") - (minus:SI (reg:SI 7) - (match_operand:SI 1 "general_operand" ""))) - (clobber (reg:CC 17))]) - (parallel [(set (reg:SI 7) - (minus:SI (reg:SI 7) (match_dup 1))) - (clobber (reg:CC 17))])] + [(match_operand 0 "register_operand" "") + (match_operand 1 "general_operand" "")] "TARGET_STACK_PROBE" { -#ifdef CHECK_STACK_LIMIT - if (GET_CODE (operands[1]) == CONST_INT - && INTVAL (operands[1]) < CHECK_STACK_LIMIT) - emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, - operands[1])); - else + rtx x; + +#ifndef CHECK_STACK_LIMIT +#define CHECK_STACK_LIMIT 0 #endif - emit_insn (gen_allocate_stack_worker (copy_to_mode_reg (SImode, - operands[1]))); + + if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < CHECK_STACK_LIMIT) + { + x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, operands[1], + stack_pointer_rtx, 0, OPTAB_DIRECT); + if (x != stack_pointer_rtx) + emit_move_insn (stack_pointer_rtx, x); + } + else + { + x = copy_to_mode_reg (Pmode, operands[1]); + if (TARGET_64BIT) + x = gen_allocate_stack_worker_64 (x, x); + else + x = gen_allocate_stack_worker_32 (x, x); + emit_insn (x); + } emit_move_insn (operands[0], virtual_stack_dynamic_rtx); DONE; @@ -16837,7 +20352,20 @@ [(label_ref (match_operand 0 "" ""))] "!TARGET_64BIT && flag_pic" { - load_pic_register (); +#if TARGET_MACHO + if (TARGET_MACHO) + { + rtx xops[3]; + rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); + rtx label_rtx = gen_label_rtx (); + emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx)); + xops[0] = xops[1] = picreg; + xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx)); + ix86_expand_binary_operator (MINUS, SImode, xops); + } + else +#endif + emit_insn (gen_set_got (pic_offset_table_rtx)); DONE; }) @@ -16848,16 +20376,18 @@ (match_operator 3 "promotable_binary_operator" 
[(match_operand 1 "register_operand" "") (match_operand 2 "aligned_operand" "")])) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ((GET_MODE (operands[0]) == HImode - && (!optimize_size || GET_CODE (operands[2]) != CONST_INT - || CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))) - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + && ((GET_MODE (operands[0]) == HImode + && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX) + /* ??? next two lines just !satisfies_constraint_K (...) */ + || !CONST_INT_P (operands[2]) + || satisfies_constraint_K (operands[2]))) + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))" [(parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); if (GET_CODE (operands[3]) != ASHIFT) @@ -16869,63 +20399,69 @@ ; instruction size is unchanged, except in the %eax case for ; which it is increased by one byte, hence the ! optimize_size. (define_split - [(set (reg 17) - (compare (and (match_operand 1 "aligned_operand" "") - (match_operand 2 "const_int_operand" "")) - (const_int 0))) - (set (match_operand 0 "register_operand" "") - (and (match_dup 1) (match_dup 2)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(and (match_operand 3 "aligned_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)])) + (set (match_operand 1 "register_operand" "") + (and (match_dup 3) (match_dup 4)))] "! TARGET_PARTIAL_REG_STALL && reload_completed + && optimize_insn_for_speed_p () + && ((GET_MODE (operands[1]) == HImode && ! 
TARGET_FAST_PREFIX) + || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode)) /* Ensure that the operand will remain sign-extended immediate. */ - && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode) - && ! optimize_size - && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (and:SI (match_dup 1) (match_dup 2)) - (const_int 0))) - (set (match_dup 0) - (and:SI (match_dup 1) (match_dup 2)))])] - "operands[2] - = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]) - & GET_MODE_MASK (GET_MODE (operands[0])), - SImode)); - operands[0] = gen_lowpart (SImode, operands[0]); - operands[1] = gen_lowpart (SImode, operands[1]);") + && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4)) + (const_int 0)])) + (set (match_dup 1) + (and:SI (match_dup 3) (match_dup 4)))])] +{ + operands[4] + = gen_int_mode (INTVAL (operands[4]) + & GET_MODE_MASK (GET_MODE (operands[1])), SImode); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[3] = gen_lowpart (SImode, operands[3]); +}) ; Don't promote the QImode tests, as i386 doesn't have encoding of ; the TEST instruction with 32-bit sign-extended immediate and thus ; the instruction size would at least double, which is not what we ; want even with ! optimize_size. (define_split - [(set (reg 17) - (compare (and (match_operand:HI 0 "aligned_operand" "") - (match_operand:HI 1 "const_int_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand:HI 2 "aligned_operand" "") + (match_operand:HI 3 "const_int_operand" "")) + (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && reload_completed + && ! TARGET_FAST_PREFIX + && optimize_insn_for_speed_p () /* Ensure that the operand will remain sign-extended immediate. 
*/ - && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode) - && ! optimize_size" - [(set (reg:CCNO 17) - (compare:CCNO (and:SI (match_dup 0) (match_dup 1)) - (const_int 0)))] - "operands[1] - = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]) - & GET_MODE_MASK (GET_MODE (operands[0])), - SImode)); - operands[0] = gen_lowpart (SImode, operands[0]);") + && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)" + [(set (match_dup 0) + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) + (const_int 0)]))] +{ + operands[3] + = gen_int_mode (INTVAL (operands[3]) + & GET_MODE_MASK (GET_MODE (operands[2])), SImode); + operands[2] = gen_lowpart (SImode, operands[2]); +}) (define_split [(set (match_operand 0 "register_operand" "") (neg (match_operand 1 "register_operand" ""))) - (clobber (reg:CC 17))] + (clobber (reg:CC FLAGS_REG))] "! TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" [(parallel [(set (match_dup 0) (neg:SI (match_dup 1))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]);") @@ -16934,29 +20470,31 @@ (not (match_operand 1 "register_operand" "")))] "! 
TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" [(set (match_dup 0) (not:SI (match_dup 1)))] "operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]);") -(define_split +(define_split [(set (match_operand 0 "register_operand" "") - (if_then_else (match_operator 1 "comparison_operator" - [(reg 17) (const_int 0)]) + (if_then_else (match_operator 1 "comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) (match_operand 2 "register_operand" "") (match_operand 3 "register_operand" "")))] "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" [(set (match_dup 0) (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))] "operands[0] = gen_lowpart (SImode, operands[0]); operands[2] = gen_lowpart (SImode, operands[2]); operands[3] = gen_lowpart (SImode, operands[3]);") - + ;; RTL Peephole optimizations, run before sched2. These primarily look to ;; transform a complex memory operation into two memory to register operations. @@ -16966,7 +20504,8 @@ [(set (match_operand:SI 0 "push_operand" "") (match_operand:SI 1 "memory_operand" "")) (match_scratch:SI 2 "r")] - "! optimize_size && ! TARGET_PUSH_MEMORY" + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -16975,7 +20514,8 @@ [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "memory_operand" "")) (match_scratch:DI 2 "r")] - "! optimize_size && ! 
TARGET_PUSH_MEMORY" + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -16986,7 +20526,8 @@ [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "memory_operand" "")) (match_scratch:SF 2 "r")] - "! optimize_size && ! TARGET_PUSH_MEMORY" + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -16995,7 +20536,8 @@ [(set (match_operand:HI 0 "push_operand" "") (match_operand:HI 1 "memory_operand" "")) (match_scratch:HI 2 "r")] - "! optimize_size && ! TARGET_PUSH_MEMORY" + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -17004,7 +20546,8 @@ [(set (match_operand:QI 0 "push_operand" "") (match_operand:QI 1 "memory_operand" "")) (match_scratch:QI 2 "q")] - "! optimize_size && ! TARGET_PUSH_MEMORY" + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -17015,13 +20558,13 @@ [(match_scratch:SI 1 "r") (set (match_operand:SI 0 "memory_operand" "") (const_int 0))] - "! optimize_size + "optimize_insn_for_speed_p () && ! TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 1) (const_int 0)) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 1))] "") @@ -17029,37 +20572,37 @@ [(match_scratch:HI 1 "r") (set (match_operand:HI 0 "memory_operand" "") (const_int 0))] - "! optimize_size + "optimize_insn_for_speed_p () && ! 
TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 2) (const_int 0)) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 1))] - "operands[2] = gen_rtx_REG (SImode, true_regnum (operands[1]));") + "operands[2] = gen_lowpart (SImode, operands[1]);") (define_peephole2 [(match_scratch:QI 1 "q") (set (match_operand:QI 0 "memory_operand" "") (const_int 0))] - "! optimize_size + "optimize_insn_for_speed_p () && ! TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 2) (const_int 0)) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 1))] - "operands[2] = gen_rtx_REG (SImode, true_regnum (operands[1]));") + "operands[2] = gen_lowpart (SImode, operands[1]);") (define_peephole2 [(match_scratch:SI 2 "r") (set (match_operand:SI 0 "memory_operand" "") (match_operand:SI 1 "immediate_operand" ""))] - "! optimize_size - && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + "optimize_insn_for_speed_p () + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -17068,8 +20611,9 @@ [(match_scratch:HI 2 "r") (set (match_operand:HI 0 "memory_operand" "") (match_operand:HI 1 "immediate_operand" ""))] - "! 
optimize_size && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + "optimize_insn_for_speed_p () + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -17078,28 +20622,30 @@ [(match_scratch:QI 2 "q") (set (match_operand:QI 0 "memory_operand" "") (match_operand:QI 1 "immediate_operand" ""))] - "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + "optimize_insn_for_speed_p () + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") ;; Don't compare memory with zero, load and use a test instead. (define_peephole2 - [(set (reg 17) - (compare (match_operand:SI 0 "memory_operand" "") - (const_int 0))) + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand:SI 2 "memory_operand" "") + (const_int 0)])) (match_scratch:SI 3 "r")] - "ix86_match_ccmode (insn, CCNOmode) && ! optimize_size" - [(set (match_dup 3) (match_dup 0)) - (set (reg:CCNO 17) (compare:CCNO (match_dup 3) (const_int 0)))] + "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))] "") -;; NOT is not pairable on Pentium, while XOR is, but one byte longer. +;; NOT is not pairable on Pentium, while XOR is, but one byte longer. ;; Don't split NOTs with a displacement operand, because resulting XOR -;; will not be pariable anyway. +;; will not be pairable anyway. ;; -;; On AMD K6, NOT is vector decoded with memory operand that can not be +;; On AMD K6, NOT is vector decoded with memory operand that cannot be ;; represented using a modRM byte. The XOR replacement is long decoded, ;; so this split helps here as well. 
;; @@ -17109,43 +20655,43 @@ (define_peephole2 [(set (match_operand:SI 0 "nonimmediate_operand" "") (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] - "!optimize_size - && peep2_regno_dead_p (0, FLAGS_REG) - && ((TARGET_PENTIUM - && (GET_CODE (operands[0]) != MEM + "optimize_insn_for_speed_p () + && ((TARGET_NOT_UNPAIRABLE + && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], SImode))) - || (TARGET_K6 && long_memory_operand (operands[0], SImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (xor:SI (match_dup 1) (const_int -1))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_peephole2 [(set (match_operand:HI 0 "nonimmediate_operand" "") (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] - "!optimize_size - && peep2_regno_dead_p (0, FLAGS_REG) - && ((TARGET_PENTIUM - && (GET_CODE (operands[0]) != MEM + "optimize_insn_for_speed_p () + && ((TARGET_NOT_UNPAIRABLE + && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], HImode))) - || (TARGET_K6 && long_memory_operand (operands[0], HImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (xor:HI (match_dup 1) (const_int -1))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_peephole2 [(set (match_operand:QI 0 "nonimmediate_operand" "") (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] - "!optimize_size - && peep2_regno_dead_p (0, FLAGS_REG) - && ((TARGET_PENTIUM - && (GET_CODE (operands[0]) != MEM + "optimize_insn_for_speed_p () + && ((TARGET_NOT_UNPAIRABLE + && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], QImode))) - || (TARGET_K6 && long_memory_operand (operands[0], QImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" 
[(parallel [(set (match_dup 0) (xor:QI (match_dup 1) (const_int -1))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") ;; Non pairable "test imm, reg" instructions can be translated to @@ -17156,76 +20702,76 @@ ;; versions if we're concerned about partial register stalls. (define_peephole2 - [(set (reg 17) - (compare (and:SI (match_operand:SI 0 "register_operand" "") - (match_operand:SI 1 "immediate_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")) + (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode) - && (true_regnum (operands[0]) != 0 - || CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K')) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + && (true_regnum (operands[2]) != AX_REG + || satisfies_constraint_K (operands[3])) + && peep2_reg_dead_p (1, operands[2])" [(parallel - [(set (reg:CCNO 17) - (compare:CCNO (and:SI (match_dup 0) - (match_dup 1)) - (const_int 0))) - (set (match_dup 0) - (and:SI (match_dup 0) (match_dup 1)))])] + [(set (match_dup 0) + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:SI (match_dup 2) (match_dup 3)))])] "") ;; We don't need to handle HImode case, because it will be promoted to SImode ;; on ! TARGET_PARTIAL_REG_STALL (define_peephole2 - [(set (reg 17) - (compare (and:QI (match_operand:QI 0 "register_operand" "") - (match_operand:QI 1 "immediate_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:QI (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "immediate_operand" "")) + (const_int 0)]))] "! 
TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[0]) != 0 - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + && true_regnum (operands[2]) != AX_REG + && peep2_reg_dead_p (1, operands[2])" [(parallel - [(set (reg:CCNO 17) - (compare:CCNO (and:QI (match_dup 0) - (match_dup 1)) - (const_int 0))) - (set (match_dup 0) - (and:QI (match_dup 0) (match_dup 1)))])] + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:QI (match_dup 2) (match_dup 3)))])] "") (define_peephole2 - [(set (reg 17) - (compare - (and:SI - (zero_extract:SI - (match_operand 0 "ext_register_operand" "") - (const_int 8) - (const_int 8)) - (match_operand 1 "const_int_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI + (zero_extract:SI + (match_operand 2 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[0]) != 0 - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO - (and:SI - (zero_extract:SI - (match_dup 0) - (const_int 8) - (const_int 8)) - (match_dup 1)) - (const_int 0))) - (set (zero_extract:SI (match_dup 0) + && true_regnum (operands[2]) != AX_REG + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 + [(and:SI + (zero_extract:SI + (match_dup 2) + (const_int 8) + (const_int 8)) + (match_dup 3)) + (const_int 0)])) + (set (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8)) - (and:SI + (and:SI (zero_extract:SI - (match_dup 0) + (match_dup 2) (const_int 8) (const_int 8)) - (match_dup 1)))])] + (match_dup 3)))])] "") ;; Don't do logical operations with memory inputs. 
@@ -17235,12 +20781,12 @@ (match_operator:SI 3 "arith_or_logical_operator" [(match_dup 0) (match_operand:SI 1 "memory_operand" "")])) - (clobber (reg:CC 17))])] - "! optimize_size && ! TARGET_READ_MODIFY" + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 2)])) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_peephole2 @@ -17249,12 +20795,47 @@ (match_operator:SI 3 "arith_or_logical_operator" [(match_operand:SI 1 "memory_operand" "") (match_dup 0)])) - (clobber (reg:CC 17))])] - "! optimize_size && ! TARGET_READ_MODIFY" + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 2) (match_dup 0)])) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] + "") + +;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when the memory address +;; refers to the destination of the load! 
+ +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (match_dup 0) + (match_operator:SI 3 "commutative_operator" + [(match_dup 0) + (match_operand:SI 2 "memory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "REGNO (operands[0]) != REGNO (operands[1]) + && GENERAL_REGNO_P (REGNO (operands[0])) + && GENERAL_REGNO_P (REGNO (operands[1]))" + [(set (match_dup 0) (match_dup 4)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 1)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = replace_rtx (operands[2], operands[0], operands[1]);") + +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "register_operand" "")) + (set (match_dup 0) + (match_operator 3 "commutative_operator" + [(match_dup 0) + (match_operand 2 "memory_operand" "")]))] + "REGNO (operands[0]) != REGNO (operands[1]) + && ((MMX_REG_P (operands[0]) && MMX_REG_P (operands[1])) + || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 1)]))] "") ; Don't do logical operations with memory outputs @@ -17269,12 +20850,12 @@ (match_operator:SI 3 "arith_or_logical_operator" [(match_dup 0) (match_operand:SI 1 "nonmemory_operand" "")])) - (clobber (reg:CC 17))])] - "! optimize_size && ! TARGET_READ_MODIFY_WRITE" + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 2) (match_dup 1)])) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 2))] "") @@ -17284,53 +20865,52 @@ (match_operator:SI 3 "arith_or_logical_operator" [(match_operand:SI 1 "nonmemory_operand" "") (match_dup 0)])) - (clobber (reg:CC 17))])] - "! optimize_size && ! 
TARGET_READ_MODIFY_WRITE" + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) - (clobber (reg:CC 17))]) + (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 2))] "") ;; Attempt to always use XOR for zeroing registers. (define_peephole2 [(set (match_operand 0 "register_operand" "") - (const_int 0))] - "(GET_MODE (operands[0]) == QImode - || GET_MODE (operands[0]) == HImode - || GET_MODE (operands[0]) == SImode - || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) - && (! TARGET_USE_MOV0 || optimize_size) + (match_operand 1 "const0_operand" ""))] + "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ()) + && GENERAL_REG_P (operands[0]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int 0)) - (clobber (reg:CC 17))])] - "operands[0] = gen_rtx_REG (GET_MODE (operands[0]) == DImode ? DImode : SImode, - true_regnum (operands[0]));") + (clobber (reg:CC FLAGS_REG))])] +{ + operands[0] = gen_lowpart (word_mode, operands[0]); +}) (define_peephole2 [(set (strict_low_part (match_operand 0 "register_operand" "")) (const_int 0))] "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode) - && (! TARGET_USE_MOV0 || optimize_size) + && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ()) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0)) - (clobber (reg:CC 17))])]) + (clobber (reg:CC FLAGS_REG))])]) ;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg. 
(define_peephole2 [(set (match_operand 0 "register_operand" "") (const_int -1))] "(GET_MODE (operands[0]) == HImode - || GET_MODE (operands[0]) == SImode + || GET_MODE (operands[0]) == SImode || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) - && (optimize_size || TARGET_PENTIUM) + && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int -1)) - (clobber (reg:CC 17))])] - "operands[0] = gen_rtx_REG (GET_MODE (operands[0]) == DImode ? DImode : SImode, - true_regnum (operands[0]));") + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode, + operands[0]);") ;; Attempt to convert simple leas to adds. These can be created by ;; move expanders. @@ -17340,7 +20920,7 @@ (match_operand:SI 1 "nonmemory_operand" "")))] "peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_peephole2 @@ -17349,7 +20929,7 @@ (match_operand:DI 2 "nonmemory_operand" "")) 0))] "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])" [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "operands[2] = gen_lowpart (SImode, operands[2]);") (define_peephole2 @@ -17358,7 +20938,7 @@ (match_operand:DI 1 "x86_64_general_operand" "")))] "peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "") (define_peephole2 @@ -17368,7 +20948,7 @@ "exact_log2 (INTVAL (operands[1])) >= 0 && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") (define_peephole2 @@ 
-17378,18 +20958,18 @@ "exact_log2 (INTVAL (operands[1])) >= 0 && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") (define_peephole2 [(set (match_operand:SI 0 "register_operand" "") (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "const_int_operand" "")) 0))] - "exact_log2 (INTVAL (operands[1])) >= 0 + "exact_log2 (INTVAL (operands[2])) >= 0 && REGNO (operands[0]) == REGNO (operands[1]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) - (clobber (reg:CC 17))])] + (clobber (reg:CC FLAGS_REG))])] "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));") ;; The ESP adjustments can be done by the push and pop instructions. Resulting @@ -17413,52 +20993,52 @@ (define_peephole2 [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) - (clobber (reg:CC 17)) + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_SUB_ESP_4" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" [(clobber (match_dup 0)) - (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) (clobber (mem:BLK (scratch)))])]) (define_peephole2 [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (clobber (reg:CC 17)) + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_SUB_ESP_8" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" [(clobber (match_dup 0)) - (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) - (parallel [(set (mem:SI (pre_dec:SI (reg:SI 
7))) (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) + (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) (clobber (mem:BLK (scratch)))])]) ;; Convert esp subtractions to push. (define_peephole2 [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) - (clobber (reg:CC 17))])] - "optimize_size || !TARGET_SUB_ESP_4" + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" [(clobber (match_dup 0)) - (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))]) + (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))]) (define_peephole2 [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (clobber (reg:CC 17))])] - "optimize_size || !TARGET_SUB_ESP_8" + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" [(clobber (match_dup 0)) - (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) - (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))]) + (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))]) ;; Convert epilogue deallocator to pop. 
(define_peephole2 [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) - (clobber (reg:CC 17)) + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_ADD_ESP_4" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (mem:BLK (scratch)))])] "") @@ -17467,38 +21047,38 @@ (define_peephole2 [(match_scratch:SI 0 "r") (match_scratch:SI 1 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) - (clobber (reg:CC 17)) + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_ADD_ESP_8" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 1) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] "") (define_peephole2 [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) - (clobber (reg:CC 17)) + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + "optimize_insn_for_size_p ()" + [(parallel [(set 
(match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] "") ;; Convert esp additions to pop. (define_peephole2 [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) - (clobber (reg:CC 17))])] + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (reg:CC FLAGS_REG))])] "" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] "") ;; Two pops case is tricky, since pop causes dependency on destination register. @@ -17506,137 +21086,92 @@ (define_peephole2 [(match_scratch:SI 0 "r") (match_scratch:SI 1 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) - (clobber (reg:CC 17))])] + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] "" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))]) - (parallel [(set (match_dup 1) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] "") (define_peephole2 [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8))) - (clobber (reg:CC 17))])] - "optimize_size" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) 
(const_int 4)))]) - (parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p ()" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] "") ;; Convert compares with 1 to shorter inc/dec operations when CF is not -;; required and register dies. -(define_peephole2 - [(set (reg 17) - (compare (match_operand:SI 0 "register_operand" "") - (match_operand:SI 1 "incdec_operand" "")))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (match_dup 1))) - (clobber (match_dup 0))])] - "") - -(define_peephole2 - [(set (reg 17) - (compare (match_operand:HI 0 "register_operand" "") - (match_operand:HI 1 "incdec_operand" "")))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (match_dup 1))) - (clobber (match_dup 0))])] - "") - -(define_peephole2 - [(set (reg 17) - (compare (match_operand:QI 0 "register_operand" "") - (match_operand:QI 1 "incdec_operand" "")))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (match_dup 1))) - (clobber (match_dup 0))])] - "") - -;; Convert compares with 128 to shorter add -128 -(define_peephole2 - [(set (reg 17) - (compare (match_operand:SI 0 "register_operand" "") - (const_int 128)))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) 
- (compare:CCGC (match_dup 0) - (const_int 128))) - (clobber (match_dup 0))])] - "") - +;; required and register dies. Similarly for 128 to -128. (define_peephole2 - [(set (reg 17) - (compare (match_operand:HI 0 "register_operand" "") - (const_int 128)))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (const_int 128))) - (clobber (match_dup 0))])] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")]))] + "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_size) + && incdec_operand (operands[3], GET_MODE (operands[3]))) + || (!TARGET_FUSE_CMP_AND_BRANCH + && INTVAL (operands[3]) == 128)) + && ix86_match_ccmode (insn, CCGCmode) + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 [(match_dup 2) (match_dup 3)])) + (clobber (match_dup 2))])] "") (define_peephole2 [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) - (clobber (reg:CC 17)) + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_SUB_ESP_4" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" [(clobber (match_dup 0)) - (parallel [(set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0)) + (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) (clobber (mem:BLK (scratch)))])]) (define_peephole2 [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) - (clobber (reg:CC 17)) + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_SUB_ESP_8" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" [(clobber (match_dup 0)) - (set (mem:DI 
(pre_dec:DI (reg:DI 7))) (match_dup 0)) - (parallel [(set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) (clobber (mem:BLK (scratch)))])]) ;; Convert esp subtractions to push. (define_peephole2 [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) - (clobber (reg:CC 17))])] - "optimize_size || !TARGET_SUB_ESP_4" + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" [(clobber (match_dup 0)) - (set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0))]) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) (define_peephole2 [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) - (clobber (reg:CC 17))])] - "optimize_size || !TARGET_SUB_ESP_8" + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" [(clobber (match_dup 0)) - (set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0)) - (set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0))]) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) ;; Convert epilogue deallocator to pop. 
(define_peephole2 [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8))) - (clobber (reg:CC 17)) + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_ADD_ESP_4" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8))) + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (mem:BLK (scratch)))])] "") @@ -17645,38 +21180,38 @@ (define_peephole2 [(match_scratch:DI 0 "r") (match_scratch:DI 1 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 16))) - (clobber (reg:CC 17)) + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_ADD_ESP_8" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8))) + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 1) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])] + (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] "") (define_peephole2 [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 16))) - (clobber (reg:CC 17)) + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8))) + "optimize_insn_for_size_p ()" + [(parallel 
[(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 0) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])] + (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] "") ;; Convert esp additions to pop. (define_peephole2 [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8))) - (clobber (reg:CC 17))])] + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] "" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])] + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] "") ;; Two pops case is tricky, since pop causes dependency on destination register. @@ -17684,1779 +21219,424 @@ (define_peephole2 [(match_scratch:DI 0 "r") (match_scratch:DI 1 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 16))) - (clobber (reg:CC 17))])] + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG))])] "" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))]) - (parallel [(set (match_dup 1) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])] + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) + (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] "") (define_peephole2 [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 16))) - (clobber (reg:CC 17))])] - "optimize_size" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI 
(reg:DI 7) (const_int 8)))]) - (parallel [(set (match_dup 0) (mem:DI (reg:DI 7))) - (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])] + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p ()" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) + (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] "") -;; Call-value patterns last so that the wildcard operand does not -;; disrupt insn-recog's switch tables. - -(define_insn "*call_value_pop_0" - [(set (match_operand 0 "" "") - (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) - (match_operand:SI 2 "" ""))) - (set (reg:SI 7) (plus:SI (reg:SI 7) - (match_operand:SI 3 "immediate_operand" "")))] - "!TARGET_64BIT" -{ - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; -} - [(set_attr "type" "callv")]) - -(define_insn "*call_value_pop_1" - [(set (match_operand 0 "" "") - (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm")) - (match_operand:SI 2 "" ""))) - (set (reg:SI 7) (plus:SI (reg:SI 7) - (match_operand:SI 3 "immediate_operand" "i")))] - "!TARGET_64BIT" -{ - if (constant_call_address_operand (operands[1], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A1"; - else - return "call\t%A1"; -} - [(set_attr "type" "callv")]) - -(define_insn "*call_value_0" - [(set (match_operand 0 "" "") - (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) - (match_operand:SI 2 "" "")))] - "!TARGET_64BIT" -{ - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; -} - [(set_attr "type" "callv")]) - -(define_insn "*call_value_0_rex64" - [(set (match_operand 0 "" "") - (call (mem:QI (match_operand:DI 1 
"constant_call_address_operand" "")) - (match_operand:DI 2 "const_int_operand" "")))] - "TARGET_64BIT" -{ - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; -} - [(set_attr "type" "callv")]) - -(define_insn "*call_value_1" - [(set (match_operand 0 "" "") - (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm")) - (match_operand:SI 2 "" "")))] - "!TARGET_64BIT" -{ - if (constant_call_address_operand (operands[1], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%*%1"; - else - return "call\t%*%1"; -} - [(set_attr "type" "callv")]) - -(define_insn "*call_value_1_rex64" - [(set (match_operand 0 "" "") - (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) - (match_operand:DI 2 "" "")))] - "TARGET_64BIT" -{ - if (constant_call_address_operand (operands[1], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A1"; - else - return "call\t%A1"; -} - [(set_attr "type" "callv")]) - -(define_insn "trap" - [(trap_if (const_int 1) (const_int 5))] - "" - "int\t$5") - -;;; ix86 doesn't have conditional trap instructions, but we fake them -;;; for the sake of bounds checking. By emitting bounds checks as -;;; conditional traps rather than as conditional jumps around -;;; unconditional traps we avoid introducing spurious basic-block -;;; boundaries and facilitate elimination of redundant checks. In -;;; honor of the too-inflexible-for-BPs `bound' instruction, we use -;;; interrupt 5. -;;; -;;; FIXME: Static branch prediction rules for ix86 are such that -;;; forward conditional branches predict as untaken. As implemented -;;; below, pseudo conditional traps violate that rule. 
We should use -;;; .pushsection/.popsection to place all of the `int 5's in a special -;;; section loaded at the end of the text segment and branch forward -;;; there on bounds-failure, and then jump back immediately (in case -;;; the system chooses to ignore bounds violations, or to report -;;; violations and continue execution). - -(define_expand "conditional_trap" - [(trap_if (match_operator 0 "comparison_operator" - [(match_dup 2) (const_int 0)]) - (match_operand 1 "const_int_operand" ""))] - "" -{ - emit_insn (gen_rtx_TRAP_IF (VOIDmode, - ix86_expand_compare (GET_CODE (operands[0]), - NULL, NULL), - operands[1])); - DONE; -}) - -(define_insn "*conditional_trap_1" - [(trap_if (match_operator 0 "comparison_operator" - [(reg 17) (const_int 0)]) - (match_operand 1 "const_int_operand" ""))] - "" -{ - operands[2] = gen_label_rtx (); - output_asm_insn ("j%c0\t%l2\; int\t%1", operands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (operands[2])); - RET; -}) - - ;; Pentium III SIMD instructions. - -;; Moves for SSE/MMX regs. - -(define_insn "movv4sf_internal" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "movv4si_internal" - [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE" - ;; @@@ let's try to use movaps here. 
- "movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "movv8qi_internal" - [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") - (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "movv4hi_internal" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") - (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "movv2si_internal" - [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))] - "TARGET_3DNOW" - "movq\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_expand "movti" - [(set (match_operand:TI 0 "general_operand" "") - (match_operand:TI 1 "general_operand" ""))] - "TARGET_SSE || TARGET_64BIT" -{ - if (TARGET_64BIT) - ix86_expand_move (TImode, operands); - else - ix86_expand_vector_move (TImode, operands); - DONE; -}) - -(define_expand "movv4sf" - [(set (match_operand:V4SF 0 "general_operand" "") - (match_operand:V4SF 1 "general_operand" ""))] - "TARGET_SSE" -{ - ix86_expand_vector_move (V4SFmode, operands); - DONE; -}) - -(define_expand "movv4si" - [(set (match_operand:V4SI 0 "general_operand" "") - (match_operand:V4SI 1 "general_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (V4SImode, operands); - DONE; -}) - -(define_expand "movv2si" - [(set (match_operand:V2SI 0 "general_operand" "") - (match_operand:V2SI 1 "general_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (V2SImode, operands); - DONE; -}) - -(define_expand "movv4hi" - [(set (match_operand:V4HI 0 "general_operand" "") - (match_operand:V4HI 1 "general_operand" ""))] - "TARGET_MMX" -{ - 
ix86_expand_vector_move (V4HImode, operands); - DONE; -}) - -(define_expand "movv8qi" - [(set (match_operand:V8QI 0 "general_operand" "") - (match_operand:V8QI 1 "general_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (V8QImode, operands); - DONE; -}) - -(define_expand "movv2sf" - [(set (match_operand:V2SF 0 "general_operand" "") - (match_operand:V2SF 1 "general_operand" ""))] - "TARGET_3DNOW" -{ - ix86_expand_vector_move (V2SFmode, operands); - DONE; -}) - -(define_insn_and_split "*pushti" - [(set (match_operand:TI 0 "push_operand" "=<") - (match_operand:TI 1 "nonmemory_operand" "x"))] - "TARGET_SSE" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:TI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "sse")]) - -(define_insn_and_split "*pushv4sf" - [(set (match_operand:V4SF 0 "push_operand" "=<") - (match_operand:V4SF 1 "nonmemory_operand" "x"))] - "TARGET_SSE" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:V4SF (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "sse")]) - -(define_insn_and_split "*pushv4si" - [(set (match_operand:V4SI 0 "push_operand" "=<") - (match_operand:V4SI 1 "nonmemory_operand" "x"))] - "TARGET_SSE" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:V4SI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "sse")]) - -(define_insn_and_split "*pushv2si" - [(set (match_operand:V2SI 0 "push_operand" "=<") - (match_operand:V2SI 1 "nonmemory_operand" "y"))] - "TARGET_MMX" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:V2SI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "mmx")]) - -(define_insn_and_split "*pushv4hi" - [(set (match_operand:V4HI 0 "push_operand" "=<") - (match_operand:V4HI 1 "nonmemory_operand" "y"))] - "TARGET_MMX" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:V4HI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "mmx")]) - -(define_insn_and_split 
"*pushv8qi" - [(set (match_operand:V8QI 0 "push_operand" "=<") - (match_operand:V8QI 1 "nonmemory_operand" "y"))] - "TARGET_MMX" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:V8QI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "mmx")]) - -(define_insn_and_split "*pushv2sf" - [(set (match_operand:V2SF 0 "push_operand" "=<") - (match_operand:V2SF 1 "nonmemory_operand" "y"))] - "TARGET_3DNOW" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:V2SF (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "mmx")]) +;; Convert imul by three, five and nine into lea +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9" + [(set (match_dup 0) + (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 1)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) -(define_insn "movti_internal" - [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "general_operand" "O,xm,x"))] - "TARGET_SSE && !TARGET_64BIT" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () + && (INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 0) (match_dup 2)) + (match_dup 0)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) -(define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - 
(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] +(define_peephole2 + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - # - # - xorps\t%0, %0 - movaps\\t{%1, %0|%0, %1} - movaps\\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,sse,sse,sse") - (set_attr "mode" "TI")]) + && (INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9)" + [(set (match_dup 0) + (plus:DI (mult:DI (match_dup 1) (match_dup 2)) + (match_dup 1)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) -(define_split - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "general_operand" ""))] - "reload_completed && !SSE_REG_P (operands[0]) - && !SSE_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") +(define_peephole2 + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT + && optimize_insn_for_speed_p () + && (INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (plus:DI (mult:DI (match_dup 0) (match_dup 2)) + (match_dup 0)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) -;; These two patterns are useful for specifying exactly whether to use -;; movaps or movups -(define_insn "sse_movaps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))] - "TARGET_SSE" - "@ - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "sse_movups" - [(set (match_operand:V4SF 0 
"nonimmediate_operand" "=x,m") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))] - "TARGET_SSE" - "@ - movups\t{%1, %0|%0, %1} - movups\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) +;; Imul $32bit_imm, mem, reg is vector decoded, while +;; imul $32bit_imm, reg, reg is direct decoded. +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "memory_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + && !satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +"") +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + && !satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +"") -;; SSE Strange Moves. +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "immediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + && !satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) + (clobber (reg:CC FLAGS_REG))])] +"") + +;; imul $8/16bit_imm, regmem, reg is vector decoded. 
+;; Convert it into imul reg, reg +;; It would be better to force assembler to encode instruction using long +;; immediate, but there is apparently no way to do so. +(define_peephole2 + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:DI 3 "r")] + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () + && satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) -(define_insn "sse_movmskps" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))] - "TARGET_SSE" - "movmskps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) +(define_peephole2 + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:SI 3 "r")] + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () + && satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) -(define_insn "mmx_pmovmskb" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "mmx_maskmovq" - [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 
"register_operand" "y")] 32))] - "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "sse")]) - -(define_insn "mmx_maskmovq_rex" - [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D")) - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] 32))] - "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_movntv4sf" - [(set (match_operand:V4SF 0 "memory_operand" "=m") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))] - "TARGET_SSE" - "movntps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "sse_movntdi" - [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))] - "TARGET_SSE || TARGET_3DNOW_A" - "movntq\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "sse_movhlps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 3) - (const_int 0) - (const_int 1)])) - (const_int 3)))] - "TARGET_SSE" - "movhlps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_movlhps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 3) - (const_int 0) - (const_int 1)])) - (const_int 12)))] - "TARGET_SSE" - "movlhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_movhps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (vec_merge:V4SF - (match_operand:V4SF 1 "nonimmediate_operand" "0,0") - 
(match_operand:V4SF 2 "nonimmediate_operand" "m,x") - (const_int 12)))] - "TARGET_SSE - && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" - "movhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_movlps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (vec_merge:V4SF - (match_operand:V4SF 1 "nonimmediate_operand" "0,0") - (match_operand:V4SF 2 "nonimmediate_operand" "m,x") - (const_int 3)))] - "TARGET_SSE - && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" - "movlps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_loadss" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF (const_int 0))) - (const_int 1)))] - "TARGET_SSE" - "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "sse_movss" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x") - (const_int 1)))] - "TARGET_SSE" - "movss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_storess" - [(set (match_operand:SF 0 "memory_operand" "=m") - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)])))] - "TARGET_SSE" - "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "sse_shufps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "immediate_operand" "i")] 41))] - "TARGET_SSE" - ;; @@@ check operand order for intel/nonintel syntax - "shufps\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse")]) - - -;; SSE arithmetic - -(define_insn "addv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 
"nonimmediate_operand" "xm")))] - "TARGET_SSE" - "addps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "vmaddv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "addss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "subv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "subps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "vmsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "subss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "mulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "mulps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "vmmulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "mulss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "divv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (div:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "divps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "vmdivv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (div:V4SF 
(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "divss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - - -;; SSE square root/reciprocal - -(define_insn "rcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))] - "TARGET_SSE" - "rcpps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "vmrcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE" - "rcpss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "rsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))] - "TARGET_SSE" - "rsqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "vmrsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE" - "rsqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "sqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "sqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "vmsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE" - "sqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -;; SSE logical operations. - -;; These are not called andti3 etc. 
because we really really don't want -;; the compiler to widen DImode ands to TImode ands and then try to move -;; into DImode subregs of SSE registers, and them together, and move out -;; of DImode subregs again! - -(define_insn "*sse_andti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_peephole2 + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:HI 3 "r")] + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) -(define_insn "*sse_andti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_andti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_andti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_andti3" - [(set 
(match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_andti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_nandti3_df" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_nandti3_sf" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_nandti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2" - "andnps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +;; After splitting up read-modify operations, array accesses with memory +;; operands might end up in form: +;; sall $2, %eax +;; movl 4(%esp), %edx +;; addl %edx, %eax +;; instead of pre-splitting: +;; sall $2, %eax +;; addl 4(%esp), %eax +;; Turn it into: +;; movl 4(%esp), %edx +;; leal (%edx,%eax,4), %eax -(define_insn "*sse_nandti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 
"register_operand" "0")) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pnand\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_peephole2 + [(parallel [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_operand 3 "register_operand") + (match_operand 4 "x86_64_general_operand" "")) + (parallel [(set (match_operand 5 "register_operand" "") + (plus (match_operand 6 "register_operand" "") + (match_operand 7 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3 + /* Validate MODE for lea. */ + && ((!TARGET_PARTIAL_REG_STALL + && (GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode)) + || GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)) + /* We reorder load and the shift. */ + && !rtx_equal_p (operands[1], operands[3]) + && !reg_overlap_mentioned_p (operands[0], operands[4]) + /* Last PLUS must consist of operand 0 and 3. */ + && !rtx_equal_p (operands[0], operands[3]) + && (rtx_equal_p (operands[3], operands[6]) + || rtx_equal_p (operands[3], operands[7])) + && (rtx_equal_p (operands[0], operands[6]) + || rtx_equal_p (operands[0], operands[7])) + /* The intermediate operand 0 must die or be same as output. */ + && (rtx_equal_p (operands[0], operands[5]) + || peep2_reg_dead_p (3, operands[0]))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (match_dup 1))] +{ + enum machine_mode mode = GET_MODE (operands[5]) == DImode ? 
DImode : SImode; + int scale = 1 << INTVAL (operands[2]); + rtx index = gen_lowpart (Pmode, operands[1]); + rtx base = gen_lowpart (Pmode, operands[3]); + rtx dest = gen_lowpart (mode, operands[5]); + + operands[1] = gen_rtx_PLUS (Pmode, base, + gen_rtx_MULT (Pmode, index, GEN_INT (scale))); + if (mode != Pmode) + operands[1] = gen_rtx_SUBREG (mode, operands[1], 0); + operands[0] = dest; +}) + +;; Call-value patterns last so that the wildcard operand does not +;; disrupt insn-recog's switch tables. -(define_insn "*sse_iorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_insn "*call_value_pop_0" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) -(define_insn "*sse_iorti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_iorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_iorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 
"nonimmediate_operand" "xm")))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_iorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_iorti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") - (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_xorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_insn "*call_value_pop_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) -(define_insn "*sse_xorti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_xorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" 
"%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_xorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_xorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_xorti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") - (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -;; Use xor, but don't show input operands so they aren't live before -;; this insn. 
-(define_insn "sse_clrv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(const_int 0)] 45))] - "TARGET_SSE" - "xorps\t{%0, %0|%0, %0}" - [(set_attr "type" "sse") - (set_attr "memory" "none")]) +(define_insn "*sibcall_value_pop_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i,i")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P1 + jmp\t%A1" + [(set_attr "type" "callv")]) -;; SSE mask-generating compares - -(define_insn "maskcmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")]))] - "TARGET_SSE" - "cmp%D3ps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "maskncmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (not:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")])))] - "TARGET_SSE" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordps\t{%2, %0|%0, %2}"; +(define_insn "*call_value_0" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; else - return "cmpn%D3ps\t{%2, %0|%0, %2}"; + return "call\t%P1"; } - [(set_attr "type" "sse")]) - -(define_insn "vmmaskcmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")]) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "cmp%D3ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - 
-(define_insn "vmmaskncmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (not:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")])) - (subreg:V4SI (match_dup 1) 0) - (const_int 1)))] - "TARGET_SSE" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordss\t{%2, %0|%0, %2}"; + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" "")))] + "TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; else - return "cmpn%D3ss\t{%2, %0|%0, %2}"; + return "call\t%P1"; } - [(set_attr "type" "sse")]) - -(define_insn "sse_comi" - [(set (reg:CCFP 17) - (match_operator:CCFP 2 "sse_comparison_operator" - [(vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] - "TARGET_SSE" - "comiss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "sse_ucomi" - [(set (reg:CCFPU 17) - (match_operator:CCFPU 2 "sse_comparison_operator" - [(vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] - "TARGET_SSE" - "ucomiss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - - -;; SSE unpack - -(define_insn "sse_unpckhps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "TARGET_SSE" - 
"unpckhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sse_unpcklps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "TARGET_SSE" - "unpcklps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - - -;; SSE min/max - -(define_insn "smaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "maxps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "vmsmaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "minps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "vmsminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - - -;; SSE <-> integer/MMX conversions - -(define_insn "cvtpi2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_duplicate:V4SF - (float:V2SF (match_operand:V2SI 2 
"nonimmediate_operand" "ym"))) - (const_int 12)))] - "TARGET_SSE" - "cvtpi2ps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "cvtps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvtps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "cvttps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvttps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "cvtsi2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_duplicate:V4SF - (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm"))) - (const_int 14)))] - "TARGET_SSE" - "cvtsi2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "cvtss2si" - [(set (match_operand:SI 0 "register_operand" "=r") - (vec_select:SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (parallel [(const_int 0)])))] - "TARGET_SSE" - "cvtss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - -(define_insn "cvttss2si" - [(set (match_operand:SI 0 "register_operand" "=r") - (vec_select:SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) - (parallel [(const_int 0)])))] - "TARGET_SSE" - "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) - - -;; MMX insns - -;; MMX arithmetic - -(define_insn "addv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "addv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 
"register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "addv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "ssaddv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "ssaddv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "usaddv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "usaddv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "subv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "subv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" 
- "psubw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "subv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (minus:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "sssubv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "sssubv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "ussubv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "ussubv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mulv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (mult:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "smulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_int 16))))] - 
"TARGET_MMX" - "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "umulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_int 16))))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_pmaddwd" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI - (mult:V2SI - (sign_extend:V2SI - (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 2)]))) - (sign_extend:V2SI - (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0) (const_int 2)])))) - (mult:V2SI - (sign_extend:V2SI (vec_select:V2HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2SI (vec_select:V2HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3)]))))))] - "TARGET_MMX" - "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - - -;; MMX logical operations -;; Note we don't want to declare these as regular iordi3 insns to prevent -;; normal code that also wants to use the FPU from getting broken. -;; The UNSPECs are there to prevent the combiner from getting overly clever. 
-(define_insn "mmx_iordi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] - "TARGET_MMX" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_xordi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] - "TARGET_MMX" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) + [(set_attr "type" "callv")]) -;; Same as pxor, but don't show input operands so that we don't think -;; they are live. -(define_insn "mmx_clrdi" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(const_int 0)] 45))] - "TARGET_MMX" - "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) +(define_insn "*call_value_0_rex64_ms_sysv" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) -(define_insn "mmx_anddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] - "TARGET_MMX" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_nanddi3" - 
[(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0")) - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] - "TARGET_MMX" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - - -;; MMX unsigned averages/sum of absolute differences - -(define_insn "mmx_uavgv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ashiftrt:V8QI - (plus:V8QI (plus:V8QI - (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")) - (const_vector:V8QI [(const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1)])) - (const_int 1)))] - "TARGET_SSE || TARGET_3DNOW_A" - "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "mmx_uavgv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashiftrt:V4HI - (plus:V4HI (plus:V4HI - (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")) - (const_vector:V4HI [(const_int 1) - (const_int 1) - (const_int 1) - (const_int 1)])) - (const_int 1)))] - "TARGET_SSE || TARGET_3DNOW_A" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "mmx_psadbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))] - "TARGET_SSE || TARGET_3DNOW_A" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - - -;; MMX insert/extract/shuffle - -(define_insn "mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0") - (vec_duplicate:V4HI - (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm"))) - (match_operand:SI 3 "immediate_operand" "i")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pinsrw\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse")]) - -(define_insn "mmx_pextrw" - [(set 
(match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") - (parallel - [(match_operand:SI 2 "immediate_operand" "i")]))))] - "TARGET_SSE || TARGET_3DNOW_A" - "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sse")]) - -(define_insn "mmx_pshufw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")] 41))] - "TARGET_SSE || TARGET_3DNOW_A" - "pshufw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sse")]) - - -;; MMX mask-generating comparisons - -(define_insn "eqv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (eq:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "eqv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (eq:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "eqv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (eq:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "gtv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (gt:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "gtv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (gt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "gtv2si3" - [(set 
(match_operand:V2SI 0 "register_operand" "=y") - (gt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - - -;; MMX max/min insns - -(define_insn "umaxv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (umax:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmaxub\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "smaxv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (smax:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmaxsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "uminv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (umin:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pminub\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "sminv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (smin:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pminsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - - -;; MMX shifts - -(define_insn "ashrv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "ashrv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "lshrv4hi3" - 
[(set (match_operand:V4HI 0 "register_operand" "=y") - (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "lshrv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -;; See logical MMX insns. -(define_insn "mmx_lshrdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))] - "TARGET_MMX" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "ashlv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "ashlv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -;; See logical MMX insns. -(define_insn "mmx_ashldi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))] - "TARGET_MMX" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - - -;; MMX pack/unpack insns. 
- -(define_insn "mmx_packsswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) - (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_packssdw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_concat:V4HI - (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0")) - (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_packuswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) - (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_punpckhbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_merge:V8QI - (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (const_int 85)))] - "TARGET_MMX" - "punpckhbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_punpckhwd" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "TARGET_MMX" - 
"punpckhwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_punpckhdq" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_merge:V2SI - (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 1)])) - (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 1)))] - "TARGET_MMX" - "punpckhdq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_punpcklbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_merge:V8QI - (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (const_int 85)))] - "TARGET_MMX" - "punpcklbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_punpcklwd" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "TARGET_MMX" - "punpcklwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mmx_punpckldq" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_merge:V2SI - (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 1) - (const_int 0)])) - (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 1)])) - (const_int 1)))] - "TARGET_MMX" - "punpckldq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) 
- - -;; Miscellaneous stuff - -(define_insn "emms" - [(unspec_volatile [(const_int 0)] 31) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_MMX" - "emms" - [(set_attr "type" "mmx") - (set_attr "memory" "unknown")]) +(define_insn "*call_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) -(define_insn "ldmxcsr" - [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)] - "TARGET_MMX" - "ldmxcsr\t%0" - [(set_attr "type" "mmx") - (set_attr "memory" "load")]) +(define_insn "*sibcall_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P1 + jmp\t%A1" + [(set_attr "type" "callv")]) -(define_insn "stmxcsr" - [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec_volatile:SI [(const_int 0)] 40))] - "TARGET_MMX" - "stmxcsr\t%0" - [(set_attr "type" "mmx") - (set_attr "memory" "store")]) +(define_insn "*call_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) -(define_expand "sfence" - [(set 
(match_dup 0) - (unspec:BLK [(match_dup 0)] 44))] - "TARGET_SSE || TARGET_3DNOW_A" +(define_insn "*call_value_1_rex64_ms_sysv" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI 27)) + (clobber (reg:TI 28)) + (clobber (reg:TI 45)) + (clobber (reg:TI 46)) + (clobber (reg:TI 47)) + (clobber (reg:TI 48)) + (clobber (reg:TI 49)) + (clobber (reg:TI 50)) + (clobber (reg:TI 51)) + (clobber (reg:TI 52)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))] + "!SIBLING_CALL_P (insn) && TARGET_64BIT" { - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; -}) + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) -(define_insn "*sfence_insn" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] 44))] - "TARGET_SSE || TARGET_3DNOW_A" - "sfence" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) +(define_insn "*call_value_1_rex64_large" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + "call\t%A1" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P1 + jmp\t%A1" + [(set_attr "type" "callv")]) + +;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5. +;; That, however, is usually mapped by the OS to SIGSEGV, which is often +;; caught for use by garbage collectors and the like. Using an insn that +;; maps to SIGILL makes it more likely the program will rightfully die. +;; Keeping with tradition, "6" is in honor of #UD. 
+(define_insn "trap" + [(trap_if (const_int 1) (const_int 6))] + "" + { return ASM_SHORT "0x0b0f"; } + [(set_attr "length" "2")]) (define_expand "sse_prologue_save" [(parallel [(set (match_operand:BLK 0 "" "") @@ -19467,7 +21647,7 @@ (reg:DI 25) (reg:DI 26) (reg:DI 27) - (reg:DI 28)] 13)) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "")) (use (match_operand:DI 2 "immediate_operand" "")) (use (label_ref:DI (match_operand 3 "" "")))])] @@ -19484,314 +21664,48 @@ (reg:DI 25) (reg:DI 26) (reg:DI 27) - (reg:DI 28)] 13)) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "r")) (use (match_operand:DI 2 "const_int_operand" "i")) (use (label_ref:DI (match_operand 3 "" "X")))] "TARGET_64BIT - && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128 + && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128 && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" - "* { int i; operands[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, operands[0], operands[4])); - output_asm_insn (\"jmp\\t%A1\", operands); - for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) + /* VEX instruction with a REX prefix will #UD. 
*/ + if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS) + gcc_unreachable (); + + output_asm_insn ("jmp\t%A1", operands); + for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) { operands[4] = adjust_address (operands[0], DImode, i*16); operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); PUT_MODE (operands[4], TImode); if (GET_CODE (XEXP (operands[0], 0)) != PLUS) - output_asm_insn (\"rex\", operands); - output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); + output_asm_insn ("rex", operands); + output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands); } - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", - CODE_LABEL_NUMBER (operands[3])); - RET; + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[3])); + return ""; } - " [(set_attr "type" "other") (set_attr "length_immediate" "0") (set_attr "length_address" "0") - (set_attr "length" "135") + (set (attr "length") + (if_then_else + (eq (symbol_ref "TARGET_AVX") (const_int 0)) + (const_string "34") + (const_string "42"))) (set_attr "memory" "store") (set_attr "modrm" "0") + (set_attr "prefix" "maybe_vex") (set_attr "mode" "DI")]) -;; 3Dnow! 
instructions - -(define_insn "addv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (plus:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfadd\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "subv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (minus:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfsub\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "subrv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym") - (match_operand:V2SF 1 "register_operand" "0")))] - "TARGET_3DNOW" - "pfsubr\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "gtv2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (gt:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpgt\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "gev2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ge:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpge\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "eqv2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (eq:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpeq\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pfmaxv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (smax:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmax\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pfminv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - 
(smin:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmin\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "mulv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (mult:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmul\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "femms" - [(unspec_volatile [(const_int 0)] 46) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_3DNOW" - "femms" - [(set_attr "type" "mmx")]) - -(define_insn "pf2id" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pf2id\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "pf2iw" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (sign_extend:V2SI - (ss_truncate:V2HI - (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] - "TARGET_3DNOW_A" - "pf2iw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "pfacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (plus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (plus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW" - "pfacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pfnacc" - [(set 
(match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (minus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (minus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW_A" - "pfnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pfpnacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (minus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (plus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW_A" - "pfpnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pi2fw" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (float:V2SF - (vec_concat:V2SI - (sign_extend:SI - (truncate:HI - (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])))) - (sign_extend:SI - (truncate:HI - (vec_select:SI (match_dup 1) - (parallel [(const_int 1)])))))))] - "TARGET_3DNOW_A" - "pi2fw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "floatv2si2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pi2fd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -;; This insn is identical to pavgb in operation, but the opcode is -;; different. To avoid accidentally matching pavgb, use an unspec. 
- -(define_insn "pavgusb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (unspec:V8QI - [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))] - "TARGET_3DNOW" - "pavgusb\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -;; 3DNow reciprical and sqrt - -(define_insn "pfrcpv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))] - "TARGET_3DNOW" - "pfrcp\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "pfrcpit1v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))] - "TARGET_3DNOW" - "pfrcpit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pfrcpit2v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))] - "TARGET_3DNOW" - "pfrcpit2\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pfrsqrtv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))] - "TARGET_3DNOW" - "pfrsqrt\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "pfrsqit1v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))] - "TARGET_3DNOW" - "pfrsqit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pmulhrwv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_vector:V4SI [(const_int 32768) - (const_int 32768) - 
(const_int 32768) - (const_int 32768)])) - (const_int 16))))] - "TARGET_3DNOW" - "pmulhrw\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) - -(define_insn "pswapdv2si2" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - -(define_insn "pswapdv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) - (define_expand "prefetch" [(prefetch (match_operand 0 "address_operand" "") (match_operand:SI 1 "const_int_operand" "") @@ -19801,15 +21715,13 @@ int rw = INTVAL (operands[1]); int locality = INTVAL (operands[2]); - if (rw != 0 && rw != 1) - abort (); - if (locality < 0 || locality > 3) - abort (); - if (GET_MODE (operands[0]) != Pmode && GET_MODE (operands[0]) != VOIDmode) - abort (); + gcc_assert (rw == 0 || rw == 1); + gcc_assert (locality >= 0 && locality <= 3); + gcc_assert (GET_MODE (operands[0]) == Pmode + || GET_MODE (operands[0]) == VOIDmode); /* Use 3dNOW prefetch in case we are asking for write prefetch not - suported by SSE counterpart or the SSE prefetch is not available + supported by SSE counterpart or the SSE prefetch is not available (K6 machines). Otherwise use SSE prefetch as it allows specifying of locality. 
*/ if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) @@ -19829,10 +21741,9 @@ }; int locality = INTVAL (operands[1]); - if (locality < 0 || locality > 3) - abort (); + gcc_assert (locality >= 0 && locality <= 3); - return patterns[locality]; + return patterns[locality]; } [(set_attr "type" "sse") (set_attr "memory" "none")]) @@ -19848,12 +21759,12 @@ }; int locality = INTVAL (operands[1]); - if (locality < 0 || locality > 3) - abort (); + gcc_assert (locality >= 0 && locality <= 3); - return patterns[locality]; + return patterns[locality]; } - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "memory" "none")]) (define_insn "*prefetch_3dnow" [(prefetch (match_operand:SI 0 "address_operand" "p") @@ -19880,4 +21791,180 @@ else return "prefetchw\t%a0"; } - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +(define_expand "stack_protect_set" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")] + "" +{ +#ifdef TARGET_THREAD_SSP_OFFSET + if (TARGET_64BIT) + emit_insn (gen_stack_tls_protect_set_di (operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); + else + emit_insn (gen_stack_tls_protect_set_si (operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); +#else + if (TARGET_64BIT) + emit_insn (gen_stack_protect_set_di (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_set_si (operands[0], operands[1])); +#endif + DONE; +}) + +(define_insn "stack_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + [(set_attr "type" "multi")]) + +(define_insn "stack_protect_set_di" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0)) + 
(clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_set_di" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + { + /* The kernel uses a different segment register for performance reasons; a + system call would not have to trash the userspace segment register, + which would be expensive */ + if (ix86_cmodel != CM_KERNEL) + return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + else + return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + } + [(set_attr "type" "multi")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" +{ + rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + ix86_compare_emitted = flags; + +#ifdef TARGET_THREAD_SSP_OFFSET + if (TARGET_64BIT) + emit_insn (gen_stack_tls_protect_test_di (flags, operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); + else + emit_insn (gen_stack_tls_protect_test_si (flags, operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); +#else + if (TARGET_64BIT) + emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1])); + else + emit_insn (gen_stack_protect_test_si (flags, operands[0], 
operands[1])); +#endif + emit_jump_insn (gen_beq (operands[2])); + DONE; +}) + +(define_insn "stack_protect_test_si" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:SI 3 "=&r"))] + "" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_protect_test_di" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:DI 3 "=&r"))] + "TARGET_64BIT" + "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_test_si" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:SI 3 "=r"))] + "" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_test_di" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:DI 3 "=r"))] + "TARGET_64BIT" + { + /* The kernel uses a different segment register for performance reasons; a + system call would not have to trash the userspace segment register, + which would be expensive */ + if (ix86_cmodel != CM_KERNEL) + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}"; + else + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}"; + } + [(set_attr "type" "multi")]) + +(define_mode_iterator CRC32MODE [QI HI SI]) +(define_mode_attr crc32modesuffix [(QI "{b}") (HI "{w}") (SI "{l}")]) +(define_mode_attr 
crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")]) + +(define_insn "sse4_2_crc32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "0") + (match_operand:CRC32MODE 2 "nonimmediate_operand" "")] + UNSPEC_CRC32))] + "TARGET_SSE4_2" + "crc32\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "SI")]) + +(define_insn "sse4_2_crc32di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm")] + UNSPEC_CRC32))] + "TARGET_SSE4_2 && TARGET_64BIT" + "crc32{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "DI")]) + +(include "mmx.md") +(include "sse.md") +(include "sync.md")