X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gcc%2Fconfig%2Farm%2Fneon.md;fp=gcc%2Fconfig%2Farm%2Fneon.md;h=a3d3e73d2e7e1318bea814f674d5f590f22c73e3;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
new file mode 100644
index 00000000..a3d3e73d
--- /dev/null
+++ b/gcc/config/arm/neon.md
@@ -0,0 +1,4980 @@
+;; ARM NEON coprocessor Machine Description
+;; Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; .
+
+;; Constants for unspecs.
+(define_constants
+ [(UNSPEC_ASHIFT_SIGNED 65)
+ (UNSPEC_ASHIFT_UNSIGNED 66)
+ (UNSPEC_VABA 67)
+ (UNSPEC_VABAL 68)
+ (UNSPEC_VABD 69)
+ (UNSPEC_VABDL 70)
+ (UNSPEC_VABS 71)
+ (UNSPEC_VADD 72)
+ (UNSPEC_VADDHN 73)
+ (UNSPEC_VADDL 74)
+ (UNSPEC_VADDW 75)
+ (UNSPEC_VAND 76)
+ (UNSPEC_VBIC 77)
+ (UNSPEC_VBSL 78)
+ (UNSPEC_VCAGE 79)
+ (UNSPEC_VCAGT 80)
+ (UNSPEC_VCEQ 81)
+ (UNSPEC_VCGE 82)
+ (UNSPEC_VCGT 83)
+ (UNSPEC_VCLS 84)
+ (UNSPEC_VCLZ 85)
+ (UNSPEC_VCNT 86)
+ (UNSPEC_VCOMBINE 87)
+ (UNSPEC_VCVT 88)
+ (UNSPEC_VCVT_N 89)
+ (UNSPEC_VDUP_LANE 90)
+ (UNSPEC_VDUP_N 91)
+ (UNSPEC_VEOR 92)
+ (UNSPEC_VEXT 93)
+ (UNSPEC_VGET_HIGH 94)
+ (UNSPEC_VGET_LANE 95)
+ (UNSPEC_VGET_LOW 96)
+ (UNSPEC_VHADD 97)
+ (UNSPEC_VHSUB 98)
+ (UNSPEC_VLD1 99)
+ (UNSPEC_VLD1_DUP 100)
+ (UNSPEC_VLD1_LANE 101)
+ (UNSPEC_VLD2 102)
+ (UNSPEC_VLD2_DUP 103)
+ (UNSPEC_VLD2_LANE 104)
+ (UNSPEC_VLD3 105)
+ (UNSPEC_VLD3A 106)
+ (UNSPEC_VLD3B 107)
+ (UNSPEC_VLD3_DUP 108)
+ (UNSPEC_VLD3_LANE 109)
+ (UNSPEC_VLD4 110)
+ (UNSPEC_VLD4A 111)
+ (UNSPEC_VLD4B 112)
+ (UNSPEC_VLD4_DUP 113)
+ (UNSPEC_VLD4_LANE 114)
+ (UNSPEC_VMAX 115)
+ (UNSPEC_VMIN 116)
+ (UNSPEC_VMLA 117)
+ (UNSPEC_VMLAL 118)
+ (UNSPEC_VMLA_LANE 119)
+ (UNSPEC_VMLAL_LANE 120)
+ (UNSPEC_VMLS 121)
+ (UNSPEC_VMLSL 122)
+ (UNSPEC_VMLS_LANE 123)
+ (UNSPEC_VMLSL_LANE 124)
+ (UNSPEC_VMOVL 125)
+ (UNSPEC_VMOVN 126)
+ (UNSPEC_VMUL 127)
+ (UNSPEC_VMULL 128)
+ (UNSPEC_VMUL_LANE 129)
+ (UNSPEC_VMULL_LANE 130)
+ (UNSPEC_VMUL_N 131)
+ (UNSPEC_VMVN 132)
+ (UNSPEC_VORN 133)
+ (UNSPEC_VORR 134)
+ (UNSPEC_VPADAL 135)
+ (UNSPEC_VPADD 136)
+ (UNSPEC_VPADDL 137)
+ (UNSPEC_VPMAX 138)
+ (UNSPEC_VPMIN 139)
+ (UNSPEC_VPSMAX 140)
+ (UNSPEC_VPSMIN 141)
+ (UNSPEC_VPUMAX 142)
+ (UNSPEC_VPUMIN 143)
+ (UNSPEC_VQABS 144)
+ (UNSPEC_VQADD 145)
+ (UNSPEC_VQDMLAL 146)
+ (UNSPEC_VQDMLAL_LANE 147)
+ (UNSPEC_VQDMLSL 148)
+ (UNSPEC_VQDMLSL_LANE 149)
+ (UNSPEC_VQDMULH 150)
+ (UNSPEC_VQDMULH_LANE 151)
+ (UNSPEC_VQDMULL 152)
+ (UNSPEC_VQDMULL_LANE 153)
+ (UNSPEC_VQMOVN 154)
+ (UNSPEC_VQMOVUN 155)
+ (UNSPEC_VQNEG 156)
+ (UNSPEC_VQSHL 157)
+ (UNSPEC_VQSHL_N 158)
+ (UNSPEC_VQSHLU_N 159)
+ (UNSPEC_VQSHRN_N 160)
+ (UNSPEC_VQSHRUN_N 161)
+ (UNSPEC_VQSUB 162)
+ (UNSPEC_VRECPE 163)
+ (UNSPEC_VRECPS 164)
+ (UNSPEC_VREV16 165)
+ (UNSPEC_VREV32 166)
+ (UNSPEC_VREV64 167)
+ (UNSPEC_VRSQRTE 168)
+ (UNSPEC_VRSQRTS 169)
+ (UNSPEC_VSET_LANE 170)
+ (UNSPEC_VSHL 171)
+ (UNSPEC_VSHLL_N 172)
+ (UNSPEC_VSHL_N 173)
+ (UNSPEC_VSHR_N 174)
+ (UNSPEC_VSHRN_N 175)
+ (UNSPEC_VSLI 176)
+ (UNSPEC_VSRA_N 177)
+ (UNSPEC_VSRI 178)
+ (UNSPEC_VST1 179)
+ (UNSPEC_VST1_LANE 180)
+ (UNSPEC_VST2 181)
+ (UNSPEC_VST2_LANE 182)
+ (UNSPEC_VST3 183)
+ (UNSPEC_VST3A 184)
+ (UNSPEC_VST3B 185)
+ (UNSPEC_VST3_LANE 186)
+ (UNSPEC_VST4 187)
+ (UNSPEC_VST4A 188)
+ (UNSPEC_VST4B 189)
+ (UNSPEC_VST4_LANE 190)
+ (UNSPEC_VSTRUCTDUMMY 191)
+ (UNSPEC_VSUB 192)
+ (UNSPEC_VSUBHN 193)
+ (UNSPEC_VSUBL 194)
+ (UNSPEC_VSUBW 195)
+ (UNSPEC_VTBL 196)
+ (UNSPEC_VTBX 197)
+ (UNSPEC_VTRN1 198)
+ (UNSPEC_VTRN2 199)
+ (UNSPEC_VTST 200)
+ (UNSPEC_VUZP1 201)
+ (UNSPEC_VUZP2 202)
+ (UNSPEC_VZIP1 203)
+ (UNSPEC_VZIP2 204)])
+
+;; Double-width vector modes.
+(define_mode_iterator VD [V8QI V4HI V2SI V2SF])
+
+;; Double-width vector modes plus 64-bit elements.
+(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI])
+
+;; Same, without floating-point elements.
+(define_mode_iterator VDI [V8QI V4HI V2SI])
+
+;; Quad-width vector modes.
+(define_mode_iterator VQ [V16QI V8HI V4SI V4SF])
+
+;; Quad-width vector modes plus 64-bit elements.
+(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI])
+
+;; Same, without floating-point elements.
+(define_mode_iterator VQI [V16QI V8HI V4SI])
+
+;; Same, with TImode added, for moves.
+(define_mode_iterator VQXMOV [V16QI V8HI V4SI V4SF V2DI TI])
+
+;; Opaque structure types wider than TImode.
+(define_mode_iterator VSTRUCT [EI OI CI XI])
+
+;; Number of instructions needed to load/store struct elements. FIXME!
+(define_mode_attr V_slen [(EI "2") (OI "2") (CI "3") (XI "4")])
+
+;; Opaque structure types used in table lookups (except vtbl1/vtbx1).
+(define_mode_iterator VTAB [TI EI OI])
+
+;; vtbl suffix for above modes.
+(define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")])
+
+;; Widenable modes.
+(define_mode_iterator VW [V8QI V4HI V2SI])
+
+;; Narrowable modes.
+(define_mode_iterator VN [V8HI V4SI V2DI])
+
+;; All supported vector modes (except singleton DImode).
+(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DI])
+
+;; All supported vector modes (except those with 64-bit integer elements).
+(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
+
+;; Supported integer vector modes (not 64 bit elements).
+(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
+
+;; Supported integer vector modes (not singleton DI)
+(define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
+
+;; Vector modes, including 64-bit integer elements.
+(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI])
+
+;; Vector modes including 64-bit integer elements, but no floats.
+(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
+
+;; Vector modes for float->int conversions.
+(define_mode_iterator VCVTF [V2SF V4SF])
+
+;; Vector modes form int->float conversions.
+(define_mode_iterator VCVTI [V2SI V4SI])
+
+;; Vector modes for doubleword multiply-accumulate, etc. insns.
+(define_mode_iterator VMD [V4HI V2SI V2SF])
+
+;; Vector modes for quadword multiply-accumulate, etc. insns.
+(define_mode_iterator VMQ [V8HI V4SI V4SF])
+
+;; Above modes combined.
+(define_mode_iterator VMDQ [V4HI V2SI V2SF V8HI V4SI V4SF])
+
+;; As VMD, but integer modes only.
+(define_mode_iterator VMDI [V4HI V2SI])
+
+;; As VMQ, but integer modes only.
+(define_mode_iterator VMQI [V8HI V4SI])
+
+;; Above modes combined.
+(define_mode_iterator VMDQI [V4HI V2SI V8HI V4SI])
+
+;; Modes with 8-bit and 16-bit elements.
+(define_mode_iterator VX [V8QI V4HI V16QI V8HI])
+
+;; Modes with 8-bit elements.
+(define_mode_iterator VE [V8QI V16QI])
+
+;; Modes with 64-bit elements only.
+(define_mode_iterator V64 [DI V2DI])
+
+;; Modes with 32-bit elements only.
+(define_mode_iterator V32 [V2SI V2SF V4SI V4SF])
+
+;; (Opposite) mode to convert to/from for above conversions.
+(define_mode_attr V_CVTTO [(V2SI "V2SF") (V2SF "V2SI")
+ (V4SI "V4SF") (V4SF "V4SI")])
+
+;; Define element mode for each vector mode.
+(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
+ (V4HI "HI") (V8HI "HI")
+ (V2SI "SI") (V4SI "SI")
+ (V2SF "SF") (V4SF "SF")
+ (DI "DI") (V2DI "DI")])
+
+;; Element modes for vector extraction, padded up to register size.
+
+(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI")
+ (V4HI "SI") (V8HI "SI")
+ (V2SI "SI") (V4SI "SI")
+ (V2SF "SF") (V4SF "SF")
+ (DI "DI") (V2DI "DI")])
+
+;; Mode of pair of elements for each vector mode, to define transfer
+;; size for structure lane/dup loads and stores.
+(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
+ (V4HI "SI") (V8HI "SI")
+ (V2SI "V2SI") (V4SI "V2SI")
+ (V2SF "V2SF") (V4SF "V2SF")
+ (DI "V2DI") (V2DI "V2DI")])
+
+;; Similar, for three elements.
+;; ??? Should we define extra modes so that sizes of all three-element
+;; accesses can be accurately represented?
+(define_mode_attr V_three_elem [(V8QI "SI") (V16QI "SI")
+ (V4HI "V4HI") (V8HI "V4HI")
+ (V2SI "V4SI") (V4SI "V4SI")
+ (V2SF "V4SF") (V4SF "V4SF")
+ (DI "EI") (V2DI "EI")])
+
+;; Similar, for four elements.
+(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI")
+ (V4HI "V4HI") (V8HI "V4HI")
+ (V2SI "V4SI") (V4SI "V4SI")
+ (V2SF "V4SF") (V4SF "V4SF")
+ (DI "OI") (V2DI "OI")])
+
+;; Register width from element mode
+(define_mode_attr V_reg [(V8QI "P") (V16QI "q")
+ (V4HI "P") (V8HI "q")
+ (V2SI "P") (V4SI "q")
+ (V2SF "P") (V4SF "q")
+ (DI "P") (V2DI "q")])
+
+;; Wider modes with the same number of elements.
+(define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")])
+
+;; Narrower modes with the same number of elements.
+(define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")])
+
+;; Modes with half the number of equal-sized elements.
+(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI")
+ (V4SI "V2SI") (V4SF "V2SF")
+ (V2DI "DI")])
+
+;; Same, but lower-case.
+(define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi")
+ (V4SI "v2si") (V4SF "v2sf")
+ (V2DI "di")])
+
+;; Modes with twice the number of equal-sized elements.
+(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI")
+ (V2SI "V4SI") (V2SF "V4SF")
+ (DI "V2DI")])
+
+;; Same, but lower-case.
+(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi")
+ (V2SI "v4si") (V2SF "v4sf")
+ (DI "v2di")])
+
+;; Modes with double-width elements.
+(define_mode_attr V_double_width [(V8QI "V4HI") (V16QI "V8HI")
+ (V4HI "V2SI") (V8HI "V4SI")
+ (V2SI "DI") (V4SI "V2DI")])
+
+;; Mode of result of comparison operations (and bit-select operand 1).
+(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
+ (V4HI "V4HI") (V8HI "V8HI")
+ (V2SI "V2SI") (V4SI "V4SI")
+ (V2SF "V2SI") (V4SF "V4SI")
+ (DI "DI") (V2DI "V2DI")])
+
+;; Get element type from double-width mode, for operations where we don't care
+;; about signedness.
+(define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8")
+ (V4HI "i16") (V8HI "i16")
+ (V2SI "i32") (V4SI "i32")
+ (DI "i64") (V2DI "i64")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Same, but for operations which work on signed values.
+(define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8")
+ (V4HI "s16") (V8HI "s16")
+ (V2SI "s32") (V4SI "s32")
+ (DI "s64") (V2DI "s64")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Same, but for operations which work on unsigned values.
+(define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8")
+ (V4HI "u16") (V8HI "u16")
+ (V2SI "u32") (V4SI "u32")
+ (DI "u64") (V2DI "u64")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Element types for extraction of unsigned scalars.
+(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8")
+ (V4HI "u16") (V8HI "u16")
+ (V2SI "32") (V4SI "32")
+ (V2SF "32") (V4SF "32")])
+
+(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8")
+ (V4HI "16") (V8HI "16")
+ (V2SI "32") (V4SI "32")
+ (DI "64") (V2DI "64")
+ (V2SF "32") (V4SF "32")])
+
+;; Element sizes for duplicating ARM registers to all elements of a vector.
+(define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")])
+
+;; Opaque integer types for results of pair-forming intrinsics (vtrn, etc.)
+(define_mode_attr V_PAIR [(V8QI "TI") (V16QI "OI")
+ (V4HI "TI") (V8HI "OI")
+ (V2SI "TI") (V4SI "OI")
+ (V2SF "TI") (V4SF "OI")
+ (DI "TI") (V2DI "OI")])
+
+;; Same, but lower-case.
+(define_mode_attr V_pair [(V8QI "ti") (V16QI "oi")
+ (V4HI "ti") (V8HI "oi")
+ (V2SI "ti") (V4SI "oi")
+ (V2SF "ti") (V4SF "oi")
+ (DI "ti") (V2DI "oi")])
+
+;; Operations on two halves of a quadword vector.
+(define_code_iterator vqh_ops [plus smin smax umin umax])
+
+;; Same, without unsigned variants (for use with *SFmode pattern).
+(define_code_iterator vqhs_ops [plus smin smax])
+
+;; Assembler mnemonics for above codes.
+(define_code_attr VQH_mnem [(plus "vadd") (smin "vmin") (smax "vmax")
+ (umin "vmin") (umax "vmax")])
+
+;; Signs of above, where relevant.
+(define_code_attr VQH_sign [(plus "i") (smin "s") (smax "s") (umin "u")
+ (umax "u")])
+
+;; Extra suffix on some 64-bit insn names (to avoid collision with standard
+;; names which we don't want to define).
+(define_mode_attr V_suf64 [(V8QI "") (V16QI "")
+ (V4HI "") (V8HI "")
+ (V2SI "") (V4SI "")
+ (V2SF "") (V4SF "")
+ (DI "_neon") (V2DI "")])
+
+;; Scalars to be presented to scalar multiplication instructions
+;; must satisfy the following constraints.
+;; 1. If the mode specifies 16-bit elements, the scalar must be in D0-D7.
+;; 2. If the mode specifies 32-bit elements, the scalar must be in D0-D15.
+;; This mode attribute is used to obtain the correct register constraints.
+(define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t")
+ (V8HI "x") (V4SI "t") (V4SF "t")])
+
+;; Attribute used to permit string comparisons against in
+;; neon_type attribute definitions.
+(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
+
+;; Predicates used for setting neon_type
+
+(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false")
+ (V4HI "false") (V8HI "false")
+ (V2SI "false") (V4SI "false")
+ (V2SF "true") (V4SF "true")
+ (DI "false") (V2DI "false")])
+
+(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
+ (V4HI "true") (V8HI "true")
+ (V2SI "false") (V4SI "false")
+ (V2SF "false") (V4SF "false")
+ (DI "false") (V2DI "false")])
+
+
+(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
+ (V4HI "true") (V8HI "false")
+ (V2SI "true") (V4SI "false")
+ (V2SF "true") (V4SF "false")
+ (DI "true") (V2DI "false")])
+
+(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
+ (V4HI "4") (V8HI "8")
+ (V2SI "2") (V4SI "4")
+ (V2SF "2") (V4SF "4")
+ (DI "1") (V2DI "2")])
+
+(define_insn "*neon_mov"
+ [(set (match_operand:VD 0 "nonimmediate_operand"
+ "=w,Uv,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VD 1 "general_operand"
+ " w,w, Dn,Uvi, w, r, r, Usi,r"))]
+ "TARGET_NEON"
+{
+ if (which_alternative == 2)
+ {
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_move (operands[1], mode,
+ &operands[1], &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (width == 0)
+ return "vmov.f32\t%P0, %1 @ ";
+ else
+ sprintf (templ, "vmov.i%d\t%%P0, %%1 @ ", width);
+
+ return templ;
+ }
+
+ /* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp
+ below must be changed to output_move_neon (which will use the
+ element/structure loads/stores), and the constraint changed to 'Un' instead
+ of 'Uv'. */
+
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%P0, %P1 @ ";
+ case 1: case 3: return output_move_vfp (operands);
+ case 2: gcc_unreachable ();
+ case 4: return "vmov\t%Q0, %R0, %P1 @ ";
+ case 5: return "vmov\t%P0, %Q1, %R1 @ ";
+ default: return output_move_double (operands);
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*")
+ (set_attr "type" "*,f_stored,*,f_loadd,*,*,alu,load2,store2")
+ (set_attr "insn" "*,*,*,*,*,*,mov,*,*")
+ (set_attr "length" "4,4,4,4,4,4,8,8,8")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")])
+
+(define_insn "*neon_mov"
+ [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
+ "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VQXMOV 1 "general_operand"
+ " w,w, Dn,Uni, w, r, r, Usi, r"))]
+ "TARGET_NEON"
+{
+ if (which_alternative == 2)
+ {
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_move (operands[1], mode,
+ &operands[1], &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (width == 0)
+ return "vmov.f32\t%q0, %1 @ ";
+ else
+ sprintf (templ, "vmov.i%d\t%%q0, %%1 @ ", width);
+
+ return templ;
+ }
+
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%q0, %q1 @ ";
+ case 1: case 3: return output_move_neon (operands);
+ case 2: gcc_unreachable ();
+ case 4: return "vmov\t%Q0, %R0, %e1 @ \;vmov\t%J0, %K0, %f1";
+ case 5: return "vmov\t%e0, %Q1, %R1 @ \;vmov\t%f0, %J1, %K1";
+ default: return output_move_quad (operands);
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\
+ neon_mrrc,neon_mcr_2_mcrr,*,*,*")
+ (set_attr "type" "*,*,*,*,*,*,alu,load4,store4")
+ (set_attr "insn" "*,*,*,*,*,*,mov,*,*")
+ (set_attr "length" "4,8,4,8,8,8,16,8,16")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")])
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "TARGET_NEON"
+{
+})
+
+(define_expand "mov"
+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
+ (match_operand:VSTRUCT 1 "general_operand" ""))]
+ "TARGET_NEON"
+{
+})
+
+(define_insn "*neon_mov"
+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
+ (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "#";
+ case 1: case 2: return output_move_neon (operands);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_ldm_2")
+ (set_attr "length" ",,")])
+
+(define_split
+ [(set (match_operand:EI 0 "s_register_operand" "")
+ (match_operand:EI 1 "s_register_operand" ""))]
+ "TARGET_NEON && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ rtx dest[2], src[2];
+
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ dest[1] = gen_rtx_REG (DImode, rdest + 4);
+ src[1] = gen_rtx_REG (DImode, rsrc + 4);
+
+ neon_disambiguate_copy (operands, dest, src, 2);
+})
+
+(define_split
+ [(set (match_operand:OI 0 "s_register_operand" "")
+ (match_operand:OI 1 "s_register_operand" ""))]
+ "TARGET_NEON && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ rtx dest[2], src[2];
+
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ dest[1] = gen_rtx_REG (TImode, rdest + 4);
+ src[1] = gen_rtx_REG (TImode, rsrc + 4);
+
+ neon_disambiguate_copy (operands, dest, src, 2);
+})
+
+(define_split
+ [(set (match_operand:CI 0 "s_register_operand" "")
+ (match_operand:CI 1 "s_register_operand" ""))]
+ "TARGET_NEON && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ rtx dest[3], src[3];
+
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ dest[1] = gen_rtx_REG (TImode, rdest + 4);
+ src[1] = gen_rtx_REG (TImode, rsrc + 4);
+ dest[2] = gen_rtx_REG (TImode, rdest + 8);
+ src[2] = gen_rtx_REG (TImode, rsrc + 8);
+
+ neon_disambiguate_copy (operands, dest, src, 3);
+})
+
+(define_split
+ [(set (match_operand:XI 0 "s_register_operand" "")
+ (match_operand:XI 1 "s_register_operand" ""))]
+ "TARGET_NEON && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))
+ (set (match_dup 6) (match_dup 7))]
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ rtx dest[4], src[4];
+
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ dest[1] = gen_rtx_REG (TImode, rdest + 4);
+ src[1] = gen_rtx_REG (TImode, rsrc + 4);
+ dest[2] = gen_rtx_REG (TImode, rdest + 8);
+ src[2] = gen_rtx_REG (TImode, rsrc + 8);
+ dest[3] = gen_rtx_REG (TImode, rdest + 12);
+ src[3] = gen_rtx_REG (TImode, rsrc + 12);
+
+ neon_disambiguate_copy (operands, dest, src, 4);
+})
+
+(define_insn "vec_set_internal"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (vec_merge:VD
+ (vec_duplicate:VD
+ (match_operand: 1 "s_register_operand" "r"))
+ (match_operand:VD 3 "s_register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_NEON"
+{
+ int elt = ffs ((int) INTVAL (operands[2]) - 1);
+ if (BYTES_BIG_ENDIAN)
+ elt = GET_MODE_NUNITS (mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+
+ return "vmov%?.\t%P0[%c2], %1";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_mcr")])
+
+(define_insn "vec_set_internal"
+ [(set (match_operand:VQ 0 "s_register_operand" "=w")
+ (vec_merge:VQ
+ (vec_duplicate:VQ
+ (match_operand: 1 "s_register_operand" "r"))
+ (match_operand:VQ 3 "s_register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
+ int half_elts = GET_MODE_NUNITS (mode) / 2;
+ int elt = elem % half_elts;
+ int hi = (elem / half_elts) * 2;
+ int regno = REGNO (operands[0]);
+
+ if (BYTES_BIG_ENDIAN)
+ elt = half_elts - 1 - elt;
+
+ operands[0] = gen_rtx_REG (mode, regno + hi);
+ operands[2] = GEN_INT (elt);
+
+ return "vmov%?.\t%P0[%c2], %1";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_mcr")]
+)
+
+(define_insn "vec_setv2di_internal"
+ [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "s_register_operand" "r"))
+ (match_operand:V2DI 3 "s_register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
+ int regno = REGNO (operands[0]) + 2 * elem;
+
+ operands[0] = gen_rtx_REG (DImode, regno);
+
+ return "vmov%?.64\t%P0, %Q1, %R1";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_mcr_2_mcrr")]
+)
+
+(define_expand "vec_set"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand: 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
+ emit_insn (gen_vec_set_internal (operands[0], operands[1],
+ GEN_INT (elem), operands[0]));
+ DONE;
+})
+
+(define_insn "vec_extract"
+ [(set (match_operand: 0 "s_register_operand" "=r")
+ (vec_select:
+ (match_operand:VD 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_NEON"
+{
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ return "vmov%?.\t%0, %P1[%c2]";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "vec_extract"
+ [(set (match_operand: 0 "s_register_operand" "=r")
+ (vec_select:
+ (match_operand:VQ 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_NEON"
+{
+ int half_elts = GET_MODE_NUNITS (mode) / 2;
+ int elt = INTVAL (operands[2]) % half_elts;
+ int hi = (INTVAL (operands[2]) / half_elts) * 2;
+ int regno = REGNO (operands[1]);
+
+ if (BYTES_BIG_ENDIAN)
+ elt = half_elts - 1 - elt;
+
+ operands[1] = gen_rtx_REG (mode, regno + hi);
+ operands[2] = GEN_INT (elt);
+
+ return "vmov%?.\t%0, %P1[%c2]";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "vec_extractv2di"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[1]) + INTVAL (operands[2]);
+
+ operands[1] = gen_rtx_REG (DImode, regno);
+
+ return "vmov%?.64\t%Q0, %R0, %P1";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_int_1")]
+)
+
+(define_expand "vec_init"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_NEON"
+{
+ neon_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; Doubleword and quadword arithmetic.
+
+;; NOTE: vadd/vsub and some other instructions also support 64-bit integer
+;; element size, which we could potentially use for "long long" operations. We
+;; don't want to do this at present though, because moving values from the
+;; vector unit to the ARM core is currently slow and 64-bit addition (etc.) is
+;; easy to do with ARM instructions anyway.
+
+(define_insn "*add3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vadd.\t%0, %1, %2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_1")))]
+)
+
+(define_insn "*sub3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vsub.\t%0, %1, %2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_2")))]
+)
+
+(define_insn "*mul3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmul.\t%0, %1, %2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mul_qqq_8_16_32_ddd_32"))
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_mul_qqq_8_16_32_ddd_32")
+ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
+)
+
+(define_insn "ior3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+ (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vorr\t%0, %1, %2";
+ case 1: return neon_output_logic_immediate ("vorr", &operands[2],
+ mode, 0, VALID_NEON_QREG_MODE (mode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "iordi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "w,0")
+ (match_operand:DI 2 "neon_logic_op2" "w,Dl")]
+ UNSPEC_VORR))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vorr\t%P0, %P1, %P2";
+ case 1: return neon_output_logic_immediate ("vorr", &operands[2],
+ DImode, 0, VALID_NEON_QREG_MODE (DImode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+;; The concrete forms of the Neon immediate-logic instructions are vbic and
+;; vorr. We support the pseudo-instruction vand instead, because that
+;; corresponds to the canonical form the middle-end expects to use for
+;; immediate bitwise-ANDs.
+
+(define_insn "and3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+ (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vand\t%0, %1, %2";
+ case 1: return neon_output_logic_immediate ("vand", &operands[2],
+ mode, 1, VALID_NEON_QREG_MODE (mode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "anddi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "w,0")
+ (match_operand:DI 2 "neon_inv_logic_op2" "w,DL")]
+ UNSPEC_VAND))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vand\t%P0, %P1, %P2";
+ case 1: return neon_output_logic_immediate ("vand", &operands[2],
+ DImode, 1, VALID_NEON_QREG_MODE (DImode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "orn3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vorn\t%0, %1, %2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "orndi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:DI 2 "s_register_operand" "w")]
+ UNSPEC_VORN))]
+ "TARGET_NEON"
+ "vorn\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "bic3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vbic\t%0, %1, %2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "bicdi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:DI 2 "s_register_operand" "w")]
+ UNSPEC_VBIC))]
+ "TARGET_NEON"
+ "vbic\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "xor3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "veor\t%0, %1, %2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "xordi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:DI 2 "s_register_operand" "w")]
+ UNSPEC_VEOR))]
+ "TARGET_NEON"
+ "veor\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "one_cmpl2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmvn\t%0, %1"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "abs2"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vabs.\t%0, %1"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_3")))]
+)
+
+(define_insn "neg2"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vneg.\t%0, %1"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_3")))]
+)
+
+(define_insn "*umin3_neon"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmin.\t%0, %1, %2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "*umax3_neon"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmax.\t%0, %1, %2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "*smin3_neon"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmin.\t%0, %1, %2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "*smax3_neon"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmax.\t%0, %1, %2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+; TODO: V2DI shifts are current disabled because there are bugs in the
+; generic vectorizer code. It ends up creating a V2DI constructor with
+; SImode elements.
+
+(define_insn "ashl3"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vshl.\t%0, %1, %2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+; Used for implementing logical shift-right, which is a left-shift by a negative
+; amount, with signed operands. This is essentially the same as ashl3
+; above, but using an unspec in case GCC tries anything tricky with negative
+; shift amounts.
+
+(define_insn "ashl3_signed"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")]
+ UNSPEC_ASHIFT_SIGNED))]
+ "TARGET_NEON"
+ "vshl.\t%0, %1, %2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+; Used for implementing logical shift-right, which is a left-shift by a negative
+; amount, with unsigned operands.
+
+(define_insn "ashl3_unsigned"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")]
+ UNSPEC_ASHIFT_UNSIGNED))]
+ "TARGET_NEON"
+ "vshl.\t%0, %1, %2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+(define_expand "ashr3"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "")
+ (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:VDQIW 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx neg = gen_reg_rtx (mode);
+
+ emit_insn (gen_neg2 (neg, operands[2]));
+ emit_insn (gen_ashl3_signed (operands[0], operands[1], neg));
+
+ DONE;
+})
+
+(define_expand "lshr3"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "")
+ (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:VDQIW 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx neg = gen_reg_rtx (mode);
+
+ emit_insn (gen_neg2 (neg, operands[2]));
+ emit_insn (gen_ashl3_unsigned (operands[0], operands[1], neg));
+
+ DONE;
+})
+
+;; Widening operations
+
+(define_insn "widen_ssum3"
+ [(set (match_operand: 0 "s_register_operand" "=w")
+ (plus: (sign_extend:
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand: 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.\t%q0, %q2, %P1"
+ [(set_attr "neon_type" "neon_int_3")]
+)
+
+(define_insn "widen_usum3"
+ [(set (match_operand: 0 "s_register_operand" "=w")
+ (plus: (zero_extend:
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand: 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.\t%q0, %q2, %P1"
+ [(set_attr "neon_type" "neon_int_3")]
+)
+
+;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
+;; shift-count granularity. That's good enough for the middle-end's current
+;; needs.
+
+(define_expand "vec_shr_"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:VDQ 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_multiple_of_8_operand" "")]
+ "TARGET_NEON"
+{
+ rtx zero_reg;
+ HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+ const int width = GET_MODE_BITSIZE (mode);
+ const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
+ rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
+ (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
+
+ if (num_bits == width)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
+ operands[0] = gen_lowpart (bvecmode, operands[0]);
+ operands[1] = gen_lowpart (bvecmode, operands[1]);
+
+ emit_insn (gen_ext (operands[0], operands[1], zero_reg,
+ GEN_INT (num_bits / BITS_PER_UNIT)));
+ DONE;
+})
+
+(define_expand "vec_shl_"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:VDQ 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_multiple_of_8_operand" "")]
+ "TARGET_NEON"
+{
+ rtx zero_reg;
+ HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+ const int width = GET_MODE_BITSIZE (mode);
+ const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
+ rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
+ (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
+
+ if (num_bits == 0)
+ {
+ emit_move_insn (operands[0], CONST0_RTX (mode));
+ DONE;
+ }
+
+ num_bits = width - num_bits;
+
+ zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
+ operands[0] = gen_lowpart (bvecmode, operands[0]);
+ operands[1] = gen_lowpart (bvecmode, operands[1]);
+
+ emit_insn (gen_ext (operands[0], zero_reg, operands[1],
+ GEN_INT (num_bits / BITS_PER_UNIT)));
+ DONE;
+})
+
+;; Helpers for quad-word reduction operations
+
+; Add (or smin, smax...) the low N/2 elements of the N-element vector
+; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
+; N/2-element vector.
+
+(define_insn "quad_halves_v4si"
+ [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+ (vqh_ops:V2SI
+ (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)]))
+ (vec_select:V2SI (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_NEON"
+ ".32\t%P0, %e1, %f1"
+ [(set_attr "vqh_mnem" "")
+ (set (attr "neon_type")
+ (if_then_else (eq_attr "vqh_mnem" "vadd")
+ (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
+
+(define_insn "quad_halves_v4sf"
+ [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+ (vqhs_ops:V2SF
+ (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)]))
+ (vec_select:V2SF (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_NEON"
+ ".f32\t%P0, %e1, %f1"
+ [(set_attr "vqh_mnem" "")
+ (set (attr "neon_type")
+ (if_then_else (eq_attr "vqh_mnem" "vadd")
+ (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
+
+(define_insn "quad_halves_v8hi"
+ [(set (match_operand:V4HI 0 "s_register_operand" "+w")
+ (vqh_ops:V4HI
+ (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_NEON"
+ ".16\t%P0, %e1, %f1"
+ [(set_attr "vqh_mnem" "")
+ (set (attr "neon_type")
+ (if_then_else (eq_attr "vqh_mnem" "vadd")
+ (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
+
+(define_insn "quad_halves_v16qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "+w")
+ (vqh_ops:V8QI
+ (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_NEON"
+ ".8\t%P0, %e1, %f1"
+ [(set_attr "vqh_mnem" "")
+ (set (attr "neon_type")
+ (if_then_else (eq_attr "vqh_mnem" "vadd")
+ (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
+
+; FIXME: We wouldn't need the following insns if we could write subregs of
+; vector registers. Make an attempt at removing unnecessary moves, though
+; we're really at the mercy of the register allocator.
+
+(define_insn "move_lo_quad_v4si"
+ [(set (match_operand:V4SI 0 "s_register_operand" "+w")
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "s_register_operand" "w")
+ (vec_select:V2SI (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%e0, %P1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "move_lo_quad_v4sf"
+ [(set (match_operand:V4SF 0 "s_register_operand" "+w")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "s_register_operand" "w")
+ (vec_select:V2SF (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%e0, %P1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "move_lo_quad_v8hi"
+ [(set (match_operand:V8HI 0 "s_register_operand" "+w")
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "s_register_operand" "w")
+ (vec_select:V4HI (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%e0, %P1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "move_lo_quad_v16qi"
+ [(set (match_operand:V16QI 0 "s_register_operand" "+w")
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "s_register_operand" "w")
+ (vec_select:V8QI (match_dup 0)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%e0, %P1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+;; Reduction operations
+
+(define_expand "reduc_splus_"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], mode,
+ &gen_neon_vpadd_internal);
+ DONE;
+})
+
+(define_expand "reduc_splus_"
+ [(match_operand:VQ 0 "s_register_operand" "")
+ (match_operand:VQ 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtx step1 = gen_reg_rtx (mode);
+ rtx res_d = gen_reg_rtx (mode);
+
+ emit_insn (gen_quad_halves_plus (step1, operands[1]));
+ emit_insn (gen_reduc_splus_ (res_d, step1));
+ emit_insn (gen_move_lo_quad_ (operands[0], res_d));
+
+ DONE;
+})
+
+(define_insn "reduc_splus_v2di"
+ [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+ (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
+ UNSPEC_VPADD))]
+ "TARGET_NEON"
+ "vadd.i64\t%e0, %e1, %f1"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+;; NEON does not distinguish between signed and unsigned addition except on
+;; widening operations.
+(define_expand "reduc_uplus_"
+ [(match_operand:VDQI 0 "s_register_operand" "")
+ (match_operand:VDQI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_reduc_splus_ (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "reduc_smin_"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], mode,
+ &gen_neon_vpsmin);
+ DONE;
+})
+
+(define_expand "reduc_smin_"
+ [(match_operand:VQ 0 "s_register_operand" "")
+ (match_operand:VQ 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtx step1 = gen_reg_rtx (mode);
+ rtx res_d = gen_reg_rtx (mode);
+
+ emit_insn (gen_quad_halves_smin (step1, operands[1]));
+ emit_insn (gen_reduc_smin_ (res_d, step1));
+ emit_insn (gen_move_lo_quad_ (operands[0], res_d));
+
+ DONE;
+})
+
+(define_expand "reduc_smax_"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], mode,
+ &gen_neon_vpsmax);
+ DONE;
+})
+
+(define_expand "reduc_smax_"
+ [(match_operand:VQ 0 "s_register_operand" "")
+ (match_operand:VQ 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtx step1 = gen_reg_rtx (mode);
+ rtx res_d = gen_reg_rtx (mode);
+
+ emit_insn (gen_quad_halves_smax (step1, operands[1]));
+ emit_insn (gen_reduc_smax_ (res_d, step1));
+ emit_insn (gen_move_lo_quad_ (operands[0], res_d));
+
+ DONE;
+})
+
+(define_expand "reduc_umin_"
+ [(match_operand:VDI 0 "s_register_operand" "")
+ (match_operand:VDI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], mode,
+ &gen_neon_vpumin);
+ DONE;
+})
+
+(define_expand "reduc_umin_"
+ [(match_operand:VQI 0 "s_register_operand" "")
+ (match_operand:VQI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtx step1 = gen_reg_rtx (mode);
+ rtx res_d = gen_reg_rtx (mode);
+
+ emit_insn (gen_quad_halves_umin (step1, operands[1]));
+ emit_insn (gen_reduc_umin_ (res_d, step1));
+ emit_insn (gen_move_lo_quad_ (operands[0], res_d));
+
+ DONE;
+})
+
+(define_expand "reduc_umax_"
+ [(match_operand:VDI 0 "s_register_operand" "")
+ (match_operand:VDI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], mode,
+ &gen_neon_vpumax);
+ DONE;
+})
+
+(define_expand "reduc_umax_"
+ [(match_operand:VQI 0 "s_register_operand" "")
+ (match_operand:VQI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtx step1 = gen_reg_rtx (mode);
+ rtx res_d = gen_reg_rtx (mode);
+
+ emit_insn (gen_quad_halves_umax (step1, operands[1]));
+ emit_insn (gen_reduc_umax_ (res_d, step1));
+ emit_insn (gen_move_lo_quad_ (operands[0], res_d));
+
+ DONE;
+})
+
+(define_insn "neon_vpadd_internal"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPADD))]
+ "TARGET_NEON"
+ "vpadd.\t%P0, %P1, %P2"
+ ;; Assume this schedules like vadd.
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_1")))]
+)
+
+(define_insn "neon_vpsmin"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPSMIN))]
+ "TARGET_NEON"
+ "vpmin.\t%P0, %P1, %P2"
+ ;; Assume this schedules like vmin.
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vpsmax"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPSMAX))]
+ "TARGET_NEON"
+ "vpmax.\t%P0, %P1, %P2"
+ ;; Assume this schedules like vmax.
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vpumin"
+ [(set (match_operand:VDI 0 "s_register_operand" "=w")
+ (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")]
+ UNSPEC_VPUMIN))]
+ "TARGET_NEON"
+ "vpmin.\t%P0, %P1, %P2"
+ ;; Assume this schedules like umin.
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "neon_vpumax"
+ [(set (match_operand:VDI 0 "s_register_operand" "=w")
+ (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")]
+ UNSPEC_VPUMAX))]
+ "TARGET_NEON"
+ "vpmax.\t%P0, %P1, %P2"
+ ;; Assume this schedules like umax.
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+;; Saturating arithmetic
+
+; NOTE: Neon supports many more saturating variants of instructions than the
+; following, but these are all GCC currently understands.
+; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
+; yet either, although these patterns may be used by intrinsics when they're
+; added.
+
+(define_insn "*ss_add_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqadd.\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+(define_insn "*us_add_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqadd.\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+(define_insn "*ss_sub