X-Git-Url: https://oss.titaniummirror.com/gitweb/?a=blobdiff_plain;f=gcc%2Fconfig%2Fmips%2F10000.md;fp=gcc%2Fconfig%2Fmips%2F10000.md;h=ad21e9e936e52806f4b31930708e741748c87569;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gcc/config/mips/10000.md b/gcc/config/mips/10000.md new file mode 100644 index 00000000..ad21e9e9 --- /dev/null +++ b/gcc/config/mips/10000.md @@ -0,0 +1,253 @@ +;; DFA-based pipeline description for the VR1x000. +;; Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; R12K/R14K/R16K are derivatives of R10K, thus copy its description +;; until specific tuning for each is added. + +;; R10000 has an int queue, fp queue, address queue. +;; The int queue feeds ALU1 and ALU2. +;; The fp queue feeds the fp-adder and fp-multiplier. +;; The addr queue feeds the Load/Store unit. +;; +;; However, we define the fp-adder and fp-multiplier as +;; separate automatons, because the fp-multiplier is +;; divided into fp-multiplier, fp-division, and +;; fp-squareroot units, all of which share the same +;; issue and completion logic, yet can operate in +;; parallel. +;; +;; This is based on the model described in the R10K Manual +;; and it helps to reduce the size of the automata. +(define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr, + r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt") + +(define_cpu_unit "r10k_alu1" "r10k_a_int") +(define_cpu_unit "r10k_alu2" "r10k_a_int") +(define_cpu_unit "r10k_fpadd" "r10k_a_fpadder") +(define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy") +(define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv") +(define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt") +(define_cpu_unit "r10k_loadstore" "r10k_a_addr") + + +;; R10k Loads and Stores. +(define_insn_reservation "r10k_load" 2 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "load,prefetch,prefetchx")) + "r10k_loadstore") + +(define_insn_reservation "r10k_store" 0 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "store,fpstore,fpidxstore")) + "r10k_loadstore") + +(define_insn_reservation "r10k_fpload" 3 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "fpload,fpidxload")) + "r10k_loadstore") + + +;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2. +;; Miscellaneous arith goes here too (this is a guess). +(define_insn_reservation "r10k_arith" 1 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical")) + "r10k_alu1 | r10k_alu2") + +;; We treat mfhilo differently, because we need to know when +;; it's HI and when it's LO. +(define_insn_reservation "r10k_mfhi" 1 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "mfhilo") + (not (match_operand 1 "lo_operand")))) + "r10k_alu1 | r10k_alu2") + +(define_insn_reservation "r10k_mflo" 1 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "mfhilo") + (match_operand 1 "lo_operand"))) + "r10k_alu1 | r10k_alu2") + + +;; ALU1 handles shifts, branch eval, and condmove. +;; +;; Brancher is separate, but part of ALU1, but can only +;; do one branch per cycle (is this even implementable?). +;; +;; Unsure if the brancher handles jumps and calls as well, but since +;; they're related, we'll add them here for now. +(define_insn_reservation "r10k_brancher" 1 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "shift,branch,jump,call")) + "r10k_alu1") + +(define_insn_reservation "r10k_int_cmove" 1 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "condmove") + (eq_attr "mode" "SI,DI"))) + "r10k_alu1") + + +;; Coprocessor Moves. +;; mtc1/dmtc1 are handled by ALU1. +;; mfc1/dmfc1 are handled by the fp-multiplier. +(define_insn_reservation "r10k_mt_xfer" 3 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "mtc")) + "r10k_alu1") + +(define_insn_reservation "r10k_mf_xfer" 2 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "mfc")) + "r10k_fpmpy") + + +;; Only ALU2 does int multiplications and divisions. +;; +;; According to the Vr10000 series user manual, +;; integer mult and div insns can be issued one +;; cycle earlier if using register Lo. We model +;; this by using the Lo value by default, as it +;; is the more common value, and use a bypass +;; for the Hi value when needed. +;; +;; Also of note, There are different latencies +;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7). +;; However, gcc does not have separate types +;; for these insns. Thus to strike a balance, +;; we use the Hi latency value for imul +;; operations until the imul type can be split. +(define_insn_reservation "r10k_imul_single" 6 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "imul,imul3") + (eq_attr "mode" "SI"))) + "r10k_alu2 * 6") + +(define_insn_reservation "r10k_imul_double" 10 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "imul,imul3") + (eq_attr "mode" "DI"))) + "r10k_alu2 * 10") + +;; Divides keep ALU2 busy. +(define_insn_reservation "r10k_idiv_single" 34 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "r10k_alu2 * 35") + +(define_insn_reservation "r10k_idiv_double" 66 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "r10k_alu2 * 67") + +(define_bypass 35 "r10k_idiv_single" "r10k_mfhi") +(define_bypass 67 "r10k_idiv_double" "r10k_mfhi") + + +;; Floating point add/sub, mul, abs value, neg, comp, & moves. +(define_insn_reservation "r10k_fp_miscadd" 2 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "fadd,fabs,fneg,fcmp")) + "r10k_fpadd") + +(define_insn_reservation "r10k_fp_miscmul" 2 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "fmul,fmove")) + "r10k_fpmpy") + +(define_insn_reservation "r10k_fp_cmove" 2 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "condmove") + (eq_attr "mode" "SF,DF"))) + "r10k_fpmpy") + + +;; The fcvt.s.[wl] insn has latency 4, repeat 2. +;; All other fcvt insns have latency 2, repeat 1. +(define_insn_reservation "r10k_fcvt_single" 4 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fcvt") + (eq_attr "cnv_mode" "I2S"))) + "r10k_fpadd * 2") + +(define_insn_reservation "r10k_fcvt_other" 2 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fcvt") + (eq_attr "cnv_mode" "!I2S"))) + "r10k_fpadd") + + +;; Run the fmadd insn through fp-adder first, then fp-multiplier. +;; +;; The latency for fmadd is 2 cycles if the result is used +;; by another fmadd instruction. +(define_insn_reservation "r10k_fmadd" 4 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "fmadd")) + "r10k_fpadd, r10k_fpmpy") + +(define_bypass 2 "r10k_fmadd" "r10k_fmadd") + + +;; Floating point Divisions & square roots. +(define_insn_reservation "r10k_fdiv_single" 12 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "SF"))) + "r10k_fpdiv * 14") + +(define_insn_reservation "r10k_fdiv_double" 19 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "DF"))) + "r10k_fpdiv * 21") + +(define_insn_reservation "r10k_fsqrt_single" 18 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fsqrt") + (eq_attr "mode" "SF"))) + "r10k_fpsqrt * 20") + +(define_insn_reservation "r10k_fsqrt_double" 33 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fsqrt") + (eq_attr "mode" "DF"))) + "r10k_fpsqrt * 35") + +(define_insn_reservation "r10k_frsqrt_single" 30 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "SF"))) + "r10k_fpsqrt * 20") + +(define_insn_reservation "r10k_frsqrt_double" 52 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "DF"))) + "r10k_fpsqrt * 35") + + +;; Handle unknown/multi insns here (this is a guess). +(define_insn_reservation "r10k_unknown" 1 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "unknown,multi")) + "r10k_alu1 + r10k_alu2")