X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gcc%2Fpostreload-gcse.c;fp=gcc%2Fpostreload-gcse.c;h=57be7a5c39c5efed4fc22431a5c633a73f02defe;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c new file mode 100644 index 00000000..57be7a5c --- /dev/null +++ b/gcc/postreload-gcse.c @@ -0,0 +1,1341 @@ +/* Post reload partially redundant load elimination + Copyright (C) 2004, 2005, 2006, 2007, 2008 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "toplev.h" + +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "flags.h" +#include "real.h" +#include "insn-config.h" +#include "recog.h" +#include "basic-block.h" +#include "output.h" +#include "function.h" +#include "expr.h" +#include "except.h" +#include "intl.h" +#include "obstack.h" +#include "hashtab.h" +#include "params.h" +#include "target.h" +#include "timevar.h" +#include "tree-pass.h" +#include "dbgcnt.h" + +/* The following code implements gcse after reload, the purpose of this + pass is to cleanup redundant loads generated by reload and other + optimizations that come after gcse. It searches for simple inter-block + redundancies and tries to eliminate them by adding moves and loads + in cold places. + + Perform partially redundant load elimination, try to eliminate redundant + loads created by the reload pass. We try to look for full or partial + redundant loads fed by one or more loads/stores in predecessor BBs, + and try adding loads to make them fully redundant. We also check if + it's worth adding loads to be able to delete the redundant load. + + Algorithm: + 1. Build available expressions hash table: + For each load/store instruction, if the loaded/stored memory didn't + change until the end of the basic block add this memory expression to + the hash table. + 2. Perform Redundancy elimination: + For each load instruction do the following: + perform partial redundancy elimination, check if it's worth adding + loads to make the load fully redundant. If so add loads and + register copies and delete the load. + 3. Delete instructions made redundant in step 2. + + Future enhancement: + If the loaded register is used/defined between load and some store, + look for some other free register between load and all its stores, + and replace the load with a copy from this register to the loaded + register. +*/ + + +/* Keep statistics of this pass. */ +static struct +{ + int moves_inserted; + int copies_inserted; + int insns_deleted; +} stats; + +/* We need to keep a hash table of expressions. The table entries are of + type 'struct expr', and for each expression there is a single linked + list of occurrences. */ + +/* The table itself. 
*/ +static htab_t expr_table; + +/* Expression elements in the hash table. */ +struct expr +{ + /* The expression (SET_SRC for expressions, PATTERN for assignments). */ + rtx expr; + + /* The same hash for this entry. */ + hashval_t hash; + + /* List of available occurrence in basic blocks in the function. */ + struct occr *avail_occr; +}; + +static struct obstack expr_obstack; + +/* Occurrence of an expression. + There is at most one occurrence per basic block. If a pattern appears + more than once, the last appearance is used. */ + +struct occr +{ + /* Next occurrence of this expression. */ + struct occr *next; + /* The insn that computes the expression. */ + rtx insn; + /* Nonzero if this [anticipatable] occurrence has been deleted. */ + char deleted_p; +}; + +static struct obstack occr_obstack; + +/* The following structure holds the information about the occurrences of + the redundant instructions. */ +struct unoccr +{ + struct unoccr *next; + edge pred; + rtx insn; +}; + +static struct obstack unoccr_obstack; + +/* Array where each element is the CUID if the insn that last set the hard + register with the number of the element, since the start of the current + basic block. + + This array is used during the building of the hash table (step 1) to + determine if a reg is killed before the end of a basic block. + + It is also used when eliminating partial redundancies (step 2) to see + if a reg was modified since the start of a basic block. */ +static int *reg_avail_info; + +/* A list of insns that may modify memory within the current basic block. */ +struct modifies_mem +{ + rtx insn; + struct modifies_mem *next; +}; +static struct modifies_mem *modifies_mem_list; + +/* The modifies_mem structs also go on an obstack, only this obstack is + freed each time after completing the analysis or transformations on + a basic block. So we allocate a dummy modifies_mem_obstack_bottom + object on the obstack to keep track of the bottom of the obstack. */ +static struct obstack modifies_mem_obstack; +static struct modifies_mem *modifies_mem_obstack_bottom; + +/* Mapping of insn UIDs to CUIDs. + CUIDs are like UIDs except they increase monotonically in each basic + block, have no gaps, and only apply to real insns. */ +static int *uid_cuid; +#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)]) + + +/* Helpers for memory allocation/freeing. */ +static void alloc_mem (void); +static void free_mem (void); + +/* Support for hash table construction and transformations. */ +static bool oprs_unchanged_p (rtx, rtx, bool); +static void record_last_reg_set_info (rtx, rtx); +static void record_last_reg_set_info_regno (rtx, int); +static void record_last_mem_set_info (rtx); +static void record_last_set_info (rtx, const_rtx, void *); +static void record_opr_changes (rtx); + +static void find_mem_conflicts (rtx, const_rtx, void *); +static int load_killed_in_block_p (int, rtx, bool); +static void reset_opr_set_tables (void); + +/* Hash table support. */ +static hashval_t hash_expr (rtx, int *); +static hashval_t hash_expr_for_htab (const void *); +static int expr_equiv_p (const void *, const void *); +static void insert_expr_in_table (rtx, rtx); +static struct expr *lookup_expr_in_table (rtx); +static int dump_hash_table_entry (void **, void *); +static void dump_hash_table (FILE *); + +/* Helpers for eliminate_partially_redundant_load. 
*/ +static bool reg_killed_on_edge (rtx, edge); +static bool reg_used_on_edge (rtx, edge); + +static rtx get_avail_load_store_reg (rtx); + +static bool bb_has_well_behaved_predecessors (basic_block); +static struct occr* get_bb_avail_insn (basic_block, struct occr *); +static void hash_scan_set (rtx); +static void compute_hash_table (void); + +/* The work horses of this pass. */ +static void eliminate_partially_redundant_load (basic_block, + rtx, + struct expr *); +static void eliminate_partially_redundant_loads (void); + + +/* Allocate memory for the CUID mapping array and register/memory + tracking tables. */ + +static void +alloc_mem (void) +{ + int i; + basic_block bb; + rtx insn; + + /* Find the largest UID and create a mapping from UIDs to CUIDs. */ + uid_cuid = XCNEWVEC (int, get_max_uid () + 1); + i = 1; + FOR_EACH_BB (bb) + FOR_BB_INSNS (bb, insn) + { + if (INSN_P (insn)) + uid_cuid[INSN_UID (insn)] = i++; + else + uid_cuid[INSN_UID (insn)] = i; + } + + /* Allocate the available expressions hash table. We don't want to + make the hash table too small, but unnecessarily making it too large + also doesn't help. The i/4 is a gcse.c relic, and seems like a + reasonable choice. */ + expr_table = htab_create (MAX (i / 4, 13), + hash_expr_for_htab, expr_equiv_p, NULL); + + /* We allocate everything on obstacks because we often can roll back + the whole obstack to some point. Freeing obstacks is very fast. */ + gcc_obstack_init (&expr_obstack); + gcc_obstack_init (&occr_obstack); + gcc_obstack_init (&unoccr_obstack); + gcc_obstack_init (&modifies_mem_obstack); + + /* Working array used to track the last set for each register + in the current block. */ + reg_avail_info = (int *) xmalloc (FIRST_PSEUDO_REGISTER * sizeof (int)); + + /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we + can roll it back in reset_opr_set_tables. */ + modifies_mem_obstack_bottom = + (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack, + sizeof (struct modifies_mem)); +} + +/* Free memory allocated by alloc_mem. */ + +static void +free_mem (void) +{ + free (uid_cuid); + + htab_delete (expr_table); + + obstack_free (&expr_obstack, NULL); + obstack_free (&occr_obstack, NULL); + obstack_free (&unoccr_obstack, NULL); + obstack_free (&modifies_mem_obstack, NULL); + + free (reg_avail_info); +} + + +/* Hash expression X. + DO_NOT_RECORD_P is a boolean indicating if a volatile operand is found + or if the expression contains something we don't want to insert in the + table. */ + +static hashval_t +hash_expr (rtx x, int *do_not_record_p) +{ + *do_not_record_p = 0; + return hash_rtx (x, GET_MODE (x), do_not_record_p, + NULL, /*have_reg_qty=*/false); +} + +/* Callback for hashtab. + Return the hash value for expression EXP. We don't actually hash + here, we just return the cached hash value. */ + +static hashval_t +hash_expr_for_htab (const void *expp) +{ + const struct expr *const exp = (const struct expr *) expp; + return exp->hash; +} + +/* Callback for hashtab. + Return nonzero if exp1 is equivalent to exp2. */ + +static int +expr_equiv_p (const void *exp1p, const void *exp2p) +{ + const struct expr *const exp1 = (const struct expr *) exp1p; + const struct expr *const exp2 = (const struct expr *) exp2p; + int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true); + + gcc_assert (!equiv_p || exp1->hash == exp2->hash); + return equiv_p; +} + + +/* Insert expression X in INSN in the hash TABLE. + If it is already present, record it as the last occurrence in INSN's + basic block. 
*/ + +static void +insert_expr_in_table (rtx x, rtx insn) +{ + int do_not_record_p; + hashval_t hash; + struct expr *cur_expr, **slot; + struct occr *avail_occr, *last_occr = NULL; + + hash = hash_expr (x, &do_not_record_p); + + /* Do not insert expression in the table if it contains volatile operands, + or if hash_expr determines the expression is something we don't want + to or can't handle. */ + if (do_not_record_p) + return; + + /* We anticipate that redundant expressions are rare, so for convenience + allocate a new hash table element here already and set its fields. + If we don't do this, we need a hack with a static struct expr. Anyway, + obstack_free is really fast and one more obstack_alloc doesn't hurt if + we're going to see more expressions later on. */ + cur_expr = (struct expr *) obstack_alloc (&expr_obstack, + sizeof (struct expr)); + cur_expr->expr = x; + cur_expr->hash = hash; + cur_expr->avail_occr = NULL; + + slot = (struct expr **) htab_find_slot_with_hash (expr_table, cur_expr, + hash, INSERT); + + if (! (*slot)) + /* The expression isn't found, so insert it. */ + *slot = cur_expr; + else + { + /* The expression is already in the table, so roll back the + obstack and use the existing table entry. */ + obstack_free (&expr_obstack, cur_expr); + cur_expr = *slot; + } + + /* Search for another occurrence in the same basic block. */ + avail_occr = cur_expr->avail_occr; + while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn)) + { + /* If an occurrence isn't found, save a pointer to the end of + the list. */ + last_occr = avail_occr; + avail_occr = avail_occr->next; + } + + if (avail_occr) + /* Found another instance of the expression in the same basic block. + Prefer this occurrence to the currently recorded one. We want + the last one in the block and the block is scanned from start + to end. */ + avail_occr->insn = insn; + else + { + /* First occurrence of this expression in this basic block. */ + avail_occr = (struct occr *) obstack_alloc (&occr_obstack, + sizeof (struct occr)); + + /* First occurrence of this expression in any block? */ + if (cur_expr->avail_occr == NULL) + cur_expr->avail_occr = avail_occr; + else + last_occr->next = avail_occr; + + avail_occr->insn = insn; + avail_occr->next = NULL; + avail_occr->deleted_p = 0; + } +} + + +/* Lookup pattern PAT in the expression hash table. + The result is a pointer to the table entry, or NULL if not found. */ + +static struct expr * +lookup_expr_in_table (rtx pat) +{ + int do_not_record_p; + struct expr **slot, *tmp_expr; + hashval_t hash = hash_expr (pat, &do_not_record_p); + + if (do_not_record_p) + return NULL; + + tmp_expr = (struct expr *) obstack_alloc (&expr_obstack, + sizeof (struct expr)); + tmp_expr->expr = pat; + tmp_expr->hash = hash; + tmp_expr->avail_occr = NULL; + + slot = (struct expr **) htab_find_slot_with_hash (expr_table, tmp_expr, + hash, INSERT); + obstack_free (&expr_obstack, tmp_expr); + + if (!slot) + return NULL; + else + return (*slot); +} + + +/* Dump all expressions and occurrences that are currently in the + expression hash table to FILE. */ + +/* This helper is called via htab_traverse. 
*/ +static int +dump_hash_table_entry (void **slot, void *filep) +{ + struct expr *expr = (struct expr *) *slot; + FILE *file = (FILE *) filep; + struct occr *occr; + + fprintf (file, "expr: "); + print_rtl (file, expr->expr); + fprintf (file,"\nhashcode: %u\n", expr->hash); + fprintf (file,"list of occurrences:\n"); + occr = expr->avail_occr; + while (occr) + { + rtx insn = occr->insn; + print_rtl_single (file, insn); + fprintf (file, "\n"); + occr = occr->next; + } + fprintf (file, "\n"); + return 1; +} + +static void +dump_hash_table (FILE *file) +{ + fprintf (file, "\n\nexpression hash table\n"); + fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n", + (long) htab_size (expr_table), + (long) htab_elements (expr_table), + htab_collisions (expr_table)); + if (htab_elements (expr_table) > 0) + { + fprintf (file, "\n\ntable entries:\n"); + htab_traverse (expr_table, dump_hash_table_entry, file); + } + fprintf (file, "\n"); +} + +/* Return true if register X is recorded as being set by an instruction + whose CUID is greater than the one given. */ + +static bool +reg_changed_after_insn_p (rtx x, int cuid) +{ + unsigned int regno, end_regno; + + regno = REGNO (x); + end_regno = END_HARD_REGNO (x); + do + if (reg_avail_info[regno] > cuid) + return true; + while (++regno < end_regno); + return false; +} + +/* Return nonzero if the operands of expression X are unchanged + 1) from the start of INSN's basic block up to but not including INSN + if AFTER_INSN is false, or + 2) from INSN to the end of INSN's basic block if AFTER_INSN is true. */ + +static bool +oprs_unchanged_p (rtx x, rtx insn, bool after_insn) +{ + int i, j; + enum rtx_code code; + const char *fmt; + + if (x == 0) + return 1; + + code = GET_CODE (x); + switch (code) + { + case REG: + /* We are called after register allocation. */ + gcc_assert (REGNO (x) < FIRST_PSEUDO_REGISTER); + if (after_insn) + return !reg_changed_after_insn_p (x, INSN_CUID (insn) - 1); + else + return !reg_changed_after_insn_p (x, 0); + + case MEM: + if (load_killed_in_block_p (INSN_CUID (insn), x, after_insn)) + return 0; + else + return oprs_unchanged_p (XEXP (x, 0), insn, after_insn); + + case PC: + case CC0: /*FIXME*/ + case CONST: + case CONST_INT: + case CONST_DOUBLE: + case CONST_FIXED: + case CONST_VECTOR: + case SYMBOL_REF: + case LABEL_REF: + case ADDR_VEC: + case ADDR_DIFF_VEC: + return 1; + + case PRE_DEC: + case PRE_INC: + case POST_DEC: + case POST_INC: + case PRE_MODIFY: + case POST_MODIFY: + if (after_insn) + return 0; + break; + + default: + break; + } + + for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) + { + if (fmt[i] == 'e') + { + if (! oprs_unchanged_p (XEXP (x, i), insn, after_insn)) + return 0; + } + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (x, i); j++) + if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, after_insn)) + return 0; + } + + return 1; +} + + +/* Used for communication between find_mem_conflicts and + load_killed_in_block_p. Nonzero if find_mem_conflicts finds a + conflict between two memory references. + This is a bit of a hack to work around the limitations of note_stores. */ +static int mems_conflict_p; + +/* DEST is the output of an instruction. If it is a memory reference, and + possibly conflicts with the load found in DATA, then set mems_conflict_p + to a nonzero value. 
*/ + +static void +find_mem_conflicts (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, + void *data) +{ + rtx mem_op = (rtx) data; + + while (GET_CODE (dest) == SUBREG + || GET_CODE (dest) == ZERO_EXTRACT + || GET_CODE (dest) == STRICT_LOW_PART) + dest = XEXP (dest, 0); + + /* If DEST is not a MEM, then it will not conflict with the load. Note + that function calls are assumed to clobber memory, but are handled + elsewhere. */ + if (! MEM_P (dest)) + return; + + if (true_dependence (dest, GET_MODE (dest), mem_op, + rtx_addr_varies_p)) + mems_conflict_p = 1; +} + + +/* Return nonzero if the expression in X (a memory reference) is killed + in the current basic block before (if AFTER_INSN is false) or after + (if AFTER_INSN is true) the insn with the CUID in UID_LIMIT. + + This function assumes that the modifies_mem table is flushed when + the hash table construction or redundancy elimination phases start + processing a new basic block. */ + +static int +load_killed_in_block_p (int uid_limit, rtx x, bool after_insn) +{ + struct modifies_mem *list_entry = modifies_mem_list; + + while (list_entry) + { + rtx setter = list_entry->insn; + + /* Ignore entries in the list that do not apply. */ + if ((after_insn + && INSN_CUID (setter) < uid_limit) + || (! after_insn + && INSN_CUID (setter) > uid_limit)) + { + list_entry = list_entry->next; + continue; + } + + /* If SETTER is a call everything is clobbered. Note that calls + to pure functions are never put on the list, so we need not + worry about them. */ + if (CALL_P (setter)) + return 1; + + /* SETTER must be an insn of some kind that sets memory. Call + note_stores to examine each hunk of memory that is modified. + It will set mems_conflict_p to nonzero if there may be a + conflict between X and SETTER. */ + mems_conflict_p = 0; + note_stores (PATTERN (setter), find_mem_conflicts, x); + if (mems_conflict_p) + return 1; + + list_entry = list_entry->next; + } + return 0; +} + + +/* Record register first/last/block set information for REGNO in INSN. */ + +static inline void +record_last_reg_set_info (rtx insn, rtx reg) +{ + unsigned int regno, end_regno; + + regno = REGNO (reg); + end_regno = END_HARD_REGNO (reg); + do + reg_avail_info[regno] = INSN_CUID (insn); + while (++regno < end_regno); +} + +static inline void +record_last_reg_set_info_regno (rtx insn, int regno) +{ + reg_avail_info[regno] = INSN_CUID (insn); +} + + +/* Record memory modification information for INSN. We do not actually care + about the memory location(s) that are set, or even how they are set (consider + a CALL_INSN). We merely need to record which insns modify memory. */ + +static void +record_last_mem_set_info (rtx insn) +{ + struct modifies_mem *list_entry; + + list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack, + sizeof (struct modifies_mem)); + list_entry->insn = insn; + list_entry->next = modifies_mem_list; + modifies_mem_list = list_entry; +} + +/* Called from compute_hash_table via note_stores to handle one + SET or CLOBBER in an insn. DATA is really the instruction in which + the SET is taking place. */ + +static void +record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data) +{ + rtx last_set_insn = (rtx) data; + + if (GET_CODE (dest) == SUBREG) + dest = SUBREG_REG (dest); + + if (REG_P (dest)) + record_last_reg_set_info (last_set_insn, dest); + else if (MEM_P (dest)) + { + /* Ignore pushes, they don't clobber memory. They may still + clobber the stack pointer though. 
Some targets do argument + pushes without adding REG_INC notes. See e.g. PR25196, + where a pushsi2 on i386 doesn't have REG_INC notes. Note + such changes here too. */ + if (! push_operand (dest, GET_MODE (dest))) + record_last_mem_set_info (last_set_insn); + else + record_last_reg_set_info_regno (last_set_insn, STACK_POINTER_REGNUM); + } +} + + +/* Reset tables used to keep track of what's still available since the + start of the block. */ + +static void +reset_opr_set_tables (void) +{ + memset (reg_avail_info, 0, FIRST_PSEUDO_REGISTER * sizeof (int)); + obstack_free (&modifies_mem_obstack, modifies_mem_obstack_bottom); + modifies_mem_list = NULL; +} + + +/* Record things set by INSN. + This data is used by oprs_unchanged_p. */ + +static void +record_opr_changes (rtx insn) +{ + rtx note; + + /* Find all stores and record them. */ + note_stores (PATTERN (insn), record_last_set_info, insn); + + /* Also record autoincremented REGs for this insn as changed. */ + for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) + if (REG_NOTE_KIND (note) == REG_INC) + record_last_reg_set_info (insn, XEXP (note, 0)); + + /* Finally, if this is a call, record all call clobbers. */ + if (CALL_P (insn)) + { + unsigned int regno; + rtx link, x; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) + record_last_reg_set_info_regno (insn, regno); + + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + if (GET_CODE (XEXP (link, 0)) == CLOBBER) + { + x = XEXP (XEXP (link, 0), 0); + if (REG_P (x)) + { + gcc_assert (HARD_REGISTER_P (x)); + record_last_reg_set_info (insn, x); + } + } + + if (! RTL_CONST_OR_PURE_CALL_P (insn)) + record_last_mem_set_info (insn); + } +} + + +/* Scan the pattern of INSN and add an entry to the hash TABLE. + After reload we are interested in loads/stores only. */ + +static void +hash_scan_set (rtx insn) +{ + rtx pat = PATTERN (insn); + rtx src = SET_SRC (pat); + rtx dest = SET_DEST (pat); + + /* We are only interested in loads and stores. */ + if (! MEM_P (src) && ! MEM_P (dest)) + return; + + /* Don't mess with jumps and nops. */ + if (JUMP_P (insn) || set_noop_p (pat)) + return; + + if (REG_P (dest)) + { + if (/* Don't CSE something if we can't do a reg/reg copy. */ + can_copy_p (GET_MODE (dest)) + /* Is SET_SRC something we want to gcse? */ + && general_operand (src, GET_MODE (src)) +#ifdef STACK_REGS + /* Never consider insns touching the register stack. It may + create situations that reg-stack cannot handle (e.g. a stack + register live across an abnormal edge). */ + && (REGNO (dest) < FIRST_STACK_REG || REGNO (dest) > LAST_STACK_REG) +#endif + /* An expression is not available if its operands are + subsequently modified, including this insn. */ + && oprs_unchanged_p (src, insn, true)) + { + insert_expr_in_table (src, insn); + } + } + else if (REG_P (src)) + { + /* Only record sets of pseudo-regs in the hash table. */ + if (/* Don't CSE something if we can't do a reg/reg copy. */ + can_copy_p (GET_MODE (src)) + /* Is SET_DEST something we want to gcse? */ + && general_operand (dest, GET_MODE (dest)) +#ifdef STACK_REGS + /* As above for STACK_REGS. */ + && (REGNO (src) < FIRST_STACK_REG || REGNO (src) > LAST_STACK_REG) +#endif + && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest))) + /* Check if the memory expression is killed after insn. */ + && ! 
load_killed_in_block_p (INSN_CUID (insn) + 1, dest, true) + && oprs_unchanged_p (XEXP (dest, 0), insn, true)) + { + insert_expr_in_table (dest, insn); + } + } +} + + +/* Create hash table of memory expressions available at end of basic + blocks. Basically you should think of this hash table as the + representation of AVAIL_OUT. This is the set of expressions that + is generated in a basic block and not killed before the end of the + same basic block. Notice that this is really a local computation. */ + +static void +compute_hash_table (void) +{ + basic_block bb; + + FOR_EACH_BB (bb) + { + rtx insn; + + /* First pass over the instructions records information used to + determine when registers and memory are last set. + Since we compute a "local" AVAIL_OUT, reset the tables that + help us keep track of what has been modified since the start + of the block. */ + reset_opr_set_tables (); + FOR_BB_INSNS (bb, insn) + { + if (INSN_P (insn)) + record_opr_changes (insn); + } + + /* The next pass actually builds the hash table. */ + FOR_BB_INSNS (bb, insn) + if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET) + hash_scan_set (insn); + } +} + + +/* Check if register REG is killed in any insn waiting to be inserted on + edge E. This function is required to check that our data flow analysis + is still valid prior to commit_edge_insertions. */ + +static bool +reg_killed_on_edge (rtx reg, edge e) +{ + rtx insn; + + for (insn = e->insns.r; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn) && reg_set_p (reg, insn)) + return true; + + return false; +} + +/* Similar to above - check if register REG is used in any insn waiting + to be inserted on edge E. + Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p + with PREV(insn),NEXT(insn) instead of calling reg_overlap_mentioned_p. */ + +static bool +reg_used_on_edge (rtx reg, edge e) +{ + rtx insn; + + for (insn = e->insns.r; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return true; + + return false; +} + +/* Return the loaded/stored register of a load/store instruction. */ + +static rtx +get_avail_load_store_reg (rtx insn) +{ + if (REG_P (SET_DEST (PATTERN (insn)))) + /* A load. */ + return SET_DEST(PATTERN(insn)); + else + { + /* A store. */ + gcc_assert (REG_P (SET_SRC (PATTERN (insn)))); + return SET_SRC (PATTERN (insn)); + } +} + +/* Return nonzero if the predecessors of BB are "well behaved". */ + +static bool +bb_has_well_behaved_predecessors (basic_block bb) +{ + edge pred; + edge_iterator ei; + + if (EDGE_COUNT (bb->preds) == 0) + return false; + + FOR_EACH_EDGE (pred, ei, bb->preds) + { + if ((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred)) + return false; + + if (JUMP_TABLE_DATA_P (BB_END (pred->src))) + return false; + } + return true; +} + + +/* Search for the occurrences of expression in BB. */ + +static struct occr* +get_bb_avail_insn (basic_block bb, struct occr *occr) +{ + for (; occr != NULL; occr = occr->next) + if (BLOCK_FOR_INSN (occr->insn) == bb) + return occr; + return NULL; +} + + +/* This handles the case where several stores feed a partially redundant + load. It checks if the redundancy elimination is possible and if it's + worth it. + + Redundancy elimination is possible if, + 1) None of the operands of an insn have been modified since the start + of the current basic block. + 2) In any predecessor of the current basic block, the same expression + is generated. 
+ + See the function body for the heuristics that determine if eliminating + a redundancy is also worth doing, assuming it is possible. */ + +static void +eliminate_partially_redundant_load (basic_block bb, rtx insn, + struct expr *expr) +{ + edge pred; + rtx avail_insn = NULL_RTX; + rtx avail_reg; + rtx dest, pat; + struct occr *a_occr; + struct unoccr *occr, *avail_occrs = NULL; + struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL; + int npred_ok = 0; + gcov_type ok_count = 0; /* Redundant load execution count. */ + gcov_type critical_count = 0; /* Execution count of critical edges. */ + edge_iterator ei; + bool critical_edge_split = false; + + /* The execution count of the loads to be added to make the + load fully redundant. */ + gcov_type not_ok_count = 0; + basic_block pred_bb; + + pat = PATTERN (insn); + dest = SET_DEST (pat); + + /* Check that the loaded register is not used, set, or killed from the + beginning of the block. */ + if (reg_changed_after_insn_p (dest, 0) + || reg_used_between_p (dest, PREV_INSN (BB_HEAD (bb)), insn)) + return; + + /* Check potential for replacing load with copy for predecessors. */ + FOR_EACH_EDGE (pred, ei, bb->preds) + { + rtx next_pred_bb_end; + + avail_insn = NULL_RTX; + avail_reg = NULL_RTX; + pred_bb = pred->src; + next_pred_bb_end = NEXT_INSN (BB_END (pred_bb)); + for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr; + a_occr = get_bb_avail_insn (pred_bb, a_occr->next)) + { + /* Check if the loaded register is not used. */ + avail_insn = a_occr->insn; + avail_reg = get_avail_load_store_reg (avail_insn); + gcc_assert (avail_reg); + + /* Make sure we can generate a move from register avail_reg to + dest. */ + extract_insn (gen_move_insn (copy_rtx (dest), + copy_rtx (avail_reg))); + if (! constrain_operands (1) + || reg_killed_on_edge (avail_reg, pred) + || reg_used_on_edge (dest, pred)) + { + avail_insn = NULL; + continue; + } + if (!reg_set_between_p (avail_reg, avail_insn, next_pred_bb_end)) + /* AVAIL_INSN remains non-null. */ + break; + else + avail_insn = NULL; + } + + if (EDGE_CRITICAL_P (pred)) + critical_count += pred->count; + + if (avail_insn != NULL_RTX) + { + npred_ok++; + ok_count += pred->count; + if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest), + copy_rtx (avail_reg))))) + { + /* Check if there is going to be a split. */ + if (EDGE_CRITICAL_P (pred)) + critical_edge_split = true; + } + else /* Its a dead move no need to generate. */ + continue; + occr = (struct unoccr *) obstack_alloc (&unoccr_obstack, + sizeof (struct unoccr)); + occr->insn = avail_insn; + occr->pred = pred; + occr->next = avail_occrs; + avail_occrs = occr; + if (! rollback_unoccr) + rollback_unoccr = occr; + } + else + { + /* Adding a load on a critical edge will cause a split. */ + if (EDGE_CRITICAL_P (pred)) + critical_edge_split = true; + not_ok_count += pred->count; + unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack, + sizeof (struct unoccr)); + unoccr->insn = NULL_RTX; + unoccr->pred = pred; + unoccr->next = unavail_occrs; + unavail_occrs = unoccr; + if (! rollback_unoccr) + rollback_unoccr = unoccr; + } + } + + if (/* No load can be replaced by copy. */ + npred_ok == 0 + /* Prevent exploding the code. */ + || (optimize_bb_for_size_p (bb) && npred_ok > 1) + /* If we don't have profile information we cannot tell if splitting + a critical edge is profitable or not so don't do it. */ + || ((! profile_info || ! 
flag_branch_probabilities + || targetm.cannot_modify_jumps_p ()) + && critical_edge_split)) + goto cleanup; + + /* Check if it's worth applying the partial redundancy elimination. */ + if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count) + goto cleanup; + if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count) + goto cleanup; + + /* Generate moves to the loaded register from where + the memory is available. */ + for (occr = avail_occrs; occr; occr = occr->next) + { + avail_insn = occr->insn; + pred = occr->pred; + /* Set avail_reg to be the register having the value of the + memory. */ + avail_reg = get_avail_load_store_reg (avail_insn); + gcc_assert (avail_reg); + + insert_insn_on_edge (gen_move_insn (copy_rtx (dest), + copy_rtx (avail_reg)), + pred); + stats.moves_inserted++; + + if (dump_file) + fprintf (dump_file, + "generating move from %d to %d on edge from %d to %d\n", + REGNO (avail_reg), + REGNO (dest), + pred->src->index, + pred->dest->index); + } + + /* Regenerate loads where the memory is unavailable. */ + for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next) + { + pred = unoccr->pred; + insert_insn_on_edge (copy_insn (PATTERN (insn)), pred); + stats.copies_inserted++; + + if (dump_file) + { + fprintf (dump_file, + "generating on edge from %d to %d a copy of load: ", + pred->src->index, + pred->dest->index); + print_rtl (dump_file, PATTERN (insn)); + fprintf (dump_file, "\n"); + } + } + + /* Delete the insn if it is not available in this block and mark it + for deletion if it is available. If insn is available it may help + discover additional redundancies, so mark it for later deletion. */ + for (a_occr = get_bb_avail_insn (bb, expr->avail_occr); + a_occr && (a_occr->insn != insn); + a_occr = get_bb_avail_insn (bb, a_occr->next)); + + if (!a_occr) + { + stats.insns_deleted++; + + if (dump_file) + { + fprintf (dump_file, "deleting insn:\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } + delete_insn (insn); + } + else + a_occr->deleted_p = 1; + +cleanup: + if (rollback_unoccr) + obstack_free (&unoccr_obstack, rollback_unoccr); +} + +/* Performing the redundancy elimination as described before. */ + +static void +eliminate_partially_redundant_loads (void) +{ + rtx insn; + basic_block bb; + + /* Note we start at block 1. */ + + if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR) + return; + + FOR_BB_BETWEEN (bb, + ENTRY_BLOCK_PTR->next_bb->next_bb, + EXIT_BLOCK_PTR, + next_bb) + { + /* Don't try anything on basic blocks with strange predecessors. */ + if (! bb_has_well_behaved_predecessors (bb)) + continue; + + /* Do not try anything on cold basic blocks. */ + if (optimize_bb_for_size_p (bb)) + continue; + + /* Reset the table of things changed since the start of the current + basic block. */ + reset_opr_set_tables (); + + /* Look at all insns in the current basic block and see if there are + any loads in it that we can record. */ + FOR_BB_INSNS (bb, insn) + { + /* Is it a load - of the form (set (reg) (mem))? */ + if (NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == SET + && REG_P (SET_DEST (PATTERN (insn))) + && MEM_P (SET_SRC (PATTERN (insn)))) + { + rtx pat = PATTERN (insn); + rtx src = SET_SRC (pat); + struct expr *expr; + + if (!MEM_VOLATILE_P (src) + && GET_MODE (src) != BLKmode + && general_operand (src, GET_MODE (src)) + /* Are the operands unchanged since the start of the + block? 
*/ + && oprs_unchanged_p (src, insn, false) + && !(flag_non_call_exceptions && may_trap_p (src)) + && !side_effects_p (src) + /* Is the expression recorded? */ + && (expr = lookup_expr_in_table (src)) != NULL) + { + /* We now have a load (insn) and an available memory at + its BB start (expr). Try to remove the loads if it is + redundant. */ + eliminate_partially_redundant_load (bb, insn, expr); + } + } + + /* Keep track of everything modified by this insn, so that we + know what has been modified since the start of the current + basic block. */ + if (INSN_P (insn)) + record_opr_changes (insn); + } + } + + commit_edge_insertions (); +} + +/* Go over the expression hash table and delete insns that were + marked for later deletion. */ + +/* This helper is called via htab_traverse. */ +static int +delete_redundant_insns_1 (void **slot, void *data ATTRIBUTE_UNUSED) +{ + struct expr *expr = (struct expr *) *slot; + struct occr *occr; + + for (occr = expr->avail_occr; occr != NULL; occr = occr->next) + { + if (occr->deleted_p && dbg_cnt (gcse2_delete)) + { + delete_insn (occr->insn); + stats.insns_deleted++; + + if (dump_file) + { + fprintf (dump_file, "deleting insn:\n"); + print_rtl_single (dump_file, occr->insn); + fprintf (dump_file, "\n"); + } + } + } + + return 1; +} + +static void +delete_redundant_insns (void) +{ + htab_traverse (expr_table, delete_redundant_insns_1, NULL); + if (dump_file) + fprintf (dump_file, "\n"); +} + +/* Main entry point of the GCSE after reload - clean some redundant loads + due to spilling. */ + +static void +gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED) +{ + + memset (&stats, 0, sizeof (stats)); + + /* Allocate memory for this pass. + Also computes and initializes the insns' CUIDs. */ + alloc_mem (); + + /* We need alias analysis. */ + init_alias_analysis (); + + compute_hash_table (); + + if (dump_file) + dump_hash_table (dump_file); + + if (htab_elements (expr_table) > 0) + { + eliminate_partially_redundant_loads (); + delete_redundant_insns (); + + if (dump_file) + { + fprintf (dump_file, "GCSE AFTER RELOAD stats:\n"); + fprintf (dump_file, "copies inserted: %d\n", stats.copies_inserted); + fprintf (dump_file, "moves inserted: %d\n", stats.moves_inserted); + fprintf (dump_file, "insns deleted: %d\n", stats.insns_deleted); + fprintf (dump_file, "\n\n"); + } + } + + /* We are finished with alias. */ + end_alias_analysis (); + + free_mem (); +} + + +static bool +gate_handle_gcse2 (void) +{ + return (optimize > 0 && flag_gcse_after_reload + && optimize_function_for_speed_p (cfun)); +} + + +static unsigned int +rest_of_handle_gcse2 (void) +{ + gcse_after_reload_main (get_insns ()); + rebuild_jump_labels (get_insns ()); + return 0; +} + +struct rtl_opt_pass pass_gcse2 = +{ + { + RTL_PASS, + "gcse2", /* name */ + gate_handle_gcse2, /* gate */ + rest_of_handle_gcse2, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_GCSE_AFTER_RELOAD, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | TODO_verify_rtl_sharing + | TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */ + } +}; +
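
The header comment of the pass above describes the two ingredients the transformation rests on: per-basic-block tracking of when each hard register was last set (via CUIDs in reg_avail_info), and a profile-count heuristic in eliminate_partially_redundant_load that decides whether making a load fully redundant is worth the extra moves and loads on incoming edges. The following is a minimal, self-contained sketch of those two ideas, not part of the patch itself: the register count, the fraction constants, and all function names are hypothetical stand-ins, and in the real pass the thresholds come from the --param gcse-after-reload-partial-fraction and --param gcse-after-reload-critical-fraction values (GCSE_AFTER_RELOAD_PARTIAL_FRACTION / GCSE_AFTER_RELOAD_CRITICAL_FRACTION in params.def).

/* Illustrative sketch only -- a toy model of the bookkeeping and the
   profitability check used by the post-reload GCSE pass.  Names and
   constants are stand-ins, not GCC APIs.  */

#include <stdio.h>

#define N_HARD_REGS 16        /* stand-in for FIRST_PSEUDO_REGISTER  */
#define PARTIAL_FRACTION 3    /* stand-in for gcse-after-reload-partial-fraction  */
#define CRITICAL_FRACTION 10  /* stand-in for gcse-after-reload-critical-fraction  */

/* CUID of the insn that last set each register in the current block;
   0 means "not set since the start of the block" (see reg_avail_info).  */
static int last_set_cuid[N_HARD_REGS];

static void
record_reg_set (int regno, int cuid)
{
  last_set_cuid[regno] = cuid;
}

/* Analogue of reg_changed_after_insn_p: was REGNO set by an insn whose
   CUID is greater than CUID?  Passing 0 asks "changed since block start".  */
static int
reg_changed_after (int regno, int cuid)
{
  return last_set_cuid[regno] > cuid;
}

/* Analogue of the heuristic near the end of
   eliminate_partially_redundant_load.  OK_COUNT is the execution count of
   predecessors where the value is already available, NOT_OK_COUNT the
   count of predecessors that would need a newly inserted load, and
   CRITICAL_COUNT the execution count of the critical edges involved.  */
static int
worth_eliminating (long ok_count, long not_ok_count, long critical_count)
{
  if (ok_count < PARTIAL_FRACTION * not_ok_count)
    return 0;
  if (ok_count < CRITICAL_FRACTION * critical_count)
    return 0;
  return 1;
}

int
main (void)
{
  record_reg_set (/*regno=*/3, /*cuid=*/7);
  printf ("r3 changed after cuid 5? %d\n", reg_changed_after (3, 5));  /* 1 */
  printf ("r3 changed after cuid 9? %d\n", reg_changed_after (3, 9));  /* 0 */
  printf ("worth it (900 ok, 100 not ok, 50 critical)? %d\n",
          worth_eliminating (900, 100, 50));                           /* 1 */
  printf ("worth it (100 ok, 900 not ok, 0 critical)? %d\n",
          worth_eliminating (100, 900, 0));                            /* 0 */
  return 0;
}

The design point the toy illustrates: the pass only pays for extra edge insertions when the redundant load executes often enough relative to the predecessors that still need a load, and it is even more reluctant when the insertions would force critical edges to be split, since that changes the CFG after reload.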