@@ -76,6 +76,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -119,6 +120,14 @@ static cl::opt<unsigned> RewritePHILimit(
     "rewrite-phi-limit", cl::Hidden, cl::init(10),
     cl::desc("Limit the length of PHI chains to lookup"));

+// Limit the length of recurrence chain when evaluating the benefit of
+// commuting operands.
+static cl::opt<unsigned> MaxRecurrenceChain(
+    "recurrence-chain-limit", cl::Hidden, cl::init(3),
+    cl::desc("Maximum length of recurrence chain when evaluating the benefit "
+             "of commuting operands"));
+
+
 STATISTIC(NumReuse, "Number of extension results reused");
 STATISTIC(NumCmps, "Number of compares eliminated");
 STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
 namespace {

   class ValueTrackerResult;
+  class RecurrenceInstr;

   class PeepholeOptimizer : public MachineFunctionPass {
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
     MachineRegisterInfo *MRI;
     MachineDominatorTree *DT; // Machine dominator tree
+    MachineLoopInfo *MLI;

   public:
     static char ID; // Pass identification
@@ -150,6 +161,8 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
       if (Aggressive) {
         AU.addRequired<MachineDominatorTree>();
         AU.addPreserved<MachineDominatorTree>();
@@ -160,6 +173,9 @@ namespace {
     typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
         RewriteMapTy;

+    /// \brief Sequence of instructions that form a recurrence cycle.
+    typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
+
   private:
     bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -170,6 +186,7 @@ namespace {
     bool optimizeCoalescableCopy(MachineInstr *MI);
     bool optimizeUncoalescableCopy(MachineInstr *MI,
                                    SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+    bool optimizeRecurrence(MachineInstr &PHI);
     bool findNextSource(unsigned Reg, unsigned SubReg,
                         RewriteMapTy &RewriteMap);
     bool isMoveImmediate(MachineInstr *MI,
@@ -178,6 +195,13 @@ namespace {
     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                        SmallSet<unsigned, 4> &ImmDefRegs,
                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+    /// \brief Finds recurrence cycles, but only ones that are formed around
+    /// a def operand and a use operand that are tied. If there is a use
+    /// operand commutable with the tied use operand, find the recurrence
+    /// cycle along that operand as well.
+    bool findTargetRecurrence(unsigned Reg,
+                              const SmallSet<unsigned, 2> &TargetRegs,
+                              RecurrenceCycle &RC);

     /// \brief If copy instruction \p MI is a virtual register copy, track it in
     /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
@@ -222,6 +246,28 @@ namespace {
     }
   };

+  /// \brief Helper class to hold instructions that are inside recurrence
+  /// cycles. The recurrence cycle is formed around 1) a def operand and its
+  /// tied use operand, or 2) a def operand and a use operand that is
+  /// commutable with another use operand which is tied to the def operand.
+  /// In the latter case, the indices of the tied use operand and the
+  /// commutable use operand are maintained in CommutePair.
+  class RecurrenceInstr {
+  public:
+    typedef std::pair<unsigned, unsigned> IndexPair;
+
+    RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
+    RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
+      : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
+
+    MachineInstr *getMI() const { return MI; }
+    Optional<IndexPair> getCommutePair() const { return CommutePair; }
+
+  private:
+    MachineInstr *MI;
+    Optional<IndexPair> CommutePair;
+  };
+
   /// \brief Helper class to hold a reply for ValueTracker queries. Contains the
   /// returned sources for a given search and the instructions where the sources
   /// were tracked from.
@@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
                       "Peephole Optimizations", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
                     "Peephole Optimizations", false, false)

@@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
   return false;
 }

+/// \brief Returns true if \p MO is a virtual register operand.
+static bool isVirtualRegisterOperand(MachineOperand &MO) {
+  if (!MO.isReg())
+    return false;
+  return TargetRegisterInfo::isVirtualRegister(MO.getReg());
+}
+
+bool PeepholeOptimizer::findTargetRecurrence(
+    unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
+    RecurrenceCycle &RC) {
+  // Recurrence found if Reg is in TargetRegs.
+  if (TargetRegs.count(Reg))
+    return true;
+
+  // TODO: Currently, we only allow the last instruction of the recurrence
+  // cycle (the instruction that feeds the PHI instruction) to have more than
+  // one use, to guarantee that commuting operands does not tie registers
+  // with overlapping live ranges. Once we have actual live-range info for
+  // each register, this constraint can be relaxed.
+  if (!MRI->hasOneNonDBGUse(Reg))
+    return false;
+
+  // Give up if the recurrence chain length is longer than the limit.
+  if (RC.size() >= MaxRecurrenceChain)
+    return false;
+
+  MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
+  unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
+
+  // Only interested in recurrences whose instructions have only one def, which
+  // is a virtual register.
+  if (MI.getDesc().getNumDefs() != 1)
+    return false;
+
+  MachineOperand &DefOp = MI.getOperand(0);
+  if (!isVirtualRegisterOperand(DefOp))
+    return false;
+
+  // Check if the def operand of MI is tied to any use operand. We are only
+  // interested in the case where all the instructions in the recurrence chain
+  // have their def operand tied to one of their use operands.
+  unsigned TiedUseIdx;
+  if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
+    return false;
+
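+  // If Reg is already used as the tied operand, no commute is needed;
+  // otherwise, check below whether its operand can be commuted with the
+  // tied one.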
+  if (Idx == TiedUseIdx) {
+    RC.push_back(RecurrenceInstr(&MI));
+    return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+  } else {
+    // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
+    unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+    if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
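+      // Remember the operand indices; the actual commute is done later in
+      // optimizeRecurrence, once a complete recurrence cycle has been found.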
+      RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
+      return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+    }
+  }
+
+  return false;
+}
+
+/// \brief Phi instructions will eventually be lowered to copy instructions.
+/// If a phi is in a loop header, a recurrence may be formed around the source
+/// and destination of the phi. In such a case, commuting operands of the
+/// instructions in the recurrence may enable coalescing of the copy
+/// instruction generated from the phi. For example, if there is a recurrence
+/// of
+///
+/// LoopHeader:
+///   %vreg1 = phi(%vreg0, %vreg100)
+/// LoopLatch:
+///   %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1
+///
+/// then the fact that %vreg0 and %vreg2 are in the same tied-operand set makes
+/// coalescing the copy instruction generated from the phi in LoopHeader
+/// (i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and %vreg2 have
+/// overlapping live ranges. This introduces an additional move instruction
+/// into the final assembly. However, if we commute %vreg2 and %vreg1 of the
+/// ADD instruction, the redundant move instruction can be avoided.
+bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
+  SmallSet<unsigned, 2> TargetRegs;
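+  // PHI operands come in (incoming value, predecessor block) pairs after the
+  // def, so step by two to collect only the incoming registers.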
+  for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
+    MachineOperand &MO = PHI.getOperand(Idx);
+    assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
+    TargetRegs.insert(MO.getReg());
+  }
+
+  bool Changed = false;
+  RecurrenceCycle RC;
+  if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
+    // Commutes operands of instructions in RC if necessary so that the copy to
+    // be generated from PHI can be coalesced.
+    DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
+    for (auto &RI : RC) {
+      DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
+      auto CP = RI.getCommutePair();
+      if (CP) {
+        Changed = true;
+        TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
+                                (*CP).second);
+        DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
+      }
+    }
+  }
+
+  return Changed;
+}
+
 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(*MF.getFunction()))
     return false;
@@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   TRI = MF.getSubtarget().getRegisterInfo();
   MRI = &MF.getRegInfo();
   DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
+  MLI = &getAnalysis<MachineLoopInfo>();

   bool Changed = false;

@@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
     SmallSet<unsigned, 4> CopySrcRegs;
     DenseMap<unsigned, MachineInstr *> CopySrcMIs;

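+    // Only PHIs in a loop header are considered for recurrence optimization,
+    // so query MachineLoopInfo once per block and reuse the answer below.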
+    bool IsLoopHeader = MLI->isLoopHeader(&MBB);
+
     for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
          MII != MIE; ) {
       MachineInstr *MI = &*MII;
@@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
       if (MI->isDebugValue())
         continue;

-      if (MI->isPosition() || MI->isPHI())
+      if (MI->isPosition())
         continue;

+      if (IsLoopHeader && MI->isPHI()) {
+        if (optimizeRecurrence(*MI)) {
+          Changed = true;
+          continue;
+        }
+      }
+
       if (!MI->isCopy()) {
         for (const auto &Op : MI->operands()) {
           // Visit all operands: definitions can be implicit or explicit.
@@ -1667,7 +1831,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
             MRI->markUsesInDebugValueAsUndef(FoldedReg);
             FoldAsLoadDefCandidates.erase(FoldedReg);
             ++NumLoadFold;
-
+
             // MI is replaced with FoldMI so we can continue trying to fold
             Changed = true;
             MI = FoldMI;
@@ -1675,7 +1839,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
           }
         }
       }
-
+
       // If we run into an instruction we can't fold across, discard
       // the load candidates. Note: We might be able to fold *into* this
       // instruction, so this needs to be after the folding logic.