|
@@ -161,6 +161,31 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
|
|
return true;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ bool hasMergeableAddress(const MachineRegisterInfo &MRI) {
|
|
|
|
+ for (unsigned i = 0; i < NumAddresses; ++i) {
|
|
|
|
+ const MachineOperand *AddrOp = AddrReg[i];
|
|
|
|
+ // Immediates are always OK.
|
|
|
|
+ if (AddrOp->isImm())
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ // Don't try to merge addresses that aren't either immediates or registers.
|
|
|
|
+ // TODO: Should be possible to merge FrameIndexes and maybe some other
|
|
|
|
+ // non-register
|
|
|
|
+ if (!AddrOp->isReg())
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ // TODO: We should be able to merge physical reg addreses.
|
|
|
|
+ if (Register::isPhysicalRegister(AddrOp->getReg()))
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ // If an address has only one use then there will be on other
|
|
|
|
+ // instructions with the same address, so we can't merge this one.
|
|
|
|
+ if (MRI.hasOneNonDBGUse(AddrOp->getReg()))
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+ return true;
|
|
|
|
+ }
|
|
|
|
+
|
|
void setMI(MachineBasicBlock::iterator MI, const SIInstrInfo &TII,
|
|
void setMI(MachineBasicBlock::iterator MI, const SIInstrInfo &TII,
|
|
const GCNSubtarget &STM);
|
|
const GCNSubtarget &STM);
|
|
void setPaired(MachineBasicBlock::iterator MI, const SIInstrInfo &TII);
|
|
void setPaired(MachineBasicBlock::iterator MI, const SIInstrInfo &TII);
|
|
@@ -220,6 +245,10 @@ private:
|
|
bool promoteConstantOffsetToImm(MachineInstr &CI,
|
|
bool promoteConstantOffsetToImm(MachineInstr &CI,
|
|
MemInfoMap &Visited,
|
|
MemInfoMap &Visited,
|
|
SmallPtrSet<MachineInstr *, 4> &Promoted) const;
|
|
SmallPtrSet<MachineInstr *, 4> &Promoted) const;
|
|
|
|
+ void addInstToMergeableList(const CombineInfo &CI,
|
|
|
|
+ std::list<std::list<CombineInfo> > &MergeableInsts) const;
|
|
|
|
+ bool collectMergeableInsts(MachineBasicBlock &MBB,
|
|
|
|
+ std::list<std::list<CombineInfo> > &MergeableInsts) const;
|
|
|
|
|
|
public:
|
|
public:
|
|
static char ID;
|
|
static char ID;
|
|
@@ -228,7 +257,11 @@ public:
|
|
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
|
|
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
}
|
|
|
|
|
|
- bool optimizeBlock(MachineBasicBlock &MBB);
|
|
|
|
|
|
+ void removeCombinedInst(std::list<CombineInfo> &MergeList,
|
|
|
|
+ const MachineInstr &MI);
|
|
|
|
+ bool optimizeInstsWithSameBaseAddr(std::list<CombineInfo> &MergeList,
|
|
|
|
+ bool &OptimizeListAgain);
|
|
|
|
+ bool optimizeBlock(std::list<std::list<CombineInfo> > &MergeableInsts);
|
|
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
|
|
@@ -424,6 +457,8 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
|
|
AddrIdx[i] = AMDGPU::getNamedOperandIdx(I->getOpcode(), AddrOpName[i]);
|
|
AddrIdx[i] = AMDGPU::getNamedOperandIdx(I->getOpcode(), AddrOpName[i]);
|
|
AddrReg[i] = &I->getOperand(AddrIdx[i]);
|
|
AddrReg[i] = &I->getOperand(AddrIdx[i]);
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ InstsToMove.clear();
|
|
}
|
|
}
|
|
|
|
|
|
void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI,
|
|
void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI,
|
|
@@ -646,15 +681,6 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
|
|
if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm())
|
|
if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm())
|
|
return false;
|
|
return false;
|
|
|
|
|
|
- for (unsigned i = 0; i < CI.NumAddresses; i++) {
|
|
|
|
- // We only ever merge operations with the same base address register, so
|
|
|
|
- // don't bother scanning forward if there are no other uses.
|
|
|
|
- if (CI.AddrReg[i]->isReg() &&
|
|
|
|
- (Register::isPhysicalRegister(CI.AddrReg[i]->getReg()) ||
|
|
|
|
- MRI->hasOneNonDBGUse(CI.AddrReg[i]->getReg())))
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
++MBBI;
|
|
++MBBI;
|
|
|
|
|
|
DenseSet<unsigned> RegDefsToMove;
|
|
DenseSet<unsigned> RegDefsToMove;
|
|
@@ -827,12 +853,11 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
|
|
|
|
|
|
moveInstsAfter(Copy1, CI.InstsToMove);
|
|
moveInstsAfter(Copy1, CI.InstsToMove);
|
|
|
|
|
|
- MachineBasicBlock::iterator Next = std::next(CI.I);
|
|
|
|
CI.I->eraseFromParent();
|
|
CI.I->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
|
|
|
|
LLVM_DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
|
|
LLVM_DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
|
|
- return Next;
|
|
|
|
|
|
+ return Read2;
|
|
}
|
|
}
|
|
|
|
|
|
unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const {
|
|
unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const {
|
|
@@ -911,12 +936,11 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
|
|
|
|
|
|
moveInstsAfter(Write2, CI.InstsToMove);
|
|
moveInstsAfter(Write2, CI.InstsToMove);
|
|
|
|
|
|
- MachineBasicBlock::iterator Next = std::next(CI.I);
|
|
|
|
CI.I->eraseFromParent();
|
|
CI.I->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
|
|
|
|
LLVM_DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
|
|
LLVM_DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
|
|
- return Next;
|
|
|
|
|
|
+ return Write2;
|
|
}
|
|
}
|
|
|
|
|
|
MachineBasicBlock::iterator
|
|
MachineBasicBlock::iterator
|
|
@@ -938,12 +962,13 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
|
|
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
|
|
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
|
|
const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
|
|
const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
|
|
|
|
|
|
- BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg)
|
|
|
|
- .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
|
|
|
|
- .addImm(MergedOffset) // offset
|
|
|
|
- .addImm(CI.GLC0) // glc
|
|
|
|
- .addImm(CI.DLC0) // dlc
|
|
|
|
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
|
|
|
|
|
|
+ MachineInstr *New =
|
|
|
|
+ BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg)
|
|
|
|
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
|
|
|
|
+ .addImm(MergedOffset) // offset
|
|
|
|
+ .addImm(CI.GLC0) // glc
|
|
|
|
+ .addImm(CI.DLC0) // dlc
|
|
|
|
+ .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
|
|
|
|
|
|
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
|
|
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
|
|
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
|
|
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
|
|
@@ -963,10 +988,9 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
|
|
|
|
|
|
moveInstsAfter(Copy1, CI.InstsToMove);
|
|
moveInstsAfter(Copy1, CI.InstsToMove);
|
|
|
|
|
|
- MachineBasicBlock::iterator Next = std::next(CI.I);
|
|
|
|
CI.I->eraseFromParent();
|
|
CI.I->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
- return Next;
|
|
|
|
|
|
+ return New;
|
|
}
|
|
}
|
|
|
|
|
|
MachineBasicBlock::iterator
|
|
MachineBasicBlock::iterator
|
|
@@ -997,15 +1021,16 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
|
|
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
|
|
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
|
|
const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
|
|
const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
|
|
|
|
|
|
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
|
|
|
|
- .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
|
|
|
|
- .addImm(MergedOffset) // offset
|
|
|
|
- .addImm(CI.GLC0) // glc
|
|
|
|
- .addImm(CI.SLC0) // slc
|
|
|
|
- .addImm(0) // tfe
|
|
|
|
- .addImm(CI.DLC0) // dlc
|
|
|
|
- .addImm(0) // swz
|
|
|
|
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
|
|
|
|
|
|
+ MachineInstr *New =
|
|
|
|
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
|
|
|
|
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
|
|
|
|
+ .addImm(MergedOffset) // offset
|
|
|
|
+ .addImm(CI.GLC0) // glc
|
|
|
|
+ .addImm(CI.SLC0) // slc
|
|
|
|
+ .addImm(0) // tfe
|
|
|
|
+ .addImm(CI.DLC0) // dlc
|
|
|
|
+ .addImm(0) // swz
|
|
|
|
+ .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
|
|
|
|
|
|
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
|
|
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
|
|
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
|
|
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
|
|
@@ -1025,10 +1050,9 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
|
|
|
|
|
|
moveInstsAfter(Copy1, CI.InstsToMove);
|
|
moveInstsAfter(Copy1, CI.InstsToMove);
|
|
|
|
|
|
- MachineBasicBlock::iterator Next = std::next(CI.I);
|
|
|
|
CI.I->eraseFromParent();
|
|
CI.I->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
- return Next;
|
|
|
|
|
|
+ return New;
|
|
}
|
|
}
|
|
|
|
|
|
unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
|
|
unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
|
|
@@ -1191,22 +1215,22 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
|
|
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
|
|
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
|
|
const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
|
|
const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
|
|
|
|
|
|
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
|
|
|
|
- .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
|
|
|
|
- .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
|
|
|
|
- .addImm(CI.GLC0) // glc
|
|
|
|
- .addImm(CI.SLC0) // slc
|
|
|
|
- .addImm(0) // tfe
|
|
|
|
- .addImm(CI.DLC0) // dlc
|
|
|
|
- .addImm(0) // swz
|
|
|
|
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
|
|
|
|
|
|
+ MachineInstr *New =
|
|
|
|
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
|
|
|
|
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
|
|
|
|
+ .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
|
|
|
|
+ .addImm(CI.GLC0) // glc
|
|
|
|
+ .addImm(CI.SLC0) // slc
|
|
|
|
+ .addImm(0) // tfe
|
|
|
|
+ .addImm(CI.DLC0) // dlc
|
|
|
|
+ .addImm(0) // swz
|
|
|
|
+ .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
|
|
|
|
|
|
moveInstsAfter(MIB, CI.InstsToMove);
|
|
moveInstsAfter(MIB, CI.InstsToMove);
|
|
|
|
|
|
- MachineBasicBlock::iterator Next = std::next(CI.I);
|
|
|
|
CI.I->eraseFromParent();
|
|
CI.I->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
CI.Paired->eraseFromParent();
|
|
- return Next;
|
|
|
|
|
|
+ return New;
|
|
}
|
|
}
|
|
|
|
|
|
MachineOperand
|
|
MachineOperand
|
|
@@ -1519,32 +1543,105 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
|
|
return false;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
|
|
-// Scan through looking for adjacent LDS operations with constant offsets from
|
|
|
|
-// the same base register. We rely on the scheduler to do the hard work of
|
|
|
|
-// clustering nearby loads, and assume these are all adjacent.
|
|
|
|
-bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
|
|
|
|
- bool Modified = false;
|
|
|
|
|
|
+void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI,
|
|
|
|
+ std::list<std::list<CombineInfo> > &MergeableInsts) const {
|
|
|
|
+ for (std::list<CombineInfo> &AddrList : MergeableInsts) {
|
|
|
|
+ if (AddrList.front().hasSameBaseAddress(*CI.I) &&
|
|
|
|
+ AddrList.front().InstClass == CI.InstClass) {
|
|
|
|
+ AddrList.emplace_back(CI);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
|
|
+ // Base address not found, so add a new list.
|
|
|
|
+ MergeableInsts.emplace_back(1, CI);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+bool SILoadStoreOptimizer::collectMergeableInsts(MachineBasicBlock &MBB,
|
|
|
|
+ std::list<std::list<CombineInfo> > &MergeableInsts) const {
|
|
|
|
+ bool Modified = false;
|
|
// Contain the list
|
|
// Contain the list
|
|
MemInfoMap Visited;
|
|
MemInfoMap Visited;
|
|
// Contains the list of instructions for which constant offsets are being
|
|
// Contains the list of instructions for which constant offsets are being
|
|
// promoted to the IMM.
|
|
// promoted to the IMM.
|
|
SmallPtrSet<MachineInstr *, 4> AnchorList;
|
|
SmallPtrSet<MachineInstr *, 4> AnchorList;
|
|
|
|
|
|
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
|
|
|
|
- MachineInstr &MI = *I;
|
|
|
|
-
|
|
|
|
|
|
+ // Sort potential mergeable instructions into lists. One list per base address.
|
|
|
|
+ for (MachineInstr &MI : MBB.instrs()) {
|
|
|
|
+ // We run this before checking if an address is mergeable, because it can produce
|
|
|
|
+ // better code even if the instructions aren't mergeable.
|
|
if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
|
|
if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
|
|
Modified = true;
|
|
Modified = true;
|
|
|
|
|
|
|
|
+ const InstClassEnum InstClass = getInstClass(MI.getOpcode(), *TII);
|
|
|
|
+ if (InstClass == UNKNOWN)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
// Don't combine if volatile.
|
|
// Don't combine if volatile.
|
|
- if (MI.hasOrderedMemoryRef()) {
|
|
|
|
- ++I;
|
|
|
|
|
|
+ if (MI.hasOrderedMemoryRef())
|
|
continue;
|
|
continue;
|
|
- }
|
|
|
|
|
|
|
|
CombineInfo CI;
|
|
CombineInfo CI;
|
|
- CI.setMI(I, *TII, *STM);
|
|
|
|
|
|
+ CI.setMI(MI, *TII, *STM);
|
|
|
|
+
|
|
|
|
+ if (!CI.hasMergeableAddress(*MRI))
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ addInstToMergeableList(CI, MergeableInsts);
|
|
|
|
+ }
|
|
|
|
+ return Modified;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+// Scan through looking for adjacent LDS operations with constant offsets from
|
|
|
|
+// the same base register. We rely on the scheduler to do the hard work of
|
|
|
|
+// clustering nearby loads, and assume these are all adjacent.
|
|
|
|
+bool SILoadStoreOptimizer::optimizeBlock(
|
|
|
|
+ std::list<std::list<CombineInfo> > &MergeableInsts) {
|
|
|
|
+ bool Modified = false;
|
|
|
|
+
|
|
|
|
+ for (std::list<CombineInfo> &MergeList : MergeableInsts) {
|
|
|
|
+ if (MergeList.size() < 2)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ bool OptimizeListAgain = false;
|
|
|
|
+ if (!optimizeInstsWithSameBaseAddr(MergeList, OptimizeListAgain)) {
|
|
|
|
+ // We weren't able to make any changes, so clear the list so we don't
|
|
|
|
+ // process the same instructions the next time we try to optimize this
|
|
|
|
+ // block.
|
|
|
|
+ MergeList.clear();
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // We made changes, but also determined that there were no more optimization
|
|
|
|
+ // opportunities, so we don't need to reprocess the list
|
|
|
|
+ if (!OptimizeListAgain)
|
|
|
|
+ MergeList.clear();
|
|
|
|
+
|
|
|
|
+ OptimizeAgain |= OptimizeListAgain;
|
|
|
|
+ Modified = true;
|
|
|
|
+ }
|
|
|
|
+ return Modified;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+void
|
|
|
|
+SILoadStoreOptimizer::removeCombinedInst(std::list<CombineInfo> &MergeList,
|
|
|
|
+ const MachineInstr &MI) {
|
|
|
|
+
|
|
|
|
+ for (auto CI = MergeList.begin(), E = MergeList.end(); CI != E; ++CI) {
|
|
|
|
+ if (&*CI->I == &MI) {
|
|
|
|
+ MergeList.erase(CI);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+bool
|
|
|
|
+SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
|
|
|
|
+ std::list<CombineInfo> &MergeList,
|
|
|
|
+ bool &OptimizeListAgain) {
|
|
|
|
+ bool Modified = false;
|
|
|
|
+ for (auto I = MergeList.begin(); I != MergeList.end(); ++I) {
|
|
|
|
+ CombineInfo &CI = *I;
|
|
|
|
|
|
switch (CI.InstClass) {
|
|
switch (CI.InstClass) {
|
|
default:
|
|
default:
|
|
@@ -1552,55 +1649,57 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
|
|
case DS_READ:
|
|
case DS_READ:
|
|
if (findMatchingInst(CI)) {
|
|
if (findMatchingInst(CI)) {
|
|
Modified = true;
|
|
Modified = true;
|
|
- I = mergeRead2Pair(CI);
|
|
|
|
- } else {
|
|
|
|
- ++I;
|
|
|
|
|
|
+ removeCombinedInst(MergeList, *CI.Paired);
|
|
|
|
+ MachineBasicBlock::iterator NewMI = mergeRead2Pair(CI);
|
|
|
|
+ CI.setMI(NewMI, *TII, *STM);
|
|
}
|
|
}
|
|
- continue;
|
|
|
|
|
|
+ break;
|
|
case DS_WRITE:
|
|
case DS_WRITE:
|
|
if (findMatchingInst(CI)) {
|
|
if (findMatchingInst(CI)) {
|
|
Modified = true;
|
|
Modified = true;
|
|
- I = mergeWrite2Pair(CI);
|
|
|
|
- } else {
|
|
|
|
- ++I;
|
|
|
|
|
|
+ removeCombinedInst(MergeList, *CI.Paired);
|
|
|
|
+ MachineBasicBlock::iterator NewMI = mergeWrite2Pair(CI);
|
|
|
|
+ CI.setMI(NewMI, *TII, *STM);
|
|
}
|
|
}
|
|
- continue;
|
|
|
|
|
|
+ break;
|
|
case S_BUFFER_LOAD_IMM:
|
|
case S_BUFFER_LOAD_IMM:
|
|
if (findMatchingInst(CI)) {
|
|
if (findMatchingInst(CI)) {
|
|
Modified = true;
|
|
Modified = true;
|
|
- I = mergeSBufferLoadImmPair(CI);
|
|
|
|
- OptimizeAgain |= (CI.Width0 + CI.Width1) < 16;
|
|
|
|
- } else {
|
|
|
|
- ++I;
|
|
|
|
|
|
+ removeCombinedInst(MergeList, *CI.Paired);
|
|
|
|
+ MachineBasicBlock::iterator NewMI = mergeSBufferLoadImmPair(CI);
|
|
|
|
+ CI.setMI(NewMI, *TII, *STM);
|
|
|
|
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 16;
|
|
}
|
|
}
|
|
- continue;
|
|
|
|
|
|
+ break;
|
|
case BUFFER_LOAD_OFFEN:
|
|
case BUFFER_LOAD_OFFEN:
|
|
case BUFFER_LOAD_OFFSET:
|
|
case BUFFER_LOAD_OFFSET:
|
|
case BUFFER_LOAD_OFFEN_exact:
|
|
case BUFFER_LOAD_OFFEN_exact:
|
|
case BUFFER_LOAD_OFFSET_exact:
|
|
case BUFFER_LOAD_OFFSET_exact:
|
|
if (findMatchingInst(CI)) {
|
|
if (findMatchingInst(CI)) {
|
|
Modified = true;
|
|
Modified = true;
|
|
- I = mergeBufferLoadPair(CI);
|
|
|
|
- OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
|
|
|
|
- } else {
|
|
|
|
- ++I;
|
|
|
|
|
|
+ removeCombinedInst(MergeList, *CI.Paired);
|
|
|
|
+ MachineBasicBlock::iterator NewMI = mergeBufferLoadPair(CI);
|
|
|
|
+ CI.setMI(NewMI, *TII, *STM);
|
|
|
|
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
|
|
}
|
|
}
|
|
- continue;
|
|
|
|
|
|
+ break;
|
|
case BUFFER_STORE_OFFEN:
|
|
case BUFFER_STORE_OFFEN:
|
|
case BUFFER_STORE_OFFSET:
|
|
case BUFFER_STORE_OFFSET:
|
|
case BUFFER_STORE_OFFEN_exact:
|
|
case BUFFER_STORE_OFFEN_exact:
|
|
case BUFFER_STORE_OFFSET_exact:
|
|
case BUFFER_STORE_OFFSET_exact:
|
|
if (findMatchingInst(CI)) {
|
|
if (findMatchingInst(CI)) {
|
|
Modified = true;
|
|
Modified = true;
|
|
- I = mergeBufferStorePair(CI);
|
|
|
|
- OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
|
|
|
|
- } else {
|
|
|
|
- ++I;
|
|
|
|
|
|
+ removeCombinedInst(MergeList, *CI.Paired);
|
|
|
|
+ MachineBasicBlock::iterator NewMI = mergeBufferStorePair(CI);
|
|
|
|
+ CI.setMI(NewMI, *TII, *STM);
|
|
|
|
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
|
|
}
|
|
}
|
|
- continue;
|
|
|
|
|
|
+ break;
|
|
}
|
|
}
|
|
-
|
|
|
|
- ++I;
|
|
|
|
|
|
+ // Clear the InstsToMove after we have finished searching so we don't have
|
|
|
|
+ // stale values left over if we search for this CI again in another pass
|
|
|
|
+ // over the block.
|
|
|
|
+ CI.InstsToMove.clear();
|
|
}
|
|
}
|
|
|
|
|
|
return Modified;
|
|
return Modified;
|
|
@@ -1626,10 +1725,14 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
|
|
|
|
|
|
bool Modified = false;
|
|
bool Modified = false;
|
|
|
|
|
|
|
|
+
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
|
|
+ std::list<std::list<CombineInfo> > MergeableInsts;
|
|
|
|
+ // First pass: Collect list of all instructions we know how to merge.
|
|
|
|
+ Modified |= collectMergeableInsts(MBB, MergeableInsts);
|
|
do {
|
|
do {
|
|
OptimizeAgain = false;
|
|
OptimizeAgain = false;
|
|
- Modified |= optimizeBlock(MBB);
|
|
|
|
|
|
+ Modified |= optimizeBlock(MergeableInsts);
|
|
} while (OptimizeAgain);
|
|
} while (OptimizeAgain);
|
|
}
|
|
}
|
|
|
|
|