|
@@ -40,6 +40,7 @@
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
#include "llvm/CodeGen/Passes.h"
|
|
|
+#include "llvm/IR/Dominators.h"
|
|
|
#include "llvm/IR/InstIterator.h"
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
#include "llvm/Support/MathExtras.h"
|
|
@@ -65,7 +66,7 @@ class InterleavedAccess : public FunctionPass {
|
|
|
public:
|
|
|
static char ID;
|
|
|
InterleavedAccess(const TargetMachine *TM = nullptr)
|
|
|
- : FunctionPass(ID), TM(TM), TLI(nullptr) {
|
|
|
+ : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
|
|
|
initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
|
|
|
}
|
|
|
|
|
@@ -73,7 +74,13 @@ public:
|
|
|
|
|
|
bool runOnFunction(Function &F) override;
|
|
|
|
|
|
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
|
+ AU.addRequired<DominatorTreeWrapperPass>();
|
|
|
+ AU.addPreserved<DominatorTreeWrapperPass>();
|
|
|
+ }
|
|
|
+
|
|
|
private:
|
|
|
+ DominatorTree *DT;
|
|
|
const TargetMachine *TM;
|
|
|
const TargetLowering *TLI;
|
|
|
|
|
@@ -84,13 +91,26 @@ private:
|
|
|
/// \brief Transform an interleaved store into target specific intrinsics.
|
|
|
bool lowerInterleavedStore(StoreInst *SI,
|
|
|
SmallVector<Instruction *, 32> &DeadInsts);
|
|
|
+
|
|
|
+ /// \brief Returns true if the uses of an interleaved load by the
|
|
|
+ /// extractelement instructions in \p Extracts can be replaced by uses of the
|
|
|
+ /// shufflevector instructions in \p Shuffles instead. If so, the necessary
|
|
|
+ /// replacements are also performed.
|
|
|
+ bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
|
|
|
+ ArrayRef<ShuffleVectorInst *> Shuffles);
|
|
|
};
|
|
|
} // end anonymous namespace.
|
|
|
|
|
|
char InterleavedAccess::ID = 0;
|
|
|
-INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access",
|
|
|
- "Lower interleaved memory accesses to target specific intrinsics",
|
|
|
- false, false)
|
|
|
+INITIALIZE_TM_PASS_BEGIN(
|
|
|
+ InterleavedAccess, "interleaved-access",
|
|
|
+ "Lower interleaved memory accesses to target specific intrinsics", false,
|
|
|
+ false)
|
|
|
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|
|
+INITIALIZE_TM_PASS_END(
|
|
|
+ InterleavedAccess, "interleaved-access",
|
|
|
+ "Lower interleaved memory accesses to target specific intrinsics", false,
|
|
|
+ false)
|
|
|
|
|
|
FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
|
|
|
return new InterleavedAccess(TM);
|
|
@@ -179,9 +199,18 @@ bool InterleavedAccess::lowerInterleavedLoad(
|
|
|
return false;
|
|
|
|
|
|
SmallVector<ShuffleVectorInst *, 4> Shuffles;
|
|
|
+ SmallVector<ExtractElementInst *, 4> Extracts;
|
|
|
|
|
|
- // Check if all users of this load are shufflevectors.
|
|
|
+ // Check if all users of this load are shufflevectors. If we encounter any
|
|
|
+ // users that are extractelement instructions, we save them to later check if
|
|
|
+ // they can be modified to extract from one of the shufflevectors instead of
|
|
|
+ // the load.
|
|
|
for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
|
|
|
+ auto *Extract = dyn_cast<ExtractElementInst>(*UI);
|
|
|
+ if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
|
|
|
+ Extracts.push_back(Extract);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
|
|
|
if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
|
|
|
return false;
|
|
@@ -217,6 +246,11 @@ bool InterleavedAccess::lowerInterleavedLoad(
|
|
|
Indices.push_back(Index);
|
|
|
}
|
|
|
|
|
|
+ // Try to modify users of the load that are extractelement instructions to
|
|
|
+ // use the shufflevector instructions instead of the load.
|
|
|
+ if (!tryReplaceExtracts(Extracts, Shuffles))
|
|
|
+ return false;
|
|
|
+
|
|
|
DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
|
|
|
|
|
|
// Try to create target specific intrinsics to replace the load and shuffles.
|
|
@@ -230,6 +264,73 @@ bool InterleavedAccess::lowerInterleavedLoad(
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
+bool InterleavedAccess::tryReplaceExtracts(
|
|
|
+ ArrayRef<ExtractElementInst *> Extracts,
|
|
|
+ ArrayRef<ShuffleVectorInst *> Shuffles) {
|
|
|
+
|
|
|
+ // If there aren't any extractelement instructions to modify, there's nothing
|
|
|
+ // to do.
|
|
|
+ if (Extracts.empty())
|
|
|
+ return true;
|
|
|
+
|
|
|
+ // Maps extractelement instructions to vector-index pairs. The extractelement
|
|
|
+ // instructions will be modified to use the new vector and index operands.
|
|
|
+ DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
|
|
|
+
|
|
|
+ for (auto *Extract : Extracts) {
|
|
|
+
|
|
|
+ // The vector index that is extracted.
|
|
|
+ auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
|
|
|
+ auto Index = IndexOperand->getSExtValue();
|
|
|
+
|
|
|
+ // Look for a suitable shufflevector instruction. The goal is to modify the
|
|
|
+ // extractelement instruction (which uses an interleaved load) to use one
|
|
|
+ // of the shufflevector instructions instead of the load.
|
|
|
+ for (auto *Shuffle : Shuffles) {
|
|
|
+
|
|
|
+ // If the shufflevector instruction doesn't dominate the extract, we
|
|
|
+ // can't create a use of it.
|
|
|
+ if (!DT->dominates(Shuffle, Extract))
|
|
|
+ continue;
|
|
|
+
|
|
|
+ // Inspect the indices of the shufflevector instruction. If the shuffle
|
|
|
+ // selects the same index that is extracted, we can modify the
|
|
|
+ // extractelement instruction.
|
|
|
+ SmallVector<int, 4> Indices;
|
|
|
+ Shuffle->getShuffleMask(Indices);
|
|
|
+ for (unsigned I = 0; I < Indices.size(); ++I)
|
|
|
+ if (Indices[I] == Index) {
|
|
|
+ assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
|
|
|
+ "Vector operations do not match");
|
|
|
+ ReplacementMap[Extract] = std::make_pair(Shuffle, I);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ // If we found a suitable shufflevector instruction, stop looking.
|
|
|
+ if (ReplacementMap.count(Extract))
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ // If we did not find a suitable shufflevector instruction, the
|
|
|
+ // extractelement instruction cannot be modified, so we must give up.
|
|
|
+ if (!ReplacementMap.count(Extract))
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Finally, perform the replacements.
|
|
|
+ IRBuilder<> Builder(Extracts[0]->getContext());
|
|
|
+ for (auto &Replacement : ReplacementMap) {
|
|
|
+ auto *Extract = Replacement.first;
|
|
|
+ auto *Vector = Replacement.second.first;
|
|
|
+ auto Index = Replacement.second.second;
|
|
|
+ Builder.SetInsertPoint(Extract);
|
|
|
+ Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
|
|
|
+ Extract->eraseFromParent();
|
|
|
+ }
|
|
|
+
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
bool InterleavedAccess::lowerInterleavedStore(
|
|
|
StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
|
|
|
if (!SI->isSimple())
|
|
@@ -262,6 +363,7 @@ bool InterleavedAccess::runOnFunction(Function &F) {
|
|
|
|
|
|
DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
|
|
|
|
|
|
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
|
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
|
|
|
MaxFactor = TLI->getMaxSupportedInterleaveFactor();
|
|
|
|