|
@@ -19,6 +19,7 @@
|
|
|
#include "llvm/ADT/MapVector.h"
|
|
|
#include "llvm/ADT/PostOrderIterator.h"
|
|
|
#include "llvm/ADT/SetVector.h"
|
|
|
+#include "llvm/ADT/Optional.h"
|
|
|
#include "llvm/ADT/Statistic.h"
|
|
|
#include "llvm/Analysis/AliasAnalysis.h"
|
|
|
#include "llvm/Analysis/AssumptionCache.h"
|
|
@@ -388,6 +389,15 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/// \returns the AA location that is being access by the instruction.
|
|
|
+static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
|
|
|
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
|
|
+ return AA->getLocation(SI);
|
|
|
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
|
|
|
+ return AA->getLocation(LI);
|
|
|
+ return AliasAnalysis::Location();
|
|
|
+}
|
|
|
+
|
|
|
/// Bottom Up SLP Vectorizer.
|
|
|
class BoUpSLP {
|
|
|
public:
|
|
@@ -555,6 +565,52 @@ private:
|
|
|
};
|
|
|
typedef SmallVector<ExternalUser, 16> UserList;
|
|
|
|
|
|
+ /// Checks if two instructions may access the same memory.
|
|
|
+ ///
|
|
|
+ /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it
|
|
|
+ /// is invariant in the calling loop.
|
|
|
+ bool isAliased(const AliasAnalysis::Location &Loc1, Instruction *Inst1,
|
|
|
+ Instruction *Inst2) {
|
|
|
+
|
|
|
+ // First check if the result is already in the cache.
|
|
|
+ AliasCacheKey key = std::make_pair(Inst1, Inst2);
|
|
|
+ Optional<bool> &result = AliasCache[key];
|
|
|
+ if (result.hasValue()) {
|
|
|
+ return result.getValue();
|
|
|
+ }
|
|
|
+ AliasAnalysis::Location Loc2 = getLocation(Inst2, AA);
|
|
|
+ bool aliased = true;
|
|
|
+ if (Loc1.Ptr && Loc2.Ptr) {
|
|
|
+ // Do the alias check.
|
|
|
+ aliased = AA->alias(Loc1, Loc2);
|
|
|
+ }
|
|
|
+ // Store the result in the cache.
|
|
|
+ result = aliased;
|
|
|
+ return aliased;
|
|
|
+ }
|
|
|
+
|
|
|
+ typedef std::pair<Instruction *, Instruction *> AliasCacheKey;
|
|
|
+
|
|
|
+ /// Cache for alias results.
|
|
|
+ /// TODO: consider moving this to the AliasAnalysis itself.
|
|
|
+ DenseMap<AliasCacheKey, Optional<bool>> AliasCache;
|
|
|
+
|
|
|
+ /// Removes an instruction from its block and eventually deletes it.
|
|
|
+ /// It's like Instruction::eraseFromParent() except that the actual deletion
|
|
|
+ /// is delayed until BoUpSLP is destructed.
|
|
|
+ /// This is required to ensure that there are no incorrect collisions in the
|
|
|
+ /// AliasCache, which can happen if a new instruction is allocated at the
|
|
|
+ /// same address as a previously deleted instruction.
|
|
|
+ void eraseInstruction(Instruction *I) {
|
|
|
+ I->removeFromParent();
|
|
|
+ I->dropAllReferences();
|
|
|
+ DeletedInstructions.push_back(std::unique_ptr<Instruction>(I));
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Temporary store for deleted instructions. Instructions will be deleted
|
|
|
+ /// eventually when the BoUpSLP is destructed.
|
|
|
+ SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions;
|
|
|
+
|
|
|
/// A list of values that need to extracted out of the tree.
|
|
|
/// This list holds pairs of (Internal Scalar : External User).
|
|
|
UserList ExternalUses;
|
|
@@ -791,7 +847,7 @@ private:
|
|
|
/// Checks if a bundle of instructions can be scheduled, i.e. has no
|
|
|
/// cyclic dependencies. This is only a dry-run, no instructions are
|
|
|
/// actually moved at this stage.
|
|
|
- bool tryScheduleBundle(ArrayRef<Value *> VL, AliasAnalysis *AA);
|
|
|
+ bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP);
|
|
|
|
|
|
/// Un-bundles a group of instructions.
|
|
|
void cancelScheduling(ArrayRef<Value *> VL);
|
|
@@ -808,7 +864,7 @@ private:
|
|
|
/// Updates the dependency information of a bundle and of all instructions/
|
|
|
/// bundles which depend on the original bundle.
|
|
|
void calculateDependencies(ScheduleData *SD, bool InsertInReadyList,
|
|
|
- AliasAnalysis *AA);
|
|
|
+ BoUpSLP *SLP);
|
|
|
|
|
|
/// Sets all instruction in the scheduling region to un-scheduled.
|
|
|
void resetSchedule();
|
|
@@ -1069,7 +1125,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|
|
}
|
|
|
BlockScheduling &BS = *BSRef.get();
|
|
|
|
|
|
- if (!BS.tryScheduleBundle(VL, AA)) {
|
|
|
+ if (!BS.tryScheduleBundle(VL, this)) {
|
|
|
DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
|
|
|
BS.cancelScheduling(VL);
|
|
|
newTreeEntry(VL, false);
|
|
@@ -2360,7 +2416,7 @@ Value *BoUpSLP::vectorizeTree() {
|
|
|
Scalar->replaceAllUsesWith(Undef);
|
|
|
}
|
|
|
DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
|
|
|
- cast<Instruction>(Scalar)->eraseFromParent();
|
|
|
+ eraseInstruction(cast<Instruction>(Scalar));
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -2442,7 +2498,7 @@ void BoUpSLP::optimizeGatherSequence() {
|
|
|
if (In->isIdenticalTo(*v) &&
|
|
|
DT->dominates((*v)->getParent(), In->getParent())) {
|
|
|
In->replaceAllUsesWith(*v);
|
|
|
- In->eraseFromParent();
|
|
|
+ eraseInstruction(In);
|
|
|
In = nullptr;
|
|
|
break;
|
|
|
}
|
|
@@ -2460,7 +2516,7 @@ void BoUpSLP::optimizeGatherSequence() {
|
|
|
// Groups the instructions to a bundle (which is then a single scheduling entity)
|
|
|
// and schedules instructions until the bundle gets ready.
|
|
|
bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
|
|
|
- AliasAnalysis *AA) {
|
|
|
+ BoUpSLP *SLP) {
|
|
|
if (isa<PHINode>(VL[0]))
|
|
|
return true;
|
|
|
|
|
@@ -2517,7 +2573,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
|
|
|
DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block "
|
|
|
<< BB->getName() << "\n");
|
|
|
|
|
|
- calculateDependencies(Bundle, true, AA);
|
|
|
+ calculateDependencies(Bundle, true, SLP);
|
|
|
|
|
|
// Now try to schedule the new bundle. As soon as the bundle is "ready" it
|
|
|
// means that there are no cyclic dependencies and we can schedule it.
|
|
@@ -2648,18 +2704,9 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-/// \returns the AA location that is being access by the instruction.
|
|
|
-static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
|
|
|
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
|
|
- return AA->getLocation(SI);
|
|
|
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
|
|
|
- return AA->getLocation(LI);
|
|
|
- return AliasAnalysis::Location();
|
|
|
-}
|
|
|
-
|
|
|
void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
|
|
|
bool InsertInReadyList,
|
|
|
- AliasAnalysis *AA) {
|
|
|
+ BoUpSLP *SLP) {
|
|
|
assert(SD->isSchedulingEntity());
|
|
|
|
|
|
SmallVector<ScheduleData *, 10> WorkList;
|
|
@@ -2704,14 +2751,14 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
|
|
|
// Handle the memory dependencies.
|
|
|
ScheduleData *DepDest = BundleMember->NextLoadStore;
|
|
|
if (DepDest) {
|
|
|
- AliasAnalysis::Location SrcLoc = getLocation(BundleMember->Inst, AA);
|
|
|
+ Instruction *SrcInst = BundleMember->Inst;
|
|
|
+ AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA);
|
|
|
bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
|
|
|
|
|
|
while (DepDest) {
|
|
|
assert(isInSchedulingRegion(DepDest));
|
|
|
if (SrcMayWrite || DepDest->Inst->mayWriteToMemory()) {
|
|
|
- AliasAnalysis::Location DstLoc = getLocation(DepDest->Inst, AA);
|
|
|
- if (!SrcLoc.Ptr || !DstLoc.Ptr || AA->alias(SrcLoc, DstLoc)) {
|
|
|
+ if (SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)) {
|
|
|
DepDest->MemoryDependencies.push_back(BundleMember);
|
|
|
BundleMember->Dependencies++;
|
|
|
ScheduleData *DestBundle = DepDest->FirstInBundle;
|
|
@@ -2779,7 +2826,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
|
|
|
"scheduler and vectorizer have different opinion on what is a bundle");
|
|
|
SD->FirstInBundle->SchedulingPriority = Idx++;
|
|
|
if (SD->isSchedulingEntity()) {
|
|
|
- BS->calculateDependencies(SD, false, AA);
|
|
|
+ BS->calculateDependencies(SD, false, this);
|
|
|
NumToSchedule++;
|
|
|
}
|
|
|
}
|
|
@@ -2872,6 +2919,9 @@ struct SLPVectorizer : public FunctionPass {
|
|
|
// store instructions.
|
|
|
BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AC);
|
|
|
|
|
|
+ // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
|
|
|
+ // delete instructions.
|
|
|
+
|
|
|
// Scan the blocks in the function in post order.
|
|
|
for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
|
|
|
e = po_end(&F.getEntryBlock()); it != e; ++it) {
|