|
@@ -16,8 +16,8 @@
|
|
|
// case, we need to generate code to execute these 'left over' iterations.
|
|
|
//
|
|
|
// The current strategy generates an if-then-else sequence prior to the
|
|
|
-// unrolled loop to execute the 'left over' iterations. Other strategies
|
|
|
-// include generate a loop before or after the unrolled loop.
|
|
|
+// unrolled loop to execute the 'left over' iterations before or after the
|
|
|
+// unrolled loop.
|
|
|
//
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
@@ -60,33 +60,35 @@ STATISTIC(NumRuntimeUnrolled,
|
|
|
/// than the unroll factor.
|
|
|
///
|
|
|
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
|
|
|
- BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
|
|
|
- BasicBlock *OrigPH, BasicBlock *NewPH,
|
|
|
- ValueToValueMapTy &VMap, DominatorTree *DT,
|
|
|
- LoopInfo *LI, bool PreserveLCSSA) {
|
|
|
+ BasicBlock *PrologExit, BasicBlock *PreHeader,
|
|
|
+ BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
|
|
|
+ DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
|
|
|
BasicBlock *Latch = L->getLoopLatch();
|
|
|
assert(Latch && "Loop must have a latch");
|
|
|
+ BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
|
|
|
|
|
|
// Create a PHI node for each outgoing value from the original loop
|
|
|
// (which means it is an outgoing value from the prolog code too).
|
|
|
// The new PHI node is inserted in the prolog end basic block.
|
|
|
- // The new PHI name is added as an operand of a PHI node in either
|
|
|
+ // The new PHI node value is added as an operand of a PHI node in either
|
|
|
// the loop header or the loop exit block.
|
|
|
- for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
|
|
|
- SBI != SBE; ++SBI) {
|
|
|
- for (BasicBlock::iterator BBI = (*SBI)->begin();
|
|
|
- PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
|
|
|
-
|
|
|
+ for (BasicBlock *Succ : successors(Latch)) {
|
|
|
+ for (Instruction &BBI : *Succ) {
|
|
|
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
|
|
|
+ // Exit when we passed all PHI nodes.
|
|
|
+ if (!PN)
|
|
|
+ break;
|
|
|
// Add a new PHI node to the prolog end block and add the
|
|
|
// appropriate incoming values.
|
|
|
- PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
|
|
|
- PrologEnd->getTerminator());
|
|
|
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
|
|
|
+ PrologExit->getFirstNonPHI());
|
|
|
// Adding a value to the new PHI node from the original loop preheader.
|
|
|
// This is the value that skips all the prolog code.
|
|
|
if (L->contains(PN)) {
|
|
|
- NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
|
|
|
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
|
|
|
+ PreHeader);
|
|
|
} else {
|
|
|
- NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
|
|
|
+ NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
|
|
|
}
|
|
|
|
|
|
Value *V = PN->getIncomingValueForBlock(Latch);
|
|
@@ -97,22 +99,22 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
|
|
|
}
|
|
|
// Adding a value to the new PHI node from the last prolog block
|
|
|
// that was created.
|
|
|
- NewPN->addIncoming(V, LastPrologBB);
|
|
|
+ NewPN->addIncoming(V, PrologLatch);
|
|
|
|
|
|
// Update the existing PHI node operand with the value from the
|
|
|
// new PHI node. How this is done depends on if the existing
|
|
|
// PHI node is in the original loop block, or the exit block.
|
|
|
if (L->contains(PN)) {
|
|
|
- PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
|
|
|
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
|
|
|
} else {
|
|
|
- PN->addIncoming(NewPN, PrologEnd);
|
|
|
+ PN->addIncoming(NewPN, PrologExit);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Create a branch around the original loop, which is taken if there are no
|
|
|
// iterations remaining to be executed after running the prologue.
|
|
|
- Instruction *InsertPt = PrologEnd->getTerminator();
|
|
|
+ Instruction *InsertPt = PrologExit->getTerminator();
|
|
|
IRBuilder<> B(InsertPt);
|
|
|
|
|
|
assert(Count != 0 && "nonsensical Count!");
|
|
@@ -126,25 +128,152 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
|
|
|
BasicBlock *Exit = L->getUniqueExitBlock();
|
|
|
assert(Exit && "Loop must have a single exit block only");
|
|
|
// Split the exit to maintain loop canonicalization guarantees
|
|
|
- SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
|
|
|
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
|
|
SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
|
|
|
PreserveLCSSA);
|
|
|
// Add the branch to the exit block (around the unrolled loop)
|
|
|
- B.CreateCondBr(BrLoopExit, Exit, NewPH);
|
|
|
+ B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
|
|
|
+ InsertPt->eraseFromParent();
|
|
|
+}
|
|
|
+
|
|
|
+/// Connect the unrolling epilog code to the original loop.
|
|
|
+/// The unrolling epilog code contains code to execute the
|
|
|
+/// 'extra' iterations if the run-time trip count modulo the
|
|
|
+/// unroll count is non-zero.
|
|
|
+///
|
|
|
+/// This function performs the following:
|
|
|
+/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
|
|
|
+/// - Create PHI nodes at the unrolling loop exit to combine
|
|
|
+/// values that exit the unrolling loop code and jump around it.
|
|
|
+/// - Update PHI operands in the epilog loop by the new PHI nodes
|
|
|
+/// - Branch around the epilog loop if extra iters (ModVal) is zero.
|
|
|
+///
|
|
|
+static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
|
|
|
+ BasicBlock *Exit, BasicBlock *PreHeader,
|
|
|
+ BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
|
|
|
+ ValueToValueMapTy &VMap, DominatorTree *DT,
|
|
|
+ LoopInfo *LI, bool PreserveLCSSA) {
|
|
|
+ BasicBlock *Latch = L->getLoopLatch();
|
|
|
+ assert(Latch && "Loop must have a latch");
|
|
|
+ BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
|
|
|
+
|
|
|
+ // Loop structure should be the following:
|
|
|
+ //
|
|
|
+ // PreHeader
|
|
|
+ // NewPreHeader
|
|
|
+ // Header
|
|
|
+ // ...
|
|
|
+ // Latch
|
|
|
+ // NewExit (PN)
|
|
|
+ // EpilogPreHeader
|
|
|
+ // EpilogHeader
|
|
|
+ // ...
|
|
|
+ // EpilogLatch
|
|
|
+ // Exit (EpilogPN)
|
|
|
+
|
|
|
+ // Update PHI nodes at NewExit and Exit.
|
|
|
+ for (Instruction &BBI : *NewExit) {
|
|
|
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
|
|
|
+ // Exit when we passed all PHI nodes.
|
|
|
+ if (!PN)
|
|
|
+ break;
|
|
|
+ // PN should be used in another PHI located in Exit block as
|
|
|
+ // Exit was split by SplitBlockPredecessors into Exit and NewExit
|
|
|
+ // Basicaly it should look like:
|
|
|
+ // NewExit:
|
|
|
+ // PN = PHI [I, Latch]
|
|
|
+ // ...
|
|
|
+ // Exit:
|
|
|
+ // EpilogPN = PHI [PN, EpilogPreHeader]
|
|
|
+ //
|
|
|
+ // There is EpilogPreHeader incoming block instead of NewExit as
|
|
|
+ // NewExit was spilt 1 more time to get EpilogPreHeader.
|
|
|
+ assert(PN->hasOneUse() && "The phi should have 1 use");
|
|
|
+ PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());
|
|
|
+ assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
|
|
|
+
|
|
|
+ // Add incoming PreHeader from branch around the Loop
|
|
|
+ PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
|
|
|
+
|
|
|
+ Value *V = PN->getIncomingValueForBlock(Latch);
|
|
|
+ Instruction *I = dyn_cast<Instruction>(V);
|
|
|
+ if (I && L->contains(I))
|
|
|
+ // If value comes from an instruction in the loop add VMap value.
|
|
|
+ V = VMap[I];
|
|
|
+ // For the instruction out of the loop, constant or undefined value
|
|
|
+ // insert value itself.
|
|
|
+ EpilogPN->addIncoming(V, EpilogLatch);
|
|
|
+
|
|
|
+ assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
|
|
|
+ "EpilogPN should have EpilogPreHeader incoming block");
|
|
|
+ // Change EpilogPreHeader incoming block to NewExit.
|
|
|
+ EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
|
|
|
+ NewExit);
|
|
|
+ // Now PHIs should look like:
|
|
|
+ // NewExit:
|
|
|
+ // PN = PHI [I, Latch], [undef, PreHeader]
|
|
|
+ // ...
|
|
|
+ // Exit:
|
|
|
+ // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
|
|
|
+ }
|
|
|
+
|
|
|
+ // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
|
|
|
+ // Update corresponding PHI nodes in epilog loop.
|
|
|
+ for (BasicBlock *Succ : successors(Latch)) {
|
|
|
+ // Skip this as we already updated phis in exit blocks.
|
|
|
+ if (!L->contains(Succ))
|
|
|
+ continue;
|
|
|
+ for (Instruction &BBI : *Succ) {
|
|
|
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
|
|
|
+ // Exit when we passed all PHI nodes.
|
|
|
+ if (!PN)
|
|
|
+ break;
|
|
|
+ // Add new PHI nodes to the loop exit block and update epilog
|
|
|
+ // PHIs with the new PHI values.
|
|
|
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
|
|
|
+ NewExit->getFirstNonPHI());
|
|
|
+ // Adding a value to the new PHI node from the unrolling loop preheader.
|
|
|
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
|
|
|
+ // Adding a value to the new PHI node from the unrolling loop latch.
|
|
|
+ NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
|
|
|
+
|
|
|
+ // Update the existing PHI node operand with the value from the new PHI
|
|
|
+ // node. Corresponding instruction in epilog loop should be PHI.
|
|
|
+ PHINode *VPN = cast<PHINode>(VMap[&BBI]);
|
|
|
+ VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Instruction *InsertPt = NewExit->getTerminator();
|
|
|
+ IRBuilder<> B(InsertPt);
|
|
|
+ Value *BrLoopExit = B.CreateIsNotNull(ModVal);
|
|
|
+ assert(Exit && "Loop must have a single exit block only");
|
|
|
+ // Split the exit to maintain loop canonicalization guarantees
|
|
|
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
|
|
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
|
|
|
+ PreserveLCSSA);
|
|
|
+ // Add the branch to the exit block (around the unrolling loop)
|
|
|
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
|
|
|
InsertPt->eraseFromParent();
|
|
|
}
|
|
|
|
|
|
/// Create a clone of the blocks in a loop and connect them together.
|
|
|
-/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
|
|
|
-/// loop will be created including all cloned blocks, and the iterator of it
|
|
|
-/// switches to count NewIter down to 0.
|
|
|
+/// If CreateRemainderLoop is false, loop structure will not be cloned,
|
|
|
+/// otherwise a new loop will be created including all cloned blocks, and the
|
|
|
+/// iterator of it switches to count NewIter down to 0.
|
|
|
+/// The cloned blocks should be inserted between InsertTop and InsertBot.
|
|
|
+/// If loop structure is cloned InsertTop should be new preheader, InsertBot
|
|
|
+/// new loop exit.
|
|
|
///
|
|
|
-static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|
|
+static void CloneLoopBlocks(Loop *L, Value *NewIter,
|
|
|
+ const bool CreateRemainderLoop,
|
|
|
+ const bool UseEpilogRemainder,
|
|
|
BasicBlock *InsertTop, BasicBlock *InsertBot,
|
|
|
+ BasicBlock *Preheader,
|
|
|
std::vector<BasicBlock *> &NewBlocks,
|
|
|
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
|
|
|
LoopInfo *LI) {
|
|
|
- BasicBlock *Preheader = L->getLoopPreheader();
|
|
|
+ StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
|
|
|
BasicBlock *Header = L->getHeader();
|
|
|
BasicBlock *Latch = L->getLoopLatch();
|
|
|
Function *F = Header->getParent();
|
|
@@ -152,7 +281,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|
|
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
|
|
|
Loop *NewLoop = nullptr;
|
|
|
Loop *ParentLoop = L->getParentLoop();
|
|
|
- if (!UnrollProlog) {
|
|
|
+ if (CreateRemainderLoop) {
|
|
|
NewLoop = new Loop();
|
|
|
if (ParentLoop)
|
|
|
ParentLoop->addChildLoop(NewLoop);
|
|
@@ -163,7 +292,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|
|
// For each block in the original loop, create a new copy,
|
|
|
// and update the value map with the newly created values.
|
|
|
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
|
|
|
- BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
|
|
|
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
|
|
|
NewBlocks.push_back(NewBB);
|
|
|
|
|
|
if (NewLoop)
|
|
@@ -179,16 +308,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|
|
}
|
|
|
|
|
|
if (Latch == *BB) {
|
|
|
- // For the last block, if UnrollProlog is true, create a direct jump to
|
|
|
- // InsertBot. If not, create a loop back to cloned head.
|
|
|
+ // For the last block, if CreateRemainderLoop is false, create a direct
|
|
|
+ // jump to InsertBot. If not, create a loop back to cloned head.
|
|
|
VMap.erase((*BB)->getTerminator());
|
|
|
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
|
|
|
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
|
|
|
IRBuilder<> Builder(LatchBR);
|
|
|
- if (UnrollProlog) {
|
|
|
+ if (!CreateRemainderLoop) {
|
|
|
Builder.CreateBr(InsertBot);
|
|
|
} else {
|
|
|
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
|
|
|
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
|
|
|
+ suffix + ".iter",
|
|
|
FirstLoopBB->getFirstNonPHI());
|
|
|
Value *IdxSub =
|
|
|
Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
|
|
@@ -207,9 +337,15 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|
|
// cloned loop.
|
|
|
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
|
|
|
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
|
|
|
- if (UnrollProlog) {
|
|
|
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
|
|
|
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
|
|
|
+ if (!CreateRemainderLoop) {
|
|
|
+ if (UseEpilogRemainder) {
|
|
|
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
|
|
|
+ NewPHI->setIncomingBlock(idx, InsertTop);
|
|
|
+ NewPHI->removeIncomingValue(Latch, false);
|
|
|
+ } else {
|
|
|
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
|
|
|
+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
|
|
|
+ }
|
|
|
} else {
|
|
|
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
|
|
|
NewPHI->setIncomingBlock(idx, InsertTop);
|
|
@@ -254,7 +390,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-/// Insert code in the prolog code when unrolling a loop with a
|
|
|
+/// Insert code in the prolog/epilog code when unrolling a loop with a
|
|
|
/// run-time trip-count.
|
|
|
///
|
|
|
/// This method assumes that the loop unroll factor is total number
|
|
@@ -266,6 +402,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|
|
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
|
|
|
/// the switch instruction is generated.
|
|
|
///
|
|
|
+/// ***Prolog case***
|
|
|
/// extraiters = tripcount % loopfactor
|
|
|
/// if (extraiters == 0) jump Loop:
|
|
|
/// else jump Prol
|
|
@@ -277,17 +414,35 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|
|
/// ...
|
|
|
/// End:
|
|
|
///
|
|
|
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|
|
- bool AllowExpensiveTripCount, LoopInfo *LI,
|
|
|
- ScalarEvolution *SE, DominatorTree *DT,
|
|
|
- bool PreserveLCSSA) {
|
|
|
- // For now, only unroll loops that contain a single exit.
|
|
|
+/// ***Epilog case***
|
|
|
+/// extraiters = tripcount % loopfactor
|
|
|
+/// if (extraiters == tripcount) jump LoopExit:
|
|
|
+/// unroll_iters = tripcount - extraiters
|
|
|
+/// Loop: LoopBody; (executes unroll_iter times);
|
|
|
+/// unroll_iter -= 1
|
|
|
+/// if (unroll_iter != 0) jump Loop:
|
|
|
+/// LoopExit:
|
|
|
+/// if (extraiters == 0) jump EpilExit:
|
|
|
+/// Epil: LoopBody; (executes extraiters times)
|
|
|
+/// extraiters -= 1 // Omitted if unroll factor is 2.
|
|
|
+/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
|
|
|
+/// EpilExit:
|
|
|
+
|
|
|
+bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
|
|
+ bool AllowExpensiveTripCount,
|
|
|
+ bool UseEpilogRemainder,
|
|
|
+ LoopInfo *LI, ScalarEvolution *SE,
|
|
|
+ DominatorTree *DT, bool PreserveLCSSA) {
|
|
|
+ // for now, only unroll loops that contain a single exit
|
|
|
if (!L->getExitingBlock())
|
|
|
return false;
|
|
|
|
|
|
// Make sure the loop is in canonical form, and there is a single
|
|
|
// exit block only.
|
|
|
- if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
|
|
|
+ if (!L->isLoopSimplifyForm())
|
|
|
+ return false;
|
|
|
+ BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
|
|
|
+ if (!Exit)
|
|
|
return false;
|
|
|
|
|
|
// Use Scalar Evolution to compute the trip count. This allows more loops to
|
|
@@ -311,8 +466,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|
|
return false;
|
|
|
|
|
|
BasicBlock *Header = L->getHeader();
|
|
|
- BasicBlock *PH = L->getLoopPreheader();
|
|
|
- BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
|
|
+ BasicBlock *PreHeader = L->getLoopPreheader();
|
|
|
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
|
|
|
const DataLayout &DL = Header->getModule()->getDataLayout();
|
|
|
SCEVExpander Expander(*SE, DL, "loop-unroll");
|
|
|
if (!AllowExpensiveTripCount &&
|
|
@@ -330,26 +485,75 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|
|
SE->forgetLoop(ParentLoop);
|
|
|
|
|
|
BasicBlock *Latch = L->getLoopLatch();
|
|
|
- // It helps to split the original preheader twice, one for the end of the
|
|
|
- // prolog code and one for a new loop preheader.
|
|
|
- BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
|
|
|
- BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
|
|
|
- PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
|
|
|
|
|
+ // Loop structure is the following:
|
|
|
+ //
|
|
|
+ // PreHeader
|
|
|
+ // Header
|
|
|
+ // ...
|
|
|
+ // Latch
|
|
|
+ // Exit
|
|
|
+
|
|
|
+ BasicBlock *NewPreHeader;
|
|
|
+ BasicBlock *NewExit = nullptr;
|
|
|
+ BasicBlock *PrologExit = nullptr;
|
|
|
+ BasicBlock *EpilogPreHeader = nullptr;
|
|
|
+ BasicBlock *PrologPreHeader = nullptr;
|
|
|
+
|
|
|
+ if (UseEpilogRemainder) {
|
|
|
+ // If epilog remainder
|
|
|
+ // Split PreHeader to insert a branch around loop for unrolling.
|
|
|
+ NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
|
|
|
+ NewPreHeader->setName(PreHeader->getName() + ".new");
|
|
|
+ // Split Exit to create phi nodes from branch above.
|
|
|
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
|
|
+ NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
|
|
|
+ DT, LI, PreserveLCSSA);
|
|
|
+ // Split NewExit to insert epilog remainder loop.
|
|
|
+ EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
|
|
|
+ EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
|
|
|
+ } else {
|
|
|
+ // If prolog remainder
|
|
|
+ // Split the original preheader twice to insert prolog remainder loop
|
|
|
+ PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
|
|
|
+ PrologPreHeader->setName(Header->getName() + ".prol.preheader");
|
|
|
+ PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
|
|
|
+ DT, LI);
|
|
|
+ PrologExit->setName(Header->getName() + ".prol.loopexit");
|
|
|
+ // Split PrologExit to get NewPreHeader.
|
|
|
+ NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
|
|
|
+ NewPreHeader->setName(PreHeader->getName() + ".new");
|
|
|
+ }
|
|
|
+ // Loop structure should be the following:
|
|
|
+ // Epilog Prolog
|
|
|
+ //
|
|
|
+ // PreHeader PreHeader
|
|
|
+ // *NewPreHeader *PrologPreHeader
|
|
|
+ // Header *PrologExit
|
|
|
+ // ... *NewPreHeader
|
|
|
+ // Latch Header
|
|
|
+ // *NewExit ...
|
|
|
+ // *EpilogPreHeader Latch
|
|
|
+ // Exit Exit
|
|
|
+
|
|
|
+ // Calculate conditions for branch around loop for unrolling
|
|
|
+ // in epilog case and around prolog remainder loop in prolog case.
|
|
|
// Compute the number of extra iterations required, which is:
|
|
|
- // extra iterations = run-time trip count % (loop unroll factor + 1)
|
|
|
+ // extra iterations = run-time trip count % loop unroll factor
|
|
|
+ PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
|
|
|
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
|
|
|
PreHeaderBR);
|
|
|
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
|
|
|
PreHeaderBR);
|
|
|
-
|
|
|
IRBuilder<> B(PreHeaderBR);
|
|
|
Value *ModVal;
|
|
|
// Calculate ModVal = (BECount + 1) % Count.
|
|
|
// Note that TripCount is BECount + 1.
|
|
|
if (isPowerOf2_32(Count)) {
|
|
|
+ // When Count is power of 2 we don't BECount for epilog case, however we'll
|
|
|
+ // need it for a branch around unrolling loop for prolog case.
|
|
|
ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
|
|
|
- // 1. There are no iterations to be run in the prologue loop.
|
|
|
+ // 1. There are no iterations to be run in the prolog/epilog loop.
|
|
|
// OR
|
|
|
// 2. The addition computing TripCount overflowed.
|
|
|
//
|
|
@@ -371,18 +575,18 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|
|
ConstantInt::get(BECount->getType(), Count),
|
|
|
"xtraiter");
|
|
|
}
|
|
|
- Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
|
|
|
-
|
|
|
- // Branch to either the extra iterations or the cloned/unrolled loop.
|
|
|
- // We will fix up the true branch label when adding loop body copies.
|
|
|
- B.CreateCondBr(BranchVal, PEnd, PEnd);
|
|
|
- assert(PreHeaderBR->isUnconditional() &&
|
|
|
- PreHeaderBR->getSuccessor(0) == PEnd &&
|
|
|
- "CFG edges in Preheader are not correct");
|
|
|
+ Value *CmpOperand =
|
|
|
+ UseEpilogRemainder ? TripCount :
|
|
|
+ ConstantInt::get(TripCount->getType(), 0);
|
|
|
+ Value *BranchVal = B.CreateICmpNE(ModVal, CmpOperand, "lcmp.mod");
|
|
|
+ BasicBlock *FirstLoop = UseEpilogRemainder ? NewPreHeader : PrologPreHeader;
|
|
|
+ BasicBlock *SecondLoop = UseEpilogRemainder ? NewExit : PrologExit;
|
|
|
+ // Branch to either remainder (extra iterations) loop or unrolling loop.
|
|
|
+ B.CreateCondBr(BranchVal, FirstLoop, SecondLoop);
|
|
|
PreHeaderBR->eraseFromParent();
|
|
|
Function *F = Header->getParent();
|
|
|
// Get an ordered list of blocks in the loop to help with the ordering of the
|
|
|
- // cloned blocks in the prolog code.
|
|
|
+ // cloned blocks in the prolog/epilog code
|
|
|
LoopBlocksDFS LoopBlocks(L);
|
|
|
LoopBlocks.perform(LI);
|
|
|
|
|
@@ -394,17 +598,38 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|
|
std::vector<BasicBlock *> NewBlocks;
|
|
|
ValueToValueMapTy VMap;
|
|
|
|
|
|
- bool UnrollPrologue = Count == 2;
|
|
|
+ // For unroll factor 2 remainder loop will have 1 iterations.
|
|
|
+ // Do not create 1 iteration loop.
|
|
|
+ bool CreateRemainderLoop = (Count != 2);
|
|
|
|
|
|
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
|
|
|
// the loop, otherwise we create a cloned loop to execute the extra
|
|
|
// iterations. This function adds the appropriate CFG connections.
|
|
|
- CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
|
|
|
- VMap, LI);
|
|
|
-
|
|
|
- // Insert the cloned blocks into the function just before the original loop.
|
|
|
- F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
|
|
|
- NewBlocks[0]->getIterator(), F->end());
|
|
|
+ BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
|
|
|
+ BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
|
|
|
+ CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
|
|
|
+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
|
|
|
+
|
|
|
+ // Insert the cloned blocks into the function.
|
|
|
+ F->getBasicBlockList().splice(InsertBot->getIterator(),
|
|
|
+ F->getBasicBlockList(),
|
|
|
+ NewBlocks[0]->getIterator(),
|
|
|
+ F->end());
|
|
|
+
|
|
|
+ // Loop structure should be the following:
|
|
|
+ // Epilog Prolog
|
|
|
+ //
|
|
|
+ // PreHeader PreHeader
|
|
|
+ // NewPreHeader PrologPreHeader
|
|
|
+ // Header PrologHeader
|
|
|
+ // ... ...
|
|
|
+ // Latch PrologLatch
|
|
|
+ // NewExit PrologExit
|
|
|
+ // EpilogPreHeader NewPreHeader
|
|
|
+ // EpilogHeader Header
|
|
|
+ // ... ...
|
|
|
+ // EpilogLatch Latch
|
|
|
+ // Exit Exit
|
|
|
|
|
|
// Rewrite the cloned instruction operands to use the values created when the
|
|
|
// clone is created.
|
|
@@ -415,11 +640,38 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Connect the prolog code to the original loop and update the
|
|
|
- // PHI functions.
|
|
|
- BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
|
|
|
- ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
|
|
|
- PreserveLCSSA);
|
|
|
+ if (UseEpilogRemainder) {
|
|
|
+ // Connect the epilog code to the original loop and update the
|
|
|
+ // PHI functions.
|
|
|
+ ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
|
|
|
+ EpilogPreHeader, NewPreHeader, VMap, DT, LI,
|
|
|
+ PreserveLCSSA);
|
|
|
+
|
|
|
+ // Update counter in loop for unrolling.
|
|
|
+ // I should be multiply of Count.
|
|
|
+ IRBuilder<> B2(NewPreHeader->getTerminator());
|
|
|
+ Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
|
|
|
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
|
|
|
+ B2.SetInsertPoint(LatchBR);
|
|
|
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
|
|
|
+ Header->getFirstNonPHI());
|
|
|
+ Value *IdxSub =
|
|
|
+ B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
|
|
|
+ NewIdx->getName() + ".nsub");
|
|
|
+ Value *IdxCmp;
|
|
|
+ if (LatchBR->getSuccessor(0) == Header)
|
|
|
+ IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
|
|
|
+ else
|
|
|
+ IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
|
|
|
+ NewIdx->addIncoming(TestVal, NewPreHeader);
|
|
|
+ NewIdx->addIncoming(IdxSub, Latch);
|
|
|
+ LatchBR->setCondition(IdxCmp);
|
|
|
+ } else {
|
|
|
+ // Connect the prolog code to the original loop and update the
|
|
|
+ // PHI functions.
|
|
|
+ ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
|
|
|
+ VMap, DT, LI, PreserveLCSSA);
|
|
|
+ }
|
|
|
NumRuntimeUnrolled++;
|
|
|
return true;
|
|
|
}
|