LoopUnrollPeel.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. //===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements some loop unrolling utilities for peeling loops
  11. // with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
  12. // unrolling loops with compile-time constant trip counts.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/ADT/DenseMap.h"
  16. #include "llvm/ADT/Optional.h"
  17. #include "llvm/ADT/SmallVector.h"
  18. #include "llvm/ADT/Statistic.h"
  19. #include "llvm/Analysis/LoopInfo.h"
  20. #include "llvm/Analysis/LoopIterator.h"
  21. #include "llvm/Analysis/ScalarEvolution.h"
  22. #include "llvm/Analysis/TargetTransformInfo.h"
  23. #include "llvm/IR/BasicBlock.h"
  24. #include "llvm/IR/Dominators.h"
  25. #include "llvm/IR/Function.h"
  26. #include "llvm/IR/InstrTypes.h"
  27. #include "llvm/IR/Instruction.h"
  28. #include "llvm/IR/Instructions.h"
  29. #include "llvm/IR/LLVMContext.h"
  30. #include "llvm/IR/MDBuilder.h"
  31. #include "llvm/IR/Metadata.h"
  32. #include "llvm/Support/Casting.h"
  33. #include "llvm/Support/CommandLine.h"
  34. #include "llvm/Support/Debug.h"
  35. #include "llvm/Support/raw_ostream.h"
  36. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  37. #include "llvm/Transforms/Utils/Cloning.h"
  38. #include "llvm/Transforms/Utils/LoopSimplify.h"
  39. #include "llvm/Transforms/Utils/LoopUtils.h"
  40. #include "llvm/Transforms/Utils/UnrollLoop.h"
  41. #include "llvm/Transforms/Utils/ValueMapper.h"
  42. #include <algorithm>
  43. #include <cassert>
  44. #include <cstdint>
  45. #include <limits>
  46. using namespace llvm;
  47. #define DEBUG_TYPE "loop-unroll"
  48. STATISTIC(NumPeeled, "Number of loops peeled");
  49. static cl::opt<unsigned> UnrollPeelMaxCount(
  50. "unroll-peel-max-count", cl::init(7), cl::Hidden,
  51. cl::desc("Max average trip count which will cause loop peeling."));
  52. static cl::opt<unsigned> UnrollForcePeelCount(
  53. "unroll-force-peel-count", cl::init(0), cl::Hidden,
  54. cl::desc("Force a peel count regardless of profiling information."));
  55. // Designates that a Phi is estimated to become invariant after an "infinite"
  56. // number of loop iterations (i.e. only may become an invariant if the loop is
  57. // fully unrolled).
  58. static const unsigned InfiniteIterationsToInvariance =
  59. std::numeric_limits<unsigned>::max();
  60. // Check whether we are capable of peeling this loop.
  61. static bool canPeel(Loop *L) {
  62. // Make sure the loop is in simplified form
  63. if (!L->isLoopSimplifyForm())
  64. return false;
  65. // Only peel loops that contain a single exit
  66. if (!L->getExitingBlock() || !L->getUniqueExitBlock())
  67. return false;
  68. // Don't try to peel loops where the latch is not the exiting block.
  69. // This can be an indication of two different things:
  70. // 1) The loop is not rotated.
  71. // 2) The loop contains irreducible control flow that involves the latch.
  72. if (L->getLoopLatch() != L->getExitingBlock())
  73. return false;
  74. return true;
  75. }
  76. // This function calculates the number of iterations after which the given Phi
  77. // becomes an invariant. The pre-calculated values are memorized in the map. The
  78. // function (shortcut is I) is calculated according to the following definition:
  79. // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
  80. // If %y is a loop invariant, then I(%x) = 1.
  81. // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
  82. // Otherwise, I(%x) is infinite.
  83. // TODO: Actually if %y is an expression that depends only on Phi %z and some
  84. // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
  85. // looks like:
  86. // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
  87. // %y = phi(0, 5),
  88. // %a = %y + 1.
  89. static unsigned calculateIterationsToInvariance(
  90. PHINode *Phi, Loop *L, BasicBlock *BackEdge,
  91. SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
  92. assert(Phi->getParent() == L->getHeader() &&
  93. "Non-loop Phi should not be checked for turning into invariant.");
  94. assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
  95. // If we already know the answer, take it from the map.
  96. auto I = IterationsToInvariance.find(Phi);
  97. if (I != IterationsToInvariance.end())
  98. return I->second;
  99. // Otherwise we need to analyze the input from the back edge.
  100. Value *Input = Phi->getIncomingValueForBlock(BackEdge);
  101. // Place infinity to map to avoid infinite recursion for cycled Phis. Such
  102. // cycles can never stop on an invariant.
  103. IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
  104. unsigned ToInvariance = InfiniteIterationsToInvariance;
  105. if (L->isLoopInvariant(Input))
  106. ToInvariance = 1u;
  107. else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
  108. // Only consider Phis in header block.
  109. if (IncPhi->getParent() != L->getHeader())
  110. return InfiniteIterationsToInvariance;
  111. // If the input becomes an invariant after X iterations, then our Phi
  112. // becomes an invariant after X + 1 iterations.
  113. unsigned InputToInvariance = calculateIterationsToInvariance(
  114. IncPhi, L, BackEdge, IterationsToInvariance);
  115. if (InputToInvariance != InfiniteIterationsToInvariance)
  116. ToInvariance = InputToInvariance + 1u;
  117. }
  118. // If we found that this Phi lies in an invariant chain, update the map.
  119. if (ToInvariance != InfiniteIterationsToInvariance)
  120. IterationsToInvariance[Phi] = ToInvariance;
  121. return ToInvariance;
  122. }
  123. // Return the number of iterations we want to peel off.
  124. void llvm::computePeelCount(Loop *L, unsigned LoopSize,
  125. TargetTransformInfo::UnrollingPreferences &UP,
  126. unsigned &TripCount) {
  127. assert(LoopSize > 0 && "Zero loop size is not allowed!");
  128. UP.PeelCount = 0;
  129. if (!canPeel(L))
  130. return;
  131. // Only try to peel innermost loops.
  132. if (!L->empty())
  133. return;
  134. // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
  135. // iterations of the loop. For this we compute the number for iterations after
  136. // which every Phi is guaranteed to become an invariant, and try to peel the
  137. // maximum number of iterations among these values, thus turning all those
  138. // Phis into invariants.
  139. // First, check that we can peel at least one iteration.
  140. if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
  141. // Store the pre-calculated values here.
  142. SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
  143. // Now go through all Phis to calculate their the number of iterations they
  144. // need to become invariants.
  145. unsigned DesiredPeelCount = 0;
  146. BasicBlock *BackEdge = L->getLoopLatch();
  147. assert(BackEdge && "Loop is not in simplified form?");
  148. for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
  149. PHINode *Phi = cast<PHINode>(&*BI);
  150. unsigned ToInvariance = calculateIterationsToInvariance(
  151. Phi, L, BackEdge, IterationsToInvariance);
  152. if (ToInvariance != InfiniteIterationsToInvariance)
  153. DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
  154. }
  155. if (DesiredPeelCount > 0) {
  156. // Pay respect to limitations implied by loop size and the max peel count.
  157. unsigned MaxPeelCount = UnrollPeelMaxCount;
  158. MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
  159. DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
  160. // Consider max peel count limitation.
  161. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
  162. DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
  163. << " some Phis into invariants.\n");
  164. UP.PeelCount = DesiredPeelCount;
  165. return;
  166. }
  167. }
  168. // Bail if we know the statically calculated trip count.
  169. // In this case we rather prefer partial unrolling.
  170. if (TripCount)
  171. return;
  172. // If the user provided a peel count, use that.
  173. bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
  174. if (UserPeelCount) {
  175. DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
  176. << " iterations.\n");
  177. UP.PeelCount = UnrollForcePeelCount;
  178. return;
  179. }
  180. // If we don't know the trip count, but have reason to believe the average
  181. // trip count is low, peeling should be beneficial, since we will usually
  182. // hit the peeled section.
  183. // We only do this in the presence of profile information, since otherwise
  184. // our estimates of the trip count are not reliable enough.
  185. if (UP.AllowPeeling && L->getHeader()->getParent()->hasProfileData()) {
  186. Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
  187. if (!PeelCount)
  188. return;
  189. DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
  190. << "\n");
  191. if (*PeelCount) {
  192. if ((*PeelCount <= UnrollPeelMaxCount) &&
  193. (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
  194. DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n");
  195. UP.PeelCount = *PeelCount;
  196. return;
  197. }
  198. DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
  199. DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
  200. DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n");
  201. DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
  202. }
  203. }
  204. }
  205. /// \brief Update the branch weights of the latch of a peeled-off loop
  206. /// iteration.
  207. /// This sets the branch weights for the latch of the recently peeled off loop
  208. /// iteration correctly.
  209. /// Our goal is to make sure that:
  210. /// a) The total weight of all the copies of the loop body is preserved.
  211. /// b) The total weight of the loop exit is preserved.
  212. /// c) The body weight is reasonably distributed between the peeled iterations.
  213. ///
  214. /// \param Header The copy of the header block that belongs to next iteration.
  215. /// \param LatchBR The copy of the latch branch that belongs to this iteration.
  216. /// \param IterNumber The serial number of the iteration that was just
  217. /// peeled off.
  218. /// \param AvgIters The average number of iterations we expect the loop to have.
  219. /// \param[in,out] PeeledHeaderWeight The total number of dynamic loop
  220. /// iterations that are unaccounted for. As an input, it represents the number
  221. /// of times we expect to enter the header of the iteration currently being
  222. /// peeled off. The output is the number of times we expect to enter the
  223. /// header of the next iteration.
  224. static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
  225. unsigned IterNumber, unsigned AvgIters,
  226. uint64_t &PeeledHeaderWeight) {
  227. // FIXME: Pick a more realistic distribution.
  228. // Currently the proportion of weight we assign to the fall-through
  229. // side of the branch drops linearly with the iteration number, and we use
  230. // a 0.9 fudge factor to make the drop-off less sharp...
  231. if (PeeledHeaderWeight) {
  232. uint64_t FallThruWeight =
  233. PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
  234. uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
  235. PeeledHeaderWeight -= ExitWeight;
  236. unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
  237. MDBuilder MDB(LatchBR->getContext());
  238. MDNode *WeightNode =
  239. HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
  240. : MDB.createBranchWeights(FallThruWeight, ExitWeight);
  241. LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
  242. }
  243. }
  244. /// \brief Clones the body of the loop L, putting it between \p InsertTop and \p
  245. /// InsertBot.
  246. /// \param IterNumber The serial number of the iteration currently being
  247. /// peeled off.
  248. /// \param Exit The exit block of the original loop.
  249. /// \param[out] NewBlocks A list of the the blocks in the newly created clone
  250. /// \param[out] VMap The value map between the loop and the new clone.
  251. /// \param LoopBlocks A helper for DFS-traversal of the loop.
  252. /// \param LVMap A value-map that maps instructions from the original loop to
  253. /// instructions in the last peeled-off iteration.
  254. static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
  255. BasicBlock *InsertBot, BasicBlock *Exit,
  256. SmallVectorImpl<BasicBlock *> &NewBlocks,
  257. LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
  258. ValueToValueMapTy &LVMap, DominatorTree *DT,
  259. LoopInfo *LI) {
  260. BasicBlock *Header = L->getHeader();
  261. BasicBlock *Latch = L->getLoopLatch();
  262. BasicBlock *PreHeader = L->getLoopPreheader();
  263. Function *F = Header->getParent();
  264. LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  265. LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  266. Loop *ParentLoop = L->getParentLoop();
  267. // For each block in the original loop, create a new copy,
  268. // and update the value map with the newly created values.
  269. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
  270. BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
  271. NewBlocks.push_back(NewBB);
  272. if (ParentLoop)
  273. ParentLoop->addBasicBlockToLoop(NewBB, *LI);
  274. VMap[*BB] = NewBB;
  275. // If dominator tree is available, insert nodes to represent cloned blocks.
  276. if (DT) {
  277. if (Header == *BB)
  278. DT->addNewBlock(NewBB, InsertTop);
  279. else {
  280. DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
  281. // VMap must contain entry for IDom, as the iteration order is RPO.
  282. DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
  283. }
  284. }
  285. }
  286. // Hook-up the control flow for the newly inserted blocks.
  287. // The new header is hooked up directly to the "top", which is either
  288. // the original loop preheader (for the first iteration) or the previous
  289. // iteration's exiting block (for every other iteration)
  290. InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
  291. // Similarly, for the latch:
  292. // The original exiting edge is still hooked up to the loop exit.
  293. // The backedge now goes to the "bottom", which is either the loop's real
  294. // header (for the last peeled iteration) or the copied header of the next
  295. // iteration (for every other iteration)
  296. BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
  297. BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
  298. unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
  299. LatchBR->setSuccessor(HeaderIdx, InsertBot);
  300. LatchBR->setSuccessor(1 - HeaderIdx, Exit);
  301. if (DT)
  302. DT->changeImmediateDominator(InsertBot, NewLatch);
  303. // The new copy of the loop body starts with a bunch of PHI nodes
  304. // that pick an incoming value from either the preheader, or the previous
  305. // loop iteration. Since this copy is no longer part of the loop, we
  306. // resolve this statically:
  307. // For the first iteration, we use the value from the preheader directly.
  308. // For any other iteration, we replace the phi with the value generated by
  309. // the immediately preceding clone of the loop body (which represents
  310. // the previous iteration).
  311. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
  312. PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
  313. if (IterNumber == 0) {
  314. VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
  315. } else {
  316. Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
  317. Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
  318. if (LatchInst && L->contains(LatchInst))
  319. VMap[&*I] = LVMap[LatchInst];
  320. else
  321. VMap[&*I] = LatchVal;
  322. }
  323. cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
  324. }
  325. // Fix up the outgoing values - we need to add a value for the iteration
  326. // we've just created. Note that this must happen *after* the incoming
  327. // values are adjusted, since the value going out of the latch may also be
  328. // a value coming into the header.
  329. for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) {
  330. PHINode *PHI = cast<PHINode>(I);
  331. Value *LatchVal = PHI->getIncomingValueForBlock(Latch);
  332. Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
  333. if (LatchInst && L->contains(LatchInst))
  334. LatchVal = VMap[LatchVal];
  335. PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
  336. }
  337. // LastValueMap is updated with the values for the current loop
  338. // which are used the next time this function is called.
  339. for (const auto &KV : VMap)
  340. LVMap[KV.first] = KV.second;
  341. }
  342. /// \brief Peel off the first \p PeelCount iterations of loop \p L.
  343. ///
  344. /// Note that this does not peel them off as a single straight-line block.
  345. /// Rather, each iteration is peeled off separately, and needs to check the
  346. /// exit condition.
  347. /// For loops that dynamically execute \p PeelCount iterations or less
  348. /// this provides a benefit, since the peeled off iterations, which account
  349. /// for the bulk of dynamic execution, can be further simplified by scalar
  350. /// optimizations.
  351. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
  352. ScalarEvolution *SE, DominatorTree *DT,
  353. AssumptionCache *AC, bool PreserveLCSSA) {
  354. if (!canPeel(L))
  355. return false;
  356. LoopBlocksDFS LoopBlocks(L);
  357. LoopBlocks.perform(LI);
  358. BasicBlock *Header = L->getHeader();
  359. BasicBlock *PreHeader = L->getLoopPreheader();
  360. BasicBlock *Latch = L->getLoopLatch();
  361. BasicBlock *Exit = L->getUniqueExitBlock();
  362. Function *F = Header->getParent();
  363. // Set up all the necessary basic blocks. It is convenient to split the
  364. // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
  365. // body, and a new preheader for the "real" loop.
  366. // Peeling the first iteration transforms.
  367. //
  368. // PreHeader:
  369. // ...
  370. // Header:
  371. // LoopBody
  372. // If (cond) goto Header
  373. // Exit:
  374. //
  375. // into
  376. //
  377. // InsertTop:
  378. // LoopBody
  379. // If (!cond) goto Exit
  380. // InsertBot:
  381. // NewPreHeader:
  382. // ...
  383. // Header:
  384. // LoopBody
  385. // If (cond) goto Header
  386. // Exit:
  387. //
  388. // Each following iteration will split the current bottom anchor in two,
  389. // and put the new copy of the loop body between these two blocks. That is,
  390. // after peeling another iteration from the example above, we'll split
  391. // InsertBot, and get:
  392. //
  393. // InsertTop:
  394. // LoopBody
  395. // If (!cond) goto Exit
  396. // InsertBot:
  397. // LoopBody
  398. // If (!cond) goto Exit
  399. // InsertBot.next:
  400. // NewPreHeader:
  401. // ...
  402. // Header:
  403. // LoopBody
  404. // If (cond) goto Header
  405. // Exit:
  406. BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
  407. BasicBlock *InsertBot =
  408. SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
  409. BasicBlock *NewPreHeader =
  410. SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
  411. InsertTop->setName(Header->getName() + ".peel.begin");
  412. InsertBot->setName(Header->getName() + ".peel.next");
  413. NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
  414. ValueToValueMapTy LVMap;
  415. // If we have branch weight information, we'll want to update it for the
  416. // newly created branches.
  417. BranchInst *LatchBR =
  418. cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
  419. unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
  420. uint64_t TrueWeight, FalseWeight;
  421. uint64_t ExitWeight = 0, CurHeaderWeight = 0;
  422. if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
  423. ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
  424. // The # of times the loop body executes is the sum of the exit block
  425. // weight and the # of times the backedges are taken.
  426. CurHeaderWeight = TrueWeight + FalseWeight;
  427. }
  428. // For each peeled-off iteration, make a copy of the loop.
  429. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
  430. SmallVector<BasicBlock *, 8> NewBlocks;
  431. ValueToValueMapTy VMap;
  432. // Subtract the exit weight from the current header weight -- the exit
  433. // weight is exactly the weight of the previous iteration's header.
  434. // FIXME: due to the way the distribution is constructed, we need a
  435. // guard here to make sure we don't end up with non-positive weights.
  436. if (ExitWeight < CurHeaderWeight)
  437. CurHeaderWeight -= ExitWeight;
  438. else
  439. CurHeaderWeight = 1;
  440. cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
  441. NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
  442. // Remap to use values from the current iteration instead of the
  443. // previous one.
  444. remapInstructionsInBlocks(NewBlocks, VMap);
  445. if (DT) {
  446. // Latches of the cloned loops dominate over the loop exit, so idom of the
  447. // latter is the first cloned loop body, as original PreHeader dominates
  448. // the original loop body.
  449. if (Iter == 0)
  450. DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
  451. #ifndef NDEBUG
  452. if (VerifyDomInfo)
  453. DT->verifyDomTree();
  454. #endif
  455. }
  456. updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
  457. PeelCount, ExitWeight);
  458. InsertTop = InsertBot;
  459. InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
  460. InsertBot->setName(Header->getName() + ".peel.next");
  461. F->getBasicBlockList().splice(InsertTop->getIterator(),
  462. F->getBasicBlockList(),
  463. NewBlocks[0]->getIterator(), F->end());
  464. }
  465. // Now adjust the phi nodes in the loop header to get their initial values
  466. // from the last peeled-off iteration instead of the preheader.
  467. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
  468. PHINode *PHI = cast<PHINode>(I);
  469. Value *NewVal = PHI->getIncomingValueForBlock(Latch);
  470. Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
  471. if (LatchInst && L->contains(LatchInst))
  472. NewVal = LVMap[LatchInst];
  473. PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
  474. }
  475. // Adjust the branch weights on the loop exit.
  476. if (ExitWeight) {
  477. // The backedge count is the difference of current header weight and
  478. // current loop exit weight. If the current header weight is smaller than
  479. // the current loop exit weight, we mark the loop backedge weight as 1.
  480. uint64_t BackEdgeWeight = 0;
  481. if (ExitWeight < CurHeaderWeight)
  482. BackEdgeWeight = CurHeaderWeight - ExitWeight;
  483. else
  484. BackEdgeWeight = 1;
  485. MDBuilder MDB(LatchBR->getContext());
  486. MDNode *WeightNode =
  487. HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
  488. : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
  489. LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
  490. }
  491. // If the loop is nested, we changed the parent loop, update SE.
  492. if (Loop *ParentLoop = L->getParentLoop()) {
  493. SE->forgetLoop(ParentLoop);
  494. // FIXME: Incrementally update loop-simplify
  495. simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
  496. } else {
  497. // FIXME: Incrementally update loop-simplify
  498. simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
  499. }
  500. NumPeeled++;
  501. return true;
  502. }