SimpleLoopUnswitch.cpp

  1. //===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
  10. #include "llvm/ADT/DenseMap.h"
  11. #include "llvm/ADT/STLExtras.h"
  12. #include "llvm/ADT/Sequence.h"
  13. #include "llvm/ADT/SetVector.h"
  14. #include "llvm/ADT/SmallPtrSet.h"
  15. #include "llvm/ADT/SmallVector.h"
  16. #include "llvm/ADT/Statistic.h"
  17. #include "llvm/ADT/Twine.h"
  18. #include "llvm/Analysis/AssumptionCache.h"
  19. #include "llvm/Analysis/CodeMetrics.h"
  20. #include "llvm/Analysis/LoopAnalysisManager.h"
  21. #include "llvm/Analysis/LoopInfo.h"
  22. #include "llvm/Analysis/LoopPass.h"
  23. #include "llvm/IR/BasicBlock.h"
  24. #include "llvm/IR/Constant.h"
  25. #include "llvm/IR/Constants.h"
  26. #include "llvm/IR/Dominators.h"
  27. #include "llvm/IR/Function.h"
  28. #include "llvm/IR/InstrTypes.h"
  29. #include "llvm/IR/Instruction.h"
  30. #include "llvm/IR/Instructions.h"
  31. #include "llvm/IR/IntrinsicInst.h"
  32. #include "llvm/IR/Use.h"
  33. #include "llvm/IR/Value.h"
  34. #include "llvm/Pass.h"
  35. #include "llvm/Support/Casting.h"
  36. #include "llvm/Support/Debug.h"
  37. #include "llvm/Support/ErrorHandling.h"
  38. #include "llvm/Support/GenericDomTree.h"
  39. #include "llvm/Support/raw_ostream.h"
  40. #include "llvm/Support/CommandLine.h"
  41. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  42. #include "llvm/Transforms/Utils/Cloning.h"
  43. #include "llvm/Transforms/Utils/LoopUtils.h"
  44. #include "llvm/Transforms/Utils/ValueMapper.h"
  45. #include <algorithm>
  46. #include <cassert>
  47. #include <iterator>
  48. #include <numeric>
  49. #include <utility>
  50. #define DEBUG_TYPE "simple-loop-unswitch"
  51. using namespace llvm;
  52. STATISTIC(NumBranches, "Number of branches unswitched");
  53. STATISTIC(NumSwitches, "Number of switches unswitched");
  54. STATISTIC(NumTrivial, "Number of unswitches that are trivial");
  55. static cl::opt<bool> EnableNonTrivialUnswitch(
  56. "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
  57. cl::desc("Forcibly enables non-trivial loop unswitching rather than "
  58. "following the configuration passed into the pass."));
  59. static cl::opt<int>
  60. UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
  61. cl::desc("The cost threshold for unswitching a loop."));
  62. static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
  63. Constant &Replacement) {
  64. assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
  65. // Replace uses of LIC in the loop with the given constant.
  66. for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
  67. // Grab the use and walk past it so we can clobber it in the use list.
  68. Use *U = &*UI++;
  69. Instruction *UserI = dyn_cast<Instruction>(U->getUser());
  70. if (!UserI || !L.contains(UserI))
  71. continue;
  72. // Replace this use within the loop body.
  73. *U = &Replacement;
  74. }
  75. }
  76. /// Update the IDom for a basic block whose predecessor set has changed.
  77. ///
  78. /// This routine is designed to work when the domtree update is relatively
  79. /// localized by leveraging a known common dominator, often a loop header.
  80. ///
  81. /// FIXME: Should consider hand-rolling a slightly more efficient non-DFS
  82. /// approach here as we can do that easily by persisting the candidate IDom's
  83. /// dominating set between each predecessor.
  84. ///
  85. /// FIXME: Longer term, many uses of this can be replaced by an incremental
  86. /// domtree update strategy that starts from a known dominating block and
  87. /// rebuilds that subtree.
  88. static bool updateIDomWithKnownCommonDominator(BasicBlock *BB,
  89. BasicBlock *KnownDominatingBB,
  90. DominatorTree &DT) {
  91. assert(pred_begin(BB) != pred_end(BB) &&
  92. "This routine does not handle unreachable blocks!");
  93. BasicBlock *OrigIDom = DT[BB]->getIDom()->getBlock();
  94. BasicBlock *IDom = *pred_begin(BB);
  95. assert(DT.dominates(KnownDominatingBB, IDom) &&
  96. "Bad known dominating block!");
  97. // Walk all of the other predecessors finding the nearest common dominator
  98. // until all predecessors are covered or we reach the loop header. The loop
  99. // header necessarily dominates all loop exit blocks in loop simplified form
  100. // so we can early-exit the moment we hit that block.
  101. for (auto PI = std::next(pred_begin(BB)), PE = pred_end(BB);
  102. PI != PE && IDom != KnownDominatingBB; ++PI) {
  103. assert(DT.dominates(KnownDominatingBB, *PI) &&
  104. "Bad known dominating block!");
  105. IDom = DT.findNearestCommonDominator(IDom, *PI);
  106. }
  107. if (IDom == OrigIDom)
  108. return false;
  109. DT.changeImmediateDominator(BB, IDom);
  110. return true;
  111. }
  112. // Note that we don't currently use the IDFCalculator here for two reasons:
  113. // 1) It computes dominator tree levels for the entire function on each run
  114. // of 'compute'. While this isn't terrible, given that we expect to update
  115. // relatively small subtrees of the domtree, it isn't necessarily the right
  116. // tradeoff.
  117. // 2) The interface doesn't fit this usage well. It doesn't operate in
  118. // an append-only fashion, and it builds several sets that we don't need.
  119. //
  120. // FIXME: Neither of these issues are a big deal and could be addressed with
  121. // some amount of refactoring of IDFCalculator. That would allow us to share
  122. // the core logic here (which is solving the same core problem).
  123. static void appendDomFrontier(DomTreeNode *Node,
  124. SmallSetVector<BasicBlock *, 4> &Worklist,
  125. SmallVectorImpl<DomTreeNode *> &DomNodes,
  126. SmallPtrSetImpl<BasicBlock *> &DomSet) {
  127. assert(DomNodes.empty() && "Must start with no dominator nodes.");
  128. assert(DomSet.empty() && "Must start with an empty dominator set.");
  129. // First flatten this subtree into sequence of nodes by doing a pre-order
  130. // walk.
  131. DomNodes.push_back(Node);
  132. // We intentionally re-evaluate the size as each node can add new children.
  133. // Because this is a tree walk, this cannot add any duplicates.
  134. for (int i = 0; i < (int)DomNodes.size(); ++i)
  135. DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end());
  136. // Now create a set of the basic blocks so we can quickly test for
  137. // dominated successors. We could in theory use the DFS numbers of the
  138. // dominator tree for this, but we want this to remain predictably fast
  139. // even while we mutate the dominator tree in ways that would invalidate
  140. // the DFS numbering.
  141. for (DomTreeNode *InnerN : DomNodes)
  142. DomSet.insert(InnerN->getBlock());
  143. // Now re-walk the nodes, appending every successor of every node that isn't
  144. // in the set. Note that we don't append the node itself, even though if it
  145. // is a successor it does not strictly dominate itself and thus it would be
  146. // part of the dominance frontier. The reason we don't append it is that
  147. // the node passed in came *from* the worklist and so it has already been
  148. // processed.
  149. for (DomTreeNode *InnerN : DomNodes)
  150. for (BasicBlock *SuccBB : successors(InnerN->getBlock()))
  151. if (!DomSet.count(SuccBB))
  152. Worklist.insert(SuccBB);
  153. DomNodes.clear();
  154. DomSet.clear();
  155. }
  156. /// Update the dominator tree after unswitching a particular former exit block.
  157. ///
  158. /// This handles the full update of the dominator tree after hoisting a block
  159. /// that previously was an exit block (or split off of an exit block) up to be
  160. /// reached from the new immediate dominator of the preheader.
  161. ///
  162. /// The common case is simple -- we just move the unswitched block to have an
  163. /// immediate dominator of the old preheader. But in complex cases, there may
  164. /// be other blocks reachable from the unswitched block that are immediately
  165. /// dominated by some node between the unswitched one and the old preheader.
  166. /// All of these also need to be hoisted in the dominator tree. We also want to
  167. /// minimize queries to the dominator tree because each step of this
  168. /// invalidates any DFS numbers that would make queries fast.
  169. static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
  170. DominatorTree &DT) {
  171. DomTreeNode *OldPHNode = DT[OldPH];
  172. DomTreeNode *UnswitchedNode = DT[UnswitchedBB];
  173. // If the dominator tree has already been updated for this unswitched node,
  174. // we're done. This makes it easier to use this routine if there are multiple
  175. // paths to the same unswitched destination.
  176. if (UnswitchedNode->getIDom() == OldPHNode)
  177. return;
  178. // First collect the domtree nodes that we are hoisting over. These are the
  179. // set of nodes which may have children that need to be hoisted as well.
  180. SmallPtrSet<DomTreeNode *, 4> DomChain;
  181. for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode;
  182. IDom = IDom->getIDom())
  183. DomChain.insert(IDom);
  184. // The unswitched block ends up immediately dominated by the old preheader --
  185. // regardless of whether it is the loop exit block or split off of the loop
  186. // exit block.
  187. DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
  188. // For everything that moves up the dominator tree, we need to examine the
  189. // dominator frontier to see if it additionally should move up the dominator
  190. // tree. This lambda appends the dominator frontier for a node on the
  191. // worklist.
  192. SmallSetVector<BasicBlock *, 4> Worklist;
  193. // Scratch data structures reused by domfrontier finding.
  194. SmallVector<DomTreeNode *, 4> DomNodes;
  195. SmallPtrSet<BasicBlock *, 4> DomSet;
  196. // Append the initial dom frontier nodes.
  197. appendDomFrontier(UnswitchedNode, Worklist, DomNodes, DomSet);
  198. // Walk the worklist. We grow the list in the loop and so must recompute size.
  199. for (int i = 0; i < (int)Worklist.size(); ++i) {
  200. auto *BB = Worklist[i];
  201. DomTreeNode *Node = DT[BB];
  202. assert(!DomChain.count(Node) &&
  203. "Cannot be dominated by a block you can reach!");
  204. // If this block had an immediate dominator somewhere in the chain
  205. // we hoisted over, then its position in the domtree needs to move as it is
  206. // reachable from a node hoisted over this chain.
  207. if (!DomChain.count(Node->getIDom()))
  208. continue;
  209. DT.changeImmediateDominator(Node, OldPHNode);
  210. // Now add this node's dominator frontier to the worklist as well.
  211. appendDomFrontier(Node, Worklist, DomNodes, DomSet);
  212. }
  213. }
  214. /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
  215. /// incoming values along this edge.
  216. static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
  217. BasicBlock &ExitBB) {
  218. for (Instruction &I : ExitBB) {
  219. auto *PN = dyn_cast<PHINode>(&I);
  220. if (!PN)
  221. // No more PHIs to check.
  222. return true;
  223. // If the incoming value for this edge isn't loop invariant the unswitch
  224. // won't be trivial.
  225. if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
  226. return false;
  227. }
  228. llvm_unreachable("Basic blocks should never be empty!");
  229. }
  230. /// Rewrite the PHI nodes in an unswitched loop exit basic block.
  231. ///
  232. /// Requires that the loop exit and unswitched basic block are the same, and
  233. /// that the exiting block was a unique predecessor of that block. Rewrites the
  234. /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
  235. /// PHI nodes from the old preheader that now contains the unswitched
  236. /// terminator.
  237. static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
  238. BasicBlock &OldExitingBB,
  239. BasicBlock &OldPH) {
  240. for (PHINode &PN : UnswitchedBB.phis()) {
  241. // When the loop exit is directly unswitched we just need to update the
  242. // incoming basic block. We loop to handle weird cases with repeated
  243. // incoming blocks, but expect to typically only have one operand here.
  244. for (auto i : seq<int>(0, PN.getNumOperands())) {
  245. assert(PN.getIncomingBlock(i) == &OldExitingBB &&
  246. "Found incoming block different from unique predecessor!");
  247. PN.setIncomingBlock(i, &OldPH);
  248. }
  249. }
  250. }
  251. /// Rewrite the PHI nodes in the loop exit basic block and the split off
  252. /// unswitched block.
  253. ///
  254. /// Because the exit block remains an exit from the loop, this rewrites the
  255. /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
  256. /// nodes into the unswitched basic block to select between the value in the
  257. /// old preheader and the loop exit.
  258. static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
  259. BasicBlock &UnswitchedBB,
  260. BasicBlock &OldExitingBB,
  261. BasicBlock &OldPH) {
  262. assert(&ExitBB != &UnswitchedBB &&
  263. "Must have different loop exit and unswitched blocks!");
  264. Instruction *InsertPt = &*UnswitchedBB.begin();
  265. for (PHINode &PN : ExitBB.phis()) {
  266. auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
  267. PN.getName() + ".split", InsertPt);
  268. // Walk backwards over the old PHI node's inputs to minimize the cost of
  269. // removing each one. We have to do this weird loop manually so that we
  270. // create the same number of new incoming edges in the new PHI as we expect
  271. // each case-based edge to be included in the unswitched switch in some
  272. // cases.
  273. // FIXME: This is really, really gross. It would be much cleaner if LLVM
  274. // allowed us to create a single entry for a predecessor block without
  275. // having separate entries for each "edge" even though these edges are
  276. // required to produce identical results.
  277. for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
  278. if (PN.getIncomingBlock(i) != &OldExitingBB)
  279. continue;
  280. Value *Incoming = PN.removeIncomingValue(i);
  281. NewPN->addIncoming(Incoming, &OldPH);
  282. }
  283. // Now replace the old PHI with the new one and wire the old one in as an
  284. // input to the new one.
  285. PN.replaceAllUsesWith(NewPN);
  286. NewPN->addIncoming(&PN, &ExitBB);
  287. }
  288. }
  289. /// Unswitch a trivial branch if the condition is loop invariant.
  290. ///
  291. /// This routine should only be called when loop code leading to the branch has
  292. /// been validated as trivial (no side effects). This routine checks if the
  293. /// condition is invariant and one of the successors is a loop exit. This
  294. /// allows us to unswitch without duplicating the loop, making it trivial.
  295. ///
  296. /// If this routine fails to unswitch the branch it returns false.
  297. ///
  298. /// If the branch can be unswitched, this routine splits the preheader and
  299. /// hoists the branch above that split. Preserves loop simplified form
  300. /// (splitting the exit block as necessary). It simplifies the branch within
  301. /// the loop to an unconditional branch but doesn't remove it entirely. Further
  302. /// cleanup can be done with some simplify-cfg like pass.
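///
/// For illustration, a rough C-like sketch (the names are placeholders, not
/// tied to any particular IR): given a loop of the form
///
///   while (...) {
///     if (LoopInvariantCond)   // one successor leaves the loop
///       break;
///     ...body...
///   }
///
/// the invariant test is hoisted into the old (split-off) preheader so that
/// the exiting value of the condition branches directly to the (possibly
/// split) exit block while the other value falls through into the new
/// preheader and the loop. The branch left inside the loop becomes an
/// unconditional branch to the continue successor, and in-loop uses of the
/// condition are replaced with the constant that keeps the loop running.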
  303. static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
  304. LoopInfo &LI) {
  305. assert(BI.isConditional() && "Can only unswitch a conditional branch!");
  306. DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
  307. Value *LoopCond = BI.getCondition();
  308. // Need a trivial loop condition to unswitch.
  309. if (!L.isLoopInvariant(LoopCond))
  310. return false;
  311. // FIXME: We should compute this once at the start and update it!
  312. SmallVector<BasicBlock *, 16> ExitBlocks;
  313. L.getExitBlocks(ExitBlocks);
  314. SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
  315. ExitBlocks.end());
  316. // Check to see if a successor of the branch is guaranteed to
  317. // exit through a unique exit block without having any
  318. // side-effects. If so, determine the value of Cond that causes
  319. // it to do this.
  320. ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext());
  321. ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
  322. int LoopExitSuccIdx = 0;
  323. auto *LoopExitBB = BI.getSuccessor(0);
  324. if (!ExitBlockSet.count(LoopExitBB)) {
  325. std::swap(CondVal, Replacement);
  326. LoopExitSuccIdx = 1;
  327. LoopExitBB = BI.getSuccessor(1);
  328. if (!ExitBlockSet.count(LoopExitBB))
  329. return false;
  330. }
  331. auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
  332. assert(L.contains(ContinueBB) &&
  333. "Cannot have both successors exit and still be in the loop!");
  334. auto *ParentBB = BI.getParent();
  335. if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
  336. return false;
  337. DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal
  338. << " == " << LoopCond << "\n");
  339. // Split the preheader, so that we know that there is a safe place to insert
  340. // the conditional branch. We will change the preheader to have a conditional
  341. // branch on LoopCond.
  342. BasicBlock *OldPH = L.getLoopPreheader();
  343. BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
  344. // Now that we have a place to insert the conditional branch, create a place
  345. // to branch to: this is the exit block out of the loop that we are
  346. // unswitching. We need to split this if there are other loop predecessors.
  347. // Because the loop is in simplified form, *any* other predecessor is enough.
  348. BasicBlock *UnswitchedBB;
  349. if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
  350. (void)PredBB;
  351. assert(PredBB == BI.getParent() &&
  352. "A branch's parent isn't a predecessor!");
  353. UnswitchedBB = LoopExitBB;
  354. } else {
  355. UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
  356. }
  357. // Now splice the branch to gate reaching the new preheader and re-point its
  358. // successors.
  359. OldPH->getInstList().splice(std::prev(OldPH->end()),
  360. BI.getParent()->getInstList(), BI);
  361. OldPH->getTerminator()->eraseFromParent();
  362. BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
  363. BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
  364. // Create a new unconditional branch that will continue the loop as a new
  365. // terminator.
  366. BranchInst::Create(ContinueBB, ParentBB);
  367. // Rewrite the relevant PHI nodes.
  368. if (UnswitchedBB == LoopExitBB)
  369. rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
  370. else
  371. rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
  372. *ParentBB, *OldPH);
  373. // Now we need to update the dominator tree.
  374. updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
  375. // But if we split something off of the loop exit block then we also removed
  376. // one of the predecessors for the loop exit block and may need to update its
  377. // idom.
  378. if (UnswitchedBB != LoopExitBB)
  379. updateIDomWithKnownCommonDominator(LoopExitBB, L.getHeader(), DT);
  380. // Since this is an i1 condition we can also trivially replace uses of it
  381. // within the loop with a constant.
  382. replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
  383. ++NumTrivial;
  384. ++NumBranches;
  385. return true;
  386. }
  387. /// Unswitch a trivial switch if the condition is loop invariant.
  388. ///
  389. /// This routine should only be called when loop code leading to the switch has
  390. /// been validated as trivial (no side effects). This routine checks if the
  391. /// condition is invariant and that at least one of the successors is a loop
  392. /// exit. This allows us to unswitch without duplicating the loop, making it
  393. /// trivial.
  394. ///
  395. /// If this routine fails to unswitch the switch it returns false.
  396. ///
  397. /// If the switch can be unswitched, this routine splits the preheader and
  398. /// copies the switch above that split. If the default case is one of the
  399. /// exiting cases, it copies the non-exiting cases and points them at the new
  400. /// preheader. If the default case is not exiting, it copies the exiting cases
  401. /// and points the default at the preheader. It preserves loop simplified form
  402. /// (splitting the exit blocks as necessary). It simplifies the switch within
  403. /// the loop by removing now-dead cases. If the default case is one of those
  404. /// unswitched, it replaces its destination with a new basic block containing
  405. /// only unreachable. Such basic blocks, while technically loop exits, are not
  406. /// considered for unswitching so this is a stable transform and the same
  407. /// switch will not be revisited. If after unswitching there is only a single
  408. /// in-loop successor, the switch is further simplified to an unconditional
  409. /// branch. Still more cleanup can be done with some simplify-cfg like pass.
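///
/// For illustration, a rough C-like sketch (the names are placeholders, not
/// tied to any particular IR): given a loop of the form
///
///   while (...) {
///     switch (LoopInvariantVal) {
///     case 0: goto exit;       // loop-exiting case
///     default: break;          // stays in the loop
///     }
///     ...body...
///   }
///
/// the exiting case is removed from the in-loop switch and re-created on a
/// copy of the switch placed in the old preheader, whose default destination
/// is the new preheader (i.e. the loop). If instead the default itself exits,
/// the copied switch's default points at that exit and the remaining cases
/// are copied over pointing at the new preheader.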
  410. static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
  411. LoopInfo &LI) {
  412. DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
  413. Value *LoopCond = SI.getCondition();
  414. // If this isn't switching on an invariant condition, we can't unswitch it.
  415. if (!L.isLoopInvariant(LoopCond))
  416. return false;
  417. auto *ParentBB = SI.getParent();
  418. // FIXME: We should compute this once at the start and update it!
  419. SmallVector<BasicBlock *, 16> ExitBlocks;
  420. L.getExitBlocks(ExitBlocks);
  421. SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
  422. ExitBlocks.end());
  423. SmallVector<int, 4> ExitCaseIndices;
  424. for (auto Case : SI.cases()) {
  425. auto *SuccBB = Case.getCaseSuccessor();
  426. if (ExitBlockSet.count(SuccBB) &&
  427. areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
  428. ExitCaseIndices.push_back(Case.getCaseIndex());
  429. }
  430. BasicBlock *DefaultExitBB = nullptr;
  431. if (ExitBlockSet.count(SI.getDefaultDest()) &&
  432. areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
  433. !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
  434. DefaultExitBB = SI.getDefaultDest();
  435. else if (ExitCaseIndices.empty())
  436. return false;
  437. DEBUG(dbgs() << " unswitching trivial cases...\n");
  438. SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
  439. ExitCases.reserve(ExitCaseIndices.size());
  440. // We walk the case indices backwards so that we remove the last case first
  441. // and don't disrupt the earlier indices.
  442. for (unsigned Index : reverse(ExitCaseIndices)) {
  443. auto CaseI = SI.case_begin() + Index;
  444. // Save the value of this case.
  445. ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
  446. // Delete the unswitched cases.
  447. SI.removeCase(CaseI);
  448. }
  449. // Check if after this all of the remaining cases point at the same
  450. // successor.
  451. BasicBlock *CommonSuccBB = nullptr;
  452. if (SI.getNumCases() > 0 &&
  453. std::all_of(std::next(SI.case_begin()), SI.case_end(),
  454. [&SI](const SwitchInst::CaseHandle &Case) {
  455. return Case.getCaseSuccessor() ==
  456. SI.case_begin()->getCaseSuccessor();
  457. }))
  458. CommonSuccBB = SI.case_begin()->getCaseSuccessor();
  459. if (DefaultExitBB) {
  460. // We can't remove the default edge so replace it with an edge to either
  461. // the single common remaining successor (if we have one) or an unreachable
  462. // block.
  463. if (CommonSuccBB) {
  464. SI.setDefaultDest(CommonSuccBB);
  465. } else {
  466. BasicBlock *UnreachableBB = BasicBlock::Create(
  467. ParentBB->getContext(),
  468. Twine(ParentBB->getName()) + ".unreachable_default",
  469. ParentBB->getParent());
  470. new UnreachableInst(ParentBB->getContext(), UnreachableBB);
  471. SI.setDefaultDest(UnreachableBB);
  472. DT.addNewBlock(UnreachableBB, ParentBB);
  473. }
  474. } else {
  475. // If we're not unswitching the default, then for there to be a common
  476. // successor it must match the remaining cases; if there are no cases, the
  477. // default *is* the common successor.
  478. if (SI.getNumCases() == 0)
  479. CommonSuccBB = SI.getDefaultDest();
  480. else if (SI.getDefaultDest() != CommonSuccBB)
  481. CommonSuccBB = nullptr;
  482. }
  483. // Split the preheader, so that we know that there is a safe place to insert
  484. // the switch.
  485. BasicBlock *OldPH = L.getLoopPreheader();
  486. BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
  487. OldPH->getTerminator()->eraseFromParent();
  488. // Now add the unswitched switch.
  489. auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
  490. // Rewrite the IR for the unswitched basic blocks. This requires two steps.
  491. // First, we split any exit blocks with remaining in-loop predecessors. Then
  492. // we update the PHIs in one of two ways depending on if there was a split.
  493. // We walk in reverse so that we split in the same order as the cases
  494. // appeared. This is purely for convenience of reading the resulting IR, but
  495. // it doesn't cost anything really.
  496. SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
  497. SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
  498. // Handle the default exit if necessary.
  499. // FIXME: It'd be great if we could merge this with the loop below but LLVM's
  500. // ranges aren't quite powerful enough yet.
  501. if (DefaultExitBB) {
  502. if (pred_empty(DefaultExitBB)) {
  503. UnswitchedExitBBs.insert(DefaultExitBB);
  504. rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
  505. } else {
  506. auto *SplitBB =
  507. SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
  508. rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
  509. *ParentBB, *OldPH);
  510. updateIDomWithKnownCommonDominator(DefaultExitBB, L.getHeader(), DT);
  511. DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
  512. }
  513. }
  514. // Note that we must use a reference in the for loop so that we update the
  515. // container.
  516. for (auto &CasePair : reverse(ExitCases)) {
  517. // Grab a reference to the exit block in the pair so that we can update it.
  518. BasicBlock *ExitBB = CasePair.second;
  519. // If this case is the last edge into the exit block, we can simply reuse it
  520. // as it will no longer be a loop exit. No mapping necessary.
  521. if (pred_empty(ExitBB)) {
  522. // Only rewrite once.
  523. if (UnswitchedExitBBs.insert(ExitBB).second)
  524. rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
  525. continue;
  526. }
  527. // Otherwise we need to split the exit block so that we retain an exit
  528. // block from the loop and a target for the unswitched condition.
  529. BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
  530. if (!SplitExitBB) {
  531. // If this is the first time we see this, do the split and remember it.
  532. SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
  533. rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
  534. *ParentBB, *OldPH);
  535. updateIDomWithKnownCommonDominator(ExitBB, L.getHeader(), DT);
  536. }
  537. // Update the case pair to point to the split block.
  538. CasePair.second = SplitExitBB;
  539. }
  540. // Now add the unswitched cases. We do this in reverse order as we built them
  541. // in reverse order.
  542. for (auto CasePair : reverse(ExitCases)) {
  543. ConstantInt *CaseVal = CasePair.first;
  544. BasicBlock *UnswitchedBB = CasePair.second;
  545. NewSI->addCase(CaseVal, UnswitchedBB);
  546. updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
  547. }
  548. // If the default was unswitched, re-point it and add explicit cases for
  549. // entering the loop.
  550. if (DefaultExitBB) {
  551. NewSI->setDefaultDest(DefaultExitBB);
  552. updateDTAfterUnswitch(DefaultExitBB, OldPH, DT);
  553. // We removed all the exit cases, so we just copy the cases to the
  554. // unswitched switch.
  555. for (auto Case : SI.cases())
  556. NewSI->addCase(Case.getCaseValue(), NewPH);
  557. }
  558. // If we ended up with a common successor for every path through the switch
  559. // after unswitching, rewrite it to an unconditional branch to make it easy
  560. // to recognize. Otherwise we potentially have to recognize the default case
  561. // pointing at unreachable and other complexity.
  562. if (CommonSuccBB) {
  563. BasicBlock *BB = SI.getParent();
  564. SI.eraseFromParent();
  565. BranchInst::Create(CommonSuccBB, BB);
  566. }
  567. DT.verifyDomTree();
  568. ++NumTrivial;
  569. ++NumSwitches;
  570. return true;
  571. }
  572. /// This routine scans the loop to find a branch or switch which occurs before
  573. /// any side effects occur. These can potentially be unswitched without
  574. /// duplicating the loop. If a branch or switch is successfully unswitched the
  575. /// scanning continues to see if subsequent branches or switches have become
  576. /// trivial. Once all trivial candidates have been unswitched, this routine
  577. /// returns.
  578. ///
  579. /// The return value indicates whether anything was unswitched (and therefore
  580. /// changed).
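///
/// For illustration, a rough C-like sketch (the names are placeholders, not
/// tied to any particular IR): in a loop such as
///
///   while (...) {
///     if (Invariant1) break;   // trivial; unswitched first
///     if (Invariant2) break;   // becomes reachable from the header next
///     ...side effects...
///   }
///
/// unswitching the first branch leaves an unconditional branch in the header,
/// so the scan follows it into the next block and considers the second branch
/// as well, stopping once it sees a side effect, a constant condition, or a
/// block it has already visited.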
  581. static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
  582. LoopInfo &LI) {
  583. bool Changed = false;
  584. // If loop header has only one reachable successor we should keep looking for
  585. // trivial condition candidates in the successor as well. An alternative is
  586. // to constant fold conditions and merge successors into loop header (then we
  587. // only need to check header's terminator). The reason for not doing this in
  588. // LoopUnswitch pass is that it could potentially break LoopPassManager's
  589. // invariants. Folding dead branches could either eliminate the current loop
  590. // or make other loops unreachable. LCSSA form might also not be preserved
  591. // after deleting branches. The following code keeps traversing loop header's
  592. // successors until it finds the trivial condition candidate (condition that
  593. // is not a constant). Since unswitching generates branches with constant
  594. // conditions, this scenario could be very common in practice.
  595. BasicBlock *CurrentBB = L.getHeader();
  596. SmallPtrSet<BasicBlock *, 8> Visited;
  597. Visited.insert(CurrentBB);
  598. do {
  599. // Check if there are any side-effecting instructions (e.g. stores, calls,
  600. // volatile loads) in the part of the loop that the code *would* execute
  601. // without unswitching.
  602. if (llvm::any_of(*CurrentBB,
  603. [](Instruction &I) { return I.mayHaveSideEffects(); }))
  604. return Changed;
  605. TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
  606. if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
  607. // Don't bother trying to unswitch past a switch with a constant
  608. // condition. This should be removed prior to running this pass by
  609. // simplify-cfg.
  610. if (isa<Constant>(SI->getCondition()))
  611. return Changed;
  612. if (!unswitchTrivialSwitch(L, *SI, DT, LI))
  613. // Couldn't unswitch this one so we're done.
  614. return Changed;
  615. // Mark that we managed to unswitch something.
  616. Changed = true;
  617. // If unswitching turned the terminator into an unconditional branch then
  618. // we can continue. The unswitching logic specifically works to fold any
  619. // cases it can into an unconditional branch to make it easier to
  620. // recognize here.
  621. auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
  622. if (!BI || BI->isConditional())
  623. return Changed;
  624. CurrentBB = BI->getSuccessor(0);
  625. continue;
  626. }
  627. auto *BI = dyn_cast<BranchInst>(CurrentTerm);
  628. if (!BI)
  629. // We do not understand other terminator instructions.
  630. return Changed;
  631. // Don't bother trying to unswitch past an unconditional branch or a branch
  632. // with a constant value. These should be removed by simplify-cfg prior to
  633. // running this pass.
  634. if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
  635. return Changed;
  636. // Found a trivial condition candidate: non-foldable conditional branch. If
  637. // we fail to unswitch this, we can't do anything else that is trivial.
  638. if (!unswitchTrivialBranch(L, *BI, DT, LI))
  639. return Changed;
  640. // Mark that we managed to unswitch something.
  641. Changed = true;
  642. // We unswitched the branch. This should always leave us with an
  643. // unconditional branch that we can follow now.
  644. BI = cast<BranchInst>(CurrentBB->getTerminator());
  645. assert(!BI->isConditional() &&
  646. "Cannot form a conditional branch by unswitching1");
  647. CurrentBB = BI->getSuccessor(0);
  648. // When continuing, if we exit the loop or reach a previously visited block,
  649. // then we cannot reach any trivial condition candidates (unfoldable
  650. // branch instructions or switch instructions) and no unswitch can happen.
  651. } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
  652. return Changed;
  653. }
  654. /// Build the cloned blocks for an unswitched copy of the given loop.
  655. ///
  656. /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
  657. /// after the split block (`SplitBB`) that will be used to select between the
  658. /// cloned and original loop.
  659. ///
  660. /// This routine handles cloning all of the necessary loop blocks and exit
  661. /// blocks including rewriting their instructions and the relevant PHI nodes.
  662. /// It skips loop and exit blocks that are not necessary based on the provided
  663. /// set. It also correctly creates the unconditional branch in the cloned
  664. /// unswitched parent block to only point at the unswitched successor.
  665. ///
  666. /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
  667. /// block splitting is correctly reflected in `LoopInfo`; essentially all of
  668. /// the cloned blocks (and their loops) are left without full `LoopInfo`
  669. /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
  670. /// blocks to it but doesn't create the cloned `DominatorTree` structure and
  671. /// instead the caller must recompute an accurate DT. It *does* correctly
  672. /// update the `AssumptionCache` provided in `AC`.
  673. static BasicBlock *buildClonedLoopBlocks(
  674. Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
  675. ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
  676. BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
  677. const SmallPtrSetImpl<BasicBlock *> &SkippedLoopAndExitBlocks,
  678. ValueToValueMapTy &VMap, AssumptionCache &AC, DominatorTree &DT,
  679. LoopInfo &LI) {
  680. SmallVector<BasicBlock *, 4> NewBlocks;
  681. NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
  682. // We will need to clone a bunch of blocks, wrap up the clone operation in
  683. // a helper.
  684. auto CloneBlock = [&](BasicBlock *OldBB) {
  685. // Clone the basic block and insert it before the new preheader.
  686. BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
  687. NewBB->moveBefore(LoopPH);
  688. // Record this block and the mapping.
  689. NewBlocks.push_back(NewBB);
  690. VMap[OldBB] = NewBB;
  691. // Add the block to the domtree. We'll move it to the correct position
  692. // below.
  693. DT.addNewBlock(NewBB, SplitBB);
  694. return NewBB;
  695. };
  696. // First, clone the preheader.
  697. auto *ClonedPH = CloneBlock(LoopPH);
  698. // Then clone all the loop blocks, skipping the ones that aren't necessary.
  699. for (auto *LoopBB : L.blocks())
  700. if (!SkippedLoopAndExitBlocks.count(LoopBB))
  701. CloneBlock(LoopBB);
  702. // Split all the loop exit edges so that when we clone the exit blocks, if
  703. // any of the exit blocks are *also* a preheader for some other loop, we
  704. // don't create multiple predecessors entering the loop header.
  705. for (auto *ExitBB : ExitBlocks) {
  706. if (SkippedLoopAndExitBlocks.count(ExitBB))
  707. continue;
  708. // When we are going to clone an exit, we don't need to clone all the
  709. // instructions in the exit block and we want to ensure we have an easy
  710. // place to merge the CFG, so split the exit first. This is always safe to
  711. // do because there cannot be any non-loop predecessors of a loop exit in
  712. // loop simplified form.
  713. auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
  714. // Rearrange the names to make it easier to write test cases by having the
  715. // exit block carry the suffix rather than the merge block carrying the
  716. // suffix.
  717. MergeBB->takeName(ExitBB);
  718. ExitBB->setName(Twine(MergeBB->getName()) + ".split");
  719. // Now clone the original exit block.
  720. auto *ClonedExitBB = CloneBlock(ExitBB);
  721. assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
  722. "Exit block should have been split to have one successor!");
  723. assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
  724. "Cloned exit block has the wrong successor!");
  725. // Move the merge block's idom to be the split point as one exit is
  726. // dominated by one header, and the other by another, so we know the split
  727. // point dominates both. While the dominator tree isn't fully accurate, we
  728. // want sub-trees within the original loop to correctly reflect
  729. // dominance within that original loop (at least) and that requires moving
  730. // the merge block out of that subtree.
  731. // FIXME: This is very brittle as we essentially have a partial contract on
  732. // the dominator tree. We really need to instead update it and keep it
  733. // valid or stop relying on it.
  734. DT.changeImmediateDominator(MergeBB, SplitBB);
  735. // Remap any cloned instructions and create a merge phi node for them.
  736. for (auto ZippedInsts : llvm::zip_first(
  737. llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
  738. llvm::make_range(ClonedExitBB->begin(),
  739. std::prev(ClonedExitBB->end())))) {
  740. Instruction &I = std::get<0>(ZippedInsts);
  741. Instruction &ClonedI = std::get<1>(ZippedInsts);
  742. // The only instructions in the exit block should be PHI nodes and
  743. // potentially a landing pad.
  744. assert(
  745. (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
  746. "Bad instruction in exit block!");
  747. // We should have a value map between the instruction and its clone.
  748. assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
  749. auto *MergePN =
  750. PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
  751. &*MergeBB->getFirstInsertionPt());
  752. I.replaceAllUsesWith(MergePN);
  753. MergePN->addIncoming(&I, ExitBB);
  754. MergePN->addIncoming(&ClonedI, ClonedExitBB);
  755. }
  756. }
  757. // Rewrite the instructions in the cloned blocks to refer to the instructions
  758. // in the cloned blocks. We have to do this as a second pass so that we have
  759. // everything available. Also, we have inserted new instructions which may
  760. // include assume intrinsics, so we update the assumption cache while
  761. // processing this.
  762. for (auto *ClonedBB : NewBlocks)
  763. for (Instruction &I : *ClonedBB) {
  764. RemapInstruction(&I, VMap,
  765. RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  766. if (auto *II = dyn_cast<IntrinsicInst>(&I))
  767. if (II->getIntrinsicID() == Intrinsic::assume)
  768. AC.registerAssumption(II);
  769. }
  770. // Remove the cloned parent as a predecessor of the cloned continue successor
  771. // if we did in fact clone it.
  772. auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
  773. if (auto *ClonedContinueSuccBB =
  774. cast_or_null<BasicBlock>(VMap.lookup(ContinueSuccBB)))
  775. ClonedContinueSuccBB->removePredecessor(ClonedParentBB,
  776. /*DontDeleteUselessPHIs*/ true);
  777. // Replace the cloned branch with an unconditional branch to the cloned
  778. // unswitched successor.
  779. auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
  780. ClonedParentBB->getTerminator()->eraseFromParent();
  781. BranchInst::Create(ClonedSuccBB, ClonedParentBB);
  782. // Update any PHI nodes in the cloned successors of the skipped blocks to not
  783. // have spurious incoming values.
  784. for (auto *LoopBB : L.blocks())
  785. if (SkippedLoopAndExitBlocks.count(LoopBB))
  786. for (auto *SuccBB : successors(LoopBB))
  787. if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
  788. for (PHINode &PN : ClonedSuccBB->phis())
  789. PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
  790. return ClonedPH;
  791. }
  792. /// Recursively clone the specified loop and all of its children.
  793. ///
  794. /// The target parent loop for the clone should be provided, or can be null if
  795. /// the clone is a top-level loop. While cloning, all the blocks are mapped
  796. /// with the provided value map. The entire original loop must be present in
  797. /// the value map. The cloned loop is returned.
  798. static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
  799. const ValueToValueMapTy &VMap, LoopInfo &LI) {
  800. auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
  801. assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
  802. ClonedL.reserveBlocks(OrigL.getNumBlocks());
  803. for (auto *BB : OrigL.blocks()) {
  804. auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
  805. ClonedL.addBlockEntry(ClonedBB);
  806. if (LI.getLoopFor(BB) == &OrigL) {
  807. assert(!LI.getLoopFor(ClonedBB) &&
  808. "Should not have an existing loop for this block!");
  809. LI.changeLoopFor(ClonedBB, &ClonedL);
  810. }
  811. }
  812. };
  813. // We specially handle the first loop because it may get cloned into
  814. // a different parent and because we most commonly are cloning leaf loops.
  815. Loop *ClonedRootL = LI.AllocateLoop();
  816. if (RootParentL)
  817. RootParentL->addChildLoop(ClonedRootL);
  818. else
  819. LI.addTopLevelLoop(ClonedRootL);
  820. AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
  821. if (OrigRootL.empty())
  822. return ClonedRootL;
  823. // If we have a nest, we can quickly clone the entire loop nest using an
  824. // iterative approach because it is a tree. We keep the cloned parent in the
  825. // data structure to avoid repeatedly querying through a map to find it.
  826. SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
  827. // Build up the loops to clone in reverse order as we'll clone them from the
  828. // back.
  829. for (Loop *ChildL : llvm::reverse(OrigRootL))
  830. LoopsToClone.push_back({ClonedRootL, ChildL});
  831. do {
  832. Loop *ClonedParentL, *L;
  833. std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
  834. Loop *ClonedL = LI.AllocateLoop();
  835. ClonedParentL->addChildLoop(ClonedL);
  836. AddClonedBlocksToLoop(*L, *ClonedL);
  837. for (Loop *ChildL : llvm::reverse(*L))
  838. LoopsToClone.push_back({ClonedL, ChildL});
  839. } while (!LoopsToClone.empty());
  840. return ClonedRootL;
  841. }
  842. /// Build the cloned loops of an original loop from unswitching.
  843. ///
  844. /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
  845. /// operation. We need to re-verify that there even is a loop (as the backedge
  846. /// may not have been cloned), and even if there are remaining backedges the
  847. /// backedge set may be different. However, we know that each child loop is
  848. /// undisturbed; we only need to find where to place each child loop within
  849. /// either any parent loop or within a cloned version of the original loop.
  850. ///
  851. /// Because child loops may end up cloned outside of any cloned version of the
  852. /// original loop, multiple cloned sibling loops may be created. All of them
  853. /// are returned so that the newly introduced loop nest roots can be
  854. /// identified.
  855. static Loop *buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
  856. const ValueToValueMapTy &VMap, LoopInfo &LI,
  857. SmallVectorImpl<Loop *> &NonChildClonedLoops) {
  858. Loop *ClonedL = nullptr;
  859. auto *OrigPH = OrigL.getLoopPreheader();
  860. auto *OrigHeader = OrigL.getHeader();
  861. auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
  862. auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
  863. // We need to know the loops of the cloned exit blocks to even compute the
  864. // accurate parent loop. If we only clone exits to some parent of the
  865. // original parent, we want to clone into that outer loop. We also keep track
  866. // of the loops that our cloned exit blocks participate in.
  867. Loop *ParentL = nullptr;
  868. SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
  869. SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
  870. ClonedExitsInLoops.reserve(ExitBlocks.size());
  871. for (auto *ExitBB : ExitBlocks)
  872. if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
  873. if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
  874. ExitLoopMap[ClonedExitBB] = ExitL;
  875. ClonedExitsInLoops.push_back(ClonedExitBB);
  876. if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
  877. ParentL = ExitL;
  878. }
  879. assert((!ParentL || ParentL == OrigL.getParentLoop() ||
  880. ParentL->contains(OrigL.getParentLoop())) &&
  881. "The computed parent loop should always contain (or be) the parent of "
  882. "the original loop.");
  883. // We build the set of blocks dominated by the cloned header from the set of
  884. // cloned blocks out of the original loop. While not all of these will
  885. // necessarily be in the cloned loop, it is enough to establish that they
  886. // aren't in unreachable cycles, etc.
  887. SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
  888. for (auto *BB : OrigL.blocks())
  889. if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
  890. ClonedLoopBlocks.insert(ClonedBB);
  891. // Rebuild the set of blocks that will end up in the cloned loop. We may have
  892. // skipped cloning some region of this loop which can in turn skip some of
  893. // the backedges so we have to rebuild the blocks in the loop based on the
  894. // backedges that remain after cloning.
  895. SmallVector<BasicBlock *, 16> Worklist;
  896. SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
  897. for (auto *Pred : predecessors(ClonedHeader)) {
  898. // The only possible non-loop header predecessor is the preheader because
  899. // we know we cloned the loop in simplified form.
  900. if (Pred == ClonedPH)
  901. continue;
  902. // Because the loop was in simplified form, the only non-loop predecessor
  903. // should be the preheader.
  904. assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
  905. "header other than the preheader "
  906. "that is not part of the loop!");
  907. // Insert this block into the loop set and on the first visit (and if it
  908. // isn't the header we're currently walking) put it into the worklist to
  909. // recurse through.
  910. if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
  911. Worklist.push_back(Pred);
  912. }
  913. // If we had any backedges then there *is* a cloned loop. Put the header into
  914. // the loop set and then walk the worklist backwards to find all the blocks
  915. // that remain within the loop after cloning.
  916. if (!BlocksInClonedLoop.empty()) {
  917. BlocksInClonedLoop.insert(ClonedHeader);
  918. while (!Worklist.empty()) {
  919. BasicBlock *BB = Worklist.pop_back_val();
  920. assert(BlocksInClonedLoop.count(BB) &&
  921. "Didn't put block into the loop set!");
  922. // Insert any predecessors that are in the possible set into the cloned
  923. // set, and if the insert is successful, add them to the worklist. Note
  924. // that we filter on the blocks that are definitely reachable via the
  925. // backedge to the loop header so we may prune out dead code within the
  926. // cloned loop.
  927. for (auto *Pred : predecessors(BB))
  928. if (ClonedLoopBlocks.count(Pred) &&
  929. BlocksInClonedLoop.insert(Pred).second)
  930. Worklist.push_back(Pred);
  931. }
  932. ClonedL = LI.AllocateLoop();
  933. if (ParentL) {
  934. ParentL->addBasicBlockToLoop(ClonedPH, LI);
  935. ParentL->addChildLoop(ClonedL);
  936. } else {
  937. LI.addTopLevelLoop(ClonedL);
  938. }
  939. ClonedL->reserveBlocks(BlocksInClonedLoop.size());
  940. // We don't want to just add the cloned loop blocks based on how we
  941. // discovered them. The original order of blocks was carefully built in
  942. // a way that doesn't rely on predecessor ordering. Rather than re-invent
  943. // that logic, we just re-walk the original blocks (and those of the child
  944. // loops) and filter them as we add them into the cloned loop.
  945. for (auto *BB : OrigL.blocks()) {
  946. auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
  947. if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
  948. continue;
  949. // Directly add the blocks that are only in this loop.
  950. if (LI.getLoopFor(BB) == &OrigL) {
  951. ClonedL->addBasicBlockToLoop(ClonedBB, LI);
  952. continue;
  953. }
  954. // We want to manually add it to this loop and parents.
  955. // Registering it with LoopInfo will happen when we clone the top
  956. // loop for this block.
  957. for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
  958. PL->addBlockEntry(ClonedBB);
  959. }
  960. // Now add each child loop whose header remains within the cloned loop. All
  961. // of the blocks within the loop must satisfy the same constraints as the
  962. // header so once we pass the header checks we can just clone the entire
  963. // child loop nest.
  964. for (Loop *ChildL : OrigL) {
  965. auto *ClonedChildHeader =
  966. cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
  967. if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
  968. continue;
  969. #ifndef NDEBUG
  970. // We should never have a cloned child loop header but fail to have
  971. // all of the blocks for that child loop.
  972. for (auto *ChildLoopBB : ChildL->blocks())
  973. assert(BlocksInClonedLoop.count(
  974. cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
  975. "Child cloned loop has a header within the cloned outer "
  976. "loop but not all of its blocks!");
  977. #endif
  978. cloneLoopNest(*ChildL, ClonedL, VMap, LI);
  979. }
  980. }
  981. // Now that we've handled all the components of the original loop that were
  982. // cloned into a new loop, we still need to handle anything from the original
  983. // loop that wasn't in a cloned loop.
  984. // Figure out what blocks are left to place within any loop nest containing
  985. // the unswitched loop. If we never formed a loop, the cloned PH is one of
  986. // them.
  987. SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
  988. if (BlocksInClonedLoop.empty())
  989. UnloopedBlockSet.insert(ClonedPH);
  990. for (auto *ClonedBB : ClonedLoopBlocks)
  991. if (!BlocksInClonedLoop.count(ClonedBB))
  992. UnloopedBlockSet.insert(ClonedBB);
993. // Copy the cloned exits and sort them in ascending loop depth; we'll work
  994. // backwards across these to process them inside out. The order shouldn't
  995. // matter as we're just trying to build up the map from inside-out; we use
  996. // the map in a more stably ordered way below.
  997. auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
  998. std::sort(OrderedClonedExitsInLoops.begin(), OrderedClonedExitsInLoops.end(),
  999. [&](BasicBlock *LHS, BasicBlock *RHS) {
  1000. return ExitLoopMap.lookup(LHS)->getLoopDepth() <
  1001. ExitLoopMap.lookup(RHS)->getLoopDepth();
  1002. });
  1003. // Populate the existing ExitLoopMap with everything reachable from each
1004. // exit, starting from the innermost exit.
  1005. while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
  1006. assert(Worklist.empty() && "Didn't clear worklist!");
  1007. BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
  1008. Loop *ExitL = ExitLoopMap.lookup(ExitBB);
  1009. // Walk the CFG back until we hit the cloned PH adding everything reachable
  1010. // and in the unlooped set to this exit block's loop.
  1011. Worklist.push_back(ExitBB);
  1012. do {
  1013. BasicBlock *BB = Worklist.pop_back_val();
  1014. // We can stop recursing at the cloned preheader (if we get there).
  1015. if (BB == ClonedPH)
  1016. continue;
  1017. for (BasicBlock *PredBB : predecessors(BB)) {
  1018. // If this pred has already been moved to our set or is part of some
  1019. // (inner) loop, no update needed.
  1020. if (!UnloopedBlockSet.erase(PredBB)) {
  1021. assert(
  1022. (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
  1023. "Predecessor not mapped to a loop!");
  1024. continue;
  1025. }
  1026. // We just insert into the loop set here. We'll add these blocks to the
  1027. // exit loop after we build up the set in an order that doesn't rely on
  1028. // predecessor order (which in turn relies on use list order).
  1029. bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
  1030. (void)Inserted;
  1031. assert(Inserted && "Should only visit an unlooped block once!");
  1032. // And recurse through to its predecessors.
  1033. Worklist.push_back(PredBB);
  1034. }
  1035. } while (!Worklist.empty());
  1036. }
1037. // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1038. // blocks to their outer loops, walk the cloned blocks and the cloned exits
1039. // in their original order, adding them to the correct loop.
1040. // We need a stable insertion order, so we use the original loop's block
1041. // order and add each block to the loop recorded for it in the map.
  1042. for (auto *BB : llvm::concat<BasicBlock *const>(
  1043. makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
  1044. if (Loop *OuterL = ExitLoopMap.lookup(BB))
  1045. OuterL->addBasicBlockToLoop(BB, LI);
  1046. #ifndef NDEBUG
  1047. for (auto &BBAndL : ExitLoopMap) {
  1048. auto *BB = BBAndL.first;
  1049. auto *OuterL = BBAndL.second;
  1050. assert(LI.getLoopFor(BB) == OuterL &&
  1051. "Failed to put all blocks into outer loops!");
  1052. }
  1053. #endif
  1054. // Now that all the blocks are placed into the correct containing loop in the
  1055. // absence of child loops, find all the potentially cloned child loops and
  1056. // clone them into whatever outer loop we placed their header into.
  1057. for (Loop *ChildL : OrigL) {
  1058. auto *ClonedChildHeader =
  1059. cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
  1060. if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
  1061. continue;
  1062. #ifndef NDEBUG
  1063. for (auto *ChildLoopBB : ChildL->blocks())
  1064. assert(VMap.count(ChildLoopBB) &&
1065. "Cloned a child loop header but not all of that loop's blocks!");
  1066. #endif
  1067. NonChildClonedLoops.push_back(cloneLoopNest(
  1068. *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
  1069. }
  1070. // Return the main cloned loop if any.
  1071. return ClonedL;
  1072. }
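/// Delete the blocks dominated by \p DeadSubtreeRoot that became dead as a
/// result of unswitching.
///
/// This collects the dominator subtree rooted at \p DeadSubtreeRoot, filters
/// those blocks out of \p ExitBlocks, detaches them from their successors and
/// from LoopInfo (destroying any fully dead child loops), and finally erases
/// the blocks and their dominator tree nodes.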
  1073. static void deleteDeadBlocksFromLoop(Loop &L, BasicBlock *DeadSubtreeRoot,
  1074. SmallVectorImpl<BasicBlock *> &ExitBlocks,
  1075. DominatorTree &DT, LoopInfo &LI) {
  1076. // Walk the dominator tree to build up the set of blocks we will delete here.
  1077. // The order is designed to allow us to always delete bottom-up and avoid any
  1078. // dangling uses.
  1079. SmallSetVector<BasicBlock *, 16> DeadBlocks;
  1080. DeadBlocks.insert(DeadSubtreeRoot);
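// Note that we iterate by index because DeadBlocks keeps growing as we walk
// the dominator subtree below; the SetVector keeps the visit order
// deterministic and free of duplicates.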
  1081. for (int i = 0; i < (int)DeadBlocks.size(); ++i)
  1082. for (DomTreeNode *ChildN : *DT[DeadBlocks[i]]) {
  1083. // FIXME: This assert should pass and that means we don't change nearly
  1084. // as much below! Consider rewriting all of this to avoid deleting
  1085. // blocks. They are always cloned before being deleted, and so instead
  1086. // could just be moved.
  1087. // FIXME: This in turn means that we might actually be more able to
  1088. // update the domtree.
  1089. assert((L.contains(ChildN->getBlock()) ||
  1090. llvm::find(ExitBlocks, ChildN->getBlock()) != ExitBlocks.end()) &&
  1091. "Should never reach beyond the loop and exits when deleting!");
  1092. DeadBlocks.insert(ChildN->getBlock());
  1093. }
  1094. // Filter out the dead blocks from the exit blocks list so that it can be
  1095. // used in the caller.
  1096. llvm::erase_if(ExitBlocks,
  1097. [&](BasicBlock *BB) { return DeadBlocks.count(BB); });
  1098. // Remove these blocks from their successors.
  1099. for (auto *BB : DeadBlocks)
  1100. for (BasicBlock *SuccBB : successors(BB))
  1101. SuccBB->removePredecessor(BB, /*DontDeleteUselessPHIs*/ true);
  1102. // Walk from this loop up through its parents removing all of the dead blocks.
  1103. for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
  1104. for (auto *BB : DeadBlocks)
  1105. ParentL->getBlocksSet().erase(BB);
  1106. llvm::erase_if(ParentL->getBlocksVector(),
  1107. [&](BasicBlock *BB) { return DeadBlocks.count(BB); });
  1108. }
  1109. // Now delete the dead child loops. This raw delete will clear them
  1110. // recursively.
  1111. llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
  1112. if (!DeadBlocks.count(ChildL->getHeader()))
  1113. return false;
  1114. assert(llvm::all_of(ChildL->blocks(),
  1115. [&](BasicBlock *ChildBB) {
  1116. return DeadBlocks.count(ChildBB);
  1117. }) &&
  1118. "If the child loop header is dead all blocks in the child loop must "
  1119. "be dead as well!");
  1120. LI.destroy(ChildL);
  1121. return true;
  1122. });
  1123. // Remove the mappings for the dead blocks.
  1124. for (auto *BB : DeadBlocks)
  1125. LI.changeLoopFor(BB, nullptr);
  1126. // Drop all the references from these blocks to others to handle cyclic
  1127. // references as we start deleting the blocks themselves.
  1128. for (auto *BB : DeadBlocks)
  1129. BB->dropAllReferences();
  1130. for (auto *BB : llvm::reverse(DeadBlocks)) {
  1131. DT.eraseNode(BB);
  1132. BB->eraseFromParent();
  1133. }
  1134. }
  1135. /// Recompute the set of blocks in a loop after unswitching.
  1136. ///
1137. /// This walks from the original header's predecessors to rebuild the loop. We
  1138. /// take advantage of the fact that new blocks can't have been added, and so we
  1139. /// filter by the original loop's blocks. This also handles potentially
  1140. /// unreachable code that we don't want to explore but might be found examining
  1141. /// the predecessors of the header.
  1142. ///
  1143. /// If the original loop is no longer a loop, this will return an empty set. If
  1144. /// it remains a loop, all the blocks within it will be added to the set
  1145. /// (including those blocks in inner loops).
  1146. static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
  1147. LoopInfo &LI) {
  1148. SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
  1149. auto *PH = L.getLoopPreheader();
  1150. auto *Header = L.getHeader();
  1151. // A worklist to use while walking backwards from the header.
  1152. SmallVector<BasicBlock *, 16> Worklist;
  1153. // First walk the predecessors of the header to find the backedges. This will
  1154. // form the basis of our walk.
  1155. for (auto *Pred : predecessors(Header)) {
  1156. // Skip the preheader.
  1157. if (Pred == PH)
  1158. continue;
  1159. // Because the loop was in simplified form, the only non-loop predecessor
  1160. // is the preheader.
  1161. assert(L.contains(Pred) && "Found a predecessor of the loop header other "
  1162. "than the preheader that is not part of the "
  1163. "loop!");
1164. // Insert this block into the loop set and, on the first visit (and if it
1165. // isn't the header we're currently walking), put it into the worklist to
1166. // recurse through.
  1167. if (LoopBlockSet.insert(Pred).second && Pred != Header)
  1168. Worklist.push_back(Pred);
  1169. }
  1170. // If no backedges were found, we're done.
  1171. if (LoopBlockSet.empty())
  1172. return LoopBlockSet;
  1173. // Add the loop header to the set.
  1174. LoopBlockSet.insert(Header);
  1175. // We found backedges, recurse through them to identify the loop blocks.
  1176. while (!Worklist.empty()) {
  1177. BasicBlock *BB = Worklist.pop_back_val();
  1178. assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
  1179. // Because we know the inner loop structure remains valid we can use the
  1180. // loop structure to jump immediately across the entire nested loop.
  1181. // Further, because it is in loop simplified form, we can directly jump
  1182. // to its preheader afterward.
  1183. if (Loop *InnerL = LI.getLoopFor(BB))
  1184. if (InnerL != &L) {
  1185. assert(L.contains(InnerL) &&
  1186. "Should not reach a loop *outside* this loop!");
  1187. // The preheader is the only possible predecessor of the loop so
  1188. // insert it into the set and check whether it was already handled.
  1189. auto *InnerPH = InnerL->getLoopPreheader();
  1190. assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
  1191. "but not contain the inner loop "
  1192. "preheader!");
  1193. if (!LoopBlockSet.insert(InnerPH).second)
  1194. // The only way to reach the preheader is through the loop body
  1195. // itself so if it has been visited the loop is already handled.
  1196. continue;
  1197. // Insert all of the blocks (other than those already present) into
  1198. // the loop set. The only block we expect to already be in the set is
  1199. // the one we used to find this loop as we immediately handle the
  1200. // others the first time we encounter the loop.
  1201. for (auto *InnerBB : InnerL->blocks()) {
  1202. if (InnerBB == BB) {
  1203. assert(LoopBlockSet.count(InnerBB) &&
  1204. "Block should already be in the set!");
  1205. continue;
  1206. }
  1207. bool Inserted = LoopBlockSet.insert(InnerBB).second;
  1208. (void)Inserted;
  1209. assert(Inserted && "Should only insert an inner loop once!");
  1210. }
  1211. // Add the preheader to the worklist so we will continue past the
  1212. // loop body.
  1213. Worklist.push_back(InnerPH);
  1214. continue;
  1215. }
  1216. // Insert any predecessors that were in the original loop into the new
  1217. // set, and if the insert is successful, add them to the worklist.
  1218. for (auto *Pred : predecessors(BB))
  1219. if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
  1220. Worklist.push_back(Pred);
  1221. }
  1222. // We've found all the blocks participating in the loop, return our completed
  1223. // set.
  1224. return LoopBlockSet;
  1225. }
  1226. /// Rebuild a loop after unswitching removes some subset of blocks and edges.
  1227. ///
  1228. /// The removal may have removed some child loops entirely but cannot have
  1229. /// disturbed any remaining child loops. However, they may need to be hoisted
  1230. /// to the parent loop (or to be top-level loops). The original loop may be
  1231. /// completely removed.
  1232. ///
  1233. /// The sibling loops resulting from this update are returned. If the original
  1234. /// loop remains a valid loop, it will be the first entry in this list with all
  1235. /// of the newly sibling loops following it.
  1236. ///
  1237. /// Returns true if the loop remains a loop after unswitching, and false if it
  1238. /// is no longer a loop after unswitching (and should not continue to be
  1239. /// referenced).
  1240. static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
  1241. LoopInfo &LI,
  1242. SmallVectorImpl<Loop *> &HoistedLoops) {
  1243. auto *PH = L.getLoopPreheader();
  1244. // Compute the actual parent loop from the exit blocks. Because we may have
  1245. // pruned some exits the loop may be different from the original parent.
  1246. Loop *ParentL = nullptr;
  1247. SmallVector<Loop *, 4> ExitLoops;
  1248. SmallVector<BasicBlock *, 4> ExitsInLoops;
  1249. ExitsInLoops.reserve(ExitBlocks.size());
  1250. for (auto *ExitBB : ExitBlocks)
  1251. if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
  1252. ExitLoops.push_back(ExitL);
  1253. ExitsInLoops.push_back(ExitBB);
  1254. if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
  1255. ParentL = ExitL;
  1256. }
  1257. // Recompute the blocks participating in this loop. This may be empty if it
  1258. // is no longer a loop.
  1259. auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
  1260. // If we still have a loop, we need to re-set the loop's parent as the exit
  1261. // block set changing may have moved it within the loop nest. Note that this
  1262. // can only happen when this loop has a parent as it can only hoist the loop
  1263. // *up* the nest.
  1264. if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
  1265. // Remove this loop's (original) blocks from all of the intervening loops.
  1266. for (Loop *IL = L.getParentLoop(); IL != ParentL;
  1267. IL = IL->getParentLoop()) {
  1268. IL->getBlocksSet().erase(PH);
  1269. for (auto *BB : L.blocks())
  1270. IL->getBlocksSet().erase(BB);
  1271. llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
  1272. return BB == PH || L.contains(BB);
  1273. });
  1274. }
  1275. LI.changeLoopFor(PH, ParentL);
  1276. L.getParentLoop()->removeChildLoop(&L);
  1277. if (ParentL)
  1278. ParentL->addChildLoop(&L);
  1279. else
  1280. LI.addTopLevelLoop(&L);
  1281. }
  1282. // Now we update all the blocks which are no longer within the loop.
  1283. auto &Blocks = L.getBlocksVector();
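// Partition the block vector so that the blocks remaining in the loop come
// first; when the loop dissolved entirely, every block is treated as unlooped.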
  1284. auto BlocksSplitI =
  1285. LoopBlockSet.empty()
  1286. ? Blocks.begin()
  1287. : std::stable_partition(
  1288. Blocks.begin(), Blocks.end(),
  1289. [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
  1290. // Before we erase the list of unlooped blocks, build a set of them.
  1291. SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
  1292. if (LoopBlockSet.empty())
  1293. UnloopedBlocks.insert(PH);
  1294. // Now erase these blocks from the loop.
  1295. for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
  1296. L.getBlocksSet().erase(BB);
  1297. Blocks.erase(BlocksSplitI, Blocks.end());
1298. // Sort the exits in ascending loop depth; we'll work backwards across these
  1299. // to process them inside out.
  1300. std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
  1301. [&](BasicBlock *LHS, BasicBlock *RHS) {
  1302. return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
  1303. });
  1304. // We'll build up a set for each exit loop.
  1305. SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
  1306. Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
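// Helper to strip a set of now-unlooped blocks out of a single loop's block
// set and block vector.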
  1307. auto RemoveUnloopedBlocksFromLoop =
  1308. [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
  1309. for (auto *BB : UnloopedBlocks)
  1310. L.getBlocksSet().erase(BB);
  1311. llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
  1312. return UnloopedBlocks.count(BB);
  1313. });
  1314. };
  1315. SmallVector<BasicBlock *, 16> Worklist;
  1316. while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
  1317. assert(Worklist.empty() && "Didn't clear worklist!");
  1318. assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
  1319. // Grab the next exit block, in decreasing loop depth order.
  1320. BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
  1321. Loop &ExitL = *LI.getLoopFor(ExitBB);
  1322. assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
  1323. // Erase all of the unlooped blocks from the loops between the previous
1324. // exit loop and this exit loop. This works because the ExitsInLoops list is
  1325. // sorted in increasing order of loop depth and thus we visit loops in
  1326. // decreasing order of loop depth.
  1327. for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
  1328. RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1329. // Walk the CFG back until we hit the loop's preheader, adding everything
1330. // reachable and in the unlooped set to this exit block's loop.
  1331. Worklist.push_back(ExitBB);
  1332. do {
  1333. BasicBlock *BB = Worklist.pop_back_val();
1334. // We can stop recursing at the preheader (if we get there).
  1335. if (BB == PH)
  1336. continue;
  1337. for (BasicBlock *PredBB : predecessors(BB)) {
  1338. // If this pred has already been moved to our set or is part of some
  1339. // (inner) loop, no update needed.
  1340. if (!UnloopedBlocks.erase(PredBB)) {
  1341. assert((NewExitLoopBlocks.count(PredBB) ||
  1342. ExitL.contains(LI.getLoopFor(PredBB))) &&
  1343. "Predecessor not in a nested loop (or already visited)!");
  1344. continue;
  1345. }
  1346. // We just insert into the loop set here. We'll add these blocks to the
  1347. // exit loop after we build up the set in a deterministic order rather
  1348. // than the predecessor-influenced visit order.
  1349. bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
  1350. (void)Inserted;
  1351. assert(Inserted && "Should only visit an unlooped block once!");
  1352. // And recurse through to its predecessors.
  1353. Worklist.push_back(PredBB);
  1354. }
  1355. } while (!Worklist.empty());
  1356. // If blocks in this exit loop were directly part of the original loop (as
  1357. // opposed to a child loop) update the map to point to this exit loop. This
  1358. // just updates a map and so the fact that the order is unstable is fine.
  1359. for (auto *BB : NewExitLoopBlocks)
  1360. if (Loop *BBL = LI.getLoopFor(BB))
  1361. if (BBL == &L || !L.contains(BBL))
  1362. LI.changeLoopFor(BB, &ExitL);
  1363. // We will remove the remaining unlooped blocks from this loop in the next
  1364. // iteration or below.
  1365. NewExitLoopBlocks.clear();
  1366. }
  1367. // Any remaining unlooped blocks are no longer part of any loop unless they
  1368. // are part of some child loop.
  1369. for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
  1370. RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
  1371. for (auto *BB : UnloopedBlocks)
  1372. if (Loop *BBL = LI.getLoopFor(BB))
  1373. if (BBL == &L || !L.contains(BBL))
  1374. LI.changeLoopFor(BB, nullptr);
  1375. // Sink all the child loops whose headers are no longer in the loop set to
  1376. // the parent (or to be top level loops). We reach into the loop and directly
  1377. // update its subloop vector to make this batch update efficient.
  1378. auto &SubLoops = L.getSubLoopsVector();
  1379. auto SubLoopsSplitI =
  1380. LoopBlockSet.empty()
  1381. ? SubLoops.begin()
  1382. : std::stable_partition(
  1383. SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
  1384. return LoopBlockSet.count(SubL->getHeader());
  1385. });
  1386. for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
  1387. HoistedLoops.push_back(HoistedL);
  1388. HoistedL->setParentLoop(nullptr);
  1389. // To compute the new parent of this hoisted loop we look at where we
1390. // placed the preheader above. We can't look up the header itself because we
  1391. // retained the mapping from the header to the hoisted loop. But the
  1392. // preheader and header should have the exact same new parent computed
  1393. // based on the set of exit blocks from the original loop as the preheader
  1394. // is a predecessor of the header and so reached in the reverse walk. And
  1395. // because the loops were all in simplified form the preheader of the
  1396. // hoisted loop can't be part of some *other* loop.
  1397. if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
  1398. NewParentL->addChildLoop(HoistedL);
  1399. else
  1400. LI.addTopLevelLoop(HoistedL);
  1401. }
  1402. SubLoops.erase(SubLoopsSplitI, SubLoops.end());
  1403. // Actually delete the loop if nothing remained within it.
  1404. if (Blocks.empty()) {
  1405. assert(SubLoops.empty() &&
  1406. "Failed to remove all subloops from the original loop!");
  1407. if (Loop *ParentL = L.getParentLoop())
  1408. ParentL->removeChildLoop(llvm::find(*ParentL, &L));
  1409. else
  1410. LI.removeLoop(llvm::find(LI, &L));
  1411. LI.destroy(&L);
  1412. return false;
  1413. }
  1414. return true;
  1415. }
  1416. /// Helper to visit a dominator subtree, invoking a callable on each node.
  1417. ///
  1418. /// Returning false at any point will stop walking past that node of the tree.
  1419. template <typename CallableT>
  1420. void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
  1421. SmallVector<DomTreeNode *, 4> DomWorklist;
  1422. DomWorklist.push_back(DT[BB]);
  1423. #ifndef NDEBUG
  1424. SmallPtrSet<DomTreeNode *, 4> Visited;
  1425. Visited.insert(DT[BB]);
  1426. #endif
  1427. do {
  1428. DomTreeNode *N = DomWorklist.pop_back_val();
  1429. // Visit this node.
  1430. if (!Callable(N->getBlock()))
  1431. continue;
  1432. // Accumulate the child nodes.
  1433. for (DomTreeNode *ChildN : *N) {
  1434. assert(Visited.insert(ChildN).second &&
  1435. "Cannot visit a node twice when walking a tree!");
  1436. DomWorklist.push_back(ChildN);
  1437. }
  1438. } while (!DomWorklist.empty());
  1439. }
  1440. /// Take an invariant branch that has been determined to be safe and worthwhile
  1441. /// to unswitch despite being non-trivial to do so and perform the unswitch.
  1442. ///
  1443. /// This directly updates the CFG to hoist the predicate out of the loop, and
  1444. /// clone the necessary parts of the loop to maintain behavior.
  1445. ///
  1446. /// It also updates both dominator tree and loopinfo based on the unswitching.
  1447. ///
  1448. /// Once unswitching has been performed it runs the provided callback to report
  1449. /// the new loops and no-longer valid loops to the caller.
  1450. static bool unswitchInvariantBranch(
  1451. Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
  1452. AssumptionCache &AC,
  1453. function_ref<void(bool, ArrayRef<Loop *>)> NonTrivialUnswitchCB) {
  1454. assert(BI.isConditional() && "Can only unswitch a conditional branch!");
  1455. assert(L.isLoopInvariant(BI.getCondition()) &&
  1456. "Can only unswitch an invariant branch condition!");
  1457. // Constant and BBs tracking the cloned and continuing successor.
  1458. const int ClonedSucc = 0;
  1459. auto *ParentBB = BI.getParent();
  1460. auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
  1461. auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);
  1462. assert(UnswitchedSuccBB != ContinueSuccBB &&
  1463. "Should not unswitch a branch that always goes to the same place!");
  1464. // The branch should be in this exact loop. Any inner loop's invariant branch
  1465. // should be handled by unswitching that inner loop. The caller of this
  1466. // routine should filter out any candidates that remain (but were skipped for
  1467. // whatever reason).
  1468. assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
  1469. SmallVector<BasicBlock *, 4> ExitBlocks;
  1470. L.getUniqueExitBlocks(ExitBlocks);
  1471. // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
  1472. // don't know how to split those exit blocks.
  1473. // FIXME: We should teach SplitBlock to handle this and remove this
  1474. // restriction.
  1475. for (auto *ExitBB : ExitBlocks)
  1476. if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI()))
  1477. return false;
  1478. SmallPtrSet<BasicBlock *, 4> ExitBlockSet(ExitBlocks.begin(),
  1479. ExitBlocks.end());
  1480. // Compute the parent loop now before we start hacking on things.
  1481. Loop *ParentL = L.getParentLoop();
  1482. // Compute the outer-most loop containing one of our exit blocks. This is the
  1483. // furthest up our loopnest which can be mutated, which we will use below to
  1484. // update things.
  1485. Loop *OuterExitL = &L;
  1486. for (auto *ExitBB : ExitBlocks) {
  1487. Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
  1488. if (!NewOuterExitL) {
  1489. // We exited the entire nest with this block, so we're done.
  1490. OuterExitL = nullptr;
  1491. break;
  1492. }
  1493. if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
  1494. OuterExitL = NewOuterExitL;
  1495. }
  1496. // If the edge we *aren't* cloning in the unswitch (the continuing edge)
  1497. // dominates its target, we can skip cloning the dominated region of the loop
  1498. // and its exits. We compute this as a set of nodes to be skipped.
  1499. SmallPtrSet<BasicBlock *, 4> SkippedLoopAndExitBlocks;
  1500. if (ContinueSuccBB->getUniquePredecessor() ||
  1501. llvm::all_of(predecessors(ContinueSuccBB), [&](BasicBlock *PredBB) {
  1502. return PredBB == ParentBB || DT.dominates(ContinueSuccBB, PredBB);
  1503. })) {
  1504. visitDomSubTree(DT, ContinueSuccBB, [&](BasicBlock *BB) {
  1505. SkippedLoopAndExitBlocks.insert(BB);
  1506. return true;
  1507. });
  1508. }
  1509. // Similarly, if the edge we *are* cloning in the unswitch (the unswitched
  1510. // edge) dominates its target, we will end up with dead nodes in the original
  1511. // loop and its exits that will need to be deleted. Here, we just retain that
  1512. // the property holds and will compute the deleted set later.
  1513. bool DeleteUnswitchedSucc =
  1514. UnswitchedSuccBB->getUniquePredecessor() ||
  1515. llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
  1516. return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
  1517. });
  1518. // Split the preheader, so that we know that there is a safe place to insert
  1519. // the conditional branch. We will change the preheader to have a conditional
  1520. // branch on LoopCond. The original preheader will become the split point
  1521. // between the unswitched versions, and we will have a new preheader for the
  1522. // original loop.
  1523. BasicBlock *SplitBB = L.getLoopPreheader();
  1524. BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI);
  1525. // Keep a mapping for the cloned values.
  1526. ValueToValueMapTy VMap;
  1527. // Build the cloned blocks from the loop.
  1528. auto *ClonedPH = buildClonedLoopBlocks(
  1529. L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
  1530. ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, AC, DT, LI);
  1531. // Build the cloned loop structure itself. This may be substantially
  1532. // different from the original structure due to the simplified CFG. This also
  1533. // handles inserting all the cloned blocks into the correct loops.
  1534. SmallVector<Loop *, 4> NonChildClonedLoops;
  1535. Loop *ClonedL =
  1536. buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
  1537. // Remove the parent as a predecessor of the unswitched successor.
  1538. UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);
  1539. // Now splice the branch from the original loop and use it to select between
  1540. // the two loops.
  1541. SplitBB->getTerminator()->eraseFromParent();
  1542. SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
  1543. BI.setSuccessor(ClonedSucc, ClonedPH);
  1544. BI.setSuccessor(1 - ClonedSucc, LoopPH);
  1545. // Create a new unconditional branch to the continuing block (as opposed to
  1546. // the one cloned).
  1547. BranchInst::Create(ContinueSuccBB, ParentBB);
  1548. // Delete anything that was made dead in the original loop due to
  1549. // unswitching.
  1550. if (DeleteUnswitchedSucc)
  1551. deleteDeadBlocksFromLoop(L, UnswitchedSuccBB, ExitBlocks, DT, LI);
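// Rebuild the structure of the original loop. This may dissolve the loop
// entirely, and it collects any child loops hoisted out of it into
// HoistedLoops.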
  1552. SmallVector<Loop *, 4> HoistedLoops;
  1553. bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
  1554. // This will have completely invalidated the dominator tree. We can't easily
  1555. // bound how much is invalid because in some cases we will refine the
  1556. // predecessor set of exit blocks of the loop which can move large unrelated
  1557. // regions of code into a new subtree.
  1558. //
  1559. // FIXME: Eventually, we should use an incremental update utility that
  1560. // leverages the existing information in the dominator tree (and potentially
  1561. // the nature of the change) to more efficiently update things.
  1562. DT.recalculate(*SplitBB->getParent());
  1563. // We can change which blocks are exit blocks of all the cloned sibling
  1564. // loops, the current loop, and any parent loops which shared exit blocks
  1565. // with the current loop. As a consequence, we need to re-form LCSSA for
  1566. // them. But we shouldn't need to re-form LCSSA for any child loops.
  1567. // FIXME: This could be made more efficient by tracking which exit blocks are
  1568. // new, and focusing on them, but that isn't likely to be necessary.
  1569. //
  1570. // In order to reasonably rebuild LCSSA we need to walk inside-out across the
  1571. // loop nest and update every loop that could have had its exits changed. We
  1572. // also need to cover any intervening loops. We add all of these loops to
  1573. // a list and sort them by loop depth to achieve this without updating
  1574. // unnecessary loops.
  1575. auto UpdateLCSSA = [&](Loop &UpdateL) {
  1576. #ifndef NDEBUG
  1577. for (Loop *ChildL : UpdateL)
  1578. assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
  1579. "Perturbed a child loop's LCSSA form!");
  1580. #endif
  1581. formLCSSA(UpdateL, DT, &LI, nullptr);
  1582. };
  1583. // For non-child cloned loops and hoisted loops, we just need to update LCSSA
  1584. // and we can do it in any order as they don't nest relative to each other.
  1585. for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
  1586. UpdateLCSSA(*UpdatedL);
1587. // If the original loop had exit blocks, walk up through the outer-most loop
  1588. // of those exit blocks to update LCSSA and form updated dedicated exits.
  1589. if (OuterExitL != &L) {
  1590. SmallVector<Loop *, 4> OuterLoops;
  1591. // We start with the cloned loop and the current loop if they are loops and
  1592. // move toward OuterExitL. Also, if either the cloned loop or the current
  1593. // loop have become top level loops we need to walk all the way out.
  1594. if (ClonedL) {
  1595. OuterLoops.push_back(ClonedL);
  1596. if (!ClonedL->getParentLoop())
  1597. OuterExitL = nullptr;
  1598. }
  1599. if (IsStillLoop) {
  1600. OuterLoops.push_back(&L);
  1601. if (!L.getParentLoop())
  1602. OuterExitL = nullptr;
  1603. }
  1604. // Grab all of the enclosing loops now.
  1605. for (Loop *OuterL = ParentL; OuterL != OuterExitL;
  1606. OuterL = OuterL->getParentLoop())
  1607. OuterLoops.push_back(OuterL);
  1608. // Finally, update our list of outer loops. This is nicely ordered to work
  1609. // inside-out.
  1610. for (Loop *OuterL : OuterLoops) {
  1611. // First build LCSSA for this loop so that we can preserve it when
  1612. // forming dedicated exits. We don't want to perturb some other loop's
  1613. // LCSSA while doing that CFG edit.
  1614. UpdateLCSSA(*OuterL);
1615. // For loops reached by this loop's original exit blocks we may have
1616. // introduced new, non-dedicated exits. At least try to re-form dedicated
  1617. // exits for these loops. This may fail if they couldn't have dedicated
  1618. // exits to start with.
  1619. formDedicatedExitBlocks(OuterL, &DT, &LI, /*PreserveLCSSA*/ true);
  1620. }
  1621. }
  1622. #ifndef NDEBUG
  1623. // Verify the entire loop structure to catch any incorrect updates before we
  1624. // progress in the pass pipeline.
  1625. LI.verify(DT);
  1626. #endif
  1627. // Now that we've unswitched something, make callbacks to report the changes.
  1628. // For that we need to merge together the updated loops and the cloned loops
  1629. // and check whether the original loop survived.
  1630. SmallVector<Loop *, 4> SibLoops;
  1631. for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
  1632. if (UpdatedL->getParentLoop() == ParentL)
  1633. SibLoops.push_back(UpdatedL);
  1634. NonTrivialUnswitchCB(IsStillLoop, SibLoops);
  1635. ++NumBranches;
  1636. return true;
  1637. }
  1638. /// Recursively compute the cost of a dominator subtree based on the per-block
  1639. /// cost map provided.
  1640. ///
1641. /// The recursive computation is memoized into the provided DT-indexed cost map
  1642. /// to allow querying it for most nodes in the domtree without it becoming
  1643. /// quadratic.
  1644. static int
  1645. computeDomSubtreeCost(DomTreeNode &N,
  1646. const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
  1647. SmallDenseMap<DomTreeNode *, int, 4> &DTCostMap) {
  1648. // Don't accumulate cost (or recurse through) blocks not in our block cost
  1649. // map and thus not part of the duplication cost being considered.
  1650. auto BBCostIt = BBCostMap.find(N.getBlock());
  1651. if (BBCostIt == BBCostMap.end())
  1652. return 0;
  1653. // Lookup this node to see if we already computed its cost.
  1654. auto DTCostIt = DTCostMap.find(&N);
  1655. if (DTCostIt != DTCostMap.end())
  1656. return DTCostIt->second;
  1657. // If not, we have to compute it. We can't use insert above and update
  1658. // because computing the cost may insert more things into the map.
  1659. int Cost = std::accumulate(
  1660. N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
  1661. return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
  1662. });
  1663. bool Inserted = DTCostMap.insert({&N, Cost}).second;
  1664. (void)Inserted;
  1665. assert(Inserted && "Should not insert a node while visiting children!");
  1666. return Cost;
  1667. }
  1668. /// Unswitch control flow predicated on loop invariant conditions.
  1669. ///
1670. /// This first hoists all branches or switches which are trivial (i.e., do not
  1671. /// require duplicating any part of the loop) out of the loop body. It then
  1672. /// looks at other loop invariant control flows and tries to unswitch those as
  1673. /// well by cloning the loop if the result is small enough.
  1674. static bool
  1675. unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
  1676. TargetTransformInfo &TTI, bool NonTrivial,
  1677. function_ref<void(bool, ArrayRef<Loop *>)> NonTrivialUnswitchCB) {
  1678. assert(L.isRecursivelyLCSSAForm(DT, LI) &&
  1679. "Loops must be in LCSSA form before unswitching.");
  1680. bool Changed = false;
  1681. // Must be in loop simplified form: we need a preheader and dedicated exits.
  1682. if (!L.isLoopSimplifyForm())
  1683. return false;
1684. // Try trivial unswitching first before looping over the other basic blocks in the loop.
  1685. Changed |= unswitchAllTrivialConditions(L, DT, LI);
1686. // If we're not doing non-trivial unswitching, we're done. We accept
1687. // a parameter, but we also check a local flag that can be used for testing
1688. // and debugging.
  1689. if (!NonTrivial && !EnableNonTrivialUnswitch)
  1690. return Changed;
  1691. // Collect all remaining invariant branch conditions within this loop (as
  1692. // opposed to an inner loop which would be handled when visiting that inner
  1693. // loop).
  1694. SmallVector<TerminatorInst *, 4> UnswitchCandidates;
  1695. for (auto *BB : L.blocks())
  1696. if (LI.getLoopFor(BB) == &L)
  1697. if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
  1698. if (BI->isConditional() && L.isLoopInvariant(BI->getCondition()) &&
  1699. BI->getSuccessor(0) != BI->getSuccessor(1))
  1700. UnswitchCandidates.push_back(BI);
  1701. // If we didn't find any candidates, we're done.
  1702. if (UnswitchCandidates.empty())
  1703. return Changed;
  1704. DEBUG(dbgs() << "Considering " << UnswitchCandidates.size()
  1705. << " non-trivial loop invariant conditions for unswitching.\n");
1706. // Unswitching these terminators will require duplicating parts of the loop,
1707. // so we need to be able to model that cost. Compute the ephemeral
  1708. // values and set up a data structure to hold per-BB costs. We cache each
  1709. // block's cost so that we don't recompute this when considering different
  1710. // subsets of the loop for duplication during unswitching.
  1711. SmallPtrSet<const Value *, 4> EphValues;
  1712. CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
  1713. SmallDenseMap<BasicBlock *, int, 4> BBCostMap;
  1714. // Compute the cost of each block, as well as the total loop cost. Also, bail
  1715. // out if we see instructions which are incompatible with loop unswitching
  1716. // (convergent, noduplicate, or cross-basic-block tokens).
  1717. // FIXME: We might be able to safely handle some of these in non-duplicated
  1718. // regions.
  1719. int LoopCost = 0;
  1720. for (auto *BB : L.blocks()) {
  1721. int Cost = 0;
  1722. for (auto &I : *BB) {
  1723. if (EphValues.count(&I))
  1724. continue;
  1725. if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
  1726. return Changed;
  1727. if (auto CS = CallSite(&I))
  1728. if (CS.isConvergent() || CS.cannotDuplicate())
  1729. return Changed;
  1730. Cost += TTI.getUserCost(&I);
  1731. }
  1732. assert(Cost >= 0 && "Must not have negative costs!");
  1733. LoopCost += Cost;
  1734. assert(LoopCost >= 0 && "Must not have negative loop costs!");
  1735. BBCostMap[BB] = Cost;
  1736. }
  1737. DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
  1738. // Now we find the best candidate by searching for the one with the following
  1739. // properties in order:
  1740. //
  1741. // 1) An unswitching cost below the threshold
  1742. // 2) The smallest number of duplicated unswitch candidates (to avoid
  1743. // creating redundant subsequent unswitching)
  1744. // 3) The smallest cost after unswitching.
  1745. //
  1746. // We prioritize reducing fanout of unswitch candidates provided the cost
  1747. // remains below the threshold because this has a multiplicative effect.
  1748. //
  1749. // This requires memoizing each dominator subtree to avoid redundant work.
  1750. //
  1751. // FIXME: Need to actually do the number of candidates part above.
  1752. SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
  1753. // Given a terminator which might be unswitched, computes the non-duplicated
  1754. // cost for that terminator.
  1755. auto ComputeUnswitchedCost = [&](TerminatorInst *TI) {
  1756. BasicBlock &BB = *TI->getParent();
  1757. SmallPtrSet<BasicBlock *, 4> Visited;
  1758. int Cost = LoopCost;
  1759. for (BasicBlock *SuccBB : successors(&BB)) {
  1760. // Don't count successors more than once.
  1761. if (!Visited.insert(SuccBB).second)
  1762. continue;
  1763. // This successor's domtree will not need to be duplicated after
  1764. // unswitching if the edge to the successor dominates it (and thus the
  1765. // entire tree). This essentially means there is no other path into this
  1766. // subtree and so it will end up live in only one clone of the loop.
  1767. if (SuccBB->getUniquePredecessor() ||
  1768. llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
  1769. return PredBB == &BB || DT.dominates(SuccBB, PredBB);
  1770. })) {
  1771. Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
  1772. assert(Cost >= 0 &&
  1773. "Non-duplicated cost should never exceed total loop cost!");
  1774. }
  1775. }
  1776. // Now scale the cost by the number of unique successors minus one. We
  1777. // subtract one because there is already at least one copy of the entire
  1778. // loop. This is computing the new cost of unswitching a condition.
  1779. assert(Visited.size() > 1 &&
  1780. "Cannot unswitch a condition without multiple distinct successors!");
  1781. return Cost * (Visited.size() - 1);
  1782. };
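// Scan the candidates and remember the one with the lowest modeled cost.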
  1783. TerminatorInst *BestUnswitchTI = nullptr;
  1784. int BestUnswitchCost;
  1785. for (TerminatorInst *CandidateTI : UnswitchCandidates) {
  1786. int CandidateCost = ComputeUnswitchedCost(CandidateTI);
  1787. DEBUG(dbgs() << " Computed cost of " << CandidateCost
  1788. << " for unswitch candidate: " << *CandidateTI << "\n");
  1789. if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
  1790. BestUnswitchTI = CandidateTI;
  1791. BestUnswitchCost = CandidateCost;
  1792. }
  1793. }
  1794. if (BestUnswitchCost < UnswitchThreshold) {
  1795. DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
  1796. << BestUnswitchCost << ") branch: " << *BestUnswitchTI
  1797. << "\n");
  1798. Changed |= unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT,
  1799. LI, AC, NonTrivialUnswitchCB);
  1800. } else {
  1801. DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << BestUnswitchCost
  1802. << "\n");
  1803. }
  1804. return Changed;
  1805. }
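// New pass manager entry point: run unswitching on the loop and report any
// new or deleted loops to the LPMUpdater so the pass pipeline stays
// consistent.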
  1806. PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
  1807. LoopStandardAnalysisResults &AR,
  1808. LPMUpdater &U) {
  1809. Function &F = *L.getHeader()->getParent();
  1810. (void)F;
  1811. DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n");
  1812. // Save the current loop name in a variable so that we can report it even
  1813. // after it has been deleted.
  1814. std::string LoopName = L.getName();
  1815. auto NonTrivialUnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
  1816. ArrayRef<Loop *> NewLoops) {
  1817. // If we did a non-trivial unswitch, we have added new (cloned) loops.
  1818. U.addSiblingLoops(NewLoops);
  1819. // If the current loop remains valid, we should revisit it to catch any
  1820. // other unswitch opportunities. Otherwise, we need to mark it as deleted.
  1821. if (CurrentLoopValid)
  1822. U.revisitCurrentLoop();
  1823. else
  1824. U.markLoopAsDeleted(L, LoopName);
  1825. };
  1826. if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial,
  1827. NonTrivialUnswitchCB))
  1828. return PreservedAnalyses::all();
  1829. #ifndef NDEBUG
  1830. // Historically this pass has had issues with the dominator tree so verify it
  1831. // in asserts builds.
  1832. AR.DT.verifyDomTree();
  1833. #endif
  1834. return getLoopPassPreservedAnalyses();
  1835. }
  1836. namespace {
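// Legacy pass manager wrapper that drives unswitchLoop and feeds the
// resulting new or deleted loops back to the LPPassManager.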
  1837. class SimpleLoopUnswitchLegacyPass : public LoopPass {
  1838. bool NonTrivial;
  1839. public:
  1840. static char ID; // Pass ID, replacement for typeid
  1841. explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
  1842. : LoopPass(ID), NonTrivial(NonTrivial) {
  1843. initializeSimpleLoopUnswitchLegacyPassPass(
  1844. *PassRegistry::getPassRegistry());
  1845. }
  1846. bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  1847. void getAnalysisUsage(AnalysisUsage &AU) const override {
  1848. AU.addRequired<AssumptionCacheTracker>();
  1849. AU.addRequired<TargetTransformInfoWrapperPass>();
  1850. getLoopAnalysisUsage(AU);
  1851. }
  1852. };
  1853. } // end anonymous namespace
  1854. bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
  1855. if (skipLoop(L))
  1856. return false;
  1857. Function &F = *L->getHeader()->getParent();
  1858. DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n");
  1859. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  1860. auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  1861. auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  1862. auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  1863. auto NonTrivialUnswitchCB = [&L, &LPM](bool CurrentLoopValid,
  1864. ArrayRef<Loop *> NewLoops) {
  1865. // If we did a non-trivial unswitch, we have added new (cloned) loops.
  1866. for (auto *NewL : NewLoops)
  1867. LPM.addLoop(*NewL);
  1868. // If the current loop remains valid, re-add it to the queue. This is
  1869. // a little wasteful as we'll finish processing the current loop as well,
  1870. // but it is the best we can do in the old PM.
  1871. if (CurrentLoopValid)
  1872. LPM.addLoop(*L);
  1873. else
  1874. LPM.markLoopAsDeleted(*L);
  1875. };
  1876. bool Changed =
  1877. unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, NonTrivialUnswitchCB);
  1878. // If anything was unswitched, also clear any cached information about this
  1879. // loop.
  1880. LPM.deleteSimpleAnalysisLoop(L);
  1881. #ifndef NDEBUG
  1882. // Historically this pass has had issues with the dominator tree so verify it
  1883. // in asserts builds.
  1884. DT.verifyDomTree();
  1885. #endif
  1886. return Changed;
  1887. }
  1888. char SimpleLoopUnswitchLegacyPass::ID = 0;
  1889. INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
  1890. "Simple unswitch loops", false, false)
  1891. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  1892. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  1893. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  1894. INITIALIZE_PASS_DEPENDENCY(LoopPass)
  1895. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  1896. INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
  1897. "Simple unswitch loops", false, false)
  1898. Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
  1899. return new SimpleLoopUnswitchLegacyPass(NonTrivial);
  1900. }