
//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines common loop utility functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-utils"

static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";

bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);

    if (!NewExitBB)
      LLVM_DEBUG(
          dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                 << *L << "\n");
    else
      LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                        << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}
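
// Illustrative sketch (not part of the original file): a minimal CFG shape
// this utility rewrites, assuming a loop with blocks %header/%latch whose
// exit block %out is also reachable from outside the loop.
//
//   ; Before: %out has an in-loop predecessor (%latch) and an out-of-loop
//   ; predecessor (%entry), so it is not a dedicated exit.
//   ;   entry -> out
//   ;   latch -> out
//
//   ; After formDedicatedExitBlocks: the in-loop edge is redirected through a
//   ; new block created by SplitBlockPredecessors, so every predecessor of
//   ; %out.loopexit is inside the loop.
//   ;   latch -> out.loopexit -> out
//   ;   entry -> out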

/// Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  SmallVector<Instruction *, 8> UsedOutside;

  for (auto *Block : L->getBlocks())
    // FIXME: I believe that this could use copy_if if the Inst reference could
    // be adapted into a pointer.
    for (auto &Inst : *Block) {
      auto Users = Inst.users();
      if (any_of(Users, [&](User *U) {
            auto *Use = cast<Instruction>(U);
            return !L->contains(Use->getParent());
          }))
        UsedOutside.push_back(&Inst);
    }

  return UsedOutside;
}

void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve LCSSA form.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
}

/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}
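
// Illustrative sketch (not part of the original file): a hypothetical loop
// pass named MyLoopPass would pull in the whole dependency set above through
// the single INITIALIZE_PASS_DEPENDENCY(LoopPass) line in its registration,
// e.g.:
//
//   INITIALIZE_PASS_BEGIN(MyLoopPass, "my-loop-pass", "My loop pass",
//                         false, false)
//   INITIALIZE_PASS_DEPENDENCY(LoopPass)
//   INITIALIZE_PASS_END(MyLoopPass, "my-loop-pass", "My loop pass",
//                       false, false)
//
// MyLoopPass and its command-line name are placeholders, not part of LLVM.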

static Optional<MDNode *> findOptionMDForLoopID(MDNode *LoopID,
                                                StringRef Name) {
  // Return None if there is no loop id metadata.
  if (!LoopID)
    return None;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  // Iterate over the LoopID operands and look for an MDString metadata name.
  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    if (!MD)
      continue;

    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    if (!S)
      continue;

    // Return the option node if its MDString matches the requested name.
    if (Name.equals(S->getString()))
      return MD;
  }
  return None;
}
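
// Illustrative sketch (not part of the original file) of the !llvm.loop
// metadata shape these helpers walk; the node numbers are hypothetical:
//
//   br i1 %cond, label %header, label %exit, !llvm.loop !0
//   ...
//   !0 = distinct !{!0, !1, !2}                     ; loop id, self-referential
//   !1 = !{!"llvm.loop.unroll.count", i32 4}        ; option with a value
//   !2 = !{!"llvm.loop.vectorize.enable", i1 true}  ; boolean option
//
// findOptionMDForLoopID(!0, "llvm.loop.unroll.count") would return !1, and a
// lookup of a name that is not present returns None.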

static Optional<MDNode *> findOptionMDForLoop(const Loop *TheLoop,
                                              StringRef Name) {
  return findOptionMDForLoopID(TheLoop->getLoopID(), Name);
}

/// Find string metadata for a loop.
///
/// If it has a value (e.g. {"llvm.distribute", 1}), return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
                                                            StringRef Name) {
  auto MD = findOptionMDForLoop(TheLoop, Name).getValueOr(nullptr);
  if (!MD)
    return None;
  switch (MD->getNumOperands()) {
  case 1:
    return nullptr;
  case 2:
    return &MD->getOperand(1);
  default:
    llvm_unreachable("loop metadata has 0 or 1 operand");
  }
}

static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
                                                   StringRef Name) {
  Optional<MDNode *> MD = findOptionMDForLoop(TheLoop, Name);
  if (!MD.hasValue())
    return None;
  MDNode *OptionNode = MD.getValue();
  if (OptionNode == nullptr)
    return None;
  switch (OptionNode->getNumOperands()) {
  case 1:
    // When the value is absent it is interpreted as 'attribute set'.
    return true;
  case 2:
    // Return the value of the operand if it is an integer constant; a
    // non-integer value is still interpreted as 'attribute set'.
    if (ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(
            OptionNode->getOperand(1).get()))
      return IntMD->getZExtValue() != 0;
    return true;
  }
  llvm_unreachable("unexpected number of options");
}

static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
  return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
}

llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
                                                      StringRef Name) {
  const MDOperand *AttrMD =
      findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
  if (!AttrMD)
    return None;

  ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
  if (!IntMD)
    return None;

  return IntMD->getSExtValue();
}

Optional<MDNode *> llvm::makeFollowupLoopID(
    MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
    const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
  if (!OrigLoopID) {
    if (AlwaysNew)
      return nullptr;
    return None;
  }

  assert(OrigLoopID->getOperand(0) == OrigLoopID);

  bool InheritAllAttrs = !InheritOptionsExceptPrefix;
  bool InheritSomeAttrs =
      InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
  SmallVector<Metadata *, 8> MDs;
  MDs.push_back(nullptr);

  bool Changed = false;
  if (InheritAllAttrs || InheritSomeAttrs) {
    for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
      MDNode *Op = cast<MDNode>(Existing.get());

      auto InheritThisAttribute = [InheritSomeAttrs,
                                   InheritOptionsExceptPrefix](MDNode *Op) {
        if (!InheritSomeAttrs)
          return false;

        // Skip malformed attribute metadata nodes.
        if (Op->getNumOperands() == 0)
          return true;
        Metadata *NameMD = Op->getOperand(0).get();
        if (!isa<MDString>(NameMD))
          return true;
        StringRef AttrName = cast<MDString>(NameMD)->getString();

        // Do not inherit excluded attributes.
        return !AttrName.startswith(InheritOptionsExceptPrefix);
      };

      if (InheritThisAttribute(Op))
        MDs.push_back(Op);
      else
        Changed = true;
    }
  } else {
    // Modified if we dropped at least one attribute.
    Changed = OrigLoopID->getNumOperands() > 1;
  }

  bool HasAnyFollowup = false;
  for (StringRef OptionName : FollowupOptions) {
    MDNode *FollowupNode =
        findOptionMDForLoopID(OrigLoopID, OptionName).getValueOr(nullptr);
    if (!FollowupNode)
      continue;

    HasAnyFollowup = true;

    for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
      MDs.push_back(Option.get());
      Changed = true;
    }
  }

  // The attributes of the followup loop were not specified explicitly, so
  // signal to the transformation pass to add suitable attributes.
  if (!AlwaysNew && !HasAnyFollowup)
    return None;

  // If no attributes were added or removed, the previous loop ID can be
  // reused.
  if (!AlwaysNew && !Changed)
    return OrigLoopID;

  // No attributes is equivalent to having no !llvm.loop metadata at all.
  if (MDs.size() == 1)
    return nullptr;

  // Build the new loop ID.
  MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
  FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
  return FollowupLoopID;
}
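
// Illustrative sketch (not part of the original file) of how followup
// attributes flow through makeFollowupLoopID; the metadata node numbers are
// hypothetical:
//
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.unroll.enable"}
//   !2 = !{!"llvm.loop.unroll.followup_unrolled", !3}
//   !3 = !{!"llvm.loop.isvectorized"}
//
// A call like
//   makeFollowupLoopID(!0, {"llvm.loop.unroll.followup_unrolled"},
//                      "llvm.loop.unroll.")
// would drop the "llvm.loop.unroll."-prefixed attributes (!1 and !2) and
// append the contents of the followup node, yielding a new self-referential
// loop ID whose only attribute is !3's "llvm.loop.isvectorized".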

bool llvm::hasDisableAllTransformsHint(const Loop *L) {
  return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
}

TransformationMode llvm::hasUnrollTransformation(Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
    return TM_SuppressedByUser;

  Optional<int> Count =
      getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
  if (Count.hasValue())
    return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
    return TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
    return TM_SuppressedByUser;

  Optional<int> Count =
      getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
  if (Count.hasValue())
    return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
  Optional<bool> Enable =
      getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");

  if (Enable == false)
    return TM_SuppressedByUser;

  Optional<int> VectorizeWidth =
      getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
  Optional<int> InterleaveCount =
      getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");

  if (Enable == true) {
    // 'Forcing' vector width and interleave count to one effectively disables
    // this transformation.
    if (VectorizeWidth == 1 && InterleaveCount == 1)
      return TM_SuppressedByUser;
    return TM_ForcedByUser;
  }

  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return TM_Disable;

  if (VectorizeWidth == 1 && InterleaveCount == 1)
    return TM_Disable;

  if (VectorizeWidth > 1 || InterleaveCount > 1)
    return TM_Enable;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}
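
// Illustrative sketch (not part of the original file): some hypothetical
// attribute combinations and the mode the query above reports for them.
//
//   !{!"llvm.loop.vectorize.enable", i1 false}         -> TM_SuppressedByUser
//   !{!"llvm.loop.vectorize.enable", i1 true}          -> TM_ForcedByUser
//   !{!"llvm.loop.vectorize.width", i32 1} together
//   with !{!"llvm.loop.interleave.count", i32 1}       -> TM_Disable
//   !{!"llvm.loop.vectorize.width", i32 8}             -> TM_Enable
//   no vectorization attributes at all                 -> TM_Unspecified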

TransformationMode llvm::hasDistributeTransformation(Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
    return TM_SuppressedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16>
llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
  SmallVector<DomTreeNode *, 16> Worklist;
  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
    // Only include subregions in the top level loop.
    BasicBlock *BB = DTN->getBlock();
    if (CurLoop->contains(BB))
      Worklist.push_back(DTN);
  };

  AddRegionToWorklist(N);

  for (size_t I = 0; I < Worklist.size(); I++)
    for (DomTreeNode *Child : Worklist[I]->getChildren())
      AddRegionToWorklist(Child);

  return Worklist;
}

void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
                          ScalarEvolution *SE = nullptr,
                          LoopInfo *LI = nullptr) {
  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
  auto *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader should exist!");

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  //
  // Because we're deleting a large chunk of code at once, the sequence in which
  // we remove things is very important to avoid invalidation issues.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  if (SE)
    SE->forgetLoop(L);

  auto *ExitBlock = L->getUniqueExitBlock();
  assert(ExitBlock && "Should have a unique exit block!");
  assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");

  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
  assert(OldBr && "Preheader must end with a branch");
  assert(OldBr->isUnconditional() && "Preheader must have a single successor");
  // Connect the preheader to the exit block. Keep the old edge to the header
  // around to perform the dominator tree update in two separate steps
  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
  // preheader -> header.
  //
  //
  // 0.  Preheader          1.  Preheader           2.  Preheader
  //        |                    |   |                     |
  //        V                    |   V                     |
  //      Header <--\            | Header <--\             | Header <--\
  //       |  |     |            |  |  |     |             |   |  |    |
  //       |  V     |            |  |  V     |             |   |  V    |
  //       | Body --/            |  | Body --/             |   | Body --/
  //       V                     V  V                      V   V
  //      Exit                   Exit                      Exit
  //
  // By doing this in two separate steps we can perform the dominator tree
  // update without using the batch update API.
  //
  // Even when the loop is never executed, we cannot remove the edge from the
  // source block to the exit block. Consider the case where the unexecuted loop
  // branches back to an outer loop. If we deleted the loop and removed the edge
  // coming to this inner loop, this will break the outer loop structure (by
  // deleting the backedge of the outer loop). If the outer loop is indeed a
  // non-loop, it will be deleted in a future iteration of the loop deletion
  // pass.
  IRBuilder<> Builder(OldBr);
  Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
  // Remove the old branch. The conditional branch becomes a new terminator.
  OldBr->eraseFromParent();

  // Rewrite phis in the exit block to get their inputs from the Preheader
  // instead of the exiting block.
  for (PHINode &P : ExitBlock->phis()) {
    // Set the zero'th element of Phi to be from the preheader and remove all
    // other incoming values. Given the loop has dedicated exits, all other
    // incoming values must be from the exiting blocks.
    int PredIndex = 0;
    P.setIncomingBlock(PredIndex, Preheader);
    // Removes all incoming values from all other exiting blocks (including
    // duplicate values from an exiting block).
    // Nuke all entries except the zero'th entry which is the preheader entry.
    // NOTE! We need to remove Incoming Values in the reverse order as done
    // below, to keep the indices valid for deletion (removeIncomingValues
    // updates getNumIncomingValues and shifts all values down into the operand
    // being deleted).
    for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
      P.removeIncomingValue(e - i, false);

    assert((P.getNumIncomingValues() == 1 &&
            P.getIncomingBlock(PredIndex) == Preheader) &&
           "Should have exactly one value and that's from the preheader!");
  }

  // Disconnect the loop body by branching directly to its exit.
  Builder.SetInsertPoint(Preheader->getTerminator());
  Builder.CreateBr(ExitBlock);
  // Remove the old branch.
  Preheader->getTerminator()->eraseFromParent();

  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  if (DT) {
    // Update the dominator tree by informing it about the new edge from the
    // preheader to the exit.
    DTU.insertEdge(Preheader, ExitBlock);
    // Inform the dominator tree about the removed edge.
    DTU.deleteEdge(Preheader, L->getHeader());
  }

  // Given LCSSA form is satisfied, we should not have users of instructions
  // within the dead loop outside of the loop. However, LCSSA doesn't take
  // unreachable uses into account. We handle them here.
  // We could do this after dropping all references (in that case all users in
  // the loop would already be eliminated and we would have less work to do),
  // but according to the API doc of User::dropAllReferences the only valid
  // operation after dropping references is deletion. So let's substitute all
  // uses of instructions from the loop with undef values of the corresponding
  // type first.
  for (auto *Block : L->blocks())
    for (Instruction &I : *Block) {
      auto *Undef = UndefValue::get(I.getType());
      for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
        Use &U = *UI;
        ++UI;
        if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
          if (L->contains(Usr->getParent()))
            continue;
        // If we have a DT, we can check that the remaining uses outside the
        // loop appear only in unreachable blocks.
        if (DT)
          assert(!DT->isReachableFromEntry(U) &&
                 "Unexpected user in reachable block");
        U.set(Undef);
      }
    }

  // Remove the block from the reference counting scheme, so that we can
  // delete it freely later.
  for (auto *Block : L->blocks())
    Block->dropAllReferences();

  if (LI) {
    // Erase the instructions and the blocks without having to worry
    // about ordering because we already dropped the references.
    // NOTE: This iteration is safe because erasing the block does not remove
    // its entry from the loop's block list. We do that in the next section.
    for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
         LpI != LpE; ++LpI)
      (*LpI)->eraseFromParent();

    // Finally, the blocks from loopinfo. This has to happen late because
    // otherwise our loop iterators won't work.
    SmallPtrSet<BasicBlock *, 8> blocks;
    blocks.insert(L->block_begin(), L->block_end());
    for (BasicBlock *BB : blocks)
      LI->removeBlock(BB);

    // The last step is to update LoopInfo now that we've eliminated this loop.
    LI->erase(L);
  }
}

Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
  // Only support loops with a unique exiting block, and a latch.
  if (!L->getExitingBlock())
    return None;

  // Get the branch weights for the loop's backedge.
  BranchInst *LatchBR =
      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
    return None;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  // To estimate the number of times the loop body was executed, we want to
  // know the number of times the backedge was taken, vs. the number of times
  // we exited the loop.
  uint64_t TrueVal, FalseVal;
  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
    return None;

  if (!TrueVal || !FalseVal)
    return 0;

  // Divide the count of the backedge by the count of the edge exiting the
  // loop, rounding to nearest.
  if (LatchBR->getSuccessor(0) == L->getHeader())
    return (TrueVal + (FalseVal / 2)) / FalseVal;
  else
    return (FalseVal + (TrueVal / 2)) / TrueVal;
}
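
// Illustrative sketch (not part of the original file): with hypothetical
// branch weights on the latch such as
//
//   br i1 %cond, label %header, label %exit, !prof !5
//   !5 = !{!"branch_weights", i32 1000, i32 10}
//
// the backedge (successor 0, weight 1000) was taken roughly 1000 times for
// every 10 exits, so the estimated trip count is (1000 + 10/2) / 10 = 100,
// i.e. the backedge count divided by the exit count, rounded to nearest.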

bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                              ScalarEvolution &SE) {
  Loop *OuterL = InnerLoop->getParentLoop();
  if (!OuterL)
    return true;

  // Get the backedge taken count for the inner loop
  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
  if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
      !InnerLoopBECountSC->getType()->isIntegerTy())
    return false;

  // Get whether count is invariant to the outer loop
  ScalarEvolution::LoopDisposition LD =
      SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
  if (LD != ScalarEvolution::LoopInvariant)
    return false;

  return true;
}

/// Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
  if (isa<FPMathOperator>(V)) {
    FastMathFlags Flags;
    Flags.setFast();
    cast<Instruction>(V)->setFastMathFlags(Flags);
  }
  return V;
}

Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
                            RecurrenceDescriptor::MinMaxRecurrenceKind RK,
                            Value *Left, Value *Right) {
  CmpInst::Predicate P = CmpInst::ICMP_NE;
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case RecurrenceDescriptor::MRK_UIntMin:
    P = CmpInst::ICMP_ULT;
    break;
  case RecurrenceDescriptor::MRK_UIntMax:
    P = CmpInst::ICMP_UGT;
    break;
  case RecurrenceDescriptor::MRK_SIntMin:
    P = CmpInst::ICMP_SLT;
    break;
  case RecurrenceDescriptor::MRK_SIntMax:
    P = CmpInst::ICMP_SGT;
    break;
  case RecurrenceDescriptor::MRK_FloatMin:
    P = CmpInst::FCMP_OLT;
    break;
  case RecurrenceDescriptor::MRK_FloatMax:
    P = CmpInst::FCMP_OGT;
    break;
  }

  // We only match FP sequences that are 'fast', so we can unconditionally
  // set it on any generated instructions.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF;
  FMF.setFast();
  Builder.setFastMathFlags(FMF);

  Value *Cmp;
  if (RK == RecurrenceDescriptor::MRK_FloatMin ||
      RK == RecurrenceDescriptor::MRK_FloatMax)
    Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
  else
    Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");

  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  return Select;
}

// Helper to generate an ordered reduction.
Value *
llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
                          unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  unsigned VF = Src->getType()->getVectorNumElements();

  // Extract and apply reduction ops in ascending order:
  // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[VF-1]
  Value *Result = Acc;
  for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
    Value *Ext =
        Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
                                   "bin.rdx");
    } else {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext);
    }

    if (!RedOps.empty())
      propagateIRFlags(Result, RedOps);
  }

  return Result;
}

// Helper to generate a log2 shuffle reduction.
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  unsigned VF = Src->getType()->getVectorNumElements();
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  // and vector ops, reducing the set of values being computed by half each
  // round.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");
  Value *TmpVec = Src;
  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
  for (unsigned i = VF; i != 1; i >>= 1) {
    // Move the upper half of the vector to the lower half.
    for (unsigned j = 0; j != i / 2; ++j)
      ShuffleMask[j] = Builder.getInt32(i / 2 + j);

    // Fill the rest of the mask with undef.
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
              UndefValue::get(Builder.getInt32Ty()));

    Value *Shuf = Builder.CreateShuffleVector(
        TmpVec, UndefValue::get(TmpVec->getType()),
        ConstantVector::get(ShuffleMask), "rdx.shuf");

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      // Floating point operations had to be 'fast' to enable the reduction.
      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
                                                   TmpVec, Shuf, "bin.rdx"));
    } else {
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
    }
    if (!RedOps.empty())
      propagateIRFlags(TmpVec, RedOps);
  }
  // The result is in the first element of the vector.
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
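
// Illustrative sketch (not part of the original file) of the log2 shuffle
// pattern above for a hypothetical <4 x i32> add reduction of <a, b, c, d>:
//
//   round 1: shuffle mask <2, 3, undef, undef>    -> <c, d, _, _>
//            add                                  -> <a+c, b+d, _, _>
//   round 2: shuffle mask <1, undef, undef, undef> -> <b+d, _, _, _>
//            add                                  -> <a+b+c+d, _, _, _>
//
// The scalar result a+b+c+d is then read out of lane 0.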

/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).
Value *llvm::createSimpleTargetReduction(
    IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
    Value *Src, TargetTransformInfo::ReductionFlags Flags,
    ArrayRef<Value *> RedOps) {
  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");

  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
  std::function<Value *()> BuildFunc;
  using RD = RecurrenceDescriptor;
  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
  // TODO: Support creating ordered reductions.
  FastMathFlags FMFFast;
  FMFFast.setFast();

  switch (Opcode) {
  case Instruction::Add:
    BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
    break;
  case Instruction::Mul:
    BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
    break;
  case Instruction::And:
    BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
    break;
  case Instruction::Or:
    BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
    break;
  case Instruction::Xor:
    BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
    break;
  case Instruction::FAdd:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
      return Rdx;
    };
    break;
  case Instruction::FMul:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
      return Rdx;
    };
    break;
  case Instruction::ICmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
      BuildFunc = [&]() {
        return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
      };
    } else {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
      BuildFunc = [&]() {
        return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
      };
    }
    break;
  case Instruction::FCmp:
    if (Flags.IsMaxOp) {
      MinMaxKind = RD::MRK_FloatMax;
      BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
    } else {
      MinMaxKind = RD::MRK_FloatMin;
      BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
    }
    break;
  default:
    llvm_unreachable("Unhandled opcode");
    break;
  }
  if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
    return BuildFunc();
  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}
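
// Illustrative sketch (not part of the original file): a hypothetical caller
// wanting a signed-max reduction of a vector value Vec would set up the flags
// and let the TTI hook decide between the reduction intrinsic and the shuffle
// expansion:
//
//   TargetTransformInfo::ReductionFlags Flags;
//   Flags.IsMaxOp = true;
//   Flags.IsSigned = true;
//   Value *Rdx = createSimpleTargetReduction(Builder, TTI, Instruction::ICmp,
//                                            Vec, Flags);
//
// Builder, TTI, and Vec are assumed to already exist at the call site.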

/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilder<> &B,
                                   const TargetTransformInfo *TTI,
                                   RecurrenceDescriptor &Desc, Value *Src,
                                   bool NoNaN) {
  // TODO: Support in-order reductions based on the recurrence descriptor.
  using RD = RecurrenceDescriptor;
  RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
  TargetTransformInfo::ReductionFlags Flags;
  Flags.NoNaN = NoNaN;
  switch (RecKind) {
  case RD::RK_FloatAdd:
    return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
  case RD::RK_FloatMult:
    return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
  case RD::RK_IntegerAdd:
    return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
  case RD::RK_IntegerMult:
    return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
  case RD::RK_IntegerAnd:
    return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
  case RD::RK_IntegerOr:
    return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
  case RD::RK_IntegerXor:
    return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
  case RD::RK_IntegerMinMax: {
    RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
    Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
    Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
    return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
  }
  case RD::RK_FloatMinMax: {
    Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
    return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
  }
  default:
    llvm_unreachable("Unhandled RecKind");
  }
}

void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
  auto *VecOp = dyn_cast<Instruction>(I);
  if (!VecOp)
    return;
  auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
                                            : dyn_cast<Instruction>(OpValue);
  if (!Intersection)
    return;
  const unsigned Opcode = Intersection->getOpcode();
  VecOp->copyIRFlags(Intersection);
  for (auto *V : VL) {
    auto *Instr = dyn_cast<Instruction>(V);
    if (!Instr)
      continue;
    if (OpValue == nullptr || Opcode == Instr->getOpcode())
      VecOp->andIRFlags(V);
  }
}