// PromoteMemoryToRegister.cpp
  1. //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file promotes memory references to be register references. It promotes
  11. // alloca instructions which only have loads and stores as uses. An alloca is
  12. // transformed by using iterated dominator frontiers to place PHI nodes, then
  13. // traversing the function in depth-first order to rewrite loads and stores as
  14. // appropriate.
  15. //
  16. // The algorithm used here is based on:
  17. //
  18. // Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
  19. // In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
  20. // Programming Languages
  21. // POPL '95. ACM, New York, NY, 62-73.
  22. //
  23. // It has been modified to not explicitly use the DJ graph data structure and to
  24. // directly compute pruned SSA using per-variable liveness information.
  25. //
  26. //===----------------------------------------------------------------------===//
  27. #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  28. #include "llvm/ADT/ArrayRef.h"
  29. #include "llvm/ADT/DenseMap.h"
  30. #include "llvm/ADT/STLExtras.h"
  31. #include "llvm/ADT/SmallPtrSet.h"
  32. #include "llvm/ADT/SmallVector.h"
  33. #include "llvm/ADT/Statistic.h"
  34. #include "llvm/Analysis/AliasSetTracker.h"
  35. #include "llvm/Analysis/InstructionSimplify.h"
  36. #include "llvm/Analysis/ValueTracking.h"
  37. #include "llvm/IR/CFG.h"
  38. #include "llvm/IR/Constants.h"
  39. #include "llvm/IR/DIBuilder.h"
  40. #include "llvm/IR/DebugInfo.h"
  41. #include "llvm/IR/DerivedTypes.h"
  42. #include "llvm/IR/Dominators.h"
  43. #include "llvm/IR/Function.h"
  44. #include "llvm/IR/Instructions.h"
  45. #include "llvm/IR/IntrinsicInst.h"
  46. #include "llvm/IR/Metadata.h"
  47. #include "llvm/Transforms/Utils/Local.h"
  48. #include <algorithm>
  49. #include <queue>
  50. using namespace llvm;
  51. #define DEBUG_TYPE "mem2reg"
// Pass-wide statistics, reported with -stats. NumPHIInsert is bumped where
// PHI nodes are queued (outside this excerpt); the others are bumped in the
// promotion paths below.
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
  56. bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  57. // FIXME: If the memory unit is of pointer or integer type, we can permit
  58. // assignments to subsections of the memory unit.
  59. unsigned AS = AI->getType()->getAddressSpace();
  60. // Only allow direct and non-volatile loads and stores...
  61. for (const User *U : AI->users()) {
  62. if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
  63. // Note that atomic loads can be transformed; atomic semantics do
  64. // not have any meaning for a local alloca.
  65. if (LI->isVolatile())
  66. return false;
  67. } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
  68. if (SI->getOperand(0) == AI)
  69. return false; // Don't allow a store OF the AI, only INTO the AI.
  70. // Note that atomic stores can be transformed; atomic semantics do
  71. // not have any meaning for a local alloca.
  72. if (SI->isVolatile())
  73. return false;
  74. } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
  75. if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
  76. II->getIntrinsicID() != Intrinsic::lifetime_end)
  77. return false;
  78. } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
  79. if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
  80. return false;
  81. if (!onlyUsedByLifetimeMarkers(BCI))
  82. return false;
  83. } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
  84. if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
  85. return false;
  86. if (!GEPI->hasAllZeroIndices())
  87. return false;
  88. if (!onlyUsedByLifetimeMarkers(GEPI))
  89. return false;
  90. } else {
  91. return false;
  92. }
  93. }
  94. return true;
  95. }
  96. namespace {
  97. struct AllocaInfo {
  98. SmallVector<BasicBlock *, 32> DefiningBlocks;
  99. SmallVector<BasicBlock *, 32> UsingBlocks;
  100. StoreInst *OnlyStore;
  101. BasicBlock *OnlyBlock;
  102. bool OnlyUsedInOneBlock;
  103. Value *AllocaPointerVal;
  104. DbgDeclareInst *DbgDeclare;
  105. void clear() {
  106. DefiningBlocks.clear();
  107. UsingBlocks.clear();
  108. OnlyStore = nullptr;
  109. OnlyBlock = nullptr;
  110. OnlyUsedInOneBlock = true;
  111. AllocaPointerVal = nullptr;
  112. DbgDeclare = nullptr;
  113. }
  114. /// Scan the uses of the specified alloca, filling in the AllocaInfo used
  115. /// by the rest of the pass to reason about the uses of this alloca.
  116. void AnalyzeAlloca(AllocaInst *AI) {
  117. clear();
  118. // As we scan the uses of the alloca instruction, keep track of stores,
  119. // and decide whether all of the loads and stores to the alloca are within
  120. // the same basic block.
  121. for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
  122. Instruction *User = cast<Instruction>(*UI++);
  123. if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
  124. // Remember the basic blocks which define new values for the alloca
  125. DefiningBlocks.push_back(SI->getParent());
  126. AllocaPointerVal = SI->getOperand(0);
  127. OnlyStore = SI;
  128. } else {
  129. LoadInst *LI = cast<LoadInst>(User);
  130. // Otherwise it must be a load instruction, keep track of variable
  131. // reads.
  132. UsingBlocks.push_back(LI->getParent());
  133. AllocaPointerVal = LI;
  134. }
  135. if (OnlyUsedInOneBlock) {
  136. if (!OnlyBlock)
  137. OnlyBlock = User->getParent();
  138. else if (OnlyBlock != User->getParent())
  139. OnlyUsedInOneBlock = false;
  140. }
  141. }
  142. DbgDeclare = FindAllocaDbgDeclare(AI);
  143. }
  144. };
// Data package used by RenamePass().
//
// Each worklist entry records the block to visit (BB), the predecessor we
// arrived from (Pred, null for the entry block), and the current incoming
// value for every alloca being promoted (Values, indexed parallel to the
// Allocas vector).
class RenamePassData {
public:
  typedef std::vector<Value *> ValVector;
  RenamePassData() : BB(nullptr), Pred(nullptr), Values() {}
  RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
      : BB(B), Pred(P), Values(V) {}
  BasicBlock *BB;
  BasicBlock *Pred;
  ValVector Values;
  // Cheap exchange used when popping entries off the worklist; avoids
  // copying the (potentially large) Values vector.
  void swap(RenamePassData &RHS) {
    std::swap(BB, RHS.BB);
    std::swap(Pred, RHS.Pred);
    Values.swap(RHS.Values);
  }
};
/// \brief This assigns and keeps a per-bb relative ordering of load/store
/// instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// \brief For each instruction that we track, keep the index of the
  /// instruction.
  ///
  /// The index starts out as the number of the instruction from the start of
  /// the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;

public:
  /// This code only looks at accesses to allocas: loads whose pointer
  /// operand is an alloca, and stores whose destination is an alloca.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// Get or calculate the index of the specified instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end())
      return It->second;

    // Scan the whole block to get the instruction. This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
         ++BBI)
      if (isInterestingInstruction(BBI))
        InstNumbers[BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  // Forget a deleted instruction so its (now stale) pointer cannot collide
  // with a future allocation at the same address.
  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }

  // Drop all cached numbering.
  void clear() { InstNumbers.clear(); }
};
/// Driver object for the full (multi-block) promotion algorithm; owns all
/// state shared between PHI placement and the rename pass.
struct PromoteMem2Reg {
  /// The alloca instructions being promoted.
  std::vector<AllocaInst *> Allocas;
  DominatorTree &DT;
  DIBuilder DIB;

  /// An AliasSetTracker object to update. If null, don't update it.
  AliasSetTracker *AST;

  /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
  AssumptionTracker *AT;

  /// Reverse mapping of Allocas: alloca -> its index in the Allocas vector.
  DenseMap<AllocaInst *, unsigned> AllocaLookup;

  /// \brief The PhiNodes we're adding, keyed by (BB number, alloca number).
  ///
  /// That map is used to simplify some Phi nodes as we iterate over it, so
  /// it should have deterministic iterators. We could use a MapVector, but
  /// since we already maintain a map from BasicBlock* to a stable numbering
  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;

  /// For each PHI node, keep track of which entry in Allocas it corresponds
  /// to.
  DenseMap<PHINode *, unsigned> PhiToAllocaMap;

  /// If we are updating an AliasSetTracker, then for each alloca that is of
  /// pointer type, we keep track of what to copyValue to the inserted PHI
  /// nodes here.
  std::vector<Value *> PointerAllocaValues;

  /// For each alloca, we keep track of the dbg.declare intrinsic that
  /// describes it, if any, so that we can convert it to a dbg.value
  /// intrinsic if the alloca gets promoted.
  SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;

  /// The set of basic blocks the renamer has already visited.
  SmallPtrSet<BasicBlock *, 16> Visited;

  /// Contains a stable numbering of basic blocks to avoid non-determinstic
  /// behavior.
  DenseMap<BasicBlock *, unsigned> BBNumbers;

  /// Maps DomTreeNodes to their level in the dominator tree.
  DenseMap<DomTreeNode *, unsigned> DomLevels;

  /// Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock *, unsigned> BBNumPreds;

public:
  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                 AliasSetTracker *AST, AssumptionTracker *AT)
      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
        DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
        AST(AST), AT(AT) {}

  void run();

private:
  // Swap-with-back removal: overwrite the slot with the last element and
  // decrement AllocaIdx so the caller's loop revisits the swapped-in entry.
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

  // Count (and cache) BB's predecessors. The cached value is biased by one
  // because a fresh DenseMap entry default-constructs to 0, which must mean
  // "not computed yet" rather than "zero predecessors".
  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
    return NP - 1;
  }

  void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                               AllocaInfo &Info);
  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                           SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};
  271. } // end of anonymous namespace
  272. static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
  273. // Knowing that this alloca is promotable, we know that it's safe to kill all
  274. // instructions except for load and store.
  275. for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
  276. Instruction *I = cast<Instruction>(*UI);
  277. ++UI;
  278. if (isa<LoadInst>(I) || isa<StoreInst>(I))
  279. continue;
  280. if (!I->getType()->isVoidTy()) {
  281. // The only users of this bitcast/GEP instruction are lifetime intrinsics.
  282. // Follow the use/def chain to erase them now instead of leaving it for
  283. // dead code elimination later.
  284. for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
  285. Instruction *Inst = cast<Instruction>(*UUI);
  286. ++UUI;
  287. Inst->eraseFromParent();
  288. }
  289. }
  290. I->eraseFromParent();
  291. }
  292. }
/// \brief Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     DominatorTree &DT,
                                     AliasSetTracker *AST) {
  StoreInst *OnlyStore = Info.OnlyStore;
  // A stored value that is not an instruction (constant, argument, global)
  // trivially dominates every load, so no dominance queries are needed.
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  // Intra-block index of the store; computed lazily (-1 = not yet known).
  int StoreIndex = -1;

  // Clear out UsingBlocks. We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  // Walk all users; loads are erased as they are rewritten, hence the
  // increment-before-use iteration.
  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);
    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca. We can do this if the value is
    // dominated by the store. If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first. If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }
      } else if (LI->getParent() != StoreBB &&
                 !DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships. If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
                  /*AllowUnresolved*/ false);
    ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
    DDI->eraseFromParent();
    LBI.deleteValue(DDI);
  }
  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);
  return true;
}
/// Many allocas are only used within a single basic block. If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// Loads that are not preceded by any store in the block are rewritten to
/// undef. This is correct in cases where, due to control flow, the alloca is
/// potentially undefined on some control flow paths. e.g. code like this is
/// potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     AliasSetTracker *AST) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen. This does not
  // significantly pessimize the small block case. This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (User *U : AI->users())
    if (StoreInst *SI = dyn_cast<StoreInst>(U))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any. Loads are erased as we go, hence the
  // increment-before-use iteration.
  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    // (nullptr compares below any real store at the same index, so a load
    // and store sharing an index resolve with the store "before" the load.)
    StoresByIndexTy::iterator I =
        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                         std::make_pair(LoadIdx,
                                        static_cast<StoreInst *>(nullptr)),
                         less_first());

    if (I == StoresByIndex.begin())
      // If there is no store before this load, the load takes the undef value.
      LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
    else
      // Otherwise, there was a store before this load, the load takes its value.
      LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));

    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca. Only stores remain as users at
  // this point, since every load was erased above.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->user_back());
    // Record debuginfo for the store before removing it.
    if (DbgDeclareInst *DDI = Info.DbgDeclare) {
      DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
                    /*AllowUnresolved*/ false);
      ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);

  // The alloca's debuginfo can be removed as well.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DDI->eraseFromParent();
    LBI.deleteValue(DDI);
  }

  ++NumLocalPromoted;
}
/// Main driver: try the cheap promotion strategies per alloca, then fall back
/// to full PHI placement plus a dominator-order rename pass, and finally
/// clean up (simplify PHIs, patch PHIs in unreachable predecessors).
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST)
    PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  // Note: Allocas.size() is re-read each iteration because the cheap paths
  // below shrink the list via RemoveFromAllocasList.
  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST)
        AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca. This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      promoteSingleBlockAlloca(AI, Info, LBI, AST);

      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed dominator tree levels, do so now. Levels are
    // computed once here (DFS over the dominator tree) and shared by every
    // remaining alloca.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode *, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare)
      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm. Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be null values for all of
  // the alloca's. We do this in case there is a load of a value that has not
  // been stored yet. In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops. Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST)
      AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare instrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values. This can
  // happen due to undef values coming into the PHI nodes. This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go. If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        // Post-increment erase keeps the iterator valid across removal.
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code. Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed. If this is the case, the PHI nodes may not
  // have incoming values for all predecessors. Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block. As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if there the PHI nodes are missing incoming values. We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks. Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block. Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}
  656. /// \brief Determine which blocks the value is live in.
  657. ///
  658. /// These are blocks which lead to uses. Knowing this allows us to avoid
  659. /// inserting PHI nodes into blocks which don't lead to uses (thus, the
  660. /// inserted phi nodes would be dead).
  661. void PromoteMem2Reg::ComputeLiveInBlocks(
  662. AllocaInst *AI, AllocaInfo &Info,
  663. const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
  664. SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
  665. // To determine liveness, we must iterate through the predecessors of blocks
  666. // where the def is live. Blocks are added to the worklist if we need to
  667. // check their predecessors. Start with all the using blocks.
  668. SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
  669. Info.UsingBlocks.end());
  670. // If any of the using blocks is also a definition block, check to see if the
  671. // definition occurs before or after the use. If it happens before the use,
  672. // the value isn't really live-in.
  673. for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
  674. BasicBlock *BB = LiveInBlockWorklist[i];
  675. if (!DefBlocks.count(BB))
  676. continue;
  677. // Okay, this is a block that both uses and defines the value. If the first
  678. // reference to the alloca is a def (store), then we know it isn't live-in.
  679. for (BasicBlock::iterator I = BB->begin();; ++I) {
  680. if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
  681. if (SI->getOperand(1) != AI)
  682. continue;
  683. // We found a store to the alloca before a load. The alloca is not
  684. // actually live-in here.
  685. LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
  686. LiveInBlockWorklist.pop_back();
  687. --i, --e;
  688. break;
  689. }
  690. if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
  691. if (LI->getOperand(0) != AI)
  692. continue;
  693. // Okay, we found a load before a store to the alloca. It is actually
  694. // live into this block.
  695. break;
  696. }
  697. }
  698. }
  699. // Now that we have a set of blocks where the phi is live-in, recursively add
  700. // their predecessors until we find the full region the value is live.
  701. while (!LiveInBlockWorklist.empty()) {
  702. BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
  703. // The block really is live in here, insert it into the set. If already in
  704. // the set, then it has already been processed.
  705. if (!LiveInBlocks.insert(BB).second)
  706. continue;
  707. // Since the value is live into BB, it is either defined in a predecessor or
  708. // live into it to. Add the preds to the worklist unless they are a
  709. // defining block.
  710. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
  711. BasicBlock *P = *PI;
  712. // The value is not live into a predecessor if it defines the value.
  713. if (DefBlocks.count(P))
  714. continue;
  715. // Otherwise it is, add to the worklist.
  716. LiveInBlockWorklist.push_back(P);
  717. }
  718. }
  719. }
/// At this point, we're committed to promoting the alloca using IDF's, and the
/// standard SSA construction algorithm. Determine which blocks need phi nodes
/// and see if we can optimize out some work by avoiding insertion of dead phi
/// nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock *, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in. These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
  typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              less_second> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  // Seed the queue with the dominator tree node (and its level) of every
  // block that holds a definition of the alloca.
  for (BasicBlock *BB : DefBlocks) {
    if (DomTreeNode *Node = DT.getNode(BB))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode *, 32> Visited;
  SmallVector<DomTreeNode *, 32> Worklist;
  while (!PQ.empty()) {
    // Deepest remaining node first (the queue orders pairs by their second
    // element, the dominator tree level).
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree. Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.
    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        // Edges into nodes deeper than Root stay inside Root's dominated
        // region and are not frontier edges.
        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        // Record each frontier block only once.
        if (!Visited.insert(SuccNode).second)
          continue;

        // Skip blocks where the value is not live-in: a PHI there would be
        // dead.
        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));

        // The PHI to be inserted in SuccBB is itself a new definition, so
        // unless SuccBB already contains a def, its dominance frontier must
        // be processed too — this is what makes the frontier "iterated".
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      // Continue the subtree walk with the unvisited dominator tree children.
      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
      }
    }
  }

  // Sort by block number so that PHI nodes are queued in a deterministic
  // order independent of pointer values.
  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  // Queue one PHI for this alloca in every frontier block; CurrentVersion
  // numbers the PHIs created for this alloca's name suffix.
  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
  792. /// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
  793. ///
  794. /// Returns true if there wasn't already a phi-node for that variable
  795. bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
  796. unsigned &Version) {
  797. // Look up the basic-block in question.
  798. PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
  799. // If the BB already has a phi node added for the i'th alloca then we're done!
  800. if (PN)
  801. return false;
  802. // Create a PhiNode using the dereferenced type... and add the phi-node to the
  803. // BasicBlock.
  804. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
  805. Allocas[AllocaNo]->getName() + "." + Twine(Version++),
  806. BB->begin());
  807. ++NumPHIInsert;
  808. PhiToAllocaMap[PN] = AllocaNo;
  809. if (AST && PN->getType()->isPointerTy())
  810. AST->copyValue(PointerAllocaValues[AllocaNo], PN);
  811. return true;
  812. }
/// \brief Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in the
  // block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run. We determine that APN is being inserted
      // because it is missing incoming edges. All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far. Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Pred may reach BB along several CFG edges (e.g. multiple switch cases
      // targeting the same successor); each edge needs its own PHI entry.
      unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (!APN)
          break;

        // Verify that it is missing entries. If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  if (!Visited.insert(BB).second)
    return;

  // Rewrite every promotable load/store in this block, maintaining the
  // current SSA value of each alloca in IncomingVals.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src)
        continue;

      // Ignore loads from allocas that we are not promoting.
      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end())
        continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest)
        continue;

      // Ignore stores to allocas that we are not promoting.
      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);

      // Record debuginfo for the store before removing it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E)
    return;

  // Keep track of the successors so we don't visit the same successor twice
  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  // All other (distinct) successors are deferred to the worklist with a copy
  // of the current IncomingVals.
  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I).second)
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  // Iterate on the first successor via goto instead of recursing, keeping
  // the native call stack shallow on long CFG chains.
  goto NextIteration;
}
  907. void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
  908. AliasSetTracker *AST, AssumptionTracker *AT) {
  909. // If there is nothing to do, bail out...
  910. if (Allocas.empty())
  911. return;
  912. PromoteMem2Reg(Allocas, DT, AST, AT).run();
  913. }