PromoteMemoryToRegister.cpp
//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references. It promotes
// alloca instructions which only have loads and stores as uses. An alloca is
// transformed by using iterated dominator frontiers to place PHI nodes, then
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
//===----------------------------------------------------------------------===//
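//
// As a quick illustration of the transformation (the IR below is a
// hypothetical sketch, not taken from this file): stack traffic such as
//
//   %x = alloca i32
//   store i32 1, i32* %x
//   %v = load i32, i32* %x
//
// becomes direct SSA value flow: the load's uses are rewritten to the stored
// value (here, the constant 1), and the alloca, store, and load are erased.
// When stores reach a block along multiple paths, PHI nodes are placed at
// the iterated dominance frontier of the storing blocks.
//
//===----------------------------------------------------------------------===//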
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "mem2reg"

STATISTIC(NumLocalPromoted, "Number of allocas promoted within one block");
STATISTIC(NumSingleStore, "Number of allocas promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead allocas removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");

bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.
  unsigned AS = AI->getType()->getAddressSpace();

  // Only allow direct and non-volatile loads and stores...
  for (const User *U : AI->users()) {
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Note that atomic loads can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI)
        return false; // Don't allow a store OF the AI, only INTO the AI.
      // Note that atomic stores can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (SI->isVolatile())
        return false;
    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
          II->getIntrinsicID() != Intrinsic::lifetime_end)
        return false;
    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
        return false;
      if (!onlyUsedByLifetimeMarkers(BCI))
        return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
        return false;
      if (!GEPI->hasAllZeroIndices())
        return false;
      if (!onlyUsedByLifetimeMarkers(GEPI))
        return false;
    } else {
      return false;
    }
  }

  return true;
}
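
// Example (hypothetical IR, for illustration): an alloca whose address
// escapes is rejected, because the store below is a store OF the alloca's
// address, not INTO the alloca:
//
//   %p = alloca i32
//   store i32* %p, i32** %q        ; isAllocaPromotable(%p) == false
//
// Plain non-volatile loads and stores through %p would leave it promotable.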
namespace {

struct AllocaInfo {
  SmallVector<BasicBlock *, 32> DefiningBlocks;
  SmallVector<BasicBlock *, 32> UsingBlocks;

  StoreInst *OnlyStore;
  BasicBlock *OnlyBlock;
  bool OnlyUsedInOneBlock;

  Value *AllocaPointerVal;
  TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares;

  void clear() {
    DefiningBlocks.clear();
    UsingBlocks.clear();
    OnlyStore = nullptr;
    OnlyBlock = nullptr;
    OnlyUsedInOneBlock = true;
    AllocaPointerVal = nullptr;
    DbgDeclares.clear();
  }

  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
  /// by the rest of the pass to reason about the uses of this alloca.
  void AnalyzeAlloca(AllocaInst *AI) {
    clear();

    // As we scan the uses of the alloca instruction, keep track of stores,
    // and decide whether all of the loads and stores to the alloca are within
    // the same basic block.
    for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
      Instruction *User = cast<Instruction>(*UI++);

      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // Remember the basic blocks which define new values for the alloca.
        DefiningBlocks.push_back(SI->getParent());
        AllocaPointerVal = SI->getOperand(0);
        OnlyStore = SI;
      } else {
        LoadInst *LI = cast<LoadInst>(User);
        // Otherwise it must be a load instruction, keep track of variable
        // reads.
        UsingBlocks.push_back(LI->getParent());
        AllocaPointerVal = LI;
      }

      if (OnlyUsedInOneBlock) {
        if (!OnlyBlock)
          OnlyBlock = User->getParent();
        else if (OnlyBlock != User->getParent())
          OnlyUsedInOneBlock = false;
      }
    }

    DbgDeclares = FindDbgAddrUses(AI);
  }
};

/// Data package used by RenamePass().
struct RenamePassData {
  using ValVector = std::vector<Value *>;
  using LocationVector = std::vector<DebugLoc>;

  RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
      : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}

  BasicBlock *BB;
  BasicBlock *Pred;
  ValVector Values;
  LocationVector Locations;
};

/// This assigns and keeps a per-bb relative ordering of load/store
/// instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// For each instruction that we track, keep the index of the
  /// instruction.
  ///
  /// The index starts out as the number of the instruction from the start of
  /// the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;

public:
  /// This code only looks at accesses to allocas.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// Get or calculate the index of the specified instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end())
      return It->second;

    // Scan the whole block to get the instruction. This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (const Instruction &BBI : *BB)
      if (isInterestingInstruction(&BBI))
        InstNumbers[&BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }

  void clear() { InstNumbers.clear(); }
};

struct PromoteMem2Reg {
  /// The alloca instructions being promoted.
  std::vector<AllocaInst *> Allocas;

  DominatorTree &DT;
  DIBuilder DIB;

  /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
  AssumptionCache *AC;

  const SimplifyQuery SQ;

  /// Reverse mapping of Allocas.
  DenseMap<AllocaInst *, unsigned> AllocaLookup;

  /// The PhiNodes we're adding.
  ///
  /// That map is used to simplify some Phi nodes as we iterate over it, so
  /// it should have deterministic iterators. We could use a MapVector, but
  /// since we already maintain a map from BasicBlock* to a stable numbering
  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;

  /// For each PHI node, keep track of which entry in Allocas it corresponds
  /// to.
  DenseMap<PHINode *, unsigned> PhiToAllocaMap;

  /// If we are updating an AliasSetTracker, then for each alloca that is of
  /// pointer type, we keep track of what to copyValue to the inserted PHI
  /// nodes here.
  std::vector<Value *> PointerAllocaValues;

  /// For each alloca, we keep track of the dbg.declare intrinsic that
  /// describes it, if any, so that we can convert it to a dbg.value
  /// intrinsic if the alloca gets promoted.
  SmallVector<TinyPtrVector<DbgInfoIntrinsic *>, 8> AllocaDbgDeclares;

  /// The set of basic blocks the renamer has already visited.
  SmallPtrSet<BasicBlock *, 16> Visited;

  /// Contains a stable numbering of basic blocks to avoid non-deterministic
  /// behavior.
  DenseMap<BasicBlock *, unsigned> BBNumbers;

  /// Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock *, unsigned> BBNumPreds;

public:
  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                 AssumptionCache *AC)
      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
        DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
        AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
                   nullptr, &DT, AC) {}

  void run();

private:
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = pred_size(BB) + 1;
    return NP - 1;
  }

  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                           SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  RenamePassData::LocationVector &IncLocs,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};

} // end anonymous namespace

/// Given a LoadInst LI this adds assume(LI != null) after it.
static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
  Function *AssumeIntrinsic =
      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
                                       Constant::getNullValue(LI->getType()));
  LoadNotNull->insertAfter(LI);
  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
  CI->insertAfter(LoadNotNull);
  AC->registerAssumption(CI);
}
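
// For illustration (hypothetical IR names), the pattern this emits after a
// nonnull-annotated load looks like:
//
//   %ld = load i32*, i32** %a, !nonnull !0
//   %cmp = icmp ne i32* %ld, null
//   call void @llvm.assume(i1 %cmp)
//
// Callers invoke this before replacing the load, so the subsequent
// replaceAllUsesWith() redirects %cmp to constrain the replacement value.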
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
  // Knowing that this alloca is promotable, we know that it's safe to kill all
  // instructions except for load and store.

  for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
    Instruction *I = cast<Instruction>(*UI);
    ++UI;
    if (isa<LoadInst>(I) || isa<StoreInst>(I))
      continue;

    if (!I->getType()->isVoidTy()) {
      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
      // Follow the use/def chain to erase them now instead of leaving it for
      // dead code elimination later.
      for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
        Instruction *Inst = cast<Instruction>(*UUI);
        ++UUI;
        Inst->eraseFromParent();
      }
    }
    I->eraseFromParent();
  }
}

/// Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI, const DataLayout &DL,
                                     DominatorTree &DT, AssumptionCache *AC) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks. We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca. We can do this if the value is
    // dominated by the store. If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first. If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }
      } else if (LI->getParent() != StoreBB &&
                 !DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships. If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());

    // If the load was marked as nonnull we don't want to lose
    // that information when we erase this load. So we preserve
    // it with an assume.
    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
        !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
      addAssumeNonNull(AC, LI);

    LI->replaceAllUsesWith(ReplVal);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
    DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
    ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
    DII->eraseFromParent();
    LBI.deleteValue(DII);
  }
  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  AI->eraseFromParent();
  LBI.deleteValue(AI);
  return true;
}
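
// Single-store example (hypothetical IR, for illustration):
//
//   entry:
//     %x = alloca i32
//     store i32 %v, i32* %x
//     br label %use
//   use:
//     %l = load i32, i32* %x       ; dominated by the single store
//
// Every such load is rewritten to %v directly and the alloca dies without
// any PHI nodes; a load the store does not dominate would instead force the
// fall-back path described above.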
/// Many allocas are only used within a single basic block. If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return false. This is necessary in cases where, due to control flow, the
/// alloca is undefined only on some control flow paths; e.g., code like
/// this is correct in LLVM IR:
///
///     // A is an alloca with no stores so far
///     for (...) {
///       int t = *A;
///       if (!first_iteration)
///         use(t);
///       *A = 42;
///     }
static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     const DataLayout &DL,
                                     DominatorTree &DT,
                                     AssumptionCache *AC) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen. This does not
  // significantly pessimize the small block case. This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>;
  StoresByIndexTy StoresByIndex;

  for (User *U : AI->users())
    if (StoreInst *SI = dyn_cast<StoreInst>(U))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  llvm::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I =
        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                         std::make_pair(LoadIdx,
                                        static_cast<StoreInst *>(nullptr)),
                         less_first());
    if (I == StoresByIndex.begin()) {
      if (StoresByIndex.empty())
        // If there are no stores, the load takes the undef value.
        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
      else
        // There is no store before this load, bail out (load may be affected
        // by the following stores - see main comment).
        return false;
    } else {
      // Otherwise, there was a store before this load, so the load takes its
      // value. Note, if the load was marked as nonnull we don't want to lose
      // that information when we erase it. So we preserve it with an assume.
      Value *ReplVal = std::prev(I)->second->getOperand(0);
      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
          !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
        addAssumeNonNull(AC, LI);

      LI->replaceAllUsesWith(ReplVal);
    }

    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->user_back());
    // Record debuginfo for the store before removing it.
    for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
      DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
      ConvertDebugDeclareToDebugValue(DII, SI, DIB);
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  AI->eraseFromParent();
  LBI.deleteValue(AI);

  // The alloca's debuginfo can be removed as well.
  for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
    DII->eraseFromParent();
    LBI.deleteValue(DII);
  }

  ++NumLocalPromoted;
  return true;
}

void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;
  ForwardIDFCalculator IDF(DT);

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed.
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca. This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock &&
        promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (auto &BB : F)
        BBNumbers[&BB] = ID++;
    }

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (!Info.DbgDeclares.empty())
      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm. Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.

    // Unique the set of defining blocks for efficient lookup.
    SmallPtrSet<BasicBlock *, 32> DefBlocks;
    DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

    // Determine which blocks the value is live in. These are blocks which lead
    // to uses.
    SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
    ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

    IDF.setLiveInBlocks(LiveInBlocks);
    IDF.setDefiningBlocks(DefBlocks);
    SmallVector<BasicBlock *, 32> PHIBlocks;
    IDF.calculate(PHIBlocks);
    if (PHIBlocks.size() > 1)
      llvm::sort(PHIBlocks.begin(), PHIBlocks.end(),
                 [this](BasicBlock *A, BasicBlock *B) {
                   return BBNumbers.lookup(A) < BBNumbers.lookup(B);
                 });

    unsigned CurrentVersion = 0;
    for (BasicBlock *BB : PHIBlocks)
      QueuePhiNode(BB, AllocaNum, CurrentVersion);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be undef values for all of
  // the allocas. We do this in case there is a load of a value that has not
  // been stored yet. In this case, it will get this undef value.
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // When handling debug info, treat all incoming values as if they have unknown
  // locations until proven otherwise.
  RenamePassData::LocationVector Locations(Allocas.size());

  // Walk all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary.
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values),
                                  std::move(Locations));
  do {
    RenamePassData RPD = std::move(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops. Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (Instruction *A : Allocas) {
    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    A->eraseFromParent();
  }

  // Remove the allocas' dbg.declare intrinsics from the function.
  for (auto &Declares : AllocaDbgDeclares)
    for (auto *DII : Declares)
      DII->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values. This can
  // happen due to undef values coming into the PHI nodes. This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go. If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, SQ)) {
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code. Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed. If this is the case, the PHI nodes may not
  // have incoming values for all predecessors. Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block. As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values. We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks. Start by sorting the incoming predecessors for efficient
    // access.
    llvm::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry.
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI node for the block. Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (BasicBlock *Pred : Preds)
        SomePHI->addIncoming(UndefVal, Pred);
    }
  }

  NewPhiNodes.clear();
}

/// Determine which blocks the value is live in.
///
/// These are blocks which lead to uses. Knowing this allows us to avoid
/// inserting PHI nodes into blocks which don't lead to uses (thus, the
/// inserted phi nodes would be dead).
void PromoteMem2Reg::ComputeLiveInBlocks(
    AllocaInst *AI, AllocaInfo &Info,
    const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
    SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live. Blocks are added to the worklist if we need to
  // check their predecessors. Start with all the using blocks.
  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                    Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use. If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB))
      continue;

    // Okay, this is a block that both uses and defines the value. If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    for (BasicBlock::iterator I = BB->begin();; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI)
          continue;

        // We found a store to the alloca before a load. The alloca is not
        // actually live-in here.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i;
        --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI)
          continue;

        // Okay, we found a load before a store to the alloca. It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set. If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB).second)
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too. Add the preds to the worklist unless they are a
    // defining block.
    for (BasicBlock *P : predecessors(BB)) {
      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}
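
// Worked example (hypothetical IR, for illustration): for a block that both
// uses and defines the alloca, the first-reference scan above decides
// live-ness by ordering:
//
//   bb:                              bb:
//     store i32 0, i32* %a             %v = load i32, i32* %a
//     %v = load i32, i32* %a           store i32 0, i32* %a
//     ; %a is NOT live-in to bb        ; %a IS live-in to bb
//
// Only genuinely live-in blocks seed the predecessor walk, which keeps the
// IDF calculation from requesting dead PHI nodes.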
/// Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable.
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                  unsigned &Version) {
  // Look up the basic-block in question.
  PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];

  // If the BB already has a phi node added for the i'th alloca then we're done!
  if (PN)
    return false;

  // Create a PhiNode using the dereferenced type... and add the phi-node to the
  // BasicBlock.
  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                       &BB->front());
  ++NumPHIInsert;
  PhiToAllocaMap[PN] = AllocaNo;
  return true;
}
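
// Naming sketch (hypothetical): for an alloca named %x this creates PHIs
// named %x.0, %x.1, ... as Version increments. The node starts with no
// incoming values; after RenamePass fills it in, it looks like:
//
//   %x.0 = phi i32 [ %v1, %bb1 ], [ %v2, %bb2 ]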
/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to
/// create a merged location incorporating \p DL, or to set \p DL directly.
static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL,
                                           bool ApplyMergedLoc) {
  if (ApplyMergedLoc)
    PN->applyMergedLocation(PN->getDebugLoc(), DL);
  else
    PN->setDebugLoc(DL);
}

/// Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                RenamePassData::LocationVector &IncomingLocs,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in the
  // block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run. We determine that APN is being inserted
      // because it is missing incoming edges. All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far. Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Update the location of the phi node.
        updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo],
                                       APN->getNumIncomingValues() > 0);

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;
        for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
          ConvertDebugDeclareToDebugValue(DII, APN, DIB);

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (!APN)
          break;

        // Verify that it is missing entries. If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  if (!Visited.insert(BB).second)
    return;

  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
    Instruction *I = &*II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end())
        continue;

      Value *V = IncomingVals[AI->second];

      // If the load was marked as nonnull we don't want to lose
      // that information when we erase this load. So we preserve
      // it with an assume.
      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
          !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT))
        addAssumeNonNull(AC, LI);

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value.
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // What value were we writing?
      unsigned AllocaNo = ai->second;
      IncomingVals[AllocaNo] = SI->getOperand(0);

      // Record debuginfo for the store before removing it.
      IncomingLocs[AllocaNo] = SI->getDebugLoc();
      for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
        ConvertDebugDeclareToDebugValue(DII, SI, DIB);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E)
    return;

  // Keep track of the successors so we don't visit the same successor twice.
  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I).second)
      Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs);

  goto NextIteration;
}

void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                           AssumptionCache *AC) {
  // If there is nothing to do, bail out...
  if (Allocas.empty())
    return;

  PromoteMem2Reg(Allocas, DT, AC).run();
}
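
// A minimal sketch of a typical caller (assumptions: the caller has already
// computed a DominatorTree DT and an AssumptionCache AC for the function F;
// all names here are illustrative, not part of this file):
//
//   std::vector<AllocaInst *> Allocas;
//   for (Instruction &I : F.getEntryBlock())      // allocas of interest are
//     if (auto *AI = dyn_cast<AllocaInst>(&I))    // usually in the entry block
//       if (isAllocaPromotable(AI))
//         Allocas.push_back(AI);
//   if (!Allocas.empty())
//     PromoteMemToReg(Allocas, DT, &AC);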