MachineCSE.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. //===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This pass performs global common subexpression elimination on machine
  11. // instructions using a scoped hash table based value numbering scheme. It
  12. // must be run while the machine function is still in SSA form.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #define DEBUG_TYPE "machine-cse"
  16. #include "llvm/CodeGen/Passes.h"
  17. #include "llvm/CodeGen/MachineDominators.h"
  18. #include "llvm/CodeGen/MachineInstr.h"
  19. #include "llvm/CodeGen/MachineRegisterInfo.h"
  20. #include "llvm/Analysis/AliasAnalysis.h"
  21. #include "llvm/Target/TargetInstrInfo.h"
  22. #include "llvm/ADT/DenseMap.h"
  23. #include "llvm/ADT/ScopedHashTable.h"
  24. #include "llvm/ADT/Statistic.h"
  25. #include "llvm/Support/CommandLine.h"
  26. #include "llvm/Support/Debug.h"
  27. using namespace llvm;
  28. STATISTIC(NumCoalesces, "Number of copies coalesced");
  29. STATISTIC(NumCSEs, "Number of common subexpression eliminated");
  30. STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated");
  31. namespace {
  32. class MachineCSE : public MachineFunctionPass {
  33. const TargetInstrInfo *TII;
  34. const TargetRegisterInfo *TRI;
  35. AliasAnalysis *AA;
  36. MachineDominatorTree *DT;
  37. MachineRegisterInfo *MRI;
  38. public:
  39. static char ID; // Pass identification
  40. MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {}
  41. virtual bool runOnMachineFunction(MachineFunction &MF);
  42. virtual void getAnalysisUsage(AnalysisUsage &AU) const {
  43. AU.setPreservesCFG();
  44. MachineFunctionPass::getAnalysisUsage(AU);
  45. AU.addRequired<AliasAnalysis>();
  46. AU.addPreservedID(MachineLoopInfoID);
  47. AU.addRequired<MachineDominatorTree>();
  48. AU.addPreserved<MachineDominatorTree>();
  49. }
  50. virtual void releaseMemory() {
  51. ScopeMap.clear();
  52. Exps.clear();
  53. }
  54. private:
  55. const unsigned LookAheadLimit;
  56. typedef ScopedHashTableScope<MachineInstr*, unsigned,
  57. MachineInstrExpressionTrait> ScopeType;
  58. DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
  59. ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
  60. SmallVector<MachineInstr*, 64> Exps;
  61. unsigned CurrVN;
  62. bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
  63. bool isPhysDefTriviallyDead(unsigned Reg,
  64. MachineBasicBlock::const_iterator I,
  65. MachineBasicBlock::const_iterator E) const ;
  66. bool hasLivePhysRegDefUse(const MachineInstr *MI,
  67. const MachineBasicBlock *MBB,
  68. unsigned &PhysDef) const;
  69. bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
  70. unsigned PhysDef) const;
  71. bool isCSECandidate(MachineInstr *MI);
  72. bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
  73. MachineInstr *CSMI, MachineInstr *MI);
  74. void EnterScope(MachineBasicBlock *MBB);
  75. void ExitScope(MachineBasicBlock *MBB);
  76. bool ProcessBlock(MachineBasicBlock *MBB);
  77. void ExitScopeIfDone(MachineDomTreeNode *Node,
  78. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
  79. DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
  80. bool PerformCSE(MachineDomTreeNode *Node);
  81. };
  82. } // end anonymous namespace
  83. char MachineCSE::ID = 0;
  84. INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
  85. "Machine Common Subexpression Elimination", false, false)
  86. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  87. INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
  88. INITIALIZE_PASS_END(MachineCSE, "machine-cse",
  89. "Machine Common Subexpression Elimination", false, false)
  90. FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
  91. bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
  92. MachineBasicBlock *MBB) {
  93. bool Changed = false;
  94. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  95. MachineOperand &MO = MI->getOperand(i);
  96. if (!MO.isReg() || !MO.isUse())
  97. continue;
  98. unsigned Reg = MO.getReg();
  99. if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
  100. continue;
  101. if (!MRI->hasOneNonDBGUse(Reg))
  102. // Only coalesce single use copies. This ensure the copy will be
  103. // deleted.
  104. continue;
  105. MachineInstr *DefMI = MRI->getVRegDef(Reg);
  106. if (DefMI->getParent() != MBB)
  107. continue;
  108. if (!DefMI->isCopy())
  109. continue;
  110. unsigned SrcReg = DefMI->getOperand(1).getReg();
  111. if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
  112. continue;
  113. if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
  114. continue;
  115. if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
  116. continue;
  117. DEBUG(dbgs() << "Coalescing: " << *DefMI);
  118. DEBUG(dbgs() << "*** to: " << *MI);
  119. MO.setReg(SrcReg);
  120. MRI->clearKillFlags(SrcReg);
  121. DefMI->eraseFromParent();
  122. ++NumCoalesces;
  123. Changed = true;
  124. }
  125. return Changed;
  126. }
  127. bool
  128. MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
  129. MachineBasicBlock::const_iterator I,
  130. MachineBasicBlock::const_iterator E) const {
  131. unsigned LookAheadLeft = LookAheadLimit;
  132. while (LookAheadLeft) {
  133. // Skip over dbg_value's.
  134. while (I != E && I->isDebugValue())
  135. ++I;
  136. if (I == E)
  137. // Reached end of block, register is obviously dead.
  138. return true;
  139. bool SeenDef = false;
  140. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
  141. const MachineOperand &MO = I->getOperand(i);
  142. if (!MO.isReg() || !MO.getReg())
  143. continue;
  144. if (!TRI->regsOverlap(MO.getReg(), Reg))
  145. continue;
  146. if (MO.isUse())
  147. // Found a use!
  148. return false;
  149. SeenDef = true;
  150. }
  151. if (SeenDef)
  152. // See a def of Reg (or an alias) before encountering any use, it's
  153. // trivially dead.
  154. return true;
  155. --LookAheadLeft;
  156. ++I;
  157. }
  158. return false;
  159. }
  160. /// hasLivePhysRegDefUse - Return true if the specified instruction read / write
  161. /// physical registers (except for dead defs of physical registers). It also
  162. /// returns the physical register def by reference if it's the only one and the
  163. /// instruction does not uses a physical register.
  164. bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
  165. const MachineBasicBlock *MBB,
  166. unsigned &PhysDef) const {
  167. PhysDef = 0;
  168. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  169. const MachineOperand &MO = MI->getOperand(i);
  170. if (!MO.isReg())
  171. continue;
  172. unsigned Reg = MO.getReg();
  173. if (!Reg)
  174. continue;
  175. if (TargetRegisterInfo::isVirtualRegister(Reg))
  176. continue;
  177. if (MO.isUse()) {
  178. // Can't touch anything to read a physical register.
  179. PhysDef = 0;
  180. return true;
  181. }
  182. if (MO.isDead())
  183. // If the def is dead, it's ok.
  184. continue;
  185. // Ok, this is a physical register def that's not marked "dead". That's
  186. // common since this pass is run before livevariables. We can scan
  187. // forward a few instructions and check if it is obviously dead.
  188. if (PhysDef) {
  189. // Multiple physical register defs. These are rare, forget about it.
  190. PhysDef = 0;
  191. return true;
  192. }
  193. PhysDef = Reg;
  194. }
  195. if (PhysDef) {
  196. MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
  197. if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
  198. return true;
  199. }
  200. return false;
  201. }
  202. bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
  203. unsigned PhysDef) const {
  204. // For now conservatively returns false if the common subexpression is
  205. // not in the same basic block as the given instruction.
  206. MachineBasicBlock *MBB = MI->getParent();
  207. if (CSMI->getParent() != MBB)
  208. return false;
  209. MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
  210. MachineBasicBlock::const_iterator E = MI;
  211. unsigned LookAheadLeft = LookAheadLimit;
  212. while (LookAheadLeft) {
  213. // Skip over dbg_value's.
  214. while (I != E && I->isDebugValue())
  215. ++I;
  216. if (I == E)
  217. return true;
  218. if (I->modifiesRegister(PhysDef, TRI))
  219. return false;
  220. --LookAheadLeft;
  221. ++I;
  222. }
  223. return false;
  224. }
  225. bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  226. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
  227. MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
  228. return false;
  229. // Ignore copies.
  230. if (MI->isCopyLike())
  231. return false;
  232. // Ignore stuff that we obviously can't move.
  233. const TargetInstrDesc &TID = MI->getDesc();
  234. if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
  235. TID.hasUnmodeledSideEffects())
  236. return false;
  237. if (TID.mayLoad()) {
  238. // Okay, this instruction does a load. As a refinement, we allow the target
  239. // to decide whether the loaded value is actually a constant. If so, we can
  240. // actually use it as a load.
  241. if (!MI->isInvariantLoad(AA))
  242. // FIXME: we should be able to hoist loads with no other side effects if
  243. // there are no other instructions which can change memory in this loop.
  244. // This is a trivial form of alias analysis.
  245. return false;
  246. }
  247. return true;
  248. }
  249. /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
  250. /// common expression that defines Reg.
  251. bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
  252. MachineInstr *CSMI, MachineInstr *MI) {
  253. // FIXME: Heuristics that works around the lack the live range splitting.
  254. // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an
  255. // immediate predecessor. We don't want to increase register pressure and end up
  256. // causing other computation to be spilled.
  257. if (MI->getDesc().isAsCheapAsAMove()) {
  258. MachineBasicBlock *CSBB = CSMI->getParent();
  259. MachineBasicBlock *BB = MI->getParent();
  260. if (CSBB != BB &&
  261. find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end())
  262. return false;
  263. }
  264. // Heuristics #2: If the expression doesn't not use a vr and the only use
  265. // of the redundant computation are copies, do not cse.
  266. bool HasVRegUse = false;
  267. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  268. const MachineOperand &MO = MI->getOperand(i);
  269. if (MO.isReg() && MO.isUse() && MO.getReg() &&
  270. TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
  271. HasVRegUse = true;
  272. break;
  273. }
  274. }
  275. if (!HasVRegUse) {
  276. bool HasNonCopyUse = false;
  277. for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
  278. E = MRI->use_nodbg_end(); I != E; ++I) {
  279. MachineInstr *Use = &*I;
  280. // Ignore copies.
  281. if (!Use->isCopyLike()) {
  282. HasNonCopyUse = true;
  283. break;
  284. }
  285. }
  286. if (!HasNonCopyUse)
  287. return false;
  288. }
  289. // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  290. // it unless the defined value is already used in the BB of the new use.
  291. bool HasPHI = false;
  292. SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
  293. for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
  294. E = MRI->use_nodbg_end(); I != E; ++I) {
  295. MachineInstr *Use = &*I;
  296. HasPHI |= Use->isPHI();
  297. CSBBs.insert(Use->getParent());
  298. }
  299. if (!HasPHI)
  300. return true;
  301. return CSBBs.count(MI->getParent());
  302. }
  303. void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  304. DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  305. ScopeType *Scope = new ScopeType(VNT);
  306. ScopeMap[MBB] = Scope;
  307. }
  308. void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  309. DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  310. DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  311. assert(SI != ScopeMap.end());
  312. ScopeMap.erase(SI);
  313. delete SI->second;
  314. }
  315. bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  316. bool Changed = false;
  317. SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  318. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
  319. MachineInstr *MI = &*I;
  320. ++I;
  321. if (!isCSECandidate(MI))
  322. continue;
  323. bool DefPhys = false;
  324. bool FoundCSE = VNT.count(MI);
  325. if (!FoundCSE) {
  326. // Look for trivial copy coalescing opportunities.
  327. if (PerformTrivialCoalescing(MI, MBB)) {
  328. // After coalescing MI itself may become a copy.
  329. if (MI->isCopyLike())
  330. continue;
  331. FoundCSE = VNT.count(MI);
  332. }
  333. }
  334. // FIXME: commute commutable instructions?
  335. // If the instruction defines a physical register and the value *may* be
  336. // used, then it's not safe to replace it with a common subexpression.
  337. unsigned PhysDef = 0;
  338. if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
  339. FoundCSE = false;
  340. // ... Unless the CS is local and it also defines the physical register
  341. // which is not clobbered in between.
  342. if (PhysDef) {
  343. unsigned CSVN = VNT.lookup(MI);
  344. MachineInstr *CSMI = Exps[CSVN];
  345. if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
  346. FoundCSE = true;
  347. DefPhys = true;
  348. }
  349. }
  350. }
  351. if (!FoundCSE) {
  352. VNT.insert(MI, CurrVN++);
  353. Exps.push_back(MI);
  354. continue;
  355. }
  356. // Found a common subexpression, eliminate it.
  357. unsigned CSVN = VNT.lookup(MI);
  358. MachineInstr *CSMI = Exps[CSVN];
  359. DEBUG(dbgs() << "Examining: " << *MI);
  360. DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
  361. // Check if it's profitable to perform this CSE.
  362. bool DoCSE = true;
  363. unsigned NumDefs = MI->getDesc().getNumDefs();
  364. for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
  365. MachineOperand &MO = MI->getOperand(i);
  366. if (!MO.isReg() || !MO.isDef())
  367. continue;
  368. unsigned OldReg = MO.getReg();
  369. unsigned NewReg = CSMI->getOperand(i).getReg();
  370. if (OldReg == NewReg)
  371. continue;
  372. assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
  373. TargetRegisterInfo::isVirtualRegister(NewReg) &&
  374. "Do not CSE physical register defs!");
  375. if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
  376. DoCSE = false;
  377. break;
  378. }
  379. CSEPairs.push_back(std::make_pair(OldReg, NewReg));
  380. --NumDefs;
  381. }
  382. // Actually perform the elimination.
  383. if (DoCSE) {
  384. for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
  385. MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
  386. MRI->clearKillFlags(CSEPairs[i].second);
  387. }
  388. MI->eraseFromParent();
  389. ++NumCSEs;
  390. if (DefPhys)
  391. ++NumPhysCSEs;
  392. } else {
  393. DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
  394. VNT.insert(MI, CurrVN++);
  395. Exps.push_back(MI);
  396. }
  397. CSEPairs.clear();
  398. }
  399. return Changed;
  400. }
  401. /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
  402. /// dominator tree node if its a leaf or all of its children are done. Walk
  403. /// up the dominator tree to destroy ancestors which are now done.
  404. void
  405. MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
  406. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
  407. DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
  408. if (OpenChildren[Node])
  409. return;
  410. // Pop scope.
  411. ExitScope(Node->getBlock());
  412. // Now traverse upwards to pop ancestors whose offsprings are all done.
  413. while (MachineDomTreeNode *Parent = ParentMap[Node]) {
  414. unsigned Left = --OpenChildren[Parent];
  415. if (Left != 0)
  416. break;
  417. ExitScope(Parent->getBlock());
  418. Node = Parent;
  419. }
  420. }
  421. bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  422. SmallVector<MachineDomTreeNode*, 32> Scopes;
  423. SmallVector<MachineDomTreeNode*, 8> WorkList;
  424. DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  425. DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
  426. CurrVN = 0;
  427. // Perform a DFS walk to determine the order of visit.
  428. WorkList.push_back(Node);
  429. do {
  430. Node = WorkList.pop_back_val();
  431. Scopes.push_back(Node);
  432. const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
  433. unsigned NumChildren = Children.size();
  434. OpenChildren[Node] = NumChildren;
  435. for (unsigned i = 0; i != NumChildren; ++i) {
  436. MachineDomTreeNode *Child = Children[i];
  437. ParentMap[Child] = Node;
  438. WorkList.push_back(Child);
  439. }
  440. } while (!WorkList.empty());
  441. // Now perform CSE.
  442. bool Changed = false;
  443. for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
  444. MachineDomTreeNode *Node = Scopes[i];
  445. MachineBasicBlock *MBB = Node->getBlock();
  446. EnterScope(MBB);
  447. Changed |= ProcessBlock(MBB);
  448. // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
  449. ExitScopeIfDone(Node, OpenChildren, ParentMap);
  450. }
  451. return Changed;
  452. }
  453. bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  454. TII = MF.getTarget().getInstrInfo();
  455. TRI = MF.getTarget().getRegisterInfo();
  456. MRI = &MF.getRegInfo();
  457. AA = &getAnalysis<AliasAnalysis>();
  458. DT = &getAnalysis<MachineDominatorTree>();
  459. return PerformCSE(DT->getRootNode());
  460. }