  1. //===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This pass performs global common subexpression elimination on machine
  11. // instructions using a scoped hash table based value numbering scheme. It
  12. // must be run while the machine function is still in SSA form.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #define DEBUG_TYPE "machine-cse"
  16. #include "llvm/CodeGen/Passes.h"
  17. #include "llvm/ADT/DenseMap.h"
  18. #include "llvm/ADT/ScopedHashTable.h"
  19. #include "llvm/ADT/SmallSet.h"
  20. #include "llvm/ADT/Statistic.h"
  21. #include "llvm/Analysis/AliasAnalysis.h"
  22. #include "llvm/CodeGen/MachineDominators.h"
  23. #include "llvm/CodeGen/MachineInstr.h"
  24. #include "llvm/CodeGen/MachineRegisterInfo.h"
  25. #include "llvm/Support/Debug.h"
  26. #include "llvm/Support/RecyclingAllocator.h"
  27. #include "llvm/Target/TargetInstrInfo.h"
  28. using namespace llvm;
// Pass statistics, reported under -stats.
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
/// MachineCSE - Machine-level common subexpression elimination. Performs a
/// dominator-tree walk over the function, value numbering instructions with
/// a scoped hash table so available expressions from dominating blocks can
/// be reused. Must run while the function is still in SSA form.
class MachineCSE : public MachineFunctionPass {
  const TargetInstrInfo *TII;    // Target hooks (e.g. commuteInstruction).
  const TargetRegisterInfo *TRI; // Register overlap/alias queries.
  AliasAnalysis *AA;             // Used to identify invariant loads.
  MachineDominatorTree *DT;      // Drives the scoped walk order.
  MachineRegisterInfo *MRI;      // Virtual register def/use information.
public:
  static char ID; // Pass identification
  MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
    initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  }
  virtual bool runOnMachineFunction(MachineFunction &MF);
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AliasAnalysis>();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  virtual void releaseMemory() {
    ScopeMap.clear();
    Exps.clear();
  }
private:
  // Maximum number of instructions scanned when checking whether a physreg
  // def is trivially dead or still reaches a later use (set to 5 in ctor).
  const unsigned LookAheadLimit;
  typedef RecyclingAllocator<BumpPtrAllocator,
      ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
  typedef ScopedHashTable<MachineInstr*, unsigned,
      MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
  typedef ScopedHTType::ScopeTy ScopeType;
  DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; // Open scope per block.
  ScopedHTType VNT;                    // Expression -> value number table.
  SmallVector<MachineInstr*, 64> Exps; // Value number -> defining instr.
  unsigned CurrVN;                     // Next value number to hand out.
  bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(unsigned Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<unsigned,8> &PhysRefs,
                             SmallVectorImpl<unsigned> &PhysDefs,
                             bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        SmallSet<unsigned,8> &PhysRefs,
                        SmallVectorImpl<unsigned> &PhysDefs,
                        bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                         MachineInstr *CSMI, MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlock(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);
};
} // end anonymous namespace
char MachineCSE::ID = 0;
// Public handle so other code can refer to this pass by ID.
char &llvm::MachineCSEID = MachineCSE::ID;
INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineCSE, "machine-cse",
                    "Machine Common Subexpression Elimination", false, false)
  104. bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
  105. MachineBasicBlock *MBB) {
  106. bool Changed = false;
  107. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  108. MachineOperand &MO = MI->getOperand(i);
  109. if (!MO.isReg() || !MO.isUse())
  110. continue;
  111. unsigned Reg = MO.getReg();
  112. if (!TargetRegisterInfo::isVirtualRegister(Reg))
  113. continue;
  114. if (!MRI->hasOneNonDBGUse(Reg))
  115. // Only coalesce single use copies. This ensure the copy will be
  116. // deleted.
  117. continue;
  118. MachineInstr *DefMI = MRI->getVRegDef(Reg);
  119. if (!DefMI->isCopy())
  120. continue;
  121. unsigned SrcReg = DefMI->getOperand(1).getReg();
  122. if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
  123. continue;
  124. if (DefMI->getOperand(0).getSubReg())
  125. continue;
  126. // FIXME: We should trivially coalesce subregister copies to expose CSE
  127. // opportunities on instructions with truncated operands (see
  128. // cse-add-with-overflow.ll). This can be done here as follows:
  129. // if (SrcSubReg)
  130. // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
  131. // SrcSubReg);
  132. // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
  133. //
  134. // The 2-addr pass has been updated to handle coalesced subregs. However,
  135. // some machine-specific code still can't handle it.
  136. // To handle it properly we also need a way find a constrained subregister
  137. // class given a super-reg class and subreg index.
  138. if (DefMI->getOperand(1).getSubReg())
  139. continue;
  140. const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  141. if (!MRI->constrainRegClass(SrcReg, RC))
  142. continue;
  143. DEBUG(dbgs() << "Coalescing: " << *DefMI);
  144. DEBUG(dbgs() << "*** to: " << *MI);
  145. MO.setReg(SrcReg);
  146. MRI->clearKillFlags(SrcReg);
  147. DefMI->eraseFromParent();
  148. ++NumCoalesces;
  149. Changed = true;
  150. }
  151. return Changed;
  152. }
  153. bool
  154. MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
  155. MachineBasicBlock::const_iterator I,
  156. MachineBasicBlock::const_iterator E) const {
  157. unsigned LookAheadLeft = LookAheadLimit;
  158. while (LookAheadLeft) {
  159. // Skip over dbg_value's.
  160. while (I != E && I->isDebugValue())
  161. ++I;
  162. if (I == E)
  163. // Reached end of block, register is obviously dead.
  164. return true;
  165. bool SeenDef = false;
  166. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
  167. const MachineOperand &MO = I->getOperand(i);
  168. if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
  169. SeenDef = true;
  170. if (!MO.isReg() || !MO.getReg())
  171. continue;
  172. if (!TRI->regsOverlap(MO.getReg(), Reg))
  173. continue;
  174. if (MO.isUse())
  175. // Found a use!
  176. return false;
  177. SeenDef = true;
  178. }
  179. if (SeenDef)
  180. // See a def of Reg (or an alias) before encountering any use, it's
  181. // trivially dead.
  182. return true;
  183. --LookAheadLeft;
  184. ++I;
  185. }
  186. return false;
  187. }
/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers).
/// Non-constant physreg uses and all live physreg defs are collected into
/// PhysRefs (expanded to aliases); live defs are also recorded in PhysDefs.
/// PhysUseDef is set if the instruction both uses and defines the same
/// physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<unsigned,8> &PhysRefs,
                                       SmallVectorImpl<unsigned> &PhysDefs,
                                       bool &PhysUseDef) const{
  // First, add all uses to PhysRefs.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Reading constant physregs is ok.
    if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
      // Record the register and everything aliasing it.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }
  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  // Dead-def scanning starts at the instruction after MI.
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not marked "dead". That's
    // common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
      PhysDefs.push_back(Reg);
  }
  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
      PhysRefs.insert(*AI);
  return !PhysRefs.empty();
}
/// PhysRegDefsReach - Return true if no register in PhysRefs is clobbered
/// between CSMI and MI (exclusive), scanning at most LookAheadLimit
/// non-debug instructions. The scan may continue from CSMI's block into
/// MI's block when CSMI lives in MI's sole predecessor; NonLocal is set to
/// true in that case.
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned,8> &PhysRefs,
                                  SmallVectorImpl<unsigned> &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();
  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;
    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  // Scan from the instruction after CSMI toward MI; EE marks where the
  // current block ends (the scan may resume at the top of MBB).
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugValue())
      ++I;
    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      // Fell off the predecessor block; continue at the top of MI's block.
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }
    if (I == E)
      // Reached MI with no intervening clobber of any PhysRefs register.
      return true;
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = I->getOperand(i);
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }
    --LookAheadLeft;
    ++I;
  }
  // Look-ahead budget exhausted before reaching MI; be conservative.
  return false;
}
  299. bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  300. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
  301. MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
  302. return false;
  303. // Ignore copies.
  304. if (MI->isCopyLike())
  305. return false;
  306. // Ignore stuff that we obviously can't move.
  307. if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
  308. MI->hasUnmodeledSideEffects())
  309. return false;
  310. if (MI->mayLoad()) {
  311. // Okay, this instruction does a load. As a refinement, we allow the target
  312. // to decide whether the loaded value is actually a constant. If so, we can
  313. // actually use it as a load.
  314. if (!MI->isInvariantLoad(AA))
  315. // FIXME: we should be able to hoist loads with no other side effects if
  316. // there are no other instructions which can change memory in this loop.
  317. // This is a trivial form of alias analysis.
  318. return false;
  319. }
  320. return true;
  321. }
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                   MachineInstr *CSMI, MachineInstr *MI) {
  // FIXME: Heuristics that work around the lack of live range splitting.
  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
      TargetRegisterInfo::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    // Collect every (non-debug) user of CSReg, then check that each user of
    // Reg is among them.
    SmallPtrSet<MachineInstr*, 8> CSUses;
    for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
         E = MRI->use_nodbg_end(); I != E; ++I) {
      MachineInstr *Use = &*I;
      CSUses.insert(Use);
    }
    for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
         E = MRI->use_nodbg_end(); I != E; ++I) {
      MachineInstr *Use = &*I;
      if (!CSUses.count(Use)) {
        MayIncreasePressure = true;
        break;
      }
    }
  }
  if (!MayIncreasePressure) return true;
  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (MI->isAsCheapAsAMove()) {
    MachineBasicBlock *CSBB = CSMI->getParent();
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }
  // Heuristics #2: If the expression doesn't use a virtual register and the
  // only uses of the redundant computation are copies, do not cse.
  bool HasVRegUse = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isUse() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
         E = MRI->use_nodbg_end(); I != E; ++I) {
      MachineInstr *Use = &*I;
      // Ignore copies.
      if (!Use->isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }
  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
  for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
       E = MRI->use_nodbg_end(); I != E; ++I) {
    MachineInstr *Use = &*I;
    HasPHI |= Use->isPHI();
    CSBBs.insert(Use->getParent());
  }
  if (!HasPHI)
    return true;
  return CSBBs.count(MI->getParent());
}
  397. void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  398. DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  399. ScopeType *Scope = new ScopeType(VNT);
  400. ScopeMap[MBB] = Scope;
  401. }
  402. void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  403. DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  404. DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  405. assert(SI != ScopeMap.end());
  406. delete SI->second;
  407. ScopeMap.erase(SI);
  408. }
/// ProcessBlock - Value number each instruction in MBB and replace any
/// instruction whose expression is already available with the earlier
/// result. Returns true if the block was modified.
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  bool Changed = false;
  // (OldReg, NewReg) def pairs to merge once a CSE is deemed profitable.
  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  // Operand indices of implicit defs on CSMI whose "dead" marker must be
  // cleared because the corresponding def on MI is live.
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    // Advance the iterator now: MI may be erased below.
    ++I;
    if (!isCSECandidate(MI))
      continue;
    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Look for trivial copy coalescing opportunities.
      if (PerformTrivialCoalescing(MI, MBB)) {
        Changed = true;
        // After coalescing MI itself may become a copy.
        if (MI->isCopyLike())
          continue;
        // Coalescing canonicalized the operands; re-query the table.
        FoundCSE = VNT.count(MI);
      }
    }
    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      MachineInstr *NewMI = TII->commuteInstruction(MI);
      if (NewMI) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(MI);
      }
    }
    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    SmallVector<unsigned, 2> PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;
      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }
    if (!FoundCSE) {
      // No earlier equivalent; record this expression for later queries.
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }
    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    DEBUG(dbgs() << "Examining: " << *MI);
    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getDesc().getNumDefs() +
                       MI->getDesc().getNumImplicitDefs();
    // Pair up each def of MI with the matching def of CSMI; NumDefs lets
    // the loop stop early once every def has been seen.
    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();
      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);
      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }
      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");
      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
        DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }
      // Don't perform CSE if the result of the old instruction cannot exist
      // within the register class of the new instruction.
      const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
      if (!MRI->constrainRegClass(NewReg, OldRC)) {
        DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
        DoCSE = false;
        break;
      }
      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }
    // Actually perform the elimination.
    if (DoCSE) {
      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
        MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
        MRI->clearKillFlags(CSEPairs[i].second);
      }
      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
        CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          unsigned LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn))
            MBB->addLiveIn(LiveIn);
        }
        ++NumCrossBBCSEs;
      }
      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      // Not worth replacing; treat MI as a fresh available expression.
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
  }
  return Changed;
}
  550. /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
  551. /// dominator tree node if its a leaf or all of its children are done. Walk
  552. /// up the dominator tree to destroy ancestors which are now done.
  553. void
  554. MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
  555. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  556. if (OpenChildren[Node])
  557. return;
  558. // Pop scope.
  559. ExitScope(Node->getBlock());
  560. // Now traverse upwards to pop ancestors whose offsprings are all done.
  561. while (MachineDomTreeNode *Parent = Node->getIDom()) {
  562. unsigned Left = --OpenChildren[Parent];
  563. if (Left != 0)
  564. break;
  565. ExitScope(Parent->getBlock());
  566. Node = Parent;
  567. }
  568. }
  569. bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  570. SmallVector<MachineDomTreeNode*, 32> Scopes;
  571. SmallVector<MachineDomTreeNode*, 8> WorkList;
  572. DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
  573. CurrVN = 0;
  574. // Perform a DFS walk to determine the order of visit.
  575. WorkList.push_back(Node);
  576. do {
  577. Node = WorkList.pop_back_val();
  578. Scopes.push_back(Node);
  579. const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
  580. unsigned NumChildren = Children.size();
  581. OpenChildren[Node] = NumChildren;
  582. for (unsigned i = 0; i != NumChildren; ++i) {
  583. MachineDomTreeNode *Child = Children[i];
  584. WorkList.push_back(Child);
  585. }
  586. } while (!WorkList.empty());
  587. // Now perform CSE.
  588. bool Changed = false;
  589. for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
  590. MachineDomTreeNode *Node = Scopes[i];
  591. MachineBasicBlock *MBB = Node->getBlock();
  592. EnterScope(MBB);
  593. Changed |= ProcessBlock(MBB);
  594. // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
  595. ExitScopeIfDone(Node, OpenChildren);
  596. }
  597. return Changed;
  598. }
  599. bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  600. TII = MF.getTarget().getInstrInfo();
  601. TRI = MF.getTarget().getRegisterInfo();
  602. MRI = &MF.getRegInfo();
  603. AA = &getAnalysis<AliasAnalysis>();
  604. DT = &getAnalysis<MachineDominatorTree>();
  605. return PerformCSE(DT->getRootNode());
  606. }