//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");

namespace {

class MachineCSE : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  AliasAnalysis *AA;
  MachineDominatorTree *DT;
  MachineRegisterInfo *MRI;

public:
  static char ID; // Pass identification

  MachineCSE() : MachineFunctionPass(ID) {
    initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  void releaseMemory() override {
    ScopeMap.clear();
    Exps.clear();
  }

private:
  using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
                          ScopedHashTableVal<MachineInstr *, unsigned>>;
  using ScopedHTType =
      ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                      AllocatorTy>;
  using ScopeType = ScopedHTType::ScopeTy;

  unsigned LookAheadLimit = 0;
  DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
  ScopedHTType VNT;
  SmallVector<MachineInstr *, 64> Exps;
  unsigned CurrVN = 0;

  bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                     MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(unsigned Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<unsigned, 8> &PhysRefs,
                             SmallVectorImpl<unsigned> &PhysDefs,
                             bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        SmallSet<unsigned, 8> &PhysRefs,
                        SmallVectorImpl<unsigned> &PhysDefs,
                        bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                         MachineInstr *CSMI, MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlock(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);
};

} // end anonymous namespace

char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    unsigned SrcReg = DefMI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    DEBUG(dbgs() << "Coalescing: " << *DefMI);
    DEBUG(dbgs() << "***     to: " << *MI);
    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}
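
/// isPhysDefTriviallyDead - Scan forward from I (at most LookAheadLimit
/// instructions) looking for a def or use of the physical register Reg.
/// Return true if a def (or regmask clobber) of Reg is found before any use,
/// i.e. the def being examined is trivially dead.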
bool
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
                                   MachineBasicBlock::const_iterator I,
                                   MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // We have seen a def of Reg (or an alias) before encountering any use;
      // it's trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers). It
/// also collects the live physical register defs by reference, and sets
/// PhysUseDef if the instruction both uses and defines the same physical
/// register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<unsigned, 8> &PhysRefs,
                                       SmallVectorImpl<unsigned> &PhysDefs,
                                       bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!MRI->isCallerPreservedOrConstPhysReg(Reg))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
      PhysDefs.push_back(Reg);
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}
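
/// PhysRegDefsReach - Return true if the physical register defs in PhysDefs,
/// made by the common subexpression CSMI, still reach MI: none of the
/// registers in PhysRefs is clobbered (and no regmask is seen) in the
/// instructions between CSMI and MI, scanning at most LookAheadLimit
/// instructions and possibly crossing from the sole-predecessor block into
/// MI's block. NonLocal is set if the scan crossed a block boundary.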
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned, 8> &PhysRefs,
                                  SmallVectorImpl<unsigned> &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}
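
/// isCSECandidate - Return true if MI is a candidate for common subexpression
/// elimination: it is not a position marker, PHI, implicit def, kill, inline
/// asm, copy, or debug instruction; it has no unmodeled side effects; it does
/// not store, call, or branch; and any load it performs is a dereferenceable
/// invariant load. Stack guard loads are also excluded.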
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // actually use it as a load.
    if (!MI->isDereferenceableInvariantLoad(AA))
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds the CSEed
  // value may be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                   MachineInstr *CSMI, MachineInstr *MI) {
  // FIXME: Heuristic that works around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
      TargetRegisterInfo::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
    }
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      if (!CSUses.count(&MI)) {
        MayIncreasePressure = true;
        break;
      }
    }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *CSBB = CSMI->getParent();
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristics #2: If the expression doesn't use a virtual register and the
  // only uses of the redundant computation are copies, do not cse.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}
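
/// EnterScope - Open a new scope in the scoped hash table for MBB. Value
/// numbers inserted while this scope is open are discarded when it is exited.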
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}
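
/// ExitScope - Close and destroy the scope that was opened for MBB, removing
/// its value numbers from the scoped hash table.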
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}
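
/// ProcessBlock - Walk the instructions in MBB, value-numbering each CSE
/// candidate in the scoped hash table. When an instruction matches an
/// existing value number and the replacement is legal and profitable, rewrite
/// its uses to the earlier instruction's defs and erase it. Returns true if
/// the block was modified.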
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (!isCSECandidate(MI))
      continue;

    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI->isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(*MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    SmallVector<unsigned, 2> PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    DEBUG(dbgs() << "Examining: " << *MI);
    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getDesc().getNumDefs() +
                       MI->getDesc().getNumImplicitDefs();

    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
        DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        DEBUG(dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI->getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          unsigned LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn))
            MBB->addLiveIn(LiveIn);
        }
        ++NumCrossBBCSEs;
      }

      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}
/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                            DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose children are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}
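
/// PerformCSE - Perform CSE over the subtree of the dominator tree rooted at
/// Node. Blocks are visited in dominator-tree preorder so a dominated block
/// can reuse value numbers from its dominators; each block's value numbers
/// live in a scope that is popped once all of the blocks it dominates have
/// been processed.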
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    OpenChildren[Node] = Children.size();
    for (MachineDomTreeNode *Child : Children)
      WorkList.push_back(Child);
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlock(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}
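
/// runOnMachineFunction - Entry point of the pass. Caches target and analysis
/// information, reads the target's CSE look-ahead limit, and runs CSE over
/// the function's dominator tree.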
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  return PerformCSE(DT->getRootNode());
}