MachineCSE.cpp

//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//
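
// Illustrative sketch of the transformation (hypothetical MIR, not from the
// original source): with bb.1 dominated by bb.0,
//
//   bb.0:
//     %1:gr32 = ADD32rr %a, %b        ; inserted into the value table
//     ...
//   bb.1:
//     %2:gr32 = ADD32rr %a, %b        ; hashes to the same value number
//     $eax = COPY %2
//
// the pass rewrites uses of %2 to %1 and erases the second ADD32rr. Matching
// is structural, via MachineInstrExpressionTrait over the instruction's opcode
// and operands; the opcode and register names above are made up.
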
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;
#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");

namespace {

  class MachineCSE : public MachineFunctionPass {
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    AliasAnalysis *AA;
    MachineDominatorTree *DT;
    MachineRegisterInfo *MRI;

  public:
    static char ID; // Pass identification

    MachineCSE() : MachineFunctionPass(ID) {
      initializeMachineCSEPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addPreservedID(MachineLoopInfoID);
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
    }

    void releaseMemory() override {
      ScopeMap.clear();
      Exps.clear();
    }

  private:
    using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
                            ScopedHashTableVal<MachineInstr *, unsigned>>;
    using ScopedHTType =
        ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                        AllocatorTy>;
    using ScopeType = ScopedHTType::ScopeTy;
    using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;

    unsigned LookAheadLimit = 0;
    DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
    ScopedHTType VNT;
    SmallVector<MachineInstr *, 64> Exps;
    unsigned CurrVN = 0;

    bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                       MachineBasicBlock *MBB);
    bool isPhysDefTriviallyDead(unsigned Reg,
                                MachineBasicBlock::const_iterator I,
                                MachineBasicBlock::const_iterator E) const;
    bool hasLivePhysRegDefUses(const MachineInstr *MI,
                               const MachineBasicBlock *MBB,
                               SmallSet<unsigned, 8> &PhysRefs,
                               PhysDefVector &PhysDefs, bool &PhysUseDef) const;
    bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                          SmallSet<unsigned, 8> &PhysRefs,
                          PhysDefVector &PhysDefs, bool &NonLocal) const;
    bool isCSECandidate(MachineInstr *MI);
    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                           MachineInstr *CSMI, MachineInstr *MI);
    void EnterScope(MachineBasicBlock *MBB);
    void ExitScope(MachineBasicBlock *MBB);
    bool ProcessBlock(MachineBasicBlock *MBB);
    void ExitScopeIfDone(MachineDomTreeNode *Node,
                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
    bool PerformCSE(MachineDomTreeNode *Node);
  };

} // end anonymous namespace
char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)
/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
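///
/// Illustrative sketch (hypothetical virtual registers and opcode, not from
/// the original source):
///
///   %1 = COPY %0
///   %2 = ADD %1, %3      ; after propagation: %2 = ADD %0, %3
///
/// If %1 had no other non-debug use, the COPY is erased as well. Rewriting
/// uses to the copy source makes otherwise-distinct instructions hash to the
/// same value number, exposing more CSE opportunities.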
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    unsigned SrcReg = DefMI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "*** to: " << *MI);

    // Update matching debug values.
    DefMI->changeDebugValuesDefReg(SrcReg);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}
bool
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
                                   MachineBasicBlock::const_iterator I,
                                   MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block; we don't know if the register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // Saw a def of Reg (or an alias) before encountering any use; it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}
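
// Illustrative sketch of the look-ahead above (hypothetical physical register
// and opcodes, not from the original source): with the scan starting just
// after a candidate def of $eflags,
//
//   %x = SUB %a, %b, implicit-def $eflags   ; candidate def
//   %y = MOV %c                             ; neither reads nor writes $eflags
//   %z = CMP %d, %e, implicit-def $eflags   ; redefines it before any read
//
// the redefinition is reached within LookAheadLimit instructions without an
// intervening use, so the first def is treated as trivially dead.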
static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
                                            const MachineFunction &MF,
                                            const TargetRegisterInfo &TRI) {
  // MachineRegisterInfo::isConstantPhysReg, which is called directly below,
  // expects the reserved registers to be frozen. That doesn't cause a problem
  // post-ISel as most (if not all) targets freeze reserved registers right
  // after ISel.
  //
  // It does cause issues mid-GlobalISel, however, hence the additional
  // reservedRegsFrozen check.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
         (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}
/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers). It
/// also returns the physical register def by reference if it's the only one
/// and the instruction does not use a physical register.
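///
/// Illustrative sketch (hypothetical registers, not from the original
/// source): an instruction such as
///
///   %d = DIV %a, %b, implicit-def $eflags
///
/// defines the physical register $eflags. If that def may still be read, the
/// instruction cannot simply be replaced by an earlier identical one, because
/// the earlier $eflags value may have been clobbered in between; the checks
/// below and in PhysRegDefsReach exist to prove otherwise.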
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<unsigned, 8> &PhysRefs,
                                       PhysDefVector &PhysDefs,
                                       bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const auto &MOP : llvm::enumerate(MI->operands())) {
    const MachineOperand &MO = MOP.value();
    if (!MO.isReg() || !MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before LiveVariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
      PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
         ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned, 8> &PhysRefs,
                                  PhysDefVector &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i].second) ||
          MRI->isReserved(PhysDefs[i].second))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, the
    // load can be CSEed like any other instruction.
    if (!MI->isDereferenceableInvariantLoad(AA))
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads; otherwise the register that holds the CSEed
  // value may be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                   MachineInstr *CSMI, MachineInstr *MI) {
  // FIXME: Heuristic that works around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
      TargetRegisterInfo::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
    }
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      if (!CSUses.count(&MI)) {
        MayIncreasePressure = true;
        break;
      }
    }
  }
  if (!MayIncreasePressure) return true;

  // Heuristic #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *CSBB = CSMI->getParent();
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristic #2: If the expression doesn't use a virtual register and the
  // only uses of the redundant computation are copies, do not CSE.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristic #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}
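
// Illustrative sketch of Heuristic #3 above (hypothetical blocks and
// registers, not from the original source): if %cs is defined in bb.0, fed
// only into a PHI in bb.2, and an identical computation %r appears in bb.1,
// reusing %cs in bb.1 would stretch its live range through bb.1 solely to
// feed the PHI, so the reuse is rejected unless %cs already has a use in
// bb.1.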
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}
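
// Note on the scoping scheme (summary, not from the original source):
// constructing a ScopeTy on VNT opens a new scope for a block; value numbers
// inserted while that scope is open stay visible in the scopes of dominated
// blocks opened afterwards. Destroying the ScopeTy in ExitScope removes
// exactly the entries inserted in that block's scope, so expressions only
// ever match against instructions in dominating blocks.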
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (!isCSECandidate(MI))
      continue;

    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Use trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI, MBB)) {
        Changed = true;

        // After coalescing, MI itself may become a copy.
        if (MI->isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(*MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    PhysDefVector PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block,
      // the physical register it defines is not clobbered in between, and
      // the physical register uses are not clobbered either. This can
      // never be the case if the instruction both uses and defines the
      // same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << *MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getNumDefs();

    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI; if a def is not dead at MI,
      // make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }
    // Actually perform the elimination.
    if (DoCSE) {
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now; clear the Dead flag.
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI; if a def is not dead at MI,
      // make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
      for (auto PhysDef : PhysDefs)
        if (!MI->getOperand(PhysDef.first).isDead())
          CSMI->getOperand(PhysDef.first).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI->getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          auto LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn.second))
            MBB->addLiveIn(LiveIn.second);
        }
        ++NumCrossBBCSEs;
      }

      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}
/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                            DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose children are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    OpenChildren[Node] = Children.size();
    for (MachineDomTreeNode *Child : Children)
      WorkList.push_back(Child);
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlock(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}
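
// Illustrative walk (hypothetical dominator tree, not from the original
// source): for a tree A -> {B, C}, C -> {D}, the DFS above records a
// pre-order such as A, C, D, B with OpenChildren = {A:2, C:1, D:0, B:0}.
// Processing then enters scopes in that order; D closes its own scope and
// decrements C to 0 (closing C's scope) and A to 1, and finally B closes
// itself and then A. A block's value numbers are therefore visible exactly
// while its dominator subtree is being processed.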
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  return PerformCSE(DT->getRootNode());
}
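
// Usage note (a minimal sketch, not part of the original file): the pass is
// registered under the name "machine-cse" and exposed through
// llvm::MachineCSEID, so target pipelines typically schedule it from their
// TargetPassConfig with something like
//
//   addPass(&MachineCSEID);
//
// and it can be exercised in isolation on MIR input with
//
//   llc -run-pass=machine-cse input.mir
//
// (the exact invocation depends on the LLVM version and target setup).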