MachineCSE.cpp

//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-cse"
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPREs, "Number of partial redundant expression"
                   " transformed to fully redundant");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {

class MachineCSE : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  AliasAnalysis *AA;
  MachineDominatorTree *DT;
  MachineRegisterInfo *MRI;

public:
  static char ID; // Pass identification

  MachineCSE() : MachineFunctionPass(ID) {
    initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  void releaseMemory() override {
    ScopeMap.clear();
    PREMap.clear();
    Exps.clear();
  }

private:
  using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
                          ScopedHashTableVal<MachineInstr *, unsigned>>;
  using ScopedHTType =
      ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                      AllocatorTy>;
  using ScopeType = ScopedHTType::ScopeTy;
  using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;

  unsigned LookAheadLimit = 0;
  DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
  DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
      PREMap;
  ScopedHTType VNT;
  SmallVector<MachineInstr *, 64> Exps;
  unsigned CurrVN = 0;

  bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                     MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(unsigned Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<unsigned, 8> &PhysRefs,
                             PhysDefVector &PhysDefs, bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        SmallSet<unsigned, 8> &PhysRefs,
                        PhysDefVector &PhysDefs, bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                         MachineBasicBlock *CSBB, MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlockCSE(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);
  bool isPRECandidate(MachineInstr *MI);
  bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
  bool PerformSimplePRE(MachineDominatorTree *DT);
};

} // end anonymous namespace
char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)
/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    unsigned SrcReg = DefMI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "*** to: " << *MI);

    // Update matching debug values.
    DefMI->changeDebugValuesDefReg(SrcReg);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}
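
/// Return true if a forward scan of at most LookAheadLimit instructions in
/// [I, E) finds a def of Reg (or an aliasing register) before any use, i.e.
/// the physical register def is trivially dead at that point.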
bool
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
                                   MachineBasicBlock::const_iterator I,
                                   MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // Saw a def of Reg (or an alias) before encountering any use; it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}
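
/// Return true if reads of Reg are safe to CSE across because the register is
/// either caller preserved or a constant physical register.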
static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
                                            const MachineFunction &MF,
                                            const TargetRegisterInfo &TRI) {
  // MachineRegisterInfo::isConstantPhysReg directly called by
  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
  // reserved registers to be frozen. That doesn't cause a problem post-ISel as
  // most (if not all) targets freeze reserved registers right after ISel.
  //
  // It does cause issues mid-GlobalISel, however, hence the additional
  // reservedRegsFrozen check.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
         (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}
/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers). It
/// also returns the physical register def by reference if it's the only one
/// and the instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<unsigned, 8> &PhysRefs,
                                       PhysDefVector &PhysDefs,
                                       bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const auto &MOP : llvm::enumerate(MI->operands())) {
    const MachineOperand &MO = MOP.value();
    if (!MO.isReg() || !MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
      PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
         ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}
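
/// Return true if the physical registers in PhysRefs and PhysDefs are not
/// clobbered between CSMI and MI, i.e. CSMI's result still reaches MI and it
/// is safe to reuse it. NonLocal is set when the scan has to cross from
/// CSMI's block into MI's block.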
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<unsigned, 8> &PhysRefs,
                                  PhysDefVector &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i].second) ||
          MRI->isReserved(PhysDefs[i].second))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned MOReg = MO.getReg();
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}
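
/// Return true if MI is an instruction this pass is willing to value-number
/// and eliminate: not a label, PHI, copy, or inline asm, free of unmodeled
/// side effects, and any load it performs must be invariant and
/// dereferenceable.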
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // actually use it as a load.
    if (!MI->isDereferenceableInvariantLoad(AA))
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds the CSEed
  // value may be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg. CSBB is the basic block where CSReg is
/// defined.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                   MachineBasicBlock *CSBB, MachineInstr *MI) {
  // FIXME: Heuristics that work around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
      TargetRegisterInfo::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
    }
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      if (!CSUses.count(&MI)) {
        MayIncreasePressure = true;
        break;
      }
    }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristics #2: If the expression doesn't use a virtual register and the
  // only uses of the redundant computation are copies, do not CSE.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}
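
/// Open a new value-numbering scope for MBB and remember it in ScopeMap so it
/// can be popped once the block and its dominator-tree children are done.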
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}
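
/// Pop and destroy the value-numbering scope associated with MBB.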
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}
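
/// Value-number every CSE candidate in MBB, using trivial copy propagation
/// and commutation to expose more matches, and replace any instruction whose
/// value number is already in the scoped hash table when doing so is both
/// legal and profitable.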
bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (!isCSECandidate(MI))
      continue;

    bool FoundCSE = VNT.count(MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI->isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI->isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(*MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<unsigned, 8> PhysRefs;
    PhysDefVector PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                          PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << *MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI->getNumDefs();

    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned OldReg = MO.getReg();
      unsigned NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
      for (auto PhysDef : PhysDefs)
        if (!MI->getOperand(PhysDef.first).isDead())
          CSMI->getOperand(PhysDef.first).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI->getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          auto LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn.second))
            MBB->addLiveIn(LiveIn.second);
        }
        ++NumCrossBBCSEs;
      }

      MI->eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(MI, CurrVN++);
      Exps.push_back(MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}
/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                            DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offspring are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}
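
/// Walk the dominator tree rooted at Node in depth-first order and run
/// ProcessBlockCSE on every block, entering a fresh value-numbering scope for
/// each block and popping scopes as soon as a subtree is finished.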
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    OpenChildren[Node] = Children.size();
    for (MachineDomTreeNode *Child : Children)
      WorkList.push_back(Child);
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlockCSE(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}
// We use stronger checks for PRE candidates than for CSE ones in order to
// cover the checks inside ProcessBlockCSE(), not only those inside
// isCSECandidate(). This helps to exclude instrs created by PRE that won't
// be CSEed later.
bool MachineCSE::isPRECandidate(MachineInstr *MI) {
  if (!isCSECandidate(MI) ||
      MI->isNotDuplicable() ||
      MI->isAsCheapAsAMove() ||
      MI->getNumDefs() != 1 ||
      MI->getNumExplicitDefs() != 1)
    return false;

  for (auto def : MI->defs())
    if (!TRI->isVirtualRegister(def.getReg()))
      return false;

  for (auto use : MI->uses())
    if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
      return false;

  return true;
}
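
/// For each PRE candidate in MBB, check whether an identical instruction has
/// already been seen in another block; if neither block dominates the other,
/// duplicate the instruction into their nearest common dominator so the
/// following CSE walk can eliminate the now fully redundant instances.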
bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
                                 MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (!isPRECandidate(MI))
      continue;

    if (!PREMap.count(MI)) {
      PREMap[MI] = MBB;
      continue;
    }

    auto MBB1 = PREMap[MI];
    assert(!DT->properlyDominates(MBB, MBB1) &&
           "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
    auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);

    // Two instrs are partially redundant if their basic blocks are reachable
    // from one to the other but one doesn't dominate the other.
    if (CMBB != MBB1) {
      auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
      if (BB != nullptr && BB1 != nullptr &&
          (isPotentiallyReachable(BB1, BB) ||
           isPotentiallyReachable(BB, BB1))) {

        assert(MI->getOperand(0).isDef() &&
               "First operand of instr with one explicit def must be this def");
        unsigned VReg = MI->getOperand(0).getReg();
        unsigned NewReg = MRI->cloneVirtualRegister(VReg);
        if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
          continue;
        MachineInstr &NewMI =
            TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
        NewMI.getOperand(0).setReg(NewReg);

        PREMap[MI] = CMBB;
        ++NumPREs;
        Changed = true;
      }
    }
  }
  return Changed;
}
// This simple PRE (partial redundancy elimination) pass doesn't actually
// eliminate partial redundancy but transforms it to full redundancy,
// anticipating that the next CSE step will eliminate this created redundancy.
// If CSE doesn't eliminate it, then the created instruction will remain dead
// and be removed later by the Remove Dead Machine Instructions pass.
bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
  SmallVector<MachineDomTreeNode*, 32> BBs;

  PREMap.clear();
  bool Changed = false;
  BBs.push_back(DT->getRootNode());
  do {
    auto Node = BBs.pop_back_val();
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    for (MachineDomTreeNode *Child : Children)
      BBs.push_back(Child);

    MachineBasicBlock *MBB = Node->getBlock();
    Changed |= ProcessBlockPRE(DT, MBB);

  } while (!BBs.empty());

  return Changed;
}
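
/// Entry point: run the simple PRE transformation first so that the CSE walk
/// which follows can eliminate the redundancies PRE exposes.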
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  bool ChangedPRE, ChangedCSE;
  ChangedPRE = PerformSimplePRE(DT);
  ChangedCSE = PerformCSE(DT->getRootNode());
  return ChangedPRE || ChangedCSE;
}