MachineCSE.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896
  1. //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass performs global common subexpression elimination on machine
  10. // instructions using a scoped hash table based value numbering scheme. It
  11. // must be run while the machine function is still in SSA form.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "llvm/ADT/ScopedHashTable.h"
  16. #include "llvm/ADT/SmallPtrSet.h"
  17. #include "llvm/ADT/SmallSet.h"
  18. #include "llvm/ADT/SmallVector.h"
  19. #include "llvm/ADT/Statistic.h"
  20. #include "llvm/Analysis/AliasAnalysis.h"
  21. #include "llvm/Analysis/CFG.h"
  22. #include "llvm/CodeGen/MachineBasicBlock.h"
  23. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
  24. #include "llvm/CodeGen/MachineDominators.h"
  25. #include "llvm/CodeGen/MachineFunction.h"
  26. #include "llvm/CodeGen/MachineFunctionPass.h"
  27. #include "llvm/CodeGen/MachineInstr.h"
  28. #include "llvm/CodeGen/MachineOperand.h"
  29. #include "llvm/CodeGen/MachineRegisterInfo.h"
  30. #include "llvm/CodeGen/Passes.h"
  31. #include "llvm/CodeGen/TargetInstrInfo.h"
  32. #include "llvm/CodeGen/TargetOpcodes.h"
  33. #include "llvm/CodeGen/TargetRegisterInfo.h"
  34. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  35. #include "llvm/MC/MCInstrDesc.h"
  36. #include "llvm/MC/MCRegisterInfo.h"
  37. #include "llvm/Pass.h"
  38. #include "llvm/Support/Allocator.h"
  39. #include "llvm/Support/Debug.h"
  40. #include "llvm/Support/RecyclingAllocator.h"
  41. #include "llvm/Support/raw_ostream.h"
  42. #include <cassert>
  43. #include <iterator>
  44. #include <utility>
  45. #include <vector>
  46. using namespace llvm;
  47. #define DEBUG_TYPE "machine-cse"
  48. STATISTIC(NumCoalesces, "Number of copies coalesced");
  49. STATISTIC(NumCSEs, "Number of common subexpression eliminated");
  50. STATISTIC(NumPREs, "Number of partial redundant expression"
  51. " transformed to fully redundant");
  52. STATISTIC(NumPhysCSEs,
  53. "Number of physreg referencing common subexpr eliminated");
  54. STATISTIC(NumCrossBBCSEs,
  55. "Number of cross-MBB physreg referencing CS eliminated");
  56. STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
  57. namespace {
  58. class MachineCSE : public MachineFunctionPass {
  59. const TargetInstrInfo *TII;
  60. const TargetRegisterInfo *TRI;
  61. AliasAnalysis *AA;
  62. MachineDominatorTree *DT;
  63. MachineRegisterInfo *MRI;
  64. MachineBlockFrequencyInfo *MBFI;
  65. public:
  66. static char ID; // Pass identification
  67. MachineCSE() : MachineFunctionPass(ID) {
  68. initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  69. }
  70. bool runOnMachineFunction(MachineFunction &MF) override;
  71. void getAnalysisUsage(AnalysisUsage &AU) const override {
  72. AU.setPreservesCFG();
  73. MachineFunctionPass::getAnalysisUsage(AU);
  74. AU.addRequired<AAResultsWrapperPass>();
  75. AU.addPreservedID(MachineLoopInfoID);
  76. AU.addRequired<MachineDominatorTree>();
  77. AU.addPreserved<MachineDominatorTree>();
  78. AU.addRequired<MachineBlockFrequencyInfo>();
  79. AU.addPreserved<MachineBlockFrequencyInfo>();
  80. }
  81. void releaseMemory() override {
  82. ScopeMap.clear();
  83. PREMap.clear();
  84. Exps.clear();
  85. }
  86. private:
  87. using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
  88. ScopedHashTableVal<MachineInstr *, unsigned>>;
  89. using ScopedHTType =
  90. ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
  91. AllocatorTy>;
  92. using ScopeType = ScopedHTType::ScopeTy;
  93. using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
  94. unsigned LookAheadLimit = 0;
  95. DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
  96. DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
  97. PREMap;
  98. ScopedHTType VNT;
  99. SmallVector<MachineInstr *, 64> Exps;
  100. unsigned CurrVN = 0;
  101. bool PerformTrivialCopyPropagation(MachineInstr *MI,
  102. MachineBasicBlock *MBB);
  103. bool isPhysDefTriviallyDead(unsigned Reg,
  104. MachineBasicBlock::const_iterator I,
  105. MachineBasicBlock::const_iterator E) const;
  106. bool hasLivePhysRegDefUses(const MachineInstr *MI,
  107. const MachineBasicBlock *MBB,
  108. SmallSet<unsigned, 8> &PhysRefs,
  109. PhysDefVector &PhysDefs, bool &PhysUseDef) const;
  110. bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
  111. SmallSet<unsigned, 8> &PhysRefs,
  112. PhysDefVector &PhysDefs, bool &NonLocal) const;
  113. bool isCSECandidate(MachineInstr *MI);
  114. bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
  115. MachineBasicBlock *CSBB, MachineInstr *MI);
  116. void EnterScope(MachineBasicBlock *MBB);
  117. void ExitScope(MachineBasicBlock *MBB);
  118. bool ProcessBlockCSE(MachineBasicBlock *MBB);
  119. void ExitScopeIfDone(MachineDomTreeNode *Node,
  120. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
  121. bool PerformCSE(MachineDomTreeNode *Node);
  122. bool isPRECandidate(MachineInstr *MI);
  123. bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
  124. bool PerformSimplePRE(MachineDominatorTree *DT);
  125. /// Heuristics to see if it's profitable to move common computations of MBB
  126. /// and MBB1 to CandidateBB.
  127. bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
  128. MachineBasicBlock *MBB,
  129. MachineBasicBlock *MBB1);
  130. };
  131. } // end anonymous namespace
  132. char MachineCSE::ID = 0;
  133. char &llvm::MachineCSEID = MachineCSE::ID;
  134. INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
  135. "Machine Common Subexpression Elimination", false, false)
  136. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  137. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  138. INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
  139. "Machine Common Subexpression Elimination", false, false)
  140. /// The source register of a COPY machine instruction can be propagated to all
  141. /// its users, and this propagation could increase the probability of finding
  142. /// common subexpressions. If the COPY has only one user, the COPY itself can
  143. /// be removed.
  144. bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
  145. MachineBasicBlock *MBB) {
  146. bool Changed = false;
  147. for (MachineOperand &MO : MI->operands()) {
  148. if (!MO.isReg() || !MO.isUse())
  149. continue;
  150. Register Reg = MO.getReg();
  151. if (!Register::isVirtualRegister(Reg))
  152. continue;
  153. bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
  154. MachineInstr *DefMI = MRI->getVRegDef(Reg);
  155. if (!DefMI->isCopy())
  156. continue;
  157. Register SrcReg = DefMI->getOperand(1).getReg();
  158. if (!Register::isVirtualRegister(SrcReg))
  159. continue;
  160. if (DefMI->getOperand(0).getSubReg())
  161. continue;
  162. // FIXME: We should trivially coalesce subregister copies to expose CSE
  163. // opportunities on instructions with truncated operands (see
  164. // cse-add-with-overflow.ll). This can be done here as follows:
  165. // if (SrcSubReg)
  166. // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
  167. // SrcSubReg);
  168. // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
  169. //
  170. // The 2-addr pass has been updated to handle coalesced subregs. However,
  171. // some machine-specific code still can't handle it.
  172. // To handle it properly we also need a way find a constrained subregister
  173. // class given a super-reg class and subreg index.
  174. if (DefMI->getOperand(1).getSubReg())
  175. continue;
  176. if (!MRI->constrainRegAttrs(SrcReg, Reg))
  177. continue;
  178. LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
  179. LLVM_DEBUG(dbgs() << "*** to: " << *MI);
  180. // Propagate SrcReg of copies to MI.
  181. MO.setReg(SrcReg);
  182. MRI->clearKillFlags(SrcReg);
  183. // Coalesce single use copies.
  184. if (OnlyOneUse) {
  185. // If (and only if) we've eliminated all uses of the copy, also
  186. // copy-propagate to any debug-users of MI, or they'll be left using
  187. // an undefined value.
  188. DefMI->changeDebugValuesDefReg(SrcReg);
  189. DefMI->eraseFromParent();
  190. ++NumCoalesces;
  191. }
  192. Changed = true;
  193. }
  194. return Changed;
  195. }
  196. bool
  197. MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
  198. MachineBasicBlock::const_iterator I,
  199. MachineBasicBlock::const_iterator E) const {
  200. unsigned LookAheadLeft = LookAheadLimit;
  201. while (LookAheadLeft) {
  202. // Skip over dbg_value's.
  203. I = skipDebugInstructionsForward(I, E);
  204. if (I == E)
  205. // Reached end of block, we don't know if register is dead or not.
  206. return false;
  207. bool SeenDef = false;
  208. for (const MachineOperand &MO : I->operands()) {
  209. if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
  210. SeenDef = true;
  211. if (!MO.isReg() || !MO.getReg())
  212. continue;
  213. if (!TRI->regsOverlap(MO.getReg(), Reg))
  214. continue;
  215. if (MO.isUse())
  216. // Found a use!
  217. return false;
  218. SeenDef = true;
  219. }
  220. if (SeenDef)
  221. // See a def of Reg (or an alias) before encountering any use, it's
  222. // trivially dead.
  223. return true;
  224. --LookAheadLeft;
  225. ++I;
  226. }
  227. return false;
  228. }
  229. static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
  230. const MachineFunction &MF,
  231. const TargetRegisterInfo &TRI) {
  232. // MachineRegisterInfo::isConstantPhysReg directly called by
  233. // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
  234. // reserved registers to be frozen. That doesn't cause a problem post-ISel as
  235. // most (if not all) targets freeze reserved registers right after ISel.
  236. //
  237. // It does cause issues mid-GlobalISel, however, hence the additional
  238. // reservedRegsFrozen check.
  239. const MachineRegisterInfo &MRI = MF.getRegInfo();
  240. return TRI.isCallerPreservedPhysReg(Reg, MF) ||
  241. (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
  242. }
  243. /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
  244. /// physical registers (except for dead defs of physical registers). It also
  245. /// returns the physical register def by reference if it's the only one and the
  246. /// instruction does not uses a physical register.
  247. bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
  248. const MachineBasicBlock *MBB,
  249. SmallSet<unsigned, 8> &PhysRefs,
  250. PhysDefVector &PhysDefs,
  251. bool &PhysUseDef) const {
  252. // First, add all uses to PhysRefs.
  253. for (const MachineOperand &MO : MI->operands()) {
  254. if (!MO.isReg() || MO.isDef())
  255. continue;
  256. Register Reg = MO.getReg();
  257. if (!Reg)
  258. continue;
  259. if (Register::isVirtualRegister(Reg))
  260. continue;
  261. // Reading either caller preserved or constant physregs is ok.
  262. if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
  263. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
  264. PhysRefs.insert(*AI);
  265. }
  266. // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  267. // (which currently contains only uses), set the PhysUseDef flag.
  268. PhysUseDef = false;
  269. MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  270. for (const auto &MOP : llvm::enumerate(MI->operands())) {
  271. const MachineOperand &MO = MOP.value();
  272. if (!MO.isReg() || !MO.isDef())
  273. continue;
  274. Register Reg = MO.getReg();
  275. if (!Reg)
  276. continue;
  277. if (Register::isVirtualRegister(Reg))
  278. continue;
  279. // Check against PhysRefs even if the def is "dead".
  280. if (PhysRefs.count(Reg))
  281. PhysUseDef = true;
  282. // If the def is dead, it's ok. But the def may not marked "dead". That's
  283. // common since this pass is run before livevariables. We can scan
  284. // forward a few instructions and check if it is obviously dead.
  285. if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
  286. PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
  287. }
  288. // Finally, add all defs to PhysRefs as well.
  289. for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
  290. for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
  291. ++AI)
  292. PhysRefs.insert(*AI);
  293. return !PhysRefs.empty();
  294. }
  295. bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
  296. SmallSet<unsigned, 8> &PhysRefs,
  297. PhysDefVector &PhysDefs,
  298. bool &NonLocal) const {
  299. // For now conservatively returns false if the common subexpression is
  300. // not in the same basic block as the given instruction. The only exception
  301. // is if the common subexpression is in the sole predecessor block.
  302. const MachineBasicBlock *MBB = MI->getParent();
  303. const MachineBasicBlock *CSMBB = CSMI->getParent();
  304. bool CrossMBB = false;
  305. if (CSMBB != MBB) {
  306. if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
  307. return false;
  308. for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
  309. if (MRI->isAllocatable(PhysDefs[i].second) ||
  310. MRI->isReserved(PhysDefs[i].second))
  311. // Avoid extending live range of physical registers if they are
  312. //allocatable or reserved.
  313. return false;
  314. }
  315. CrossMBB = true;
  316. }
  317. MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  318. MachineBasicBlock::const_iterator E = MI;
  319. MachineBasicBlock::const_iterator EE = CSMBB->end();
  320. unsigned LookAheadLeft = LookAheadLimit;
  321. while (LookAheadLeft) {
  322. // Skip over dbg_value's.
  323. while (I != E && I != EE && I->isDebugInstr())
  324. ++I;
  325. if (I == EE) {
  326. assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
  327. (void)CrossMBB;
  328. CrossMBB = false;
  329. NonLocal = true;
  330. I = MBB->begin();
  331. EE = MBB->end();
  332. continue;
  333. }
  334. if (I == E)
  335. return true;
  336. for (const MachineOperand &MO : I->operands()) {
  337. // RegMasks go on instructions like calls that clobber lots of physregs.
  338. // Don't attempt to CSE across such an instruction.
  339. if (MO.isRegMask())
  340. return false;
  341. if (!MO.isReg() || !MO.isDef())
  342. continue;
  343. Register MOReg = MO.getReg();
  344. if (Register::isVirtualRegister(MOReg))
  345. continue;
  346. if (PhysRefs.count(MOReg))
  347. return false;
  348. }
  349. --LookAheadLeft;
  350. ++I;
  351. }
  352. return false;
  353. }
  354. bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  355. if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
  356. MI->isInlineAsm() || MI->isDebugInstr())
  357. return false;
  358. // Ignore copies.
  359. if (MI->isCopyLike())
  360. return false;
  361. // Ignore stuff that we obviously can't move.
  362. if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
  363. MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
  364. return false;
  365. if (MI->mayLoad()) {
  366. // Okay, this instruction does a load. As a refinement, we allow the target
  367. // to decide whether the loaded value is actually a constant. If so, we can
  368. // actually use it as a load.
  369. if (!MI->isDereferenceableInvariantLoad(AA))
  370. // FIXME: we should be able to hoist loads with no other side effects if
  371. // there are no other instructions which can change memory in this loop.
  372. // This is a trivial form of alias analysis.
  373. return false;
  374. }
  375. // Ignore stack guard loads, otherwise the register that holds CSEed value may
  376. // be spilled and get loaded back with corrupted data.
  377. if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
  378. return false;
  379. return true;
  380. }
  381. /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
  382. /// common expression that defines Reg. CSBB is basic block where CSReg is
  383. /// defined.
  384. bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
  385. MachineBasicBlock *CSBB, MachineInstr *MI) {
  386. // FIXME: Heuristics that works around the lack the live range splitting.
  387. // If CSReg is used at all uses of Reg, CSE should not increase register
  388. // pressure of CSReg.
  389. bool MayIncreasePressure = true;
  390. if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) {
  391. MayIncreasePressure = false;
  392. SmallPtrSet<MachineInstr*, 8> CSUses;
  393. for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
  394. CSUses.insert(&MI);
  395. }
  396. for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
  397. if (!CSUses.count(&MI)) {
  398. MayIncreasePressure = true;
  399. break;
  400. }
  401. }
  402. }
  403. if (!MayIncreasePressure) return true;
  404. // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  405. // an immediate predecessor. We don't want to increase register pressure and
  406. // end up causing other computation to be spilled.
  407. if (TII->isAsCheapAsAMove(*MI)) {
  408. MachineBasicBlock *BB = MI->getParent();
  409. if (CSBB != BB && !CSBB->isSuccessor(BB))
  410. return false;
  411. }
  412. // Heuristics #2: If the expression doesn't not use a vr and the only use
  413. // of the redundant computation are copies, do not cse.
  414. bool HasVRegUse = false;
  415. for (const MachineOperand &MO : MI->operands()) {
  416. if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) {
  417. HasVRegUse = true;
  418. break;
  419. }
  420. }
  421. if (!HasVRegUse) {
  422. bool HasNonCopyUse = false;
  423. for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
  424. // Ignore copies.
  425. if (!MI.isCopyLike()) {
  426. HasNonCopyUse = true;
  427. break;
  428. }
  429. }
  430. if (!HasNonCopyUse)
  431. return false;
  432. }
  433. // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  434. // it unless the defined value is already used in the BB of the new use.
  435. bool HasPHI = false;
  436. for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
  437. HasPHI |= UseMI.isPHI();
  438. if (UseMI.getParent() == MI->getParent())
  439. return true;
  440. }
  441. return !HasPHI;
  442. }
  443. void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  444. LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  445. ScopeType *Scope = new ScopeType(VNT);
  446. ScopeMap[MBB] = Scope;
  447. }
  448. void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  449. LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  450. DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  451. assert(SI != ScopeMap.end());
  452. delete SI->second;
  453. ScopeMap.erase(SI);
  454. }
  455. bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
  456. bool Changed = false;
  457. SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  458. SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  459. SmallVector<unsigned, 2> ImplicitDefs;
  460. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
  461. MachineInstr *MI = &*I;
  462. ++I;
  463. if (!isCSECandidate(MI))
  464. continue;
  465. bool FoundCSE = VNT.count(MI);
  466. if (!FoundCSE) {
  467. // Using trivial copy propagation to find more CSE opportunities.
  468. if (PerformTrivialCopyPropagation(MI, MBB)) {
  469. Changed = true;
  470. // After coalescing MI itself may become a copy.
  471. if (MI->isCopyLike())
  472. continue;
  473. // Try again to see if CSE is possible.
  474. FoundCSE = VNT.count(MI);
  475. }
  476. }
  477. // Commute commutable instructions.
  478. bool Commuted = false;
  479. if (!FoundCSE && MI->isCommutable()) {
  480. if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
  481. Commuted = true;
  482. FoundCSE = VNT.count(NewMI);
  483. if (NewMI != MI) {
  484. // New instruction. It doesn't need to be kept.
  485. NewMI->eraseFromParent();
  486. Changed = true;
  487. } else if (!FoundCSE)
  488. // MI was changed but it didn't help, commute it back!
  489. (void)TII->commuteInstruction(*MI);
  490. }
  491. }
  492. // If the instruction defines physical registers and the values *may* be
  493. // used, then it's not safe to replace it with a common subexpression.
  494. // It's also not safe if the instruction uses physical registers.
  495. bool CrossMBBPhysDef = false;
  496. SmallSet<unsigned, 8> PhysRefs;
  497. PhysDefVector PhysDefs;
  498. bool PhysUseDef = false;
  499. if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
  500. PhysDefs, PhysUseDef)) {
  501. FoundCSE = false;
  502. // ... Unless the CS is local or is in the sole predecessor block
  503. // and it also defines the physical register which is not clobbered
  504. // in between and the physical register uses were not clobbered.
  505. // This can never be the case if the instruction both uses and
  506. // defines the same physical register, which was detected above.
  507. if (!PhysUseDef) {
  508. unsigned CSVN = VNT.lookup(MI);
  509. MachineInstr *CSMI = Exps[CSVN];
  510. if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
  511. FoundCSE = true;
  512. }
  513. }
  514. if (!FoundCSE) {
  515. VNT.insert(MI, CurrVN++);
  516. Exps.push_back(MI);
  517. continue;
  518. }
  519. // Found a common subexpression, eliminate it.
  520. unsigned CSVN = VNT.lookup(MI);
  521. MachineInstr *CSMI = Exps[CSVN];
  522. LLVM_DEBUG(dbgs() << "Examining: " << *MI);
  523. LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
  524. // Check if it's profitable to perform this CSE.
  525. bool DoCSE = true;
  526. unsigned NumDefs = MI->getNumDefs();
  527. for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
  528. MachineOperand &MO = MI->getOperand(i);
  529. if (!MO.isReg() || !MO.isDef())
  530. continue;
  531. Register OldReg = MO.getReg();
  532. Register NewReg = CSMI->getOperand(i).getReg();
  533. // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
  534. // we should make sure it is not dead at CSMI.
  535. if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
  536. ImplicitDefsToUpdate.push_back(i);
  537. // Keep track of implicit defs of CSMI and MI, to clear possibly
  538. // made-redundant kill flags.
  539. if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
  540. ImplicitDefs.push_back(OldReg);
  541. if (OldReg == NewReg) {
  542. --NumDefs;
  543. continue;
  544. }
  545. assert(Register::isVirtualRegister(OldReg) &&
  546. Register::isVirtualRegister(NewReg) &&
  547. "Do not CSE physical register defs!");
  548. if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
  549. LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
  550. DoCSE = false;
  551. break;
  552. }
  553. // Don't perform CSE if the result of the new instruction cannot exist
  554. // within the constraints (register class, bank, or low-level type) of
  555. // the old instruction.
  556. if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
  557. LLVM_DEBUG(
  558. dbgs() << "*** Not the same register constraints, avoid CSE!\n");
  559. DoCSE = false;
  560. break;
  561. }
  562. CSEPairs.push_back(std::make_pair(OldReg, NewReg));
  563. --NumDefs;
  564. }
  565. // Actually perform the elimination.
  566. if (DoCSE) {
  567. for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
  568. unsigned OldReg = CSEPair.first;
  569. unsigned NewReg = CSEPair.second;
  570. // OldReg may have been unused but is used now, clear the Dead flag
  571. MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
  572. assert(Def != nullptr && "CSEd register has no unique definition?");
  573. Def->clearRegisterDeads(NewReg);
  574. // Replace with NewReg and clear kill flags which may be wrong now.
  575. MRI->replaceRegWith(OldReg, NewReg);
  576. MRI->clearKillFlags(NewReg);
  577. }
  578. // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
  579. // we should make sure it is not dead at CSMI.
  580. for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
  581. CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
  582. for (auto PhysDef : PhysDefs)
  583. if (!MI->getOperand(PhysDef.first).isDead())
  584. CSMI->getOperand(PhysDef.first).setIsDead(false);
  585. // Go through implicit defs of CSMI and MI, and clear the kill flags on
  586. // their uses in all the instructions between CSMI and MI.
  587. // We might have made some of the kill flags redundant, consider:
  588. // subs ... implicit-def %nzcv <- CSMI
  589. // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
  590. // subs ... implicit-def %nzcv <- MI, to be eliminated
  591. // csinc ... implicit killed %nzcv
  592. // Since we eliminated MI, and reused a register imp-def'd by CSMI
  593. // (here %nzcv), that register, if it was killed before MI, should have
  594. // that kill flag removed, because it's lifetime was extended.
  595. if (CSMI->getParent() == MI->getParent()) {
  596. for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
  597. for (auto ImplicitDef : ImplicitDefs)
  598. if (MachineOperand *MO = II->findRegisterUseOperand(
  599. ImplicitDef, /*isKill=*/true, TRI))
  600. MO->setIsKill(false);
  601. } else {
  602. // If the instructions aren't in the same BB, bail out and clear the
  603. // kill flag on all uses of the imp-def'd register.
  604. for (auto ImplicitDef : ImplicitDefs)
  605. MRI->clearKillFlags(ImplicitDef);
  606. }
  607. if (CrossMBBPhysDef) {
  608. // Add physical register defs now coming in from a predecessor to MBB
  609. // livein list.
  610. while (!PhysDefs.empty()) {
  611. auto LiveIn = PhysDefs.pop_back_val();
  612. if (!MBB->isLiveIn(LiveIn.second))
  613. MBB->addLiveIn(LiveIn.second);
  614. }
  615. ++NumCrossBBCSEs;
  616. }
  617. MI->eraseFromParent();
  618. ++NumCSEs;
  619. if (!PhysRefs.empty())
  620. ++NumPhysCSEs;
  621. if (Commuted)
  622. ++NumCommutes;
  623. Changed = true;
  624. } else {
  625. VNT.insert(MI, CurrVN++);
  626. Exps.push_back(MI);
  627. }
  628. CSEPairs.clear();
  629. ImplicitDefsToUpdate.clear();
  630. ImplicitDefs.clear();
  631. }
  632. return Changed;
  633. }
  634. /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
  635. /// dominator tree node if its a leaf or all of its children are done. Walk
  636. /// up the dominator tree to destroy ancestors which are now done.
  637. void
  638. MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
  639. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  640. if (OpenChildren[Node])
  641. return;
  642. // Pop scope.
  643. ExitScope(Node->getBlock());
  644. // Now traverse upwards to pop ancestors whose offsprings are all done.
  645. while (MachineDomTreeNode *Parent = Node->getIDom()) {
  646. unsigned Left = --OpenChildren[Parent];
  647. if (Left != 0)
  648. break;
  649. ExitScope(Parent->getBlock());
  650. Node = Parent;
  651. }
  652. }
  653. bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  654. SmallVector<MachineDomTreeNode*, 32> Scopes;
  655. SmallVector<MachineDomTreeNode*, 8> WorkList;
  656. DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
  657. CurrVN = 0;
  658. // Perform a DFS walk to determine the order of visit.
  659. WorkList.push_back(Node);
  660. do {
  661. Node = WorkList.pop_back_val();
  662. Scopes.push_back(Node);
  663. const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
  664. OpenChildren[Node] = Children.size();
  665. for (MachineDomTreeNode *Child : Children)
  666. WorkList.push_back(Child);
  667. } while (!WorkList.empty());
  668. // Now perform CSE.
  669. bool Changed = false;
  670. for (MachineDomTreeNode *Node : Scopes) {
  671. MachineBasicBlock *MBB = Node->getBlock();
  672. EnterScope(MBB);
  673. Changed |= ProcessBlockCSE(MBB);
  674. // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
  675. ExitScopeIfDone(Node, OpenChildren);
  676. }
  677. return Changed;
  678. }
  679. // We use stronger checks for PRE candidate rather than for CSE ones to embrace
  680. // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
  681. // to exclude instrs created by PRE that won't be CSEed later.
  682. bool MachineCSE::isPRECandidate(MachineInstr *MI) {
  683. if (!isCSECandidate(MI) ||
  684. MI->isNotDuplicable() ||
  685. MI->mayLoad() ||
  686. MI->isAsCheapAsAMove() ||
  687. MI->getNumDefs() != 1 ||
  688. MI->getNumExplicitDefs() != 1)
  689. return false;
  690. for (auto def : MI->defs())
  691. if (!Register::isVirtualRegister(def.getReg()))
  692. return false;
  693. for (auto use : MI->uses())
  694. if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
  695. return false;
  696. return true;
  697. }
  698. bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
  699. MachineBasicBlock *MBB) {
  700. bool Changed = false;
  701. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
  702. MachineInstr *MI = &*I;
  703. ++I;
  704. if (!isPRECandidate(MI))
  705. continue;
  706. if (!PREMap.count(MI)) {
  707. PREMap[MI] = MBB;
  708. continue;
  709. }
  710. auto MBB1 = PREMap[MI];
  711. assert(
  712. !DT->properlyDominates(MBB, MBB1) &&
  713. "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
  714. auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
  715. if (!CMBB->isLegalToHoistInto())
  716. continue;
  717. if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
  718. continue;
  719. // Two instrs are partial redundant if their basic blocks are reachable
  720. // from one to another but one doesn't dominate another.
  721. if (CMBB != MBB1) {
  722. auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
  723. if (BB != nullptr && BB1 != nullptr &&
  724. (isPotentiallyReachable(BB1, BB) ||
  725. isPotentiallyReachable(BB, BB1))) {
  726. assert(MI->getOperand(0).isDef() &&
  727. "First operand of instr with one explicit def must be this def");
  728. Register VReg = MI->getOperand(0).getReg();
  729. Register NewReg = MRI->cloneVirtualRegister(VReg);
  730. if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
  731. continue;
  732. MachineInstr &NewMI =
  733. TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
  734. NewMI.getOperand(0).setReg(NewReg);
  735. PREMap[MI] = CMBB;
  736. ++NumPREs;
  737. Changed = true;
  738. }
  739. }
  740. }
  741. return Changed;
  742. }
  743. // This simple PRE (partial redundancy elimination) pass doesn't actually
  744. // eliminate partial redundancy but transforms it to full redundancy,
  745. // anticipating that the next CSE step will eliminate this created redundancy.
  746. // If CSE doesn't eliminate this, than created instruction will remain dead
  747. // and eliminated later by Remove Dead Machine Instructions pass.
  748. bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
  749. SmallVector<MachineDomTreeNode *, 32> BBs;
  750. PREMap.clear();
  751. bool Changed = false;
  752. BBs.push_back(DT->getRootNode());
  753. do {
  754. auto Node = BBs.pop_back_val();
  755. const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
  756. for (MachineDomTreeNode *Child : Children)
  757. BBs.push_back(Child);
  758. MachineBasicBlock *MBB = Node->getBlock();
  759. Changed |= ProcessBlockPRE(DT, MBB);
  760. } while (!BBs.empty());
  761. return Changed;
  762. }
  763. bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
  764. MachineBasicBlock *MBB,
  765. MachineBasicBlock *MBB1) {
  766. if (CandidateBB->getParent()->getFunction().hasMinSize())
  767. return true;
  768. assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
  769. assert(DT->dominates(CandidateBB, MBB1) &&
  770. "CandidateBB should dominate MBB1");
  771. return MBFI->getBlockFreq(CandidateBB) <=
  772. MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
  773. }
  774. bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  775. if (skipFunction(MF.getFunction()))
  776. return false;
  777. TII = MF.getSubtarget().getInstrInfo();
  778. TRI = MF.getSubtarget().getRegisterInfo();
  779. MRI = &MF.getRegInfo();
  780. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  781. DT = &getAnalysis<MachineDominatorTree>();
  782. MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  783. LookAheadLimit = TII->getMachineCSELookAheadLimit();
  784. bool ChangedPRE, ChangedCSE;
  785. ChangedPRE = PerformSimplePRE(DT);
  786. ChangedCSE = PerformCSE(DT->getRootNode());
  787. return ChangedPRE || ChangedCSE;
  788. }