ExecutionDomainFix.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889
  1. //===- ExecutionDepsFix.cpp - Fix execution domain issues ----*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "llvm/CodeGen/ExecutionDomainFix.h"
  10. #include "llvm/ADT/PostOrderIterator.h"
  11. #include "llvm/CodeGen/MachineRegisterInfo.h"
  12. #include "llvm/CodeGen/TargetInstrInfo.h"
  13. using namespace llvm;
  14. #define DEBUG_TYPE "execution-deps-fix"
  15. char ReachingDefAnalysis::ID = 0;
  16. INITIALIZE_PASS(ReachingDefAnalysis, "reaching-deps-analysis",
  17. "ReachingDefAnalysis", false, true)
  18. char BreakFalseDeps::ID = 0;
  19. INITIALIZE_PASS_BEGIN(BreakFalseDeps, "break-false-deps", "BreakFalseDeps",
  20. false, false)
  21. INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
  22. INITIALIZE_PASS_END(BreakFalseDeps, "break-false-deps", "BreakFalseDeps", false,
  23. false)
  24. iterator_range<SmallVectorImpl<int>::const_iterator>
  25. ExecutionDomainFix::regIndices(unsigned Reg) const {
  26. assert(Reg < AliasMap.size() && "Invalid register");
  27. const auto &Entry = AliasMap[Reg];
  28. return make_range(Entry.begin(), Entry.end());
  29. }
  30. DomainValue *ExecutionDomainFix::alloc(int domain) {
  31. DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue
  32. : Avail.pop_back_val();
  33. if (domain >= 0)
  34. dv->addDomain(domain);
  35. assert(dv->Refs == 0 && "Reference count wasn't cleared");
  36. assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
  37. return dv;
  38. }
  39. void ExecutionDomainFix::release(DomainValue *DV) {
  40. while (DV) {
  41. assert(DV->Refs && "Bad DomainValue");
  42. if (--DV->Refs)
  43. return;
  44. // There are no more DV references. Collapse any contained instructions.
  45. if (DV->AvailableDomains && !DV->isCollapsed())
  46. collapse(DV, DV->getFirstDomain());
  47. DomainValue *Next = DV->Next;
  48. DV->clear();
  49. Avail.push_back(DV);
  50. // Also release the next DomainValue in the chain.
  51. DV = Next;
  52. }
  53. }
  54. DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) {
  55. DomainValue *DV = DVRef;
  56. if (!DV || !DV->Next)
  57. return DV;
  58. // DV has a chain. Find the end.
  59. do
  60. DV = DV->Next;
  61. while (DV->Next);
  62. // Update DVRef to point to DV.
  63. retain(DV);
  64. release(DVRef);
  65. DVRef = DV;
  66. return DV;
  67. }
  68. void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) {
  69. assert(unsigned(rx) < NumRegs && "Invalid index");
  70. assert(!LiveRegs.empty() && "Must enter basic block first.");
  71. if (LiveRegs[rx] == dv)
  72. return;
  73. if (LiveRegs[rx])
  74. release(LiveRegs[rx]);
  75. LiveRegs[rx] = retain(dv);
  76. }
  77. void ExecutionDomainFix::kill(int rx) {
  78. assert(unsigned(rx) < NumRegs && "Invalid index");
  79. assert(!LiveRegs.empty() && "Must enter basic block first.");
  80. if (!LiveRegs[rx])
  81. return;
  82. release(LiveRegs[rx]);
  83. LiveRegs[rx] = nullptr;
  84. }
  85. void ExecutionDomainFix::force(int rx, unsigned domain) {
  86. assert(unsigned(rx) < NumRegs && "Invalid index");
  87. assert(!LiveRegs.empty() && "Must enter basic block first.");
  88. if (DomainValue *dv = LiveRegs[rx]) {
  89. if (dv->isCollapsed())
  90. dv->addDomain(domain);
  91. else if (dv->hasDomain(domain))
  92. collapse(dv, domain);
  93. else {
  94. // This is an incompatible open DomainValue. Collapse it to whatever and
  95. // force the new value into domain. This costs a domain crossing.
  96. collapse(dv, dv->getFirstDomain());
  97. assert(LiveRegs[rx] && "Not live after collapse?");
  98. LiveRegs[rx]->addDomain(domain);
  99. }
  100. } else {
  101. // Set up basic collapsed DomainValue.
  102. setLiveReg(rx, alloc(domain));
  103. }
  104. }
  105. void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) {
  106. assert(dv->hasDomain(domain) && "Cannot collapse");
  107. // Collapse all the instructions.
  108. while (!dv->Instrs.empty())
  109. TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
  110. dv->setSingleDomain(domain);
  111. // If there are multiple users, give them new, unique DomainValues.
  112. if (!LiveRegs.empty() && dv->Refs > 1)
  113. for (unsigned rx = 0; rx != NumRegs; ++rx)
  114. if (LiveRegs[rx] == dv)
  115. setLiveReg(rx, alloc(domain));
  116. }
  117. bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) {
  118. assert(!A->isCollapsed() && "Cannot merge into collapsed");
  119. assert(!B->isCollapsed() && "Cannot merge from collapsed");
  120. if (A == B)
  121. return true;
  122. // Restrict to the domains that A and B have in common.
  123. unsigned common = A->getCommonDomains(B->AvailableDomains);
  124. if (!common)
  125. return false;
  126. A->AvailableDomains = common;
  127. A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
  128. // Clear the old DomainValue so we won't try to swizzle instructions twice.
  129. B->clear();
  130. // All uses of B are referred to A.
  131. B->Next = retain(A);
  132. for (unsigned rx = 0; rx != NumRegs; ++rx) {
  133. assert(!LiveRegs.empty() && "no space allocated for live registers");
  134. if (LiveRegs[rx] == B)
  135. setLiveReg(rx, A);
  136. }
  137. return true;
  138. }
  139. void ReachingDefAnalysis::enterBasicBlock(
  140. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  141. MachineBasicBlock *MBB = TraversedMBB.MBB;
  142. int MBBNumber = MBB->getNumber();
  143. assert(MBBNumber < MBBReachingDefs.size() &&
  144. "Unexpected basic block number.");
  145. MBBReachingDefs[MBBNumber].resize(NumRegUnits);
  146. // Reset instruction counter in each basic block.
  147. CurInstr = 0;
  148. // Set up LiveRegs to represent registers entering MBB.
  149. // Default values are 'nothing happened a long time ago'.
  150. if (LiveRegs.empty())
  151. LiveRegs.assign(NumRegUnits, ReachingDedDefaultVal);
  152. // This is the entry block.
  153. if (MBB->pred_empty()) {
  154. for (const auto &LI : MBB->liveins()) {
  155. for (MCRegUnitIterator Unit(LI.PhysReg, TRI); Unit.isValid(); ++Unit) {
  156. // Treat function live-ins as if they were defined just before the first
  157. // instruction. Usually, function arguments are set up immediately
  158. // before the call.
  159. LiveRegs[*Unit] = -1;
  160. MBBReachingDefs[MBBNumber][*Unit].push_back(LiveRegs[*Unit]);
  161. }
  162. }
  163. DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
  164. return;
  165. }
  166. // Try to coalesce live-out registers from predecessors.
  167. for (MachineBasicBlock *pred : MBB->predecessors()) {
  168. assert(pred->getNumber() < MBBOutRegsInfos.size() &&
  169. "Should have pre-allocated MBBInfos for all MBBs");
  170. const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
  171. // Incoming is null if this is a backedge from a BB
  172. // we haven't processed yet
  173. if (Incoming.empty())
  174. continue;
  175. for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) {
  176. // Use the most recent predecessor def for each register.
  177. LiveRegs[Unit] = std::max(LiveRegs[Unit], Incoming[Unit]);
  178. if ((LiveRegs[Unit] != ReachingDedDefaultVal))
  179. MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]);
  180. }
  181. }
  182. DEBUG(dbgs() << printMBBReference(*MBB)
  183. << (!TraversedMBB.IsDone ? ": incomplete\n"
  184. : ": all preds known\n"));
  185. }
  186. void ExecutionDomainFix::enterBasicBlock(
  187. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  188. MachineBasicBlock *MBB = TraversedMBB.MBB;
  189. // Set up LiveRegs to represent registers entering MBB.
  190. // Set default domain values to 'no domain' (nullptr)
  191. if (LiveRegs.empty())
  192. LiveRegs.assign(NumRegs, nullptr);
  193. // This is the entry block.
  194. if (MBB->pred_empty()) {
  195. DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
  196. return;
  197. }
  198. // Try to coalesce live-out registers from predecessors.
  199. for (MachineBasicBlock *pred : MBB->predecessors()) {
  200. assert(pred->getNumber() < MBBOutRegsInfos.size() &&
  201. "Should have pre-allocated MBBInfos for all MBBs");
  202. LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
  203. // Incoming is null if this is a backedge from a BB
  204. // we haven't processed yet
  205. if (Incoming.empty())
  206. continue;
  207. for (unsigned rx = 0; rx != NumRegs; ++rx) {
  208. DomainValue *pdv = resolve(Incoming[rx]);
  209. if (!pdv)
  210. continue;
  211. if (!LiveRegs[rx]) {
  212. setLiveReg(rx, pdv);
  213. continue;
  214. }
  215. // We have a live DomainValue from more than one predecessor.
  216. if (LiveRegs[rx]->isCollapsed()) {
  217. // We are already collapsed, but predecessor is not. Force it.
  218. unsigned Domain = LiveRegs[rx]->getFirstDomain();
  219. if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
  220. collapse(pdv, Domain);
  221. continue;
  222. }
  223. // Currently open, merge in predecessor.
  224. if (!pdv->isCollapsed())
  225. merge(LiveRegs[rx], pdv);
  226. else
  227. force(rx, pdv->getFirstDomain());
  228. }
  229. }
  230. DEBUG(dbgs() << printMBBReference(*MBB)
  231. << (!TraversedMBB.IsDone ? ": incomplete\n"
  232. : ": all preds known\n"));
  233. }
  234. void ReachingDefAnalysis::leaveBasicBlock(
  235. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  236. assert(!LiveRegs.empty() && "Must enter basic block first.");
  237. int MBBNumber = TraversedMBB.MBB->getNumber();
  238. assert(MBBNumber < MBBOutRegsInfos.size() &&
  239. "Unexpected basic block number.");
  240. // Save register clearances at end of MBB - used by enterBasicBlock().
  241. MBBOutRegsInfos[MBBNumber] = LiveRegs;
  242. // While processing the basic block, we kept `Def` relative to the start
  243. // of the basic block for convenience. However, future use of this information
  244. // only cares about the clearance from the end of the block, so adjust
  245. // everything to be relative to the end of the basic block.
  246. for (int &OutLiveReg : MBBOutRegsInfos[MBBNumber])
  247. OutLiveReg -= CurInstr;
  248. LiveRegs.clear();
  249. }
  250. void ExecutionDomainFix::leaveBasicBlock(
  251. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  252. assert(!LiveRegs.empty() && "Must enter basic block first.");
  253. int MBBNumber = TraversedMBB.MBB->getNumber();
  254. assert(MBBNumber < MBBOutRegsInfos.size() &&
  255. "Unexpected basic block number.");
  256. // Save register clearances at end of MBB - used by enterBasicBlock().
  257. for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) {
  258. release(OldLiveReg);
  259. }
  260. MBBOutRegsInfos[MBBNumber] = LiveRegs;
  261. LiveRegs.clear();
  262. }
  263. bool ExecutionDomainFix::visitInstr(MachineInstr *MI) {
  264. // Update instructions with explicit execution domains.
  265. std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
  266. if (DomP.first) {
  267. if (DomP.second)
  268. visitSoftInstr(MI, DomP.second);
  269. else
  270. visitHardInstr(MI, DomP.first);
  271. }
  272. return !DomP.first;
  273. }
  274. bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
  275. unsigned Pref) {
  276. MachineOperand &MO = MI->getOperand(OpIdx);
  277. assert(MO.isUndef() && "Expected undef machine operand");
  278. unsigned OriginalReg = MO.getReg();
  279. // Update only undef operands that have reg units that are mapped to one root.
  280. for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
  281. unsigned NumRoots = 0;
  282. for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) {
  283. NumRoots++;
  284. if (NumRoots > 1)
  285. return false;
  286. }
  287. }
  288. // Get the undef operand's register class
  289. const TargetRegisterClass *OpRC =
  290. TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
  291. // If the instruction has a true dependency, we can hide the false depdency
  292. // behind it.
  293. for (MachineOperand &CurrMO : MI->operands()) {
  294. if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
  295. !OpRC->contains(CurrMO.getReg()))
  296. continue;
  297. // We found a true dependency - replace the undef register with the true
  298. // dependency.
  299. MO.setReg(CurrMO.getReg());
  300. return true;
  301. }
  302. // Go over all registers in the register class and find the register with
  303. // max clearance or clearance higher than Pref.
  304. unsigned MaxClearance = 0;
  305. unsigned MaxClearanceReg = OriginalReg;
  306. ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
  307. for (MCPhysReg Reg : Order) {
  308. unsigned Clearance = RDA->getClearance(MI, Reg);
  309. if (Clearance <= MaxClearance)
  310. continue;
  311. MaxClearance = Clearance;
  312. MaxClearanceReg = Reg;
  313. if (MaxClearance > Pref)
  314. break;
  315. }
  316. // Update the operand if we found a register with better clearance.
  317. if (MaxClearanceReg != OriginalReg)
  318. MO.setReg(MaxClearanceReg);
  319. return false;
  320. }
  321. bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
  322. unsigned Pref) {
  323. unsigned reg = MI->getOperand(OpIdx).getReg();
  324. unsigned Clearance = RDA->getClearance(MI, reg);
  325. DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
  326. if (Pref > Clearance) {
  327. DEBUG(dbgs() << ": Break dependency.\n");
  328. return true;
  329. }
  330. DEBUG(dbgs() << ": OK .\n");
  331. return false;
  332. }
  333. void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) {
  334. assert(!MI->isDebugValue() && "Won't process debug values");
  335. const MCInstrDesc &MCID = MI->getDesc();
  336. for (unsigned i = 0,
  337. e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
  338. i != e; ++i) {
  339. MachineOperand &MO = MI->getOperand(i);
  340. if (!MO.isReg())
  341. continue;
  342. if (MO.isUse())
  343. continue;
  344. for (int rx : regIndices(MO.getReg())) {
  345. // This instruction explicitly defines rx.
  346. DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI);
  347. // Kill off domains redefined by generic instructions.
  348. if (Kill)
  349. kill(rx);
  350. }
  351. }
  352. }
  353. void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
  354. assert(!MI->isDebugValue() && "Won't process debug values");
  355. int MBBNumber = MI->getParent()->getNumber();
  356. assert(MBBNumber < MBBReachingDefs.size() &&
  357. "Unexpected basic block number.");
  358. const MCInstrDesc &MCID = MI->getDesc();
  359. for (unsigned i = 0,
  360. e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
  361. i != e; ++i) {
  362. MachineOperand &MO = MI->getOperand(i);
  363. if (!MO.isReg() || !MO.getReg())
  364. continue;
  365. if (MO.isUse())
  366. continue;
  367. for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) {
  368. // This instruction explicitly defines the current reg unit.
  369. DEBUG(dbgs() << printReg(MO.getReg(), TRI) << ":\t" << CurInstr << '\t'
  370. << *MI);
  371. // How many instructions since this reg unit was last written?
  372. LiveRegs[*Unit] = CurInstr;
  373. MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr);
  374. }
  375. }
  376. InstIds[MI] = CurInstr;
  377. ++CurInstr;
  378. }
  379. void BreakFalseDeps::processDefs(MachineInstr *MI) {
  380. assert(!MI->isDebugValue() && "Won't process debug values");
  381. // Break dependence on undef uses. Do this before updating LiveRegs below.
  382. unsigned OpNum;
  383. unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
  384. if (Pref) {
  385. bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
  386. // We don't need to bother trying to break a dependency if this
  387. // instruction has a true dependency on that register through another
  388. // operand - we'll have to wait for it to be available regardless.
  389. if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
  390. UndefReads.push_back(std::make_pair(MI, OpNum));
  391. }
  392. const MCInstrDesc &MCID = MI->getDesc();
  393. for (unsigned i = 0,
  394. e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
  395. i != e; ++i) {
  396. MachineOperand &MO = MI->getOperand(i);
  397. if (!MO.isReg() || !MO.getReg())
  398. continue;
  399. if (MO.isUse())
  400. continue;
  401. // Check clearance before partial register updates.
  402. unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
  403. if (Pref && shouldBreakDependence(MI, i, Pref))
  404. TII->breakPartialRegDependency(*MI, i, TRI);
  405. }
  406. }
  407. void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
  408. if (UndefReads.empty())
  409. return;
  410. // Collect this block's live out register units.
  411. LiveRegSet.init(*TRI);
  412. // We do not need to care about pristine registers as they are just preserved
  413. // but not actually used in the function.
  414. LiveRegSet.addLiveOutsNoPristines(*MBB);
  415. MachineInstr *UndefMI = UndefReads.back().first;
  416. unsigned OpIdx = UndefReads.back().second;
  417. for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
  418. // Update liveness, including the current instruction's defs.
  419. LiveRegSet.stepBackward(I);
  420. if (UndefMI == &I) {
  421. if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
  422. TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
  423. UndefReads.pop_back();
  424. if (UndefReads.empty())
  425. return;
  426. UndefMI = UndefReads.back().first;
  427. OpIdx = UndefReads.back().second;
  428. }
  429. }
  430. }
  431. void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
  432. // Collapse all uses.
  433. for (unsigned i = mi->getDesc().getNumDefs(),
  434. e = mi->getDesc().getNumOperands();
  435. i != e; ++i) {
  436. MachineOperand &mo = mi->getOperand(i);
  437. if (!mo.isReg())
  438. continue;
  439. for (int rx : regIndices(mo.getReg())) {
  440. force(rx, domain);
  441. }
  442. }
  443. // Kill all defs and force them.
  444. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
  445. MachineOperand &mo = mi->getOperand(i);
  446. if (!mo.isReg())
  447. continue;
  448. for (int rx : regIndices(mo.getReg())) {
  449. kill(rx);
  450. force(rx, domain);
  451. }
  452. }
  453. }
  454. void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
  455. // Bitmask of available domains for this instruction after taking collapsed
  456. // operands into account.
  457. unsigned available = mask;
  458. // Scan the explicit use operands for incoming domains.
  459. SmallVector<int, 4> used;
  460. if (!LiveRegs.empty())
  461. for (unsigned i = mi->getDesc().getNumDefs(),
  462. e = mi->getDesc().getNumOperands();
  463. i != e; ++i) {
  464. MachineOperand &mo = mi->getOperand(i);
  465. if (!mo.isReg())
  466. continue;
  467. for (int rx : regIndices(mo.getReg())) {
  468. DomainValue *dv = LiveRegs[rx];
  469. if (dv == nullptr)
  470. continue;
  471. // Bitmask of domains that dv and available have in common.
  472. unsigned common = dv->getCommonDomains(available);
  473. // Is it possible to use this collapsed register for free?
  474. if (dv->isCollapsed()) {
  475. // Restrict available domains to the ones in common with the operand.
  476. // If there are no common domains, we must pay the cross-domain
  477. // penalty for this operand.
  478. if (common)
  479. available = common;
  480. } else if (common)
  481. // Open DomainValue is compatible, save it for merging.
  482. used.push_back(rx);
  483. else
  484. // Open DomainValue is not compatible with instruction. It is useless
  485. // now.
  486. kill(rx);
  487. }
  488. }
  489. // If the collapsed operands force a single domain, propagate the collapse.
  490. if (isPowerOf2_32(available)) {
  491. unsigned domain = countTrailingZeros(available);
  492. TII->setExecutionDomain(*mi, domain);
  493. visitHardInstr(mi, domain);
  494. return;
  495. }
  496. // Kill off any remaining uses that don't match available, and build a list of
  497. // incoming DomainValues that we want to merge.
  498. SmallVector<int, 4> Regs;
  499. for (int rx : used) {
  500. assert(!LiveRegs.empty() && "no space allocated for live registers");
  501. DomainValue *&LR = LiveRegs[rx];
  502. // This useless DomainValue could have been missed above.
  503. if (!LR->getCommonDomains(available)) {
  504. kill(rx);
  505. continue;
  506. }
  507. // Sorted insertion.
  508. // Enables giving priority to the latest domains during merging.
  509. auto I = std::upper_bound(
  510. Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) {
  511. return RDA->getReachingDef(mi, RC->getRegister(LHS)) <
  512. RDA->getReachingDef(mi, RC->getRegister(RHS));
  513. });
  514. Regs.insert(I, rx);
  515. }
  516. // doms are now sorted in order of appearance. Try to merge them all, giving
  517. // priority to the latest ones.
  518. DomainValue *dv = nullptr;
  519. while (!Regs.empty()) {
  520. if (!dv) {
  521. dv = LiveRegs[Regs.pop_back_val()];
  522. // Force the first dv to match the current instruction.
  523. dv->AvailableDomains = dv->getCommonDomains(available);
  524. assert(dv->AvailableDomains && "Domain should have been filtered");
  525. continue;
  526. }
  527. DomainValue *Latest = LiveRegs[Regs.pop_back_val()];
  528. // Skip already merged values.
  529. if (Latest == dv || Latest->Next)
  530. continue;
  531. if (merge(dv, Latest))
  532. continue;
  533. // If latest didn't merge, it is useless now. Kill all registers using it.
  534. for (int i : used) {
  535. assert(!LiveRegs.empty() && "no space allocated for live registers");
  536. if (LiveRegs[i] == Latest)
  537. kill(i);
  538. }
  539. }
  540. // dv is the DomainValue we are going to use for this instruction.
  541. if (!dv) {
  542. dv = alloc();
  543. dv->AvailableDomains = available;
  544. }
  545. dv->Instrs.push_back(mi);
  546. // Finally set all defs and non-collapsed uses to dv. We must iterate through
  547. // all the operators, including imp-def ones.
  548. for (MachineOperand &mo : mi->operands()) {
  549. if (!mo.isReg())
  550. continue;
  551. for (int rx : regIndices(mo.getReg())) {
  552. if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) {
  553. kill(rx);
  554. setLiveReg(rx, dv);
  555. }
  556. }
  557. }
  558. }
  559. void ExecutionDomainFix::processBasicBlock(
  560. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  561. enterBasicBlock(TraversedMBB);
  562. // If this block is not done, it makes little sense to make any decisions
  563. // based on clearance information. We need to make a second pass anyway,
  564. // and by then we'll have better information, so we can avoid doing the work
  565. // to try and break dependencies now.
  566. for (MachineInstr &MI : *TraversedMBB.MBB) {
  567. if (!MI.isDebugValue()) {
  568. bool Kill = false;
  569. if (TraversedMBB.PrimaryPass)
  570. Kill = visitInstr(&MI);
  571. processDefs(&MI, Kill);
  572. }
  573. }
  574. leaveBasicBlock(TraversedMBB);
  575. }
  576. void ReachingDefAnalysis::processBasicBlock(
  577. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  578. enterBasicBlock(TraversedMBB);
  579. for (MachineInstr &MI : *TraversedMBB.MBB) {
  580. if (!MI.isDebugValue())
  581. processDefs(&MI);
  582. }
  583. leaveBasicBlock(TraversedMBB);
  584. }
  585. void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) {
  586. UndefReads.clear();
  587. // If this block is not done, it makes little sense to make any decisions
  588. // based on clearance information. We need to make a second pass anyway,
  589. // and by then we'll have better information, so we can avoid doing the work
  590. // to try and break dependencies now.
  591. for (MachineInstr &MI : *MBB) {
  592. if (!MI.isDebugValue())
  593. processDefs(&MI);
  594. }
  595. processUndefReads(MBB);
  596. }
  597. bool LoopTraversal::isBlockDone(MachineBasicBlock *MBB) {
  598. int MBBNumber = MBB->getNumber();
  599. assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number.");
  600. return MBBInfos[MBBNumber].PrimaryCompleted &&
  601. MBBInfos[MBBNumber].IncomingCompleted ==
  602. MBBInfos[MBBNumber].PrimaryIncoming &&
  603. MBBInfos[MBBNumber].IncomingProcessed == MBB->pred_size();
  604. }
  605. LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) {
  606. // Initialize the MMBInfos
  607. MBBInfos.assign(MF.getNumBlockIDs(), MBBInfo());
  608. MachineBasicBlock *Entry = &*MF.begin();
  609. ReversePostOrderTraversal<MachineBasicBlock *> RPOT(Entry);
  610. SmallVector<MachineBasicBlock *, 4> Workqueue;
  611. SmallVector<TraversedMBBInfo, 4> MBBTraversalOrder;
  612. for (MachineBasicBlock *MBB : RPOT) {
  613. // N.B: IncomingProcessed and IncomingCompleted were already updated while
  614. // processing this block's predecessors.
  615. int MBBNumber = MBB->getNumber();
  616. assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number.");
  617. MBBInfos[MBBNumber].PrimaryCompleted = true;
  618. MBBInfos[MBBNumber].PrimaryIncoming = MBBInfos[MBBNumber].IncomingProcessed;
  619. bool Primary = true;
  620. Workqueue.push_back(MBB);
  621. while (!Workqueue.empty()) {
  622. MachineBasicBlock *ActiveMBB = &*Workqueue.back();
  623. Workqueue.pop_back();
  624. bool Done = isBlockDone(ActiveMBB);
  625. MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done));
  626. for (MachineBasicBlock *Succ : ActiveMBB->successors()) {
  627. int SuccNumber = Succ->getNumber();
  628. assert(SuccNumber < MBBInfos.size() &&
  629. "Unexpected basic block number.");
  630. if (!isBlockDone(Succ)) {
  631. if (Primary)
  632. MBBInfos[SuccNumber].IncomingProcessed++;
  633. if (Done)
  634. MBBInfos[SuccNumber].IncomingCompleted++;
  635. if (isBlockDone(Succ))
  636. Workqueue.push_back(Succ);
  637. }
  638. }
  639. Primary = false;
  640. }
  641. }
  642. // We need to go through again and finalize any blocks that are not done yet.
  643. // This is possible if blocks have dead predecessors, so we didn't visit them
  644. // above.
  645. for (MachineBasicBlock *MBB : RPOT) {
  646. if (!isBlockDone(MBB))
  647. MBBTraversalOrder.push_back(TraversedMBBInfo(MBB, false, true));
  648. // Don't update successors here. We'll get to them anyway through this
  649. // loop.
  650. }
  651. MBBInfos.clear();
  652. return MBBTraversalOrder;
  653. }
  654. bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
  655. if (skipFunction(mf.getFunction()))
  656. return false;
  657. MF = &mf;
  658. TII = MF->getSubtarget().getInstrInfo();
  659. TRI = MF->getSubtarget().getRegisterInfo();
  660. LiveRegs.clear();
  661. assert(NumRegs == RC->getNumRegs() && "Bad regclass");
  662. DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "
  663. << TRI->getRegClassName(RC) << " **********\n");
  664. // If no relevant registers are used in the function, we can skip it
  665. // completely.
  666. bool anyregs = false;
  667. const MachineRegisterInfo &MRI = mf.getRegInfo();
  668. for (unsigned Reg : *RC) {
  669. if (MRI.isPhysRegUsed(Reg)) {
  670. anyregs = true;
  671. break;
  672. }
  673. }
  674. if (!anyregs)
  675. return false;
  676. RDA = &getAnalysis<ReachingDefAnalysis>();
  677. // Initialize the AliasMap on the first use.
  678. if (AliasMap.empty()) {
  679. // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
  680. // therefore the LiveRegs array.
  681. AliasMap.resize(TRI->getNumRegs());
  682. for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
  683. for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid();
  684. ++AI)
  685. AliasMap[*AI].push_back(i);
  686. }
  687. // Initialize the MBBOutRegsInfos
  688. MBBOutRegsInfos.resize(mf.getNumBlockIDs());
  689. // Traverse the basic blocks.
  690. LoopTraversal Traversal;
  691. LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
  692. for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) {
  693. processBasicBlock(TraversedMBB);
  694. }
  695. for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) {
  696. for (DomainValue *OutLiveReg : OutLiveRegs) {
  697. if (OutLiveReg)
  698. release(OutLiveReg);
  699. }
  700. }
  701. MBBOutRegsInfos.clear();
  702. Avail.clear();
  703. Allocator.DestroyAll();
  704. return false;
  705. }
  706. bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
  707. if (skipFunction(mf.getFunction()))
  708. return false;
  709. MF = &mf;
  710. TII = MF->getSubtarget().getInstrInfo();
  711. TRI = MF->getSubtarget().getRegisterInfo();
  712. LiveRegs.clear();
  713. NumRegUnits = TRI->getNumRegUnits();
  714. MBBReachingDefs.resize(mf.getNumBlockIDs());
  715. DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n");
  716. // Initialize the MBBOutRegsInfos
  717. MBBOutRegsInfos.resize(mf.getNumBlockIDs());
  718. // Traverse the basic blocks.
  719. LoopTraversal Traversal;
  720. LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
  721. for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) {
  722. processBasicBlock(TraversedMBB);
  723. }
  724. // Sorting all reaching defs found for a ceartin reg unit in a given BB.
  725. for (MBBDefsInfo &MBBDefs : MBBReachingDefs) {
  726. for (MBBRegUnitDefs &RegUnitDefs : MBBDefs)
  727. std::sort(RegUnitDefs.begin(), RegUnitDefs.end());
  728. }
  729. return false;
  730. }
  731. void ReachingDefAnalysis::releaseMemory() {
  732. // Clear the internal vectors.
  733. MBBOutRegsInfos.clear();
  734. MBBReachingDefs.clear();
  735. InstIds.clear();
  736. }
  737. bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
  738. if (skipFunction(mf.getFunction()))
  739. return false;
  740. MF = &mf;
  741. TII = MF->getSubtarget().getInstrInfo();
  742. TRI = MF->getSubtarget().getRegisterInfo();
  743. RDA = &getAnalysis<ReachingDefAnalysis>();
  744. RegClassInfo.runOnMachineFunction(mf);
  745. DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");
  746. // Traverse the basic blocks.
  747. for (MachineBasicBlock &MBB : mf) {
  748. processBasicBlock(&MBB);
  749. }
  750. return false;
  751. }
  752. int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) {
  753. assert(InstIds.count(MI) && "Unexpected machine instuction.");
  754. int InstId = InstIds[MI];
  755. int DefRes = ReachingDedDefaultVal;
  756. int MBBNumber = MI->getParent()->getNumber();
  757. assert(MBBNumber < MBBReachingDefs.size() &&
  758. "Unexpected basic block number.");
  759. int LatestDef = ReachingDedDefaultVal;
  760. for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
  761. for (int Def : MBBReachingDefs[MBBNumber][*Unit]) {
  762. if (Def >= InstId)
  763. break;
  764. DefRes = Def;
  765. }
  766. LatestDef = std::max(LatestDef, DefRes);
  767. }
  768. return LatestDef;
  769. }
  770. int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) {
  771. assert(InstIds.count(MI) && "Unexpected machine instuction.");
  772. return InstIds[MI] - getReachingDef(MI, PhysReg);
  773. }