PostRASchedulerList.cpp

  1. //===----- PostRASchedulerList.cpp - list scheduler ----------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This implements a top-down list scheduler, using standard algorithms.
  11. // The basic approach uses a priority queue of available nodes to schedule.
  12. // One at a time, nodes are taken from the priority queue (thus in priority
  13. // order), checked for legality to schedule, and emitted if legal.
  14. //
  15. // Nodes may not be legal to schedule either due to structural hazards (e.g.
  16. // pipeline or resource constraints) or because an input to the instruction has
  17. // not completed execution.
  18. //
  19. //===----------------------------------------------------------------------===//
  20. #define DEBUG_TYPE "post-RA-sched"
  21. #include "ScheduleDAGInstrs.h"
  22. #include "llvm/CodeGen/Passes.h"
  23. #include "llvm/CodeGen/LatencyPriorityQueue.h"
  24. #include "llvm/CodeGen/SchedulerRegistry.h"
  25. #include "llvm/CodeGen/MachineDominators.h"
  26. #include "llvm/CodeGen/MachineFunctionPass.h"
  27. #include "llvm/CodeGen/MachineLoopInfo.h"
  28. #include "llvm/CodeGen/MachineRegisterInfo.h"
  29. #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  30. #include "llvm/Target/TargetLowering.h"
  31. #include "llvm/Target/TargetMachine.h"
  32. #include "llvm/Target/TargetInstrInfo.h"
  33. #include "llvm/Target/TargetRegisterInfo.h"
  34. #include "llvm/Support/Compiler.h"
  35. #include "llvm/Support/Debug.h"
  36. #include "llvm/Support/ErrorHandling.h"
  37. #include "llvm/ADT/Statistic.h"
  38. #include <map>
  39. using namespace llvm;
  40. STATISTIC(NumNoops, "Number of noops inserted");
  41. STATISTIC(NumStalls, "Number of pipeline stalls");
  42. static cl::opt<bool>
  43. EnableAntiDepBreaking("break-anti-dependencies",
  44. cl::desc("Break post-RA scheduling anti-dependencies"),
  45. cl::init(true), cl::Hidden);
  46. static cl::opt<bool>
  47. EnablePostRAHazardAvoidance("avoid-hazards",
  48. cl::desc("Enable simple hazard-avoidance"),
  49. cl::init(true), cl::Hidden);
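// Both of these are hidden cl::opt flags: they are omitted from -help output
// but can still be set on the command line of whatever tool runs this pass
// (for example, something like "llc -break-anti-dependencies=false
// -avoid-hazards=false ...").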
  50. namespace {
  51. class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  52. public:
  53. static char ID;
  54. PostRAScheduler() : MachineFunctionPass(&ID) {}
  55. void getAnalysisUsage(AnalysisUsage &AU) const {
  56. AU.addRequired<MachineDominatorTree>();
  57. AU.addPreserved<MachineDominatorTree>();
  58. AU.addRequired<MachineLoopInfo>();
  59. AU.addPreserved<MachineLoopInfo>();
  60. MachineFunctionPass::getAnalysisUsage(AU);
  61. }
  62. const char *getPassName() const {
  63. return "Post RA top-down list latency scheduler";
  64. }
  65. bool runOnMachineFunction(MachineFunction &Fn);
  66. };
  67. char PostRAScheduler::ID = 0;
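/// SchedulePostRATDList - The per-region scheduler. For each stretch of
/// instructions handed to it by PostRAScheduler, it builds the scheduling
/// DAG, optionally breaks anti-dependencies along the critical path by
/// renaming registers (tracked via Classes, RegRefs, KillIndices, and
/// DefIndices below), and then runs a top-down list scheduler driven by
/// AvailableQueue and the hazard recognizer.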
  68. class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
  69. /// AvailableQueue - The priority queue to use for the available SUnits.
  70. ///
  71. LatencyPriorityQueue AvailableQueue;
  72. /// PendingQueue - This contains all of the instructions whose operands have
  73. /// been issued, but their results are not ready yet (due to the latency of
  74. /// the operation). Once the operands become available, the instruction is
  75. /// added to the AvailableQueue.
  76. std::vector<SUnit*> PendingQueue;
  77. /// Topo - A topological ordering for SUnits.
  78. ScheduleDAGTopologicalSort Topo;
  79. /// AllocatableSet - The set of allocatable registers.
  80. /// We'll be ignoring anti-dependencies on non-allocatable registers,
  81. /// because they may not be safe to break.
  82. const BitVector AllocatableSet;
  83. /// HazardRec - The hazard recognizer to use.
  84. ScheduleHazardRecognizer *HazardRec;
  85. /// Classes - For live regs that are only used in one register class in a
  86. /// live range, the register class. If the register is not live, the
  87. /// corresponding value is null. If the register is live but used in
  88. /// multiple register classes, the corresponding value is -1 cast to a
  89. /// pointer.
  90. const TargetRegisterClass *
  91. Classes[TargetRegisterInfo::FirstVirtualRegister];
  92. /// RegRefs - Map registers to all their references within a live range.
  93. std::multimap<unsigned, MachineOperand *> RegRefs;
  94. /// The index of the most recent kill (proceeding bottom-up), or ~0u if
  95. /// the register is not live.
  96. unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
  97. /// The index of the most recent complete def (proceeding bottom-up), or ~0u
  98. /// if the register is live.
  99. unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
  100. public:
  101. SchedulePostRATDList(MachineFunction &MF,
  102. const MachineLoopInfo &MLI,
  103. const MachineDominatorTree &MDT,
  104. ScheduleHazardRecognizer *HR)
  105. : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
  106. AllocatableSet(TRI->getAllocatableSet(MF)),
  107. HazardRec(HR) {}
  108. ~SchedulePostRATDList() {
  109. delete HazardRec;
  110. }
  111. /// StartBlock - Initialize register live-range state for scheduling in
  112. /// this block.
  113. ///
  114. void StartBlock(MachineBasicBlock *BB);
  115. /// Schedule - Schedule the instruction range using list scheduling.
  116. ///
  117. void Schedule();
  118. /// Observe - Update liveness information to account for the current
  119. /// instruction, which will not be scheduled.
  120. ///
  121. void Observe(MachineInstr *MI, unsigned Count);
  122. /// FinishBlock - Clean up register live-range state.
  123. ///
  124. void FinishBlock();
  125. private:
  126. void PrescanInstruction(MachineInstr *MI);
  127. void ScanInstruction(MachineInstr *MI, unsigned Count);
  128. void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
  129. void ReleaseSuccessors(SUnit *SU);
  130. void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
  131. void ListScheduleTopDown();
  132. bool BreakAntiDependencies();
  133. };
  134. /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
  135. /// a coarse classification and attempts to keep instructions of
  136. /// a given class from being grouped too densely together.
  137. class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
  138. /// Class - A simple classification for SUnits.
  139. enum Class {
  140. Other, Load, Store
  141. };
  142. /// Window - The Class values of the most recently issued
  143. /// instructions.
  144. Class Window[8];
  145. /// getClass - Classify the given SUnit.
  146. Class getClass(const SUnit *SU) {
  147. const MachineInstr *MI = SU->getInstr();
  148. const TargetInstrDesc &TID = MI->getDesc();
  149. if (TID.mayLoad())
  150. return Load;
  151. if (TID.mayStore())
  152. return Store;
  153. return Other;
  154. }
  155. /// Step - Rotate the existing entries in Window and insert the
  156. /// given class value as the most recent entry.
  157. void Step(Class C) {
  158. std::copy(Window+1, array_endof(Window), Window);
  159. Window[array_lengthof(Window)-1] = C;
  160. }
  161. public:
  162. SimpleHazardRecognizer() : Window() {}
  163. virtual HazardType getHazardType(SUnit *SU) {
  164. Class C = getClass(SU);
  165. if (C == Other)
  166. return NoHazard;
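// Score the window with a recency weight: a matching entry in the oldest
// slot adds 1 and one in the most recent slot adds 8, for a maximum of 36.
// The threshold below (twice the window size, i.e. 16) roughly corresponds
// to several of the most recently issued instructions being of this class.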
  167. unsigned Score = 0;
  168. for (unsigned i = 0; i != array_lengthof(Window); ++i)
  169. if (Window[i] == C)
  170. Score += i + 1;
  171. if (Score > array_lengthof(Window) * 2)
  172. return Hazard;
  173. return NoHazard;
  174. }
  175. virtual void EmitInstruction(SUnit *SU) {
  176. Step(getClass(SU));
  177. }
  178. virtual void AdvanceCycle() {
  179. Step(Other);
  180. }
  181. };
  182. }
  183. /// isSchedulingBoundary - Test if the given instruction should be
  184. /// considered a scheduling boundary. This primarily includes labels
  185. /// and terminators.
  186. ///
  187. static bool isSchedulingBoundary(const MachineInstr *MI,
  188. const MachineFunction &MF) {
  189. // Terminators and labels can't be scheduled around.
  190. if (MI->getDesc().isTerminator() || MI->isLabel())
  191. return true;
  192. // Don't attempt to schedule around any instruction that modifies
  193. // a stack-oriented pointer, as it's unlikely to be profitable. This
  194. // saves compile time, because it doesn't require every single
  195. // stack slot reference to depend on the instruction that does the
  196. // modification.
  197. const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
  198. if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
  199. return true;
  200. return false;
  201. }
  202. bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  203. DOUT << "PostRAScheduler\n";
  204. const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  205. const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  206. ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
  207. new SimpleHazardRecognizer :
  208. new ScheduleHazardRecognizer();
  209. SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);
  210. // Loop over all of the basic blocks
  211. for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
  212. MBB != MBBe; ++MBB) {
  213. // Initialize register live-range state for scheduling in this block.
  214. Scheduler.StartBlock(MBB);
  215. // Schedule each sequence of instructions not interrupted by a label
  216. // or anything else that effectively needs to shut down scheduling.
  217. MachineBasicBlock::iterator Current = MBB->end();
  218. unsigned Count = MBB->size(), CurrentCount = Count;
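// Walk the block bottom-up, carving it into regions at scheduling
// boundaries. When a boundary is found, the instructions below it (up to
// Current) are scheduled and emitted as one region; the boundary
// instruction itself is not rescheduled, but is passed to Observe() so the
// liveness state stays correct for the region above it. Count counts down
// to the index of the instruction being visited, and CurrentCount is the
// instruction index corresponding to the Current iterator.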
  219. for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
  220. MachineInstr *MI = prior(I);
  221. if (isSchedulingBoundary(MI, Fn)) {
  222. Scheduler.Run(MBB, I, Current, CurrentCount);
  223. Scheduler.EmitSchedule();
  224. Current = MI;
  225. CurrentCount = Count - 1;
  226. Scheduler.Observe(MI, CurrentCount);
  227. }
  228. I = MI;
  229. --Count;
  230. }
  231. assert(Count == 0 && "Instruction count mismatch!");
  232. assert((MBB->begin() == Current || CurrentCount != 0) &&
  233. "Instruction count mismatch!");
  234. Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
  235. Scheduler.EmitSchedule();
  236. // Clean up register live-range state.
  237. Scheduler.FinishBlock();
  238. }
  239. return true;
  240. }
  241. /// StartBlock - Initialize register live-range state for scheduling in
  242. /// this block.
  243. ///
  244. void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
  245. // Call the superclass.
  246. ScheduleDAGInstrs::StartBlock(BB);
  247. // Clear out the register class data.
  248. std::fill(Classes, array_endof(Classes),
  249. static_cast<const TargetRegisterClass *>(0));
  250. // Initialize the indices to indicate that no registers are live.
  251. std::fill(KillIndices, array_endof(KillIndices), ~0u);
  252. std::fill(DefIndices, array_endof(DefIndices), BB->size());
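// Convention: indices count instruction positions from the top of the
// block, and the scan proceeds bottom-up. KillIndices[Reg] == ~0u with
// DefIndices[Reg] == BB->size() means the register is currently dead;
// marking a register live-out is done below by giving it a kill index of
// BB->size() and a def index of ~0u.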
  253. // Determine the live-out physregs for this block.
  254. if (!BB->empty() && BB->back().getDesc().isReturn())
  255. // In a return block, examine the function live-out regs.
  256. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
  257. E = MRI.liveout_end(); I != E; ++I) {
  258. unsigned Reg = *I;
  259. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  260. KillIndices[Reg] = BB->size();
  261. DefIndices[Reg] = ~0u;
  262. // Repeat, for all aliases.
  263. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  264. unsigned AliasReg = *Alias;
  265. Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  266. KillIndices[AliasReg] = BB->size();
  267. DefIndices[AliasReg] = ~0u;
  268. }
  269. }
  270. else
  271. // In a non-return block, examine the live-in regs of all successors.
  272. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
  273. SE = BB->succ_end(); SI != SE; ++SI)
  274. for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
  275. E = (*SI)->livein_end(); I != E; ++I) {
  276. unsigned Reg = *I;
  277. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  278. KillIndices[Reg] = BB->size();
  279. DefIndices[Reg] = ~0u;
  280. // Repeat, for all aliases.
  281. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  282. unsigned AliasReg = *Alias;
  283. Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  284. KillIndices[AliasReg] = BB->size();
  285. DefIndices[AliasReg] = ~0u;
  286. }
  287. }
  288. // Consider callee-saved registers as live-out, since we're running after
  289. // prologue/epilogue insertion, so there's no way to add additional
  290. // saved registers.
  291. //
  292. // TODO: If the callee saves and restores these, then we can potentially
  293. // use them between the save and the restore. To do that, we could scan
  294. // the exit blocks to see which of these registers are defined.
  295. // Alternatively, callee-saved registers that aren't saved and restored
  296. // could be marked live-in in every block.
  297. for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
  298. unsigned Reg = *I;
  299. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  300. KillIndices[Reg] = BB->size();
  301. DefIndices[Reg] = ~0u;
  302. // Repeat, for all aliases.
  303. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  304. unsigned AliasReg = *Alias;
  305. Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  306. KillIndices[AliasReg] = BB->size();
  307. DefIndices[AliasReg] = ~0u;
  308. }
  309. }
  310. }
  311. /// Schedule - Schedule the instruction range using list scheduling.
  312. ///
  313. void SchedulePostRATDList::Schedule() {
  314. DOUT << "********** List Scheduling **********\n";
  315. // Build the scheduling graph.
  316. BuildSchedGraph();
  317. if (EnableAntiDepBreaking) {
  318. if (BreakAntiDependencies()) {
  319. // We made changes. Update the dependency graph.
  320. // Theoretically we could update the graph in place:
  321. // When a live range is changed to use a different register, remove
  322. // the def's anti-dependence *and* output-dependence edges due to
  323. // that register, and add new anti-dependence and output-dependence
  324. // edges based on the next live range of the register.
  325. SUnits.clear();
  326. EntrySU = SUnit();
  327. ExitSU = SUnit();
  328. BuildSchedGraph();
  329. }
  330. }
  331. AvailableQueue.initNodes(SUnits);
  332. ListScheduleTopDown();
  333. AvailableQueue.releaseState();
  334. }
  335. /// Observe - Update liveness information to account for the current
  336. /// instruction, which will not be scheduled.
  337. ///
  338. void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
  339. assert(Count < InsertPosIndex && "Instruction index out of expected range!");
  340. // Any register which was defined within the previous scheduling region
  341. // may have been rescheduled and its lifetime may overlap with registers
  342. // in ways not reflected in our current liveness state. For each such
  343. // register, adjust the liveness state to be conservatively correct.
  344. for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
  345. if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
  346. assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
  347. // Mark this register as non-renamable.
  348. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  349. // Move the def index to the end of the previous region, to reflect
  350. // that the def could theoretically have been scheduled at the end.
  351. DefIndices[Reg] = InsertPosIndex;
  352. }
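// Even though MI keeps its position, scan its operands so that Classes,
// RegRefs, and the kill/def indices reflect it; the region scheduled above
// this instruction depends on this state being up to date.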
  353. PrescanInstruction(MI);
  354. ScanInstruction(MI, Count);
  355. }
  356. /// FinishBlock - Clean up register live-range state.
  357. ///
  358. void SchedulePostRATDList::FinishBlock() {
  359. RegRefs.clear();
  360. // Call the superclass.
  361. ScheduleDAGInstrs::FinishBlock();
  362. }
  363. /// CriticalPathStep - Return the next SUnit after SU on the bottom-up
  364. /// critical path.
  365. static SDep *CriticalPathStep(SUnit *SU) {
  366. SDep *Next = 0;
  367. unsigned NextDepth = 0;
  368. // Find the predecessor edge with the greatest depth.
  369. for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
  370. P != PE; ++P) {
  371. SUnit *PredSU = P->getSUnit();
  372. unsigned PredLatency = P->getLatency();
  373. unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
  374. // In the case of a latency tie, prefer an anti-dependency edge over
  375. // other types of edges.
  376. if (NextDepth < PredTotalLatency ||
  377. (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
  378. NextDepth = PredTotalLatency;
  379. Next = &*P;
  380. }
  381. }
  382. return Next;
  383. }
  384. void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
  385. // Scan the register operands for this instruction and update
  386. // Classes and RegRefs.
  387. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  388. MachineOperand &MO = MI->getOperand(i);
  389. if (!MO.isReg()) continue;
  390. unsigned Reg = MO.getReg();
  391. if (Reg == 0) continue;
  392. const TargetRegisterClass *NewRC =
  393. getInstrOperandRegClass(TRI, MI->getDesc(), i);
  394. // For now, only allow the register to be changed if its register
  395. // class is consistent across all uses.
  396. if (!Classes[Reg] && NewRC)
  397. Classes[Reg] = NewRC;
  398. else if (!NewRC || Classes[Reg] != NewRC)
  399. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  400. // Now check for aliases.
  401. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  402. // If an alias of the reg is used during the live range, give up.
  403. // Note that this allows us to skip checking if AntiDepReg
  404. // overlaps with any of the aliases, among other things.
  405. unsigned AliasReg = *Alias;
  406. if (Classes[AliasReg]) {
  407. Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  408. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  409. }
  410. }
  411. // If we're still willing to consider this register, note the reference.
  412. if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
  413. RegRefs.insert(std::make_pair(Reg, &MO));
  414. }
  415. }
  416. void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
  417. unsigned Count) {
  418. // Update liveness.
  419. // Proceeding upwards, registers that are defined but not used in this
  420. // instruction are now dead.
  421. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  422. MachineOperand &MO = MI->getOperand(i);
  423. if (!MO.isReg()) continue;
  424. unsigned Reg = MO.getReg();
  425. if (Reg == 0) continue;
  426. if (!MO.isDef()) continue;
  427. // Ignore two-addr defs.
  428. if (MI->isRegTiedToUseOperand(i)) continue;
  429. DefIndices[Reg] = Count;
  430. KillIndices[Reg] = ~0u;
  431. assert(((KillIndices[Reg] == ~0u) !=
  432. (DefIndices[Reg] == ~0u)) &&
  433. "Kill and Def maps aren't consistent for Reg!");
  434. Classes[Reg] = 0;
  435. RegRefs.erase(Reg);
  436. // Repeat, for all subregs.
  437. for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
  438. *Subreg; ++Subreg) {
  439. unsigned SubregReg = *Subreg;
  440. DefIndices[SubregReg] = Count;
  441. KillIndices[SubregReg] = ~0u;
  442. Classes[SubregReg] = 0;
  443. RegRefs.erase(SubregReg);
  444. }
  445. // Conservatively mark super-registers as unusable.
  446. for (const unsigned *Super = TRI->getSuperRegisters(Reg);
  447. *Super; ++Super) {
  448. unsigned SuperReg = *Super;
  449. Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  450. }
  451. }
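// Second pass: handle uses. Scanning bottom-up, a use of a register that
// is not currently live starts a new live range, so that use becomes the
// register's kill and its def index is reset to ~0u.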
  452. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  453. MachineOperand &MO = MI->getOperand(i);
  454. if (!MO.isReg()) continue;
  455. unsigned Reg = MO.getReg();
  456. if (Reg == 0) continue;
  457. if (!MO.isUse()) continue;
  458. const TargetRegisterClass *NewRC =
  459. getInstrOperandRegClass(TRI, MI->getDesc(), i);
  460. // For now, only allow the register to be changed if its register
  461. // class is consistent across all uses.
  462. if (!Classes[Reg] && NewRC)
  463. Classes[Reg] = NewRC;
  464. else if (!NewRC || Classes[Reg] != NewRC)
  465. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  466. RegRefs.insert(std::make_pair(Reg, &MO));
  467. // It wasn't previously live but now it is; this is a kill.
  468. if (KillIndices[Reg] == ~0u) {
  469. KillIndices[Reg] = Count;
  470. DefIndices[Reg] = ~0u;
  471. assert(((KillIndices[Reg] == ~0u) !=
  472. (DefIndices[Reg] == ~0u)) &&
  473. "Kill and Def maps aren't consistent for Reg!");
  474. }
  475. // Repeat, for all aliases.
  476. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  477. unsigned AliasReg = *Alias;
  478. if (KillIndices[AliasReg] == ~0u) {
  479. KillIndices[AliasReg] = Count;
  480. DefIndices[AliasReg] = ~0u;
  481. }
  482. }
  483. }
  484. }
  485. /// BreakAntiDependencies - Identify anti-dependencies along the critical path
  486. /// of the ScheduleDAG and break them by renaming registers.
  487. ///
  488. bool SchedulePostRATDList::BreakAntiDependencies() {
  489. // The code below assumes that there is at least one instruction,
  490. // so just duck out immediately if the block is empty.
  491. if (SUnits.empty()) return false;
  492. // Find the node at the bottom of the critical path.
  493. SUnit *Max = 0;
  494. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
  495. SUnit *SU = &SUnits[i];
  496. if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
  497. Max = SU;
  498. }
  499. DOUT << "Critical path has total latency "
  500. << (Max->getDepth() + Max->Latency) << "\n";
  501. // Track progress along the critical path through the SUnit graph as we walk
  502. // the instructions.
  503. SUnit *CriticalPathSU = Max;
  504. MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
  505. // Consider this pattern:
  506. // A = ...
  507. // ... = A
  508. // A = ...
  509. // ... = A
  510. // A = ...
  511. // ... = A
  512. // A = ...
  513. // ... = A
  514. // There are three anti-dependencies here, and without special care,
  515. // we'd break all of them using the same register:
  516. // A = ...
  517. // ... = A
  518. // B = ...
  519. // ... = B
  520. // B = ...
  521. // ... = B
  522. // B = ...
  523. // ... = B
  524. // because at each anti-dependence, B is the first register that
  525. // isn't A which is free. This re-introduces anti-dependencies
  526. // at all but one of the original anti-dependencies that we were
  527. // trying to break. To avoid this, keep track of the most recent
  528. // register that each register was replaced with, and avoid
  529. // using it to repair an anti-dependence on the same register.
  530. // This lets us produce this:
  531. // A = ...
  532. // ... = A
  533. // B = ...
  534. // ... = B
  535. // C = ...
  536. // ... = C
  537. // B = ...
  538. // ... = B
  539. // This still has an anti-dependence on B, but at least it isn't on the
  540. // original critical path.
  541. //
  542. // TODO: If we tracked more than one register here, we could potentially
  543. // fix that remaining critical edge too. This is a little more involved,
  544. // because unlike the most recent register, less recent registers should
  545. // still be considered, though only if no other registers are available.
  546. unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
  547. // Attempt to break anti-dependence edges on the critical path. Walk the
  548. // instructions from the bottom up, tracking information about liveness
  549. // as we go to help determine which registers are available.
  550. bool Changed = false;
  551. unsigned Count = InsertPosIndex - 1;
  552. for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
  553. I != E; --Count) {
  554. MachineInstr *MI = --I;
  555. // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
  556. // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
  557. // is left behind appearing to clobber the super-register, while the
  558. // subregister needs to remain live. So we just ignore them.
  559. if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
  560. continue;
  561. // Check if this instruction has a dependence on the critical path that
  562. // is an anti-dependence that we may be able to break. If it is, set
  563. // AntiDepReg to the non-zero register associated with the anti-dependence.
  564. //
  565. // We limit our attention to the critical path as a heuristic to avoid
  566. // breaking anti-dependence edges that aren't going to significantly
  567. // impact the overall schedule. There are a limited number of registers
  568. // and we want to save them for the important edges.
  569. //
  570. // TODO: Instructions with multiple defs could have multiple
  571. // anti-dependencies. The current code here only knows how to break one
  572. // edge per instruction. Note that we'd have to be able to break all of
  573. // the anti-dependencies in an instruction in order to be effective.
  574. unsigned AntiDepReg = 0;
  575. if (MI == CriticalPathMI) {
  576. if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
  577. SUnit *NextSU = Edge->getSUnit();
  578. // Only consider anti-dependence edges.
  579. if (Edge->getKind() == SDep::Anti) {
  580. AntiDepReg = Edge->getReg();
  581. assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
  582. // Don't break anti-dependencies on non-allocatable registers.
  583. if (!AllocatableSet.test(AntiDepReg))
  584. AntiDepReg = 0;
  585. else {
  586. // If the SUnit has other dependencies on the SUnit that it
  587. // anti-depends on, don't bother breaking the anti-dependency
  588. // since those edges would prevent such units from being
  589. // scheduled past each other regardless.
  590. //
  591. // Also, if there are dependencies on other SUnits with the
  592. // same register as the anti-dependency, don't attempt to
  593. // break it.
  594. for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
  595. PE = CriticalPathSU->Preds.end(); P != PE; ++P)
  596. if (P->getSUnit() == NextSU ?
  597. (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
  598. (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
  599. AntiDepReg = 0;
  600. break;
  601. }
  602. }
  603. }
  604. CriticalPathSU = NextSU;
  605. CriticalPathMI = CriticalPathSU->getInstr();
  606. } else {
  607. // We've reached the end of the critical path.
  608. CriticalPathSU = 0;
  609. CriticalPathMI = 0;
  610. }
  611. }
  612. PrescanInstruction(MI);
  613. // If this instruction has a use of AntiDepReg, breaking it
  614. // is invalid.
  615. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  616. MachineOperand &MO = MI->getOperand(i);
  617. if (!MO.isReg()) continue;
  618. unsigned Reg = MO.getReg();
  619. if (Reg == 0) continue;
  620. if (MO.isUse() && AntiDepReg == Reg) {
  621. AntiDepReg = 0;
  622. break;
  623. }
  624. }
  625. // Determine AntiDepReg's register class, if it is live and is
  626. // consistently used within a single class.
  627. const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
  628. assert((AntiDepReg == 0 || RC != NULL) &&
  629. "Register should be live if it's causing an anti-dependence!");
  630. if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
  631. AntiDepReg = 0;
  632. // Look for a suitable register to use to break the anti-dependence.
  633. //
  634. // TODO: Instead of picking the first free register, consider which might
  635. // be the best.
  636. if (AntiDepReg != 0) {
  637. for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
  638. RE = RC->allocation_order_end(MF); R != RE; ++R) {
  639. unsigned NewReg = *R;
  640. // Don't replace a register with itself.
  641. if (NewReg == AntiDepReg) continue;
  642. // Don't replace a register with one that was recently used to repair
  643. // an anti-dependence with this AntiDepReg, because that would
  644. // re-introduce that anti-dependence.
  645. if (NewReg == LastNewReg[AntiDepReg]) continue;
  646. // If NewReg is dead and NewReg's most recent def is not before
  647. // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
  648. assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
  649. "Kill and Def maps aren't consistent for AntiDepReg!");
  650. assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
  651. "Kill and Def maps aren't consistent for NewReg!");
  652. if (KillIndices[NewReg] == ~0u &&
  653. Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
  654. KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
  655. DOUT << "Breaking anti-dependence edge on "
  656. << TRI->getName(AntiDepReg)
  657. << " with " << RegRefs.count(AntiDepReg) << " references"
  658. << " using " << TRI->getName(NewReg) << "!\n";
  659. // Update the references to the old register to refer to the new
  660. // register.
  661. std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
  662. std::multimap<unsigned, MachineOperand *>::iterator>
  663. Range = RegRefs.equal_range(AntiDepReg);
  664. for (std::multimap<unsigned, MachineOperand *>::iterator
  665. Q = Range.first, QE = Range.second; Q != QE; ++Q)
  666. Q->second->setReg(NewReg);
  667. // We just went back in time and modified history; the
  668. // liveness information for the anti-dependence reg is now
  669. // inconsistent. Set the state as if it were dead.
  670. Classes[NewReg] = Classes[AntiDepReg];
  671. DefIndices[NewReg] = DefIndices[AntiDepReg];
  672. KillIndices[NewReg] = KillIndices[AntiDepReg];
  673. assert(((KillIndices[NewReg] == ~0u) !=
  674. (DefIndices[NewReg] == ~0u)) &&
  675. "Kill and Def maps aren't consistent for NewReg!");
  676. Classes[AntiDepReg] = 0;
  677. DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
  678. KillIndices[AntiDepReg] = ~0u;
  679. assert(((KillIndices[AntiDepReg] == ~0u) !=
  680. (DefIndices[AntiDepReg] == ~0u)) &&
  681. "Kill and Def maps aren't consistent for AntiDepReg!");
  682. RegRefs.erase(AntiDepReg);
  683. Changed = true;
  684. LastNewReg[AntiDepReg] = NewReg;
  685. break;
  686. }
  687. }
  688. }
  689. ScanInstruction(MI, Count);
  690. }
  691. return Changed;
  692. }
  693. //===----------------------------------------------------------------------===//
  694. // Top-Down Scheduling
  695. //===----------------------------------------------------------------------===//
  696. /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
  697. /// the PendingQueue if the count reaches zero. Also update its cycle bound.
  698. void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  699. SUnit *SuccSU = SuccEdge->getSUnit();
  700. --SuccSU->NumPredsLeft;
  701. #ifndef NDEBUG
  702. if (SuccSU->NumPredsLeft < 0) {
  703. cerr << "*** Scheduling failed! ***\n";
  704. SuccSU->dump(this);
  705. cerr << " has been released too many times!\n";
  706. llvm_unreachable();
  707. }
  708. #endif
  709. // Compute how many cycles it will be before this actually becomes
  710. // available. This is the max of the start time of all predecessors plus
  711. // their latencies.
  712. SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
  713. // If all the node's predecessors are scheduled, this node is ready
  714. // to be scheduled. Ignore the special ExitSU node.
  715. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
  716. PendingQueue.push_back(SuccSU);
  717. }
  718. /// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
  719. void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
  720. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
  721. I != E; ++I)
  722. ReleaseSucc(SU, &*I);
  723. }
  724. /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
  725. /// count of its successors. If a successor's pending count reaches zero, add it
  726. /// to the PendingQueue.
  727. void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  728. DOUT << "*** Scheduling [" << CurCycle << "]: ";
  729. DEBUG(SU->dump(this));
  730. Sequence.push_back(SU);
  731. assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  732. SU->setDepthToAtLeast(CurCycle);
  733. ReleaseSuccessors(SU);
  734. SU->isScheduled = true;
  735. AvailableQueue.ScheduledNode(SU);
  736. }
  737. /// ListScheduleTopDown - The main loop of list scheduling for top-down
  738. /// schedulers.
  739. void SchedulePostRATDList::ListScheduleTopDown() {
  740. unsigned CurCycle = 0;
  741. // Release any successors of the special Entry node.
  742. ReleaseSuccessors(&EntrySU);
  743. // Add all leaves to the Available queue.
  744. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
  745. // It is available if it has no predecessors.
  746. if (SUnits[i].Preds.empty()) {
  747. AvailableQueue.push(&SUnits[i]);
  748. SUnits[i].isAvailable = true;
  749. }
  750. }
  751. // While Available queue is not empty, grab the node with the highest
  752. // priority. If it is not ready, put it back. Schedule the node.
  753. std::vector<SUnit*> NotReady;
  754. Sequence.reserve(SUnits.size());
  755. while (!AvailableQueue.empty() || !PendingQueue.empty()) {
  756. // Check to see if any of the pending instructions are ready to issue. If
  757. // so, add them to the available queue.
  758. unsigned MinDepth = ~0u;
  759. for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
  760. if (PendingQueue[i]->getDepth() <= CurCycle) {
  761. AvailableQueue.push(PendingQueue[i]);
  762. PendingQueue[i]->isAvailable = true;
  763. PendingQueue[i] = PendingQueue.back();
  764. PendingQueue.pop_back();
  765. --i; --e;
  766. } else if (PendingQueue[i]->getDepth() < MinDepth)
  767. MinDepth = PendingQueue[i]->getDepth();
  768. }
  769. // If there are no instructions available, don't try to issue anything, and
  770. // don't advance the hazard recognizer.
  771. if (AvailableQueue.empty()) {
  772. CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
  773. continue;
  774. }
  775. SUnit *FoundSUnit = 0;
  776. bool HasNoopHazards = false;
  777. while (!AvailableQueue.empty()) {
  778. SUnit *CurSUnit = AvailableQueue.pop();
  779. ScheduleHazardRecognizer::HazardType HT =
  780. HazardRec->getHazardType(CurSUnit);
  781. if (HT == ScheduleHazardRecognizer::NoHazard) {
  782. FoundSUnit = CurSUnit;
  783. break;
  784. }
  785. // Remember if this is a noop hazard.
  786. HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
  787. NotReady.push_back(CurSUnit);
  788. }
  789. // Add the nodes that aren't ready back onto the available list.
  790. if (!NotReady.empty()) {
  791. AvailableQueue.push_all(NotReady);
  792. NotReady.clear();
  793. }
  794. // If we found a node to schedule, do it now.
  795. if (FoundSUnit) {
  796. ScheduleNodeTopDown(FoundSUnit, CurCycle);
  797. HazardRec->EmitInstruction(FoundSUnit);
  798. // If this is a pseudo-op node, we don't want to increment the current
  799. // cycle.
  800. if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
  801. ++CurCycle;
  802. } else if (!HasNoopHazards) {
  803. // Otherwise, we have a pipeline stall but no other problem; just advance
  804. // the current cycle and try again.
  805. DOUT << "*** Advancing cycle, no work to do\n";
  806. HazardRec->AdvanceCycle();
  807. ++NumStalls;
  808. ++CurCycle;
  809. } else {
  810. // Otherwise, we have no instructions to issue and we have instructions
  811. // that will fault if we don't do this right. This is the case for
  812. // processors without pipeline interlocks and other cases.
  813. DOUT << "*** Emitting noop\n";
  814. HazardRec->EmitNoop();
  815. Sequence.push_back(0); // NULL here means noop
  816. ++NumNoops;
  817. ++CurCycle;
  818. }
  819. }
  820. #ifndef NDEBUG
  821. VerifySchedule(/*isBottomUp=*/false);
  822. #endif
  823. }
  824. //===----------------------------------------------------------------------===//
  825. // Public Constructor Functions
  826. //===----------------------------------------------------------------------===//
  827. FunctionPass *llvm::createPostRAScheduler() {
  828. return new PostRAScheduler();
  829. }