  1. //===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This implements a top-down list scheduler, using standard algorithms.
  11. // The basic approach uses a priority queue of available nodes to schedule.
  12. // One at a time, nodes are taken from the priority queue (thus in priority
  13. // order), checked for legality to schedule, and emitted if legal.
  14. //
  15. // Nodes may not be legal to schedule either due to structural hazards (e.g.
  16. // pipeline or resource constraints) or because an input to the instruction has
  17. // not completed execution.
  18. //
  19. //===----------------------------------------------------------------------===//
  20. #define DEBUG_TYPE "post-RA-sched"
  21. #include "ExactHazardRecognizer.h"
  22. #include "SimpleHazardRecognizer.h"
  23. #include "ScheduleDAGInstrs.h"
  24. #include "llvm/CodeGen/Passes.h"
  25. #include "llvm/CodeGen/LatencyPriorityQueue.h"
  26. #include "llvm/CodeGen/SchedulerRegistry.h"
  27. #include "llvm/CodeGen/MachineDominators.h"
  28. #include "llvm/CodeGen/MachineFrameInfo.h"
  29. #include "llvm/CodeGen/MachineFunctionPass.h"
  30. #include "llvm/CodeGen/MachineLoopInfo.h"
  31. #include "llvm/CodeGen/MachineRegisterInfo.h"
  32. #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  33. #include "llvm/Analysis/AliasAnalysis.h"
  34. #include "llvm/Target/TargetLowering.h"
  35. #include "llvm/Target/TargetMachine.h"
  36. #include "llvm/Target/TargetInstrInfo.h"
  37. #include "llvm/Target/TargetRegisterInfo.h"
  38. #include "llvm/Target/TargetSubtarget.h"
  39. #include "llvm/Support/Debug.h"
  40. #include "llvm/Support/ErrorHandling.h"
  41. #include "llvm/Support/raw_ostream.h"
  42. #include "llvm/ADT/Statistic.h"
  43. #include <map>
  44. #include <set>
  45. using namespace llvm;
  46. STATISTIC(NumNoops, "Number of noops inserted");
  47. STATISTIC(NumStalls, "Number of pipeline stalls");
  48. // Post-RA scheduling is enabled with
  49. // TargetSubtarget.enablePostRAScheduler(). This flag can be used to
  50. // override the target.
  51. static cl::opt<bool>
  52. EnablePostRAScheduler("post-RA-scheduler",
  53. cl::desc("Enable scheduling after register allocation"),
  54. cl::init(false), cl::Hidden);
  55. static cl::opt<bool>
  56. EnableAntiDepBreaking("break-anti-dependencies",
  57. cl::desc("Break post-RA scheduling anti-dependencies"),
  58. cl::init(true), cl::Hidden);
  59. static cl::opt<bool>
  60. EnablePostRAHazardAvoidance("avoid-hazards",
  61. cl::desc("Enable exact hazard avoidance"),
  62. cl::init(true), cl::Hidden);
  63. // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
  64. static cl::opt<int>
  65. DebugDiv("postra-sched-debugdiv",
  66. cl::desc("Debug control MBBs that are scheduled"),
  67. cl::init(0), cl::Hidden);
  68. static cl::opt<int>
  69. DebugMod("postra-sched-debugmod",
  70. cl::desc("Debug control MBBs that are scheduled"),
  71. cl::init(0), cl::Hidden);
  72. namespace {
  73. class PostRAScheduler : public MachineFunctionPass {
  74. AliasAnalysis *AA;
  75. CodeGenOpt::Level OptLevel;
  76. public:
  77. static char ID;
  78. PostRAScheduler(CodeGenOpt::Level ol) :
  79. MachineFunctionPass(&ID), OptLevel(ol) {}
  80. void getAnalysisUsage(AnalysisUsage &AU) const {
  81. AU.setPreservesCFG();
  82. AU.addRequired<AliasAnalysis>();
  83. AU.addRequired<MachineDominatorTree>();
  84. AU.addPreserved<MachineDominatorTree>();
  85. AU.addRequired<MachineLoopInfo>();
  86. AU.addPreserved<MachineLoopInfo>();
  87. MachineFunctionPass::getAnalysisUsage(AU);
  88. }
  89. const char *getPassName() const {
  90. return "Post RA top-down list latency scheduler";
  91. }
  92. bool runOnMachineFunction(MachineFunction &Fn);
  93. };
  94. char PostRAScheduler::ID = 0;
  95. class SchedulePostRATDList : public ScheduleDAGInstrs {
  96. /// AvailableQueue - The priority queue to use for the available SUnits.
  97. ///
  98. LatencyPriorityQueue AvailableQueue;
  99. /// PendingQueue - This contains all of the instructions whose operands have
  100. /// been issued, but their results are not ready yet (due to the latency of
  101. /// the operation). Once the operands become available, the instruction is
  102. /// added to the AvailableQueue.
  103. std::vector<SUnit*> PendingQueue;
  104. /// Topo - A topological ordering for SUnits.
  105. ScheduleDAGTopologicalSort Topo;
  106. /// AllocatableSet - The set of allocatable registers.
  107. /// We'll be ignoring anti-dependencies on non-allocatable registers,
  108. /// because they may not be safe to break.
  109. const BitVector AllocatableSet;
  110. /// HazardRec - The hazard recognizer to use.
  111. ScheduleHazardRecognizer *HazardRec;
  112. /// AA - AliasAnalysis for making memory reference queries.
  113. AliasAnalysis *AA;
  114. /// AntiDepMode - Anti-dependence breaking mode
  115. TargetSubtarget::AntiDepBreakMode AntiDepMode;
  116. /// Classes - For live regs that are only used in one register class in a
  117. /// live range, the register class. If the register is not live, the
  118. /// corresponding value is null. If the register is live but used in
  119. /// multiple register classes, the corresponding value is -1 casted to a
  120. /// pointer.
  121. const TargetRegisterClass *
  122. Classes[TargetRegisterInfo::FirstVirtualRegister];
  123. /// RegRefs - Map registers to all their references within a live range.
  124. std::multimap<unsigned, MachineOperand *> RegRefs;
  125. /// KillIndices - The index of the most recent kill (proceeding bottom-up),
  126. /// or ~0u if the register is not live.
  127. unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
  128. /// DefIndices - The index of the most recent complete def (proceeding bottom
  129. /// up), or ~0u if the register is live.
  130. unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
  131. /// KeepRegs - A set of registers which are live and cannot be changed to
  132. /// break anti-dependencies.
  133. SmallSet<unsigned, 4> KeepRegs;
  134. public:
  135. SchedulePostRATDList(MachineFunction &MF,
  136. const MachineLoopInfo &MLI,
  137. const MachineDominatorTree &MDT,
  138. ScheduleHazardRecognizer *HR,
  139. AliasAnalysis *aa,
  140. TargetSubtarget::AntiDepBreakMode adm)
  141. : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
  142. AllocatableSet(TRI->getAllocatableSet(MF)),
  143. HazardRec(HR), AA(aa), AntiDepMode(adm) {}
  144. ~SchedulePostRATDList() {
  145. delete HazardRec;
  146. }
  147. /// StartBlock - Initialize register live-range state for scheduling in
  148. /// this block.
  149. ///
  150. void StartBlock(MachineBasicBlock *BB);
  151. /// Schedule - Schedule the instruction range using list scheduling.
  152. ///
  153. void Schedule();
  154. /// FixupKills - Fix register kill flags that have been made
  155. /// invalid due to scheduling
  156. ///
  157. void FixupKills(MachineBasicBlock *MBB);
  158. /// Observe - Update liveness information to account for the current
  159. /// instruction, which will not be scheduled.
  160. ///
  161. void Observe(MachineInstr *MI, unsigned Count);
  162. /// FinishBlock - Clean up register live-range state.
  163. ///
  164. void FinishBlock();
  165. private:
  166. void PrescanInstruction(MachineInstr *MI);
  167. void ScanInstruction(MachineInstr *MI, unsigned Count);
  168. void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
  169. void ReleaseSuccessors(SUnit *SU);
  170. void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
  171. void ListScheduleTopDown();
  172. bool BreakAntiDependencies();
  173. unsigned findSuitableFreeRegister(unsigned AntiDepReg,
  174. unsigned LastNewReg,
  175. const TargetRegisterClass *);
  176. void StartBlockForKills(MachineBasicBlock *BB);
  177. // ToggleKillFlag - Toggle a register operand kill flag. Other
  178. // adjustments may be made to the instruction if necessary. Return
  179. // true if the operand has been deleted, false if not.
  180. bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
  181. };
  182. }
  183. /// isSchedulingBoundary - Test if the given instruction should be
  184. /// considered a scheduling boundary. This primarily includes labels
  185. /// and terminators.
  186. ///
  187. static bool isSchedulingBoundary(const MachineInstr *MI,
  188. const MachineFunction &MF) {
  189. // Terminators and labels can't be scheduled around.
  190. if (MI->getDesc().isTerminator() || MI->isLabel())
  191. return true;
  192. // Don't attempt to schedule around any instruction that modifies
  193. // a stack-oriented pointer, as it's unlikely to be profitable. This
  194. // saves compile time, because it doesn't require every single
  195. // stack slot reference to depend on the instruction that does the
  196. // modification.
  197. const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
  198. if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
  199. return true;
  200. return false;
  201. }
  202. bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  203. AA = &getAnalysis<AliasAnalysis>();
  204. // Check for explicit enable/disable of post-ra scheduling.
  205. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
  206. if (EnablePostRAScheduler.getPosition() > 0) {
  207. if (!EnablePostRAScheduler)
  208. return false;
  209. } else {
  210. // Check that post-RA scheduling is enabled for this target.
  211. const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
  212. if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode))
  213. return false;
  214. }
  215. // Check for antidep breaking override...
  216. if (EnableAntiDepBreaking.getPosition() > 0) {
  217. AntiDepMode = (EnableAntiDepBreaking) ?
  218. TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
  219. }
  220. DEBUG(errs() << "PostRAScheduler\n");
  221. const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  222. const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  223. const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
  224. ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
  225. (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
  226. (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
  227. SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA, AntiDepMode);
  228. // Loop over all of the basic blocks
  229. for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
  230. MBB != MBBe; ++MBB) {
  231. #ifndef NDEBUG
  232. // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
  233. if (DebugDiv > 0) {
  234. static int bbcnt = 0;
  235. if (bbcnt++ % DebugDiv != DebugMod)
  236. continue;
  237. errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
  238. ":MBB ID#" << MBB->getNumber() << " ***\n";
  239. }
  240. #endif
  241. // Initialize register live-range state for scheduling in this block.
  242. Scheduler.StartBlock(MBB);
  243. // Schedule each sequence of instructions not interrupted by a label
  244. // or anything else that effectively needs to shut down scheduling.
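// Walk the block bottom-up. Each time a boundary is found, the instructions
// between it and Current are scheduled as one region (Run + EmitSchedule);
// the boundary instruction itself is never scheduled, only Observe'd for
// liveness, and it becomes the end of the next region.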
  245. MachineBasicBlock::iterator Current = MBB->end();
  246. unsigned Count = MBB->size(), CurrentCount = Count;
  247. for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
  248. MachineInstr *MI = prior(I);
  249. if (isSchedulingBoundary(MI, Fn)) {
  250. Scheduler.Run(MBB, I, Current, CurrentCount);
  251. Scheduler.EmitSchedule(0);
  252. Current = MI;
  253. CurrentCount = Count - 1;
  254. Scheduler.Observe(MI, CurrentCount);
  255. }
  256. I = MI;
  257. --Count;
  258. }
  259. assert(Count == 0 && "Instruction count mismatch!");
  260. assert((MBB->begin() == Current || CurrentCount != 0) &&
  261. "Instruction count mismatch!");
  262. Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
  263. Scheduler.EmitSchedule(0);
  264. // Clean up register live-range state.
  265. Scheduler.FinishBlock();
  266. // Update register kills
  267. Scheduler.FixupKills(MBB);
  268. }
  269. return true;
  270. }
  271. /// StartBlock - Initialize register live-range state for scheduling in
  272. /// this block.
  273. ///
  274. void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
  275. // Call the superclass.
  276. ScheduleDAGInstrs::StartBlock(BB);
  277. // Reset the hazard recognizer.
  278. HazardRec->Reset();
  279. // Clear out the register class data.
  280. std::fill(Classes, array_endof(Classes),
  281. static_cast<const TargetRegisterClass *>(0));
  282. // Initialize the indices to indicate that no registers are live.
  283. std::fill(KillIndices, array_endof(KillIndices), ~0u);
  284. std::fill(DefIndices, array_endof(DefIndices), BB->size());
  285. // Clear "do not change" set.
  286. KeepRegs.clear();
  287. bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
  288. // Determine the live-out physregs for this block.
  289. if (IsReturnBlock) {
  290. // In a return block, examine the function live-out regs.
  291. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
  292. E = MRI.liveout_end(); I != E; ++I) {
  293. unsigned Reg = *I;
  294. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  295. KillIndices[Reg] = BB->size();
  296. DefIndices[Reg] = ~0u;
  297. // Repeat, for all aliases.
  298. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  299. unsigned AliasReg = *Alias;
  300. Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  301. KillIndices[AliasReg] = BB->size();
  302. DefIndices[AliasReg] = ~0u;
  303. }
  304. }
  305. } else {
  306. // In a non-return block, examine the live-in regs of all successors.
  307. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
  308. SE = BB->succ_end(); SI != SE; ++SI)
  309. for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
  310. E = (*SI)->livein_end(); I != E; ++I) {
  311. unsigned Reg = *I;
  312. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  313. KillIndices[Reg] = BB->size();
  314. DefIndices[Reg] = ~0u;
  315. // Repeat, for all aliases.
  316. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  317. unsigned AliasReg = *Alias;
  318. Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  319. KillIndices[AliasReg] = BB->size();
  320. DefIndices[AliasReg] = ~0u;
  321. }
  322. }
  323. }
  324. // Mark live-out callee-saved registers. In a return block this is
  325. // all callee-saved registers. In a non-return block this is any
  326. // callee-saved register that is not saved in the prolog.
  327. const MachineFrameInfo *MFI = MF.getFrameInfo();
  328. BitVector Pristine = MFI->getPristineRegs(BB);
  329. for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
  330. unsigned Reg = *I;
  331. if (!IsReturnBlock && !Pristine.test(Reg)) continue;
  332. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  333. KillIndices[Reg] = BB->size();
  334. DefIndices[Reg] = ~0u;
  335. // Repeat, for all aliases.
  336. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  337. unsigned AliasReg = *Alias;
  338. Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  339. KillIndices[AliasReg] = BB->size();
  340. DefIndices[AliasReg] = ~0u;
  341. }
  342. }
  343. }
  344. /// Schedule - Schedule the instruction range using list scheduling.
  345. ///
  346. void SchedulePostRATDList::Schedule() {
  347. DEBUG(errs() << "********** List Scheduling **********\n");
  348. // Build the scheduling graph.
  349. BuildSchedGraph(AA);
  350. if (AntiDepMode != TargetSubtarget::ANTIDEP_NONE) {
  351. if (BreakAntiDependencies()) {
  352. // We made changes. Update the dependency graph.
  353. // Theoretically we could update the graph in place:
  354. // When a live range is changed to use a different register, remove
  355. // the def's anti-dependence *and* output-dependence edges due to
  356. // that register, and add new anti-dependence and output-dependence
  357. // edges based on the next live range of the register.
  358. SUnits.clear();
  359. EntrySU = SUnit();
  360. ExitSU = SUnit();
  361. BuildSchedGraph(AA);
  362. }
  363. }
  364. DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
  365. SUnits[su].dumpAll(this));
  366. AvailableQueue.initNodes(SUnits);
  367. ListScheduleTopDown();
  368. AvailableQueue.releaseState();
  369. }
  370. /// Observe - Update liveness information to account for the current
  371. /// instruction, which will not be scheduled.
  372. ///
  373. void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
  374. assert(Count < InsertPosIndex && "Instruction index out of expected range!");
  375. // Any register which was defined within the previous scheduling region
  376. // may have been rescheduled and its lifetime may overlap with registers
  377. // in ways not reflected in our current liveness state. For each such
  378. // register, adjust the liveness state to be conservatively correct.
  379. for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
  380. if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
  381. assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
  382. // Mark this register as non-renamable.
  383. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  384. // Move the def index to the end of the previous region, to reflect
  385. // that the def could theoretically have been scheduled at the end.
  386. DefIndices[Reg] = InsertPosIndex;
  387. }
  388. PrescanInstruction(MI);
  389. ScanInstruction(MI, Count);
  390. }
  391. /// FinishBlock - Clean up register live-range state.
  392. ///
  393. void SchedulePostRATDList::FinishBlock() {
  394. RegRefs.clear();
  395. // Call the superclass.
  396. ScheduleDAGInstrs::FinishBlock();
  397. }
  398. /// CriticalPathStep - Return the predecessor edge of SU that continues the
  399. /// bottom-up critical path, or null if there is none.
  400. static SDep *CriticalPathStep(SUnit *SU) {
  401. SDep *Next = 0;
  402. unsigned NextDepth = 0;
  403. // Find the predecessor edge with the greatest depth.
  404. for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
  405. P != PE; ++P) {
  406. SUnit *PredSU = P->getSUnit();
  407. unsigned PredLatency = P->getLatency();
  408. unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
  409. // In the case of a latency tie, prefer an anti-dependency edge over
  410. // other types of edges.
  411. if (NextDepth < PredTotalLatency ||
  412. (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
  413. NextDepth = PredTotalLatency;
  414. Next = &*P;
  415. }
  416. }
  417. return Next;
  418. }
  419. void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
  420. // Scan the register operands for this instruction and update
  421. // Classes and RegRefs.
  422. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  423. MachineOperand &MO = MI->getOperand(i);
  424. if (!MO.isReg()) continue;
  425. unsigned Reg = MO.getReg();
  426. if (Reg == 0) continue;
  427. const TargetRegisterClass *NewRC = 0;
  428. if (i < MI->getDesc().getNumOperands())
  429. NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
  430. // For now, only allow the register to be changed if its register
  431. // class is consistent across all uses.
  432. if (!Classes[Reg] && NewRC)
  433. Classes[Reg] = NewRC;
  434. else if (!NewRC || Classes[Reg] != NewRC)
  435. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  436. // Now check for aliases.
  437. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  438. // If an alias of the reg is used during the live range, give up.
  439. // Note that this allows us to skip checking if AntiDepReg
  440. // overlaps with any of the aliases, among other things.
  441. unsigned AliasReg = *Alias;
  442. if (Classes[AliasReg]) {
  443. Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  444. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  445. }
  446. }
  447. // If we're still willing to consider this register, note the reference.
  448. if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
  449. RegRefs.insert(std::make_pair(Reg, &MO));
  450. // It's not safe to change register allocation for source operands
  451. // that have special allocation requirements.
  452. if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
  453. if (KeepRegs.insert(Reg)) {
  454. for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
  455. *Subreg; ++Subreg)
  456. KeepRegs.insert(*Subreg);
  457. }
  458. }
  459. }
  460. }
  461. void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
  462. unsigned Count) {
  463. // Update liveness.
  464. // Proceeding upwards, registers that are def'd but not used in this
  465. // instruction are now dead.
  466. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  467. MachineOperand &MO = MI->getOperand(i);
  468. if (!MO.isReg()) continue;
  469. unsigned Reg = MO.getReg();
  470. if (Reg == 0) continue;
  471. if (!MO.isDef()) continue;
  472. // Ignore two-addr defs.
  473. if (MI->isRegTiedToUseOperand(i)) continue;
  474. DefIndices[Reg] = Count;
  475. KillIndices[Reg] = ~0u;
  476. assert(((KillIndices[Reg] == ~0u) !=
  477. (DefIndices[Reg] == ~0u)) &&
  478. "Kill and Def maps aren't consistent for Reg!");
  479. KeepRegs.erase(Reg);
  480. Classes[Reg] = 0;
  481. RegRefs.erase(Reg);
  482. // Repeat, for all subregs.
  483. for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
  484. *Subreg; ++Subreg) {
  485. unsigned SubregReg = *Subreg;
  486. DefIndices[SubregReg] = Count;
  487. KillIndices[SubregReg] = ~0u;
  488. KeepRegs.erase(SubregReg);
  489. Classes[SubregReg] = 0;
  490. RegRefs.erase(SubregReg);
  491. }
  492. // Conservatively mark super-registers as unusable.
  493. for (const unsigned *Super = TRI->getSuperRegisters(Reg);
  494. *Super; ++Super) {
  495. unsigned SuperReg = *Super;
  496. Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
  497. }
  498. }
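// Second pass: examine uses. Proceeding bottom-up, a use of a register that
// was not already live marks the point where that register is killed.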
  499. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  500. MachineOperand &MO = MI->getOperand(i);
  501. if (!MO.isReg()) continue;
  502. unsigned Reg = MO.getReg();
  503. if (Reg == 0) continue;
  504. if (!MO.isUse()) continue;
  505. const TargetRegisterClass *NewRC = 0;
  506. if (i < MI->getDesc().getNumOperands())
  507. NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
  508. // For now, only allow the register to be changed if its register
  509. // class is consistent across all uses.
  510. if (!Classes[Reg] && NewRC)
  511. Classes[Reg] = NewRC;
  512. else if (!NewRC || Classes[Reg] != NewRC)
  513. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
  514. RegRefs.insert(std::make_pair(Reg, &MO));
  515. // It wasn't previously live but now it is; this is a kill.
  516. if (KillIndices[Reg] == ~0u) {
  517. KillIndices[Reg] = Count;
  518. DefIndices[Reg] = ~0u;
  519. assert(((KillIndices[Reg] == ~0u) !=
  520. (DefIndices[Reg] == ~0u)) &&
  521. "Kill and Def maps aren't consistent for Reg!");
  522. }
  523. // Repeat, for all aliases.
  524. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
  525. unsigned AliasReg = *Alias;
  526. if (KillIndices[AliasReg] == ~0u) {
  527. KillIndices[AliasReg] = Count;
  528. DefIndices[AliasReg] = ~0u;
  529. }
  530. }
  531. }
  532. }
  533. unsigned
  534. SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg,
  535. unsigned LastNewReg,
  536. const TargetRegisterClass *RC) {
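// Scan the class's allocation order for a register that is dead across
// AntiDepReg's live range and can therefore serve as a rename target.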
  537. for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
  538. RE = RC->allocation_order_end(MF); R != RE; ++R) {
  539. unsigned NewReg = *R;
  540. // Don't replace a register with itself.
  541. if (NewReg == AntiDepReg) continue;
  542. // Don't replace a register with one that was recently used to repair
  543. // an anti-dependence with this AntiDepReg, because that would
  544. // re-introduce that anti-dependence.
  545. if (NewReg == LastNewReg) continue;
  546. // If NewReg is dead and NewReg's most recent def is not before
  547. // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
  548. assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
  549. "Kill and Def maps aren't consistent for AntiDepReg!");
  550. assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
  551. "Kill and Def maps aren't consistent for NewReg!");
  552. if (KillIndices[NewReg] != ~0u ||
  553. Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
  554. KillIndices[AntiDepReg] > DefIndices[NewReg])
  555. continue;
  556. return NewReg;
  557. }
  558. // No registers are free and available!
  559. return 0;
  560. }
  561. /// BreakAntiDependencies - Identify anti-dependencies along the critical path
  562. /// of the ScheduleDAG and break them by renaming registers.
  563. ///
  564. bool SchedulePostRATDList::BreakAntiDependencies() {
  565. // The code below assumes that there is at least one instruction,
  566. // so just duck out immediately if the block is empty.
  567. if (SUnits.empty()) return false;
  568. // Find the node at the bottom of the critical path.
  569. SUnit *Max = 0;
  570. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
  571. SUnit *SU = &SUnits[i];
  572. if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
  573. Max = SU;
  574. }
  575. #ifndef NDEBUG
  576. {
  577. DEBUG(errs() << "Critical path has total latency "
  578. << (Max->getDepth() + Max->Latency) << "\n");
  579. DEBUG(errs() << "Available regs:");
  580. for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
  581. if (KillIndices[Reg] == ~0u)
  582. DEBUG(errs() << " " << TRI->getName(Reg));
  583. }
  584. DEBUG(errs() << '\n');
  585. }
  586. #endif
  587. // Track progress along the critical path through the SUnit graph as we walk
  588. // the instructions.
  589. SUnit *CriticalPathSU = Max;
  590. MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
  591. // Consider this pattern:
  592. // A = ...
  593. // ... = A
  594. // A = ...
  595. // ... = A
  596. // A = ...
  597. // ... = A
  598. // A = ...
  599. // ... = A
  600. // There are three anti-dependencies here, and without special care,
  601. // we'd break all of them using the same register:
  602. // A = ...
  603. // ... = A
  604. // B = ...
  605. // ... = B
  606. // B = ...
  607. // ... = B
  608. // B = ...
  609. // ... = B
  610. // because at each anti-dependence, B is the first free register that
  611. // isn't A. This re-introduces anti-dependencies
  612. // at all but one of the original anti-dependencies that we were
  613. // trying to break. To avoid this, keep track of the most recent
  614. // register that each register was replaced with, and avoid
  615. // using it to repair an anti-dependence on the same register.
  616. // This lets us produce this:
  617. // A = ...
  618. // ... = A
  619. // B = ...
  620. // ... = B
  621. // C = ...
  622. // ... = C
  623. // B = ...
  624. // ... = B
  625. // This still has an anti-dependence on B, but at least it isn't on the
  626. // original critical path.
  627. //
  628. // TODO: If we tracked more than one register here, we could potentially
  629. // fix that remaining critical edge too. This is a little more involved,
  630. // because unlike the most recent register, less recent registers should
  631. // still be considered, though only if no other registers are available.
  632. unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
  633. // Attempt to break anti-dependence edges on the critical path. Walk the
  634. // instructions from the bottom up, tracking information about liveness
  635. // as we go to help determine which registers are available.
  636. bool Changed = false;
  637. unsigned Count = InsertPosIndex - 1;
  638. for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
  639. I != E; --Count) {
  640. MachineInstr *MI = --I;
  641. // Check if this instruction has a dependence on the critical path that
  642. // is an anti-dependence that we may be able to break. If it is, set
  643. // AntiDepReg to the non-zero register associated with the anti-dependence.
  644. //
  645. // We limit our attention to the critical path as a heuristic to avoid
  646. // breaking anti-dependence edges that aren't going to significantly
  647. // impact the overall schedule. There are a limited number of registers
  648. // and we want to save them for the important edges.
  649. //
  650. // TODO: Instructions with multiple defs could have multiple
  651. // anti-dependencies. The current code here only knows how to break one
  652. // edge per instruction. Note that we'd have to be able to break all of
  653. // the anti-dependencies in an instruction in order to be effective.
  654. unsigned AntiDepReg = 0;
  655. if (MI == CriticalPathMI) {
  656. if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
  657. SUnit *NextSU = Edge->getSUnit();
  658. // Only consider anti-dependence edges.
  659. if (Edge->getKind() == SDep::Anti) {
  660. AntiDepReg = Edge->getReg();
  661. assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
  662. if (!AllocatableSet.test(AntiDepReg))
  663. // Don't break anti-dependencies on non-allocatable registers.
  664. AntiDepReg = 0;
  665. else if (KeepRegs.count(AntiDepReg))
  666. // Don't break anti-dependencies if a use down below requires
  667. // this exact register.
  668. AntiDepReg = 0;
  669. else {
  670. // If the SUnit has other dependencies on the SUnit that it
  671. // anti-depends on, don't bother breaking the anti-dependency
  672. // since those edges would prevent such units from being
  673. // scheduled past each other regardless.
  674. //
  675. // Also, if there are dependencies on other SUnits with the
  676. // same register as the anti-dependency, don't attempt to
  677. // break it.
  678. for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
  679. PE = CriticalPathSU->Preds.end(); P != PE; ++P)
  680. if (P->getSUnit() == NextSU ?
  681. (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
  682. (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
  683. AntiDepReg = 0;
  684. break;
  685. }
  686. }
  687. }
  688. CriticalPathSU = NextSU;
  689. CriticalPathMI = CriticalPathSU->getInstr();
  690. } else {
  691. // We've reached the end of the critical path.
  692. CriticalPathSU = 0;
  693. CriticalPathMI = 0;
  694. }
  695. }
  696. PrescanInstruction(MI);
  697. if (MI->getDesc().hasExtraDefRegAllocReq())
  698. // If this instruction's defs have special allocation requirements, don't
  699. // break this anti-dependency.
  700. AntiDepReg = 0;
  701. else if (AntiDepReg) {
  702. // If this instruction has a use of AntiDepReg, breaking it
  703. // is invalid.
  704. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  705. MachineOperand &MO = MI->getOperand(i);
  706. if (!MO.isReg()) continue;
  707. unsigned Reg = MO.getReg();
  708. if (Reg == 0) continue;
  709. if (MO.isUse() && AntiDepReg == Reg) {
  710. AntiDepReg = 0;
  711. break;
  712. }
  713. }
  714. }
  715. // Determine AntiDepReg's register class, if it is live and is
  716. // consistently used within a single class.
  717. const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
  718. assert((AntiDepReg == 0 || RC != NULL) &&
  719. "Register should be live if it's causing an anti-dependence!");
  720. if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
  721. AntiDepReg = 0;
  722. // Look for a suitable register to use to break the anti-dependence.
  723. //
  724. // TODO: Instead of picking the first free register, consider which might
  725. // be the best.
  726. if (AntiDepReg != 0) {
  727. if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
  728. LastNewReg[AntiDepReg],
  729. RC)) {
  730. DEBUG(errs() << "Breaking anti-dependence edge on "
  731. << TRI->getName(AntiDepReg)
  732. << " with " << RegRefs.count(AntiDepReg) << " references"
  733. << " using " << TRI->getName(NewReg) << "!\n");
  734. // Update the references to the old register to refer to the new
  735. // register.
  736. std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
  737. std::multimap<unsigned, MachineOperand *>::iterator>
  738. Range = RegRefs.equal_range(AntiDepReg);
  739. for (std::multimap<unsigned, MachineOperand *>::iterator
  740. Q = Range.first, QE = Range.second; Q != QE; ++Q)
  741. Q->second->setReg(NewReg);
  742. // We just went back in time and modified history; the
  743. // liveness information for the anti-dependence reg is now
  744. // inconsistent. Set the state as if it were dead.
  745. Classes[NewReg] = Classes[AntiDepReg];
  746. DefIndices[NewReg] = DefIndices[AntiDepReg];
  747. KillIndices[NewReg] = KillIndices[AntiDepReg];
  748. assert(((KillIndices[NewReg] == ~0u) !=
  749. (DefIndices[NewReg] == ~0u)) &&
  750. "Kill and Def maps aren't consistent for NewReg!");
  751. Classes[AntiDepReg] = 0;
  752. DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
  753. KillIndices[AntiDepReg] = ~0u;
  754. assert(((KillIndices[AntiDepReg] == ~0u) !=
  755. (DefIndices[AntiDepReg] == ~0u)) &&
  756. "Kill and Def maps aren't consistent for AntiDepReg!");
  757. RegRefs.erase(AntiDepReg);
  758. Changed = true;
  759. LastNewReg[AntiDepReg] = NewReg;
  760. }
  761. }
  762. ScanInstruction(MI, Count);
  763. }
  764. return Changed;
  765. }
  766. /// StartBlockForKills - Initialize register live-range state for updating kills
  767. ///
  768. void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
  769. // Initialize the indices to indicate that no registers are live.
  770. std::fill(KillIndices, array_endof(KillIndices), ~0u);
  771. // Determine the live-out physregs for this block.
  772. if (!BB->empty() && BB->back().getDesc().isReturn()) {
  773. // In a return block, examine the function live-out regs.
  774. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
  775. E = MRI.liveout_end(); I != E; ++I) {
  776. unsigned Reg = *I;
  777. KillIndices[Reg] = BB->size();
  778. // Repeat, for all subregs.
  779. for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
  780. *Subreg; ++Subreg) {
  781. KillIndices[*Subreg] = BB->size();
  782. }
  783. }
  784. }
  785. else {
  786. // In a non-return block, examine the live-in regs of all successors.
  787. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
  788. SE = BB->succ_end(); SI != SE; ++SI) {
  789. for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
  790. E = (*SI)->livein_end(); I != E; ++I) {
  791. unsigned Reg = *I;
  792. KillIndices[Reg] = BB->size();
  793. // Repeat, for all subregs.
  794. for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
  795. *Subreg; ++Subreg) {
  796. KillIndices[*Subreg] = BB->size();
  797. }
  798. }
  799. }
  800. }
  801. }
  802. bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
  803. MachineOperand &MO) {
  804. // Setting kill flag...
  805. if (!MO.isKill()) {
  806. MO.setIsKill(true);
  807. return false;
  808. }
  809. // If MO itself is live, clear the kill flag...
  810. if (KillIndices[MO.getReg()] != ~0u) {
  811. MO.setIsKill(false);
  812. return false;
  813. }
  814. // If any subreg of MO is live, then create an imp-def for that
  815. // subreg and keep MO marked as killed.
  816. MO.setIsKill(false);
  817. bool AllDead = true;
  818. const unsigned SuperReg = MO.getReg();
  819. for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
  820. *Subreg; ++Subreg) {
  821. if (KillIndices[*Subreg] != ~0u) {
  822. MI->addOperand(MachineOperand::CreateReg(*Subreg,
  823. true /*IsDef*/,
  824. true /*IsImp*/,
  825. false /*IsKill*/,
  826. false /*IsDead*/));
  827. AllDead = false;
  828. }
  829. }
  830. if (AllDead)
  831. MO.setIsKill(true);
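// Note: no operand is ever removed above, so this path always returns false.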
  832. return false;
  833. }
  834. /// FixupKills - Fix the register kill flags, which may have been made
  835. /// incorrect by instruction reordering.
  836. ///
  837. void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
  838. DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');
  839. std::set<unsigned> killedRegs;
  840. BitVector ReservedRegs = TRI->getReservedRegs(MF);
  841. StartBlockForKills(MBB);
  842. // Examine block from end to start...
  843. unsigned Count = MBB->size();
  844. for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
  845. I != E; --Count) {
  846. MachineInstr *MI = --I;
  847. // Update liveness. Registers that are def'd but not used in this
  848. // instruction are now dead. Mark the register and all of its subregs
  849. // as dead, since they are completely defined here.
  850. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  851. MachineOperand &MO = MI->getOperand(i);
  852. if (!MO.isReg()) continue;
  853. unsigned Reg = MO.getReg();
  854. if (Reg == 0) continue;
  855. if (!MO.isDef()) continue;
  856. // Ignore two-addr defs.
  857. if (MI->isRegTiedToUseOperand(i)) continue;
  858. KillIndices[Reg] = ~0u;
  859. // Repeat for all subregs.
  860. for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
  861. *Subreg; ++Subreg) {
  862. KillIndices[*Subreg] = ~0u;
  863. }
  864. }
  865. // Examine all used registers and set/clear kill flag. When a
  866. // register is used multiple times we only set the kill flag on
  867. // the first use.
  868. killedRegs.clear();
  869. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  870. MachineOperand &MO = MI->getOperand(i);
  871. if (!MO.isReg() || !MO.isUse()) continue;
  872. unsigned Reg = MO.getReg();
  873. if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
  874. bool kill = false;
  875. if (killedRegs.find(Reg) == killedRegs.end()) {
  876. kill = true;
  877. // A register is not killed if any subregs are live...
  878. for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
  879. *Subreg; ++Subreg) {
  880. if (KillIndices[*Subreg] != ~0u) {
  881. kill = false;
  882. break;
  883. }
  884. }
  885. // If no subreg is live, then the register is killed if it became
  886. // live at this instruction.
  887. if (kill)
  888. kill = (KillIndices[Reg] == ~0u);
  889. }
  890. if (MO.isKill() != kill) {
  891. bool removed = ToggleKillFlag(MI, MO);
  892. if (removed) {
  893. DEBUG(errs() << "Fixed <removed> in ");
  894. } else {
  895. DEBUG(errs() << "Fixed " << MO << " in ");
  896. }
  897. DEBUG(MI->dump());
  898. }
  899. killedRegs.insert(Reg);
  900. }
  901. // Mark any used register (that is not marked undef) and its subregs
  902. // as now live...
  903. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  904. MachineOperand &MO = MI->getOperand(i);
  905. if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
  906. unsigned Reg = MO.getReg();
  907. if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
  908. KillIndices[Reg] = Count;
  909. for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
  910. *Subreg; ++Subreg) {
  911. KillIndices[*Subreg] = Count;
  912. }
  913. }
  914. }
  915. }
  916. //===----------------------------------------------------------------------===//
  917. // Top-Down Scheduling
  918. //===----------------------------------------------------------------------===//
  919. /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
  920. /// the PendingQueue if the count reaches zero. Also update its cycle bound.
  921. void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  922. SUnit *SuccSU = SuccEdge->getSUnit();
  923. #ifndef NDEBUG
  924. if (SuccSU->NumPredsLeft == 0) {
  925. errs() << "*** Scheduling failed! ***\n";
  926. SuccSU->dump(this);
  927. errs() << " has been released too many times!\n";
  928. llvm_unreachable(0);
  929. }
  930. #endif
  931. --SuccSU->NumPredsLeft;
  932. // Compute how many cycles it will be before this actually becomes
  933. // available. This is the max of the start time of all predecessors plus
  934. // their latencies.
  935. SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
  936. // If all the node's predecessors are scheduled, this node is ready
  937. // to be scheduled. Ignore the special ExitSU node.
  938. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
  939. PendingQueue.push_back(SuccSU);
  940. }
  941. /// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
  942. void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
  943. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
  944. I != E; ++I)
  945. ReleaseSucc(SU, &*I);
  946. }
  947. /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
  948. /// count of its successors. If a successor pending count is zero, add it to
  949. /// the Available queue.
  950. void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  951. DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
  952. DEBUG(SU->dump(this));
  953. Sequence.push_back(SU);
  954. assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  955. SU->setDepthToAtLeast(CurCycle);
  956. ReleaseSuccessors(SU);
  957. SU->isScheduled = true;
  958. AvailableQueue.ScheduledNode(SU);
  959. }
  960. /// ListScheduleTopDown - The main loop of list scheduling for top-down
  961. /// schedulers.
  962. void SchedulePostRATDList::ListScheduleTopDown() {
  963. unsigned CurCycle = 0;
  964. // Release any successors of the special Entry node.
  965. ReleaseSuccessors(&EntrySU);
  966. // Add all leaves to the Available queue.
  967. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
  968. // It is available if it has no predecessors.
  969. if (SUnits[i].Preds.empty()) {
  970. AvailableQueue.push(&SUnits[i]);
  971. SUnits[i].isAvailable = true;
  972. }
  973. }
  974. // In any cycle where we can't schedule any instructions, we must
  975. // stall or emit a noop, depending on the target.
  976. bool CycleHasInsts = false;
  977. // While Available queue is not empty, grab the node with the highest
  978. // priority. If it is not ready put it back. Schedule the node.
  979. std::vector<SUnit*> NotReady;
  980. Sequence.reserve(SUnits.size());
  981. while (!AvailableQueue.empty() || !PendingQueue.empty()) {
  982. // Check to see if any of the pending instructions are ready to issue. If
  983. // so, add them to the available queue.
  984. unsigned MinDepth = ~0u;
  985. for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
  986. if (PendingQueue[i]->getDepth() <= CurCycle) {
  987. AvailableQueue.push(PendingQueue[i]);
  988. PendingQueue[i]->isAvailable = true;
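// Remove the entry in O(1) by swapping with the back, then revisit this slot.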
  989. PendingQueue[i] = PendingQueue.back();
  990. PendingQueue.pop_back();
  991. --i; --e;
  992. } else if (PendingQueue[i]->getDepth() < MinDepth)
  993. MinDepth = PendingQueue[i]->getDepth();
  994. }
  995. DEBUG(errs() << "\n*** Examining Available\n";
  996. LatencyPriorityQueue q = AvailableQueue;
  997. while (!q.empty()) {
  998. SUnit *su = q.pop();
  999. errs() << "Height " << su->getHeight() << ": ";
  1000. su->dump(this);
  1001. });
  1002. SUnit *FoundSUnit = 0;
  1003. bool HasNoopHazards = false;
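// Pop nodes in priority order until the hazard recognizer accepts one for
// this cycle; anything rejected is set aside in NotReady and restored below.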
  1004. while (!AvailableQueue.empty()) {
  1005. SUnit *CurSUnit = AvailableQueue.pop();
  1006. ScheduleHazardRecognizer::HazardType HT =
  1007. HazardRec->getHazardType(CurSUnit);
  1008. if (HT == ScheduleHazardRecognizer::NoHazard) {
  1009. FoundSUnit = CurSUnit;
  1010. break;
  1011. }
  1012. // Remember if this is a noop hazard.
  1013. HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
  1014. NotReady.push_back(CurSUnit);
  1015. }
  1016. // Add the nodes that aren't ready back onto the available list.
  1017. if (!NotReady.empty()) {
  1018. AvailableQueue.push_all(NotReady);
  1019. NotReady.clear();
  1020. }
  1021. // If we found a node to schedule, do it now.
  1022. if (FoundSUnit) {
  1023. ScheduleNodeTopDown(FoundSUnit, CurCycle);
  1024. HazardRec->EmitInstruction(FoundSUnit);
  1025. CycleHasInsts = true;
  1026. // If we are using the target-specific hazards, then don't
  1027. // advance the cycle time just because we schedule a node. If
  1028. // the target allows it we can schedule multiple nodes in the
  1029. // same cycle.
  1030. if (!EnablePostRAHazardAvoidance) {
  1031. if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
  1032. ++CurCycle;
  1033. }
  1034. } else {
  1035. if (CycleHasInsts) {
  1036. DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n');
  1037. HazardRec->AdvanceCycle();
  1038. } else if (!HasNoopHazards) {
  1039. // Otherwise, we have a pipeline stall, but no other problem,
  1040. // just advance the current cycle and try again.
  1041. DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
  1042. HazardRec->AdvanceCycle();
  1043. ++NumStalls;
  1044. } else {
  1045. // Otherwise, we have no instructions to issue and we have instructions
  1046. // that will fault if we don't do this right. This is the case for
  1047. // processors without pipeline interlocks and other cases.
  1048. DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
  1049. HazardRec->EmitNoop();
  1050. Sequence.push_back(0); // NULL here means noop
  1051. ++NumNoops;
  1052. }
  1053. ++CurCycle;
  1054. CycleHasInsts = false;
  1055. }
  1056. }
  1057. #ifndef NDEBUG
  1058. VerifySchedule(/*isBottomUp=*/false);
  1059. #endif
  1060. }
  1061. //===----------------------------------------------------------------------===//
  1062. // Public Constructor Functions
  1063. //===----------------------------------------------------------------------===//
  1064. FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
  1065. return new PostRAScheduler(OptLevel);
  1066. }