  1. //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // MachineScheduler schedules machine instructions after phi elimination. It
  11. // preserves LiveIntervals so it can be invoked before register allocation.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #define DEBUG_TYPE "misched"
  15. #include "llvm/CodeGen/MachineScheduler.h"
  16. #include "llvm/ADT/OwningPtr.h"
  17. #include "llvm/ADT/PriorityQueue.h"
  18. #include "llvm/Analysis/AliasAnalysis.h"
  19. #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  20. #include "llvm/CodeGen/MachineDominators.h"
  21. #include "llvm/CodeGen/MachineLoopInfo.h"
  22. #include "llvm/CodeGen/MachineRegisterInfo.h"
  23. #include "llvm/CodeGen/Passes.h"
  24. #include "llvm/CodeGen/RegisterClassInfo.h"
  25. #include "llvm/CodeGen/ScheduleDFS.h"
  26. #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  27. #include "llvm/Support/CommandLine.h"
  28. #include "llvm/Support/Debug.h"
  29. #include "llvm/Support/ErrorHandling.h"
  30. #include "llvm/Support/GraphWriter.h"
  31. #include "llvm/Support/raw_ostream.h"
  32. #include "llvm/Target/TargetInstrInfo.h"
  33. #include <queue>
  34. using namespace llvm;
  35. namespace llvm {
  36. cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  37. cl::desc("Force top-down list scheduling"));
  38. cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  39. cl::desc("Force bottom-up list scheduling"));
  40. }
  41. #ifndef NDEBUG
  42. static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  43. cl::desc("Pop up a window to show MISched dags after they are processed"));
  44. static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  45. cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  46. #else
  47. static bool ViewMISchedDAGs = false;
  48. #endif // NDEBUG
  49. static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  50. cl::desc("Enable cyclic critical path analysis."), cl::init(false));
  51. static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  52. cl::desc("Enable load clustering."), cl::init(true));
  53. // Experimental heuristics
  54. static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  55. cl::desc("Enable scheduling for macro fusion."), cl::init(true));
  56. static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  57. cl::desc("Verify machine instrs before and after machine scheduling"));
  58. // DAG subtrees must have at least this many nodes.
  59. static const unsigned MinSubtreeSize = 8;
  60. //===----------------------------------------------------------------------===//
  61. // Machine Instruction Scheduling Pass and Registry
  62. //===----------------------------------------------------------------------===//
  63. MachineSchedContext::MachineSchedContext():
  64. MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  65. RegClassInfo = new RegisterClassInfo();
  66. }
  67. MachineSchedContext::~MachineSchedContext() {
  68. delete RegClassInfo;
  69. }
  70. namespace {
  71. /// MachineScheduler runs after coalescing and before register allocation.
  72. class MachineScheduler : public MachineSchedContext,
  73. public MachineFunctionPass {
  74. public:
  75. MachineScheduler();
  76. virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  77. virtual void releaseMemory() {}
  78. virtual bool runOnMachineFunction(MachineFunction&);
  79. virtual void print(raw_ostream &O, const Module* = 0) const;
  80. static char ID; // Class identification, replacement for typeinfo
  81. };
  82. } // namespace
  83. char MachineScheduler::ID = 0;
  84. char &llvm::MachineSchedulerID = MachineScheduler::ID;
  85. INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
  86. "Machine Instruction Scheduler", false, false)
  87. INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
  88. INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
  89. INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
  90. INITIALIZE_PASS_END(MachineScheduler, "misched",
  91. "Machine Instruction Scheduler", false, false)
  92. MachineScheduler::MachineScheduler()
  93. : MachineFunctionPass(ID) {
  94. initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
  95. }
  96. void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  97. AU.setPreservesCFG();
  98. AU.addRequiredID(MachineDominatorsID);
  99. AU.addRequired<MachineLoopInfo>();
  100. AU.addRequired<AliasAnalysis>();
  101. AU.addRequired<TargetPassConfig>();
  102. AU.addRequired<SlotIndexes>();
  103. AU.addPreserved<SlotIndexes>();
  104. AU.addRequired<LiveIntervals>();
  105. AU.addPreserved<LiveIntervals>();
  106. MachineFunctionPass::getAnalysisUsage(AU);
  107. }
  108. MachinePassRegistry MachineSchedRegistry::Registry;
  109. /// A dummy default scheduler factory indicates whether the scheduler
  110. /// is overridden on the command line.
  111. static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  112. return 0;
  113. }
  114. /// MachineSchedOpt allows command line selection of the scheduler.
  115. static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
  116. RegisterPassParser<MachineSchedRegistry> >
  117. MachineSchedOpt("misched",
  118. cl::init(&useDefaultMachineSched), cl::Hidden,
  119. cl::desc("Machine instruction scheduler to use"));
  120. static MachineSchedRegistry
  121. DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
  122. useDefaultMachineSched);
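// As an aside for readers: other schedulers hook in through the same registry
// used by the "default" entry above. A minimal sketch (hypothetical names; the
// ScheduleDAGMI-plus-strategy construction mirrors createConvergingSched,
// which is forward declared below and defined later in this file):
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new MySchedStrategy());
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Experimental scheduler sketch.",
//                   createMySched);
//
// The registered name then becomes selectable via -misched=my-sched.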
  123. /// Forward declare the standard machine scheduler. This will be used as the
  124. /// default scheduler if the target does not set a default.
  125. static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
  126. /// Decrement this iterator until reaching the top or a non-debug instr.
  127. static MachineBasicBlock::iterator
  128. priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  129. assert(I != Beg && "reached the top of the region, cannot decrement");
  130. while (--I != Beg) {
  131. if (!I->isDebugValue())
  132. break;
  133. }
  134. return I;
  135. }
  136. /// If this iterator is a debug value, increment until reaching the End or a
  137. /// non-debug instruction.
  138. static MachineBasicBlock::iterator
  139. nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  140. for (; I != End; ++I) {
  141. if (!I->isDebugValue())
  142. break;
  143. }
  144. return I;
  145. }
  146. /// Top-level MachineScheduler pass driver.
  147. ///
  148. /// Visit blocks in function order. Divide each block into scheduling regions
  149. /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
  150. /// consistent with the DAG builder, which traverses the interior of the
  151. /// scheduling regions bottom-up.
  152. ///
  153. /// This design avoids exposing scheduling boundaries to the DAG builder,
  154. /// simplifying the DAG builder's support for "special" target instructions.
  155. /// At the same time the design allows target schedulers to operate across
  156. /// scheduling boundaries, for example to bundle the boundary instructions
  157. /// without reordering them. This creates complexity, because the target
  158. /// scheduler must update the RegionBegin and RegionEnd positions cached by
  159. /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
  160. /// design would be to split blocks at scheduling boundaries, but LLVM has a
  161. /// general bias against block splitting purely for implementation simplicity.
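// As a concrete illustration (hypothetical instruction stream, assuming the
// target reports CALL and RET as scheduling boundaries):
//
//   %1 = ...; %2 = ...; CALL; %3 = ...; RET
//
// Visiting bottom-up, the driver below forms the region [%3, RET) first --
// with only one schedulable instruction it is entered and immediately exited
// -- and then the region [%1, CALL), which actually gets scheduled. The
// boundary instructions CALL and RET are counted but never added to a DAG or
// reordered.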
  162. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  163. DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
  164. // Initialize the context of the pass.
  165. MF = &mf;
  166. MLI = &getAnalysis<MachineLoopInfo>();
  167. MDT = &getAnalysis<MachineDominatorTree>();
  168. PassConfig = &getAnalysis<TargetPassConfig>();
  169. AA = &getAnalysis<AliasAnalysis>();
  170. LIS = &getAnalysis<LiveIntervals>();
  171. const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
  172. if (VerifyScheduling) {
  173. DEBUG(LIS->dump());
  174. MF->verify(this, "Before machine scheduling.");
  175. }
  176. RegClassInfo->runOnMachineFunction(*MF);
  177. // Select the scheduler, or set the default.
  178. MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  179. if (Ctor == useDefaultMachineSched) {
  180. // Get the default scheduler set by the target.
  181. Ctor = MachineSchedRegistry::getDefault();
  182. if (!Ctor) {
  183. Ctor = createConvergingSched;
  184. MachineSchedRegistry::setDefault(Ctor);
  185. }
  186. }
  187. // Instantiate the selected scheduler.
  188. OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
  189. // Visit all machine basic blocks.
  190. //
  191. // TODO: Visit blocks in global postorder or postorder within the bottom-up
  192. // loop tree. Then we can optionally compute global RegPressure.
  193. for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
  194. MBB != MBBEnd; ++MBB) {
  195. Scheduler->startBlock(MBB);
  196. // Break the block into scheduling regions [I, RegionEnd), and schedule each
  197. // region as soon as it is discovered. RegionEnd points to the scheduling
  198. // boundary at the bottom of the region. The DAG does not include RegionEnd,
  199. // but the region does (i.e. the next RegionEnd is above the previous
  200. // RegionBegin). If the current block has no terminator then RegionEnd ==
  201. // MBB->end() for the bottom region.
  202. //
  203. // The Scheduler may insert instructions during either schedule() or
  204. // exitRegion(), even for empty regions. So the local iterators 'I' and
  205. // 'RegionEnd' are invalid across these calls.
  206. unsigned RemainingInstrs = MBB->size();
  207. for (MachineBasicBlock::iterator RegionEnd = MBB->end();
  208. RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
  209. // Avoid decrementing RegionEnd for blocks with no terminator.
  210. if (RegionEnd != MBB->end()
  211. || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
  212. --RegionEnd;
  213. // Count the boundary instruction.
  214. --RemainingInstrs;
  215. }
  216. // The next region starts above the previous region. Look backward in the
  217. // instruction stream until we find the nearest boundary.
  218. unsigned NumRegionInstrs = 0;
  219. MachineBasicBlock::iterator I = RegionEnd;
  220. for (; I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
  221. if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
  222. break;
  223. }
  224. // Notify the scheduler of the region, even if we may skip scheduling
  225. // it. Perhaps it still needs to be bundled.
  226. Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
  227. // Skip empty scheduling regions (0 or 1 schedulable instructions).
  228. if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
  229. // Close the current region. Bundle the terminator if needed.
  230. // This invalidates 'RegionEnd' and 'I'.
  231. Scheduler->exitRegion();
  232. continue;
  233. }
  234. DEBUG(dbgs() << "********** MI Scheduling **********\n");
  235. DEBUG(dbgs() << MF->getName()
  236. << ":BB#" << MBB->getNumber() << " " << MBB->getName()
  237. << "\n From: " << *I << " To: ";
  238. if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
  239. else dbgs() << "End";
  240. dbgs() << " RegionInstrs: " << NumRegionInstrs
  241. << " Remaining: " << RemainingInstrs << "\n");
  242. // Schedule a region: possibly reorder instructions.
  243. // This invalidates 'RegionEnd' and 'I'.
  244. Scheduler->schedule();
  245. // Close the current region.
  246. Scheduler->exitRegion();
  247. // Scheduling has invalidated the current iterator 'I'. Ask the
  248. // scheduler for the top of its scheduled region.
  249. RegionEnd = Scheduler->begin();
  250. }
  251. assert(RemainingInstrs == 0 && "Instruction count mismatch!");
  252. Scheduler->finishBlock();
  253. }
  254. Scheduler->finalizeSchedule();
  255. DEBUG(LIS->dump());
  256. if (VerifyScheduling)
  257. MF->verify(this, "After machine scheduling.");
  258. return true;
  259. }
  260. void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  261. // unimplemented
  262. }
  263. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  264. void ReadyQueue::dump() {
  265. dbgs() << Name << ": ";
  266. for (unsigned i = 0, e = Queue.size(); i < e; ++i)
  267. dbgs() << Queue[i]->NodeNum << " ";
  268. dbgs() << "\n";
  269. }
  270. #endif
  271. //===----------------------------------------------------------------------===//
  272. // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
  273. // preservation.
  274. //===----------------------------------------------------------------------===//
  275. ScheduleDAGMI::~ScheduleDAGMI() {
  276. delete DFSResult;
  277. DeleteContainerPointers(Mutations);
  278. delete SchedImpl;
  279. }
  280. bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  281. return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
  282. }
  283. bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  284. if (SuccSU != &ExitSU) {
  285. // Do not use WillCreateCycle, it assumes SD scheduling.
  286. // If Pred is reachable from Succ, then the edge creates a cycle.
  287. if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
  288. return false;
  289. Topo.AddPred(SuccSU, PredDep.getSUnit());
  290. }
  291. SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  292. // Return true regardless of whether a new edge needed to be inserted.
  293. return true;
  294. }
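// Usage note: the DAG mutations later in this file rely on these two helpers
// in both styles -- LoadClusterMutation tests the return value of addEdge()
// directly, while CopyConstrain first screens every candidate with
// canAddEdge() and only then inserts its weak edges, e.g.:
//
//   if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
//     /* SUa and SUb are now clustered */;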
  295. /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
  296. /// NumPredsLeft reaches zero, release the successor node.
  297. ///
  298. /// FIXME: Adjust SuccSU height based on MinLatency.
  299. void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  300. SUnit *SuccSU = SuccEdge->getSUnit();
  301. if (SuccEdge->isWeak()) {
  302. --SuccSU->WeakPredsLeft;
  303. if (SuccEdge->isCluster())
  304. NextClusterSucc = SuccSU;
  305. return;
  306. }
  307. #ifndef NDEBUG
  308. if (SuccSU->NumPredsLeft == 0) {
  309. dbgs() << "*** Scheduling failed! ***\n";
  310. SuccSU->dump(this);
  311. dbgs() << " has been released too many times!\n";
  312. llvm_unreachable(0);
  313. }
  314. #endif
  315. --SuccSU->NumPredsLeft;
  316. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
  317. SchedImpl->releaseTopNode(SuccSU);
  318. }
  319. /// releaseSuccessors - Call releaseSucc on each of SU's successors.
  320. void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  321. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
  322. I != E; ++I) {
  323. releaseSucc(SU, &*I);
  324. }
  325. }
  326. /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
  327. /// NumSuccsLeft reaches zero, release the predecessor node.
  328. ///
  329. /// FIXME: Adjust PredSU height based on MinLatency.
  330. void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  331. SUnit *PredSU = PredEdge->getSUnit();
  332. if (PredEdge->isWeak()) {
  333. --PredSU->WeakSuccsLeft;
  334. if (PredEdge->isCluster())
  335. NextClusterPred = PredSU;
  336. return;
  337. }
  338. #ifndef NDEBUG
  339. if (PredSU->NumSuccsLeft == 0) {
  340. dbgs() << "*** Scheduling failed! ***\n";
  341. PredSU->dump(this);
  342. dbgs() << " has been released too many times!\n";
  343. llvm_unreachable(0);
  344. }
  345. #endif
  346. --PredSU->NumSuccsLeft;
  347. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
  348. SchedImpl->releaseBottomNode(PredSU);
  349. }
  350. /// releasePredecessors - Call releasePred on each of SU's predecessors.
  351. void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  352. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
  353. I != E; ++I) {
  354. releasePred(SU, &*I);
  355. }
  356. }
  357. /// This is normally called from the main scheduler loop but may also be invoked
  358. /// by the scheduling strategy to perform additional code motion.
  359. void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
  360. MachineBasicBlock::iterator InsertPos) {
  361. // Advance RegionBegin if the first instruction moves down.
  362. if (&*RegionBegin == MI)
  363. ++RegionBegin;
  364. // Update the instruction stream.
  365. BB->splice(InsertPos, BB, MI);
  366. // Update LiveIntervals
  367. LIS->handleMove(MI, /*UpdateFlags=*/true);
  368. // Recede RegionBegin if an instruction moves above the first.
  369. if (RegionBegin == InsertPos)
  370. RegionBegin = MI;
  371. }
  372. bool ScheduleDAGMI::checkSchedLimit() {
  373. #ifndef NDEBUG
  374. if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
  375. CurrentTop = CurrentBottom;
  376. return false;
  377. }
  378. ++NumInstrsScheduled;
  379. #endif
  380. return true;
  381. }
  382. /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
  383. /// crossing a scheduling boundary. [begin, end) includes all instructions in
  384. /// the region, including the boundary itself and single-instruction regions
  385. /// that don't get scheduled.
  386. void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
  387. MachineBasicBlock::iterator begin,
  388. MachineBasicBlock::iterator end,
  389. unsigned regioninstrs)
  390. {
  391. ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
  392. // For convenience remember the end of the liveness region.
  393. LiveRegionEnd =
  394. (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
  395. }
  396. // Set up the register pressure trackers for the top scheduled and bottom
  397. // scheduled regions.
  398. void ScheduleDAGMI::initRegPressure() {
  399. TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  400. BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
  401. // Close the RPTracker to finalize live ins.
  402. RPTracker.closeRegion();
  403. DEBUG(RPTracker.dump());
  404. // Initialize the live ins and live outs.
  405. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  406. BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
  407. // Close one end of the tracker so we can call
  408. // getMaxUpward/DownwardPressureDelta before advancing across any
  409. // instructions. This converts currently live regs into live ins/outs.
  410. TopRPTracker.closeTop();
  411. BotRPTracker.closeBottom();
  412. BotRPTracker.initLiveThru(RPTracker);
  413. if (!BotRPTracker.getLiveThru().empty()) {
  414. TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
  415. DEBUG(dbgs() << "Live Thru: ";
  416. dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  417. }
  418. // Account for liveness generated by the region boundary.
  419. if (LiveRegionEnd != RegionEnd)
  420. BotRPTracker.recede();
  421. assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
  422. // Cache the list of excess pressure sets in this region. This will also track
  423. // the max pressure in the scheduled code for these sets.
  424. RegionCriticalPSets.clear();
  425. const std::vector<unsigned> &RegionPressure =
  426. RPTracker.getPressure().MaxSetPressure;
  427. for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
  428. unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
  429. if (RegionPressure[i] > Limit) {
  430. DEBUG(dbgs() << TRI->getRegPressureSetName(i)
  431. << " Limit " << Limit
  432. << " Actual " << RegionPressure[i] << "\n");
  433. RegionCriticalPSets.push_back(PressureElement(i, 0));
  434. }
  435. }
  436. DEBUG(dbgs() << "Excess PSets: ";
  437. for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
  438. dbgs() << TRI->getRegPressureSetName(
  439. RegionCriticalPSets[i].PSetID) << " ";
  440. dbgs() << "\n");
  441. }
  442. // FIXME: When the pressure tracker deals in pressure differences then we won't
  443. // iterate over all RegionCriticalPSets[i].
  444. void ScheduleDAGMI::
  445. updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
  446. for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
  447. unsigned ID = RegionCriticalPSets[i].PSetID;
  448. int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
  449. if ((int)NewMaxPressure[ID] > MaxUnits)
  450. MaxUnits = NewMaxPressure[ID];
  451. }
  452. DEBUG(
  453. for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
  454. unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
  455. if (NewMaxPressure[i] > Limit) {
  456. dbgs() << " " << TRI->getRegPressureSetName(i) << ": "
  457. << NewMaxPressure[i] << " > " << Limit << "\n";
  458. }
  459. });
  460. }
  461. /// schedule - Called back from MachineScheduler::runOnMachineFunction
  462. /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
  463. /// only includes instructions that have DAG nodes, not scheduling boundaries.
  464. ///
  465. /// This is a skeletal driver, with all the functionality pushed into helpers,
  466. /// so that it can be easily extended by experimental schedulers. Generally,
  467. /// implementing MachineSchedStrategy should be sufficient to implement a new
  468. /// scheduling algorithm. However, if a scheduler further subclasses
  469. /// ScheduleDAGMI then it will want to override this virtual method in order to
  470. /// update any specialized state.
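// For orientation, a sketch of the MachineSchedStrategy callbacks this driver
// and its helpers invoke (hypothetical subclass name; only the hooks called
// from this file are listed):
//
//   struct SketchStrategy : MachineSchedStrategy {
//     virtual void initialize(ScheduleDAGMI *DAG);       // before initQueues()
//     virtual void registerRoots();                      // after roots released
//     virtual SUnit *pickNode(bool &IsTopNode);          // main loop; 0 = done
//     virtual void schedNode(SUnit *SU, bool IsTopNode); // after scheduleMI()
//     virtual void releaseTopNode(SUnit *SU);            // from releaseSucc()
//     virtual void releaseBottomNode(SUnit *SU);         // from releasePred()
//   };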
  471. void ScheduleDAGMI::schedule() {
  472. buildDAGWithRegPressure();
  473. Topo.InitDAGTopologicalSorting();
  474. postprocessDAG();
  475. SmallVector<SUnit*, 8> TopRoots, BotRoots;
  476. findRootsAndBiasEdges(TopRoots, BotRoots);
  477. // Initialize the strategy before modifying the DAG.
  478. // This may initialize a DFSResult to be used for queue priority.
  479. SchedImpl->initialize(this);
  480. DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
  481. SUnits[su].dumpAll(this));
  482. if (ViewMISchedDAGs) viewGraph();
  483. // Initialize ready queues now that the DAG and priority data are finalized.
  484. initQueues(TopRoots, BotRoots);
  485. bool IsTopNode = false;
  486. while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
  487. assert(!SU->isScheduled && "Node already scheduled");
  488. if (!checkSchedLimit())
  489. break;
  490. scheduleMI(SU, IsTopNode);
  491. updateQueues(SU, IsTopNode);
  492. }
  493. assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
  494. placeDebugValues();
  495. DEBUG({
  496. unsigned BBNum = begin()->getParent()->getNumber();
  497. dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
  498. dumpSchedule();
  499. dbgs() << '\n';
  500. });
  501. }
  502. /// Build the DAG and set up three register pressure trackers.
  503. void ScheduleDAGMI::buildDAGWithRegPressure() {
  504. // Initialize the register pressure tracker used by buildSchedGraph.
  505. RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
  506. /*TrackUntiedDefs=*/true);
  507. // Account for liveness generated by the region boundary.
  508. if (LiveRegionEnd != RegionEnd)
  509. RPTracker.recede();
  510. // Build the DAG, and compute current register pressure.
  511. buildSchedGraph(AA, &RPTracker);
  512. // Initialize top/bottom trackers after computing region pressure.
  513. initRegPressure();
  514. }
  515. /// Apply each ScheduleDAGMutation step in order.
  516. void ScheduleDAGMI::postprocessDAG() {
  517. for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
  518. Mutations[i]->apply(this);
  519. }
  520. }
  521. void ScheduleDAGMI::computeDFSResult() {
  522. if (!DFSResult)
  523. DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  524. DFSResult->clear();
  525. ScheduledTrees.clear();
  526. DFSResult->resize(SUnits.size());
  527. DFSResult->compute(SUnits);
  528. ScheduledTrees.resize(DFSResult->getNumSubtrees());
  529. }
  530. void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
  531. SmallVectorImpl<SUnit*> &BotRoots) {
  532. for (std::vector<SUnit>::iterator
  533. I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
  534. SUnit *SU = &(*I);
  535. assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
  536. // Order predecessors so DFSResult follows the critical path.
  537. SU->biasCriticalPath();
  538. // A SUnit is ready to top schedule if it has no predecessors.
  539. if (!I->NumPredsLeft)
  540. TopRoots.push_back(SU);
  541. // A SUnit is ready to bottom schedule if it has no successors.
  542. if (!I->NumSuccsLeft)
  543. BotRoots.push_back(SU);
  544. }
  545. ExitSU.biasCriticalPath();
  546. }
  547. /// Compute the max cyclic critical path through the DAG. The scheduling DAG
  548. /// only provides the critical path for single block loops. To handle loops that
  549. /// span blocks, we could use the vreg path latencies provided by
  550. /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
  551. /// available for use in the scheduler.
  552. ///
  553. /// The cyclic path estimation identifies a def-use pair that crosses the back
  554. /// edge and considers the depth and height of the nodes. For example, consider
  555. /// the following instruction sequence where each instruction has unit latency
  556. /// and defines an eponymous virtual register:
  557. ///
  558. /// a->b(a,c)->c(b)->d(c)->exit
  559. ///
  560. /// The cyclic critical path is two cycles: b->c->b
  561. /// The acyclic critical path is four cycles: a->b->c->d->exit
  562. /// LiveOutHeight = height(c) = len(c->d->exit) = 2
  563. /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
  564. /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
  565. /// LiveInDepth = depth(b) = len(a->b) = 1
  566. ///
  567. /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
  568. /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
  569. /// CyclicCriticalPath = min(2, 2) = 2
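/// Restated as the formula computed below for each live-out def / PHI-use
/// pair (each difference clamped at zero before taking the min):
///
///   CyclicLatency(def, use) = min(LiveOutDepth - LiveInDepth,
///                                 LiveInHeight - LiveOutHeight)
///   CyclicCriticalPath      = max over all such pairs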
  570. unsigned ScheduleDAGMI::computeCyclicCriticalPath() {
  571. // This only applies to single block loops.
  572. if (!BB->isSuccessor(BB))
  573. return 0;
  574. unsigned MaxCyclicLatency = 0;
  575. // Visit each live out vreg def to find def/use pairs that cross iterations.
  576. ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
  577. for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
  578. RI != RE; ++RI) {
  579. unsigned Reg = *RI;
  580. if (!TRI->isVirtualRegister(Reg))
  581. continue;
  582. const LiveInterval &LI = LIS->getInterval(Reg);
  583. const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
  584. if (!DefVNI)
  585. continue;
  586. MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
  587. const SUnit *DefSU = getSUnit(DefMI);
  588. if (!DefSU)
  589. continue;
  590. unsigned LiveOutHeight = DefSU->getHeight();
  591. unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
  592. // Visit all local users of the vreg def.
  593. for (VReg2UseMap::iterator
  594. UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
  595. if (UI->SU == &ExitSU)
  596. continue;
  597. // Only consider uses of the phi.
  598. LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(UI->SU->getInstr()));
  599. if (!LRQ.valueIn()->isPHIDef())
  600. continue;
  601. // Assume that a path spanning two iterations is a cycle, which could
  602. // overestimate in strange cases. This allows cyclic latency to be
  603. // estimated as the minimum slack of the vreg's depth or height.
  604. unsigned CyclicLatency = 0;
  605. if (LiveOutDepth > UI->SU->getDepth())
  606. CyclicLatency = LiveOutDepth - UI->SU->getDepth();
  607. unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
  608. if (LiveInHeight > LiveOutHeight) {
  609. if (LiveInHeight - LiveOutHeight < CyclicLatency)
  610. CyclicLatency = LiveInHeight - LiveOutHeight;
  611. }
  612. else
  613. CyclicLatency = 0;
  614. DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
  615. << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
  616. if (CyclicLatency > MaxCyclicLatency)
  617. MaxCyclicLatency = CyclicLatency;
  618. }
  619. }
  620. DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  621. return MaxCyclicLatency;
  622. }
  623. /// Identify DAG roots and set up scheduler queues.
  624. void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
  625. ArrayRef<SUnit*> BotRoots) {
  626. NextClusterSucc = NULL;
  627. NextClusterPred = NULL;
  628. // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  629. //
  630. // Nodes with unreleased weak edges can still be roots.
  631. // Release top roots in forward order.
  632. for (SmallVectorImpl<SUnit*>::const_iterator
  633. I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
  634. SchedImpl->releaseTopNode(*I);
  635. }
  636. // Release bottom roots in reverse order so the higher priority nodes appear
  637. // first. This is more natural and slightly more efficient.
  638. for (SmallVectorImpl<SUnit*>::const_reverse_iterator
  639. I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
  640. SchedImpl->releaseBottomNode(*I);
  641. }
  642. releaseSuccessors(&EntrySU);
  643. releasePredecessors(&ExitSU);
  644. SchedImpl->registerRoots();
  645. // Advance past initial DebugValues.
  646. assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
  647. CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  648. TopRPTracker.setPos(CurrentTop);
  649. CurrentBottom = RegionEnd;
  650. }
  651. /// Move an instruction and update register pressure.
  652. void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
  653. // Move the instruction to its new location in the instruction stream.
  654. MachineInstr *MI = SU->getInstr();
  655. if (IsTopNode) {
  656. assert(SU->isTopReady() && "node still has unscheduled dependencies");
  657. if (&*CurrentTop == MI)
  658. CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
  659. else {
  660. moveInstruction(MI, CurrentTop);
  661. TopRPTracker.setPos(MI);
  662. }
  663. // Update top scheduled pressure.
  664. TopRPTracker.advance();
  665. assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
  666. updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
  667. }
  668. else {
  669. assert(SU->isBottomReady() && "node still has unscheduled dependencies");
  670. MachineBasicBlock::iterator priorII =
  671. priorNonDebug(CurrentBottom, CurrentTop);
  672. if (&*priorII == MI)
  673. CurrentBottom = priorII;
  674. else {
  675. if (&*CurrentTop == MI) {
  676. CurrentTop = nextIfDebug(++CurrentTop, priorII);
  677. TopRPTracker.setPos(CurrentTop);
  678. }
  679. moveInstruction(MI, CurrentBottom);
  680. CurrentBottom = MI;
  681. }
  682. // Update bottom scheduled pressure.
  683. BotRPTracker.recede();
  684. assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
  685. updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
  686. }
  687. }
  688. /// Update scheduler queues after scheduling an instruction.
  689. void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  690. // Release dependent instructions for scheduling.
  691. if (IsTopNode)
  692. releaseSuccessors(SU);
  693. else
  694. releasePredecessors(SU);
  695. SU->isScheduled = true;
  696. if (DFSResult) {
  697. unsigned SubtreeID = DFSResult->getSubtreeID(SU);
  698. if (!ScheduledTrees.test(SubtreeID)) {
  699. ScheduledTrees.set(SubtreeID);
  700. DFSResult->scheduleTree(SubtreeID);
  701. SchedImpl->scheduleTree(SubtreeID);
  702. }
  703. }
  704. // Notify the scheduling strategy after updating the DAG.
  705. SchedImpl->schedNode(SU, IsTopNode);
  706. }
  707. /// Reinsert any remaining debug_values, just like the PostRA scheduler.
  708. void ScheduleDAGMI::placeDebugValues() {
  709. // If first instruction was a DBG_VALUE then put it back.
  710. if (FirstDbgValue) {
  711. BB->splice(RegionBegin, BB, FirstDbgValue);
  712. RegionBegin = FirstDbgValue;
  713. }
  714. for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
  715. DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
  716. std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
  717. MachineInstr *DbgValue = P.first;
  718. MachineBasicBlock::iterator OrigPrevMI = P.second;
  719. if (&*RegionBegin == DbgValue)
  720. ++RegionBegin;
  721. BB->splice(++OrigPrevMI, BB, DbgValue);
  722. if (OrigPrevMI == llvm::prior(RegionEnd))
  723. RegionEnd = DbgValue;
  724. }
  725. DbgValues.clear();
  726. FirstDbgValue = NULL;
  727. }
  728. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  729. void ScheduleDAGMI::dumpSchedule() const {
  730. for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
  731. if (SUnit *SU = getSUnit(&(*MI)))
  732. SU->dump(this);
  733. else
  734. dbgs() << "Missing SUnit\n";
  735. }
  736. }
  737. #endif
  738. //===----------------------------------------------------------------------===//
  739. // LoadClusterMutation - DAG post-processing to cluster loads.
  740. //===----------------------------------------------------------------------===//
  741. namespace {
  742. /// \brief Post-process the DAG to create cluster edges between neighboring
  743. /// loads.
  744. class LoadClusterMutation : public ScheduleDAGMutation {
  745. struct LoadInfo {
  746. SUnit *SU;
  747. unsigned BaseReg;
  748. unsigned Offset;
  749. LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
  750. : SU(su), BaseReg(reg), Offset(ofs) {}
  751. };
  752. static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
  753. const LoadClusterMutation::LoadInfo &RHS);
  754. const TargetInstrInfo *TII;
  755. const TargetRegisterInfo *TRI;
  756. public:
  757. LoadClusterMutation(const TargetInstrInfo *tii,
  758. const TargetRegisterInfo *tri)
  759. : TII(tii), TRI(tri) {}
  760. virtual void apply(ScheduleDAGMI *DAG);
  761. protected:
  762. void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
  763. };
  764. } // anonymous
  765. bool LoadClusterMutation::LoadInfoLess(
  766. const LoadClusterMutation::LoadInfo &LHS,
  767. const LoadClusterMutation::LoadInfo &RHS) {
  768. if (LHS.BaseReg != RHS.BaseReg)
  769. return LHS.BaseReg < RHS.BaseReg;
  770. return LHS.Offset < RHS.Offset;
  771. }
  772. void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
  773. ScheduleDAGMI *DAG) {
  774. SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
  775. for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
  776. SUnit *SU = Loads[Idx];
  777. unsigned BaseReg;
  778. unsigned Offset;
  779. if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
  780. LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
  781. }
  782. if (LoadRecords.size() < 2)
  783. return;
  784. std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
  785. unsigned ClusterLength = 1;
  786. for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
  787. if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
  788. ClusterLength = 1;
  789. continue;
  790. }
  791. SUnit *SUa = LoadRecords[Idx].SU;
  792. SUnit *SUb = LoadRecords[Idx+1].SU;
  793. if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
  794. && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
  795. DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
  796. << SUb->NodeNum << ")\n");
  797. // Copy successor edges from SUa to SUb. Interleaving computation
  798. // dependent on SUa can prevent load combining due to register reuse.
  799. // Predecessor edges do not need to be copied from SUb to SUa since nearby
  800. // loads should have effectively the same inputs.
  801. for (SUnit::const_succ_iterator
  802. SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
  803. if (SI->getSUnit() == SUb)
  804. continue;
  805. DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
  806. DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
  807. }
  808. ++ClusterLength;
  809. }
  810. else
  811. ClusterLength = 1;
  812. }
  813. }
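// Worked example (hypothetical loads; whether a pair is actually clustered is
// up to the target's shouldClusterLoads): given loads from [%base+8],
// [%base+0] and [%base+4] hanging off one store chain, the records sort to
// offsets 0, 4, 8. The loop above then tries a Cluster edge from the offset-0
// load to the offset-4 load and, if the target accepts it, from the offset-4
// load to the offset-8 load, growing ClusterLength across the run.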
  814. /// \brief Callback from DAG postProcessing to create cluster edges for loads.
  815. void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
  816. // Map DAG NodeNum to store chain ID.
  817. DenseMap<unsigned, unsigned> StoreChainIDs;
  818. // Map each store chain to a set of dependent loads.
  819. SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  820. for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
  821. SUnit *SU = &DAG->SUnits[Idx];
  822. if (!SU->getInstr()->mayLoad())
  823. continue;
  824. unsigned ChainPredID = DAG->SUnits.size();
  825. for (SUnit::const_pred_iterator
  826. PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
  827. if (PI->isCtrl()) {
  828. ChainPredID = PI->getSUnit()->NodeNum;
  829. break;
  830. }
  831. }
  832. // Check if this chain-like pred has been seen
  833. // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
  834. unsigned NumChains = StoreChainDependents.size();
  835. std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
  836. StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
  837. if (Result.second)
  838. StoreChainDependents.resize(NumChains + 1);
  839. StoreChainDependents[Result.first->second].push_back(SU);
  840. }
  841. // Iterate over the store chains.
  842. for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
  843. clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
  844. }
  845. //===----------------------------------------------------------------------===//
  846. // MacroFusion - DAG post-processing to encourage fusion of macro ops.
  847. //===----------------------------------------------------------------------===//
  848. namespace {
  849. /// \brief Post-process the DAG to create cluster edges between instructions
  850. /// that may be fused by the processor into a single operation.
  851. class MacroFusion : public ScheduleDAGMutation {
  852. const TargetInstrInfo *TII;
  853. public:
  854. MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
  855. virtual void apply(ScheduleDAGMI *DAG);
  856. };
  857. } // anonymous
  858. /// \brief Callback from DAG postProcessing to create cluster edges to encourage
  859. /// fused operations.
  860. void MacroFusion::apply(ScheduleDAGMI *DAG) {
  861. // For now, assume targets can only fuse with the branch.
  862. MachineInstr *Branch = DAG->ExitSU.getInstr();
  863. if (!Branch)
  864. return;
  865. for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
  866. SUnit *SU = &DAG->SUnits[--Idx];
  867. if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
  868. continue;
  869. // Create a single weak edge from SU to ExitSU. The only effect is to cause
  870. // bottom-up scheduling to heavily prioritize the clustered SU. There is no
  871. // need to copy predecessor edges from ExitSU to SU, since top-down
  872. // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
  873. // of SU, we could create an artificial edge from the deepest root, but it
  874. // hasn't been needed yet.
  875. bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
  876. (void)Success;
  877. assert(Success && "No DAG nodes should be reachable from ExitSU");
  878. DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
  879. break;
  880. }
  881. }
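// Typical effect (hedged example; the pairing is whatever the target's
// shouldScheduleAdjacent accepts, e.g. a compare feeding the conditional
// branch on targets with compare-and-branch fusion): the weak Cluster edge
// added above makes bottom-up scheduling keep that instruction adjacent to
// the branch so the pair can issue as one fused macro-op.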
  882. //===----------------------------------------------------------------------===//
  883. // CopyConstrain - DAG post-processing to encourage copy elimination.
  884. //===----------------------------------------------------------------------===//
  885. namespace {
  886. /// \brief Post-process the DAG to create weak edges from all uses of a copy to
  887. /// the one use that defines the copy's source vreg, most likely an induction
  888. /// variable increment.
  889. class CopyConstrain : public ScheduleDAGMutation {
  890. // Transient state.
  891. SlotIndex RegionBeginIdx;
  892. // RegionEndIdx is the slot index of the last non-debug instruction in the
  893. // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  894. SlotIndex RegionEndIdx;
  895. public:
  896. CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
  897. virtual void apply(ScheduleDAGMI *DAG);
  898. protected:
  899. void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
  900. };
  901. } // anonymous
  902. /// constrainLocalCopy handles two possibilities:
  903. /// 1) Local src:
  904. /// I0: = dst
  905. /// I1: src = ...
  906. /// I2: = dst
  907. /// I3: dst = src (copy)
  908. /// (create pred->succ edges I0->I1, I2->I1)
  909. ///
  910. /// 2) Local copy:
  911. /// I0: dst = src (copy)
  912. /// I1: = dst
  913. /// I2: src = ...
  914. /// I3: = dst
  915. /// (create pred->succ edges I1->I2, I3->I2)
  916. ///
  917. /// Although the MachineScheduler is currently constrained to single blocks,
  918. /// this algorithm should handle extended blocks. An EBB is a set of
  919. /// contiguously numbered blocks such that the previous block in the EBB is
  920. /// always the single predecessor.
  921. void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
  922. LiveIntervals *LIS = DAG->getLIS();
  923. MachineInstr *Copy = CopySU->getInstr();
  924. // Check for pure vreg copies.
  925. unsigned SrcReg = Copy->getOperand(1).getReg();
  926. if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
  927. return;
  928. unsigned DstReg = Copy->getOperand(0).getReg();
  929. if (!TargetRegisterInfo::isVirtualRegister(DstReg))
  930. return;
  931. // Check if either the dest or source is local. If it's live across a back
  932. // edge, it's not local. Note that if both vregs are live across the back
  933. // edge, we cannot successfully constrain the copy without cyclic scheduling.
  934. unsigned LocalReg = DstReg;
  935. unsigned GlobalReg = SrcReg;
  936. LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  937. if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
  938. LocalReg = SrcReg;
  939. GlobalReg = DstReg;
  940. LocalLI = &LIS->getInterval(LocalReg);
  941. if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
  942. return;
  943. }
  944. LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
  945. // Find the global segment after the start of the local LI.
  946. LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  947. // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  948. // local live range. We could create edges from other global uses to the local
  949. // start, but the coalescer should have already eliminated these cases, so
  950. // don't bother dealing with them.
  951. if (GlobalSegment == GlobalLI->end())
  952. return;
  953. // If GlobalSegment is killed at the LocalLI->start, the call to find()
  954. // returned the next global segment. But if GlobalSegment overlaps with
  955. // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
  956. // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  957. if (GlobalSegment->contains(LocalLI->beginIndex()))
  958. ++GlobalSegment;
  959. if (GlobalSegment == GlobalLI->end())
  960. return;
  961. // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  962. if (GlobalSegment != GlobalLI->begin()) {
  963. // Two address defs have no hole.
  964. if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
  965. GlobalSegment->start)) {
  966. return;
  967. }
  968. // If the prior global segment may be defined by the same two-address
  969. // instruction that also defines LocalLI, then we can't make a hole here.
  970. if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start,
  971. LocalLI->beginIndex())) {
  972. return;
  973. }
  974. // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
  975. // it would be a disconnected component in the live range.
  976. assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
  977. "Disconnected LRG within the scheduling region.");
  978. }
  979. MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
  980. if (!GlobalDef)
  981. return;
  982. SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
  983. if (!GlobalSU)
  984. return;
  985. // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
  986. // constraining the uses of the last local def to precede GlobalDef.
  987. SmallVector<SUnit*,8> LocalUses;
  988. const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  989. MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  990. SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
  991. for (SUnit::const_succ_iterator
  992. I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
  993. I != E; ++I) {
  994. if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
  995. continue;
  996. if (I->getSUnit() == GlobalSU)
  997. continue;
  998. if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
  999. return;
  1000. LocalUses.push_back(I->getSUnit());
  1001. }
  1002. // Open the top of the GlobalLI hole by constraining any earlier global uses
  1003. // to precede the start of LocalLI.
  1004. SmallVector<SUnit*,8> GlobalUses;
  1005. MachineInstr *FirstLocalDef =
  1006. LIS->getInstructionFromIndex(LocalLI->beginIndex());
  1007. SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
  1008. for (SUnit::const_pred_iterator
  1009. I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
  1010. if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
  1011. continue;
  1012. if (I->getSUnit() == FirstLocalSU)
  1013. continue;
  1014. if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
  1015. return;
  1016. GlobalUses.push_back(I->getSUnit());
  1017. }
  1018. DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
  1019. // Add the weak edges.
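// (These edges are advisory: the heuristics try to honor them to keep the
// copy coalescable, but they never force an ordering, and canAddEdge() above
// already confirmed they cannot create a cycle.)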
  1020. for (SmallVectorImpl<SUnit*>::const_iterator
  1021. I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
  1022. DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
  1023. << GlobalSU->NodeNum << ")\n");
  1024. DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
  1025. }
  1026. for (SmallVectorImpl<SUnit*>::const_iterator
  1027. I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
  1028. DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
  1029. << FirstLocalSU->NodeNum << ")\n");
  1030. DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
  1031. }
  1032. }
  1033. /// \brief Callback from DAG postProcessing to create weak edges to encourage
  1034. /// copy elimination.
  1035. void CopyConstrain::apply(ScheduleDAGMI *DAG) {
  1036. MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
  1037. if (FirstPos == DAG->end())
  1038. return;
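// Cache the SlotIndex range of the scheduling region (skipping debug values)
// so constrainLocalCopy() can test whether a live interval is local to it.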
  1039. RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
  1040. RegionEndIdx = DAG->getLIS()->getInstructionIndex(
  1041. &*priorNonDebug(DAG->end(), DAG->begin()));
  1042. for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
  1043. SUnit *SU = &DAG->SUnits[Idx];
  1044. if (!SU->getInstr()->isCopy())
  1045. continue;
  1046. constrainLocalCopy(SU, DAG);
  1047. }
  1048. }
  1049. //===----------------------------------------------------------------------===//
  1050. // ConvergingScheduler - Implementation of the generic MachineSchedStrategy.
  1051. //===----------------------------------------------------------------------===//
  1052. namespace {
  1053. /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
  1054. /// the schedule.
  1055. class ConvergingScheduler : public MachineSchedStrategy {
  1056. public:
  1057. /// Represent the type of SchedCandidate found within a single queue.
  1058. /// pickNodeBidirectional depends on these listed by decreasing priority.
  1059. enum CandReason {
  1060. NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax,
  1061. ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
  1062. TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
  1063. #ifndef NDEBUG
  1064. static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
  1065. #endif
  1066. /// Policy for scheduling the next instruction in the candidate's zone.
  1067. struct CandPolicy {
  1068. bool ReduceLatency;
  1069. unsigned ReduceResIdx;
  1070. unsigned DemandResIdx;
  1071. CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
  1072. };
  1073. /// Status of an instruction's critical resource consumption.
  1074. struct SchedResourceDelta {
  1075. // Count critical resources in the scheduled region required by SU.
  1076. unsigned CritResources;
  1077. // Count critical resources from another region consumed by SU.
  1078. unsigned DemandedResources;
  1079. SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
  1080. bool operator==(const SchedResourceDelta &RHS) const {
  1081. return CritResources == RHS.CritResources
  1082. && DemandedResources == RHS.DemandedResources;
  1083. }
  1084. bool operator!=(const SchedResourceDelta &RHS) const {
  1085. return !operator==(RHS);
  1086. }
  1087. };
  1088. /// Store the state used by ConvergingScheduler heuristics, required for the
  1089. /// lifetime of one invocation of pickNode().
  1090. struct SchedCandidate {
  1091. CandPolicy Policy;
  1092. // The best SUnit candidate.
  1093. SUnit *SU;
  1094. // The reason for this candidate.
  1095. CandReason Reason;
  1096. // Set of reasons that apply to multiple candidates.
  1097. uint32_t RepeatReasonSet;
  1098. // Register pressure values for the best candidate.
  1099. RegPressureDelta RPDelta;
  1100. // Critical resource consumption of the best candidate.
  1101. SchedResourceDelta ResDelta;
  1102. SchedCandidate(const CandPolicy &policy)
  1103. : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
  1104. bool isValid() const { return SU; }
  1105. // Copy the status of another candidate without changing policy.
  1106. void setBest(SchedCandidate &Best) {
  1107. assert(Best.Reason != NoCand && "uninitialized Sched candidate");
  1108. SU = Best.SU;
  1109. Reason = Best.Reason;
  1110. RPDelta = Best.RPDelta;
  1111. ResDelta = Best.ResDelta;
  1112. }
  1113. bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
  1114. void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
  1115. void initResourceDelta(const ScheduleDAGMI *DAG,
  1116. const TargetSchedModel *SchedModel);
  1117. };
  1118. /// Summarize the unscheduled region.
  1119. struct SchedRemainder {
  1120. // Critical path through the DAG in expected latency.
  1121. unsigned CriticalPath;
  1122. unsigned CyclicCritPath;
  1123. // Scaled count of micro-ops left to schedule.
  1124. unsigned RemIssueCount;
  1125. bool IsAcyclicLatencyLimited;
  1126. // Unscheduled resources
  1127. SmallVector<unsigned, 16> RemainingCounts;
  1128. void reset() {
  1129. CriticalPath = 0;
  1130. CyclicCritPath = 0;
  1131. RemIssueCount = 0;
  1132. IsAcyclicLatencyLimited = false;
  1133. RemainingCounts.clear();
  1134. }
  1135. SchedRemainder() { reset(); }
  1136. void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
  1137. };
  1138. /// Each Scheduling boundary is associated with ready queues. It tracks the
  1139. /// current cycle in the direction of movement, and maintains the state
  1140. /// of "hazards" and other interlocks at the current cycle.
  1141. struct SchedBoundary {
  1142. ScheduleDAGMI *DAG;
  1143. const TargetSchedModel *SchedModel;
  1144. SchedRemainder *Rem;
  1145. ReadyQueue Available;
  1146. ReadyQueue Pending;
  1147. bool CheckPending;
  1148. // For heuristics, keep a list of the nodes that immediately depend on the
  1149. // most recently scheduled node.
  1150. SmallPtrSet<const SUnit*, 8> NextSUs;
  1151. ScheduleHazardRecognizer *HazardRec;
  1152. /// Number of cycles it takes to issue the instructions scheduled in this
  1153. /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
  1154. /// See getStalls().
  1155. unsigned CurrCycle;
  1156. /// Micro-ops issued in the current cycle
  1157. unsigned CurrMOps;
  1158. /// MinReadyCycle - Cycle of the soonest available instruction.
  1159. unsigned MinReadyCycle;
  1160. // The expected latency of the critical path in this scheduled zone.
  1161. unsigned ExpectedLatency;
  1162. // The latency of dependence chains leading into this zone.
1163. // For each node scheduled bottom-up: DLat = max(DLat, N.Depth).
  1164. // For each cycle scheduled: DLat -= 1.
  1165. unsigned DependentLatency;
  1166. /// Count the scheduled (issued) micro-ops that can be retired by
  1167. /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
  1168. unsigned RetiredMOps;
  1169. // Count scheduled resources that have been executed. Resources are
  1170. // considered executed if they become ready in the time that it takes to
  1171. // saturate any resource including the one in question. Counts are scaled
  1172. // for direct comparison with other resources. Counts can be compared with
  1173. // MOps * getMicroOpFactor and Latency * getLatencyFactor.
  1174. SmallVector<unsigned, 16> ExecutedResCounts;
  1175. /// Cache the max count for a single resource.
  1176. unsigned MaxExecutedResCount;
  1177. // Cache the critical resources ID in this scheduled zone.
  1178. unsigned ZoneCritResIdx;
  1179. // Is the scheduled region resource limited vs. latency limited.
  1180. bool IsResourceLimited;
  1181. #ifndef NDEBUG
  1182. // Remember the greatest operand latency as an upper bound on the number of
  1183. // times we should retry the pending queue because of a hazard.
  1184. unsigned MaxObservedLatency;
  1185. #endif
  1186. void reset() {
  1187. // A new HazardRec is created for each DAG and owned by SchedBoundary.
  1188. delete HazardRec;
  1189. Available.clear();
  1190. Pending.clear();
  1191. CheckPending = false;
  1192. NextSUs.clear();
  1193. HazardRec = 0;
  1194. CurrCycle = 0;
  1195. CurrMOps = 0;
  1196. MinReadyCycle = UINT_MAX;
  1197. ExpectedLatency = 0;
  1198. DependentLatency = 0;
  1199. RetiredMOps = 0;
  1200. MaxExecutedResCount = 0;
  1201. ZoneCritResIdx = 0;
  1202. IsResourceLimited = false;
  1203. #ifndef NDEBUG
  1204. MaxObservedLatency = 0;
  1205. #endif
  1206. // Reserve a zero-count for invalid CritResIdx.
  1207. ExecutedResCounts.resize(1);
  1208. assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
  1209. }
  1210. /// Pending queues extend the ready queues with the same ID and the
  1211. /// PendingFlag set.
  1212. SchedBoundary(unsigned ID, const Twine &Name):
  1213. DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
  1214. Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
  1215. HazardRec(0) {
  1216. reset();
  1217. }
  1218. ~SchedBoundary() { delete HazardRec; }
  1219. void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
  1220. SchedRemainder *rem);
  1221. bool isTop() const {
  1222. return Available.getID() == ConvergingScheduler::TopQID;
  1223. }
  1224. #ifndef NDEBUG
  1225. const char *getResourceName(unsigned PIdx) {
  1226. if (!PIdx)
  1227. return "MOps";
  1228. return SchedModel->getProcResource(PIdx)->Name;
  1229. }
  1230. #endif
  1231. /// Get the number of latency cycles "covered" by the scheduled
  1232. /// instructions. This is the larger of the critical path within the zone
  1233. /// and the number of cycles required to issue the instructions.
  1234. unsigned getScheduledLatency() const {
  1235. return std::max(ExpectedLatency, CurrCycle);
  1236. }
  1237. unsigned getUnscheduledLatency(SUnit *SU) const {
  1238. return isTop() ? SU->getHeight() : SU->getDepth();
  1239. }
  1240. unsigned getResourceCount(unsigned ResIdx) const {
  1241. return ExecutedResCounts[ResIdx];
  1242. }
  1243. /// Get the scaled count of scheduled micro-ops and resources, including
  1244. /// executed resources.
  1245. unsigned getCriticalCount() const {
  1246. if (!ZoneCritResIdx)
  1247. return RetiredMOps * SchedModel->getMicroOpFactor();
  1248. return getResourceCount(ZoneCritResIdx);
  1249. }
  1250. /// Get a scaled count for the minimum execution time of the scheduled
  1251. /// micro-ops that are ready to execute by getExecutedCount. Notice the
  1252. /// feedback loop.
  1253. unsigned getExecutedCount() const {
  1254. return std::max(CurrCycle * SchedModel->getLatencyFactor(),
  1255. MaxExecutedResCount);
  1256. }
  1257. bool checkHazard(SUnit *SU);
  1258. unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
  1259. unsigned getOtherResourceCount(unsigned &OtherCritIdx);
  1260. void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);
  1261. void releaseNode(SUnit *SU, unsigned ReadyCycle);
  1262. void bumpCycle(unsigned NextCycle);
  1263. void incExecutedResources(unsigned PIdx, unsigned Count);
  1264. unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
  1265. void bumpNode(SUnit *SU);
  1266. void releasePending();
  1267. void removeReady(SUnit *SU);
  1268. SUnit *pickOnlyChoice();
  1269. #ifndef NDEBUG
  1270. void dumpScheduledState();
  1271. #endif
  1272. };
  1273. private:
  1274. ScheduleDAGMI *DAG;
  1275. const TargetSchedModel *SchedModel;
  1276. const TargetRegisterInfo *TRI;
  1277. // State of the top and bottom scheduled instruction boundaries.
  1278. SchedRemainder Rem;
  1279. SchedBoundary Top;
  1280. SchedBoundary Bot;
  1281. public:
  1282. /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  1283. enum {
  1284. TopQID = 1,
  1285. BotQID = 2,
  1286. LogMaxQID = 2
  1287. };
  1288. ConvergingScheduler():
  1289. DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
  1290. virtual void initialize(ScheduleDAGMI *dag);
  1291. virtual SUnit *pickNode(bool &IsTopNode);
  1292. virtual void schedNode(SUnit *SU, bool IsTopNode);
  1293. virtual void releaseTopNode(SUnit *SU);
  1294. virtual void releaseBottomNode(SUnit *SU);
  1295. virtual void registerRoots();
  1296. protected:
  1297. void checkAcyclicLatency();
  1298. void tryCandidate(SchedCandidate &Cand,
  1299. SchedCandidate &TryCand,
  1300. SchedBoundary &Zone,
  1301. const RegPressureTracker &RPTracker,
  1302. RegPressureTracker &TempTracker);
  1303. SUnit *pickNodeBidirectional(bool &IsTopNode);
  1304. void pickNodeFromQueue(SchedBoundary &Zone,
  1305. const RegPressureTracker &RPTracker,
  1306. SchedCandidate &Candidate);
  1307. void reschedulePhysRegCopies(SUnit *SU, bool isTop);
  1308. #ifndef NDEBUG
  1309. void traceCandidate(const SchedCandidate &Cand);
  1310. #endif
  1311. };
  1312. } // namespace
  1313. void ConvergingScheduler::SchedRemainder::
  1314. init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  1315. reset();
  1316. if (!SchedModel->hasInstrSchedModel())
  1317. return;
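// Pre-compute the total scaled micro-op count and per-resource cycle counts
// for the whole region; the scheduling boundaries subtract from these totals
// as instructions are scheduled.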
  1318. RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  1319. for (std::vector<SUnit>::iterator
  1320. I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
  1321. const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
  1322. RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
  1323. * SchedModel->getMicroOpFactor();
  1324. for (TargetSchedModel::ProcResIter
  1325. PI = SchedModel->getWriteProcResBegin(SC),
  1326. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  1327. unsigned PIdx = PI->ProcResourceIdx;
  1328. unsigned Factor = SchedModel->getResourceFactor(PIdx);
  1329. RemainingCounts[PIdx] += (Factor * PI->Cycles);
  1330. }
  1331. }
  1332. }
  1333. void ConvergingScheduler::SchedBoundary::
  1334. init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  1335. reset();
  1336. DAG = dag;
  1337. SchedModel = smodel;
  1338. Rem = rem;
  1339. if (SchedModel->hasInstrSchedModel())
  1340. ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
  1341. }
  1342. void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  1343. DAG = dag;
  1344. SchedModel = DAG->getSchedModel();
  1345. TRI = DAG->TRI;
  1346. Rem.init(DAG, SchedModel);
  1347. Top.init(DAG, SchedModel, &Rem);
  1348. Bot.init(DAG, SchedModel, &Rem);
  1349. // Initialize resource counts.
  1350. // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  1351. // are disabled, then these HazardRecs will be disabled.
  1352. const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  1353. const TargetMachine &TM = DAG->MF.getTarget();
  1354. Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  1355. Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  1356. assert((!ForceTopDown || !ForceBottomUp) &&
  1357. "-misched-topdown incompatible with -misched-bottomup");
  1358. }
  1359. void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  1360. if (SU->isScheduled)
  1361. return;
  1362. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
  1363. I != E; ++I) {
  1364. if (I->isWeak())
  1365. continue;
  1366. unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
  1367. unsigned Latency = I->getLatency();
  1368. #ifndef NDEBUG
  1369. Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency);
  1370. #endif
  1371. if (SU->TopReadyCycle < PredReadyCycle + Latency)
  1372. SU->TopReadyCycle = PredReadyCycle + Latency;
  1373. }
  1374. Top.releaseNode(SU, SU->TopReadyCycle);
  1375. }
  1376. void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  1377. if (SU->isScheduled)
  1378. return;
  1379. assert(SU->getInstr() && "Scheduled SUnit must have instr");
  1380. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
  1381. I != E; ++I) {
  1382. if (I->isWeak())
  1383. continue;
  1384. unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
  1385. unsigned Latency = I->getLatency();
  1386. #ifndef NDEBUG
  1387. Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency);
  1388. #endif
  1389. if (SU->BotReadyCycle < SuccReadyCycle + Latency)
  1390. SU->BotReadyCycle = SuccReadyCycle + Latency;
  1391. }
  1392. Bot.releaseNode(SU, SU->BotReadyCycle);
  1393. }
  1394. /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
  1395. /// critical path by more cycles than it takes to drain the instruction buffer.
1396. /// We estimate an upper bound on in-flight instructions as:
  1397. ///
  1398. /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
  1399. /// InFlightIterations = AcyclicPath / CyclesPerIteration
  1400. /// InFlightResources = InFlightIterations * LoopResources
  1401. ///
  1402. /// TODO: Check execution resources in addition to IssueCount.
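///
/// Worked example (hypothetical numbers, latency and micro-op factors of 1):
/// with CyclicCritPath = 10, CriticalPath = 40, RemIssueCount = 20 and a
/// micro-op buffer of 32, IterCount = max(10, 20) = 20, AcyclicCount = 40,
/// InFlightCount = ceil(40 * 20 / 20) = 40 > 32, so the loop is flagged as
/// acyclic-latency limited.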
  1403. void ConvergingScheduler::checkAcyclicLatency() {
  1404. if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
  1405. return;
  1406. // Scaled number of cycles per loop iteration.
  1407. unsigned IterCount =
  1408. std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
  1409. Rem.RemIssueCount);
  1410. // Scaled acyclic critical path.
  1411. unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
  1412. // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
  1413. unsigned InFlightCount =
  1414. (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
  1415. unsigned BufferLimit =
  1416. SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
  1417. Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
  1418. DEBUG(dbgs() << "IssueCycles="
  1419. << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
  1420. << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
  1421. << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
  1422. << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
  1423. << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
  1424. if (Rem.IsAcyclicLatencyLimited)
  1425. dbgs() << " ACYCLIC LATENCY LIMIT\n");
  1426. }
  1427. void ConvergingScheduler::registerRoots() {
  1428. Rem.CriticalPath = DAG->ExitSU.getDepth();
  1429. // Some roots may not feed into ExitSU. Check all of them in case.
  1430. for (std::vector<SUnit*>::const_iterator
  1431. I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
  1432. if ((*I)->getDepth() > Rem.CriticalPath)
  1433. Rem.CriticalPath = (*I)->getDepth();
  1434. }
  1435. DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
  1436. if (EnableCyclicPath) {
  1437. Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
  1438. checkAcyclicLatency();
  1439. }
  1440. }
  1441. /// Does this SU have a hazard within the current instruction group.
  1442. ///
  1443. /// The scheduler supports two modes of hazard recognition. The first is the
  1444. /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
  1445. /// supports highly complicated in-order reservation tables
1446. /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
  1447. ///
  1448. /// The second is a streamlined mechanism that checks for hazards based on
  1449. /// simple counters that the scheduler itself maintains. It explicitly checks
  1450. /// for instruction dispatch limitations, including the number of micro-ops that
  1451. /// can dispatch per cycle.
  1452. ///
  1453. /// TODO: Also check whether the SU must start a new group.
  1454. bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  1455. if (HazardRec->isEnabled())
  1456. return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
  1457. unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
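// Without a target hazard recognizer, the only hazard modeled here is
// overflowing the current cycle's issue width with this SU's micro-ops.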
  1458. if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
  1459. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
  1460. << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
  1461. return true;
  1462. }
  1463. return false;
  1464. }
  1465. // Find the unscheduled node in ReadySUs with the highest latency.
  1466. unsigned ConvergingScheduler::SchedBoundary::
  1467. findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
  1468. SUnit *LateSU = 0;
  1469. unsigned RemLatency = 0;
  1470. for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
  1471. I != E; ++I) {
  1472. unsigned L = getUnscheduledLatency(*I);
  1473. if (L > RemLatency) {
  1474. RemLatency = L;
  1475. LateSU = *I;
  1476. }
  1477. }
  1478. if (LateSU) {
  1479. DEBUG(dbgs() << Available.getName() << " RemLatency SU("
  1480. << LateSU->NodeNum << ") " << RemLatency << "c\n");
  1481. }
  1482. return RemLatency;
  1483. }
  1484. // Count resources in this zone and the remaining unscheduled
  1485. // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
  1486. // resource index, or zero if the zone is issue limited.
  1487. unsigned ConvergingScheduler::SchedBoundary::
  1488. getOtherResourceCount(unsigned &OtherCritIdx) {
  1489. OtherCritIdx = 0;
  1490. if (!SchedModel->hasInstrSchedModel())
  1491. return 0;
  1492. unsigned OtherCritCount = Rem->RemIssueCount
  1493. + (RetiredMOps * SchedModel->getMicroOpFactor());
  1494. DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
  1495. << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
  1496. for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
  1497. PIdx != PEnd; ++PIdx) {
  1498. unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
  1499. if (OtherCount > OtherCritCount) {
  1500. OtherCritCount = OtherCount;
  1501. OtherCritIdx = PIdx;
  1502. }
  1503. }
  1504. if (OtherCritIdx) {
  1505. DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
  1506. << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
  1507. << " " << getResourceName(OtherCritIdx) << "\n");
  1508. }
  1509. return OtherCritCount;
  1510. }
  1511. /// Set the CandPolicy for this zone given the current resources and latencies
  1512. /// inside and outside the zone.
  1513. void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
  1514. SchedBoundary &OtherZone) {
  1515. // Now that potential stalls have been considered, apply preemptive heuristics
1516. // based on the total latency and resources inside and outside this
  1517. // zone.
  1518. // Compute remaining latency. We need this both to determine whether the
  1519. // overall schedule has become latency-limited and whether the instructions
  1520. // outside this zone are resource or latency limited.
  1521. //
  1522. // The "dependent" latency is updated incrementally during scheduling as the
  1523. // max height/depth of scheduled nodes minus the cycles since it was
  1524. // scheduled:
1525. // DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
  1526. //
  1527. // The "independent" latency is the max ready queue depth:
  1528. // ILat = max N.depth for N in Available|Pending
  1529. //
  1530. // RemainingLatency is the greater of independent and dependent latency.
  1531. unsigned RemLatency = DependentLatency;
  1532. RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
  1533. RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
  1534. // Compute the critical resource outside the zone.
  1535. unsigned OtherCritIdx;
  1536. unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
  1537. bool OtherResLimited = false;
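// The other zone counts as resource limited if its scaled critical resource
// count exceeds what the remaining latency cycles could cover by more than
// one cycle's worth of work (one latency factor).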
  1538. if (SchedModel->hasInstrSchedModel()) {
  1539. unsigned LFactor = SchedModel->getLatencyFactor();
  1540. OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
  1541. }
  1542. if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) {
  1543. Policy.ReduceLatency |= true;
  1544. DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency "
  1545. << RemLatency << " + " << CurrCycle << "c > CritPath "
  1546. << Rem->CriticalPath << "\n");
  1547. }
  1548. // If the same resource is limiting inside and outside the zone, do nothing.
  1549. if (ZoneCritResIdx == OtherCritIdx)
  1550. return;
  1551. DEBUG(
  1552. if (IsResourceLimited) {
  1553. dbgs() << " " << Available.getName() << " ResourceLimited: "
  1554. << getResourceName(ZoneCritResIdx) << "\n";
  1555. }
  1556. if (OtherResLimited)
  1557. dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n";
  1558. if (!IsResourceLimited && !OtherResLimited)
  1559. dbgs() << " Latency limited both directions.\n");
  1560. if (IsResourceLimited && !Policy.ReduceResIdx)
  1561. Policy.ReduceResIdx = ZoneCritResIdx;
  1562. if (OtherResLimited)
  1563. Policy.DemandResIdx = OtherCritIdx;
  1564. }
  1565. void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
  1566. unsigned ReadyCycle) {
  1567. if (ReadyCycle < MinReadyCycle)
  1568. MinReadyCycle = ReadyCycle;
  1569. // Check for interlocks first. For the purpose of other heuristics, an
  1570. // instruction that cannot issue appears as if it's not in the ReadyQueue.
  1571. bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
  1572. if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
  1573. Pending.push(SU);
  1574. else
  1575. Available.push(SU);
  1576. // Record this node as an immediate dependent of the scheduled node.
  1577. NextSUs.insert(SU);
  1578. }
  1579. /// Move the boundary of scheduled code by one cycle.
  1580. void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
  1581. if (SchedModel->getMicroOpBufferSize() == 0) {
  1582. assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  1583. if (MinReadyCycle > NextCycle)
  1584. NextCycle = MinReadyCycle;
  1585. }
  1586. // Update the current micro-ops, which will issue in the next cycle.
  1587. unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
  1588. CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
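// For illustration (hypothetical numbers): with an issue width of 4 and a
// two-cycle bump, DecMOps is 8 and any smaller outstanding count is cleared.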
  1589. // Decrement DependentLatency based on the next cycle.
  1590. if ((NextCycle - CurrCycle) > DependentLatency)
  1591. DependentLatency = 0;
  1592. else
  1593. DependentLatency -= (NextCycle - CurrCycle);
  1594. if (!HazardRec->isEnabled()) {
  1595. // Bypass HazardRec virtual calls.
  1596. CurrCycle = NextCycle;
  1597. }
  1598. else {
  1599. // Bypass getHazardType calls in case of long latency.
  1600. for (; CurrCycle != NextCycle; ++CurrCycle) {
  1601. if (isTop())
  1602. HazardRec->AdvanceCycle();
  1603. else
  1604. HazardRec->RecedeCycle();
  1605. }
  1606. }
  1607. CheckPending = true;
  1608. unsigned LFactor = SchedModel->getLatencyFactor();
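// The zone is resource limited when the scaled critical-resource count leads
// the scaled scheduled latency by more than one cycle; otherwise latency
// dominates.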
  1609. IsResourceLimited =
  1610. (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
  1611. > (int)LFactor;
  1612. DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
  1613. }
  1614. void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
  1615. unsigned Count) {
  1616. ExecutedResCounts[PIdx] += Count;
  1617. if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
  1618. MaxExecutedResCount = ExecutedResCounts[PIdx];
  1619. }
  1620. /// Add the given processor resource to this scheduled zone.
  1621. ///
  1622. /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
  1623. /// during which this resource is consumed.
  1624. ///
  1625. /// \return the next cycle at which the instruction may execute without
  1626. /// oversubscribing resources.
  1627. unsigned ConvergingScheduler::SchedBoundary::
  1628. countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
  1629. unsigned Factor = SchedModel->getResourceFactor(PIdx);
  1630. unsigned Count = Factor * Cycles;
  1631. DEBUG(dbgs() << " " << getResourceName(PIdx)
  1632. << " +" << Cycles << "x" << Factor << "u\n");
  1633. // Update Executed resources counts.
  1634. incExecutedResources(PIdx, Count);
  1635. assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  1636. Rem->RemainingCounts[PIdx] -= Count;
  1637. // Check if this resource exceeds the current critical resource. If so, it
  1638. // becomes the critical resource.
  1639. if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
  1640. ZoneCritResIdx = PIdx;
  1641. DEBUG(dbgs() << " *** Critical resource "
  1642. << getResourceName(PIdx) << ": "
  1643. << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
  1644. }
  1645. // TODO: We don't yet model reserved resources. It's not hard though.
  1646. return CurrCycle;
  1647. }
  1648. /// Move the boundary of scheduled code by one SUnit.
  1649. void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  1650. // Update the reservation table.
  1651. if (HazardRec->isEnabled()) {
  1652. if (!isTop() && SU->isCall) {
  1653. // Calls are scheduled with their preceding instructions. For bottom-up
  1654. // scheduling, clear the pipeline state before emitting.
  1655. HazardRec->Reset();
  1656. }
  1657. HazardRec->EmitInstruction(SU);
  1658. }
  1659. const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  1660. unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
  1661. CurrMOps += IncMOps;
  1662. // checkHazard prevents scheduling multiple instructions per cycle that exceed
  1663. // issue width. However, we commonly reach the maximum. In this case
  1664. // opportunistically bump the cycle to avoid uselessly checking everything in
  1665. // the readyQ. Furthermore, a single instruction may produce more than one
  1666. // cycle's worth of micro-ops.
  1667. //
  1668. // TODO: Also check if this SU must end a dispatch group.
  1669. unsigned NextCycle = CurrCycle;
  1670. if (CurrMOps >= SchedModel->getIssueWidth()) {
  1671. ++NextCycle;
  1672. DEBUG(dbgs() << " *** Max MOps " << CurrMOps
  1673. << " at cycle " << CurrCycle << '\n');
  1674. }
  1675. unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  1676. DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
  1677. switch (SchedModel->getMicroOpBufferSize()) {
  1678. case 0:
  1679. assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
  1680. break;
  1681. case 1:
  1682. if (ReadyCycle > NextCycle) {
  1683. NextCycle = ReadyCycle;
  1684. DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
  1685. }
  1686. break;
  1687. default:
  1688. // We don't currently model the OOO reorder buffer, so consider all
  1689. // scheduled MOps to be "retired".
  1690. break;
  1691. }
  1692. RetiredMOps += IncMOps;
  1693. // Update resource counts and critical resource.
  1694. if (SchedModel->hasInstrSchedModel()) {
  1695. unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
  1696. assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
  1697. Rem->RemIssueCount -= DecRemIssue;
  1698. if (ZoneCritResIdx) {
  1699. // Scale scheduled micro-ops for comparing with the critical resource.
  1700. unsigned ScaledMOps =
  1701. RetiredMOps * SchedModel->getMicroOpFactor();
  1702. // If scaled micro-ops are now more than the previous critical resource by
  1703. // a full cycle, then micro-ops issue becomes critical.
  1704. if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
  1705. >= (int)SchedModel->getLatencyFactor()) {
  1706. ZoneCritResIdx = 0;
  1707. DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
  1708. << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
  1709. }
  1710. }
  1711. for (TargetSchedModel::ProcResIter
  1712. PI = SchedModel->getWriteProcResBegin(SC),
  1713. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  1714. unsigned RCycle =
  1715. countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
  1716. if (RCycle > NextCycle)
  1717. NextCycle = RCycle;
  1718. }
  1719. }
  1720. // Update ExpectedLatency and DependentLatency.
  1721. unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  1722. unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
  1723. if (SU->getDepth() > TopLatency) {
  1724. TopLatency = SU->getDepth();
  1725. DEBUG(dbgs() << " " << Available.getName()
  1726. << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
  1727. }
  1728. if (SU->getHeight() > BotLatency) {
  1729. BotLatency = SU->getHeight();
  1730. DEBUG(dbgs() << " " << Available.getName()
  1731. << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
  1732. }
  1733. // If we stall for any reason, bump the cycle.
  1734. if (NextCycle > CurrCycle) {
  1735. bumpCycle(NextCycle);
  1736. }
  1737. else {
  1738. // After updating ZoneCritResIdx and ExpectedLatency, check if we're
1739. // resource limited. If a stall occurred, bumpCycle does this.
  1740. unsigned LFactor = SchedModel->getLatencyFactor();
  1741. IsResourceLimited =
  1742. (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
  1743. > (int)LFactor;
  1744. }
  1745. DEBUG(dumpScheduledState());
  1746. }
1747. /// Release pending ready nodes into the available queue. This makes them
  1748. /// visible to heuristics.
  1749. void ConvergingScheduler::SchedBoundary::releasePending() {
  1750. // If the available queue is empty, it is safe to reset MinReadyCycle.
  1751. if (Available.empty())
  1752. MinReadyCycle = UINT_MAX;
  1753. // Check to see if any of the pending instructions are ready to issue. If
  1754. // so, add them to the available queue.
  1755. bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
  1756. for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
  1757. SUnit *SU = *(Pending.begin()+i);
  1758. unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
  1759. if (ReadyCycle < MinReadyCycle)
  1760. MinReadyCycle = ReadyCycle;
  1761. if (!IsBuffered && ReadyCycle > CurrCycle)
  1762. continue;
  1763. if (checkHazard(SU))
  1764. continue;
  1765. Available.push(SU);
  1766. Pending.remove(Pending.begin()+i);
  1767. --i; --e;
  1768. }
  1769. DEBUG(if (!Pending.empty()) Pending.dump());
  1770. CheckPending = false;
  1771. }
  1772. /// Remove SU from the ready set for this boundary.
  1773. void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
  1774. if (Available.isInQueue(SU))
  1775. Available.remove(Available.find(SU));
  1776. else {
  1777. assert(Pending.isInQueue(SU) && "bad ready count");
  1778. Pending.remove(Pending.find(SU));
  1779. }
  1780. }
  1781. /// If this queue only has one ready candidate, return it. As a side effect,
  1782. /// defer any nodes that now hit a hazard, and advance the cycle until at least
  1783. /// one node is ready. If multiple instructions are ready, return NULL.
  1784. SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
  1785. if (CheckPending)
  1786. releasePending();
  1787. if (CurrMOps > 0) {
  1788. // Defer any ready instrs that now have a hazard.
  1789. for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
  1790. if (checkHazard(*I)) {
  1791. Pending.push(*I);
  1792. I = Available.remove(I);
  1793. continue;
  1794. }
  1795. ++I;
  1796. }
  1797. }
  1798. for (unsigned i = 0; Available.empty(); ++i) {
  1799. assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
  1800. "permanent hazard"); (void)i;
  1801. bumpCycle(CurrCycle + 1);
  1802. releasePending();
  1803. }
  1804. if (Available.size() == 1)
  1805. return *Available.begin();
  1806. return NULL;
  1807. }
  1808. #ifndef NDEBUG
  1809. // This is useful information to dump after bumpNode.
  1810. // Note that the Queue contents are more useful before pickNodeFromQueue.
  1811. void ConvergingScheduler::SchedBoundary::dumpScheduledState() {
  1812. unsigned ResFactor;
  1813. unsigned ResCount;
  1814. if (ZoneCritResIdx) {
  1815. ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
  1816. ResCount = getResourceCount(ZoneCritResIdx);
  1817. }
  1818. else {
  1819. ResFactor = SchedModel->getMicroOpFactor();
  1820. ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
  1821. }
  1822. unsigned LFactor = SchedModel->getLatencyFactor();
  1823. dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
  1824. << " Retired: " << RetiredMOps;
  1825. dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
  1826. dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
  1827. << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
  1828. << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
  1829. << (IsResourceLimited ? " - Resource" : " - Latency")
  1830. << " limited.\n";
  1831. }
  1832. #endif
  1833. void ConvergingScheduler::SchedCandidate::
  1834. initResourceDelta(const ScheduleDAGMI *DAG,
  1835. const TargetSchedModel *SchedModel) {
  1836. if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
  1837. return;
  1838. const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  1839. for (TargetSchedModel::ProcResIter
  1840. PI = SchedModel->getWriteProcResBegin(SC),
  1841. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  1842. if (PI->ProcResourceIdx == Policy.ReduceResIdx)
  1843. ResDelta.CritResources += PI->Cycles;
  1844. if (PI->ProcResourceIdx == Policy.DemandResIdx)
  1845. ResDelta.DemandedResources += PI->Cycles;
  1846. }
  1847. }
  1848. /// Return true if this heuristic determines order.
  1849. static bool tryLess(int TryVal, int CandVal,
  1850. ConvergingScheduler::SchedCandidate &TryCand,
  1851. ConvergingScheduler::SchedCandidate &Cand,
  1852. ConvergingScheduler::CandReason Reason) {
  1853. if (TryVal < CandVal) {
  1854. TryCand.Reason = Reason;
  1855. return true;
  1856. }
  1857. if (TryVal > CandVal) {
  1858. if (Cand.Reason > Reason)
  1859. Cand.Reason = Reason;
  1860. return true;
  1861. }
  1862. Cand.setRepeat(Reason);
  1863. return false;
  1864. }
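/// Return true if this heuristic determines order, preferring the candidate
/// with the larger value.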
  1865. static bool tryGreater(int TryVal, int CandVal,
  1866. ConvergingScheduler::SchedCandidate &TryCand,
  1867. ConvergingScheduler::SchedCandidate &Cand,
  1868. ConvergingScheduler::CandReason Reason) {
  1869. if (TryVal > CandVal) {
  1870. TryCand.Reason = Reason;
  1871. return true;
  1872. }
  1873. if (TryVal < CandVal) {
  1874. if (Cand.Reason > Reason)
  1875. Cand.Reason = Reason;
  1876. return true;
  1877. }
  1878. Cand.setRepeat(Reason);
  1879. return false;
  1880. }
  1881. static bool tryPressure(const PressureElement &TryP,
  1882. const PressureElement &CandP,
  1883. ConvergingScheduler::SchedCandidate &TryCand,
  1884. ConvergingScheduler::SchedCandidate &Cand,
  1885. ConvergingScheduler::CandReason Reason) {
  1886. // If both candidates affect the same set, go with the smallest increase.
  1887. if (TryP.PSetID == CandP.PSetID) {
  1888. return tryLess(TryP.UnitIncrease, CandP.UnitIncrease, TryCand, Cand,
  1889. Reason);
  1890. }
  1891. // If one candidate decreases and the other increases, go with it.
  1892. if (tryLess(TryP.UnitIncrease < 0, CandP.UnitIncrease < 0, TryCand, Cand,
  1893. Reason)) {
  1894. return true;
  1895. }
  1896. // If TryP has lower Rank, it has a higher priority.
  1897. int TryRank = TryP.PSetRank();
  1898. int CandRank = CandP.PSetRank();
  1899. // If the candidates are decreasing pressure, reverse priority.
  1900. if (TryP.UnitIncrease < 0)
  1901. std::swap(TryRank, CandRank);
  1902. return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
  1903. }
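/// Number of unscheduled weak predecessors (top-down) or weak successors
/// (bottom-up) still attached to SU.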
  1904. static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  1905. return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
  1906. }
  1907. /// Minimize physical register live ranges. Regalloc wants them adjacent to
  1908. /// their physreg def/use.
  1909. ///
  1910. /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
  1911. /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
  1912. /// with the operation that produces or consumes the physreg. We'll do this when
  1913. /// regalloc has support for parallel copies.
  1914. static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
  1915. const MachineInstr *MI = SU->getInstr();
  1916. if (!MI->isCopy())
  1917. return 0;
  1918. unsigned ScheduledOper = isTop ? 1 : 0;
  1919. unsigned UnscheduledOper = isTop ? 0 : 1;
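// For a COPY, operand 0 is the def and operand 1 is the use. When scheduling
// top-down the use side faces the already scheduled zone; bottom-up, the def
// side does.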
1920. // If we have already scheduled the physreg producer/consumer, immediately
  1921. // schedule the copy.
  1922. if (TargetRegisterInfo::isPhysicalRegister(
  1923. MI->getOperand(ScheduledOper).getReg()))
  1924. return 1;
  1925. // If the physreg is at the boundary, defer it. Otherwise schedule it
  1926. // immediately to free the dependent. We can hoist the copy later.
  1927. bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
  1928. if (TargetRegisterInfo::isPhysicalRegister(
  1929. MI->getOperand(UnscheduledOper).getReg()))
  1930. return AtBoundary ? -1 : 1;
  1931. return 0;
  1932. }
  1933. static bool tryLatency(ConvergingScheduler::SchedCandidate &TryCand,
  1934. ConvergingScheduler::SchedCandidate &Cand,
  1935. ConvergingScheduler::SchedBoundary &Zone) {
  1936. if (Zone.isTop()) {
  1937. if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
  1938. if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
  1939. TryCand, Cand, ConvergingScheduler::TopDepthReduce))
  1940. return true;
  1941. }
  1942. if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
  1943. TryCand, Cand, ConvergingScheduler::TopPathReduce))
  1944. return true;
  1945. }
  1946. else {
  1947. if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
  1948. if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
  1949. TryCand, Cand, ConvergingScheduler::BotHeightReduce))
  1950. return true;
  1951. }
  1952. if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
  1953. TryCand, Cand, ConvergingScheduler::BotPathReduce))
  1954. return true;
  1955. }
  1956. return false;
  1957. }
1958. /// Apply a set of heuristics to a new candidate. Heuristics are currently
  1959. /// hierarchical. This may be more efficient than a graduated cost model because
  1960. /// we don't need to evaluate all aspects of the model for each node in the
  1961. /// queue. But it's really done to make the heuristics easier to debug and
  1962. /// statistically analyze.
  1963. ///
  1964. /// \param Cand provides the policy and current best candidate.
  1965. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
  1966. /// \param Zone describes the scheduled zone that we are extending.
  1967. /// \param RPTracker describes reg pressure within the scheduled zone.
  1968. /// \param TempTracker is a scratch pressure tracker to reuse in queries.
  1969. void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
  1970. SchedCandidate &TryCand,
  1971. SchedBoundary &Zone,
  1972. const RegPressureTracker &RPTracker,
  1973. RegPressureTracker &TempTracker) {
  1974. // Always initialize TryCand's RPDelta.
  1975. TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
  1976. DAG->getRegionCriticalPSets(),
  1977. DAG->getRegPressure().MaxSetPressure);
  1978. // Initialize the candidate if needed.
  1979. if (!Cand.isValid()) {
  1980. TryCand.Reason = NodeOrder;
  1981. return;
  1982. }
  1983. if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
  1984. biasPhysRegCopy(Cand.SU, Zone.isTop()),
  1985. TryCand, Cand, PhysRegCopy))
  1986. return;
  1987. // Avoid exceeding the target's limit. If signed PSetID is negative, it is
  1988. // invalid; convert it to INT_MAX to give it lowest priority.
  1989. if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
  1990. RegExcess))
  1991. return;
  1992. // For loops that are acyclic path limited, aggressively schedule for latency.
  1993. if (Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone))
  1994. return;
  1995. // Avoid increasing the max critical pressure in the scheduled region.
  1996. if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
  1997. TryCand, Cand, RegCritical))
  1998. return;
  1999. // Keep clustered nodes together to encourage downstream peephole
  2000. // optimizations which may reduce resource requirements.
  2001. //
  2002. // This is a best effort to set things up for a post-RA pass. Optimizations
  2003. // like generating loads of multiple registers should ideally be done within
  2004. // the scheduler pass by combining the loads during DAG postprocessing.
  2005. const SUnit *NextClusterSU =
  2006. Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  2007. if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
  2008. TryCand, Cand, Cluster))
  2009. return;
  2010. // Weak edges are for clustering and other constraints.
  2011. if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
  2012. getWeakLeft(Cand.SU, Zone.isTop()),
  2013. TryCand, Cand, Weak)) {
  2014. return;
  2015. }
  2016. // Avoid increasing the max pressure of the entire region.
  2017. if (tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax,
  2018. TryCand, Cand, RegMax))
  2019. return;
  2020. // Avoid critical resource consumption and balance the schedule.
  2021. TryCand.initResourceDelta(DAG, SchedModel);
  2022. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
  2023. TryCand, Cand, ResourceReduce))
  2024. return;
  2025. if (tryGreater(TryCand.ResDelta.DemandedResources,
  2026. Cand.ResDelta.DemandedResources,
  2027. TryCand, Cand, ResourceDemand))
  2028. return;
  2029. // Avoid serializing long latency dependence chains.
  2030. // For acyclic path limited loops, latency was already checked above.
  2031. if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
  2032. && tryLatency(TryCand, Cand, Zone)) {
  2033. return;
  2034. }
  2035. // Prefer immediate defs/users of the last scheduled instruction. This is a
  2036. // local pressure avoidance strategy that also makes the machine code
  2037. // readable.
  2038. if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
  2039. TryCand, Cand, NextDefUse))
  2040. return;
  2041. // Fall through to original instruction order.
  2042. if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
  2043. || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
  2044. TryCand.Reason = NodeOrder;
  2045. }
  2046. }
  2047. #ifndef NDEBUG
  2048. const char *ConvergingScheduler::getReasonStr(
  2049. ConvergingScheduler::CandReason Reason) {
  2050. switch (Reason) {
  2051. case NoCand: return "NOCAND ";
  2052. case PhysRegCopy: return "PREG-COPY";
  2053. case RegExcess: return "REG-EXCESS";
  2054. case RegCritical: return "REG-CRIT ";
  2055. case Cluster: return "CLUSTER ";
  2056. case Weak: return "WEAK ";
  2057. case RegMax: return "REG-MAX ";
  2058. case ResourceReduce: return "RES-REDUCE";
  2059. case ResourceDemand: return "RES-DEMAND";
  2060. case TopDepthReduce: return "TOP-DEPTH ";
  2061. case TopPathReduce: return "TOP-PATH ";
  2062. case BotHeightReduce:return "BOT-HEIGHT";
  2063. case BotPathReduce: return "BOT-PATH ";
  2064. case NextDefUse: return "DEF-USE ";
  2065. case NodeOrder: return "ORDER ";
  2066. };
  2067. llvm_unreachable("Unknown reason!");
  2068. }
  2069. void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
  2070. PressureElement P;
  2071. unsigned ResIdx = 0;
  2072. unsigned Latency = 0;
  2073. switch (Cand.Reason) {
  2074. default:
  2075. break;
  2076. case RegExcess:
  2077. P = Cand.RPDelta.Excess;
  2078. break;
  2079. case RegCritical:
  2080. P = Cand.RPDelta.CriticalMax;
  2081. break;
  2082. case RegMax:
  2083. P = Cand.RPDelta.CurrentMax;
  2084. break;
  2085. case ResourceReduce:
  2086. ResIdx = Cand.Policy.ReduceResIdx;
  2087. break;
  2088. case ResourceDemand:
  2089. ResIdx = Cand.Policy.DemandResIdx;
  2090. break;
  2091. case TopDepthReduce:
  2092. Latency = Cand.SU->getDepth();
  2093. break;
  2094. case TopPathReduce:
  2095. Latency = Cand.SU->getHeight();
  2096. break;
  2097. case BotHeightReduce:
  2098. Latency = Cand.SU->getHeight();
  2099. break;
  2100. case BotPathReduce:
  2101. Latency = Cand.SU->getDepth();
  2102. break;
  2103. }
  2104. dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
  2105. if (P.isValid())
  2106. dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
  2107. << ":" << P.UnitIncrease << " ";
  2108. else
  2109. dbgs() << " ";
  2110. if (ResIdx)
  2111. dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
  2112. else
  2113. dbgs() << " ";
  2114. if (Latency)
  2115. dbgs() << " " << Latency << " cycles ";
  2116. else
  2117. dbgs() << " ";
  2118. dbgs() << '\n';
  2119. }
  2120. #endif
  2121. /// Pick the best candidate from the top queue.
  2122. ///
  2123. /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
  2124. /// DAG building. To adjust for the current scheduling location we need to
  2125. /// maintain the number of vreg uses remaining to be top-scheduled.
  2126. void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
  2127. const RegPressureTracker &RPTracker,
  2128. SchedCandidate &Cand) {
  2129. ReadyQueue &Q = Zone.Available;
  2130. DEBUG(Q.dump());
  2131. // getMaxPressureDelta temporarily modifies the tracker.
  2132. RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
  2133. for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
  2134. SchedCandidate TryCand(Cand.Policy);
  2135. TryCand.SU = *I;
  2136. tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
  2137. if (TryCand.Reason != NoCand) {
  2138. // Initialize resource delta if needed in case future heuristics query it.
  2139. if (TryCand.ResDelta == SchedResourceDelta())
  2140. TryCand.initResourceDelta(DAG, SchedModel);
  2141. Cand.setBest(TryCand);
  2142. DEBUG(traceCandidate(Cand));
  2143. }
  2144. }
  2145. }
  2146. static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
  2147. bool IsTop) {
  2148. DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
  2149. << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
  2150. }
  2151. /// Pick the best candidate node from either the top or bottom queue.
  2152. SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  2153. // Schedule as far as possible in the direction of no choice. This is most
  2154. // efficient, but also provides the best heuristics for CriticalPSets.
  2155. if (SUnit *SU = Bot.pickOnlyChoice()) {
  2156. IsTopNode = false;
  2157. DEBUG(dbgs() << "Pick Bot NOCAND\n");
  2158. return SU;
  2159. }
  2160. if (SUnit *SU = Top.pickOnlyChoice()) {
  2161. IsTopNode = true;
  2162. DEBUG(dbgs() << "Pick Top NOCAND\n");
  2163. return SU;
  2164. }
  2165. CandPolicy NoPolicy;
  2166. SchedCandidate BotCand(NoPolicy);
  2167. SchedCandidate TopCand(NoPolicy);
  2168. Bot.setPolicy(BotCand.Policy, Top);
  2169. Top.setPolicy(TopCand.Policy, Bot);
  2170. // Prefer bottom scheduling when heuristics are silent.
  2171. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  2172. assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  2173. // If either Q has a single candidate that provides the least increase in
  2174. // Excess pressure, we can immediately schedule from that Q.
  2175. //
  2176. // RegionCriticalPSets summarizes the pressure within the scheduled region and
  2177. // affects picking from either Q. If scheduling in one direction must
  2178. // increase pressure for one of the excess PSets, then schedule in that
  2179. // direction first to provide more freedom in the other direction.
  2180. if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
  2181. || (BotCand.Reason == RegCritical
  2182. && !BotCand.isRepeat(RegCritical)))
  2183. {
  2184. IsTopNode = false;
  2185. tracePick(BotCand, IsTopNode);
  2186. return BotCand.SU;
  2187. }
  2188. // Check if the top Q has a better candidate.
  2189. pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  2190. assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  2191. // Choose the queue with the most important (lowest enum) reason.
  2192. if (TopCand.Reason < BotCand.Reason) {
  2193. IsTopNode = true;
  2194. tracePick(TopCand, IsTopNode);
  2195. return TopCand.SU;
  2196. }
  2197. // Otherwise prefer the bottom candidate, in node order if all else failed.
  2198. IsTopNode = false;
  2199. tracePick(BotCand, IsTopNode);
  2200. return BotCand.SU;
  2201. }
  2202. /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
  2203. SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  2204. if (DAG->top() == DAG->bottom()) {
  2205. assert(Top.Available.empty() && Top.Pending.empty() &&
  2206. Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
  2207. return NULL;
  2208. }
  2209. SUnit *SU;
  2210. do {
  2211. if (ForceTopDown) {
  2212. SU = Top.pickOnlyChoice();
  2213. if (!SU) {
  2214. CandPolicy NoPolicy;
  2215. SchedCandidate TopCand(NoPolicy);
  2216. pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  2217. assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  2218. SU = TopCand.SU;
  2219. }
  2220. IsTopNode = true;
  2221. }
  2222. else if (ForceBottomUp) {
  2223. SU = Bot.pickOnlyChoice();
  2224. if (!SU) {
  2225. CandPolicy NoPolicy;
  2226. SchedCandidate BotCand(NoPolicy);
  2227. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  2228. assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  2229. SU = BotCand.SU;
  2230. }
  2231. IsTopNode = false;
  2232. }
  2233. else {
  2234. SU = pickNodeBidirectional(IsTopNode);
  2235. }
  2236. } while (SU->isScheduled);
  2237. if (SU->isTopReady())
  2238. Top.removeReady(SU);
  2239. if (SU->isBottomReady())
  2240. Bot.removeReady(SU);
  2241. DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  2242. return SU;
  2243. }
  2244. void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
  2245. MachineBasicBlock::iterator InsertPos = SU->getInstr();
  2246. if (!isTop)
  2247. ++InsertPos;
  2248. SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
  2249. // Find already scheduled copies with a single physreg dependence and move
  2250. // them just above the scheduled instruction.
  2251. for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
  2252. I != E; ++I) {
  2253. if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
  2254. continue;
  2255. SUnit *DepSU = I->getSUnit();
  2256. if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
  2257. continue;
  2258. MachineInstr *Copy = DepSU->getInstr();
  2259. if (!Copy->isCopy())
  2260. continue;
  2261. DEBUG(dbgs() << " Rescheduling physreg copy ";
  2262. I->getSUnit()->dump(DAG));
  2263. DAG->moveInstruction(Copy, InsertPos);
  2264. }
  2265. }
  2266. /// Update the scheduler's state after scheduling a node. This is the same node
  2267. /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2268. /// its state based on the current cycle before MachineSchedStrategy does.
  2269. ///
  2270. /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
  2271. /// them here. See comments in biasPhysRegCopy.
  2272. void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  2273. if (IsTopNode) {
  2274. SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
  2275. Top.bumpNode(SU);
  2276. if (SU->hasPhysRegUses)
  2277. reschedulePhysRegCopies(SU, true);
  2278. }
  2279. else {
  2280. SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
  2281. Bot.bumpNode(SU);
  2282. if (SU->hasPhysRegDefs)
  2283. reschedulePhysRegCopies(SU, false);
  2284. }
  2285. }

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Register DAG post-processors.
  //
  // FIXME: extend the mutation API to allow earlier mutations to instantiate
  // data and pass it to later mutations. Have a single mutation that gathers
  // the interesting nodes in one pass.
  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(DAG->TII));
  return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);
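
// Usage sketch (assuming the usual -misched registry plumbing set up earlier
// in this file): the name registered above can be selected on the command
// line, e.g.
//
//   llc -enable-misched -misched=converge foo.ll
//
// Targets that do not install their own default get this scheduler when the
// machine scheduler pass is enabled.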

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  const SchedDFSResult *DFSResult;
  const BitVector *ScheduledTrees;
  bool MaximizeILP;

  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};
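
// Illustrative example of the comparator above (hypothetical values, not from
// this file): with MaximizeILP set, a node A with ILP 3 whose subtree has
// already started scheduling is picked ahead of a node B with ILP 5 in an
// unscheduled subtree; the scheduled-tree test is decided first, so the ILP
// comparison never runs for that pair.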

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  /// In case all subtrees are eventually connected to a common root through
  /// data dependence (e.g. reduction), place an upper limit on their size.
  ///
  /// FIXME: A subtree limit is generally good, but in the situation commented
  /// above, where multiple similar subtrees feed a common root, we should
  /// only split at a point where the resulting subtrees will be balanced.
  /// (a motivating test case must be found).
  static const unsigned SubtreeLimit = 16;

  ScheduleDAGMI *DAG;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *dag) {
    DAG = dag;
    DAG->computeDFSResult();
    Cmp.DFSResult = DAG->getDFSResult();
    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    ReadyQ.clear();
  }

  virtual void registerRoots() {
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  /// Callback to select the highest priority node from the ready Q.
  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
          << " ILP: " << DAG->getDFSResult()->getILP(SU)
          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
          << DAG->getDFSResult()->getSubtreeLevel(
            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
          << "Scheduling " << *SU->getInstr());
    return SU;
  }

  /// \brief Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  /// DFSResults, and resort the priority Q.
  virtual void schedNode(SUnit *SU, bool IsTopNode) {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  }

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}

static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
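
// Usage sketch (debug builds only, assuming the same -misched registry
// plumbing): the shuffler exists to stress-test scheduler correctness rather
// than to produce good schedules, e.g.
//
//   llc -enable-misched -misched=shuffle foo.ll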
#endif // !NDEBUG

//===----------------------------------------------------------------------===//
// GraphWriter support for ScheduleDAGMI.
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace llvm {

template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};

template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {

  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const ScheduleDAG *G) {
    return G->MF.getName();
  }

  static bool renderGraphFromBottomUp() {
    return true;
  }

  static bool isNodeHidden(const SUnit *Node) {
    return (Node->NumPreds > 10 || Node->NumSuccs > 10);
  }

  static bool hasNodeAddressLabel(const SUnit *Node,
                                  const ScheduleDAG *Graph) {
    return false;
  }

  /// If you want to override the dot attributes printed for a particular
  /// edge, override this method.
  static std::string getEdgeAttributes(const SUnit *Node,
                                       SUnitIterator EI,
                                       const ScheduleDAG *Graph) {
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
  }

  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    std::string Str;
    raw_string_ostream SS(Str);
    SS << "SU(" << SU->NodeNum << ')';
    return SS.str();
  }
  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    return G->getGraphNodeLabel(SU);
  }
  static std::string getNodeAttributes(const SUnit *N,
                                       const ScheduleDAG *Graph) {
    std::string Str("shape=Mrecord");
    const SchedDFSResult *DFS =
      static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
    if (DFS) {
      Str += ",style=filled,fillcolor=\"#";
      Str += DOT::getColorString(DFS->getSubtreeID(N));
      Str += '"';
    }
    return Str;
  }
};
} // namespace llvm
#endif // NDEBUG

/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
  ViewGraph(this, Name, false, Title);
#else
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
#endif  // NDEBUG
}

/// Out-of-line implementation with no arguments is handy for gdb.
void ScheduleDAGMI::viewGraph() {
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}