MachineScheduler.cpp
  1. //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // MachineScheduler schedules machine instructions after phi elimination. It
  11. // preserves LiveIntervals so it can be invoked before register allocation.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/CodeGen/MachineScheduler.h"
  15. #include "llvm/ADT/PriorityQueue.h"
  16. #include "llvm/Analysis/AliasAnalysis.h"
  17. #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  18. #include "llvm/CodeGen/MachineDominators.h"
  19. #include "llvm/CodeGen/MachineLoopInfo.h"
  20. #include "llvm/CodeGen/MachineRegisterInfo.h"
  21. #include "llvm/CodeGen/Passes.h"
  22. #include "llvm/CodeGen/RegisterClassInfo.h"
  23. #include "llvm/CodeGen/ScheduleDFS.h"
  24. #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  25. #include "llvm/Support/CommandLine.h"
  26. #include "llvm/Support/Debug.h"
  27. #include "llvm/Support/ErrorHandling.h"
  28. #include "llvm/Support/GraphWriter.h"
  29. #include "llvm/Support/raw_ostream.h"
  30. #include "llvm/Target/TargetInstrInfo.h"
  31. using namespace llvm;
  32. #define DEBUG_TYPE "misched"
  33. namespace llvm {
  34. cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  35. cl::desc("Force top-down list scheduling"));
  36. cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  37. cl::desc("Force bottom-up list scheduling"));
  38. cl::opt<bool>
  39. DumpCriticalPathLength("misched-dcpl", cl::Hidden,
  40. cl::desc("Print critical path length to stdout"));
  41. }
  42. #ifndef NDEBUG
  43. static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  44. cl::desc("Pop up a window to show MISched dags after they are processed"));
  45. /// In some situations a few uninteresting nodes depend on nearly all other
  46. /// nodes in the graph; provide a cutoff to hide them.
  47. static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  48. cl::desc("Hide nodes with more predecessor/successor than cutoff"));
  49. static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  50. cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  51. static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  52. cl::desc("Only schedule this function"));
  53. static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  54. cl::desc("Only schedule this MBB#"));
  55. #else
  56. static bool ViewMISchedDAGs = false;
  57. #endif // NDEBUG
  58. /// Avoid quadratic complexity in unusually large basic blocks by limiting the
  59. /// size of the ready lists.
  60. static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  61. cl::desc("Limit ready list to N instructions"), cl::init(256));
  62. static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  63. cl::desc("Enable register pressure scheduling."), cl::init(true));
  64. static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  65. cl::desc("Enable cyclic critical path analysis."), cl::init(true));
  66. static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
  67. cl::desc("Enable memop clustering."),
  68. cl::init(true));
  69. // Experimental heuristics
  70. static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  71. cl::desc("Enable scheduling for macro fusion."), cl::init(true));
  72. static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  73. cl::desc("Verify machine instrs before and after machine scheduling"));
  74. // DAG subtrees must have at least this many nodes.
  75. static const unsigned MinSubtreeSize = 8;
  76. // Pin the vtables to this file.
  77. void MachineSchedStrategy::anchor() {}
  78. void ScheduleDAGMutation::anchor() {}
  79. //===----------------------------------------------------------------------===//
  80. // Machine Instruction Scheduling Pass and Registry
  81. //===----------------------------------------------------------------------===//
  82. MachineSchedContext::MachineSchedContext():
  83. MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
  84. RegClassInfo = new RegisterClassInfo();
  85. }
  86. MachineSchedContext::~MachineSchedContext() {
  87. delete RegClassInfo;
  88. }
  89. namespace {
  90. /// Base class for a machine scheduler class that can run at any point.
  91. class MachineSchedulerBase : public MachineSchedContext,
  92. public MachineFunctionPass {
  93. public:
  94. MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
  95. void print(raw_ostream &O, const Module* = nullptr) const override;
  96. protected:
  97. void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
  98. };
  99. /// MachineScheduler runs after coalescing and before register allocation.
  100. class MachineScheduler : public MachineSchedulerBase {
  101. public:
  102. MachineScheduler();
  103. void getAnalysisUsage(AnalysisUsage &AU) const override;
  104. bool runOnMachineFunction(MachineFunction&) override;
  105. static char ID; // Class identification, replacement for typeinfo
  106. protected:
  107. ScheduleDAGInstrs *createMachineScheduler();
  108. };
  109. /// PostMachineScheduler runs shortly before code emission.
  110. class PostMachineScheduler : public MachineSchedulerBase {
  111. public:
  112. PostMachineScheduler();
  113. void getAnalysisUsage(AnalysisUsage &AU) const override;
  114. bool runOnMachineFunction(MachineFunction&) override;
  115. static char ID; // Class identification, replacement for typeinfo
  116. protected:
  117. ScheduleDAGInstrs *createPostMachineScheduler();
  118. };
  119. } // namespace
  120. char MachineScheduler::ID = 0;
  121. char &llvm::MachineSchedulerID = MachineScheduler::ID;
  122. INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
  123. "Machine Instruction Scheduler", false, false)
  124. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  125. INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
  126. INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
  127. INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
  128. "Machine Instruction Scheduler", false, false)
  129. MachineScheduler::MachineScheduler()
  130. : MachineSchedulerBase(ID) {
  131. initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
  132. }
  133. void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  134. AU.setPreservesCFG();
  135. AU.addRequiredID(MachineDominatorsID);
  136. AU.addRequired<MachineLoopInfo>();
  137. AU.addRequired<AAResultsWrapperPass>();
  138. AU.addRequired<TargetPassConfig>();
  139. AU.addRequired<SlotIndexes>();
  140. AU.addPreserved<SlotIndexes>();
  141. AU.addRequired<LiveIntervals>();
  142. AU.addPreserved<LiveIntervals>();
  143. MachineFunctionPass::getAnalysisUsage(AU);
  144. }
  145. char PostMachineScheduler::ID = 0;
  146. char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
  147. INITIALIZE_PASS(PostMachineScheduler, "postmisched",
  148. "PostRA Machine Instruction Scheduler", false, false)
  149. PostMachineScheduler::PostMachineScheduler()
  150. : MachineSchedulerBase(ID) {
  151. initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
  152. }
  153. void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  154. AU.setPreservesCFG();
  155. AU.addRequiredID(MachineDominatorsID);
  156. AU.addRequired<MachineLoopInfo>();
  157. AU.addRequired<TargetPassConfig>();
  158. MachineFunctionPass::getAnalysisUsage(AU);
  159. }
  160. MachinePassRegistry MachineSchedRegistry::Registry;
  161. /// A dummy default scheduler factory indicates whether the scheduler
  162. /// is overridden on the command line.
  163. static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  164. return nullptr;
  165. }
  166. /// MachineSchedOpt allows command line selection of the scheduler.
  167. static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
  168. RegisterPassParser<MachineSchedRegistry> >
  169. MachineSchedOpt("misched",
  170. cl::init(&useDefaultMachineSched), cl::Hidden,
  171. cl::desc("Machine instruction scheduler to use"));
  172. static MachineSchedRegistry
  173. DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
  174. useDefaultMachineSched);
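// Note: any scheduler registered with MachineSchedRegistry can be selected by
// name on the command line (e.g. -misched=<name> when running llc). The
// "default" entry returns nullptr, so createMachineScheduler() falls back to
// the target's choice or, failing that, to the generic scheduler.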
  175. static cl::opt<bool> EnableMachineSched(
  176. "enable-misched",
  177. cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
  178. cl::Hidden);
  179. static cl::opt<bool> EnablePostRAMachineSched(
  180. "enable-post-misched",
  181. cl::desc("Enable the post-ra machine instruction scheduling pass."),
  182. cl::init(true), cl::Hidden);
  183. /// Forward declare the standard machine scheduler. This will be used as the
  184. /// default scheduler if the target does not set a default.
  185. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
  186. static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
  187. /// Decrement this iterator until reaching the top or a non-debug instr.
  188. static MachineBasicBlock::const_iterator
  189. priorNonDebug(MachineBasicBlock::const_iterator I,
  190. MachineBasicBlock::const_iterator Beg) {
  191. assert(I != Beg && "reached the top of the region, cannot decrement");
  192. while (--I != Beg) {
  193. if (!I->isDebugValue())
  194. break;
  195. }
  196. return I;
  197. }
  198. /// Non-const version.
  199. static MachineBasicBlock::iterator
  200. priorNonDebug(MachineBasicBlock::iterator I,
  201. MachineBasicBlock::const_iterator Beg) {
  202. return const_cast<MachineInstr*>(
  203. &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
  204. }
  205. /// If this iterator is a debug value, increment until reaching the End or a
  206. /// non-debug instruction.
  207. static MachineBasicBlock::const_iterator
  208. nextIfDebug(MachineBasicBlock::const_iterator I,
  209. MachineBasicBlock::const_iterator End) {
  210. for(; I != End; ++I) {
  211. if (!I->isDebugValue())
  212. break;
  213. }
  214. return I;
  215. }
  216. /// Non-const version.
  217. static MachineBasicBlock::iterator
  218. nextIfDebug(MachineBasicBlock::iterator I,
  219. MachineBasicBlock::const_iterator End) {
  220. // Cast the return value to non-const MachineInstr, then cast to an
  221. // instr_iterator, which does not check for null, and finally return a
  222. // bundle_iterator.
  223. return MachineBasicBlock::instr_iterator(
  224. const_cast<MachineInstr*>(
  225. &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
  226. }
  227. /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
  228. ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  229. // Select the scheduler, or set the default.
  230. MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  231. if (Ctor != useDefaultMachineSched)
  232. return Ctor(this);
  233. // Get the default scheduler set by the target for this function.
  234. ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  235. if (Scheduler)
  236. return Scheduler;
  237. // Default to GenericScheduler.
  238. return createGenericSchedLive(this);
  239. }
  240. /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
  241. /// the caller. We don't have a command line option to override the postRA
  242. /// scheduler. The Target must configure it.
  243. ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  244. // Get the postRA scheduler set by the target for this function.
  245. ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  246. if (Scheduler)
  247. return Scheduler;
  248. // Default to GenericScheduler.
  249. return createGenericSchedPostRA(this);
  250. }
  251. /// Top-level MachineScheduler pass driver.
  252. ///
  253. /// Visit blocks in function order. Divide each block into scheduling regions
  254. /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
  255. /// consistent with the DAG builder, which traverses the interior of the
  256. /// scheduling regions bottom-up.
  257. ///
  258. /// This design avoids exposing scheduling boundaries to the DAG builder,
  259. /// simplifying the DAG builder's support for "special" target instructions.
  260. /// At the same time the design allows target schedulers to operate across
  261. /// scheduling boundaries, for example to bundle the boundary instructions
  262. /// without reordering them. This creates complexity, because the target
  263. /// scheduler must update the RegionBegin and RegionEnd positions cached by
  264. /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
  265. /// design would be to split blocks at scheduling boundaries, but LLVM has a
  266. /// general bias against block splitting purely for implementation simplicity.
  267. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  268. if (skipFunction(*mf.getFunction()))
  269. return false;
  270. if (EnableMachineSched.getNumOccurrences()) {
  271. if (!EnableMachineSched)
  272. return false;
  273. } else if (!mf.getSubtarget().enableMachineScheduler())
  274. return false;
  275. DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
  276. // Initialize the context of the pass.
  277. MF = &mf;
  278. MLI = &getAnalysis<MachineLoopInfo>();
  279. MDT = &getAnalysis<MachineDominatorTree>();
  280. PassConfig = &getAnalysis<TargetPassConfig>();
  281. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  282. LIS = &getAnalysis<LiveIntervals>();
  283. if (VerifyScheduling) {
  284. DEBUG(LIS->dump());
  285. MF->verify(this, "Before machine scheduling.");
  286. }
  287. RegClassInfo->runOnMachineFunction(*MF);
  288. // Instantiate the selected scheduler for this target, function, and
  289. // optimization level.
  290. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  291. scheduleRegions(*Scheduler, false);
  292. DEBUG(LIS->dump());
  293. if (VerifyScheduling)
  294. MF->verify(this, "After machine scheduling.");
  295. return true;
  296. }
  297. bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  298. if (skipFunction(*mf.getFunction()))
  299. return false;
  300. if (EnablePostRAMachineSched.getNumOccurrences()) {
  301. if (!EnablePostRAMachineSched)
  302. return false;
  303. } else if (!mf.getSubtarget().enablePostRAScheduler()) {
  304. DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
  305. return false;
  306. }
  307. DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
  308. // Initialize the context of the pass.
  309. MF = &mf;
  310. PassConfig = &getAnalysis<TargetPassConfig>();
  311. if (VerifyScheduling)
  312. MF->verify(this, "Before post machine scheduling.");
  313. // Instantiate the selected scheduler for this target, function, and
  314. // optimization level.
  315. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  316. scheduleRegions(*Scheduler, true);
  317. if (VerifyScheduling)
  318. MF->verify(this, "After post machine scheduling.");
  319. return true;
  320. }
  321. /// Return true if the given instruction should not be included in a scheduling
  322. /// region.
  323. ///
  324. /// MachineScheduler does not currently support scheduling across calls. To
  325. /// handle calls, the DAG builder needs to be modified to create register
  326. /// anti/output dependencies on the registers clobbered by the call's regmask
  327. /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
  328. /// scheduling across calls. In PostRA scheduling, we need the isCall check to enforce
  329. /// the boundary, but there would be no benefit to postRA scheduling across
  330. /// calls this late anyway.
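// Besides calls, the target hook typically treats terminators, labels and
// stack-pointer adjustments as boundaries; the exact set is target-defined
// via TargetInstrInfo::isSchedulingBoundary.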
  331. static bool isSchedBoundary(MachineBasicBlock::iterator MI,
  332. MachineBasicBlock *MBB,
  333. MachineFunction *MF,
  334. const TargetInstrInfo *TII) {
  335. return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
  336. }
  337. /// Main driver for both MachineScheduler and PostMachineScheduler.
  338. void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
  339. bool FixKillFlags) {
  340. const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  341. // Visit all machine basic blocks.
  342. //
  343. // TODO: Visit blocks in global postorder or postorder within the bottom-up
  344. // loop tree. Then we can optionally compute global RegPressure.
  345. for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
  346. MBB != MBBEnd; ++MBB) {
  347. Scheduler.startBlock(&*MBB);
  348. #ifndef NDEBUG
  349. if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
  350. continue;
  351. if (SchedOnlyBlock.getNumOccurrences()
  352. && (int)SchedOnlyBlock != MBB->getNumber())
  353. continue;
  354. #endif
  355. // Break the block into scheduling regions [I, RegionEnd), and schedule each
  356. // region as soon as it is discovered. RegionEnd points to the scheduling
  357. // boundary at the bottom of the region. The DAG does not include RegionEnd,
  358. // but the region does (i.e. the next RegionEnd is above the previous
  359. // RegionBegin). If the current block has no terminator then RegionEnd ==
  360. // MBB->end() for the bottom region.
  361. //
  362. // The Scheduler may insert instructions during either schedule() or
  363. // exitRegion(), even for empty regions. So the local iterators 'I' and
  364. // 'RegionEnd' are invalid across these calls.
  365. //
  366. // MBB::size() uses instr_iterator to count. Here we need a bundle to count
  367. // as a single instruction.
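// For example, a block "a = ...; b = ...; call f; c = ..." yields two regions,
// visited bottom-up: first the trivial region below the call, then [a, call).
// The call itself is a boundary and is never reordered.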
  368. unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
  369. for(MachineBasicBlock::iterator RegionEnd = MBB->end();
  370. RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
  371. // Avoid decrementing RegionEnd for blocks with no terminator.
  372. if (RegionEnd != MBB->end() ||
  373. isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
  374. --RegionEnd;
  375. // Count the boundary instruction.
  376. --RemainingInstrs;
  377. }
  378. // The next region starts above the previous region. Look backward in the
  379. // instruction stream until we find the nearest boundary.
  380. unsigned NumRegionInstrs = 0;
  381. MachineBasicBlock::iterator I = RegionEnd;
  382. for(;I != MBB->begin(); --I, --RemainingInstrs) {
  383. if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
  384. break;
  385. if (!I->isDebugValue())
  386. ++NumRegionInstrs;
  387. }
  388. // Notify the scheduler of the region, even if we may skip scheduling
  389. // it. Perhaps it still needs to be bundled.
  390. Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
  391. // Skip empty scheduling regions (0 or 1 schedulable instructions).
  392. if (I == RegionEnd || I == std::prev(RegionEnd)) {
  393. // Close the current region. Bundle the terminator if needed.
  394. // This invalidates 'RegionEnd' and 'I'.
  395. Scheduler.exitRegion();
  396. continue;
  397. }
  398. DEBUG(dbgs() << "********** MI Scheduling **********\n");
  399. DEBUG(dbgs() << MF->getName()
  400. << ":BB#" << MBB->getNumber() << " " << MBB->getName()
  401. << "\n From: " << *I << " To: ";
  402. if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
  403. else dbgs() << "End";
  404. dbgs() << " RegionInstrs: " << NumRegionInstrs
  405. << " Remaining: " << RemainingInstrs << "\n");
  406. if (DumpCriticalPathLength) {
  407. errs() << MF->getName();
  408. errs() << ":BB# " << MBB->getNumber();
  409. errs() << " " << MBB->getName() << " \n";
  410. }
  411. // Schedule a region: possibly reorder instructions.
  412. // This invalidates 'RegionEnd' and 'I'.
  413. Scheduler.schedule();
  414. // Close the current region.
  415. Scheduler.exitRegion();
  416. // Scheduling has invalidated the current iterator 'I'. Ask the
  417. // scheduler for the top of its scheduled region.
  418. RegionEnd = Scheduler.begin();
  419. }
  420. assert(RemainingInstrs == 0 && "Instruction count mismatch!");
  421. Scheduler.finishBlock();
  422. // FIXME: Ideally, no further passes should rely on kill flags. However,
  423. // thumb2 size reduction is currently an exception, so the PostMIScheduler
  424. // needs to do this.
  425. if (FixKillFlags)
  426. Scheduler.fixupKills(&*MBB);
  427. }
  428. Scheduler.finalizeSchedule();
  429. }
  430. void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  431. // unimplemented
  432. }
  433. LLVM_DUMP_METHOD
  434. void ReadyQueue::dump() {
  435. dbgs() << "Queue " << Name << ": ";
  436. for (unsigned i = 0, e = Queue.size(); i < e; ++i)
  437. dbgs() << Queue[i]->NodeNum << " ";
  438. dbgs() << "\n";
  439. }
  440. //===----------------------------------------------------------------------===//
  441. // ScheduleDAGMI - Basic machine instruction scheduling. This is
  442. // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
  443. // virtual registers.
  444. //===----------------------------------------------------------------------===//
  445. // Provide a vtable anchor.
  446. ScheduleDAGMI::~ScheduleDAGMI() {
  447. }
  448. bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  449. return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
  450. }
  451. bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  452. if (SuccSU != &ExitSU) {
  453. // Do not use WillCreateCycle; it assumes SD scheduling.
  454. // If Pred is reachable from Succ, then the edge creates a cycle.
  455. if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
  456. return false;
  457. Topo.AddPred(SuccSU, PredDep.getSUnit());
  458. }
  459. SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  460. // Return true regardless of whether a new edge needed to be inserted.
  461. return true;
  462. }
  463. /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
  464. /// NumPredsLeft reaches zero, release the successor node.
  465. ///
  466. /// FIXME: Adjust SuccSU height based on MinLatency.
  467. void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  468. SUnit *SuccSU = SuccEdge->getSUnit();
  469. if (SuccEdge->isWeak()) {
  470. --SuccSU->WeakPredsLeft;
  471. if (SuccEdge->isCluster())
  472. NextClusterSucc = SuccSU;
  473. return;
  474. }
  475. #ifndef NDEBUG
  476. if (SuccSU->NumPredsLeft == 0) {
  477. dbgs() << "*** Scheduling failed! ***\n";
  478. SuccSU->dump(this);
  479. dbgs() << " has been released too many times!\n";
  480. llvm_unreachable(nullptr);
  481. }
  482. #endif
  483. // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  484. // CurrCycle may have advanced since then.
  485. if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
  486. SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
  487. --SuccSU->NumPredsLeft;
  488. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
  489. SchedImpl->releaseTopNode(SuccSU);
  490. }
  491. /// releaseSuccessors - Call releaseSucc on each of SU's successors.
  492. void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  493. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
  494. I != E; ++I) {
  495. releaseSucc(SU, &*I);
  496. }
  497. }
  498. /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
  499. /// NumSuccsLeft reaches zero, release the predecessor node.
  500. ///
  501. /// FIXME: Adjust PredSU height based on MinLatency.
  502. void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  503. SUnit *PredSU = PredEdge->getSUnit();
  504. if (PredEdge->isWeak()) {
  505. --PredSU->WeakSuccsLeft;
  506. if (PredEdge->isCluster())
  507. NextClusterPred = PredSU;
  508. return;
  509. }
  510. #ifndef NDEBUG
  511. if (PredSU->NumSuccsLeft == 0) {
  512. dbgs() << "*** Scheduling failed! ***\n";
  513. PredSU->dump(this);
  514. dbgs() << " has been released too many times!\n";
  515. llvm_unreachable(nullptr);
  516. }
  517. #endif
  518. // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  519. // CurrCycle may have advanced since then.
  520. if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
  521. PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
  522. --PredSU->NumSuccsLeft;
  523. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
  524. SchedImpl->releaseBottomNode(PredSU);
  525. }
  526. /// releasePredecessors - Call releasePred on each of SU's predecessors.
  527. void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  528. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
  529. I != E; ++I) {
  530. releasePred(SU, &*I);
  531. }
  532. }
  533. /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
  534. /// crossing a scheduling boundary. [begin, end) includes all instructions in
  535. /// the region, including the boundary itself and single-instruction regions
  536. /// that don't get scheduled.
  537. void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
  538. MachineBasicBlock::iterator begin,
  539. MachineBasicBlock::iterator end,
  540. unsigned regioninstrs)
  541. {
  542. ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
  543. SchedImpl->initPolicy(begin, end, regioninstrs);
  544. }
  545. /// This is normally called from the main scheduler loop but may also be invoked
  546. /// by the scheduling strategy to perform additional code motion.
  547. void ScheduleDAGMI::moveInstruction(
  548. MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  549. // Advance RegionBegin if the first instruction moves down.
  550. if (&*RegionBegin == MI)
  551. ++RegionBegin;
  552. // Update the instruction stream.
  553. BB->splice(InsertPos, BB, MI);
  554. // Update LiveIntervals
  555. if (LIS)
  556. LIS->handleMove(*MI, /*UpdateFlags=*/true);
  557. // Recede RegionBegin if an instruction moves above the first.
  558. if (RegionBegin == InsertPos)
  559. RegionBegin = MI;
  560. }
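// checkSchedLimit() implements the -misched-cutoff debug aid declared above:
// in debug builds, scheduling stops once NumInstrsScheduled reaches the
// cutoff, which helps bisect a misbehaving scheduling decision.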
  561. bool ScheduleDAGMI::checkSchedLimit() {
  562. #ifndef NDEBUG
  563. if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
  564. CurrentTop = CurrentBottom;
  565. return false;
  566. }
  567. ++NumInstrsScheduled;
  568. #endif
  569. return true;
  570. }
  571. /// Per-region scheduling driver, called back from
  572. /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
  573. /// does not consider liveness or register pressure. It is useful for PostRA
  574. /// scheduling and potentially other custom schedulers.
  575. void ScheduleDAGMI::schedule() {
  576. DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  577. DEBUG(SchedImpl->dumpPolicy());
  578. // Build the DAG.
  579. buildSchedGraph(AA);
  580. Topo.InitDAGTopologicalSorting();
  581. postprocessDAG();
  582. SmallVector<SUnit*, 8> TopRoots, BotRoots;
  583. findRootsAndBiasEdges(TopRoots, BotRoots);
  584. // Initialize the strategy before modifying the DAG.
  585. // This may initialize a DFSResult to be used for queue priority.
  586. SchedImpl->initialize(this);
  587. DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
  588. SUnits[su].dumpAll(this));
  589. if (ViewMISchedDAGs) viewGraph();
  590. // Initialize ready queues now that the DAG and priority data are finalized.
  591. initQueues(TopRoots, BotRoots);
  592. bool IsTopNode = false;
  593. while (true) {
  594. DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
  595. SUnit *SU = SchedImpl->pickNode(IsTopNode);
  596. if (!SU) break;
  597. assert(!SU->isScheduled && "Node already scheduled");
  598. if (!checkSchedLimit())
  599. break;
  600. MachineInstr *MI = SU->getInstr();
  601. if (IsTopNode) {
  602. assert(SU->isTopReady() && "node still has unscheduled dependencies");
  603. if (&*CurrentTop == MI)
  604. CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
  605. else
  606. moveInstruction(MI, CurrentTop);
  607. } else {
  608. assert(SU->isBottomReady() && "node still has unscheduled dependencies");
  609. MachineBasicBlock::iterator priorII =
  610. priorNonDebug(CurrentBottom, CurrentTop);
  611. if (&*priorII == MI)
  612. CurrentBottom = priorII;
  613. else {
  614. if (&*CurrentTop == MI)
  615. CurrentTop = nextIfDebug(++CurrentTop, priorII);
  616. moveInstruction(MI, CurrentBottom);
  617. CurrentBottom = MI;
  618. }
  619. }
  620. // Notify the scheduling strategy before updating the DAG.
  621. // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
  622. // runs, it can then use the accurate ReadyCycle time to determine whether
  623. // newly released nodes can move to the readyQ.
  624. SchedImpl->schedNode(SU, IsTopNode);
  625. updateQueues(SU, IsTopNode);
  626. }
  627. assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
  628. placeDebugValues();
  629. DEBUG({
  630. unsigned BBNum = begin()->getParent()->getNumber();
  631. dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
  632. dumpSchedule();
  633. dbgs() << '\n';
  634. });
  635. }
  636. /// Apply each ScheduleDAGMutation step in order.
  637. void ScheduleDAGMI::postprocessDAG() {
  638. for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
  639. Mutations[i]->apply(this);
  640. }
  641. }
  642. void ScheduleDAGMI::
  643. findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
  644. SmallVectorImpl<SUnit*> &BotRoots) {
  645. for (std::vector<SUnit>::iterator
  646. I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
  647. SUnit *SU = &(*I);
  648. assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
  649. // Order predecessors so DFSResult follows the critical path.
  650. SU->biasCriticalPath();
  651. // A SUnit is ready to top schedule if it has no predecessors.
  652. if (!I->NumPredsLeft)
  653. TopRoots.push_back(SU);
  654. // A SUnit is ready to bottom schedule if it has no successors.
  655. if (!I->NumSuccsLeft)
  656. BotRoots.push_back(SU);
  657. }
  658. ExitSU.biasCriticalPath();
  659. }
  660. /// Identify DAG roots and set up scheduler queues.
  661. void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
  662. ArrayRef<SUnit*> BotRoots) {
  663. NextClusterSucc = nullptr;
  664. NextClusterPred = nullptr;
  665. // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  666. //
  667. // Nodes with unreleased weak edges can still be roots.
  668. // Release top roots in forward order.
  669. for (SmallVectorImpl<SUnit*>::const_iterator
  670. I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
  671. SchedImpl->releaseTopNode(*I);
  672. }
  673. // Release bottom roots in reverse order so the higher priority nodes appear
  674. // first. This is more natural and slightly more efficient.
  675. for (SmallVectorImpl<SUnit*>::const_reverse_iterator
  676. I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
  677. SchedImpl->releaseBottomNode(*I);
  678. }
  679. releaseSuccessors(&EntrySU);
  680. releasePredecessors(&ExitSU);
  681. SchedImpl->registerRoots();
  682. // Advance past initial DebugValues.
  683. CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  684. CurrentBottom = RegionEnd;
  685. }
  686. /// Update scheduler queues after scheduling an instruction.
  687. void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  688. // Release dependent instructions for scheduling.
  689. if (IsTopNode)
  690. releaseSuccessors(SU);
  691. else
  692. releasePredecessors(SU);
  693. SU->isScheduled = true;
  694. }
  695. /// Reinsert any remaining debug_values, just like the PostRA scheduler.
  696. void ScheduleDAGMI::placeDebugValues() {
  697. // If first instruction was a DBG_VALUE then put it back.
  698. if (FirstDbgValue) {
  699. BB->splice(RegionBegin, BB, FirstDbgValue);
  700. RegionBegin = FirstDbgValue;
  701. }
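// Reinsert each remaining DBG_VALUE immediately after the instruction it
// originally followed, as recorded in DbgValues by the DAG builder.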
  702. for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
  703. DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
  704. std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
  705. MachineInstr *DbgValue = P.first;
  706. MachineBasicBlock::iterator OrigPrevMI = P.second;
  707. if (&*RegionBegin == DbgValue)
  708. ++RegionBegin;
  709. BB->splice(++OrigPrevMI, BB, DbgValue);
  710. if (OrigPrevMI == std::prev(RegionEnd))
  711. RegionEnd = DbgValue;
  712. }
  713. DbgValues.clear();
  714. FirstDbgValue = nullptr;
  715. }
  716. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  717. void ScheduleDAGMI::dumpSchedule() const {
  718. for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
  719. if (SUnit *SU = getSUnit(&(*MI)))
  720. SU->dump(this);
  721. else
  722. dbgs() << "Missing SUnit\n";
  723. }
  724. }
  725. #endif
  726. //===----------------------------------------------------------------------===//
  727. // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
  728. // preservation.
  729. //===----------------------------------------------------------------------===//
  730. ScheduleDAGMILive::~ScheduleDAGMILive() {
  731. delete DFSResult;
  732. }
  733. /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
  734. /// crossing a scheduling boundary. [begin, end) includes all instructions in
  735. /// the region, including the boundary itself and single-instruction regions
  736. /// that don't get scheduled.
  737. void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
  738. MachineBasicBlock::iterator begin,
  739. MachineBasicBlock::iterator end,
  740. unsigned regioninstrs)
  741. {
  742. // ScheduleDAGMI initializes SchedImpl's per-region policy.
  743. ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
  744. // For convenience remember the end of the liveness region.
  745. LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
  746. SUPressureDiffs.clear();
  747. ShouldTrackPressure = SchedImpl->shouldTrackPressure();
  748. ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
  749. if (ShouldTrackLaneMasks) {
  750. if (!ShouldTrackPressure)
  751. report_fatal_error("ShouldTrackLaneMasks requires ShouldTrackPressure");
  752. // Dead subregister defs have no users and therefore no dependencies;
  753. // moving them around may cause live intervals to degrade into multiple
  754. // components. Change independent components to have their own vreg to avoid
  755. // this.
  756. if (!DisconnectedComponentsRenamed)
  757. LIS->renameDisconnectedComponents();
  758. }
  759. }
  760. // Set up the register pressure trackers for the top and bottom scheduled
  761. // regions.
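// RPTracker has already receded over the whole region while the DAG was
// built; its live-in/live-out sets seed TopRPTracker and BotRPTracker, which
// then advance with CurrentTop/CurrentBottom as instructions are scheduled.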
  762. void ScheduleDAGMILive::initRegPressure() {
  763. TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
  764. ShouldTrackLaneMasks, false);
  765. BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
  766. ShouldTrackLaneMasks, false);
  767. // Close the RPTracker to finalize live ins.
  768. RPTracker.closeRegion();
  769. DEBUG(RPTracker.dump());
  770. // Initialize the live ins and live outs.
  771. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  772. BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
  773. // Close one end of the tracker so we can call
  774. // getMaxUpward/DownwardPressureDelta before advancing across any
  775. // instructions. This converts currently live regs into live ins/outs.
  776. TopRPTracker.closeTop();
  777. BotRPTracker.closeBottom();
  778. BotRPTracker.initLiveThru(RPTracker);
  779. if (!BotRPTracker.getLiveThru().empty()) {
  780. TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
  781. DEBUG(dbgs() << "Live Thru: ";
  782. dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  783. }
  784. // For each live out vreg reduce the pressure change associated with other
  785. // uses of the same vreg below the live-out reaching def.
  786. updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
  787. // Account for liveness generated by the region boundary.
  788. if (LiveRegionEnd != RegionEnd) {
  789. SmallVector<RegisterMaskPair, 8> LiveUses;
  790. BotRPTracker.recede(&LiveUses);
  791. updatePressureDiffs(LiveUses);
  792. }
  793. DEBUG(
  794. dbgs() << "Top Pressure:\n";
  795. dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
  796. dbgs() << "Bottom Pressure:\n";
  797. dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
  798. );
  799. assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
  800. // Cache the list of excess pressure sets in this region. This will also track
  801. // the max pressure in the scheduled code for these sets.
  802. RegionCriticalPSets.clear();
  803. const std::vector<unsigned> &RegionPressure =
  804. RPTracker.getPressure().MaxSetPressure;
  805. for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
  806. unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
  807. if (RegionPressure[i] > Limit) {
  808. DEBUG(dbgs() << TRI->getRegPressureSetName(i)
  809. << " Limit " << Limit
  810. << " Actual " << RegionPressure[i] << "\n");
  811. RegionCriticalPSets.push_back(PressureChange(i));
  812. }
  813. }
  814. DEBUG(dbgs() << "Excess PSets: ";
  815. for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
  816. dbgs() << TRI->getRegPressureSetName(
  817. RegionCriticalPSets[i].getPSet()) << " ";
  818. dbgs() << "\n");
  819. }
  820. void ScheduleDAGMILive::
  821. updateScheduledPressure(const SUnit *SU,
  822. const std::vector<unsigned> &NewMaxPressure) {
  823. const PressureDiff &PDiff = getPressureDiff(SU);
  824. unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
  825. for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
  826. I != E; ++I) {
  827. if (!I->isValid())
  828. break;
  829. unsigned ID = I->getPSet();
  830. while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
  831. ++CritIdx;
  832. if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
  833. if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
  834. && NewMaxPressure[ID] <= INT16_MAX)
  835. RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
  836. }
  837. unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
  838. if (NewMaxPressure[ID] >= Limit - 2) {
  839. DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
  840. << NewMaxPressure[ID]
  841. << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
  842. << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
  843. }
  844. }
  845. }
  846. /// Update the PressureDiff array for liveness after scheduling this
  847. /// instruction.
  848. void ScheduleDAGMILive::updatePressureDiffs(
  849. ArrayRef<RegisterMaskPair> LiveUses) {
  850. for (const RegisterMaskPair &P : LiveUses) {
  851. unsigned Reg = P.RegUnit;
  852. /// FIXME: Currently assuming single-use physregs.
  853. if (!TRI->isVirtualRegister(Reg))
  854. continue;
  855. if (ShouldTrackLaneMasks) {
  856. // If the register has just become live then other uses won't change
  857. // this fact anymore => decrement pressure.
  858. // If the register has just become dead then other uses make it come
  859. // back to life => increment pressure.
  860. bool Decrement = P.LaneMask != 0;
  861. for (const VReg2SUnit &V2SU
  862. : make_range(VRegUses.find(Reg), VRegUses.end())) {
  863. SUnit &SU = *V2SU.SU;
  864. if (SU.isScheduled || &SU == &ExitSU)
  865. continue;
  866. PressureDiff &PDiff = getPressureDiff(&SU);
  867. PDiff.addPressureChange(Reg, Decrement, &MRI);
  868. DEBUG(
  869. dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
  870. << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
  871. << ' ' << *SU.getInstr();
  872. dbgs() << " to ";
  873. PDiff.dump(*TRI);
  874. );
  875. }
  876. } else {
  877. assert(P.LaneMask != 0);
  878. DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
  879. // This may be called before CurrentBottom has been initialized. However,
  880. // BotRPTracker must have a valid position. We want the value live into the
  881. // instruction or live out of the block, so ask for the previous
  882. // instruction's live-out.
  883. const LiveInterval &LI = LIS->getInterval(Reg);
  884. VNInfo *VNI;
  885. MachineBasicBlock::const_iterator I =
  886. nextIfDebug(BotRPTracker.getPos(), BB->end());
  887. if (I == BB->end())
  888. VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
  889. else {
  890. LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
  891. VNI = LRQ.valueIn();
  892. }
  893. // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
  894. assert(VNI && "No live value at use.");
  895. for (const VReg2SUnit &V2SU
  896. : make_range(VRegUses.find(Reg), VRegUses.end())) {
  897. SUnit *SU = V2SU.SU;
  898. // If this use comes before the reaching def, it cannot be a last use,
  899. // so decrease its pressure change.
  900. if (!SU->isScheduled && SU != &ExitSU) {
  901. LiveQueryResult LRQ =
  902. LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
  903. if (LRQ.valueIn() == VNI) {
  904. PressureDiff &PDiff = getPressureDiff(SU);
  905. PDiff.addPressureChange(Reg, true, &MRI);
  906. DEBUG(
  907. dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
  908. << *SU->getInstr();
  909. dbgs() << " to ";
  910. PDiff.dump(*TRI);
  911. );
  912. }
  913. }
  914. }
  915. }
  916. }
  917. }
  918. /// schedule - Called back from MachineScheduler::runOnMachineFunction
  919. /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
  920. /// only includes instructions that have DAG nodes, not scheduling boundaries.
  921. ///
  922. /// This is a skeletal driver, with all the functionality pushed into helpers,
  923. /// so that it can be easily extended by experimental schedulers. Generally,
  924. /// implementing MachineSchedStrategy should be sufficient to implement a new
  925. /// scheduling algorithm. However, if a scheduler further subclasses
  926. /// ScheduleDAGMILive then it will want to override this virtual method in order
  927. /// to update any specialized state.
  928. void ScheduleDAGMILive::schedule() {
  929. DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
  930. DEBUG(SchedImpl->dumpPolicy());
  931. buildDAGWithRegPressure();
  932. Topo.InitDAGTopologicalSorting();
  933. postprocessDAG();
  934. SmallVector<SUnit*, 8> TopRoots, BotRoots;
  935. findRootsAndBiasEdges(TopRoots, BotRoots);
  936. // Initialize the strategy before modifying the DAG.
  937. // This may initialize a DFSResult to be used for queue priority.
  938. SchedImpl->initialize(this);
  939. DEBUG(
  940. for (const SUnit &SU : SUnits) {
  941. SU.dumpAll(this);
  942. if (ShouldTrackPressure) {
  943. dbgs() << " Pressure Diff : ";
  944. getPressureDiff(&SU).dump(*TRI);
  945. }
  946. dbgs() << '\n';
  947. }
  948. );
  949. if (ViewMISchedDAGs) viewGraph();
  950. // Initialize ready queues now that the DAG and priority data are finalized.
  951. initQueues(TopRoots, BotRoots);
  952. if (ShouldTrackPressure) {
  953. assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
  954. TopRPTracker.setPos(CurrentTop);
  955. }
  956. bool IsTopNode = false;
  957. while (true) {
  958. DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
  959. SUnit *SU = SchedImpl->pickNode(IsTopNode);
  960. if (!SU) break;
  961. assert(!SU->isScheduled && "Node already scheduled");
  962. if (!checkSchedLimit())
  963. break;
  964. scheduleMI(SU, IsTopNode);
  965. if (DFSResult) {
  966. unsigned SubtreeID = DFSResult->getSubtreeID(SU);
  967. if (!ScheduledTrees.test(SubtreeID)) {
  968. ScheduledTrees.set(SubtreeID);
  969. DFSResult->scheduleTree(SubtreeID);
  970. SchedImpl->scheduleTree(SubtreeID);
  971. }
  972. }
  973. // Notify the scheduling strategy after updating the DAG.
  974. SchedImpl->schedNode(SU, IsTopNode);
  975. updateQueues(SU, IsTopNode);
  976. }
  977. assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
  978. placeDebugValues();
  979. DEBUG({
  980. unsigned BBNum = begin()->getParent()->getNumber();
  981. dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
  982. dumpSchedule();
  983. dbgs() << '\n';
  984. });
  985. }
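// The block comment above notes that implementing MachineSchedStrategy is
// usually enough to plug in a new scheduling algorithm. As a hedged
// illustration only (not part of this file; it assumes the strategy interface
// exercised by schedule() above: initialize/pickNode/schedNode plus the node
// release hooks), a trivial bottom-up strategy might look roughly like this:
//
//   class TrivialBottomUpStrategy : public MachineSchedStrategy {
//     std::vector<SUnit*> ReadyQ; // nodes released by the bottom boundary
//   public:
//     void initialize(ScheduleDAGMI *DAG) override { ReadyQ.clear(); }
//     void releaseTopNode(SUnit *SU) override {}
//     void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); }
//     SUnit *pickNode(bool &IsTopNode) override {
//       IsTopNode = false;                     // schedule strictly bottom-up
//       while (!ReadyQ.empty() && ReadyQ.back()->isScheduled)
//         ReadyQ.pop_back();                   // drop stale entries
//       if (ReadyQ.empty())
//         return nullptr;                      // tells schedule() to stop
//       SUnit *SU = ReadyQ.back();             // naive pick: last released
//       ReadyQ.pop_back();
//       return SU;
//     }
//     void schedNode(SUnit *SU, bool IsTopNode) override {}
//   };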
/// Build the DAG and set up three register pressure trackers.
  987. void ScheduleDAGMILive::buildDAGWithRegPressure() {
  988. if (!ShouldTrackPressure) {
  989. RPTracker.reset();
  990. RegionCriticalPSets.clear();
  991. buildSchedGraph(AA);
  992. return;
  993. }
  994. // Initialize the register pressure tracker used by buildSchedGraph.
  995. RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
  996. ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
// Account for liveness generated by the region boundary.
  998. if (LiveRegionEnd != RegionEnd)
  999. RPTracker.recede();
  1000. // Build the DAG, and compute current register pressure.
  1001. buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
  1002. // Initialize top/bottom trackers after computing region pressure.
  1003. initRegPressure();
  1004. }
  1005. void ScheduleDAGMILive::computeDFSResult() {
  1006. if (!DFSResult)
  1007. DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  1008. DFSResult->clear();
  1009. ScheduledTrees.clear();
  1010. DFSResult->resize(SUnits.size());
  1011. DFSResult->compute(SUnits);
  1012. ScheduledTrees.resize(DFSResult->getNumSubtrees());
  1013. }
  1014. /// Compute the max cyclic critical path through the DAG. The scheduling DAG
  1015. /// only provides the critical path for single block loops. To handle loops that
  1016. /// span blocks, we could use the vreg path latencies provided by
  1017. /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
  1018. /// available for use in the scheduler.
  1019. ///
  1020. /// The cyclic path estimation identifies a def-use pair that crosses the back
  1021. /// edge and considers the depth and height of the nodes. For example, consider
/// the following instruction sequence where each instruction has unit latency
/// and defines an eponymous virtual register:
  1024. ///
  1025. /// a->b(a,c)->c(b)->d(c)->exit
  1026. ///
/// The cyclic critical path is two cycles: b->c->b
  1028. /// The acyclic critical path is four cycles: a->b->c->d->exit
  1029. /// LiveOutHeight = height(c) = len(c->d->exit) = 2
  1030. /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
  1031. /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
  1032. /// LiveInDepth = depth(b) = len(a->b) = 1
  1033. ///
  1034. /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
  1035. /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
  1036. /// CyclicCriticalPath = min(2, 2) = 2
  1037. ///
  1038. /// This could be relevant to PostRA scheduling, but is currently implemented
  1039. /// assuming LiveIntervals.
  1040. unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
// This only applies to single-block loops.
  1042. if (!BB->isSuccessor(BB))
  1043. return 0;
  1044. unsigned MaxCyclicLatency = 0;
  1045. // Visit each live out vreg def to find def/use pairs that cross iterations.
  1046. for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
  1047. unsigned Reg = P.RegUnit;
  1048. if (!TRI->isVirtualRegister(Reg))
  1049. continue;
  1050. const LiveInterval &LI = LIS->getInterval(Reg);
  1051. const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
  1052. if (!DefVNI)
  1053. continue;
  1054. MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
  1055. const SUnit *DefSU = getSUnit(DefMI);
  1056. if (!DefSU)
  1057. continue;
  1058. unsigned LiveOutHeight = DefSU->getHeight();
  1059. unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
  1060. // Visit all local users of the vreg def.
  1061. for (const VReg2SUnit &V2SU
  1062. : make_range(VRegUses.find(Reg), VRegUses.end())) {
  1063. SUnit *SU = V2SU.SU;
  1064. if (SU == &ExitSU)
  1065. continue;
  1066. // Only consider uses of the phi.
  1067. LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
  1068. if (!LRQ.valueIn()->isPHIDef())
  1069. continue;
  1070. // Assume that a path spanning two iterations is a cycle, which could
  1071. // overestimate in strange cases. This allows cyclic latency to be
  1072. // estimated as the minimum slack of the vreg's depth or height.
  1073. unsigned CyclicLatency = 0;
  1074. if (LiveOutDepth > SU->getDepth())
  1075. CyclicLatency = LiveOutDepth - SU->getDepth();
  1076. unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
  1077. if (LiveInHeight > LiveOutHeight) {
  1078. if (LiveInHeight - LiveOutHeight < CyclicLatency)
  1079. CyclicLatency = LiveInHeight - LiveOutHeight;
  1080. } else
  1081. CyclicLatency = 0;
  1082. DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
  1083. << SU->NodeNum << ") = " << CyclicLatency << "c\n");
  1084. if (CyclicLatency > MaxCyclicLatency)
  1085. MaxCyclicLatency = CyclicLatency;
  1086. }
  1087. }
  1088. DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  1089. return MaxCyclicLatency;
  1090. }
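// The per-pair arithmetic in the loop above can be restated as a small helper
// (a hedged sketch with illustrative names, not part of this file): the cyclic
// latency contributed by a live-out def and an in-loop use is the smaller of
// the depth slack and the height slack, or zero if either is non-positive.
//
//   static unsigned cyclicLatencyForPair(unsigned LiveOutDepth,
//                                        unsigned LiveOutHeight,
//                                        unsigned LiveInDepth,
//                                        unsigned LiveInHeight) {
//     unsigned DepthSlack =
//         LiveOutDepth > LiveInDepth ? LiveOutDepth - LiveInDepth : 0;
//     unsigned HeightSlack =
//         LiveInHeight > LiveOutHeight ? LiveInHeight - LiveOutHeight : 0;
//     return std::min(DepthSlack, HeightSlack);
//   }
//
// For the a->b(a,c)->c(b)->d(c)->exit example in the block comment above,
// cyclicLatencyForPair(3, 2, 1, 4) == 2, matching the documented result.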
  1091. /// Move an instruction and update register pressure.
  1092. void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
  1093. // Move the instruction to its new location in the instruction stream.
  1094. MachineInstr *MI = SU->getInstr();
  1095. if (IsTopNode) {
  1096. assert(SU->isTopReady() && "node still has unscheduled dependencies");
  1097. if (&*CurrentTop == MI)
  1098. CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
  1099. else {
  1100. moveInstruction(MI, CurrentTop);
  1101. TopRPTracker.setPos(MI);
  1102. }
  1103. if (ShouldTrackPressure) {
  1104. // Update top scheduled pressure.
  1105. RegisterOperands RegOpers;
  1106. RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
  1107. if (ShouldTrackLaneMasks) {
  1108. // Adjust liveness and add missing dead+read-undef flags.
  1109. SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
  1110. RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
  1111. } else {
  1112. // Adjust for missing dead-def flags.
  1113. RegOpers.detectDeadDefs(*MI, *LIS);
  1114. }
  1115. TopRPTracker.advance(RegOpers);
  1116. assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
  1117. DEBUG(
  1118. dbgs() << "Top Pressure:\n";
  1119. dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
  1120. );
  1121. updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
  1122. }
  1123. } else {
  1124. assert(SU->isBottomReady() && "node still has unscheduled dependencies");
  1125. MachineBasicBlock::iterator priorII =
  1126. priorNonDebug(CurrentBottom, CurrentTop);
  1127. if (&*priorII == MI)
  1128. CurrentBottom = priorII;
  1129. else {
  1130. if (&*CurrentTop == MI) {
  1131. CurrentTop = nextIfDebug(++CurrentTop, priorII);
  1132. TopRPTracker.setPos(CurrentTop);
  1133. }
  1134. moveInstruction(MI, CurrentBottom);
  1135. CurrentBottom = MI;
  1136. }
  1137. if (ShouldTrackPressure) {
  1138. RegisterOperands RegOpers;
  1139. RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
  1140. if (ShouldTrackLaneMasks) {
  1141. // Adjust liveness and add missing dead+read-undef flags.
  1142. SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
  1143. RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
  1144. } else {
  1145. // Adjust for missing dead-def flags.
  1146. RegOpers.detectDeadDefs(*MI, *LIS);
  1147. }
  1148. BotRPTracker.recedeSkipDebugValues();
  1149. SmallVector<RegisterMaskPair, 8> LiveUses;
  1150. BotRPTracker.recede(RegOpers, &LiveUses);
  1151. assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
  1152. DEBUG(
  1153. dbgs() << "Bottom Pressure:\n";
  1154. dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
  1155. );
  1156. updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
  1157. updatePressureDiffs(LiveUses);
  1158. }
  1159. }
  1160. }
  1161. //===----------------------------------------------------------------------===//
  1162. // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
  1163. //===----------------------------------------------------------------------===//
  1164. namespace {
  1165. /// \brief Post-process the DAG to create cluster edges between neighboring
  1166. /// loads or between neighboring stores.
  1167. class BaseMemOpClusterMutation : public ScheduleDAGMutation {
  1168. struct MemOpInfo {
  1169. SUnit *SU;
  1170. unsigned BaseReg;
  1171. int64_t Offset;
  1172. MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
  1173. : SU(su), BaseReg(reg), Offset(ofs) {}
  1174. bool operator<(const MemOpInfo&RHS) const {
  1175. return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
  1176. }
  1177. };
  1178. const TargetInstrInfo *TII;
  1179. const TargetRegisterInfo *TRI;
  1180. bool IsLoad;
  1181. public:
  1182. BaseMemOpClusterMutation(const TargetInstrInfo *tii,
  1183. const TargetRegisterInfo *tri, bool IsLoad)
  1184. : TII(tii), TRI(tri), IsLoad(IsLoad) {}
  1185. void apply(ScheduleDAGInstrs *DAGInstrs) override;
  1186. protected:
  1187. void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
  1188. };
  1189. class StoreClusterMutation : public BaseMemOpClusterMutation {
  1190. public:
  1191. StoreClusterMutation(const TargetInstrInfo *tii,
  1192. const TargetRegisterInfo *tri)
  1193. : BaseMemOpClusterMutation(tii, tri, false) {}
  1194. };
  1195. class LoadClusterMutation : public BaseMemOpClusterMutation {
  1196. public:
  1197. LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
  1198. : BaseMemOpClusterMutation(tii, tri, true) {}
  1199. };
  1200. } // anonymous
  1201. void BaseMemOpClusterMutation::clusterNeighboringMemOps(
  1202. ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
  1203. SmallVector<MemOpInfo, 32> MemOpRecords;
  1204. for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
  1205. SUnit *SU = MemOps[Idx];
  1206. unsigned BaseReg;
  1207. int64_t Offset;
  1208. if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
  1209. MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
  1210. }
  1211. if (MemOpRecords.size() < 2)
  1212. return;
  1213. std::sort(MemOpRecords.begin(), MemOpRecords.end());
  1214. unsigned ClusterLength = 1;
  1215. for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
  1216. if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
  1217. ClusterLength = 1;
  1218. continue;
  1219. }
  1220. SUnit *SUa = MemOpRecords[Idx].SU;
  1221. SUnit *SUb = MemOpRecords[Idx+1].SU;
  1222. if (TII->shouldClusterMemOps(SUa->getInstr(), SUb->getInstr(), ClusterLength)
  1223. && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
  1224. DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
  1225. << SUb->NodeNum << ")\n");
  1226. // Copy successor edges from SUa to SUb. Interleaving computation
  1227. // dependent on SUa can prevent load combining due to register reuse.
  1228. // Predecessor edges do not need to be copied from SUb to SUa since nearby
  1229. // loads should have effectively the same inputs.
  1230. for (SUnit::const_succ_iterator
  1231. SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
  1232. if (SI->getSUnit() == SUb)
  1233. continue;
  1234. DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
  1235. DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
  1236. }
  1237. ++ClusterLength;
  1238. } else
  1239. ClusterLength = 1;
  1240. }
  1241. }
/// \brief Callback from DAG postProcessing to create cluster edges for loads
/// or stores.
  1243. void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  1244. ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  1245. // Map DAG NodeNum to store chain ID.
  1246. DenseMap<unsigned, unsigned> StoreChainIDs;
  1247. // Map each store chain to a set of dependent MemOps.
  1248. SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  1249. for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
  1250. SUnit *SU = &DAG->SUnits[Idx];
  1251. if ((IsLoad && !SU->getInstr()->mayLoad()) ||
  1252. (!IsLoad && !SU->getInstr()->mayStore()))
  1253. continue;
  1254. unsigned ChainPredID = DAG->SUnits.size();
  1255. for (SUnit::const_pred_iterator
  1256. PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
  1257. if (PI->isCtrl()) {
  1258. ChainPredID = PI->getSUnit()->NodeNum;
  1259. break;
  1260. }
  1261. }
  1262. // Check if this chain-like pred has been seen
  1263. // before. ChainPredID==MaxNodeID at the top of the schedule.
  1264. unsigned NumChains = StoreChainDependents.size();
  1265. std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
  1266. StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
  1267. if (Result.second)
  1268. StoreChainDependents.resize(NumChains + 1);
  1269. StoreChainDependents[Result.first->second].push_back(SU);
  1270. }
  1271. // Iterate over the store chains.
  1272. for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
  1273. clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
  1274. }
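// How these mutations are attached is outside this excerpt. As a hedged sketch
// (assuming the ScheduleDAGMI::addMutation hook behind the postprocessDAG()
// call used earlier in this file), a DAG constructor or target could register
// them like so:
//
//   DAG->addMutation(make_unique<LoadClusterMutation>(TII, TRI));
//   DAG->addMutation(make_unique<StoreClusterMutation>(TII, TRI));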
  1275. //===----------------------------------------------------------------------===//
  1276. // MacroFusion - DAG post-processing to encourage fusion of macro ops.
  1277. //===----------------------------------------------------------------------===//
  1278. namespace {
  1279. /// \brief Post-process the DAG to create cluster edges between instructions
  1280. /// that may be fused by the processor into a single operation.
  1281. class MacroFusion : public ScheduleDAGMutation {
  1282. const TargetInstrInfo &TII;
  1283. const TargetRegisterInfo &TRI;
  1284. public:
  1285. MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
  1286. : TII(TII), TRI(TRI) {}
  1287. void apply(ScheduleDAGInstrs *DAGInstrs) override;
  1288. };
  1289. } // anonymous
  1290. /// Returns true if \p MI reads a register written by \p Other.
  1291. static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
  1292. const MachineInstr &Other) {
  1293. for (const MachineOperand &MO : MI.uses()) {
  1294. if (!MO.isReg() || !MO.readsReg())
  1295. continue;
  1296. unsigned Reg = MO.getReg();
  1297. if (Other.modifiesRegister(Reg, &TRI))
  1298. return true;
  1299. }
  1300. return false;
  1301. }
  1302. /// \brief Callback from DAG postProcessing to create cluster edges to encourage
  1303. /// fused operations.
  1304. void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
  1305. ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  1306. // For now, assume targets can only fuse with the branch.
  1307. SUnit &ExitSU = DAG->ExitSU;
  1308. MachineInstr *Branch = ExitSU.getInstr();
  1309. if (!Branch)
  1310. return;
  1311. for (SUnit &SU : DAG->SUnits) {
// SUnits with successors can't be scheduled in front of the ExitSU.
  1313. if (!SU.Succs.empty())
  1314. continue;
  1315. // We only care if the node writes to a register that the branch reads.
  1316. MachineInstr *Pred = SU.getInstr();
  1317. if (!HasDataDep(TRI, *Branch, *Pred))
  1318. continue;
  1319. if (!TII.shouldScheduleAdjacent(Pred, Branch))
  1320. continue;
  1321. // Create a single weak edge from SU to ExitSU. The only effect is to cause
  1322. // bottom-up scheduling to heavily prioritize the clustered SU. There is no
  1323. // need to copy predecessor edges from ExitSU to SU, since top-down
  1324. // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
  1325. // of SU, we could create an artificial edge from the deepest root, but it
  1326. // hasn't been needed yet.
  1327. bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
  1328. (void)Success;
  1329. assert(Success && "No DAG nodes should be reachable from ExitSU");
  1330. DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
  1331. break;
  1332. }
  1333. }
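// The fusion decision itself is delegated to the target through
// TII.shouldScheduleAdjacent(First, Second). As a hedged illustration only
// (MyTarget and its opcode are hypothetical), a target that fuses
// compare+branch pairs might implement the hook roughly as:
//
//   bool MyTargetInstrInfo::shouldScheduleAdjacent(MachineInstr *First,
//                                                  MachineInstr *Second) const {
//     // A real target would check its own table of fusible opcode pairs.
//     return First->getOpcode() == MyTarget::CMPrr &&
//            Second->isConditionalBranch();
//   }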
  1334. //===----------------------------------------------------------------------===//
  1335. // CopyConstrain - DAG post-processing to encourage copy elimination.
  1336. //===----------------------------------------------------------------------===//
  1337. namespace {
  1338. /// \brief Post-process the DAG to create weak edges from all uses of a copy to
  1339. /// the one use that defines the copy's source vreg, most likely an induction
  1340. /// variable increment.
  1341. class CopyConstrain : public ScheduleDAGMutation {
  1342. // Transient state.
  1343. SlotIndex RegionBeginIdx;
  1344. // RegionEndIdx is the slot index of the last non-debug instruction in the
  1345. // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  1346. SlotIndex RegionEndIdx;
  1347. public:
  1348. CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
  1349. void apply(ScheduleDAGInstrs *DAGInstrs) override;
  1350. protected:
  1351. void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
  1352. };
  1353. } // anonymous
  1354. /// constrainLocalCopy handles two possibilities:
  1355. /// 1) Local src:
  1356. /// I0: = dst
  1357. /// I1: src = ...
  1358. /// I2: = dst
  1359. /// I3: dst = src (copy)
  1360. /// (create pred->succ edges I0->I1, I2->I1)
  1361. ///
  1362. /// 2) Local copy:
  1363. /// I0: dst = src (copy)
  1364. /// I1: = dst
  1365. /// I2: src = ...
  1366. /// I3: = dst
  1367. /// (create pred->succ edges I1->I2, I3->I2)
  1368. ///
  1369. /// Although the MachineScheduler is currently constrained to single blocks,
  1370. /// this algorithm should handle extended blocks. An EBB is a set of
  1371. /// contiguously numbered blocks such that the previous block in the EBB is
  1372. /// always the single predecessor.
  1373. void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
  1374. LiveIntervals *LIS = DAG->getLIS();
  1375. MachineInstr *Copy = CopySU->getInstr();
  1376. // Check for pure vreg copies.
  1377. const MachineOperand &SrcOp = Copy->getOperand(1);
  1378. unsigned SrcReg = SrcOp.getReg();
  1379. if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
  1380. return;
  1381. const MachineOperand &DstOp = Copy->getOperand(0);
  1382. unsigned DstReg = DstOp.getReg();
  1383. if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
  1384. return;
  1385. // Check if either the dest or source is local. If it's live across a back
  1386. // edge, it's not local. Note that if both vregs are live across the back
// edge, we cannot successfully constrain the copy without cyclic scheduling.
  1388. // If both the copy's source and dest are local live intervals, then we
  1389. // should treat the dest as the global for the purpose of adding
  1390. // constraints. This adds edges from source's other uses to the copy.
  1391. unsigned LocalReg = SrcReg;
  1392. unsigned GlobalReg = DstReg;
  1393. LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  1394. if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
  1395. LocalReg = DstReg;
  1396. GlobalReg = SrcReg;
  1397. LocalLI = &LIS->getInterval(LocalReg);
  1398. if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
  1399. return;
  1400. }
  1401. LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
  1402. // Find the global segment after the start of the local LI.
  1403. LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  1404. // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  1405. // local live range. We could create edges from other global uses to the local
  1406. // start, but the coalescer should have already eliminated these cases, so
  1407. // don't bother dealing with it.
  1408. if (GlobalSegment == GlobalLI->end())
  1409. return;
  1410. // If GlobalSegment is killed at the LocalLI->start, the call to find()
  1411. // returned the next global segment. But if GlobalSegment overlaps with
// LocalLI->start, then advance to the next segment. If a hole in GlobalLI
  1413. // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  1414. if (GlobalSegment->contains(LocalLI->beginIndex()))
  1415. ++GlobalSegment;
  1416. if (GlobalSegment == GlobalLI->end())
  1417. return;
  1418. // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  1419. if (GlobalSegment != GlobalLI->begin()) {
  1420. // Two address defs have no hole.
  1421. if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
  1422. GlobalSegment->start)) {
  1423. return;
  1424. }
  1425. // If the prior global segment may be defined by the same two-address
// instruction that also defines LocalLI, then we can't make a hole here.
  1427. if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
  1428. LocalLI->beginIndex())) {
  1429. return;
  1430. }
  1431. // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
  1432. // it would be a disconnected component in the live range.
  1433. assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
  1434. "Disconnected LRG within the scheduling region.");
  1435. }
  1436. MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
  1437. if (!GlobalDef)
  1438. return;
  1439. SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
  1440. if (!GlobalSU)
  1441. return;
  1442. // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
  1443. // constraining the uses of the last local def to precede GlobalDef.
  1444. SmallVector<SUnit*,8> LocalUses;
  1445. const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  1446. MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  1447. SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
  1448. for (SUnit::const_succ_iterator
  1449. I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
  1450. I != E; ++I) {
  1451. if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
  1452. continue;
  1453. if (I->getSUnit() == GlobalSU)
  1454. continue;
  1455. if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
  1456. return;
  1457. LocalUses.push_back(I->getSUnit());
  1458. }
  1459. // Open the top of the GlobalLI hole by constraining any earlier global uses
  1460. // to precede the start of LocalLI.
  1461. SmallVector<SUnit*,8> GlobalUses;
  1462. MachineInstr *FirstLocalDef =
  1463. LIS->getInstructionFromIndex(LocalLI->beginIndex());
  1464. SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
  1465. for (SUnit::const_pred_iterator
  1466. I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
  1467. if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
  1468. continue;
  1469. if (I->getSUnit() == FirstLocalSU)
  1470. continue;
  1471. if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
  1472. return;
  1473. GlobalUses.push_back(I->getSUnit());
  1474. }
  1475. DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
  1476. // Add the weak edges.
  1477. for (SmallVectorImpl<SUnit*>::const_iterator
  1478. I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
  1479. DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
  1480. << GlobalSU->NodeNum << ")\n");
  1481. DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
  1482. }
  1483. for (SmallVectorImpl<SUnit*>::const_iterator
  1484. I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
  1485. DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
  1486. << FirstLocalSU->NodeNum << ")\n");
  1487. DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
  1488. }
  1489. }
  1490. /// \brief Callback from DAG postProcessing to create weak edges to encourage
  1491. /// copy elimination.
  1492. void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
  1493. ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  1494. assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
  1495. MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
  1496. if (FirstPos == DAG->end())
  1497. return;
  1498. RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
  1499. RegionEndIdx = DAG->getLIS()->getInstructionIndex(
  1500. *priorNonDebug(DAG->end(), DAG->begin()));
  1501. for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
  1502. SUnit *SU = &DAG->SUnits[Idx];
  1503. if (!SU->getInstr()->isCopy())
  1504. continue;
  1505. constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
  1506. }
  1507. }
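// As with the clustering mutations above, wiring this in is a one-line
// registration in the DAG construction path (hedged sketch, not shown here):
//
//   DAG->addMutation(make_unique<CopyConstrain>(TII, TRI));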
  1508. //===----------------------------------------------------------------------===//
  1509. // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
  1510. // and possibly other custom schedulers.
  1511. //===----------------------------------------------------------------------===//
  1512. static const unsigned InvalidCycle = ~0U;
  1513. SchedBoundary::~SchedBoundary() { delete HazardRec; }
  1514. void SchedBoundary::reset() {
  1515. // A new HazardRec is created for each DAG and owned by SchedBoundary.
// Destroying and reconstructing it is very expensive, though, so keep
// invalid placeholder HazardRecs around instead.
  1518. if (HazardRec && HazardRec->isEnabled()) {
  1519. delete HazardRec;
  1520. HazardRec = nullptr;
  1521. }
  1522. Available.clear();
  1523. Pending.clear();
  1524. CheckPending = false;
  1525. NextSUs.clear();
  1526. CurrCycle = 0;
  1527. CurrMOps = 0;
  1528. MinReadyCycle = UINT_MAX;
  1529. ExpectedLatency = 0;
  1530. DependentLatency = 0;
  1531. RetiredMOps = 0;
  1532. MaxExecutedResCount = 0;
  1533. ZoneCritResIdx = 0;
  1534. IsResourceLimited = false;
  1535. ReservedCycles.clear();
  1536. #ifndef NDEBUG
  1537. // Track the maximum number of stall cycles that could arise either from the
  1538. // latency of a DAG edge or the number of cycles that a processor resource is
  1539. // reserved (SchedBoundary::ReservedCycles).
  1540. MaxObservedStall = 0;
  1541. #endif
  1542. // Reserve a zero-count for invalid CritResIdx.
  1543. ExecutedResCounts.resize(1);
  1544. assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
  1545. }
  1546. void SchedRemainder::
  1547. init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  1548. reset();
  1549. if (!SchedModel->hasInstrSchedModel())
  1550. return;
  1551. RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  1552. for (std::vector<SUnit>::iterator
  1553. I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
  1554. const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
  1555. RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
  1556. * SchedModel->getMicroOpFactor();
  1557. for (TargetSchedModel::ProcResIter
  1558. PI = SchedModel->getWriteProcResBegin(SC),
  1559. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  1560. unsigned PIdx = PI->ProcResourceIdx;
  1561. unsigned Factor = SchedModel->getResourceFactor(PIdx);
  1562. RemainingCounts[PIdx] += (Factor * PI->Cycles);
  1563. }
  1564. }
  1565. }
  1566. void SchedBoundary::
  1567. init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  1568. reset();
  1569. DAG = dag;
  1570. SchedModel = smodel;
  1571. Rem = rem;
  1572. if (SchedModel->hasInstrSchedModel()) {
  1573. ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
  1574. ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
  1575. }
  1576. }
  1577. /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
  1578. /// these "soft stalls" differently than the hard stall cycles based on CPU
  1579. /// resources and computed by checkHazard(). A fully in-order model
  1580. /// (MicroOpBufferSize==0) will not make use of this since instructions are not
  1581. /// available for scheduling until they are ready. However, a weaker in-order
  1582. /// model may use this for heuristics. For example, if a processor has in-order
  1583. /// behavior when reading certain resources, this may come into play.
  1584. unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
  1585. if (!SU->isUnbuffered)
  1586. return 0;
  1587. unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  1588. if (ReadyCycle > CurrCycle)
  1589. return ReadyCycle - CurrCycle;
  1590. return 0;
  1591. }
  1592. /// Compute the next cycle at which the given processor resource can be
  1593. /// scheduled.
  1594. unsigned SchedBoundary::
  1595. getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
  1596. unsigned NextUnreserved = ReservedCycles[PIdx];
  1597. // If this resource has never been used, always return cycle zero.
  1598. if (NextUnreserved == InvalidCycle)
  1599. return 0;
  1600. // For bottom-up scheduling add the cycles needed for the current operation.
  1601. if (!isTop())
  1602. NextUnreserved += Cycles;
  1603. return NextUnreserved;
  1604. }
  1605. /// Does this SU have a hazard within the current instruction group.
  1606. ///
  1607. /// The scheduler supports two modes of hazard recognition. The first is the
  1608. /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
  1609. /// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
  1611. ///
  1612. /// The second is a streamlined mechanism that checks for hazards based on
  1613. /// simple counters that the scheduler itself maintains. It explicitly checks
  1614. /// for instruction dispatch limitations, including the number of micro-ops that
  1615. /// can dispatch per cycle.
  1616. ///
  1617. /// TODO: Also check whether the SU must start a new group.
  1618. bool SchedBoundary::checkHazard(SUnit *SU) {
  1619. if (HazardRec->isEnabled()
  1620. && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
  1621. return true;
  1622. }
  1623. unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  1624. if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
  1625. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
  1626. << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
  1627. return true;
  1628. }
  1629. if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
  1630. const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  1631. for (TargetSchedModel::ProcResIter
  1632. PI = SchedModel->getWriteProcResBegin(SC),
  1633. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  1634. unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
  1635. if (NRCycle > CurrCycle) {
  1636. #ifndef NDEBUG
  1637. MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
  1638. #endif
  1639. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
  1640. << SchedModel->getResourceName(PI->ProcResourceIdx)
  1641. << "=" << NRCycle << "c\n");
  1642. return true;
  1643. }
  1644. }
  1645. }
  1646. return false;
  1647. }
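// The first mode above only fires when the target supplies an enabled
// recognizer. As a hedged sketch (MyTargetInstrInfo is hypothetical, and the
// ScoreboardHazardRecognizer constructor arguments are assumed), a target can
// route the hook used later in this file to the generic in-order scoreboard:
//
//   ScheduleHazardRecognizer *
//   MyTargetInstrInfo::CreateTargetMIHazardRecognizer(
//       const InstrItineraryData *II, const ScheduleDAG *DAG) const {
//     // Drive hazards from the itinerary-based reservation tables.
//     return new ScoreboardHazardRecognizer(II, DAG, "misched");
//   }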
  1648. // Find the unscheduled node in ReadySUs with the highest latency.
  1649. unsigned SchedBoundary::
  1650. findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
  1651. SUnit *LateSU = nullptr;
  1652. unsigned RemLatency = 0;
  1653. for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
  1654. I != E; ++I) {
  1655. unsigned L = getUnscheduledLatency(*I);
  1656. if (L > RemLatency) {
  1657. RemLatency = L;
  1658. LateSU = *I;
  1659. }
  1660. }
  1661. if (LateSU) {
  1662. DEBUG(dbgs() << Available.getName() << " RemLatency SU("
  1663. << LateSU->NodeNum << ") " << RemLatency << "c\n");
  1664. }
  1665. return RemLatency;
  1666. }
  1667. // Count resources in this zone and the remaining unscheduled
// instructions. Return the max count, scaled. Set OtherCritIdx to the critical
  1669. // resource index, or zero if the zone is issue limited.
  1670. unsigned SchedBoundary::
  1671. getOtherResourceCount(unsigned &OtherCritIdx) {
  1672. OtherCritIdx = 0;
  1673. if (!SchedModel->hasInstrSchedModel())
  1674. return 0;
  1675. unsigned OtherCritCount = Rem->RemIssueCount
  1676. + (RetiredMOps * SchedModel->getMicroOpFactor());
  1677. DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
  1678. << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
  1679. for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
  1680. PIdx != PEnd; ++PIdx) {
  1681. unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
  1682. if (OtherCount > OtherCritCount) {
  1683. OtherCritCount = OtherCount;
  1684. OtherCritIdx = PIdx;
  1685. }
  1686. }
  1687. if (OtherCritIdx) {
  1688. DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
  1689. << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
  1690. << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
  1691. }
  1692. return OtherCritCount;
  1693. }
  1694. void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
  1695. assert(SU->getInstr() && "Scheduled SUnit must have instr");
  1696. #ifndef NDEBUG
// ReadyCycle was bumped up to the CurrCycle when this node was
  1698. // scheduled, but CurrCycle may have been eagerly advanced immediately after
  1699. // scheduling, so may now be greater than ReadyCycle.
  1700. if (ReadyCycle > CurrCycle)
  1701. MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
  1702. #endif
  1703. if (ReadyCycle < MinReadyCycle)
  1704. MinReadyCycle = ReadyCycle;
  1705. // Check for interlocks first. For the purpose of other heuristics, an
  1706. // instruction that cannot issue appears as if it's not in the ReadyQueue.
  1707. bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
  1708. if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
  1709. Available.size() >= ReadyListLimit)
  1710. Pending.push(SU);
  1711. else
  1712. Available.push(SU);
  1713. // Record this node as an immediate dependent of the scheduled node.
  1714. NextSUs.insert(SU);
  1715. }
  1716. void SchedBoundary::releaseTopNode(SUnit *SU) {
  1717. if (SU->isScheduled)
  1718. return;
  1719. releaseNode(SU, SU->TopReadyCycle);
  1720. }
  1721. void SchedBoundary::releaseBottomNode(SUnit *SU) {
  1722. if (SU->isScheduled)
  1723. return;
  1724. releaseNode(SU, SU->BotReadyCycle);
  1725. }
  1726. /// Move the boundary of scheduled code by one cycle.
  1727. void SchedBoundary::bumpCycle(unsigned NextCycle) {
  1728. if (SchedModel->getMicroOpBufferSize() == 0) {
  1729. assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  1730. if (MinReadyCycle > NextCycle)
  1731. NextCycle = MinReadyCycle;
  1732. }
  1733. // Update the current micro-ops, which will issue in the next cycle.
  1734. unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
  1735. CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
  1736. // Decrement DependentLatency based on the next cycle.
  1737. if ((NextCycle - CurrCycle) > DependentLatency)
  1738. DependentLatency = 0;
  1739. else
  1740. DependentLatency -= (NextCycle - CurrCycle);
  1741. if (!HazardRec->isEnabled()) {
  1742. // Bypass HazardRec virtual calls.
  1743. CurrCycle = NextCycle;
  1744. } else {
  1745. // Bypass getHazardType calls in case of long latency.
  1746. for (; CurrCycle != NextCycle; ++CurrCycle) {
  1747. if (isTop())
  1748. HazardRec->AdvanceCycle();
  1749. else
  1750. HazardRec->RecedeCycle();
  1751. }
  1752. }
  1753. CheckPending = true;
  1754. unsigned LFactor = SchedModel->getLatencyFactor();
  1755. IsResourceLimited =
  1756. (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
  1757. > (int)LFactor;
  1758. DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
  1759. }
  1760. void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
  1761. ExecutedResCounts[PIdx] += Count;
  1762. if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
  1763. MaxExecutedResCount = ExecutedResCounts[PIdx];
  1764. }
  1765. /// Add the given processor resource to this scheduled zone.
  1766. ///
  1767. /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
  1768. /// during which this resource is consumed.
  1769. ///
  1770. /// \return the next cycle at which the instruction may execute without
  1771. /// oversubscribing resources.
  1772. unsigned SchedBoundary::
  1773. countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
  1774. unsigned Factor = SchedModel->getResourceFactor(PIdx);
  1775. unsigned Count = Factor * Cycles;
  1776. DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx)
  1777. << " +" << Cycles << "x" << Factor << "u\n");
  1778. // Update Executed resources counts.
  1779. incExecutedResources(PIdx, Count);
  1780. assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  1781. Rem->RemainingCounts[PIdx] -= Count;
  1782. // Check if this resource exceeds the current critical resource. If so, it
  1783. // becomes the critical resource.
  1784. if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
  1785. ZoneCritResIdx = PIdx;
  1786. DEBUG(dbgs() << " *** Critical resource "
  1787. << SchedModel->getResourceName(PIdx) << ": "
  1788. << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
  1789. }
  1790. // For reserved resources, record the highest cycle using the resource.
  1791. unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
  1792. if (NextAvailable > CurrCycle) {
  1793. DEBUG(dbgs() << " Resource conflict: "
  1794. << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
  1795. << NextAvailable << "\n");
  1796. }
  1797. return NextAvailable;
  1798. }
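// A quick worked example of the scaling above (numbers are illustrative): if
// getResourceFactor(PIdx) == 2 and the write takes Cycles == 3, then Count ==
// 6 scaled units are added to ExecutedResCounts[PIdx] and subtracted from
// Rem->RemainingCounts[PIdx], so the zone's count and the remaining count are
// tracked in the same scaled units and stay directly comparable.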
  1799. /// Move the boundary of scheduled code by one SUnit.
  1800. void SchedBoundary::bumpNode(SUnit *SU) {
  1801. // Update the reservation table.
  1802. if (HazardRec->isEnabled()) {
  1803. if (!isTop() && SU->isCall) {
  1804. // Calls are scheduled with their preceding instructions. For bottom-up
  1805. // scheduling, clear the pipeline state before emitting.
  1806. HazardRec->Reset();
  1807. }
  1808. HazardRec->EmitInstruction(SU);
  1809. }
  1810. // checkHazard should prevent scheduling multiple instructions per cycle that
  1811. // exceed the issue width.
  1812. const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  1813. unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
  1814. assert(
  1815. (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
  1816. "Cannot schedule this instruction's MicroOps in the current cycle.");
  1817. unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  1818. DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
  1819. unsigned NextCycle = CurrCycle;
  1820. switch (SchedModel->getMicroOpBufferSize()) {
  1821. case 0:
  1822. assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
  1823. break;
  1824. case 1:
  1825. if (ReadyCycle > NextCycle) {
  1826. NextCycle = ReadyCycle;
  1827. DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
  1828. }
  1829. break;
  1830. default:
  1831. // We don't currently model the OOO reorder buffer, so consider all
  1832. // scheduled MOps to be "retired". We do loosely model in-order resource
  1833. // latency. If this instruction uses an in-order resource, account for any
  1834. // likely stall cycles.
  1835. if (SU->isUnbuffered && ReadyCycle > NextCycle)
  1836. NextCycle = ReadyCycle;
  1837. break;
  1838. }
  1839. RetiredMOps += IncMOps;
  1840. // Update resource counts and critical resource.
  1841. if (SchedModel->hasInstrSchedModel()) {
  1842. unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
  1843. assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
  1844. Rem->RemIssueCount -= DecRemIssue;
  1845. if (ZoneCritResIdx) {
  1846. // Scale scheduled micro-ops for comparing with the critical resource.
  1847. unsigned ScaledMOps =
  1848. RetiredMOps * SchedModel->getMicroOpFactor();
  1849. // If scaled micro-ops are now more than the previous critical resource by
  1850. // a full cycle, then micro-ops issue becomes critical.
  1851. if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
  1852. >= (int)SchedModel->getLatencyFactor()) {
  1853. ZoneCritResIdx = 0;
  1854. DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
  1855. << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
  1856. }
  1857. }
  1858. for (TargetSchedModel::ProcResIter
  1859. PI = SchedModel->getWriteProcResBegin(SC),
  1860. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  1861. unsigned RCycle =
  1862. countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
  1863. if (RCycle > NextCycle)
  1864. NextCycle = RCycle;
  1865. }
  1866. if (SU->hasReservedResource) {
  1867. // For reserved resources, record the highest cycle using the resource.
  1868. // For top-down scheduling, this is the cycle in which we schedule this
// instruction plus the number of cycles the operation reserves the
// resource. For bottom-up scheduling, it is simply the instruction's cycle.
  1871. for (TargetSchedModel::ProcResIter
  1872. PI = SchedModel->getWriteProcResBegin(SC),
  1873. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  1874. unsigned PIdx = PI->ProcResourceIdx;
  1875. if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
  1876. if (isTop()) {
  1877. ReservedCycles[PIdx] =
  1878. std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
  1879. }
  1880. else
  1881. ReservedCycles[PIdx] = NextCycle;
  1882. }
  1883. }
  1884. }
  1885. }
  1886. // Update ExpectedLatency and DependentLatency.
  1887. unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  1888. unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
  1889. if (SU->getDepth() > TopLatency) {
  1890. TopLatency = SU->getDepth();
  1891. DEBUG(dbgs() << " " << Available.getName()
  1892. << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
  1893. }
  1894. if (SU->getHeight() > BotLatency) {
  1895. BotLatency = SU->getHeight();
  1896. DEBUG(dbgs() << " " << Available.getName()
  1897. << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
  1898. }
  1899. // If we stall for any reason, bump the cycle.
  1900. if (NextCycle > CurrCycle) {
  1901. bumpCycle(NextCycle);
  1902. } else {
  1903. // After updating ZoneCritResIdx and ExpectedLatency, check if we're
  1904. // resource limited. If a stall occurred, bumpCycle does this.
  1905. unsigned LFactor = SchedModel->getLatencyFactor();
  1906. IsResourceLimited =
  1907. (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
  1908. > (int)LFactor;
  1909. }
  1910. // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
  1911. // resets CurrMOps. Loop to handle instructions with more MOps than issue in
  1912. // one cycle. Since we commonly reach the max MOps here, opportunistically
  1913. // bump the cycle to avoid uselessly checking everything in the readyQ.
  1914. CurrMOps += IncMOps;
  1915. while (CurrMOps >= SchedModel->getIssueWidth()) {
  1916. DEBUG(dbgs() << " *** Max MOps " << CurrMOps
  1917. << " at cycle " << CurrCycle << '\n');
  1918. bumpCycle(++NextCycle);
  1919. }
  1920. DEBUG(dumpScheduledState());
  1921. }
/// Release pending ready nodes into the available queue. This makes them
  1923. /// visible to heuristics.
  1924. void SchedBoundary::releasePending() {
  1925. // If the available queue is empty, it is safe to reset MinReadyCycle.
  1926. if (Available.empty())
  1927. MinReadyCycle = UINT_MAX;
  1928. // Check to see if any of the pending instructions are ready to issue. If
  1929. // so, add them to the available queue.
  1930. bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
  1931. for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
  1932. SUnit *SU = *(Pending.begin()+i);
  1933. unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
  1934. if (ReadyCycle < MinReadyCycle)
  1935. MinReadyCycle = ReadyCycle;
  1936. if (!IsBuffered && ReadyCycle > CurrCycle)
  1937. continue;
  1938. if (checkHazard(SU))
  1939. continue;
  1940. if (Available.size() >= ReadyListLimit)
  1941. break;
  1942. Available.push(SU);
  1943. Pending.remove(Pending.begin()+i);
  1944. --i; --e;
  1945. }
  1946. DEBUG(if (!Pending.empty()) Pending.dump());
  1947. CheckPending = false;
  1948. }
  1949. /// Remove SU from the ready set for this boundary.
  1950. void SchedBoundary::removeReady(SUnit *SU) {
  1951. if (Available.isInQueue(SU))
  1952. Available.remove(Available.find(SU));
  1953. else {
  1954. assert(Pending.isInQueue(SU) && "bad ready count");
  1955. Pending.remove(Pending.find(SU));
  1956. }
  1957. }
  1958. /// If this queue only has one ready candidate, return it. As a side effect,
  1959. /// defer any nodes that now hit a hazard, and advance the cycle until at least
  1960. /// one node is ready. If multiple instructions are ready, return NULL.
  1961. SUnit *SchedBoundary::pickOnlyChoice() {
  1962. if (CheckPending)
  1963. releasePending();
  1964. if (CurrMOps > 0) {
  1965. // Defer any ready instrs that now have a hazard.
  1966. for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
  1967. if (checkHazard(*I)) {
  1968. Pending.push(*I);
  1969. I = Available.remove(I);
  1970. continue;
  1971. }
  1972. ++I;
  1973. }
  1974. }
  1975. for (unsigned i = 0; Available.empty(); ++i) {
  1976. // FIXME: Re-enable assert once PR20057 is resolved.
  1977. // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
  1978. // "permanent hazard");
  1979. (void)i;
  1980. bumpCycle(CurrCycle + 1);
  1981. releasePending();
  1982. }
  1983. if (Available.size() == 1)
  1984. return *Available.begin();
  1985. return nullptr;
  1986. }
  1987. #ifndef NDEBUG
  1988. // This is useful information to dump after bumpNode.
  1989. // Note that the Queue contents are more useful before pickNodeFromQueue.
  1990. void SchedBoundary::dumpScheduledState() {
  1991. unsigned ResFactor;
  1992. unsigned ResCount;
  1993. if (ZoneCritResIdx) {
  1994. ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
  1995. ResCount = getResourceCount(ZoneCritResIdx);
  1996. } else {
  1997. ResFactor = SchedModel->getMicroOpFactor();
  1998. ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
  1999. }
  2000. unsigned LFactor = SchedModel->getLatencyFactor();
  2001. dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
  2002. << " Retired: " << RetiredMOps;
  2003. dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
  2004. dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
  2005. << ResCount / ResFactor << " "
  2006. << SchedModel->getResourceName(ZoneCritResIdx)
  2007. << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
  2008. << (IsResourceLimited ? " - Resource" : " - Latency")
  2009. << " limited.\n";
  2010. }
  2011. #endif
  2012. //===----------------------------------------------------------------------===//
  2013. // GenericScheduler - Generic implementation of MachineSchedStrategy.
  2014. //===----------------------------------------------------------------------===//
  2015. void GenericSchedulerBase::SchedCandidate::
  2016. initResourceDelta(const ScheduleDAGMI *DAG,
  2017. const TargetSchedModel *SchedModel) {
  2018. if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
  2019. return;
  2020. const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  2021. for (TargetSchedModel::ProcResIter
  2022. PI = SchedModel->getWriteProcResBegin(SC),
  2023. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  2024. if (PI->ProcResourceIdx == Policy.ReduceResIdx)
  2025. ResDelta.CritResources += PI->Cycles;
  2026. if (PI->ProcResourceIdx == Policy.DemandResIdx)
  2027. ResDelta.DemandedResources += PI->Cycles;
  2028. }
  2029. }
/// Set the CandPolicy for a scheduling zone, given the current resources and
/// latencies inside and outside the zone.
  2032. void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
  2033. SchedBoundary &CurrZone,
  2034. SchedBoundary *OtherZone) {
  2035. // Apply preemptive heuristics based on the total latency and resources
  2036. // inside and outside this zone. Potential stalls should be considered before
  2037. // following this policy.
  2038. // Compute remaining latency. We need this both to determine whether the
  2039. // overall schedule has become latency-limited and whether the instructions
  2040. // outside this zone are resource or latency limited.
  2041. //
  2042. // The "dependent" latency is updated incrementally during scheduling as the
  2043. // max height/depth of scheduled nodes minus the cycles since it was
  2044. // scheduled:
// DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
  2046. //
  2047. // The "independent" latency is the max ready queue depth:
  2048. // ILat = max N.depth for N in Available|Pending
  2049. //
  2050. // RemainingLatency is the greater of independent and dependent latency.
  2051. unsigned RemLatency = CurrZone.getDependentLatency();
  2052. RemLatency = std::max(RemLatency,
  2053. CurrZone.findMaxLatency(CurrZone.Available.elements()));
  2054. RemLatency = std::max(RemLatency,
  2055. CurrZone.findMaxLatency(CurrZone.Pending.elements()));
  2056. // Compute the critical resource outside the zone.
  2057. unsigned OtherCritIdx = 0;
  2058. unsigned OtherCount =
  2059. OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
  2060. bool OtherResLimited = false;
  2061. if (SchedModel->hasInstrSchedModel()) {
  2062. unsigned LFactor = SchedModel->getLatencyFactor();
  2063. OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
  2064. }
  2065. // Schedule aggressively for latency in PostRA mode. We don't check for
  2066. // acyclic latency during PostRA, and highly out-of-order processors will
  2067. // skip PostRA scheduling.
  2068. if (!OtherResLimited) {
  2069. if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
  2070. Policy.ReduceLatency |= true;
  2071. DEBUG(dbgs() << " " << CurrZone.Available.getName()
  2072. << " RemainingLatency " << RemLatency << " + "
  2073. << CurrZone.getCurrCycle() << "c > CritPath "
  2074. << Rem.CriticalPath << "\n");
  2075. }
  2076. }
  2077. // If the same resource is limiting inside and outside the zone, do nothing.
  2078. if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
  2079. return;
  2080. DEBUG(
  2081. if (CurrZone.isResourceLimited()) {
  2082. dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
  2083. << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
  2084. << "\n";
  2085. }
  2086. if (OtherResLimited)
  2087. dbgs() << " RemainingLimit: "
  2088. << SchedModel->getResourceName(OtherCritIdx) << "\n";
  2089. if (!CurrZone.isResourceLimited() && !OtherResLimited)
  2090. dbgs() << " Latency limited both directions.\n");
  2091. if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
  2092. Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
  2093. if (OtherResLimited)
  2094. Policy.DemandResIdx = OtherCritIdx;
  2095. }
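// Restating the tests above in math form (a summary of the existing behavior,
// with F_{lat} = SchedModel->getLatencyFactor()):
//
//   \mathrm{RemLatency} = \max(\mathrm{DLat}, \mathrm{ILat})
//   \mathrm{OtherResLimited} \iff
//       \mathrm{OtherCount} - \mathrm{RemLatency} \cdot F_{lat} > F_{lat}
//   \mathrm{ReduceLatency\ is\ set\ when\ } \lnot\mathrm{OtherResLimited}
//       \land (\mathrm{IsPostRA} \lor
//              \mathrm{RemLatency} + \mathrm{CurrCycle} > \mathrm{CriticalPath})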
  2096. #ifndef NDEBUG
  2097. const char *GenericSchedulerBase::getReasonStr(
  2098. GenericSchedulerBase::CandReason Reason) {
  2099. switch (Reason) {
  2100. case NoCand: return "NOCAND ";
  2101. case PhysRegCopy: return "PREG-COPY";
  2102. case RegExcess: return "REG-EXCESS";
  2103. case RegCritical: return "REG-CRIT ";
  2104. case Stall: return "STALL ";
  2105. case Cluster: return "CLUSTER ";
  2106. case Weak: return "WEAK ";
  2107. case RegMax: return "REG-MAX ";
  2108. case ResourceReduce: return "RES-REDUCE";
  2109. case ResourceDemand: return "RES-DEMAND";
  2110. case TopDepthReduce: return "TOP-DEPTH ";
  2111. case TopPathReduce: return "TOP-PATH ";
  2112. case BotHeightReduce:return "BOT-HEIGHT";
  2113. case BotPathReduce: return "BOT-PATH ";
  2114. case NextDefUse: return "DEF-USE ";
  2115. case NodeOrder: return "ORDER ";
  2116. };
  2117. llvm_unreachable("Unknown reason!");
  2118. }
  2119. void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
  2120. PressureChange P;
  2121. unsigned ResIdx = 0;
  2122. unsigned Latency = 0;
  2123. switch (Cand.Reason) {
  2124. default:
  2125. break;
  2126. case RegExcess:
  2127. P = Cand.RPDelta.Excess;
  2128. break;
  2129. case RegCritical:
  2130. P = Cand.RPDelta.CriticalMax;
  2131. break;
  2132. case RegMax:
  2133. P = Cand.RPDelta.CurrentMax;
  2134. break;
  2135. case ResourceReduce:
  2136. ResIdx = Cand.Policy.ReduceResIdx;
  2137. break;
  2138. case ResourceDemand:
  2139. ResIdx = Cand.Policy.DemandResIdx;
  2140. break;
  2141. case TopDepthReduce:
  2142. Latency = Cand.SU->getDepth();
  2143. break;
  2144. case TopPathReduce:
  2145. Latency = Cand.SU->getHeight();
  2146. break;
  2147. case BotHeightReduce:
  2148. Latency = Cand.SU->getHeight();
  2149. break;
  2150. case BotPathReduce:
  2151. Latency = Cand.SU->getDepth();
  2152. break;
  2153. }
  2154. dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
  2155. if (P.isValid())
  2156. dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
  2157. << ":" << P.getUnitInc() << " ";
  2158. else
  2159. dbgs() << " ";
  2160. if (ResIdx)
  2161. dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
  2162. else
  2163. dbgs() << " ";
  2164. if (Latency)
  2165. dbgs() << " " << Latency << " cycles ";
  2166. else
  2167. dbgs() << " ";
  2168. dbgs() << '\n';
  2169. }
  2170. #endif
  2171. /// Return true if this heuristic determines order.
  2172. static bool tryLess(int TryVal, int CandVal,
  2173. GenericSchedulerBase::SchedCandidate &TryCand,
  2174. GenericSchedulerBase::SchedCandidate &Cand,
  2175. GenericSchedulerBase::CandReason Reason) {
  2176. if (TryVal < CandVal) {
  2177. TryCand.Reason = Reason;
  2178. return true;
  2179. }
  2180. if (TryVal > CandVal) {
  2181. if (Cand.Reason > Reason)
  2182. Cand.Reason = Reason;
  2183. return true;
  2184. }
  2185. Cand.setRepeat(Reason);
  2186. return false;
  2187. }
  2188. static bool tryGreater(int TryVal, int CandVal,
  2189. GenericSchedulerBase::SchedCandidate &TryCand,
  2190. GenericSchedulerBase::SchedCandidate &Cand,
  2191. GenericSchedulerBase::CandReason Reason) {
  2192. if (TryVal > CandVal) {
  2193. TryCand.Reason = Reason;
  2194. return true;
  2195. }
  2196. if (TryVal < CandVal) {
  2197. if (Cand.Reason > Reason)
  2198. Cand.Reason = Reason;
  2199. return true;
  2200. }
  2201. Cand.setRepeat(Reason);
  2202. return false;
  2203. }
  2204. static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
  2205. GenericSchedulerBase::SchedCandidate &Cand,
  2206. SchedBoundary &Zone) {
  2207. if (Zone.isTop()) {
  2208. if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
  2209. if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
  2210. TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
  2211. return true;
  2212. }
  2213. if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
  2214. TryCand, Cand, GenericSchedulerBase::TopPathReduce))
  2215. return true;
  2216. } else {
  2217. if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
  2218. if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
  2219. TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
  2220. return true;
  2221. }
  2222. if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
  2223. TryCand, Cand, GenericSchedulerBase::BotPathReduce))
  2224. return true;
  2225. }
  2226. return false;
  2227. }
  2228. static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
  2229. bool IsTop) {
  2230. DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
  2231. << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
  2232. }
  2233. void GenericScheduler::initialize(ScheduleDAGMI *dag) {
  2234. assert(dag->hasVRegLiveness() &&
  2235. "(PreRA)GenericScheduler needs vreg liveness");
  2236. DAG = static_cast<ScheduleDAGMILive*>(dag);
  2237. SchedModel = DAG->getSchedModel();
  2238. TRI = DAG->TRI;
  2239. Rem.init(DAG, SchedModel);
  2240. Top.init(DAG, SchedModel, &Rem);
  2241. Bot.init(DAG, SchedModel, &Rem);
  2242. // Initialize resource counts.
  2243. // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  2244. // are disabled, then these HazardRecs will be disabled.
  2245. const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  2246. if (!Top.HazardRec) {
  2247. Top.HazardRec =
  2248. DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
  2249. Itin, DAG);
  2250. }
  2251. if (!Bot.HazardRec) {
  2252. Bot.HazardRec =
  2253. DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
  2254. Itin, DAG);
  2255. }
  2256. }
  2257. /// Initialize the per-region scheduling policy.
  2258. void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
  2259. MachineBasicBlock::iterator End,
  2260. unsigned NumRegionInstrs) {
  2261. const MachineFunction &MF = *Begin->getParent()->getParent();
  2262. const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
  2263. // Avoid setting up the register pressure tracker for small regions to save
  2264. // compile time. As a rough heuristic, only track pressure when the number of
  2265. // schedulable instructions exceeds half the integer register file.
  2266. RegionPolicy.ShouldTrackPressure = true;
  2267. for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
  2268. MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
  2269. if (TLI->isTypeLegal(LegalIntVT)) {
  2270. unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
  2271. TLI->getRegClassFor(LegalIntVT));
  2272. RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
  2273. }
  2274. }
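// For example, on a hypothetical target with 32 allocatable registers in its
// widest legal integer class, pressure tracking would only be enabled for
// regions with more than 16 schedulable instructions.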
// For generic targets, default to bottom-up scheduling: it is simpler, and more
// of the compile-time optimizations have been implemented for that direction.
  2277. RegionPolicy.OnlyBottomUp = true;
  2278. // Allow the subtarget to override default policy.
  2279. MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
  2280. NumRegionInstrs);
  2281. // After subtarget overrides, apply command line options.
  2282. if (!EnableRegPressure)
  2283. RegionPolicy.ShouldTrackPressure = false;
// The -misched-topdown/bottomup flags can force or unforce the scheduling
// direction; e.g. -misched-bottomup=false allows scheduling in both directions.
  2286. assert((!ForceTopDown || !ForceBottomUp) &&
  2287. "-misched-topdown incompatible with -misched-bottomup");
  2288. if (ForceBottomUp.getNumOccurrences() > 0) {
  2289. RegionPolicy.OnlyBottomUp = ForceBottomUp;
  2290. if (RegionPolicy.OnlyBottomUp)
  2291. RegionPolicy.OnlyTopDown = false;
  2292. }
  2293. if (ForceTopDown.getNumOccurrences() > 0) {
  2294. RegionPolicy.OnlyTopDown = ForceTopDown;
  2295. if (RegionPolicy.OnlyTopDown)
  2296. RegionPolicy.OnlyBottomUp = false;
  2297. }
  2298. }
  2299. void GenericScheduler::dumpPolicy() {
  2300. dbgs() << "GenericScheduler RegionPolicy: "
  2301. << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
  2302. << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
  2303. << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
  2304. << "\n";
  2305. }
  2306. /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
  2307. /// critical path by more cycles than it takes to drain the instruction buffer.
/// We estimate an upper bound on in-flight instructions as:
  2309. ///
  2310. /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
  2311. /// InFlightIterations = AcyclicPath / CyclesPerIteration
  2312. /// InFlightResources = InFlightIterations * LoopResources
  2313. ///
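/// For example (illustrative numbers only): with a 4-cycle cyclic path, a
/// 40-cycle acyclic path and 8 micro-ops per iteration, roughly 40/4 = 10
/// iterations, i.e. about 80 micro-ops, must be in flight to hide the acyclic
/// path; if that exceeds the micro-op buffer, the loop is considered
/// acyclic-latency limited.
///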
  2314. /// TODO: Check execution resources in addition to IssueCount.
  2315. void GenericScheduler::checkAcyclicLatency() {
  2316. if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
  2317. return;
  2318. // Scaled number of cycles per loop iteration.
  2319. unsigned IterCount =
  2320. std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
  2321. Rem.RemIssueCount);
  2322. // Scaled acyclic critical path.
  2323. unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
  2324. // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
  2325. unsigned InFlightCount =
  2326. (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
  2327. unsigned BufferLimit =
  2328. SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
  2329. Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
  2330. DEBUG(dbgs() << "IssueCycles="
  2331. << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
  2332. << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
  2333. << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
  2334. << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
  2335. << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
  2336. if (Rem.IsAcyclicLatencyLimited)
  2337. dbgs() << " ACYCLIC LATENCY LIMIT\n");
  2338. }
  2339. void GenericScheduler::registerRoots() {
  2340. Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them just in case.
  2342. for (std::vector<SUnit*>::const_iterator
  2343. I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
  2344. if ((*I)->getDepth() > Rem.CriticalPath)
  2345. Rem.CriticalPath = (*I)->getDepth();
  2346. }
  2347. DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
  2348. if (DumpCriticalPathLength) {
  2349. errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
  2350. }
  2351. if (EnableCyclicPath) {
  2352. Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
  2353. checkAcyclicLatency();
  2354. }
  2355. }
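/// Compare two register-pressure deltas. If both candidates affect the same
/// pressure set, the smaller increase wins; if only one of them decreases
/// pressure, prefer it; otherwise both deltas move in the same direction, so
/// rank them by the target's pressure-set score (reversed when both are
/// decreases). Returns true if pressure determined the order.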
  2356. static bool tryPressure(const PressureChange &TryP,
  2357. const PressureChange &CandP,
  2358. GenericSchedulerBase::SchedCandidate &TryCand,
  2359. GenericSchedulerBase::SchedCandidate &Cand,
  2360. GenericSchedulerBase::CandReason Reason,
  2361. const TargetRegisterInfo *TRI,
  2362. const MachineFunction &MF) {
  2363. unsigned TryPSet = TryP.getPSetOrMax();
  2364. unsigned CandPSet = CandP.getPSetOrMax();
  2365. // If both candidates affect the same set, go with the smallest increase.
  2366. if (TryPSet == CandPSet) {
  2367. return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
  2368. Reason);
  2369. }
  2370. // If one candidate decreases and the other increases, go with it.
  2371. // Invalid candidates have UnitInc==0.
  2372. if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
  2373. Reason)) {
  2374. return true;
  2375. }
  2376. int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
  2377. std::numeric_limits<int>::max();
  2378. int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
  2379. std::numeric_limits<int>::max();
  2380. // If the candidates are decreasing pressure, reverse priority.
  2381. if (TryP.getUnitInc() < 0)
  2382. std::swap(TryRank, CandRank);
  2383. return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
  2384. }
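/// Return the number of unscheduled weak edges (used for clustering and other
/// soft constraints) on the already-scheduled side of \p SU: predecessors when
/// scheduling top-down, successors when scheduling bottom-up.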
  2385. static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  2386. return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
  2387. }
  2388. /// Minimize physical register live ranges. Regalloc wants them adjacent to
  2389. /// their physreg def/use.
  2390. ///
  2391. /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
  2392. /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
  2393. /// with the operation that produces or consumes the physreg. We'll do this when
  2394. /// regalloc has support for parallel copies.
  2395. static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
  2396. const MachineInstr *MI = SU->getInstr();
  2397. if (!MI->isCopy())
  2398. return 0;
  2399. unsigned ScheduledOper = isTop ? 1 : 0;
  2400. unsigned UnscheduledOper = isTop ? 0 : 1;
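// For a COPY, operand 0 is the destination and operand 1 is the source. The
// "scheduled" operand is the one whose physreg def/use lies in the
// already-scheduled zone: the source when scheduling top-down, the destination
// when scheduling bottom-up.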
// If we have already scheduled the physreg producer/consumer, immediately
  2402. // schedule the copy.
  2403. if (TargetRegisterInfo::isPhysicalRegister(
  2404. MI->getOperand(ScheduledOper).getReg()))
  2405. return 1;
  2406. // If the physreg is at the boundary, defer it. Otherwise schedule it
  2407. // immediately to free the dependent. We can hoist the copy later.
  2408. bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
  2409. if (TargetRegisterInfo::isPhysicalRegister(
  2410. MI->getOperand(UnscheduledOper).getReg()))
  2411. return AtBoundary ? -1 : 1;
  2412. return 0;
  2413. }
  2414. void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
  2415. bool AtTop,
  2416. const RegPressureTracker &RPTracker,
  2417. RegPressureTracker &TempTracker) {
  2418. Cand.SU = SU;
  2419. if (DAG->isTrackingPressure()) {
  2420. if (AtTop) {
  2421. TempTracker.getMaxDownwardPressureDelta(
  2422. Cand.SU->getInstr(),
  2423. Cand.RPDelta,
  2424. DAG->getRegionCriticalPSets(),
  2425. DAG->getRegPressure().MaxSetPressure);
  2426. } else {
  2427. if (VerifyScheduling) {
  2428. TempTracker.getMaxUpwardPressureDelta(
  2429. Cand.SU->getInstr(),
  2430. &DAG->getPressureDiff(Cand.SU),
  2431. Cand.RPDelta,
  2432. DAG->getRegionCriticalPSets(),
  2433. DAG->getRegPressure().MaxSetPressure);
  2434. } else {
  2435. RPTracker.getUpwardPressureDelta(
  2436. Cand.SU->getInstr(),
  2437. DAG->getPressureDiff(Cand.SU),
  2438. Cand.RPDelta,
  2439. DAG->getRegionCriticalPSets(),
  2440. DAG->getRegPressure().MaxSetPressure);
  2441. }
  2442. }
  2443. }
  2444. DEBUG(if (Cand.RPDelta.Excess.isValid())
  2445. dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
  2446. << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
  2447. << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
  2448. }
/// Apply a set of heuristics to a new candidate. Heuristics are currently
  2450. /// hierarchical. This may be more efficient than a graduated cost model because
  2451. /// we don't need to evaluate all aspects of the model for each node in the
  2452. /// queue. But it's really done to make the heuristics easier to debug and
  2453. /// statistically analyze.
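///
/// The checks below are applied in priority order: physreg copy bias, excess
/// pressure, critical pressure, acyclic latency (for acyclic path-limited
/// loops), stalls on unbuffered resources, clustering, weak edges, max region
/// pressure, critical resource reduction, demanded resources, latency, and
/// defs/uses of the last scheduled instruction, falling back to original node
/// order.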
  2454. ///
  2455. /// \param Cand provides the policy and current best candidate.
  2456. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
  2457. /// \param Zone describes the scheduled zone that we are extending.
  2458. void GenericScheduler::tryCandidate(SchedCandidate &Cand,
  2459. SchedCandidate &TryCand,
  2460. SchedBoundary &Zone) {
  2461. // Initialize the candidate if needed.
  2462. if (!Cand.isValid()) {
  2463. TryCand.Reason = NodeOrder;
  2464. return;
  2465. }
  2466. if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
  2467. biasPhysRegCopy(Cand.SU, Zone.isTop()),
  2468. TryCand, Cand, PhysRegCopy))
  2469. return;
  2470. // Avoid exceeding the target's limit.
  2471. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
  2472. Cand.RPDelta.Excess,
  2473. TryCand, Cand, RegExcess, TRI,
  2474. DAG->MF))
  2475. return;
  2476. // Avoid increasing the max critical pressure in the scheduled region.
  2477. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
  2478. Cand.RPDelta.CriticalMax,
  2479. TryCand, Cand, RegCritical, TRI,
  2480. DAG->MF))
  2481. return;
  2482. // For loops that are acyclic path limited, aggressively schedule for latency.
  2483. // This can result in very long dependence chains scheduled in sequence, so
  2484. // once every cycle (when CurrMOps == 0), switch to normal heuristics.
  2485. if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
  2486. && tryLatency(TryCand, Cand, Zone))
  2487. return;
  2488. // Prioritize instructions that read unbuffered resources by stall cycles.
  2489. if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
  2490. Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
  2491. return;
  2492. // Keep clustered nodes together to encourage downstream peephole
  2493. // optimizations which may reduce resource requirements.
  2494. //
  2495. // This is a best effort to set things up for a post-RA pass. Optimizations
  2496. // like generating loads of multiple registers should ideally be done within
  2497. // the scheduler pass by combining the loads during DAG postprocessing.
  2498. const SUnit *NextClusterSU =
  2499. Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  2500. if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
  2501. TryCand, Cand, Cluster))
  2502. return;
  2503. // Weak edges are for clustering and other constraints.
  2504. if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
  2505. getWeakLeft(Cand.SU, Zone.isTop()),
  2506. TryCand, Cand, Weak)) {
  2507. return;
  2508. }
  2509. // Avoid increasing the max pressure of the entire region.
  2510. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
  2511. Cand.RPDelta.CurrentMax,
  2512. TryCand, Cand, RegMax, TRI,
  2513. DAG->MF))
  2514. return;
  2515. // Avoid critical resource consumption and balance the schedule.
  2516. TryCand.initResourceDelta(DAG, SchedModel);
  2517. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
  2518. TryCand, Cand, ResourceReduce))
  2519. return;
  2520. if (tryGreater(TryCand.ResDelta.DemandedResources,
  2521. Cand.ResDelta.DemandedResources,
  2522. TryCand, Cand, ResourceDemand))
  2523. return;
  2524. // Avoid serializing long latency dependence chains.
  2525. // For acyclic path limited loops, latency was already checked above.
  2526. if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
  2527. !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
  2528. return;
  2529. }
  2530. // Prefer immediate defs/users of the last scheduled instruction. This is a
  2531. // local pressure avoidance strategy that also makes the machine code
  2532. // readable.
  2533. if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
  2534. TryCand, Cand, NextDefUse))
  2535. return;
  2536. // Fall through to original instruction order.
  2537. if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
  2538. || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
  2539. TryCand.Reason = NodeOrder;
  2540. }
  2541. }
  2542. /// Pick the best candidate from the queue.
  2543. ///
  2544. /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
  2545. /// DAG building. To adjust for the current scheduling location we need to
  2546. /// maintain the number of vreg uses remaining to be top-scheduled.
  2547. void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
  2548. const RegPressureTracker &RPTracker,
  2549. SchedCandidate &Cand) {
  2550. ReadyQueue &Q = Zone.Available;
  2551. DEBUG(Q.dump());
  2552. // getMaxPressureDelta temporarily modifies the tracker.
  2553. RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
  2554. for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
  2555. SchedCandidate TryCand(Cand.Policy);
  2556. initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
  2557. tryCandidate(Cand, TryCand, Zone);
  2558. if (TryCand.Reason != NoCand) {
  2559. // Initialize resource delta if needed in case future heuristics query it.
  2560. if (TryCand.ResDelta == SchedResourceDelta())
  2561. TryCand.initResourceDelta(DAG, SchedModel);
  2562. Cand.setBest(TryCand);
  2563. DEBUG(traceCandidate(Cand));
  2564. }
  2565. }
  2566. }
  2567. /// Pick the best candidate node from either the top or bottom queue.
  2568. SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
  2569. // Schedule as far as possible in the direction of no choice. This is most
  2570. // efficient, but also provides the best heuristics for CriticalPSets.
  2571. if (SUnit *SU = Bot.pickOnlyChoice()) {
  2572. IsTopNode = false;
  2573. DEBUG(dbgs() << "Pick Bot ONLY1\n");
  2574. return SU;
  2575. }
  2576. if (SUnit *SU = Top.pickOnlyChoice()) {
  2577. IsTopNode = true;
  2578. DEBUG(dbgs() << "Pick Top ONLY1\n");
  2579. return SU;
  2580. }
  2581. CandPolicy NoPolicy;
  2582. SchedCandidate BotCand(NoPolicy);
  2583. SchedCandidate TopCand(NoPolicy);
  2584. // Set the bottom-up policy based on the state of the current bottom zone and
  2585. // the instructions outside the zone, including the top zone.
  2586. setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
  2587. // Set the top-down policy based on the state of the current top zone and
  2588. // the instructions outside the zone, including the bottom zone.
  2589. setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
  2590. // Prefer bottom scheduling when heuristics are silent.
  2591. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  2592. assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  2593. // If either Q has a single candidate that provides the least increase in
  2594. // Excess pressure, we can immediately schedule from that Q.
  2595. //
  2596. // RegionCriticalPSets summarizes the pressure within the scheduled region and
  2597. // affects picking from either Q. If scheduling in one direction must
  2598. // increase pressure for one of the excess PSets, then schedule in that
  2599. // direction first to provide more freedom in the other direction.
  2600. if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
  2601. || (BotCand.Reason == RegCritical && !BotCand.isRepeat(RegCritical)))
  2602. {
  2603. IsTopNode = false;
  2604. tracePick(BotCand, IsTopNode);
  2605. return BotCand.SU;
  2606. }
  2607. // Check if the top Q has a better candidate.
  2608. pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  2609. assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  2610. // Choose the queue with the most important (lowest enum) reason.
  2611. if (TopCand.Reason < BotCand.Reason) {
  2612. IsTopNode = true;
  2613. tracePick(TopCand, IsTopNode);
  2614. return TopCand.SU;
  2615. }
  2616. // Otherwise prefer the bottom candidate, in node order if all else failed.
  2617. IsTopNode = false;
  2618. tracePick(BotCand, IsTopNode);
  2619. return BotCand.SU;
  2620. }
  2621. /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
  2622. SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
  2623. if (DAG->top() == DAG->bottom()) {
  2624. assert(Top.Available.empty() && Top.Pending.empty() &&
  2625. Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
  2626. return nullptr;
  2627. }
  2628. SUnit *SU;
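// A node may sit in both the top and bottom queues; keep picking until we find
// one that has not already been scheduled from the other zone.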
  2629. do {
  2630. if (RegionPolicy.OnlyTopDown) {
  2631. SU = Top.pickOnlyChoice();
  2632. if (!SU) {
  2633. CandPolicy NoPolicy;
  2634. SchedCandidate TopCand(NoPolicy);
  2635. pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  2636. assert(TopCand.Reason != NoCand && "failed to find a candidate");
  2637. tracePick(TopCand, true);
  2638. SU = TopCand.SU;
  2639. }
  2640. IsTopNode = true;
  2641. } else if (RegionPolicy.OnlyBottomUp) {
  2642. SU = Bot.pickOnlyChoice();
  2643. if (!SU) {
  2644. CandPolicy NoPolicy;
  2645. SchedCandidate BotCand(NoPolicy);
  2646. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  2647. assert(BotCand.Reason != NoCand && "failed to find a candidate");
  2648. tracePick(BotCand, false);
  2649. SU = BotCand.SU;
  2650. }
  2651. IsTopNode = false;
  2652. } else {
  2653. SU = pickNodeBidirectional(IsTopNode);
  2654. }
  2655. } while (SU->isScheduled);
  2656. if (SU->isTopReady())
  2657. Top.removeReady(SU);
  2658. if (SU->isBottomReady())
  2659. Bot.removeReady(SU);
  2660. DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  2661. return SU;
  2662. }
  2663. void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
  2664. MachineBasicBlock::iterator InsertPos = SU->getInstr();
  2665. if (!isTop)
  2666. ++InsertPos;
  2667. SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
  2668. // Find already scheduled copies with a single physreg dependence and move
  2669. // them just above the scheduled instruction.
  2670. for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
  2671. I != E; ++I) {
  2672. if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
  2673. continue;
  2674. SUnit *DepSU = I->getSUnit();
  2675. if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
  2676. continue;
  2677. MachineInstr *Copy = DepSU->getInstr();
  2678. if (!Copy->isCopy())
  2679. continue;
  2680. DEBUG(dbgs() << " Rescheduling physreg copy ";
  2681. I->getSUnit()->dump(DAG));
  2682. DAG->moveInstruction(Copy, InsertPos);
  2683. }
  2684. }
  2685. /// Update the scheduler's state after scheduling a node. This is the same node
  2686. /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
/// update its state based on the current cycle before MachineSchedStrategy
  2688. /// does.
  2689. ///
  2690. /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
  2691. /// them here. See comments in biasPhysRegCopy.
  2692. void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  2693. if (IsTopNode) {
  2694. SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
  2695. Top.bumpNode(SU);
  2696. if (SU->hasPhysRegUses)
  2697. reschedulePhysRegCopies(SU, true);
  2698. } else {
  2699. SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
  2700. Bot.bumpNode(SU);
  2701. if (SU->hasPhysRegDefs)
  2702. reschedulePhysRegCopies(SU, false);
  2703. }
  2704. }
  2705. /// Create the standard converging machine scheduler. This will be used as the
  2706. /// default scheduler if the target does not set a default.
  2707. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
  2708. ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
  2709. // Register DAG post-processors.
  2710. //
  2711. // FIXME: extend the mutation API to allow earlier mutations to instantiate
  2712. // data and pass it to later mutations. Have a single mutation that gathers
  2713. // the interesting nodes in one pass.
  2714. DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
  2715. if (EnableMemOpCluster) {
  2716. if (DAG->TII->enableClusterLoads())
  2717. DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
  2718. if (DAG->TII->enableClusterStores())
  2719. DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
  2720. }
  2721. if (EnableMacroFusion)
  2722. DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
  2723. return DAG;
  2724. }
  2725. static MachineSchedRegistry
  2726. GenericSchedRegistry("converge", "Standard converging scheduler.",
  2727. createGenericSchedLive);
  2728. //===----------------------------------------------------------------------===//
  2729. // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
  2730. //===----------------------------------------------------------------------===//
  2731. void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
  2732. DAG = Dag;
  2733. SchedModel = DAG->getSchedModel();
  2734. TRI = DAG->TRI;
  2735. Rem.init(DAG, SchedModel);
  2736. Top.init(DAG, SchedModel, &Rem);
  2737. BotRoots.clear();
  2738. // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  2739. // or are disabled, then these HazardRecs will be disabled.
  2740. const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  2741. if (!Top.HazardRec) {
  2742. Top.HazardRec =
  2743. DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
  2744. Itin, DAG);
  2745. }
  2746. }
  2747. void PostGenericScheduler::registerRoots() {
  2748. Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them just in case.
  2750. for (SmallVectorImpl<SUnit*>::const_iterator
  2751. I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
  2752. if ((*I)->getDepth() > Rem.CriticalPath)
  2753. Rem.CriticalPath = (*I)->getDepth();
  2754. }
  2755. DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
  2756. if (DumpCriticalPathLength) {
  2757. errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
  2758. }
  2759. }
/// Apply a set of heuristics to a new candidate for PostRA scheduling.
  2761. ///
  2762. /// \param Cand provides the policy and current best candidate.
  2763. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
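///
/// This is a reduced form of the pre-RA heuristics: register pressure is no
/// longer a concern after register allocation, so only stalls, resource
/// balance, latency, and original instruction order are considered.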
  2764. void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
  2765. SchedCandidate &TryCand) {
  2766. // Initialize the candidate if needed.
  2767. if (!Cand.isValid()) {
  2768. TryCand.Reason = NodeOrder;
  2769. return;
  2770. }
  2771. // Prioritize instructions that read unbuffered resources by stall cycles.
  2772. if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
  2773. Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
  2774. return;
  2775. // Avoid critical resource consumption and balance the schedule.
  2776. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
  2777. TryCand, Cand, ResourceReduce))
  2778. return;
  2779. if (tryGreater(TryCand.ResDelta.DemandedResources,
  2780. Cand.ResDelta.DemandedResources,
  2781. TryCand, Cand, ResourceDemand))
  2782. return;
  2783. // Avoid serializing long latency dependence chains.
  2784. if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
  2785. return;
  2786. }
  2787. // Fall through to original instruction order.
  2788. if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
  2789. TryCand.Reason = NodeOrder;
  2790. }
  2791. void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
  2792. ReadyQueue &Q = Top.Available;
  2793. DEBUG(Q.dump());
  2794. for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
  2795. SchedCandidate TryCand(Cand.Policy);
  2796. TryCand.SU = *I;
  2797. TryCand.initResourceDelta(DAG, SchedModel);
  2798. tryCandidate(Cand, TryCand);
  2799. if (TryCand.Reason != NoCand) {
  2800. Cand.setBest(TryCand);
  2801. DEBUG(traceCandidate(Cand));
  2802. }
  2803. }
  2804. }
  2805. /// Pick the next node to schedule.
  2806. SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
  2807. if (DAG->top() == DAG->bottom()) {
  2808. assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
  2809. return nullptr;
  2810. }
  2811. SUnit *SU;
  2812. do {
  2813. SU = Top.pickOnlyChoice();
  2814. if (!SU) {
  2815. CandPolicy NoPolicy;
  2816. SchedCandidate TopCand(NoPolicy);
  2817. // Set the top-down policy based on the state of the current top zone and
  2818. // the instructions outside the zone, including the bottom zone.
  2819. setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
  2820. pickNodeFromQueue(TopCand);
  2821. assert(TopCand.Reason != NoCand && "failed to find a candidate");
  2822. tracePick(TopCand, true);
  2823. SU = TopCand.SU;
  2824. }
  2825. } while (SU->isScheduled);
  2826. IsTopNode = true;
  2827. Top.removeReady(SU);
  2828. DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  2829. return SU;
  2830. }
  2831. /// Called after ScheduleDAGMI has scheduled an instruction and updated
  2832. /// scheduled/remaining flags in the DAG nodes.
  2833. void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  2834. SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
  2835. Top.bumpNode(SU);
  2836. }
  2837. /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
  2838. static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
  2839. return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
  2840. }
  2841. //===----------------------------------------------------------------------===//
  2842. // ILP Scheduler. Currently for experimental analysis of heuristics.
  2843. //===----------------------------------------------------------------------===//
  2844. namespace {
  2845. /// \brief Order nodes by the ILP metric.
  2846. struct ILPOrder {
  2847. const SchedDFSResult *DFSResult;
  2848. const BitVector *ScheduledTrees;
  2849. bool MaximizeILP;
  2850. ILPOrder(bool MaxILP)
  2851. : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
  2852. /// \brief Apply a less-than relation on node priority.
  2853. ///
  2854. /// (Return true if A comes after B in the Q.)
  2855. bool operator()(const SUnit *A, const SUnit *B) const {
  2856. unsigned SchedTreeA = DFSResult->getSubtreeID(A);
  2857. unsigned SchedTreeB = DFSResult->getSubtreeID(B);
  2858. if (SchedTreeA != SchedTreeB) {
  2859. // Unscheduled trees have lower priority.
  2860. if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
  2861. return ScheduledTrees->test(SchedTreeB);
// Trees with shallower connections have lower priority.
  2863. if (DFSResult->getSubtreeLevel(SchedTreeA)
  2864. != DFSResult->getSubtreeLevel(SchedTreeB)) {
  2865. return DFSResult->getSubtreeLevel(SchedTreeA)
  2866. < DFSResult->getSubtreeLevel(SchedTreeB);
  2867. }
  2868. }
  2869. if (MaximizeILP)
  2870. return DFSResult->getILP(A) < DFSResult->getILP(B);
  2871. else
  2872. return DFSResult->getILP(A) > DFSResult->getILP(B);
  2873. }
  2874. };
  2875. /// \brief Schedule based on the ILP metric.
  2876. class ILPScheduler : public MachineSchedStrategy {
  2877. ScheduleDAGMILive *DAG;
  2878. ILPOrder Cmp;
  2879. std::vector<SUnit*> ReadyQ;
  2880. public:
  2881. ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
  2882. void initialize(ScheduleDAGMI *dag) override {
  2883. assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
  2884. DAG = static_cast<ScheduleDAGMILive*>(dag);
  2885. DAG->computeDFSResult();
  2886. Cmp.DFSResult = DAG->getDFSResult();
  2887. Cmp.ScheduledTrees = &DAG->getScheduledTrees();
  2888. ReadyQ.clear();
  2889. }
  2890. void registerRoots() override {
  2891. // Restore the heap in ReadyQ with the updated DFS results.
  2892. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  2893. }
  2894. /// Implement MachineSchedStrategy interface.
  2895. /// -----------------------------------------
  2896. /// Callback to select the highest priority node from the ready Q.
  2897. SUnit *pickNode(bool &IsTopNode) override {
  2898. if (ReadyQ.empty()) return nullptr;
  2899. std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  2900. SUnit *SU = ReadyQ.back();
  2901. ReadyQ.pop_back();
  2902. IsTopNode = false;
  2903. DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
  2904. << " ILP: " << DAG->getDFSResult()->getILP(SU)
  2905. << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
  2906. << DAG->getDFSResult()->getSubtreeLevel(
  2907. DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
  2908. << "Scheduling " << *SU->getInstr());
  2909. return SU;
  2910. }
  2911. /// \brief Scheduler callback to notify that a new subtree is scheduled.
  2912. void scheduleTree(unsigned SubtreeID) override {
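// Completing a subtree changes the ScheduledTrees-based priorities used by
// ILPOrder, so rebuild the heap.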
  2913. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  2914. }
  2915. /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  2916. /// DFSResults, and resort the priority Q.
  2917. void schedNode(SUnit *SU, bool IsTopNode) override {
  2918. assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  2919. }
  2920. void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
  2921. void releaseBottomNode(SUnit *SU) override {
  2922. ReadyQ.push_back(SU);
  2923. std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  2924. }
  2925. };
  2926. } // namespace
  2927. static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  2928. return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
  2929. }
  2930. static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  2931. return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
  2932. }
  2933. static MachineSchedRegistry ILPMaxRegistry(
  2934. "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
  2935. static MachineSchedRegistry ILPMinRegistry(
  2936. "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
  2937. //===----------------------------------------------------------------------===//
  2938. // Machine Instruction Shuffler for Correctness Testing
  2939. //===----------------------------------------------------------------------===//
  2940. #ifndef NDEBUG
  2941. namespace {
  2942. /// Apply a less-than relation on the node order, which corresponds to the
  2943. /// instruction order prior to scheduling. IsReverse implements greater-than.
  2944. template<bool IsReverse>
  2945. struct SUnitOrder {
  2946. bool operator()(SUnit *A, SUnit *B) const {
  2947. if (IsReverse)
  2948. return A->NodeNum > B->NodeNum;
  2949. else
  2950. return A->NodeNum < B->NodeNum;
  2951. }
  2952. };
  2953. /// Reorder instructions as much as possible.
  2954. class InstructionShuffler : public MachineSchedStrategy {
  2955. bool IsAlternating;
  2956. bool IsTopDown;
  2957. // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  2958. // gives nodes with a higher number higher priority causing the latest
  2959. // instructions to be scheduled first.
  2960. PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
  2961. TopQ;
  2962. // When scheduling bottom-up, use greater-than as the queue priority.
  2963. PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
  2964. BottomQ;
  2965. public:
  2966. InstructionShuffler(bool alternate, bool topdown)
  2967. : IsAlternating(alternate), IsTopDown(topdown) {}
  2968. void initialize(ScheduleDAGMI*) override {
  2969. TopQ.clear();
  2970. BottomQ.clear();
  2971. }
  2972. /// Implement MachineSchedStrategy interface.
  2973. /// -----------------------------------------
  2974. SUnit *pickNode(bool &IsTopNode) override {
  2975. SUnit *SU;
  2976. if (IsTopDown) {
  2977. do {
  2978. if (TopQ.empty()) return nullptr;
  2979. SU = TopQ.top();
  2980. TopQ.pop();
  2981. } while (SU->isScheduled);
  2982. IsTopNode = true;
  2983. } else {
  2984. do {
  2985. if (BottomQ.empty()) return nullptr;
  2986. SU = BottomQ.top();
  2987. BottomQ.pop();
  2988. } while (SU->isScheduled);
  2989. IsTopNode = false;
  2990. }
  2991. if (IsAlternating)
  2992. IsTopDown = !IsTopDown;
  2993. return SU;
  2994. }
  2995. void schedNode(SUnit *SU, bool IsTopNode) override {}
  2996. void releaseTopNode(SUnit *SU) override {
  2997. TopQ.push(SU);
  2998. }
  2999. void releaseBottomNode(SUnit *SU) override {
  3000. BottomQ.push(SU);
  3001. }
  3002. };
  3003. } // namespace
  3004. static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  3005. bool Alternate = !ForceTopDown && !ForceBottomUp;
  3006. bool TopDown = !ForceBottomUp;
  3007. assert((TopDown || !ForceTopDown) &&
  3008. "-misched-topdown incompatible with -misched-bottomup");
  3009. return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
  3010. }
  3011. static MachineSchedRegistry ShufflerRegistry(
  3012. "shuffle", "Shuffle machine instructions alternating directions",
  3013. createInstructionShuffler);
  3014. #endif // !NDEBUG
  3015. //===----------------------------------------------------------------------===//
  3016. // GraphWriter support for ScheduleDAGMILive.
  3017. //===----------------------------------------------------------------------===//
  3018. #ifndef NDEBUG
  3019. namespace llvm {
  3020. template<> struct GraphTraits<
  3021. ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
  3022. template<>
  3023. struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
  3024. DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
  3025. static std::string getGraphName(const ScheduleDAG *G) {
  3026. return G->MF.getName();
  3027. }
  3028. static bool renderGraphFromBottomUp() {
  3029. return true;
  3030. }
  3031. static bool isNodeHidden(const SUnit *Node) {
  3032. if (ViewMISchedCutoff == 0)
  3033. return false;
  3034. return (Node->Preds.size() > ViewMISchedCutoff
  3035. || Node->Succs.size() > ViewMISchedCutoff);
  3036. }
  3037. /// If you want to override the dot attributes printed for a particular
  3038. /// edge, override this method.
  3039. static std::string getEdgeAttributes(const SUnit *Node,
  3040. SUnitIterator EI,
  3041. const ScheduleDAG *Graph) {
  3042. if (EI.isArtificialDep())
  3043. return "color=cyan,style=dashed";
  3044. if (EI.isCtrlDep())
  3045. return "color=blue,style=dashed";
  3046. return "";
  3047. }
  3048. static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
  3049. std::string Str;
  3050. raw_string_ostream SS(Str);
  3051. const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
  3052. const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
  3053. static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
  3054. SS << "SU:" << SU->NodeNum;
  3055. if (DFS)
  3056. SS << " I:" << DFS->getNumInstrs(SU);
  3057. return SS.str();
  3058. }
  3059. static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
  3060. return G->getGraphNodeLabel(SU);
  3061. }
  3062. static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
  3063. std::string Str("shape=Mrecord");
  3064. const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
  3065. const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
  3066. static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
  3067. if (DFS) {
  3068. Str += ",style=filled,fillcolor=\"#";
  3069. Str += DOT::getColorString(DFS->getSubtreeID(N));
  3070. Str += '"';
  3071. }
  3072. return Str;
  3073. }
  3074. };
  3075. } // namespace llvm
  3076. #endif // NDEBUG
  3077. /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
  3078. /// rendered using 'dot'.
  3079. ///
  3080. void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
  3081. #ifndef NDEBUG
  3082. ViewGraph(this, Name, false, Title);
  3083. #else
  3084. errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
  3085. << "systems with Graphviz or gv!\n";
  3086. #endif // NDEBUG
  3087. }
  3088. /// Out-of-line implementation with no arguments is handy for gdb.
  3089. void ScheduleDAGMI::viewGraph() {
  3090. viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
  3091. }