MachineTraceMetrics.cpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329
  1. //===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "llvm/CodeGen/MachineTraceMetrics.h"
  10. #include "llvm/ADT/PostOrderIterator.h"
  11. #include "llvm/ADT/SparseSet.h"
  12. #include "llvm/CodeGen/MachineBasicBlock.h"
  13. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  14. #include "llvm/CodeGen/MachineLoopInfo.h"
  15. #include "llvm/CodeGen/MachineRegisterInfo.h"
  16. #include "llvm/CodeGen/Passes.h"
  17. #include "llvm/MC/MCSubtargetInfo.h"
  18. #include "llvm/Support/Debug.h"
  19. #include "llvm/Support/Format.h"
  20. #include "llvm/Support/raw_ostream.h"
  21. #include "llvm/Target/TargetInstrInfo.h"
  22. #include "llvm/Target/TargetRegisterInfo.h"
  23. #include "llvm/Target/TargetSubtargetInfo.h"
  24. using namespace llvm;
  25. #define DEBUG_TYPE "machine-trace-metrics"
// Pass identification: the address of ID serves as the unique pass ID.
char MachineTraceMetrics::ID = 0;
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
// Register the pass and its required analyses with the pass registry.
// The final 'true' marks this pass as an analysis (is_analysis).
INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
"machine-trace-metrics", "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineTraceMetrics,
"machine-trace-metrics", "Machine Trace Metrics", false, true)
  34. MachineTraceMetrics::MachineTraceMetrics()
  35. : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
  36. MRI(nullptr), Loops(nullptr) {
  37. std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
  38. }
// Declare the analyses this pass requires. The pass is read-only, so it
// preserves everything.
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineBranchProbabilityInfo>();
  AU.addRequired<MachineLoopInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
  45. bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  46. MF = &Func;
  47. TII = MF->getTarget().getSubtargetImpl()->getInstrInfo();
  48. TRI = MF->getTarget().getSubtargetImpl()->getRegisterInfo();
  49. MRI = &MF->getRegInfo();
  50. Loops = &getAnalysis<MachineLoopInfo>();
  51. const TargetSubtargetInfo &ST =
  52. MF->getTarget().getSubtarget<TargetSubtargetInfo>();
  53. SchedModel.init(*ST.getSchedModel(), &ST, TII);
  54. BlockInfo.resize(MF->getNumBlockIDs());
  55. ProcResourceCycles.resize(MF->getNumBlockIDs() *
  56. SchedModel.getNumProcResourceKinds());
  57. return false;
  58. }
  59. void MachineTraceMetrics::releaseMemory() {
  60. MF = nullptr;
  61. BlockInfo.clear();
  62. for (unsigned i = 0; i != TS_NumStrategies; ++i) {
  63. delete Ensembles[i];
  64. Ensembles[i] = nullptr;
  65. }
  66. }
  67. //===----------------------------------------------------------------------===//
  68. // Fixed block information
  69. //===----------------------------------------------------------------------===//
  70. //
  71. // The number of instructions in a basic block and the CPU resources used by
  72. // those instructions don't depend on any given trace strategy.
/// Compute the resource usage in basic block MBB.
///
/// Computes (and caches in BlockInfo) the strategy-independent facts about
/// MBB: its non-transient instruction count, whether it contains calls, and
/// the scaled per-processor-resource cycle counts (stored in
/// ProcResourceCycles at MBB's offset).
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  // Return the cached entry if this block was already computed.
  if (FBI->hasResources())
    return FBI;
  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;
  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);
  for (const auto &MI : *MBB) {
    // Transient instructions don't consume issue slots or resources.
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;
    // Count processor resources used.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;
    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->Cycles;
    }
  }
  FBI->InstrCount = InstrCount;
  // Scale the resource cycles so they are comparable.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceCycles[PROffset + K] =
      PRCycles[K] * SchedModel.getResourceFactor(K);
  return FBI;
}
  113. ArrayRef<unsigned>
  114. MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
  115. assert(BlockInfo[MBBNum].hasResources() &&
  116. "getResources() must be called before getProcResourceCycles()");
  117. unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  118. assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
  119. return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds,
  120. PRKinds);
  121. }
  122. //===----------------------------------------------------------------------===//
  123. // Ensemble utility functions
  124. //===----------------------------------------------------------------------===//
  125. MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  126. : MTM(*ct) {
  127. BlockInfo.resize(MTM.BlockInfo.size());
  128. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  129. ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  130. ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
  131. }
// Virtual destructor serves as an anchor so the vtable is emitted in this
// translation unit.
MachineTraceMetrics::Ensemble::~Ensemble() {}
// Return the innermost loop containing MBB, or null if MBB is not in a loop,
// by forwarding to the parent pass's MachineLoopInfo.
const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}
// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;
  // Compute resources from trace above. The top block is simple.
  if (!TBI->Pred) {
    // MBB is the head of its trace: zero depth, nothing consumed above it.
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }
  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  // Depth = everything above the predecessor plus the predecessor itself.
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;
  // Compute per-resource depths.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}
// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;
  // Compute resources for the current block.
  // Note: unlike depth, height includes MBB's own instructions.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
  // The trace tail is done.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    std::copy(PRCycles.begin(), PRCycles.end(),
              ProcResourceHeights.begin() + PROffset);
    return;
  }
  // Compute from the block below. A post-order traversal ensures the
  // successor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;
  // Compute per-resource heights.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}
  196. // Check if depth resources for MBB are valid and return the TBI.
  197. // Return NULL if the resources have been invalidated.
  198. const MachineTraceMetrics::TraceBlockInfo*
  199. MachineTraceMetrics::Ensemble::
  200. getDepthResources(const MachineBasicBlock *MBB) const {
  201. const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  202. return TBI->hasValidDepth() ? TBI : nullptr;
  203. }
  204. // Check if height resources for MBB are valid and return the TBI.
  205. // Return NULL if the resources have been invalidated.
  206. const MachineTraceMetrics::TraceBlockInfo*
  207. MachineTraceMetrics::Ensemble::
  208. getHeightResources(const MachineBasicBlock *MBB) const {
  209. const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  210. return TBI->hasValidHeight() ? TBI : nullptr;
  211. }
  212. /// Get an array of processor resource depths for MBB. Indexed by processor
  213. /// resource kind, this array contains the scaled processor resources consumed
  214. /// by all blocks preceding MBB in its trace. It does not include instructions
  215. /// in MBB.
  216. ///
  217. /// Compare TraceBlockInfo::InstrDepth.
  218. ArrayRef<unsigned>
  219. MachineTraceMetrics::Ensemble::
  220. getProcResourceDepths(unsigned MBBNum) const {
  221. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  222. assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  223. return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
  224. PRKinds);
  225. }
  226. /// Get an array of processor resource heights for MBB. Indexed by processor
  227. /// resource kind, this array contains the scaled processor resources consumed
  228. /// by this block and all blocks following it in its trace.
  229. ///
  230. /// Compare TraceBlockInfo::InstrHeight.
  231. ArrayRef<unsigned>
  232. MachineTraceMetrics::Ensemble::
  233. getProcResourceHeights(unsigned MBBNum) const {
  234. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  235. assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  236. return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
  237. PRKinds);
  238. }
  239. //===----------------------------------------------------------------------===//
  240. // Trace Selection Strategies
  241. //===----------------------------------------------------------------------===//
  242. //
  243. // A trace selection strategy is implemented as a sub-class of Ensemble. The
  244. // trace through a block B is computed by two DFS traversals of the CFG
  245. // starting from B. One upwards, and one downwards. During the upwards DFS,
  246. // pickTracePred() is called on the post-ordered blocks. During the downwards
  247. // DFS, pickTraceSucc() is called in a post-order.
  248. //
  249. // We never allow traces that leave loops, but we do allow traces to enter
  250. // nested loops. We also never allow traces to contain back-edges.
  251. //
  252. // This means that a loop header can never appear above the center block of a
  253. // trace, except as the trace head. Below the center block, loop exiting edges
  254. // are banned.
  255. //
  256. // Return true if an edge from the From loop to the To loop is leaving a loop.
  257. // Either of To and From can be null.
  258. static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  259. return From && !From->contains(To);
  260. }
// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  // Strategy hooks: choose the trace neighbor minimizing InstrDepth/InstrHeight.
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
    : MachineTraceMetrics::Ensemble(mtm) {}
};
}
  273. // Select the preferred predecessor for MBB.
  274. const MachineBasicBlock*
  275. MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
  276. if (MBB->pred_empty())
  277. return nullptr;
  278. const MachineLoop *CurLoop = getLoopFor(MBB);
  279. // Don't leave loops, and never follow back-edges.
  280. if (CurLoop && MBB == CurLoop->getHeader())
  281. return nullptr;
  282. unsigned CurCount = MTM.getResources(MBB)->InstrCount;
  283. const MachineBasicBlock *Best = nullptr;
  284. unsigned BestDepth = 0;
  285. for (MachineBasicBlock::const_pred_iterator
  286. I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
  287. const MachineBasicBlock *Pred = *I;
  288. const MachineTraceMetrics::TraceBlockInfo *PredTBI =
  289. getDepthResources(Pred);
  290. // Ignore cycles that aren't natural loops.
  291. if (!PredTBI)
  292. continue;
  293. // Pick the predecessor that would give this block the smallest InstrDepth.
  294. unsigned Depth = PredTBI->InstrDepth + CurCount;
  295. if (!Best || Depth < BestDepth)
  296. Best = Pred, BestDepth = Depth;
  297. }
  298. return Best;
  299. }
  300. // Select the preferred successor for MBB.
  301. const MachineBasicBlock*
  302. MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
  303. if (MBB->pred_empty())
  304. return nullptr;
  305. const MachineLoop *CurLoop = getLoopFor(MBB);
  306. const MachineBasicBlock *Best = nullptr;
  307. unsigned BestHeight = 0;
  308. for (MachineBasicBlock::const_succ_iterator
  309. I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
  310. const MachineBasicBlock *Succ = *I;
  311. // Don't consider back-edges.
  312. if (CurLoop && Succ == CurLoop->getHeader())
  313. continue;
  314. // Don't consider successors exiting CurLoop.
  315. if (isExitingLoop(CurLoop, getLoopFor(Succ)))
  316. continue;
  317. const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
  318. getHeightResources(Succ);
  319. // Ignore cycles that aren't natural loops.
  320. if (!SuccTBI)
  321. continue;
  322. // Pick the successor that would give this block the smallest InstrHeight.
  323. unsigned Height = SuccTBI->InstrHeight;
  324. if (!Best || Height < BestHeight)
  325. Best = Succ, BestHeight = Height;
  326. }
  327. return Best;
  328. }
  329. // Get an Ensemble sub-class for the requested trace strategy.
  330. MachineTraceMetrics::Ensemble *
  331. MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
  332. assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
  333. Ensemble *&E = Ensembles[strategy];
  334. if (E)
  335. return E;
  336. // Allocate new Ensemble on demand.
  337. switch (strategy) {
  338. case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
  339. default: llvm_unreachable("Invalid trace strategy enum");
  340. }
  341. }
  342. void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
  343. DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
  344. BlockInfo[MBB->getNumber()].invalidate();
  345. for (unsigned i = 0; i != TS_NumStrategies; ++i)
  346. if (Ensembles[i])
  347. Ensembles[i]->invalidate(MBB);
  348. }
// Self-check the cached analysis against the current function. The checks
// only run in asserts builds.
void MachineTraceMetrics::verifyAnalysis() const {
  // Nothing cached before runOnMachineFunction() or after releaseMemory().
  if (!MF)
    return;
#ifndef NDEBUG
  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->verify();
#endif
}
  359. //===----------------------------------------------------------------------===//
  360. // Trace building
  361. //===----------------------------------------------------------------------===//
  362. //
  363. // Traces are built by two CFG traversals. To avoid recomputing too much, use a
  364. // set abstraction that confines the search to the current loop, and doesn't
  365. // revisit blocks.
namespace {
// Working state for the pruned post-order traversals in computeTrace().
struct LoopBounds {
  // Per-block trace info, indexed by block number; used to skip blocks whose
  // depth/height has already been computed.
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  // Blocks already seen in the current traversal; guards against CFG cycles
  // that MachineLoopInfo doesn't recognize as natural loops.
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  // False while searching upwards for the trace head, true while searching
  // downwards for the trace tail.
  bool Downward;
  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops)
    : Blocks(blocks), Loops(loops), Downward(false) {}
};
}
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {
template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;
public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}
  void finishPostorder(const MachineBasicBlock*) {}
  // Return true if the traversal should follow the edge From->To. The same
  // storage is reused for both traversal directions; LB.Downward selects
  // which validity flag and which loop-header test applies.
  bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To);
  }
};
}
/// Compute the trace through MBB.
///
/// Two pruned post-order traversals rooted at MBB: an inverse (upwards) walk
/// that picks each block's preferred predecessor and accumulates depth
/// resources, then a forward (downwards) walk that picks preferred successors
/// and accumulates height resources. Post-order guarantees a block's chosen
/// neighbor is processed before the block itself.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
        << MBB->getNumber() << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);
  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
  for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
       I != E; ++I) {
    DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(*I);
    DEBUG({
      if (TBI.Pred)
        dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(*I);
  }
  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
  for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
       I != E; ++I) {
    DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(*I);
    DEBUG({
      if (TBI.Succ)
        dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(*I);
  }
}
/// Invalidate traces through BadMBB.
///
/// Two worklist walks: upwards to invalidate heights of blocks whose trace
/// runs down through BadMBB, and downwards to invalidate depths of blocks
/// whose trace runs up through BadMBB. Only blocks that actually chose the
/// invalidated block as their preferred neighbor are touched.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
  // Invalidate height resources of blocks above MBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
            << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (MachineBasicBlock::const_pred_iterator
           I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
        TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
        // Already-invalid blocks were handled when they were invalidated.
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(*I);
          continue;
        }
        // Verify that TBI.Succ is actually a *I successor.
        assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }
  // Invalidate depth resources of blocks below MBB.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
            << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (MachineBasicBlock::const_succ_iterator
           I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
        TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(*I);
          continue;
        }
        // Verify that TBI.Pred is actually a *I predecessor.
        assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }
  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}
// Consistency check for this ensemble's cached traces (asserts builds only):
// trace links must match the current CFG, valid blocks must have valid
// preferred neighbors, and no trace may contain a loop back-edge.
void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      // A loop header may only appear at the top of a trace, so it can never
      // have a trace predecessor.
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}
  543. //===----------------------------------------------------------------------===//
  544. // Data Dependencies
  545. //===----------------------------------------------------------------------===//
  546. //
  547. // Compute the depth and height of each instruction based on data dependencies
  548. // and instruction latencies. These cycle numbers assume that the CPU can issue
  549. // an infinite number of instructions per cycle as long as their dependencies
  550. // are ready.
  551. // A data dependency is represented as a defining MI and operand numbers on the
  552. // defining and using MI.
namespace {
/// A data dependency: operand DefOp of DefMI defines the value that operand
/// UseOp of the using instruction reads.
struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;
  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
    : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
  /// Create a DataDep from an SSA form virtual register.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
    : UseOp(UseOp) {
    assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    // SSA form guarantees exactly one def per virtual register.
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};
}
// Get the input data dependencies that must be ready before UseMI can issue.
// Collects one DataDep per virtual register read by UseMI.
// Return true if UseMI has any physreg operands (the caller tracks those
// separately with a regunit set).
static bool getDataDeps(const MachineInstr *UseMI,
                        SmallVectorImpl<DataDep> &Deps,
                        const MachineRegisterInfo *MRI) {
  bool HasPhysRegs = false;
  for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      HasPhysRegs = true;
      continue;
    }
    // Collect virtual register reads.
    if (MO->readsReg())
      Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
  }
  return HasPhysRegs;
}
  594. // Get the input data dependencies of a PHI instruction, using Pred as the
  595. // preferred predecessor.
  596. // This will add at most one dependency to Deps.
  597. static void getPHIDeps(const MachineInstr *UseMI,
  598. SmallVectorImpl<DataDep> &Deps,
  599. const MachineBasicBlock *Pred,
  600. const MachineRegisterInfo *MRI) {
  601. // No predecessor at the beginning of a trace. Ignore dependencies.
  602. if (!Pred)
  603. return;
  604. assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
  605. for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
  606. if (UseMI->getOperand(i + 1).getMBB() == Pred) {
  607. unsigned Reg = UseMI->getOperand(i).getReg();
  608. Deps.push_back(DataDep(MRI, Reg, i));
  609. return;
  610. }
  611. }
  612. }
// Keep track of physreg data dependencies by recording each live register unit.
// Associate each regunit with an instruction operand. Depending on the
// direction instructions are scanned, it could be the operand that defined the
// regunit, or the highest operand to read the regunit.
namespace {
struct LiveRegUnit {
  // The register unit this entry tracks; also the SparseSet key.
  unsigned RegUnit;
  // Issue cycle associated with MI. When scanning upwards this holds the
  // issue height of the highest reader (see updatePhysDepsUpwards); the
  // downward scan leaves it at 0.
  unsigned Cycle;
  // The instruction currently associated with this regunit (defining it when
  // scanning down, last reading it when scanning up). Null until assigned.
  const MachineInstr *MI;
  // Operand number of MI that references the regunit.
  unsigned Op;

  // SparseSet uses this as the element's index.
  unsigned getSparseSetIndex() const { return RegUnit; }

  LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
};
}
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
//
// Any regunit read by UseMI that is currently live in RegUnits becomes a
// DataDep on the recorded defining instruction/operand. Afterwards, RegUnits
// is updated to describe liveness below UseMI.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> Kills;
  SmallVector<unsigned, 8> LiveDefOps;

  for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;
    // Track live defs and kills for updating RegUnits.
    if (MO->isDef()) {
      if (MO->isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(MO.getOperandNo());
    } else if (MO->isKill())
      Kills.push_back(Reg);
    // Identify dependencies. One dependency per register is enough, so stop
    // at the first live regunit found for this operand.
    if (!MO->readsReg())
      continue;
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills. Erasing killed units before processing defs means a register
  // that is both killed and redefined here ends up live, tracked by the def.
  for (unsigned i = 0, e = Kills.size(); i != e; ++i)
    for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
      RegUnits.erase(*Units);

  // Second, live defs. Record UseMI as the new definer of each regunit.
  for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
    unsigned DefOp = LiveDefOps[i];
    for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
         Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}
  676. /// The length of the critical path through a trace is the maximum of two path
  677. /// lengths:
  678. ///
  679. /// 1. The maximum height+depth over all instructions in the trace center block.
  680. ///
  681. /// 2. The longest cross-block dependency chain. For small blocks, it is
  682. /// possible that the critical path through the trace doesn't include any
  683. /// instructions in the block.
  684. ///
  685. /// This function computes the second number from the live-in list of the
  686. /// center block.
  687. unsigned MachineTraceMetrics::Ensemble::
  688. computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
  689. assert(TBI.HasValidInstrDepths && "Missing depth info");
  690. assert(TBI.HasValidInstrHeights && "Missing height info");
  691. unsigned MaxLen = 0;
  692. for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
  693. const LiveInReg &LIR = TBI.LiveIns[i];
  694. if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
  695. continue;
  696. const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
  697. // Ignore dependencies outside the current trace.
  698. const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
  699. if (!DefTBI.isUsefulDominator(TBI))
  700. continue;
  701. unsigned Len = LIR.Height + Cycles[DefMI].Depth;
  702. MaxLen = std::max(MaxLen, Len);
  703. }
  704. return MaxLen;
  705. }
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
///
/// The depth of an instruction is the earliest cycle it can issue, given the
/// depths and latencies of everything it depends on within the trace.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  // Deps is reused across instructions to avoid reallocation.
  SmallVector<DataDep, 8> Deps;
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    // This needs height info, which is only present if the other direction
    // has already been computed for this block.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      // Collect all data dependencies. PHIs only depend on the value arriving
      // from the trace predecessor; other instructions may also carry
      // physreg dependencies tracked via RegUnits.
      Deps.clear();
      if (UseMI.isPHI())
        getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI);
      else if (getDataDeps(&UseMI, Deps, MTM.MRI))
        updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

      // Filter and process dependencies, computing the earliest issue cycle.
      unsigned Cycle = 0;
      for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
        const DataDep &Dep = Deps[i];
        const TraceBlockInfo &DepTBI =
          BlockInfo[Dep.DefMI->getParent()->getNumber()];
        // Ignore dependencies from outside the current trace.
        if (!DepTBI.isUsefulDominator(TBI))
          continue;
        assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
        unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
        // Add latency if DefMI is a real instruction. Transients get latency 0.
        if (!Dep.DefMI->isTransient())
          DepCycle += MTM.SchedModel
            .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
        Cycle = std::max(Cycle, DepCycle);
      }
      // Remember the instruction depth.
      InstrCycles &MICycles = Cycles[&UseMI];
      MICycles.Depth = Cycle;

      // Without height info we can only record the depth; the critical path
      // update below needs both directions.
      if (!TBI.HasValidInstrHeights) {
        DEBUG(dbgs() << Cycle << '\t' << UseMI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
    }
  }
}
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  // Operand numbers that read a physreg. They are recorded in RegUnits only
  // after the def scan below, so that the final Height is used.
  SmallVector<unsigned, 8> ReadOps;
  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;
    if (MO->readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO->isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      // Transient instructions contribute no latency.
      if (!MI->isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel
          .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
    unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != MI) {
        LRU.Cycle = Height;
        LRU.MI = MI;
        LRU.Op = ReadOps[i];
      }
    }
  }
  return Height;
}
  840. typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
  841. // Push the height of DefMI upwards if required to match UseMI.
  842. // Return true if this is the first time DefMI was seen.
  843. static bool pushDepHeight(const DataDep &Dep,
  844. const MachineInstr *UseMI, unsigned UseHeight,
  845. MIHeightMap &Heights,
  846. const TargetSchedModel &SchedModel,
  847. const TargetInstrInfo *TII) {
  848. // Adjust height by Dep.DefMI latency.
  849. if (!Dep.DefMI->isTransient())
  850. UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
  851. UseMI, Dep.UseOp);
  852. // Update Heights[DefMI] to be the maximum height seen.
  853. MIHeightMap::iterator I;
  854. bool New;
  855. std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  856. if (New)
  857. return true;
  858. // DefMI has been pushed before. Give it the max height.
  859. if (I->second < UseHeight)
  860. I->second = UseHeight;
  861. return false;
  862. }
  863. /// Assuming that the virtual register defined by DefMI:DefOp was used by
  864. /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
  865. /// when reaching the block that contains DefMI.
  866. void MachineTraceMetrics::Ensemble::
  867. addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
  868. ArrayRef<const MachineBasicBlock*> Trace) {
  869. assert(!Trace.empty() && "Trace should contain at least one block");
  870. unsigned Reg = DefMI->getOperand(DefOp).getReg();
  871. assert(TargetRegisterInfo::isVirtualRegister(Reg));
  872. const MachineBasicBlock *DefMBB = DefMI->getParent();
  873. // Reg is live-in to all blocks in Trace that follow DefMBB.
  874. for (unsigned i = Trace.size(); i; --i) {
  875. const MachineBasicBlock *MBB = Trace[i-1];
  876. if (MBB == DefMBB)
  877. return;
  878. TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
  879. // Just add the register. The height will be updated later.
  880. TBI.LiveIns.push_back(Reg);
  881. }
  882. }
/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
///
/// The height of an instruction is the number of cycles between its issue and
/// the end of the trace, as required by the instructions that consume its
/// results.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    // Live-ins are recomputed from scratch for every invalidated block.
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
      LiveInReg LI = TBI.LiveIns[i];
      if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  // Deps is reused across instructions to avoid reallocation.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      // PHIs are clustered at the top of the block; stop at the first
      // non-PHI.
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(&PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), &PHI, Height,
                            Heights, MTM.SchedModel, MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
         BI != BB;) {
      const MachineInstr *MI = --BI;

      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
                                      MTM.SchedModel, MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (unsigned i = 0, e = Deps.size(); i != e; ++i)
        if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);

      InstrCycles &MICycles = Cycles[MI];
      MICycles.Height = Cycle;
      // Without depth info for this block, the critical path cannot be
      // updated here.
      if (!TBI.HasValidInstrDepths) {
        DEBUG(dbgs() << Cycle << '\t' << *MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
    for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
      LiveInReg &LIR = TBI.LiveIns[i];
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (SparseSet<LiveRegUnit>::const_iterator
         RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
      TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
      DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
                   << '@' << RI->Cycle);
    }
    DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}
  1030. MachineTraceMetrics::Trace
  1031. MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  1032. // FIXME: Check cache tags, recompute as needed.
  1033. computeTrace(MBB);
  1034. computeInstrDepths(MBB);
  1035. computeInstrHeights(MBB);
  1036. return Trace(*this, BlockInfo[MBB->getNumber()]);
  1037. }
  1038. unsigned
  1039. MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
  1040. assert(MI && "Not an instruction.");
  1041. assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
  1042. "MI must be in the trace center block");
  1043. InstrCycles Cyc = getInstrCycles(MI);
  1044. return getCriticalPath() - (Cyc.Depth + Cyc.Height);
  1045. }
  1046. unsigned
  1047. MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
  1048. const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  1049. SmallVector<DataDep, 1> Deps;
  1050. getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  1051. assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  1052. DataDep &Dep = Deps.front();
  1053. unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
  1054. // Add latency if DefMI is a real instruction. Transients get latency 0.
  1055. if (!Dep.DefMI->isTransient())
  1056. DepCycle += TE.MTM.SchedModel
  1057. .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp);
  1058. return DepCycle;
  1059. }
  1060. /// When bottom is set include instructions in current block in estimate.
  1061. unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  1062. // Find the limiting processor resource.
  1063. // Numbers have been pre-scaled to be comparable.
  1064. unsigned PRMax = 0;
  1065. ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  1066. if (Bottom) {
  1067. ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
  1068. for (unsigned K = 0; K != PRDepths.size(); ++K)
  1069. PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  1070. } else {
  1071. for (unsigned K = 0; K != PRDepths.size(); ++K)
  1072. PRMax = std::max(PRMax, PRDepths[K]);
  1073. }
  1074. // Convert to cycle count.
  1075. PRMax = TE.MTM.getCycles(PRMax);
  1076. /// All instructions before current block
  1077. unsigned Instrs = TBI.InstrDepth;
  1078. // plus instructions in current block
  1079. if (Bottom)
  1080. Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  1081. if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
  1082. Instrs /= IW;
  1083. // Assume issue width 1 without a schedule model.
  1084. return std::max(Instrs, PRMax);
  1085. }
/// Estimate the resource-limited length of the whole trace, optionally
/// extended with the blocks in Extrablocks and the instructions described by
/// ExtraInstrs, and with the instructions in RemoveInstrs discounted.
/// Returns the maximum of the issue-width-limited instruction count and the
/// most contended processor resource, in cycles.
unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions
  // Sums the scaled cycles that the given sched classes spend on resource
  // ResourceIdx; invalid sched classes are skipped.
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
      ->unsigned {
    unsigned Cycles = 0;
    for (unsigned I = 0; I != Instrs.size(); ++I) {
      const MCSchedClassDesc *SC = Instrs[I];
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles +=
            (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (unsigned I = 0; I != Extrablocks.size(); ++I)
      PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    // NOTE(review): unsigned subtraction — assumes RemoveInstrs' cycles never
    // exceed the accumulated total for this resource; confirm with callers.
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
    Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
  1137. bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
  1138. const MachineInstr *UseMI) const {
  1139. if (DefMI->getParent() == UseMI->getParent())
  1140. return true;
  1141. const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
  1142. const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
  1143. return DepTBI.isUsefulDominator(TBI);
  1144. }
  1145. void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  1146. OS << getName() << " ensemble:\n";
  1147. for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
  1148. OS << " BB#" << i << '\t';
  1149. BlockInfo[i].print(OS);
  1150. OS << '\n';
  1151. }
  1152. }
  1153. void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  1154. if (hasValidDepth()) {
  1155. OS << "depth=" << InstrDepth;
  1156. if (Pred)
  1157. OS << " pred=BB#" << Pred->getNumber();
  1158. else
  1159. OS << " pred=null";
  1160. OS << " head=BB#" << Head;
  1161. if (HasValidInstrDepths)
  1162. OS << " +instrs";
  1163. } else
  1164. OS << "depth invalid";
  1165. OS << ", ";
  1166. if (hasValidHeight()) {
  1167. OS << "height=" << InstrHeight;
  1168. if (Succ)
  1169. OS << " succ=BB#" << Succ->getNumber();
  1170. else
  1171. OS << " succ=null";
  1172. OS << " tail=BB#" << Tail;
  1173. if (HasValidInstrHeights)
  1174. OS << " +instrs";
  1175. } else
  1176. OS << "height invalid";
  1177. if (HasValidInstrDepths && HasValidInstrHeights)
  1178. OS << ", crit=" << CriticalPath;
  1179. }
/// Print the trace centered on this block: head/center/tail summary followed
/// by the predecessor chain and successor chain of block numbers.
void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  // Recover the center block number from TBI's position within BlockInfo.
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
     << " --> BB#" << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  OS << "\nBB#" << MBBNum;
  // Walk the Pred links up to the trace head.
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- BB#" << Num;
    Block = &TE.BlockInfo[Num];
  }

  Block = &TBI;
  OS << "\n ";
  // Walk the Succ links down to the trace tail.
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> BB#" << Num;
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}