MachineBlockPlacement.cpp 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982
  1. //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements basic block placement transformations using the CFG
  11. // structure and branch probability estimates.
  12. //
  13. // The pass strives to preserve the structure of the CFG (that is, retain
  14. // a topological ordering of basic blocks) in the absense of a *strong* signal
  15. // to the contrary from probabilities. However, within the CFG structure, it
  16. // attempts to choose an ordering which favors placing more likely sequences of
  17. // blocks adjacent to each other.
  18. //
  19. // The algorithm works from the inner-most loop within a function outward, and
  20. // at each stage walks through the basic blocks, trying to coalesce them into
  21. // sequential chains where allowed by the CFG (or demanded by heavy
  22. // probabilities). Finally, it walks the blocks in topological order, and the
  23. // first time it reaches a chain of basic blocks, it schedules them in the
  24. // function in-order.
  25. //
  26. //===----------------------------------------------------------------------===//
  27. #define DEBUG_TYPE "block-placement2"
  28. #include "llvm/CodeGen/MachineBasicBlock.h"
  29. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
  30. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  31. #include "llvm/CodeGen/MachineFunction.h"
  32. #include "llvm/CodeGen/MachineFunctionPass.h"
  33. #include "llvm/CodeGen/MachineLoopInfo.h"
  34. #include "llvm/CodeGen/MachineModuleInfo.h"
  35. #include "llvm/CodeGen/Passes.h"
  36. #include "llvm/Support/Allocator.h"
  37. #include "llvm/Support/Debug.h"
  38. #include "llvm/ADT/DenseMap.h"
  39. #include "llvm/ADT/SmallPtrSet.h"
  40. #include "llvm/ADT/SmallVector.h"
  41. #include "llvm/ADT/Statistic.h"
  42. #include "llvm/Target/TargetInstrInfo.h"
  43. #include "llvm/Target/TargetLowering.h"
  44. #include <algorithm>
  45. using namespace llvm;
  46. STATISTIC(NumCondBranches, "Number of conditional branches");
  47. STATISTIC(NumUncondBranches, "Number of uncondittional branches");
  48. STATISTIC(CondBranchTakenFreq,
  49. "Potential frequency of taking conditional branches");
  50. STATISTIC(UncondBranchTakenFreq,
  51. "Potential frequency of taking unconditional branches");
  52. namespace {
  53. class BlockChain;
  54. /// \brief Type for our function-wide basic block -> block chain mapping.
  55. typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
  56. }
  57. namespace {
  58. /// \brief A chain of blocks which will be laid out contiguously.
  59. ///
  60. /// This is the datastructure representing a chain of consecutive blocks that
  61. /// are profitable to layout together in order to maximize fallthrough
  62. /// probabilities. We also can use a block chain to represent a sequence of
  63. /// basic blocks which have some external (correctness) requirement for
  64. /// sequential layout.
  65. ///
  66. /// Eventually, the block chains will form a directed graph over the function.
  67. /// We provide an SCC-supporting-iterator in order to quicky build and walk the
  68. /// SCCs of block chains within a function.
  69. ///
  70. /// The block chains also have support for calculating and caching probability
  71. /// information related to the chain itself versus other chains. This is used
  72. /// for ranking during the final layout of block chains.
  73. class BlockChain {
  74. /// \brief The sequence of blocks belonging to this chain.
  75. ///
  76. /// This is the sequence of blocks for a particular chain. These will be laid
  77. /// out in-order within the function.
  78. SmallVector<MachineBasicBlock *, 4> Blocks;
  79. /// \brief A handle to the function-wide basic block to block chain mapping.
  80. ///
  81. /// This is retained in each block chain to simplify the computation of child
  82. /// block chains for SCC-formation and iteration. We store the edges to child
  83. /// basic blocks, and map them back to their associated chains using this
  84. /// structure.
  85. BlockToChainMapType &BlockToChain;
  86. public:
  87. /// \brief Construct a new BlockChain.
  88. ///
  89. /// This builds a new block chain representing a single basic block in the
  90. /// function. It also registers itself as the chain that block participates
  91. /// in with the BlockToChain mapping.
  92. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
  93. : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
  94. assert(BB && "Cannot create a chain with a null basic block");
  95. BlockToChain[BB] = this;
  96. }
  97. /// \brief Iterator over blocks within the chain.
  98. typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator;
  99. /// \brief Beginning of blocks within the chain.
  100. iterator begin() const { return Blocks.begin(); }
  101. /// \brief End of blocks within the chain.
  102. iterator end() const { return Blocks.end(); }
  103. /// \brief Merge a block chain into this one.
  104. ///
  105. /// This routine merges a block chain into this one. It takes care of forming
  106. /// a contiguous sequence of basic blocks, updating the edge list, and
  107. /// updating the block -> chain mapping. It does not free or tear down the
  108. /// old chain, but the old chain's block list is no longer valid.
  109. void merge(MachineBasicBlock *BB, BlockChain *Chain) {
  110. assert(BB);
  111. assert(!Blocks.empty());
  112. // Fast path in case we don't have a chain already.
  113. if (!Chain) {
  114. assert(!BlockToChain[BB]);
  115. Blocks.push_back(BB);
  116. BlockToChain[BB] = this;
  117. return;
  118. }
  119. assert(BB == *Chain->begin());
  120. assert(Chain->begin() != Chain->end());
  121. // Update the incoming blocks to point to this chain, and add them to the
  122. // chain structure.
  123. for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
  124. BI != BE; ++BI) {
  125. Blocks.push_back(*BI);
  126. assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
  127. BlockToChain[*BI] = this;
  128. }
  129. }
  130. /// \brief Count of predecessors within the loop currently being processed.
  131. ///
  132. /// This count is updated at each loop we process to represent the number of
  133. /// in-loop predecessors of this chain.
  134. unsigned LoopPredecessors;
  135. };
  136. }
  137. namespace {
  138. class MachineBlockPlacement : public MachineFunctionPass {
  139. /// \brief A typedef for a block filter set.
  140. typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
  141. /// \brief A handle to the branch probability pass.
  142. const MachineBranchProbabilityInfo *MBPI;
  143. /// \brief A handle to the function-wide block frequency pass.
  144. const MachineBlockFrequencyInfo *MBFI;
  145. /// \brief A handle to the loop info.
  146. const MachineLoopInfo *MLI;
  147. /// \brief A handle to the target's instruction info.
  148. const TargetInstrInfo *TII;
  149. /// \brief A handle to the target's lowering info.
  150. const TargetLowering *TLI;
  151. /// \brief Allocator and owner of BlockChain structures.
  152. ///
  153. /// We build BlockChains lazily by merging together high probability BB
  154. /// sequences acording to the "Algo2" in the paper mentioned at the top of
  155. /// the file. To reduce malloc traffic, we allocate them using this slab-like
  156. /// allocator, and destroy them after the pass completes.
  157. SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
  158. /// \brief Function wide BasicBlock to BlockChain mapping.
  159. ///
  160. /// This mapping allows efficiently moving from any given basic block to the
  161. /// BlockChain it participates in, if any. We use it to, among other things,
  162. /// allow implicitly defining edges between chains as the existing edges
  163. /// between basic blocks.
  164. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
  165. void markChainSuccessors(BlockChain &Chain,
  166. MachineBasicBlock *LoopHeaderBB,
  167. SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
  168. const BlockFilterSet *BlockFilter = 0);
  169. MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
  170. BlockChain &Chain,
  171. const BlockFilterSet *BlockFilter);
  172. MachineBasicBlock *selectBestCandidateBlock(
  173. BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
  174. const BlockFilterSet *BlockFilter);
  175. MachineBasicBlock *getFirstUnplacedBlock(
  176. MachineFunction &F,
  177. const BlockChain &PlacedChain,
  178. MachineFunction::iterator &PrevUnplacedBlockIt,
  179. const BlockFilterSet *BlockFilter);
  180. void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
  181. SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
  182. const BlockFilterSet *BlockFilter = 0);
  183. MachineBasicBlock *findBestLoopTop(MachineFunction &F,
  184. MachineLoop &L,
  185. const BlockFilterSet &LoopBlockSet);
  186. void buildLoopChains(MachineFunction &F, MachineLoop &L);
  187. void buildCFGChains(MachineFunction &F);
  188. void AlignLoops(MachineFunction &F);
  189. public:
  190. static char ID; // Pass identification, replacement for typeid
  191. MachineBlockPlacement() : MachineFunctionPass(ID) {
  192. initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
  193. }
  194. bool runOnMachineFunction(MachineFunction &F);
  195. void getAnalysisUsage(AnalysisUsage &AU) const {
  196. AU.addRequired<MachineBranchProbabilityInfo>();
  197. AU.addRequired<MachineBlockFrequencyInfo>();
  198. AU.addRequired<MachineLoopInfo>();
  199. MachineFunctionPass::getAnalysisUsage(AU);
  200. }
  201. };
  202. }
  203. char MachineBlockPlacement::ID = 0;
  204. char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
  205. INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
  206. "Branch Probability Basic Block Placement", false, false)
  207. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  208. INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
  209. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  210. INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
  211. "Branch Probability Basic Block Placement", false, false)
  212. #ifndef NDEBUG
  213. /// \brief Helper to print the name of a MBB.
  214. ///
  215. /// Only used by debug logging.
  216. static std::string getBlockName(MachineBasicBlock *BB) {
  217. std::string Result;
  218. raw_string_ostream OS(Result);
  219. OS << "BB#" << BB->getNumber()
  220. << " (derived from LLVM BB '" << BB->getName() << "')";
  221. OS.flush();
  222. return Result;
  223. }
  224. /// \brief Helper to print the number of a MBB.
  225. ///
  226. /// Only used by debug logging.
  227. static std::string getBlockNum(MachineBasicBlock *BB) {
  228. std::string Result;
  229. raw_string_ostream OS(Result);
  230. OS << "BB#" << BB->getNumber();
  231. OS.flush();
  232. return Result;
  233. }
  234. #endif
  235. /// \brief Mark a chain's successors as having one fewer preds.
  236. ///
  237. /// When a chain is being merged into the "placed" chain, this routine will
  238. /// quickly walk the successors of each block in the chain and mark them as
  239. /// having one fewer active predecessor. It also adds any successors of this
  240. /// chain which reach the zero-predecessor state to the worklist passed in.
  241. void MachineBlockPlacement::markChainSuccessors(
  242. BlockChain &Chain,
  243. MachineBasicBlock *LoopHeaderBB,
  244. SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
  245. const BlockFilterSet *BlockFilter) {
  246. // Walk all the blocks in this chain, marking their successors as having
  247. // a predecessor placed.
  248. for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
  249. CBI != CBE; ++CBI) {
  250. // Add any successors for which this is the only un-placed in-loop
  251. // predecessor to the worklist as a viable candidate for CFG-neutral
  252. // placement. No subsequent placement of this block will violate the CFG
  253. // shape, so we get to use heuristics to choose a favorable placement.
  254. for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
  255. SE = (*CBI)->succ_end();
  256. SI != SE; ++SI) {
  257. if (BlockFilter && !BlockFilter->count(*SI))
  258. continue;
  259. BlockChain &SuccChain = *BlockToChain[*SI];
  260. // Disregard edges within a fixed chain, or edges to the loop header.
  261. if (&Chain == &SuccChain || *SI == LoopHeaderBB)
  262. continue;
  263. // This is a cross-chain edge that is within the loop, so decrement the
  264. // loop predecessor count of the destination chain.
  265. if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
  266. BlockWorkList.push_back(*SuccChain.begin());
  267. }
  268. }
  269. }
  270. /// \brief Select the best successor for a block.
  271. ///
  272. /// This looks across all successors of a particular block and attempts to
  273. /// select the "best" one to be the layout successor. It only considers direct
  274. /// successors which also pass the block filter. It will attempt to avoid
  275. /// breaking CFG structure, but cave and break such structures in the case of
  276. /// very hot successor edges.
  277. ///
  278. /// \returns The best successor block found, or null if none are viable.
  279. MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
  280. MachineBasicBlock *BB, BlockChain &Chain,
  281. const BlockFilterSet *BlockFilter) {
  282. const BranchProbability HotProb(4, 5); // 80%
  283. MachineBasicBlock *BestSucc = 0;
  284. // FIXME: Due to the performance of the probability and weight routines in
  285. // the MBPI analysis, we manually compute probabilities using the edge
  286. // weights. This is suboptimal as it means that the somewhat subtle
  287. // definition of edge weight semantics is encoded here as well. We should
  288. // improve the MBPI interface to effeciently support query patterns such as
  289. // this.
  290. uint32_t BestWeight = 0;
  291. uint32_t WeightScale = 0;
  292. uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
  293. DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
  294. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
  295. SE = BB->succ_end();
  296. SI != SE; ++SI) {
  297. if (BlockFilter && !BlockFilter->count(*SI))
  298. continue;
  299. BlockChain &SuccChain = *BlockToChain[*SI];
  300. if (&SuccChain == &Chain) {
  301. DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
  302. continue;
  303. }
  304. if (*SI != *SuccChain.begin()) {
  305. DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
  306. continue;
  307. }
  308. uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
  309. BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
  310. // Only consider successors which are either "hot", or wouldn't violate
  311. // any CFG constraints.
  312. if (SuccChain.LoopPredecessors != 0) {
  313. if (SuccProb < HotProb) {
  314. DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
  315. continue;
  316. }
  317. // Make sure that a hot successor doesn't have a globally more important
  318. // predecessor.
  319. BlockFrequency CandidateEdgeFreq
  320. = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
  321. bool BadCFGConflict = false;
  322. for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
  323. PE = (*SI)->pred_end();
  324. PI != PE; ++PI) {
  325. if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
  326. BlockToChain[*PI] == &Chain)
  327. continue;
  328. BlockFrequency PredEdgeFreq
  329. = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
  330. if (PredEdgeFreq >= CandidateEdgeFreq) {
  331. BadCFGConflict = true;
  332. break;
  333. }
  334. }
  335. if (BadCFGConflict) {
  336. DEBUG(dbgs() << " " << getBlockName(*SI)
  337. << " -> non-cold CFG conflict\n");
  338. continue;
  339. }
  340. }
  341. DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
  342. << " (prob)"
  343. << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
  344. << "\n");
  345. if (BestSucc && BestWeight >= SuccWeight)
  346. continue;
  347. BestSucc = *SI;
  348. BestWeight = SuccWeight;
  349. }
  350. return BestSucc;
  351. }
  352. namespace {
  353. /// \brief Predicate struct to detect blocks already placed.
  354. class IsBlockPlaced {
  355. const BlockChain &PlacedChain;
  356. const BlockToChainMapType &BlockToChain;
  357. public:
  358. IsBlockPlaced(const BlockChain &PlacedChain,
  359. const BlockToChainMapType &BlockToChain)
  360. : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
  361. bool operator()(MachineBasicBlock *BB) const {
  362. return BlockToChain.lookup(BB) == &PlacedChain;
  363. }
  364. };
  365. }
  366. /// \brief Select the best block from a worklist.
  367. ///
  368. /// This looks through the provided worklist as a list of candidate basic
  369. /// blocks and select the most profitable one to place. The definition of
  370. /// profitable only really makes sense in the context of a loop. This returns
  371. /// the most frequently visited block in the worklist, which in the case of
  372. /// a loop, is the one most desirable to be physically close to the rest of the
  373. /// loop body in order to improve icache behavior.
  374. ///
  375. /// \returns The best block found, or null if none are viable.
  376. MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
  377. BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
  378. const BlockFilterSet *BlockFilter) {
  379. // Once we need to walk the worklist looking for a candidate, cleanup the
  380. // worklist of already placed entries.
  381. // FIXME: If this shows up on profiles, it could be folded (at the cost of
  382. // some code complexity) into the loop below.
  383. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
  384. IsBlockPlaced(Chain, BlockToChain)),
  385. WorkList.end());
  386. MachineBasicBlock *BestBlock = 0;
  387. BlockFrequency BestFreq;
  388. for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
  389. WBE = WorkList.end();
  390. WBI != WBE; ++WBI) {
  391. assert(!BlockFilter || BlockFilter->count(*WBI));
  392. BlockChain &SuccChain = *BlockToChain[*WBI];
  393. if (&SuccChain == &Chain) {
  394. DEBUG(dbgs() << " " << getBlockName(*WBI)
  395. << " -> Already merged!\n");
  396. continue;
  397. }
  398. assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
  399. BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
  400. DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
  401. << " (freq)\n");
  402. if (BestBlock && BestFreq >= CandidateFreq)
  403. continue;
  404. BestBlock = *WBI;
  405. BestFreq = CandidateFreq;
  406. }
  407. return BestBlock;
  408. }
  409. /// \brief Retrieve the first unplaced basic block.
  410. ///
  411. /// This routine is called when we are unable to use the CFG to walk through
  412. /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
  413. /// We walk through the function's blocks in order, starting from the
  414. /// LastUnplacedBlockIt. We update this iterator on each call to avoid
  415. /// re-scanning the entire sequence on repeated calls to this routine.
  416. MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
  417. MachineFunction &F, const BlockChain &PlacedChain,
  418. MachineFunction::iterator &PrevUnplacedBlockIt,
  419. const BlockFilterSet *BlockFilter) {
  420. for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
  421. ++I) {
  422. if (BlockFilter && !BlockFilter->count(I))
  423. continue;
  424. if (BlockToChain[I] != &PlacedChain) {
  425. PrevUnplacedBlockIt = I;
  426. // Now select the head of the chain to which the unplaced block belongs
  427. // as the block to place. This will force the entire chain to be placed,
  428. // and satisfies the requirements of merging chains.
  429. return *BlockToChain[I]->begin();
  430. }
  431. }
  432. return 0;
  433. }
  434. void MachineBlockPlacement::buildChain(
  435. MachineBasicBlock *BB,
  436. BlockChain &Chain,
  437. SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
  438. const BlockFilterSet *BlockFilter) {
  439. assert(BB);
  440. assert(BlockToChain[BB] == &Chain);
  441. MachineFunction &F = *BB->getParent();
  442. MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
  443. MachineBasicBlock *LoopHeaderBB = BB;
  444. markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
  445. BB = *llvm::prior(Chain.end());
  446. for (;;) {
  447. assert(BB);
  448. assert(BlockToChain[BB] == &Chain);
  449. assert(*llvm::prior(Chain.end()) == BB);
  450. MachineBasicBlock *BestSucc = 0;
  451. // Look for the best viable successor if there is one to place immediately
  452. // after this block.
  453. BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
  454. // If an immediate successor isn't available, look for the best viable
  455. // block among those we've identified as not violating the loop's CFG at
  456. // this point. This won't be a fallthrough, but it will increase locality.
  457. if (!BestSucc)
  458. BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
  459. if (!BestSucc) {
  460. BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
  461. BlockFilter);
  462. if (!BestSucc)
  463. break;
  464. DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
  465. "layout successor until the CFG reduces\n");
  466. }
  467. // Place this block, updating the datastructures to reflect its placement.
  468. BlockChain &SuccChain = *BlockToChain[BestSucc];
  469. // Zero out LoopPredecessors for the successor we're about to merge in case
  470. // we selected a successor that didn't fit naturally into the CFG.
  471. SuccChain.LoopPredecessors = 0;
  472. DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
  473. << " to " << getBlockNum(BestSucc) << "\n");
  474. markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
  475. Chain.merge(BestSucc, &SuccChain);
  476. BB = *llvm::prior(Chain.end());
  477. }
  478. DEBUG(dbgs() << "Finished forming chain for header block "
  479. << getBlockNum(*Chain.begin()) << "\n");
  480. }
  481. /// \brief Find the best loop top block for layout.
  482. ///
  483. /// This routine implements the logic to analyze the loop looking for the best
  484. /// block to layout at the top of the loop. Typically this is done to maximize
  485. /// fallthrough opportunities.
  486. MachineBasicBlock *
  487. MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
  488. MachineLoop &L,
  489. const BlockFilterSet &LoopBlockSet) {
  490. BlockFrequency BestExitEdgeFreq;
  491. MachineBasicBlock *ExitingBB = 0;
  492. MachineBasicBlock *LoopingBB = 0;
  493. // If there are exits to outer loops, loop rotation can severely limit
  494. // fallthrough opportunites unless it selects such an exit. Keep a set of
  495. // blocks where rotating to exit with that block will reach an outer loop.
  496. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
  497. DEBUG(dbgs() << "Finding best loop exit for: "
  498. << getBlockName(L.getHeader()) << "\n");
  499. for (MachineLoop::block_iterator I = L.block_begin(),
  500. E = L.block_end();
  501. I != E; ++I) {
  502. BlockChain &Chain = *BlockToChain[*I];
  503. // Ensure that this block is at the end of a chain; otherwise it could be
  504. // mid-way through an inner loop or a successor of an analyzable branch.
  505. if (*I != *llvm::prior(Chain.end()))
  506. continue;
  507. // Now walk the successors. We need to establish whether this has a viable
  508. // exiting successor and whether it has a viable non-exiting successor.
  509. // We store the old exiting state and restore it if a viable looping
  510. // successor isn't found.
  511. MachineBasicBlock *OldExitingBB = ExitingBB;
  512. BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
  513. // We also compute and store the best looping successor for use in layout.
  514. MachineBasicBlock *BestLoopSucc = 0;
  515. // FIXME: Due to the performance of the probability and weight routines in
  516. // the MBPI analysis, we use the internal weights. This is only valid
  517. // because it is purely a ranking function, we don't care about anything
  518. // but the relative values.
  519. uint32_t BestLoopSuccWeight = 0;
  520. // FIXME: We also manually compute the probabilities to avoid quadratic
  521. // behavior.
  522. uint32_t WeightScale = 0;
  523. uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
  524. for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
  525. SE = (*I)->succ_end();
  526. SI != SE; ++SI) {
  527. if ((*SI)->isLandingPad())
  528. continue;
  529. if (*SI == *I)
  530. continue;
  531. BlockChain &SuccChain = *BlockToChain[*SI];
  532. // Don't split chains, either this chain or the successor's chain.
  533. if (&Chain == &SuccChain || *SI != *SuccChain.begin()) {
  534. DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: "
  535. : "exiting: ")
  536. << getBlockName(*I) << " -> "
  537. << getBlockName(*SI) << " (chain conflict)\n");
  538. continue;
  539. }
  540. uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
  541. if (LoopBlockSet.count(*SI)) {
  542. DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
  543. << getBlockName(*SI) << " (" << SuccWeight << ")\n");
  544. if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight)
  545. continue;
  546. BestLoopSucc = *SI;
  547. BestLoopSuccWeight = SuccWeight;
  548. continue;
  549. }
  550. BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
  551. BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
  552. DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
  553. << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n");
  554. // Note that we slightly bias this toward an existing layout successor to
  555. // retain incoming order in the absence of better information.
  556. // FIXME: Should we bias this more strongly? It's pretty weak.
  557. if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq ||
  558. ((*I)->isLayoutSuccessor(*SI) &&
  559. !(ExitEdgeFreq < BestExitEdgeFreq))) {
  560. BestExitEdgeFreq = ExitEdgeFreq;
  561. ExitingBB = *I;
  562. }
  563. if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI))
  564. if (ExitLoop->contains(&L))
  565. BlocksExitingToOuterLoop.insert(*I);
  566. }
  567. // Restore the old exiting state, no viable looping successor was found.
  568. if (!BestLoopSucc) {
  569. ExitingBB = OldExitingBB;
  570. BestExitEdgeFreq = OldBestExitEdgeFreq;
  571. continue;
  572. }
  573. // If this was best exiting block thus far, also record the looping block.
  574. if (ExitingBB == *I)
  575. LoopingBB = BestLoopSucc;
  576. }
  577. // Without a candidate exitting block or with only a single block in the
  578. // loop, just use the loop header to layout the loop.
  579. if (!ExitingBB || L.getNumBlocks() == 1)
  580. return L.getHeader();
  581. // Also, if we have exit blocks which lead to outer loops but didn't select
  582. // one of them as the exiting block we are rotating toward, disable loop
  583. // rotation altogether.
  584. if (!BlocksExitingToOuterLoop.empty() &&
  585. !BlocksExitingToOuterLoop.count(ExitingBB))
  586. return L.getHeader();
  587. assert(LoopingBB && "All successors of a loop block are exit blocks!");
  588. DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
  589. DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n");
  590. return LoopingBB;
  591. }
  592. /// \brief Forms basic block chains from the natural loop structures.
  593. ///
  594. /// These chains are designed to preserve the existing *structure* of the code
  595. /// as much as possible. We can then stitch the chains together in a way which
  596. /// both preserves the topological structure and minimizes taken conditional
  597. /// branches.
  598. void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
  599. MachineLoop &L) {
  600. // First recurse through any nested loops, building chains for those inner
  601. // loops.
  602. for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
  603. buildLoopChains(F, **LI);
  604. SmallVector<MachineBasicBlock *, 16> BlockWorkList;
  605. BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
  606. MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet);
  607. BlockChain &LoopChain = *BlockToChain[LayoutTop];
  608. // FIXME: This is a really lame way of walking the chains in the loop: we
  609. // walk the blocks, and use a set to prevent visiting a particular chain
  610. // twice.
  611. SmallPtrSet<BlockChain *, 4> UpdatedPreds;
  612. assert(LoopChain.LoopPredecessors == 0);
  613. UpdatedPreds.insert(&LoopChain);
  614. for (MachineLoop::block_iterator BI = L.block_begin(),
  615. BE = L.block_end();
  616. BI != BE; ++BI) {
  617. BlockChain &Chain = *BlockToChain[*BI];
  618. if (!UpdatedPreds.insert(&Chain))
  619. continue;
  620. assert(Chain.LoopPredecessors == 0);
  621. for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
  622. BCI != BCE; ++BCI) {
  623. assert(BlockToChain[*BCI] == &Chain);
  624. for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
  625. PE = (*BCI)->pred_end();
  626. PI != PE; ++PI) {
  627. if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
  628. continue;
  629. ++Chain.LoopPredecessors;
  630. }
  631. }
  632. if (Chain.LoopPredecessors == 0)
  633. BlockWorkList.push_back(*Chain.begin());
  634. }
  635. buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet);
  636. DEBUG({
  637. // Crash at the end so we get all of the debugging output first.
  638. bool BadLoop = false;
  639. if (LoopChain.LoopPredecessors) {
  640. BadLoop = true;
  641. dbgs() << "Loop chain contains a block without its preds placed!\n"
  642. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  643. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
  644. }
  645. for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
  646. BCI != BCE; ++BCI)
  647. if (!LoopBlockSet.erase(*BCI)) {
  648. // We don't mark the loop as bad here because there are real situations
  649. // where this can occur. For example, with an unanalyzable fallthrough
  650. // from a loop block to a non-loop block or vice versa.
  651. dbgs() << "Loop chain contains a block not contained by the loop!\n"
  652. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  653. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
  654. << " Bad block: " << getBlockName(*BCI) << "\n";
  655. }
  656. if (!LoopBlockSet.empty()) {
  657. BadLoop = true;
  658. for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
  659. LBE = LoopBlockSet.end();
  660. LBI != LBE; ++LBI)
  661. dbgs() << "Loop contains blocks never placed into a chain!\n"
  662. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  663. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
  664. << " Bad block: " << getBlockName(*LBI) << "\n";
  665. }
  666. assert(!BadLoop && "Detected problems with the placement of this loop.");
  667. });
  668. }
  669. void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
  670. // Ensure that every BB in the function has an associated chain to simplify
  671. // the assumptions of the remaining algorithm.
  672. SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
  673. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
  674. MachineBasicBlock *BB = FI;
  675. BlockChain *Chain
  676. = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
  677. // Also, merge any blocks which we cannot reason about and must preserve
  678. // the exact fallthrough behavior for.
  679. for (;;) {
  680. Cond.clear();
  681. MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
  682. if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
  683. break;
  684. MachineFunction::iterator NextFI(llvm::next(FI));
  685. MachineBasicBlock *NextBB = NextFI;
  686. // Ensure that the layout successor is a viable block, as we know that
  687. // fallthrough is a possibility.
  688. assert(NextFI != FE && "Can't fallthrough past the last block.");
  689. DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
  690. << getBlockName(BB) << " -> " << getBlockName(NextBB)
  691. << "\n");
  692. Chain->merge(NextBB, 0);
  693. FI = NextFI;
  694. BB = NextBB;
  695. }
  696. }
  697. // Build any loop-based chains.
  698. for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
  699. ++LI)
  700. buildLoopChains(F, **LI);
  701. SmallVector<MachineBasicBlock *, 16> BlockWorkList;
  702. SmallPtrSet<BlockChain *, 4> UpdatedPreds;
  703. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
  704. MachineBasicBlock *BB = &*FI;
  705. BlockChain &Chain = *BlockToChain[BB];
  706. if (!UpdatedPreds.insert(&Chain))
  707. continue;
  708. assert(Chain.LoopPredecessors == 0);
  709. for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
  710. BCI != BCE; ++BCI) {
  711. assert(BlockToChain[*BCI] == &Chain);
  712. for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
  713. PE = (*BCI)->pred_end();
  714. PI != PE; ++PI) {
  715. if (BlockToChain[*PI] == &Chain)
  716. continue;
  717. ++Chain.LoopPredecessors;
  718. }
  719. }
  720. if (Chain.LoopPredecessors == 0)
  721. BlockWorkList.push_back(*Chain.begin());
  722. }
  723. BlockChain &FunctionChain = *BlockToChain[&F.front()];
  724. buildChain(&F.front(), FunctionChain, BlockWorkList);
  725. typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
  726. DEBUG({
  727. // Crash at the end so we get all of the debugging output first.
  728. bool BadFunc = false;
  729. FunctionBlockSetType FunctionBlockSet;
  730. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
  731. FunctionBlockSet.insert(FI);
  732. for (BlockChain::iterator BCI = FunctionChain.begin(),
  733. BCE = FunctionChain.end();
  734. BCI != BCE; ++BCI)
  735. if (!FunctionBlockSet.erase(*BCI)) {
  736. BadFunc = true;
  737. dbgs() << "Function chain contains a block not in the function!\n"
  738. << " Bad block: " << getBlockName(*BCI) << "\n";
  739. }
  740. if (!FunctionBlockSet.empty()) {
  741. BadFunc = true;
  742. for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
  743. FBE = FunctionBlockSet.end();
  744. FBI != FBE; ++FBI)
  745. dbgs() << "Function contains blocks never placed into a chain!\n"
  746. << " Bad block: " << getBlockName(*FBI) << "\n";
  747. }
  748. assert(!BadFunc && "Detected problems with the block placement.");
  749. });
  750. // Splice the blocks into place.
  751. MachineFunction::iterator InsertPos = F.begin();
  752. for (BlockChain::iterator BI = FunctionChain.begin(),
  753. BE = FunctionChain.end();
  754. BI != BE; ++BI) {
  755. DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
  756. : " ... ")
  757. << getBlockName(*BI) << "\n");
  758. if (InsertPos != MachineFunction::iterator(*BI))
  759. F.splice(InsertPos, *BI);
  760. else
  761. ++InsertPos;
  762. // Update the terminator of the previous block.
  763. if (BI == FunctionChain.begin())
  764. continue;
  765. MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
  766. // FIXME: It would be awesome of updateTerminator would just return rather
  767. // than assert when the branch cannot be analyzed in order to remove this
  768. // boiler plate.
  769. Cond.clear();
  770. MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
  771. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
  772. PrevBB->updateTerminator();
  773. }
  774. // Fixup the last block.
  775. Cond.clear();
  776. MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
  777. if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
  778. F.back().updateTerminator();
  779. }
  780. /// \brief Recursive helper to align a loop and any nested loops.
  781. static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) {
  782. // Recurse through nested loops.
  783. for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
  784. AlignLoop(F, *I, Align);
  785. L->getTopBlock()->setAlignment(Align);
  786. }
  787. /// \brief Align loop headers to target preferred alignments.
  788. void MachineBlockPlacement::AlignLoops(MachineFunction &F) {
  789. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
  790. return;
  791. unsigned Align = TLI->getPrefLoopAlignment();
  792. if (!Align)
  793. return; // Don't care about loop alignment.
  794. for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
  795. AlignLoop(F, *I, Align);
  796. }
  797. bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
  798. // Check for single-block functions and skip them.
  799. if (llvm::next(F.begin()) == F.end())
  800. return false;
  801. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  802. MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  803. MLI = &getAnalysis<MachineLoopInfo>();
  804. TII = F.getTarget().getInstrInfo();
  805. TLI = F.getTarget().getTargetLowering();
  806. assert(BlockToChain.empty());
  807. buildCFGChains(F);
  808. AlignLoops(F);
  809. BlockToChain.clear();
  810. ChainAllocator.DestroyAll();
  811. // We always return true as we have no way to track whether the final order
  812. // differs from the original order.
  813. return true;
  814. }
  815. namespace {
  816. /// \brief A pass to compute block placement statistics.
  817. ///
  818. /// A separate pass to compute interesting statistics for evaluating block
  819. /// placement. This is separate from the actual placement pass so that they can
  820. /// be computed in the absense of any placement transformations or when using
  821. /// alternative placement strategies.
  822. class MachineBlockPlacementStats : public MachineFunctionPass {
  823. /// \brief A handle to the branch probability pass.
  824. const MachineBranchProbabilityInfo *MBPI;
  825. /// \brief A handle to the function-wide block frequency pass.
  826. const MachineBlockFrequencyInfo *MBFI;
  827. public:
  828. static char ID; // Pass identification, replacement for typeid
  829. MachineBlockPlacementStats() : MachineFunctionPass(ID) {
  830. initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
  831. }
  832. bool runOnMachineFunction(MachineFunction &F);
  833. void getAnalysisUsage(AnalysisUsage &AU) const {
  834. AU.addRequired<MachineBranchProbabilityInfo>();
  835. AU.addRequired<MachineBlockFrequencyInfo>();
  836. AU.setPreservesAll();
  837. MachineFunctionPass::getAnalysisUsage(AU);
  838. }
  839. };
  840. }
  841. char MachineBlockPlacementStats::ID = 0;
  842. char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
  843. INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
  844. "Basic Block Placement Stats", false, false)
  845. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  846. INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
  847. INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
  848. "Basic Block Placement Stats", false, false)
  849. bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
  850. // Check for single-block functions and skip them.
  851. if (llvm::next(F.begin()) == F.end())
  852. return false;
  853. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  854. MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  855. for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
  856. BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
  857. Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
  858. : NumUncondBranches;
  859. Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
  860. : UncondBranchTakenFreq;
  861. for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
  862. SE = I->succ_end();
  863. SI != SE; ++SI) {
  864. // Skip if this successor is a fallthrough.
  865. if (I->isLayoutSuccessor(*SI))
  866. continue;
  867. BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
  868. ++NumBranches;
  869. BranchTakenFreq += EdgeFreq.getFrequency();
  870. }
  871. }
  872. return false;
  873. }