MachineBlockPlacement.cpp 88 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157
  1. //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements basic block placement transformations using the CFG
  11. // structure and branch probability estimates.
  12. //
  13. // The pass strives to preserve the structure of the CFG (that is, retain
  14. // a topological ordering of basic blocks) in the absence of a *strong* signal
  15. // to the contrary from probabilities. However, within the CFG structure, it
  16. // attempts to choose an ordering which favors placing more likely sequences of
  17. // blocks adjacent to each other.
  18. //
  19. // The algorithm works from the inner-most loop within a function outward, and
  20. // at each stage walks through the basic blocks, trying to coalesce them into
  21. // sequential chains where allowed by the CFG (or demanded by heavy
  22. // probabilities). Finally, it walks the blocks in topological order, and the
  23. // first time it reaches a chain of basic blocks, it schedules them in the
  24. // function in-order.
  25. //
  26. //===----------------------------------------------------------------------===//
  27. #include "llvm/CodeGen/Passes.h"
  28. #include "llvm/CodeGen/TargetPassConfig.h"
  29. #include "BranchFolding.h"
  30. #include "llvm/ADT/DenseMap.h"
  31. #include "llvm/ADT/SmallPtrSet.h"
  32. #include "llvm/ADT/SmallVector.h"
  33. #include "llvm/ADT/Statistic.h"
  34. #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
  35. #include "llvm/CodeGen/MachineBasicBlock.h"
  36. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
  37. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  38. #include "llvm/CodeGen/MachineDominators.h"
  39. #include "llvm/CodeGen/MachineFunction.h"
  40. #include "llvm/CodeGen/MachineFunctionPass.h"
  41. #include "llvm/CodeGen/MachineLoopInfo.h"
  42. #include "llvm/CodeGen/MachineModuleInfo.h"
  43. #include "llvm/CodeGen/TailDuplicator.h"
  44. #include "llvm/Support/Allocator.h"
  45. #include "llvm/Support/CommandLine.h"
  46. #include "llvm/Support/Debug.h"
  47. #include "llvm/Support/raw_ostream.h"
  48. #include "llvm/Target/TargetInstrInfo.h"
  49. #include "llvm/Target/TargetLowering.h"
  50. #include "llvm/Target/TargetSubtargetInfo.h"
  51. #include <algorithm>
  52. using namespace llvm;
  53. #define DEBUG_TYPE "block-placement"
  54. STATISTIC(NumCondBranches, "Number of conditional branches");
  55. STATISTIC(NumUncondBranches, "Number of unconditional branches");
  56. STATISTIC(CondBranchTakenFreq,
  57. "Potential frequency of taking conditional branches");
  58. STATISTIC(UncondBranchTakenFreq,
  59. "Potential frequency of taking unconditional branches");
  60. static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
  61. cl::desc("Force the alignment of all "
  62. "blocks in the function."),
  63. cl::init(0), cl::Hidden);
  64. static cl::opt<unsigned> AlignAllNonFallThruBlocks(
  65. "align-all-nofallthru-blocks",
  66. cl::desc("Force the alignment of all "
  67. "blocks that have no fall-through predecessors (i.e. don't add "
  68. "nops that are executed)."),
  69. cl::init(0), cl::Hidden);
  70. // FIXME: Find a good default for this flag and remove the flag.
  71. static cl::opt<unsigned> ExitBlockBias(
  72. "block-placement-exit-block-bias",
  73. cl::desc("Block frequency percentage a loop exit block needs "
  74. "over the original exit to be considered the new exit."),
  75. cl::init(0), cl::Hidden);
  76. // Definition:
  77. // - Outlining: placement of a basic block outside the chain or hot path.
  78. static cl::opt<bool> OutlineOptionalBranches(
  79. "outline-optional-branches",
  80. cl::desc("Outlining optional branches will place blocks that are optional "
  81. "branches, i.e. branches with a common post dominator, outside "
  82. "the hot path or chain"),
  83. cl::init(false), cl::Hidden);
  84. static cl::opt<unsigned> OutlineOptionalThreshold(
  85. "outline-optional-threshold",
  86. cl::desc("Don't outline optional branches that are a single block with an "
  87. "instruction count below this threshold"),
  88. cl::init(4), cl::Hidden);
  89. static cl::opt<unsigned> LoopToColdBlockRatio(
  90. "loop-to-cold-block-ratio",
  91. cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
  92. "(frequency of block) is greater than this ratio"),
  93. cl::init(5), cl::Hidden);
  94. static cl::opt<bool>
  95. PreciseRotationCost("precise-rotation-cost",
  96. cl::desc("Model the cost of loop rotation more "
  97. "precisely by using profile data."),
  98. cl::init(false), cl::Hidden);
  99. static cl::opt<bool>
  100. ForcePreciseRotationCost("force-precise-rotation-cost",
  101. cl::desc("Force the use of precise cost "
  102. "loop rotation strategy."),
  103. cl::init(false), cl::Hidden);
  104. static cl::opt<unsigned> MisfetchCost(
  105. "misfetch-cost",
  106. cl::desc("Cost that models the probabilistic risk of an instruction "
  107. "misfetch due to a jump comparing to falling through, whose cost "
  108. "is zero."),
  109. cl::init(1), cl::Hidden);
  110. static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
  111. cl::desc("Cost of jump instructions."),
  112. cl::init(1), cl::Hidden);
  113. static cl::opt<bool>
  114. TailDupPlacement("tail-dup-placement",
  115. cl::desc("Perform tail duplication during placement. "
  116. "Creates more fallthrough opportunites in "
  117. "outline branches."),
  118. cl::init(true), cl::Hidden);
  119. static cl::opt<bool>
  120. BranchFoldPlacement("branch-fold-placement",
  121. cl::desc("Perform branch folding during placement. "
  122. "Reduces code size."),
  123. cl::init(true), cl::Hidden);
  124. // Heuristic for tail duplication.
  125. static cl::opt<unsigned> TailDuplicatePlacementThreshold(
  126. "tail-dup-placement-threshold",
  127. cl::desc("Instruction cutoff for tail duplication during layout. "
  128. "Tail merging during layout is forced to have a threshold "
  129. "that won't conflict."), cl::init(2),
  130. cl::Hidden);
  131. extern cl::opt<unsigned> StaticLikelyProb;
  132. extern cl::opt<unsigned> ProfileLikelyProb;
  133. #ifndef NDEBUG
  134. extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
  135. extern cl::opt<std::string> ViewBlockFreqFuncName;
  136. #endif
  137. namespace {
  138. class BlockChain;
  139. /// \brief Type for our function-wide basic block -> block chain mapping.
  140. typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
  141. }
  142. namespace {
  143. /// \brief A chain of blocks which will be laid out contiguously.
  144. ///
  145. /// This is the datastructure representing a chain of consecutive blocks that
  146. /// are profitable to layout together in order to maximize fallthrough
  147. /// probabilities and code locality. We also can use a block chain to represent
  148. /// a sequence of basic blocks which have some external (correctness)
  149. /// requirement for sequential layout.
  150. ///
  151. /// Chains can be built around a single basic block and can be merged to grow
  152. /// them. They participate in a block-to-chain mapping, which is updated
  153. /// automatically as chains are merged together.
  154. class BlockChain {
  155. /// \brief The sequence of blocks belonging to this chain.
  156. ///
  157. /// This is the sequence of blocks for a particular chain. These will be laid
  158. /// out in-order within the function.
  159. SmallVector<MachineBasicBlock *, 4> Blocks;
  160. /// \brief A handle to the function-wide basic block to block chain mapping.
  161. ///
  162. /// This is retained in each block chain to simplify the computation of child
  163. /// block chains for SCC-formation and iteration. We store the edges to child
  164. /// basic blocks, and map them back to their associated chains using this
  165. /// structure.
  166. BlockToChainMapType &BlockToChain;
  167. public:
  168. /// \brief Construct a new BlockChain.
  169. ///
  170. /// This builds a new block chain representing a single basic block in the
  171. /// function. It also registers itself as the chain that block participates
  172. /// in with the BlockToChain mapping.
  173. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
  174. : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
  175. assert(BB && "Cannot create a chain with a null basic block");
  176. BlockToChain[BB] = this;
  177. }
  178. /// \brief Iterator over blocks within the chain.
  179. typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
  180. /// \brief Beginning of blocks within the chain.
  181. iterator begin() { return Blocks.begin(); }
  182. /// \brief End of blocks within the chain.
  183. iterator end() { return Blocks.end(); }
  184. bool remove(MachineBasicBlock* BB) {
  185. for(iterator i = begin(); i != end(); ++i) {
  186. if (*i == BB) {
  187. Blocks.erase(i);
  188. return true;
  189. }
  190. }
  191. return false;
  192. }
  193. /// \brief Merge a block chain into this one.
  194. ///
  195. /// This routine merges a block chain into this one. It takes care of forming
  196. /// a contiguous sequence of basic blocks, updating the edge list, and
  197. /// updating the block -> chain mapping. It does not free or tear down the
  198. /// old chain, but the old chain's block list is no longer valid.
  199. void merge(MachineBasicBlock *BB, BlockChain *Chain) {
  200. assert(BB);
  201. assert(!Blocks.empty());
  202. // Fast path in case we don't have a chain already.
  203. if (!Chain) {
  204. assert(!BlockToChain[BB]);
  205. Blocks.push_back(BB);
  206. BlockToChain[BB] = this;
  207. return;
  208. }
  209. assert(BB == *Chain->begin());
  210. assert(Chain->begin() != Chain->end());
  211. // Update the incoming blocks to point to this chain, and add them to the
  212. // chain structure.
  213. for (MachineBasicBlock *ChainBB : *Chain) {
  214. Blocks.push_back(ChainBB);
  215. assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
  216. BlockToChain[ChainBB] = this;
  217. }
  218. }
  219. #ifndef NDEBUG
  220. /// \brief Dump the blocks in this chain.
  221. LLVM_DUMP_METHOD void dump() {
  222. for (MachineBasicBlock *MBB : *this)
  223. MBB->dump();
  224. }
  225. #endif // NDEBUG
  226. /// \brief Count of predecessors of any block within the chain which have not
  227. /// yet been scheduled. In general, we will delay scheduling this chain
  228. /// until those predecessors are scheduled (or we find a sufficiently good
  229. /// reason to override this heuristic.) Note that when forming loop chains,
  230. /// blocks outside the loop are ignored and treated as if they were already
  231. /// scheduled.
  232. ///
  233. /// Note: This field is reinitialized multiple times - once for each loop,
  234. /// and then once for the function as a whole.
  235. unsigned UnscheduledPredecessors;
  236. };
  237. }
  238. namespace {
  239. class MachineBlockPlacement : public MachineFunctionPass {
  240. /// \brief A typedef for a block filter set.
  241. typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet;
  242. /// \brief work lists of blocks that are ready to be laid out
  243. SmallVector<MachineBasicBlock *, 16> BlockWorkList;
  244. SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
  245. /// \brief Machine Function
  246. MachineFunction *F;
  247. /// \brief A handle to the branch probability pass.
  248. const MachineBranchProbabilityInfo *MBPI;
  249. /// \brief A handle to the function-wide block frequency pass.
  250. std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
  251. /// \brief A handle to the loop info.
  252. MachineLoopInfo *MLI;
  253. /// \brief Preferred loop exit.
  254. /// Member variable for convenience. It may be removed by duplication deep
  255. /// in the call stack.
  256. MachineBasicBlock *PreferredLoopExit;
  257. /// \brief A handle to the target's instruction info.
  258. const TargetInstrInfo *TII;
  259. /// \brief A handle to the target's lowering info.
  260. const TargetLoweringBase *TLI;
  261. /// \brief A handle to the post dominator tree.
  262. MachineDominatorTree *MDT;
  263. /// \brief Duplicator used to duplicate tails during placement.
  264. ///
  265. /// Placement decisions can open up new tail duplication opportunities, but
  266. /// since tail duplication affects placement decisions of later blocks, it
  267. /// must be done inline.
  268. TailDuplicator TailDup;
  269. /// \brief A set of blocks that are unavoidably execute, i.e. they dominate
  270. /// all terminators of the MachineFunction.
  271. SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
  272. /// \brief Allocator and owner of BlockChain structures.
  273. ///
  274. /// We build BlockChains lazily while processing the loop structure of
  275. /// a function. To reduce malloc traffic, we allocate them using this
  276. /// slab-like allocator, and destroy them after the pass completes. An
  277. /// important guarantee is that this allocator produces stable pointers to
  278. /// the chains.
  279. SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
  280. /// \brief Function wide BasicBlock to BlockChain mapping.
  281. ///
  282. /// This mapping allows efficiently moving from any given basic block to the
  283. /// BlockChain it participates in, if any. We use it to, among other things,
  284. /// allow implicitly defining edges between chains as the existing edges
  285. /// between basic blocks.
  286. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
  287. #ifndef NDEBUG
  288. /// The set of basic blocks that have terminators that cannot be fully
  289. /// analyzed. These basic blocks cannot be re-ordered safely by
  290. /// MachineBlockPlacement, and we must preserve physical layout of these
  291. /// blocks and their successors through the pass.
  292. SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
  293. #endif
  294. /// Decrease the UnscheduledPredecessors count for all blocks in chain, and
  295. /// if the count goes to 0, add them to the appropriate work list.
  296. void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
  297. const BlockFilterSet *BlockFilter = nullptr);
  298. /// Decrease the UnscheduledPredecessors count for a single block, and
  299. /// if the count goes to 0, add them to the appropriate work list.
  300. void markBlockSuccessors(
  301. BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
  302. const BlockFilterSet *BlockFilter = nullptr);
  303. BranchProbability
  304. collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
  305. const BlockFilterSet *BlockFilter,
  306. SmallVector<MachineBasicBlock *, 4> &Successors);
  307. bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ,
  308. BlockChain &Chain,
  309. const BlockFilterSet *BlockFilter,
  310. BranchProbability SuccProb,
  311. BranchProbability HotProb);
  312. bool repeatedlyTailDuplicateBlock(
  313. MachineBasicBlock *BB, MachineBasicBlock *&LPred,
  314. MachineBasicBlock *LoopHeaderBB,
  315. BlockChain &Chain, BlockFilterSet *BlockFilter,
  316. MachineFunction::iterator &PrevUnplacedBlockIt);
  317. bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
  318. const BlockChain &Chain,
  319. BlockFilterSet *BlockFilter,
  320. MachineFunction::iterator &PrevUnplacedBlockIt,
  321. bool &DuplicatedToPred);
  322. bool
  323. hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
  324. BlockChain &SuccChain, BranchProbability SuccProb,
  325. BranchProbability RealSuccProb, BlockChain &Chain,
  326. const BlockFilterSet *BlockFilter);
  327. MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
  328. BlockChain &Chain,
  329. const BlockFilterSet *BlockFilter);
  330. MachineBasicBlock *
  331. selectBestCandidateBlock(BlockChain &Chain,
  332. SmallVectorImpl<MachineBasicBlock *> &WorkList);
  333. MachineBasicBlock *
  334. getFirstUnplacedBlock(const BlockChain &PlacedChain,
  335. MachineFunction::iterator &PrevUnplacedBlockIt,
  336. const BlockFilterSet *BlockFilter);
  337. /// \brief Add a basic block to the work list if it is appropriate.
  338. ///
  339. /// If the optional parameter BlockFilter is provided, only MBB
  340. /// present in the set will be added to the worklist. If nullptr
  341. /// is provided, no filtering occurs.
  342. void fillWorkLists(MachineBasicBlock *MBB,
  343. SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
  344. const BlockFilterSet *BlockFilter);
  345. void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
  346. BlockFilterSet *BlockFilter = nullptr);
  347. MachineBasicBlock *findBestLoopTop(MachineLoop &L,
  348. const BlockFilterSet &LoopBlockSet);
  349. MachineBasicBlock *findBestLoopExit(MachineLoop &L,
  350. const BlockFilterSet &LoopBlockSet);
  351. BlockFilterSet collectLoopBlockSet(MachineLoop &L);
  352. void buildLoopChains(MachineLoop &L);
  353. void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
  354. const BlockFilterSet &LoopBlockSet);
  355. void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
  356. const BlockFilterSet &LoopBlockSet);
  357. void collectMustExecuteBBs();
  358. void buildCFGChains();
  359. void optimizeBranches();
  360. void alignBlocks();
  361. public:
  362. static char ID; // Pass identification, replacement for typeid
  363. MachineBlockPlacement() : MachineFunctionPass(ID) {
  364. initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
  365. }
  366. bool runOnMachineFunction(MachineFunction &F) override;
  367. void getAnalysisUsage(AnalysisUsage &AU) const override {
  368. AU.addRequired<MachineBranchProbabilityInfo>();
  369. AU.addRequired<MachineBlockFrequencyInfo>();
  370. AU.addRequired<MachineDominatorTree>();
  371. AU.addRequired<MachineLoopInfo>();
  372. AU.addRequired<TargetPassConfig>();
  373. MachineFunctionPass::getAnalysisUsage(AU);
  374. }
  375. };
  376. }
  377. char MachineBlockPlacement::ID = 0;
  378. char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
  379. INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
  380. "Branch Probability Basic Block Placement", false, false)
  381. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  382. INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
  383. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  384. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  385. INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
  386. "Branch Probability Basic Block Placement", false, false)
  387. #ifndef NDEBUG
  388. /// \brief Helper to print the name of a MBB.
  389. ///
  390. /// Only used by debug logging.
  391. static std::string getBlockName(MachineBasicBlock *BB) {
  392. std::string Result;
  393. raw_string_ostream OS(Result);
  394. OS << "BB#" << BB->getNumber();
  395. OS << " ('" << BB->getName() << "')";
  396. OS.flush();
  397. return Result;
  398. }
  399. #endif
  400. /// \brief Mark a chain's successors as having one fewer preds.
  401. ///
  402. /// When a chain is being merged into the "placed" chain, this routine will
  403. /// quickly walk the successors of each block in the chain and mark them as
  404. /// having one fewer active predecessor. It also adds any successors of this
  405. /// chain which reach the zero-predecessor state to the appropriate worklist.
  406. void MachineBlockPlacement::markChainSuccessors(
  407. BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
  408. const BlockFilterSet *BlockFilter) {
  409. // Walk all the blocks in this chain, marking their successors as having
  410. // a predecessor placed.
  411. for (MachineBasicBlock *MBB : Chain) {
  412. markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
  413. }
  414. }
  415. /// \brief Mark a single block's successors as having one fewer preds.
  416. ///
  417. /// Under normal circumstances, this is only called by markChainSuccessors,
  418. /// but if a block that was to be placed is completely tail-duplicated away,
  419. /// and was duplicated into the chain end, we need to redo markBlockSuccessors
  420. /// for just that block.
  421. void MachineBlockPlacement::markBlockSuccessors(
  422. BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
  423. const BlockFilterSet *BlockFilter) {
  424. // Add any successors for which this is the only un-placed in-loop
  425. // predecessor to the worklist as a viable candidate for CFG-neutral
  426. // placement. No subsequent placement of this block will violate the CFG
  427. // shape, so we get to use heuristics to choose a favorable placement.
  428. for (MachineBasicBlock *Succ : MBB->successors()) {
  429. if (BlockFilter && !BlockFilter->count(Succ))
  430. continue;
  431. BlockChain &SuccChain = *BlockToChain[Succ];
  432. // Disregard edges within a fixed chain, or edges to the loop header.
  433. if (&Chain == &SuccChain || Succ == LoopHeaderBB)
  434. continue;
  435. // This is a cross-chain edge that is within the loop, so decrement the
  436. // loop predecessor count of the destination chain.
  437. if (SuccChain.UnscheduledPredecessors == 0 ||
  438. --SuccChain.UnscheduledPredecessors > 0)
  439. continue;
  440. auto *NewBB = *SuccChain.begin();
  441. if (NewBB->isEHPad())
  442. EHPadWorkList.push_back(NewBB);
  443. else
  444. BlockWorkList.push_back(NewBB);
  445. }
  446. }
  447. /// This helper function collects the set of successors of block
  448. /// \p BB that are allowed to be its layout successors, and return
  449. /// the total branch probability of edges from \p BB to those
  450. /// blocks.
  451. BranchProbability MachineBlockPlacement::collectViableSuccessors(
  452. MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter,
  453. SmallVector<MachineBasicBlock *, 4> &Successors) {
  454. // Adjust edge probabilities by excluding edges pointing to blocks that is
  455. // either not in BlockFilter or is already in the current chain. Consider the
  456. // following CFG:
  457. //
  458. // --->A
  459. // | / \
  460. // | B C
  461. // | \ / \
  462. // ----D E
  463. //
  464. // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
  465. // A->C is chosen as a fall-through, D won't be selected as a successor of C
  466. // due to CFG constraint (the probability of C->D is not greater than
  467. // HotProb to break top-order). If we exclude E that is not in BlockFilter
  468. // when calculating the probability of C->D, D will be selected and we
  469. // will get A C D B as the layout of this loop.
  470. auto AdjustedSumProb = BranchProbability::getOne();
  471. for (MachineBasicBlock *Succ : BB->successors()) {
  472. bool SkipSucc = false;
  473. if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
  474. SkipSucc = true;
  475. } else {
  476. BlockChain *SuccChain = BlockToChain[Succ];
  477. if (SuccChain == &Chain) {
  478. SkipSucc = true;
  479. } else if (Succ != *SuccChain->begin()) {
  480. DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
  481. continue;
  482. }
  483. }
  484. if (SkipSucc)
  485. AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ);
  486. else
  487. Successors.push_back(Succ);
  488. }
  489. return AdjustedSumProb;
  490. }
  491. /// The helper function returns the branch probability that is adjusted
  492. /// or normalized over the new total \p AdjustedSumProb.
  493. static BranchProbability
  494. getAdjustedProbability(BranchProbability OrigProb,
  495. BranchProbability AdjustedSumProb) {
  496. BranchProbability SuccProb;
  497. uint32_t SuccProbN = OrigProb.getNumerator();
  498. uint32_t SuccProbD = AdjustedSumProb.getNumerator();
  499. if (SuccProbN >= SuccProbD)
  500. SuccProb = BranchProbability::getOne();
  501. else
  502. SuccProb = BranchProbability(SuccProbN, SuccProbD);
  503. return SuccProb;
  504. }
  505. /// When the option OutlineOptionalBranches is on, this method
  506. /// checks if the fallthrough candidate block \p Succ (of block
  507. /// \p BB) also has other unscheduled predecessor blocks which
  508. /// are also successors of \p BB (forming triangular shape CFG).
  509. /// If none of such predecessors are small, it returns true.
  510. /// The caller can choose to select \p Succ as the layout successors
  511. /// so that \p Succ's predecessors (optional branches) can be
  512. /// outlined.
  513. /// FIXME: fold this with more general layout cost analysis.
  514. bool MachineBlockPlacement::shouldPredBlockBeOutlined(
  515. MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
  516. const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
  517. BranchProbability HotProb) {
  518. if (!OutlineOptionalBranches)
  519. return false;
  520. // If we outline optional branches, look whether Succ is unavoidable, i.e.
  521. // dominates all terminators of the MachineFunction. If it does, other
  522. // successors must be optional. Don't do this for cold branches.
  523. if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
  524. for (MachineBasicBlock *Pred : Succ->predecessors()) {
  525. // Check whether there is an unplaced optional branch.
  526. if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
  527. BlockToChain[Pred] == &Chain)
  528. continue;
  529. // Check whether the optional branch has exactly one BB.
  530. if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
  531. continue;
  532. // Check whether the optional branch is small.
  533. if (Pred->size() < OutlineOptionalThreshold)
  534. return false;
  535. }
  536. return true;
  537. } else
  538. return false;
  539. }
  540. // When profile is not present, return the StaticLikelyProb.
  541. // When profile is available, we need to handle the triangle-shape CFG.
  542. static BranchProbability getLayoutSuccessorProbThreshold(
  543. MachineBasicBlock *BB) {
  544. if (!BB->getParent()->getFunction()->getEntryCount())
  545. return BranchProbability(StaticLikelyProb, 100);
  546. if (BB->succ_size() == 2) {
  547. const MachineBasicBlock *Succ1 = *BB->succ_begin();
  548. const MachineBasicBlock *Succ2 = *(BB->succ_begin() + 1);
  549. if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
  550. /* See case 1 below for the cost analysis. For BB->Succ to
  551. * be taken with smaller cost, the following needs to hold:
  552. * Prob(BB->Succ) > 2* Prob(BB->Pred)
  553. * So the threshold T
  554. * T = 2 * (1-Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
  555. * We have T + T/2 = 1, i.e. T = 2/3. Also adding user specified
  556. * branch bias, we have
  557. * T = (2/3)*(ProfileLikelyProb/50)
  558. * = (2*ProfileLikelyProb)/150)
  559. */
  560. return BranchProbability(2 * ProfileLikelyProb, 150);
  561. }
  562. }
  563. return BranchProbability(ProfileLikelyProb, 100);
  564. }
  565. /// Checks to see if the layout candidate block \p Succ has a better layout
  566. /// predecessor than \c BB. If yes, returns true.
  567. bool MachineBlockPlacement::hasBetterLayoutPredecessor(
  568. MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
  569. BranchProbability SuccProb, BranchProbability RealSuccProb,
  570. BlockChain &Chain, const BlockFilterSet *BlockFilter) {
  571. // There isn't a better layout when there are no unscheduled predecessors.
  572. if (SuccChain.UnscheduledPredecessors == 0)
  573. return false;
  574. // There are two basic scenarios here:
  575. // -------------------------------------
  576. // Case 1: triangular shape CFG (if-then):
  577. // BB
  578. // | \
  579. // | \
  580. // | Pred
  581. // | /
  582. // Succ
  583. // In this case, we are evaluating whether to select edge -> Succ, e.g.
  584. // set Succ as the layout successor of BB. Picking Succ as BB's
  585. // successor breaks the CFG constraints (FIXME: define these constraints).
  586. // With this layout, Pred BB
  587. // is forced to be outlined, so the overall cost will be cost of the
  588. // branch taken from BB to Pred, plus the cost of back taken branch
  589. // from Pred to Succ, as well as the additional cost associated
  590. // with the needed unconditional jump instruction from Pred To Succ.
  591. // The cost of the topological order layout is the taken branch cost
  592. // from BB to Succ, so to make BB->Succ a viable candidate, the following
  593. // must hold:
  594. // 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
  595. // < freq(BB->Succ) * taken_branch_cost.
  596. // Ignoring unconditional jump cost, we get
  597. // freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
  598. // prob(BB->Succ) > 2 * prob(BB->Pred)
  599. //
  600. // When real profile data is available, we can precisely compute the
  601. // probability threshold that is needed for edge BB->Succ to be considered.
  602. // Without profile data, the heuristic requires the branch bias to be
  603. // a lot larger to make sure the signal is very strong (e.g. 80% default).
  604. // -----------------------------------------------------------------
  605. // Case 2: diamond like CFG (if-then-else):
  606. // S
  607. // / \
  608. // | \
  609. // BB Pred
  610. // \ /
  611. // Succ
  612. // ..
  613. //
  614. // The current block is BB and edge BB->Succ is now being evaluated.
  615. // Note that edge S->BB was previously already selected because
  616. // prob(S->BB) > prob(S->Pred).
  617. // At this point, 2 blocks can be placed after BB: Pred or Succ. If we
  618. // choose Pred, we will have a topological ordering as shown on the left
  619. // in the picture below. If we choose Succ, we have the solution as shown
  620. // on the right:
  621. //
  622. // topo-order:
  623. //
  624. // S----- ---S
  625. // | | | |
  626. // ---BB | | BB
  627. // | | | |
  628. // | pred-- | Succ--
  629. // | | | |
  630. // ---succ ---pred--
  631. //
  632. // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred)
  633. // = freq(S->Pred) + freq(S->BB)
  634. //
  635. // If we have profile data (i.e, branch probabilities can be trusted), the
  636. // cost (number of taken branches) with layout S->BB->Succ->Pred is 2 *
  637. // freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB).
  638. // We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which
  639. // means the cost of topological order is greater.
  640. // When profile data is not available, however, we need to be more
  641. // conservative. If the branch prediction is wrong, breaking the topo-order
  642. // will actually yield a layout with large cost. For this reason, we need
  643. // strong biased branch at block S with Prob(S->BB) in order to select
  644. // BB->Succ. This is equivalent to looking the CFG backward with backward
  645. // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
  646. // profile data).
  647. // --------------------------------------------------------------------------
  648. // Case 3: forked diamond
  649. // S
  650. // / \
  651. // / \
  652. // BB Pred
  653. // | \ / |
  654. // | \ / |
  655. // | X |
  656. // | / \ |
  657. // | / \ |
  658. // S1 S2
  659. //
  660. // The current block is BB and edge BB->S1 is now being evaluated.
  661. // As above S->BB was already selected because
  662. // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).
  663. //
  664. // topo-order:
  665. //
  666. // S-------| ---S
  667. // | | | |
  668. // ---BB | | BB
  669. // | | | |
  670. // | Pred----| | S1----
  671. // | | | |
  672. // --(S1 or S2) ---Pred--
  673. //
  674. // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
  675. // + min(freq(Pred->S1), freq(Pred->S2))
  676. // Non-topo-order cost:
  677. // In the worst case, S2 will not get laid out after Pred.
  678. // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
  679. // To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
  680. // is 0. Then the non topo layout is better when
  681. // freq(S->Pred) < freq(BB->S1).
  682. // This is exactly what is checked below.
  683. // Note there are other shapes that apply (Pred may not be a single block,
  684. // but they all fit this general pattern.)
  685. BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
  686. // Make sure that a hot successor doesn't have a globally more
  687. // important predecessor.
  688. BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
  689. bool BadCFGConflict = false;
  690. for (MachineBasicBlock *Pred : Succ->predecessors()) {
  691. if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
  692. (BlockFilter && !BlockFilter->count(Pred)) ||
  693. BlockToChain[Pred] == &Chain)
  694. continue;
  695. // Do backward checking.
  696. // For all cases above, we need a backward checking to filter out edges that
  697. // are not 'strongly' biased. With profile data available, the check is
  698. // mostly redundant for case 2 (when threshold prob is set at 50%) unless S
  699. // has more than two successors.
  700. // BB Pred
  701. // \ /
  702. // Succ
  703. // We select edge BB->Succ if
  704. // freq(BB->Succ) > freq(Succ) * HotProb
  705. // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
  706. // HotProb
  707. // i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
  708. // Case 1 is covered too, because the first equation reduces to:
  709. // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle)
  710. BlockFrequency PredEdgeFreq =
  711. MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
  712. if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
  713. BadCFGConflict = true;
  714. break;
  715. }
  716. }
  717. if (BadCFGConflict) {
  718. DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> " << SuccProb
  719. << " (prob) (non-cold CFG conflict)\n");
  720. return true;
  721. }
  722. return false;
  723. }
  724. /// \brief Select the best successor for a block.
  725. ///
  726. /// This looks across all successors of a particular block and attempts to
  727. /// select the "best" one to be the layout successor. It only considers direct
  728. /// successors which also pass the block filter. It will attempt to avoid
  729. /// breaking CFG structure, but cave and break such structures in the case of
  730. /// very hot successor edges.
  731. ///
  732. /// \returns The best successor block found, or null if none are viable.
  733. MachineBasicBlock *
  734. MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
  735. BlockChain &Chain,
  736. const BlockFilterSet *BlockFilter) {
  737. const BranchProbability HotProb(StaticLikelyProb, 100);
  738. MachineBasicBlock *BestSucc = nullptr;
  739. auto BestProb = BranchProbability::getZero();
  740. SmallVector<MachineBasicBlock *, 4> Successors;
  741. auto AdjustedSumProb =
  742. collectViableSuccessors(BB, Chain, BlockFilter, Successors);
  743. DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
  744. for (MachineBasicBlock *Succ : Successors) {
  745. auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
  746. BranchProbability SuccProb =
  747. getAdjustedProbability(RealSuccProb, AdjustedSumProb);
  748. // This heuristic is off by default.
  749. if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
  750. HotProb))
  751. return Succ;
  752. BlockChain &SuccChain = *BlockToChain[Succ];
  753. // Skip the edge \c BB->Succ if block \c Succ has a better layout
  754. // predecessor that yields lower global cost.
  755. if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
  756. Chain, BlockFilter))
  757. continue;
  758. DEBUG(
  759. dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
  760. << SuccProb
  761. << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
  762. << "\n");
  763. if (BestSucc && BestProb >= SuccProb) {
  764. DEBUG(dbgs() << " Not the best candidate, continuing\n");
  765. continue;
  766. }
  767. DEBUG(dbgs() << " Setting it as best candidate\n");
  768. BestSucc = Succ;
  769. BestProb = SuccProb;
  770. }
  771. if (BestSucc)
  772. DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n");
  773. return BestSucc;
  774. }
  775. /// \brief Select the best block from a worklist.
  776. ///
  777. /// This looks through the provided worklist as a list of candidate basic
  778. /// blocks and select the most profitable one to place. The definition of
  779. /// profitable only really makes sense in the context of a loop. This returns
  780. /// the most frequently visited block in the worklist, which in the case of
  781. /// a loop, is the one most desirable to be physically close to the rest of the
  782. /// loop body in order to improve i-cache behavior.
  783. ///
  784. /// \returns The best block found, or null if none are viable.
  785. MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
  786. BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
  787. // Once we need to walk the worklist looking for a candidate, cleanup the
  788. // worklist of already placed entries.
  789. // FIXME: If this shows up on profiles, it could be folded (at the cost of
  790. // some code complexity) into the loop below.
  791. WorkList.erase(remove_if(WorkList,
  792. [&](MachineBasicBlock *BB) {
  793. return BlockToChain.lookup(BB) == &Chain;
  794. }),
  795. WorkList.end());
  796. if (WorkList.empty())
  797. return nullptr;
  798. bool IsEHPad = WorkList[0]->isEHPad();
  799. MachineBasicBlock *BestBlock = nullptr;
  800. BlockFrequency BestFreq;
  801. for (MachineBasicBlock *MBB : WorkList) {
  802. assert(MBB->isEHPad() == IsEHPad);
  803. BlockChain &SuccChain = *BlockToChain[MBB];
  804. if (&SuccChain == &Chain)
  805. continue;
  806. assert(SuccChain.UnscheduledPredecessors == 0 && "Found CFG-violating block");
  807. BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
  808. DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
  809. MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
  810. // For ehpad, we layout the least probable first as to avoid jumping back
  811. // from least probable landingpads to more probable ones.
  812. //
  813. // FIXME: Using probability is probably (!) not the best way to achieve
  814. // this. We should probably have a more principled approach to layout
  815. // cleanup code.
  816. //
  817. // The goal is to get:
  818. //
  819. // +--------------------------+
  820. // | V
  821. // InnerLp -> InnerCleanup OuterLp -> OuterCleanup -> Resume
  822. //
  823. // Rather than:
  824. //
  825. // +-------------------------------------+
  826. // V |
  827. // OuterLp -> OuterCleanup -> Resume InnerLp -> InnerCleanup
  828. if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
  829. continue;
  830. BestBlock = MBB;
  831. BestFreq = CandidateFreq;
  832. }
  833. return BestBlock;
  834. }
  835. /// \brief Retrieve the first unplaced basic block.
  836. ///
  837. /// This routine is called when we are unable to use the CFG to walk through
  838. /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
  839. /// We walk through the function's blocks in order, starting from the
  840. /// LastUnplacedBlockIt. We update this iterator on each call to avoid
  841. /// re-scanning the entire sequence on repeated calls to this routine.
  842. MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
  843. const BlockChain &PlacedChain,
  844. MachineFunction::iterator &PrevUnplacedBlockIt,
  845. const BlockFilterSet *BlockFilter) {
  846. for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
  847. ++I) {
  848. if (BlockFilter && !BlockFilter->count(&*I))
  849. continue;
  850. if (BlockToChain[&*I] != &PlacedChain) {
  851. PrevUnplacedBlockIt = I;
  852. // Now select the head of the chain to which the unplaced block belongs
  853. // as the block to place. This will force the entire chain to be placed,
  854. // and satisfies the requirements of merging chains.
  855. return *BlockToChain[&*I]->begin();
  856. }
  857. }
  858. return nullptr;
  859. }
  860. void MachineBlockPlacement::fillWorkLists(
  861. MachineBasicBlock *MBB,
  862. SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
  863. const BlockFilterSet *BlockFilter = nullptr) {
  864. BlockChain &Chain = *BlockToChain[MBB];
  865. if (!UpdatedPreds.insert(&Chain).second)
  866. return;
  867. assert(Chain.UnscheduledPredecessors == 0);
  868. for (MachineBasicBlock *ChainBB : Chain) {
  869. assert(BlockToChain[ChainBB] == &Chain);
  870. for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
  871. if (BlockFilter && !BlockFilter->count(Pred))
  872. continue;
  873. if (BlockToChain[Pred] == &Chain)
  874. continue;
  875. ++Chain.UnscheduledPredecessors;
  876. }
  877. }
  878. if (Chain.UnscheduledPredecessors != 0)
  879. return;
  880. MBB = *Chain.begin();
  881. if (MBB->isEHPad())
  882. EHPadWorkList.push_back(MBB);
  883. else
  884. BlockWorkList.push_back(MBB);
  885. }
  886. void MachineBlockPlacement::buildChain(
  887. MachineBasicBlock *BB, BlockChain &Chain,
  888. BlockFilterSet *BlockFilter) {
  889. assert(BB && "BB must not be null.\n");
  890. assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
  891. MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
  892. MachineBasicBlock *LoopHeaderBB = BB;
  893. markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
  894. BB = *std::prev(Chain.end());
  895. for (;;) {
  896. assert(BB && "null block found at end of chain in loop.");
  897. assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
  898. assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
  899. // Look for the best viable successor if there is one to place immediately
  900. // after this block.
  901. MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
  902. // If an immediate successor isn't available, look for the best viable
  903. // block among those we've identified as not violating the loop's CFG at
  904. // this point. This won't be a fallthrough, but it will increase locality.
  905. if (!BestSucc)
  906. BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
  907. if (!BestSucc)
  908. BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
  909. if (!BestSucc) {
  910. BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
  911. if (!BestSucc)
  912. break;
  913. DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
  914. "layout successor until the CFG reduces\n");
  915. }
  916. // Placement may have changed tail duplication opportunities.
  917. // Check for that now.
  918. if (TailDupPlacement && BestSucc) {
  919. // If the chosen successor was duplicated into all its predecessors,
  920. // don't bother laying it out, just go round the loop again with BB as
  921. // the chain end.
  922. if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
  923. BlockFilter, PrevUnplacedBlockIt))
  924. continue;
  925. }
  926. // Place this block, updating the datastructures to reflect its placement.
  927. BlockChain &SuccChain = *BlockToChain[BestSucc];
  928. // Zero out UnscheduledPredecessors for the successor we're about to merge in case
  929. // we selected a successor that didn't fit naturally into the CFG.
  930. SuccChain.UnscheduledPredecessors = 0;
  931. DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
  932. << getBlockName(BestSucc) << "\n");
  933. markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
  934. Chain.merge(BestSucc, &SuccChain);
  935. BB = *std::prev(Chain.end());
  936. }
  937. DEBUG(dbgs() << "Finished forming chain for header block "
  938. << getBlockName(*Chain.begin()) << "\n");
  939. }
  940. /// \brief Find the best loop top block for layout.
  941. ///
  942. /// Look for a block which is strictly better than the loop header for laying
  943. /// out at the top of the loop. This looks for one and only one pattern:
  944. /// a latch block with no conditional exit. This block will cause a conditional
  945. /// jump around it or will be the bottom of the loop if we lay it out in place,
  946. /// but if it it doesn't end up at the bottom of the loop for any reason,
  947. /// rotation alone won't fix it. Because such a block will always result in an
  948. /// unconditional jump (for the backedge) rotating it in front of the loop
  949. /// header is always profitable.
  950. MachineBasicBlock *
  951. MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
  952. const BlockFilterSet &LoopBlockSet) {
  953. // Placing the latch block before the header may introduce an extra branch
  954. // that skips this block the first time the loop is executed, which we want
  955. // to avoid when optimising for size.
  956. // FIXME: in theory there is a case that does not introduce a new branch,
  957. // i.e. when the layout predecessor does not fallthrough to the loop header.
  958. // In practice this never happens though: there always seems to be a preheader
  959. // that can fallthrough and that is also placed before the header.
  960. if (F->getFunction()->optForSize())
  961. return L.getHeader();
  962. // Check that the header hasn't been fused with a preheader block due to
  963. // crazy branches. If it has, we need to start with the header at the top to
  964. // prevent pulling the preheader into the loop body.
  965. BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
  966. if (!LoopBlockSet.count(*HeaderChain.begin()))
  967. return L.getHeader();
  968. DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
  969. << "\n");
  970. BlockFrequency BestPredFreq;
  971. MachineBasicBlock *BestPred = nullptr;
  972. for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
  973. if (!LoopBlockSet.count(Pred))
  974. continue;
  975. DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has "
  976. << Pred->succ_size() << " successors, ";
  977. MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
  978. if (Pred->succ_size() > 1)
  979. continue;
  980. BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
  981. if (!BestPred || PredFreq > BestPredFreq ||
  982. (!(PredFreq < BestPredFreq) &&
  983. Pred->isLayoutSuccessor(L.getHeader()))) {
  984. BestPred = Pred;
  985. BestPredFreq = PredFreq;
  986. }
  987. }
  988. // If no direct predecessor is fine, just use the loop header.
  989. if (!BestPred) {
  990. DEBUG(dbgs() << " final top unchanged\n");
  991. return L.getHeader();
  992. }
  993. // Walk backwards through any straight line of predecessors.
  994. while (BestPred->pred_size() == 1 &&
  995. (*BestPred->pred_begin())->succ_size() == 1 &&
  996. *BestPred->pred_begin() != L.getHeader())
  997. BestPred = *BestPred->pred_begin();
  998. DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
  999. return BestPred;
  1000. }
  1001. /// \brief Find the best loop exiting block for layout.
  1002. ///
  1003. /// This routine implements the logic to analyze the loop looking for the best
  1004. /// block to layout at the top of the loop. Typically this is done to maximize
  1005. /// fallthrough opportunities.
  1006. MachineBasicBlock *
  1007. MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
  1008. const BlockFilterSet &LoopBlockSet) {
  1009. // We don't want to layout the loop linearly in all cases. If the loop header
  1010. // is just a normal basic block in the loop, we want to look for what block
  1011. // within the loop is the best one to layout at the top. However, if the loop
  1012. // header has be pre-merged into a chain due to predecessors not having
  1013. // analyzable branches, *and* the predecessor it is merged with is *not* part
  1014. // of the loop, rotating the header into the middle of the loop will create
  1015. // a non-contiguous range of blocks which is Very Bad. So start with the
  1016. // header and only rotate if safe.
  1017. BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
  1018. if (!LoopBlockSet.count(*HeaderChain.begin()))
  1019. return nullptr;
  1020. BlockFrequency BestExitEdgeFreq;
  1021. unsigned BestExitLoopDepth = 0;
  1022. MachineBasicBlock *ExitingBB = nullptr;
  1023. // If there are exits to outer loops, loop rotation can severely limit
  1024. // fallthrough opportunities unless it selects such an exit. Keep a set of
  1025. // blocks where rotating to exit with that block will reach an outer loop.
  1026. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
  1027. DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
  1028. << "\n");
  1029. for (MachineBasicBlock *MBB : L.getBlocks()) {
  1030. BlockChain &Chain = *BlockToChain[MBB];
  1031. // Ensure that this block is at the end of a chain; otherwise it could be
  1032. // mid-way through an inner loop or a successor of an unanalyzable branch.
  1033. if (MBB != *std::prev(Chain.end()))
  1034. continue;
  1035. // Now walk the successors. We need to establish whether this has a viable
  1036. // exiting successor and whether it has a viable non-exiting successor.
  1037. // We store the old exiting state and restore it if a viable looping
  1038. // successor isn't found.
  1039. MachineBasicBlock *OldExitingBB = ExitingBB;
  1040. BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
  1041. bool HasLoopingSucc = false;
  1042. for (MachineBasicBlock *Succ : MBB->successors()) {
  1043. if (Succ->isEHPad())
  1044. continue;
  1045. if (Succ == MBB)
  1046. continue;
  1047. BlockChain &SuccChain = *BlockToChain[Succ];
  1048. // Don't split chains, either this chain or the successor's chain.
  1049. if (&Chain == &SuccChain) {
  1050. DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
  1051. << getBlockName(Succ) << " (chain conflict)\n");
  1052. continue;
  1053. }
  1054. auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
  1055. if (LoopBlockSet.count(Succ)) {
  1056. DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
  1057. << getBlockName(Succ) << " (" << SuccProb << ")\n");
  1058. HasLoopingSucc = true;
  1059. continue;
  1060. }
  1061. unsigned SuccLoopDepth = 0;
  1062. if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
  1063. SuccLoopDepth = ExitLoop->getLoopDepth();
  1064. if (ExitLoop->contains(&L))
  1065. BlocksExitingToOuterLoop.insert(MBB);
  1066. }
  1067. BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
  1068. DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
  1069. << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
  1070. MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
  1071. // Note that we bias this toward an existing layout successor to retain
  1072. // incoming order in the absence of better information. The exit must have
  1073. // a frequency higher than the current exit before we consider breaking
  1074. // the layout.
  1075. BranchProbability Bias(100 - ExitBlockBias, 100);
  1076. if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
  1077. ExitEdgeFreq > BestExitEdgeFreq ||
  1078. (MBB->isLayoutSuccessor(Succ) &&
  1079. !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
  1080. BestExitEdgeFreq = ExitEdgeFreq;
  1081. ExitingBB = MBB;
  1082. }
  1083. }
  1084. if (!HasLoopingSucc) {
  1085. // Restore the old exiting state, no viable looping successor was found.
  1086. ExitingBB = OldExitingBB;
  1087. BestExitEdgeFreq = OldBestExitEdgeFreq;
  1088. }
  1089. }
  1090. // Without a candidate exiting block or with only a single block in the
  1091. // loop, just use the loop header to layout the loop.
  1092. if (!ExitingBB) {
  1093. DEBUG(dbgs() << " No other candidate exit blocks, using loop header\n");
  1094. return nullptr;
  1095. }
  1096. if (L.getNumBlocks() == 1) {
  1097. DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");
  1098. return nullptr;
  1099. }
  1100. // Also, if we have exit blocks which lead to outer loops but didn't select
  1101. // one of them as the exiting block we are rotating toward, disable loop
  1102. // rotation altogether.
  1103. if (!BlocksExitingToOuterLoop.empty() &&
  1104. !BlocksExitingToOuterLoop.count(ExitingBB))
  1105. return nullptr;
  1106. DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
  1107. return ExitingBB;
  1108. }
  1109. /// \brief Attempt to rotate an exiting block to the bottom of the loop.
  1110. ///
  1111. /// Once we have built a chain, try to rotate it to line up the hot exit block
  1112. /// with fallthrough out of the loop if doing so doesn't introduce unnecessary
  1113. /// branches. For example, if the loop has fallthrough into its header and out
  1114. /// of its bottom already, don't rotate it.
  1115. void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
  1116. MachineBasicBlock *ExitingBB,
  1117. const BlockFilterSet &LoopBlockSet) {
  1118. if (!ExitingBB)
  1119. return;
  1120. MachineBasicBlock *Top = *LoopChain.begin();
  1121. bool ViableTopFallthrough = false;
  1122. for (MachineBasicBlock *Pred : Top->predecessors()) {
  1123. BlockChain *PredChain = BlockToChain[Pred];
  1124. if (!LoopBlockSet.count(Pred) &&
  1125. (!PredChain || Pred == *std::prev(PredChain->end()))) {
  1126. ViableTopFallthrough = true;
  1127. break;
  1128. }
  1129. }
  1130. // If the header has viable fallthrough, check whether the current loop
  1131. // bottom is a viable exiting block. If so, bail out as rotating will
  1132. // introduce an unnecessary branch.
  1133. if (ViableTopFallthrough) {
  1134. MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
  1135. for (MachineBasicBlock *Succ : Bottom->successors()) {
  1136. BlockChain *SuccChain = BlockToChain[Succ];
  1137. if (!LoopBlockSet.count(Succ) &&
  1138. (!SuccChain || Succ == *SuccChain->begin()))
  1139. return;
  1140. }
  1141. }
  1142. BlockChain::iterator ExitIt = find(LoopChain, ExitingBB);
  1143. if (ExitIt == LoopChain.end())
  1144. return;
  1145. std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
  1146. }
  1147. /// \brief Attempt to rotate a loop based on profile data to reduce branch cost.
  1148. ///
  1149. /// With profile data, we can determine the cost in terms of missed fall through
  1150. /// opportunities when rotating a loop chain and select the best rotation.
  1151. /// Basically, there are three kinds of cost to consider for each rotation:
  1152. /// 1. The possibly missed fall through edge (if it exists) from BB out of
  1153. /// the loop to the loop header.
  1154. /// 2. The possibly missed fall through edges (if they exist) from the loop
  1155. /// exits to BB out of the loop.
  1156. /// 3. The missed fall through edge (if it exists) from the last BB to the
  1157. /// first BB in the loop chain.
  1158. /// Therefore, the cost for a given rotation is the sum of costs listed above.
  1159. /// We select the best rotation with the smallest cost.
  1160. void MachineBlockPlacement::rotateLoopWithProfile(
  1161. BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
  1162. auto HeaderBB = L.getHeader();
  1163. auto HeaderIter = find(LoopChain, HeaderBB);
  1164. auto RotationPos = LoopChain.end();
  1165. BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
  1166. // A utility lambda that scales up a block frequency by dividing it by a
  1167. // branch probability which is the reciprocal of the scale.
  1168. auto ScaleBlockFrequency = [](BlockFrequency Freq,
  1169. unsigned Scale) -> BlockFrequency {
  1170. if (Scale == 0)
  1171. return 0;
  1172. // Use operator / between BlockFrequency and BranchProbability to implement
  1173. // saturating multiplication.
  1174. return Freq / BranchProbability(1, Scale);
  1175. };
  1176. // Compute the cost of the missed fall-through edge to the loop header if the
  1177. // chain head is not the loop header. As we only consider natural loops with
  1178. // single header, this computation can be done only once.
  1179. BlockFrequency HeaderFallThroughCost(0);
  1180. for (auto *Pred : HeaderBB->predecessors()) {
  1181. BlockChain *PredChain = BlockToChain[Pred];
  1182. if (!LoopBlockSet.count(Pred) &&
  1183. (!PredChain || Pred == *std::prev(PredChain->end()))) {
  1184. auto EdgeFreq =
  1185. MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
  1186. auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
  1187. // If the predecessor has only an unconditional jump to the header, we
  1188. // need to consider the cost of this jump.
  1189. if (Pred->succ_size() == 1)
  1190. FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
  1191. HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
  1192. }
  1193. }
  1194. // Here we collect all exit blocks in the loop, and for each exit we find out
  1195. // its hottest exit edge. For each loop rotation, we define the loop exit cost
  1196. // as the sum of frequencies of exit edges we collect here, excluding the exit
  1197. // edge from the tail of the loop chain.
  1198. SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq;
  1199. for (auto BB : LoopChain) {
  1200. auto LargestExitEdgeProb = BranchProbability::getZero();
  1201. for (auto *Succ : BB->successors()) {
  1202. BlockChain *SuccChain = BlockToChain[Succ];
  1203. if (!LoopBlockSet.count(Succ) &&
  1204. (!SuccChain || Succ == *SuccChain->begin())) {
  1205. auto SuccProb = MBPI->getEdgeProbability(BB, Succ);
  1206. LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
  1207. }
  1208. }
  1209. if (LargestExitEdgeProb > BranchProbability::getZero()) {
  1210. auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
  1211. ExitsWithFreq.emplace_back(BB, ExitFreq);
  1212. }
  1213. }
  1214. // In this loop we iterate every block in the loop chain and calculate the
  1215. // cost assuming the block is the head of the loop chain. When the loop ends,
  1216. // we should have found the best candidate as the loop chain's head.
  1217. for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
  1218. EndIter = LoopChain.end();
  1219. Iter != EndIter; Iter++, TailIter++) {
  1220. // TailIter is used to track the tail of the loop chain if the block we are
  1221. // checking (pointed by Iter) is the head of the chain.
  1222. if (TailIter == LoopChain.end())
  1223. TailIter = LoopChain.begin();
  1224. auto TailBB = *TailIter;
  1225. // Calculate the cost by putting this BB to the top.
  1226. BlockFrequency Cost = 0;
  1227. // If the current BB is the loop header, we need to take into account the
  1228. // cost of the missed fall through edge from outside of the loop to the
  1229. // header.
  1230. if (Iter != HeaderIter)
  1231. Cost += HeaderFallThroughCost;
  1232. // Collect the loop exit cost by summing up frequencies of all exit edges
  1233. // except the one from the chain tail.
  1234. for (auto &ExitWithFreq : ExitsWithFreq)
  1235. if (TailBB != ExitWithFreq.first)
  1236. Cost += ExitWithFreq.second;
  1237. // The cost of breaking the once fall-through edge from the tail to the top
  1238. // of the loop chain. Here we need to consider three cases:
  1239. // 1. If the tail node has only one successor, then we will get an
  1240. // additional jmp instruction. So the cost here is (MisfetchCost +
  1241. // JumpInstCost) * tail node frequency.
  1242. // 2. If the tail node has two successors, then we may still get an
  1243. // additional jmp instruction if the layout successor after the loop
  1244. // chain is not its CFG successor. Note that the more frequently executed
  1245. // jmp instruction will be put ahead of the other one. Assume the
  1246. // frequency of those two branches are x and y, where x is the frequency
  1247. // of the edge to the chain head, then the cost will be
  1248. // (x * MisfetechCost + min(x, y) * JumpInstCost) * tail node frequency.
  1249. // 3. If the tail node has more than two successors (this rarely happens),
  1250. // we won't consider any additional cost.
  1251. if (TailBB->isSuccessor(*Iter)) {
  1252. auto TailBBFreq = MBFI->getBlockFreq(TailBB);
  1253. if (TailBB->succ_size() == 1)
  1254. Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
  1255. MisfetchCost + JumpInstCost);
  1256. else if (TailBB->succ_size() == 2) {
  1257. auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
  1258. auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
  1259. auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
  1260. ? TailBBFreq * TailToHeadProb.getCompl()
  1261. : TailToHeadFreq;
  1262. Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) +
  1263. ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
  1264. }
  1265. }
  1266. DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter)
  1267. << " to the top: " << Cost.getFrequency() << "\n");
  1268. if (Cost < SmallestRotationCost) {
  1269. SmallestRotationCost = Cost;
  1270. RotationPos = Iter;
  1271. }
  1272. }
  1273. if (RotationPos != LoopChain.end()) {
  1274. DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
  1275. << " to the top\n");
  1276. std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
  1277. }
  1278. }
  1279. /// \brief Collect blocks in the given loop that are to be placed.
  1280. ///
  1281. /// When profile data is available, exclude cold blocks from the returned set;
  1282. /// otherwise, collect all blocks in the loop.
  1283. MachineBlockPlacement::BlockFilterSet
  1284. MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
  1285. BlockFilterSet LoopBlockSet;
  1286. // Filter cold blocks off from LoopBlockSet when profile data is available.
  1287. // Collect the sum of frequencies of incoming edges to the loop header from
  1288. // outside. If we treat the loop as a super block, this is the frequency of
  1289. // the loop. Then for each block in the loop, we calculate the ratio between
  1290. // its frequency and the frequency of the loop block. When it is too small,
  1291. // don't add it to the loop chain. If there are outer loops, then this block
  1292. // will be merged into the first outer loop chain for which this block is not
  1293. // cold anymore. This needs precise profile data and we only do this when
  1294. // profile data is available.
  1295. if (F->getFunction()->getEntryCount()) {
  1296. BlockFrequency LoopFreq(0);
  1297. for (auto LoopPred : L.getHeader()->predecessors())
  1298. if (!L.contains(LoopPred))
  1299. LoopFreq += MBFI->getBlockFreq(LoopPred) *
  1300. MBPI->getEdgeProbability(LoopPred, L.getHeader());
  1301. for (MachineBasicBlock *LoopBB : L.getBlocks()) {
  1302. auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
  1303. if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
  1304. continue;
  1305. LoopBlockSet.insert(LoopBB);
  1306. }
  1307. } else
  1308. LoopBlockSet.insert(L.block_begin(), L.block_end());
  1309. return LoopBlockSet;
  1310. }
  1311. /// \brief Forms basic block chains from the natural loop structures.
  1312. ///
  1313. /// These chains are designed to preserve the existing *structure* of the code
  1314. /// as much as possible. We can then stitch the chains together in a way which
  1315. /// both preserves the topological structure and minimizes taken conditional
  1316. /// branches.
  1317. void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
  1318. // First recurse through any nested loops, building chains for those inner
  1319. // loops.
  1320. for (MachineLoop *InnerLoop : L)
  1321. buildLoopChains(*InnerLoop);
  1322. assert(BlockWorkList.empty());
  1323. assert(EHPadWorkList.empty());
  1324. BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
  1325. // Check if we have profile data for this function. If yes, we will rotate
  1326. // this loop by modeling costs more precisely which requires the profile data
  1327. // for better layout.
  1328. bool RotateLoopWithProfile =
  1329. ForcePreciseRotationCost ||
  1330. (PreciseRotationCost && F->getFunction()->getEntryCount());
  1331. // First check to see if there is an obviously preferable top block for the
  1332. // loop. This will default to the header, but may end up as one of the
  1333. // predecessors to the header if there is one which will result in strictly
  1334. // fewer branches in the loop body.
  1335. // When we use profile data to rotate the loop, this is unnecessary.
  1336. MachineBasicBlock *LoopTop =
  1337. RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
  1338. // If we selected just the header for the loop top, look for a potentially
  1339. // profitable exit block in the event that rotating the loop can eliminate
  1340. // branches by placing an exit edge at the bottom.
  1341. if (!RotateLoopWithProfile && LoopTop == L.getHeader())
  1342. PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
  1343. BlockChain &LoopChain = *BlockToChain[LoopTop];
  1344. // FIXME: This is a really lame way of walking the chains in the loop: we
  1345. // walk the blocks, and use a set to prevent visiting a particular chain
  1346. // twice.
  1347. SmallPtrSet<BlockChain *, 4> UpdatedPreds;
  1348. assert(LoopChain.UnscheduledPredecessors == 0);
  1349. UpdatedPreds.insert(&LoopChain);
  1350. for (MachineBasicBlock *LoopBB : LoopBlockSet)
  1351. fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
  1352. buildChain(LoopTop, LoopChain, &LoopBlockSet);
  1353. if (RotateLoopWithProfile)
  1354. rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
  1355. else
  1356. rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
  1357. DEBUG({
  1358. // Crash at the end so we get all of the debugging output first.
  1359. bool BadLoop = false;
  1360. if (LoopChain.UnscheduledPredecessors) {
  1361. BadLoop = true;
  1362. dbgs() << "Loop chain contains a block without its preds placed!\n"
  1363. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  1364. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
  1365. }
  1366. for (MachineBasicBlock *ChainBB : LoopChain) {
  1367. dbgs() << " ... " << getBlockName(ChainBB) << "\n";
  1368. if (!LoopBlockSet.remove(ChainBB)) {
  1369. // We don't mark the loop as bad here because there are real situations
  1370. // where this can occur. For example, with an unanalyzable fallthrough
  1371. // from a loop block to a non-loop block or vice versa.
  1372. dbgs() << "Loop chain contains a block not contained by the loop!\n"
  1373. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  1374. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
  1375. << " Bad block: " << getBlockName(ChainBB) << "\n";
  1376. }
  1377. }
  1378. if (!LoopBlockSet.empty()) {
  1379. BadLoop = true;
  1380. for (MachineBasicBlock *LoopBB : LoopBlockSet)
  1381. dbgs() << "Loop contains blocks never placed into a chain!\n"
  1382. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  1383. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
  1384. << " Bad block: " << getBlockName(LoopBB) << "\n";
  1385. }
  1386. assert(!BadLoop && "Detected problems with the placement of this loop.");
  1387. });
  1388. BlockWorkList.clear();
  1389. EHPadWorkList.clear();
  1390. }
  1391. /// When OutlineOpitonalBranches is on, this method collects BBs that
  1392. /// dominates all terminator blocks of the function \p F.
  1393. void MachineBlockPlacement::collectMustExecuteBBs() {
  1394. if (OutlineOptionalBranches) {
  1395. // Find the nearest common dominator of all of F's terminators.
  1396. MachineBasicBlock *Terminator = nullptr;
  1397. for (MachineBasicBlock &MBB : *F) {
  1398. if (MBB.succ_size() == 0) {
  1399. if (Terminator == nullptr)
  1400. Terminator = &MBB;
  1401. else
  1402. Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
  1403. }
  1404. }
  1405. // MBBs dominating this common dominator are unavoidable.
  1406. UnavoidableBlocks.clear();
  1407. for (MachineBasicBlock &MBB : *F) {
  1408. if (MDT->dominates(&MBB, Terminator)) {
  1409. UnavoidableBlocks.insert(&MBB);
  1410. }
  1411. }
  1412. }
  1413. }
  1414. void MachineBlockPlacement::buildCFGChains() {
  1415. // Ensure that every BB in the function has an associated chain to simplify
  1416. // the assumptions of the remaining algorithm.
  1417. SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
  1418. for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
  1419. ++FI) {
  1420. MachineBasicBlock *BB = &*FI;
  1421. BlockChain *Chain =
  1422. new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
  1423. // Also, merge any blocks which we cannot reason about and must preserve
  1424. // the exact fallthrough behavior for.
  1425. for (;;) {
  1426. Cond.clear();
  1427. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
  1428. if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
  1429. break;
  1430. MachineFunction::iterator NextFI = std::next(FI);
  1431. MachineBasicBlock *NextBB = &*NextFI;
  1432. // Ensure that the layout successor is a viable block, as we know that
  1433. // fallthrough is a possibility.
  1434. assert(NextFI != FE && "Can't fallthrough past the last block.");
  1435. DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
  1436. << getBlockName(BB) << " -> " << getBlockName(NextBB)
  1437. << "\n");
  1438. Chain->merge(NextBB, nullptr);
  1439. #ifndef NDEBUG
  1440. BlocksWithUnanalyzableExits.insert(&*BB);
  1441. #endif
  1442. FI = NextFI;
  1443. BB = NextBB;
  1444. }
  1445. }
  1446. // Turned on with OutlineOptionalBranches option
  1447. collectMustExecuteBBs();
  1448. // Build any loop-based chains.
  1449. PreferredLoopExit = nullptr;
  1450. for (MachineLoop *L : *MLI)
  1451. buildLoopChains(*L);
  1452. assert(BlockWorkList.empty());
  1453. assert(EHPadWorkList.empty());
  1454. SmallPtrSet<BlockChain *, 4> UpdatedPreds;
  1455. for (MachineBasicBlock &MBB : *F)
  1456. fillWorkLists(&MBB, UpdatedPreds);
  1457. BlockChain &FunctionChain = *BlockToChain[&F->front()];
  1458. buildChain(&F->front(), FunctionChain);
  1459. #ifndef NDEBUG
  1460. typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
  1461. #endif
  1462. DEBUG({
  1463. // Crash at the end so we get all of the debugging output first.
  1464. bool BadFunc = false;
  1465. FunctionBlockSetType FunctionBlockSet;
  1466. for (MachineBasicBlock &MBB : *F)
  1467. FunctionBlockSet.insert(&MBB);
  1468. for (MachineBasicBlock *ChainBB : FunctionChain)
  1469. if (!FunctionBlockSet.erase(ChainBB)) {
  1470. BadFunc = true;
  1471. dbgs() << "Function chain contains a block not in the function!\n"
  1472. << " Bad block: " << getBlockName(ChainBB) << "\n";
  1473. }
  1474. if (!FunctionBlockSet.empty()) {
  1475. BadFunc = true;
  1476. for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
  1477. dbgs() << "Function contains blocks never placed into a chain!\n"
  1478. << " Bad block: " << getBlockName(RemainingBB) << "\n";
  1479. }
  1480. assert(!BadFunc && "Detected problems with the block placement.");
  1481. });
  1482. // Splice the blocks into place.
  1483. MachineFunction::iterator InsertPos = F->begin();
  1484. DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n");
  1485. for (MachineBasicBlock *ChainBB : FunctionChain) {
  1486. DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
  1487. : " ... ")
  1488. << getBlockName(ChainBB) << "\n");
  1489. if (InsertPos != MachineFunction::iterator(ChainBB))
  1490. F->splice(InsertPos, ChainBB);
  1491. else
  1492. ++InsertPos;
  1493. // Update the terminator of the previous block.
  1494. if (ChainBB == *FunctionChain.begin())
  1495. continue;
  1496. MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB));
  1497. // FIXME: It would be awesome of updateTerminator would just return rather
  1498. // than assert when the branch cannot be analyzed in order to remove this
  1499. // boiler plate.
  1500. Cond.clear();
  1501. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
  1502. #ifndef NDEBUG
  1503. if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
  1504. // Given the exact block placement we chose, we may actually not _need_ to
  1505. // be able to edit PrevBB's terminator sequence, but not being _able_ to
  1506. // do that at this point is a bug.
  1507. assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) ||
  1508. !PrevBB->canFallThrough()) &&
  1509. "Unexpected block with un-analyzable fallthrough!");
  1510. Cond.clear();
  1511. TBB = FBB = nullptr;
  1512. }
  1513. #endif
  1514. // The "PrevBB" is not yet updated to reflect current code layout, so,
  1515. // o. it may fall-through to a block without explicit "goto" instruction
  1516. // before layout, and no longer fall-through it after layout; or
  1517. // o. just opposite.
  1518. //
  1519. // analyzeBranch() may return erroneous value for FBB when these two
  1520. // situations take place. For the first scenario FBB is mistakenly set NULL;
  1521. // for the 2nd scenario, the FBB, which is expected to be NULL, is
  1522. // mistakenly pointing to "*BI".
  1523. // Thus, if the future change needs to use FBB before the layout is set, it
  1524. // has to correct FBB first by using the code similar to the following:
  1525. //
  1526. // if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
  1527. // PrevBB->updateTerminator();
  1528. // Cond.clear();
  1529. // TBB = FBB = nullptr;
  1530. // if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
  1531. // // FIXME: This should never take place.
  1532. // TBB = FBB = nullptr;
  1533. // }
  1534. // }
  1535. if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond))
  1536. PrevBB->updateTerminator();
  1537. }
  1538. // Fixup the last block.
  1539. Cond.clear();
  1540. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
  1541. if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond))
  1542. F->back().updateTerminator();
  1543. BlockWorkList.clear();
  1544. EHPadWorkList.clear();
  1545. }
  1546. void MachineBlockPlacement::optimizeBranches() {
  1547. BlockChain &FunctionChain = *BlockToChain[&F->front()];
  1548. SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
  1549. // Now that all the basic blocks in the chain have the proper layout,
  1550. // make a final call to AnalyzeBranch with AllowModify set.
  1551. // Indeed, the target may be able to optimize the branches in a way we
  1552. // cannot because all branches may not be analyzable.
  1553. // E.g., the target may be able to remove an unconditional branch to
  1554. // a fallthrough when it occurs after predicated terminators.
  1555. for (MachineBasicBlock *ChainBB : FunctionChain) {
  1556. Cond.clear();
  1557. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
  1558. if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
  1559. // If PrevBB has a two-way branch, try to re-order the branches
  1560. // such that we branch to the successor with higher probability first.
  1561. if (TBB && !Cond.empty() && FBB &&
  1562. MBPI->getEdgeProbability(ChainBB, FBB) >
  1563. MBPI->getEdgeProbability(ChainBB, TBB) &&
  1564. !TII->reverseBranchCondition(Cond)) {
  1565. DEBUG(dbgs() << "Reverse order of the two branches: "
  1566. << getBlockName(ChainBB) << "\n");
  1567. DEBUG(dbgs() << " Edge probability: "
  1568. << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
  1569. << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
  1570. DebugLoc dl; // FIXME: this is nowhere
  1571. TII->removeBranch(*ChainBB);
  1572. TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl);
  1573. ChainBB->updateTerminator();
  1574. }
  1575. }
  1576. }
  1577. }
  1578. void MachineBlockPlacement::alignBlocks() {
  1579. // Walk through the backedges of the function now that we have fully laid out
  1580. // the basic blocks and align the destination of each backedge. We don't rely
  1581. // exclusively on the loop info here so that we can align backedges in
  1582. // unnatural CFGs and backedges that were introduced purely because of the
  1583. // loop rotations done during this layout pass.
  1584. if (F->getFunction()->optForSize())
  1585. return;
  1586. BlockChain &FunctionChain = *BlockToChain[&F->front()];
  1587. if (FunctionChain.begin() == FunctionChain.end())
  1588. return; // Empty chain.
  1589. const BranchProbability ColdProb(1, 5); // 20%
  1590. BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
  1591. BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
  1592. for (MachineBasicBlock *ChainBB : FunctionChain) {
  1593. if (ChainBB == *FunctionChain.begin())
  1594. continue;
  1595. // Don't align non-looping basic blocks. These are unlikely to execute
  1596. // enough times to matter in practice. Note that we'll still handle
  1597. // unnatural CFGs inside of a natural outer loop (the common case) and
  1598. // rotated loops.
  1599. MachineLoop *L = MLI->getLoopFor(ChainBB);
  1600. if (!L)
  1601. continue;
  1602. unsigned Align = TLI->getPrefLoopAlignment(L);
  1603. if (!Align)
  1604. continue; // Don't care about loop alignment.
  1605. // If the block is cold relative to the function entry don't waste space
  1606. // aligning it.
  1607. BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
  1608. if (Freq < WeightedEntryFreq)
  1609. continue;
  1610. // If the block is cold relative to its loop header, don't align it
  1611. // regardless of what edges into the block exist.
  1612. MachineBasicBlock *LoopHeader = L->getHeader();
  1613. BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
  1614. if (Freq < (LoopHeaderFreq * ColdProb))
  1615. continue;
  1616. // Check for the existence of a non-layout predecessor which would benefit
  1617. // from aligning this block.
  1618. MachineBasicBlock *LayoutPred =
  1619. &*std::prev(MachineFunction::iterator(ChainBB));
  1620. // Force alignment if all the predecessors are jumps. We already checked
  1621. // that the block isn't cold above.
  1622. if (!LayoutPred->isSuccessor(ChainBB)) {
  1623. ChainBB->setAlignment(Align);
  1624. continue;
  1625. }
  1626. // Align this block if the layout predecessor's edge into this block is
  1627. // cold relative to the block. When this is true, other predecessors make up
  1628. // all of the hot entries into the block and thus alignment is likely to be
  1629. // important.
  1630. BranchProbability LayoutProb =
  1631. MBPI->getEdgeProbability(LayoutPred, ChainBB);
  1632. BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
  1633. if (LayoutEdgeFreq <= (Freq * ColdProb))
  1634. ChainBB->setAlignment(Align);
  1635. }
  1636. }
  1637. /// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
  1638. /// it was duplicated into its chain predecessor and removed.
  1639. /// \p BB - Basic block that may be duplicated.
  1640. ///
  1641. /// \p LPred - Chosen layout predecessor of \p BB.
  1642. /// Updated to be the chain end if LPred is removed.
  1643. /// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
  1644. /// \p BlockFilter - Set of blocks that belong to the loop being laid out.
  1645. /// Used to identify which blocks to update predecessor
  1646. /// counts.
  1647. /// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
  1648. /// chosen in the given order due to unnatural CFG
  1649. /// only needed if \p BB is removed and
  1650. /// \p PrevUnplacedBlockIt pointed to \p BB.
  1651. /// @return true if \p BB was removed.
  1652. bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
  1653. MachineBasicBlock *BB, MachineBasicBlock *&LPred,
  1654. MachineBasicBlock *LoopHeaderBB,
  1655. BlockChain &Chain, BlockFilterSet *BlockFilter,
  1656. MachineFunction::iterator &PrevUnplacedBlockIt) {
  1657. bool Removed, DuplicatedToLPred;
  1658. bool DuplicatedToOriginalLPred;
  1659. Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
  1660. PrevUnplacedBlockIt,
  1661. DuplicatedToLPred);
  1662. if (!Removed)
  1663. return false;
  1664. DuplicatedToOriginalLPred = DuplicatedToLPred;
  1665. // Iteratively try to duplicate again. It can happen that a block that is
  1666. // duplicated into is still small enough to be duplicated again.
  1667. // No need to call markBlockSuccessors in this case, as the blocks being
  1668. // duplicated from here on are already scheduled.
  1669. // Note that DuplicatedToLPred always implies Removed.
  1670. while (DuplicatedToLPred) {
  1671. assert (Removed && "Block must have been removed to be duplicated into its "
  1672. "layout predecessor.");
  1673. MachineBasicBlock *DupBB, *DupPred;
  1674. // The removal callback causes Chain.end() to be updated when a block is
  1675. // removed. On the first pass through the loop, the chain end should be the
  1676. // same as it was on function entry. On subsequent passes, because we are
  1677. // duplicating the block at the end of the chain, if it is removed the
  1678. // chain will have shrunk by one block.
  1679. BlockChain::iterator ChainEnd = Chain.end();
  1680. DupBB = *(--ChainEnd);
  1681. // Now try to duplicate again.
  1682. if (ChainEnd == Chain.begin())
  1683. break;
  1684. DupPred = *std::prev(ChainEnd);
  1685. Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
  1686. PrevUnplacedBlockIt,
  1687. DuplicatedToLPred);
  1688. }
  1689. // If BB was duplicated into LPred, it is now scheduled. But because it was
  1690. // removed, markChainSuccessors won't be called for its chain. Instead we
  1691. // call markBlockSuccessors for LPred to achieve the same effect. This must go
  1692. // at the end because repeating the tail duplication can increase the number
  1693. // of unscheduled predecessors.
  1694. LPred = *std::prev(Chain.end());
  1695. if (DuplicatedToOriginalLPred)
  1696. markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
  1697. return true;
  1698. }
  1699. /// Tail duplicate \p BB into (some) predecessors if profitable.
  1700. /// \p BB - Basic block that may be duplicated
  1701. /// \p LPred - Chosen layout predecessor of \p BB
  1702. /// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
  1703. /// \p BlockFilter - Set of blocks that belong to the loop being laid out.
  1704. /// Used to identify which blocks to update predecessor
  1705. /// counts.
  1706. /// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
  1707. /// chosen in the given order due to unnatural CFG
  1708. /// only needed if \p BB is removed and
  1709. /// \p PrevUnplacedBlockIt pointed to \p BB.
  1710. /// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will
  1711. /// only be true if the block was removed.
  1712. /// \return - True if the block was duplicated into all preds and removed.
  1713. bool MachineBlockPlacement::maybeTailDuplicateBlock(
  1714. MachineBasicBlock *BB, MachineBasicBlock *LPred,
  1715. const BlockChain &Chain, BlockFilterSet *BlockFilter,
  1716. MachineFunction::iterator &PrevUnplacedBlockIt,
  1717. bool &DuplicatedToLPred) {
  1718. DuplicatedToLPred = false;
  1719. DEBUG(dbgs() << "Redoing tail duplication for Succ#"
  1720. << BB->getNumber() << "\n");
  1721. bool IsSimple = TailDup.isSimpleBB(BB);
  1722. // Blocks with single successors don't create additional fallthrough
  1723. // opportunities. Don't duplicate them. TODO: When conditional exits are
  1724. // analyzable, allow them to be duplicated.
  1725. if (!IsSimple && BB->succ_size() == 1)
  1726. return false;
  1727. if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
  1728. return false;
  1729. // This has to be a callback because none of it can be done after
  1730. // BB is deleted.
  1731. bool Removed = false;
  1732. auto RemovalCallback =
  1733. [&](MachineBasicBlock *RemBB) {
  1734. // Signal to outer function
  1735. Removed = true;
  1736. // Conservative default.
  1737. bool InWorkList = true;
  1738. // Remove from the Chain and Chain Map
  1739. if (BlockToChain.count(RemBB)) {
  1740. BlockChain *Chain = BlockToChain[RemBB];
  1741. InWorkList = Chain->UnscheduledPredecessors == 0;
  1742. Chain->remove(RemBB);
  1743. BlockToChain.erase(RemBB);
  1744. }
  1745. // Handle the unplaced block iterator
  1746. if (&(*PrevUnplacedBlockIt) == RemBB) {
  1747. PrevUnplacedBlockIt++;
  1748. }
  1749. // Handle the Work Lists
  1750. if (InWorkList) {
  1751. SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
  1752. if (RemBB->isEHPad())
  1753. RemoveList = EHPadWorkList;
  1754. RemoveList.erase(
  1755. remove_if(RemoveList,
  1756. [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
  1757. RemoveList.end());
  1758. }
  1759. // Handle the filter set
  1760. if (BlockFilter) {
  1761. BlockFilter->remove(RemBB);
  1762. }
  1763. // Remove the block from loop info.
  1764. MLI->removeBlock(RemBB);
  1765. if (RemBB == PreferredLoopExit)
  1766. PreferredLoopExit = nullptr;
  1767. DEBUG(dbgs() << "TailDuplicator deleted block: "
  1768. << getBlockName(RemBB) << "\n");
  1769. };
  1770. auto RemovalCallbackRef =
  1771. llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
  1772. SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
  1773. TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
  1774. &DuplicatedPreds, &RemovalCallbackRef);
  1775. // Update UnscheduledPredecessors to reflect tail-duplication.
  1776. DuplicatedToLPred = false;
  1777. for (MachineBasicBlock *Pred : DuplicatedPreds) {
  1778. // We're only looking for unscheduled predecessors that match the filter.
  1779. BlockChain* PredChain = BlockToChain[Pred];
  1780. if (Pred == LPred)
  1781. DuplicatedToLPred = true;
  1782. if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
  1783. || PredChain == &Chain)
  1784. continue;
  1785. for (MachineBasicBlock *NewSucc : Pred->successors()) {
  1786. if (BlockFilter && !BlockFilter->count(NewSucc))
  1787. continue;
  1788. BlockChain *NewChain = BlockToChain[NewSucc];
  1789. if (NewChain != &Chain && NewChain != PredChain)
  1790. NewChain->UnscheduledPredecessors++;
  1791. }
  1792. }
  1793. return Removed;
  1794. }
  1795. bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
  1796. if (skipFunction(*MF.getFunction()))
  1797. return false;
  1798. // Check for single-block functions and skip them.
  1799. if (std::next(MF.begin()) == MF.end())
  1800. return false;
  1801. F = &MF;
  1802. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  1803. MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
  1804. getAnalysis<MachineBlockFrequencyInfo>());
  1805. MLI = &getAnalysis<MachineLoopInfo>();
  1806. TII = MF.getSubtarget().getInstrInfo();
  1807. TLI = MF.getSubtarget().getTargetLowering();
  1808. MDT = &getAnalysis<MachineDominatorTree>();
  1809. // Initialize PreferredLoopExit to nullptr here since it may never be set if
  1810. // there are no MachineLoops.
  1811. PreferredLoopExit = nullptr;
  1812. if (TailDupPlacement) {
  1813. unsigned TailDupSize = TailDuplicatePlacementThreshold;
  1814. if (MF.getFunction()->optForSize())
  1815. TailDupSize = 1;
  1816. TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
  1817. }
  1818. assert(BlockToChain.empty());
  1819. buildCFGChains();
  1820. // Changing the layout can create new tail merging opportunities.
  1821. TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
  1822. // TailMerge can create jump into if branches that make CFG irreducible for
  1823. // HW that requires structured CFG.
  1824. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
  1825. PassConfig->getEnableTailMerge() &&
  1826. BranchFoldPlacement;
  1827. // No tail merging opportunities if the block number is less than four.
  1828. if (MF.size() > 3 && EnableTailMerge) {
  1829. unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
  1830. BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
  1831. *MBPI, TailMergeSize);
  1832. if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
  1833. getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
  1834. /*AfterBlockPlacement=*/true)) {
  1835. // Redo the layout if tail merging creates/removes/moves blocks.
  1836. BlockToChain.clear();
  1837. // Must redo the dominator tree if blocks were changed.
  1838. MDT->runOnMachineFunction(MF);
  1839. ChainAllocator.DestroyAll();
  1840. buildCFGChains();
  1841. }
  1842. }
  1843. optimizeBranches();
  1844. alignBlocks();
  1845. BlockToChain.clear();
  1846. ChainAllocator.DestroyAll();
  1847. if (AlignAllBlock)
  1848. // Align all of the blocks in the function to a specific alignment.
  1849. for (MachineBasicBlock &MBB : MF)
  1850. MBB.setAlignment(AlignAllBlock);
  1851. else if (AlignAllNonFallThruBlocks) {
  1852. // Align all of the blocks that have no fall-through predecessors to a
  1853. // specific alignment.
  1854. for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
  1855. auto LayoutPred = std::prev(MBI);
  1856. if (!LayoutPred->isSuccessor(&*MBI))
  1857. MBI->setAlignment(AlignAllNonFallThruBlocks);
  1858. }
  1859. }
  1860. #ifndef NDEBUG
  1861. if (ViewBlockLayoutWithBFI != GVDT_None &&
  1862. (ViewBlockFreqFuncName.empty() ||
  1863. F->getFunction()->getName().equals(ViewBlockFreqFuncName))) {
  1864. MBFI->view(false);
  1865. }
  1866. #endif
  1867. // We always return true as we have no way to track whether the final order
  1868. // differs from the original order.
  1869. return true;
  1870. }
  1871. namespace {
  1872. /// \brief A pass to compute block placement statistics.
  1873. ///
  1874. /// A separate pass to compute interesting statistics for evaluating block
  1875. /// placement. This is separate from the actual placement pass so that they can
  1876. /// be computed in the absence of any placement transformations or when using
  1877. /// alternative placement strategies.
  1878. class MachineBlockPlacementStats : public MachineFunctionPass {
  1879. /// \brief A handle to the branch probability pass.
  1880. const MachineBranchProbabilityInfo *MBPI;
  1881. /// \brief A handle to the function-wide block frequency pass.
  1882. const MachineBlockFrequencyInfo *MBFI;
  1883. public:
  1884. static char ID; // Pass identification, replacement for typeid
  1885. MachineBlockPlacementStats() : MachineFunctionPass(ID) {
  1886. initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
  1887. }
  1888. bool runOnMachineFunction(MachineFunction &F) override;
  1889. void getAnalysisUsage(AnalysisUsage &AU) const override {
  1890. AU.addRequired<MachineBranchProbabilityInfo>();
  1891. AU.addRequired<MachineBlockFrequencyInfo>();
  1892. AU.setPreservesAll();
  1893. MachineFunctionPass::getAnalysisUsage(AU);
  1894. }
  1895. };
  1896. }
  1897. char MachineBlockPlacementStats::ID = 0;
  1898. char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
  1899. INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
  1900. "Basic Block Placement Stats", false, false)
  1901. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  1902. INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
  1903. INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
  1904. "Basic Block Placement Stats", false, false)
  1905. bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
  1906. // Check for single-block functions and skip them.
  1907. if (std::next(F.begin()) == F.end())
  1908. return false;
  1909. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  1910. MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  1911. for (MachineBasicBlock &MBB : F) {
  1912. BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
  1913. Statistic &NumBranches =
  1914. (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
  1915. Statistic &BranchTakenFreq =
  1916. (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
  1917. for (MachineBasicBlock *Succ : MBB.successors()) {
  1918. // Skip if this successor is a fallthrough.
  1919. if (MBB.isLayoutSuccessor(Succ))
  1920. continue;
  1921. BlockFrequency EdgeFreq =
  1922. BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
  1923. ++NumBranches;
  1924. BranchTakenFreq += EdgeFreq.getFrequency();
  1925. }
  1926. }
  1927. return false;
  1928. }