TailDuplicator.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951
  1. //===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This utility class duplicates basic blocks ending in unconditional branches
  11. // into the tails of their predecessors.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/CodeGen/TailDuplicator.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/SetVector.h"
  17. #include "llvm/ADT/SmallSet.h"
  18. #include "llvm/ADT/Statistic.h"
  19. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  20. #include "llvm/CodeGen/MachineFunctionPass.h"
  21. #include "llvm/CodeGen/MachineInstrBuilder.h"
  22. #include "llvm/CodeGen/MachineModuleInfo.h"
  23. #include "llvm/CodeGen/Passes.h"
  24. #include "llvm/IR/Function.h"
  25. #include "llvm/Support/CommandLine.h"
  26. #include "llvm/Support/Debug.h"
  27. #include "llvm/Support/ErrorHandling.h"
  28. #include "llvm/Support/raw_ostream.h"
  29. using namespace llvm;
  30. #define DEBUG_TYPE "tailduplication"
  31. STATISTIC(NumTails, "Number of tails duplicated");
  32. STATISTIC(NumTailDups, "Number of tail duplicated blocks");
  33. STATISTIC(NumTailDupAdded,
  34. "Number of instructions added due to tail duplication");
  35. STATISTIC(NumTailDupRemoved,
  36. "Number of instructions removed due to tail duplication");
  37. STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
  38. STATISTIC(NumAddedPHIs, "Number of phis added");
  39. namespace llvm {
  40. // Heuristic for tail duplication.
  41. static cl::opt<unsigned> TailDuplicateSize(
  42. "tail-dup-size",
  43. cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
  44. cl::Hidden);
  45. cl::opt<unsigned> TailDupIndirectBranchSize(
  46. "tail-dup-indirect-size",
  47. cl::desc("Maximum instructions to consider tail duplicating blocks that "
  48. "end with indirect branches."), cl::init(20),
  49. cl::Hidden);
  50. static cl::opt<bool>
  51. TailDupVerify("tail-dup-verify",
  52. cl::desc("Verify sanity of PHI instructions during taildup"),
  53. cl::init(false), cl::Hidden);
  54. static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
  55. cl::Hidden);
  56. void TailDuplicator::initMF(MachineFunction &MFin,
  57. const MachineBranchProbabilityInfo *MBPIin,
  58. unsigned TailDupSizeIn) {
  59. MF = &MFin;
  60. TII = MF->getSubtarget().getInstrInfo();
  61. TRI = MF->getSubtarget().getRegisterInfo();
  62. MRI = &MF->getRegInfo();
  63. MMI = &MF->getMMI();
  64. MBPI = MBPIin;
  65. TailDupSize = TailDupSizeIn;
  66. assert(MBPI != nullptr && "Machine Branch Probability Info required");
  67. PreRegAlloc = MRI->isSSA();
  68. }
  69. static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
  70. for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
  71. MachineBasicBlock *MBB = &*I;
  72. SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(),
  73. MBB->pred_end());
  74. MachineBasicBlock::iterator MI = MBB->begin();
  75. while (MI != MBB->end()) {
  76. if (!MI->isPHI())
  77. break;
  78. for (MachineBasicBlock *PredBB : Preds) {
  79. bool Found = false;
  80. for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
  81. MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
  82. if (PHIBB == PredBB) {
  83. Found = true;
  84. break;
  85. }
  86. }
  87. if (!Found) {
  88. dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
  89. dbgs() << " missing input from predecessor BB#"
  90. << PredBB->getNumber() << '\n';
  91. llvm_unreachable(nullptr);
  92. }
  93. }
  94. for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
  95. MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
  96. if (CheckExtra && !Preds.count(PHIBB)) {
  97. dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": "
  98. << *MI;
  99. dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber()
  100. << '\n';
  101. llvm_unreachable(nullptr);
  102. }
  103. if (PHIBB->getNumber() < 0) {
  104. dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
  105. dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
  106. llvm_unreachable(nullptr);
  107. }
  108. }
  109. ++MI;
  110. }
  111. }
  112. }
  113. /// Tail duplicate the block and cleanup.
  114. /// \p IsSimple - return value of isSimpleBB
  115. /// \p MBB - block to be duplicated
  116. /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
  117. /// all Preds that received a copy of \p MBB.
  118. bool TailDuplicator::tailDuplicateAndUpdate(
  119. bool IsSimple, MachineBasicBlock *MBB,
  120. SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds) {
  121. // Save the successors list.
  122. SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
  123. MBB->succ_end());
  124. SmallVector<MachineBasicBlock *, 8> TDBBs;
  125. SmallVector<MachineInstr *, 16> Copies;
  126. if (!tailDuplicate(IsSimple, MBB, TDBBs, Copies))
  127. return false;
  128. ++NumTails;
  129. SmallVector<MachineInstr *, 8> NewPHIs;
  130. MachineSSAUpdater SSAUpdate(*MF, &NewPHIs);
  131. // TailBB's immediate successors are now successors of those predecessors
  132. // which duplicated TailBB. Add the predecessors as sources to the PHI
  133. // instructions.
  134. bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
  135. if (PreRegAlloc)
  136. updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
  137. // If it is dead, remove it.
  138. if (isDead) {
  139. NumTailDupRemoved += MBB->size();
  140. removeDeadBlock(MBB);
  141. ++NumDeadBlocks;
  142. }
  143. // Update SSA form.
  144. if (!SSAUpdateVRs.empty()) {
  145. for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
  146. unsigned VReg = SSAUpdateVRs[i];
  147. SSAUpdate.Initialize(VReg);
  148. // If the original definition is still around, add it as an available
  149. // value.
  150. MachineInstr *DefMI = MRI->getVRegDef(VReg);
  151. MachineBasicBlock *DefBB = nullptr;
  152. if (DefMI) {
  153. DefBB = DefMI->getParent();
  154. SSAUpdate.AddAvailableValue(DefBB, VReg);
  155. }
  156. // Add the new vregs as available values.
  157. DenseMap<unsigned, AvailableValsTy>::iterator LI =
  158. SSAUpdateVals.find(VReg);
  159. for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
  160. MachineBasicBlock *SrcBB = LI->second[j].first;
  161. unsigned SrcReg = LI->second[j].second;
  162. SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
  163. }
  164. // Rewrite uses that are outside of the original def's block.
  165. MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
  166. while (UI != MRI->use_end()) {
  167. MachineOperand &UseMO = *UI;
  168. MachineInstr *UseMI = UseMO.getParent();
  169. ++UI;
  170. if (UseMI->isDebugValue()) {
  171. // SSAUpdate can replace the use with an undef. That creates
  172. // a debug instruction that is a kill.
  173. // FIXME: Should it SSAUpdate job to delete debug instructions
  174. // instead of replacing the use with undef?
  175. UseMI->eraseFromParent();
  176. continue;
  177. }
  178. if (UseMI->getParent() == DefBB && !UseMI->isPHI())
  179. continue;
  180. SSAUpdate.RewriteUse(UseMO);
  181. }
  182. }
  183. SSAUpdateVRs.clear();
  184. SSAUpdateVals.clear();
  185. }
  186. // Eliminate some of the copies inserted by tail duplication to maintain
  187. // SSA form.
  188. for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
  189. MachineInstr *Copy = Copies[i];
  190. if (!Copy->isCopy())
  191. continue;
  192. unsigned Dst = Copy->getOperand(0).getReg();
  193. unsigned Src = Copy->getOperand(1).getReg();
  194. if (MRI->hasOneNonDBGUse(Src) &&
  195. MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
  196. // Copy is the only use. Do trivial copy propagation here.
  197. MRI->replaceRegWith(Dst, Src);
  198. Copy->eraseFromParent();
  199. }
  200. }
  201. if (NewPHIs.size())
  202. NumAddedPHIs += NewPHIs.size();
  203. if (DuplicatedPreds)
  204. *DuplicatedPreds = std::move(TDBBs);
  205. return true;
  206. }
  207. /// Look for small blocks that are unconditionally branched to and do not fall
  208. /// through. Tail-duplicate their instructions into their predecessors to
  209. /// eliminate (dynamic) branches.
  210. bool TailDuplicator::tailDuplicateBlocks() {
  211. bool MadeChange = false;
  212. if (PreRegAlloc && TailDupVerify) {
  213. DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
  214. VerifyPHIs(*MF, true);
  215. }
  216. for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) {
  217. MachineBasicBlock *MBB = &*I++;
  218. if (NumTails == TailDupLimit)
  219. break;
  220. bool IsSimple = isSimpleBB(MBB);
  221. if (!shouldTailDuplicate(IsSimple, *MBB))
  222. continue;
  223. MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB);
  224. }
  225. if (PreRegAlloc && TailDupVerify)
  226. VerifyPHIs(*MF, false);
  227. return MadeChange;
  228. }
  229. static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
  230. const MachineRegisterInfo *MRI) {
  231. for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
  232. if (UseMI.isDebugValue())
  233. continue;
  234. if (UseMI.getParent() != BB)
  235. return true;
  236. }
  237. return false;
  238. }
  239. static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
  240. for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
  241. if (MI->getOperand(i + 1).getMBB() == SrcBB)
  242. return i;
  243. return 0;
  244. }
  245. // Remember which registers are used by phis in this block. This is
  246. // used to determine which registers are liveout while modifying the
  247. // block (which is why we need to copy the information).
  248. static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
  249. DenseSet<unsigned> *UsedByPhi) {
  250. for (const auto &MI : BB) {
  251. if (!MI.isPHI())
  252. break;
  253. for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
  254. unsigned SrcReg = MI.getOperand(i).getReg();
  255. UsedByPhi->insert(SrcReg);
  256. }
  257. }
  258. }
  259. /// Add a definition and source virtual registers pair for SSA update.
  260. void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
  261. MachineBasicBlock *BB) {
  262. DenseMap<unsigned, AvailableValsTy>::iterator LI =
  263. SSAUpdateVals.find(OrigReg);
  264. if (LI != SSAUpdateVals.end())
  265. LI->second.push_back(std::make_pair(BB, NewReg));
  266. else {
  267. AvailableValsTy Vals;
  268. Vals.push_back(std::make_pair(BB, NewReg));
  269. SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
  270. SSAUpdateVRs.push_back(OrigReg);
  271. }
  272. }
  273. /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
  274. /// source register that's contributed by PredBB and update SSA update map.
  275. void TailDuplicator::processPHI(
  276. MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
  277. DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
  278. SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
  279. const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
  280. unsigned DefReg = MI->getOperand(0).getReg();
  281. unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
  282. assert(SrcOpIdx && "Unable to find matching PHI source?");
  283. unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
  284. unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
  285. const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  286. LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
  287. // Insert a copy from source to the end of the block. The def register is the
  288. // available value liveout of the block.
  289. unsigned NewDef = MRI->createVirtualRegister(RC);
  290. Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
  291. if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
  292. addSSAUpdateEntry(DefReg, NewDef, PredBB);
  293. if (!Remove)
  294. return;
  295. // Remove PredBB from the PHI node.
  296. MI->RemoveOperand(SrcOpIdx + 1);
  297. MI->RemoveOperand(SrcOpIdx);
  298. if (MI->getNumOperands() == 1)
  299. MI->eraseFromParent();
  300. }
  301. /// Duplicate a TailBB instruction to PredBB and update
  302. /// the source operands due to earlier PHI translation.
  303. void TailDuplicator::duplicateInstruction(
  304. MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
  305. DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
  306. const DenseSet<unsigned> &UsedByPhi) {
  307. MachineInstr *NewMI = TII->duplicate(*MI, *MF);
  308. if (PreRegAlloc) {
  309. for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
  310. MachineOperand &MO = NewMI->getOperand(i);
  311. if (!MO.isReg())
  312. continue;
  313. unsigned Reg = MO.getReg();
  314. if (!TargetRegisterInfo::isVirtualRegister(Reg))
  315. continue;
  316. if (MO.isDef()) {
  317. const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  318. unsigned NewReg = MRI->createVirtualRegister(RC);
  319. MO.setReg(NewReg);
  320. LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
  321. if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
  322. addSSAUpdateEntry(Reg, NewReg, PredBB);
  323. } else {
  324. auto VI = LocalVRMap.find(Reg);
  325. if (VI != LocalVRMap.end()) {
  326. // Need to make sure that the register class of the mapped register
  327. // will satisfy the constraints of the class of the register being
  328. // replaced.
  329. auto *OrigRC = MRI->getRegClass(Reg);
  330. auto *MappedRC = MRI->getRegClass(VI->second.Reg);
  331. const TargetRegisterClass *ConstrRC;
  332. if (VI->second.SubReg != 0) {
  333. ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
  334. VI->second.SubReg);
  335. if (ConstrRC) {
  336. // The actual constraining (as in "find appropriate new class")
  337. // is done by getMatchingSuperRegClass, so now we only need to
  338. // change the class of the mapped register.
  339. MRI->setRegClass(VI->second.Reg, ConstrRC);
  340. }
  341. } else {
  342. // For mapped registers that do not have sub-registers, simply
  343. // restrict their class to match the original one.
  344. ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
  345. }
  346. if (ConstrRC) {
  347. // If the class constraining succeeded, we can simply replace
  348. // the old register with the mapped one.
  349. MO.setReg(VI->second.Reg);
  350. // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
  351. // sub-register, we need to compose the sub-register indices.
  352. MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
  353. VI->second.SubReg));
  354. } else {
  355. // The direct replacement is not possible, due to failing register
  356. // class constraints. An explicit COPY is necessary. Create one
  357. // that can be reused
  358. auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
  359. if (NewRC == nullptr)
  360. NewRC = OrigRC;
  361. unsigned NewReg = MRI->createVirtualRegister(NewRC);
  362. BuildMI(*PredBB, MI, MI->getDebugLoc(),
  363. TII->get(TargetOpcode::COPY), NewReg)
  364. .addReg(VI->second.Reg, 0, VI->second.SubReg);
  365. LocalVRMap.erase(VI);
  366. LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
  367. MO.setReg(NewReg);
  368. // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
  369. // is equivalent to the whole register Reg. Hence, Reg:subreg
  370. // is same as NewReg:subreg, so keep the sub-register index
  371. // unchanged.
  372. }
  373. // Clear any kill flags from this operand. The new register could
  374. // have uses after this one, so kills are not valid here.
  375. MO.setIsKill(false);
  376. }
  377. }
  378. }
  379. }
  380. PredBB->insert(PredBB->instr_end(), NewMI);
  381. }
  382. /// After FromBB is tail duplicated into its predecessor blocks, the successors
  383. /// have gained new predecessors. Update the PHI instructions in them
  384. /// accordingly.
  385. void TailDuplicator::updateSuccessorsPHIs(
  386. MachineBasicBlock *FromBB, bool isDead,
  387. SmallVectorImpl<MachineBasicBlock *> &TDBBs,
  388. SmallSetVector<MachineBasicBlock *, 8> &Succs) {
  389. for (MachineBasicBlock *SuccBB : Succs) {
  390. for (MachineInstr &MI : *SuccBB) {
  391. if (!MI.isPHI())
  392. break;
  393. MachineInstrBuilder MIB(*FromBB->getParent(), MI);
  394. unsigned Idx = 0;
  395. for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
  396. MachineOperand &MO = MI.getOperand(i + 1);
  397. if (MO.getMBB() == FromBB) {
  398. Idx = i;
  399. break;
  400. }
  401. }
  402. assert(Idx != 0);
  403. MachineOperand &MO0 = MI.getOperand(Idx);
  404. unsigned Reg = MO0.getReg();
  405. if (isDead) {
  406. // Folded into the previous BB.
  407. // There could be duplicate phi source entries. FIXME: Should sdisel
  408. // or earlier pass fixed this?
  409. for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) {
  410. MachineOperand &MO = MI.getOperand(i + 1);
  411. if (MO.getMBB() == FromBB) {
  412. MI.RemoveOperand(i + 1);
  413. MI.RemoveOperand(i);
  414. }
  415. }
  416. } else
  417. Idx = 0;
  418. // If Idx is set, the operands at Idx and Idx+1 must be removed.
  419. // We reuse the location to avoid expensive RemoveOperand calls.
  420. DenseMap<unsigned, AvailableValsTy>::iterator LI =
  421. SSAUpdateVals.find(Reg);
  422. if (LI != SSAUpdateVals.end()) {
  423. // This register is defined in the tail block.
  424. for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
  425. MachineBasicBlock *SrcBB = LI->second[j].first;
  426. // If we didn't duplicate a bb into a particular predecessor, we
  427. // might still have added an entry to SSAUpdateVals to correcly
  428. // recompute SSA. If that case, avoid adding a dummy extra argument
  429. // this PHI.
  430. if (!SrcBB->isSuccessor(SuccBB))
  431. continue;
  432. unsigned SrcReg = LI->second[j].second;
  433. if (Idx != 0) {
  434. MI.getOperand(Idx).setReg(SrcReg);
  435. MI.getOperand(Idx + 1).setMBB(SrcBB);
  436. Idx = 0;
  437. } else {
  438. MIB.addReg(SrcReg).addMBB(SrcBB);
  439. }
  440. }
  441. } else {
  442. // Live in tail block, must also be live in predecessors.
  443. for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
  444. MachineBasicBlock *SrcBB = TDBBs[j];
  445. if (Idx != 0) {
  446. MI.getOperand(Idx).setReg(Reg);
  447. MI.getOperand(Idx + 1).setMBB(SrcBB);
  448. Idx = 0;
  449. } else {
  450. MIB.addReg(Reg).addMBB(SrcBB);
  451. }
  452. }
  453. }
  454. if (Idx != 0) {
  455. MI.RemoveOperand(Idx + 1);
  456. MI.RemoveOperand(Idx);
  457. }
  458. }
  459. }
  460. }
  461. /// Determine if it is profitable to duplicate this block.
  462. bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
  463. MachineBasicBlock &TailBB) {
  464. // Only duplicate blocks that end with unconditional branches.
  465. if (TailBB.canFallThrough())
  466. return false;
  467. // Don't try to tail-duplicate single-block loops.
  468. if (TailBB.isSuccessor(&TailBB))
  469. return false;
  470. // Set the limit on the cost to duplicate. When optimizing for size,
  471. // duplicate only one, because one branch instruction can be eliminated to
  472. // compensate for the duplication.
  473. unsigned MaxDuplicateCount;
  474. if (TailDupSize == 0 &&
  475. TailDuplicateSize.getNumOccurrences() == 0 &&
  476. MF->getFunction()->optForSize())
  477. MaxDuplicateCount = 1;
  478. else if (TailDupSize == 0)
  479. MaxDuplicateCount = TailDuplicateSize;
  480. else
  481. MaxDuplicateCount = TailDupSize;
  482. // If the block to be duplicated ends in an unanalyzable fallthrough, don't
  483. // duplicate it.
  484. // A similar check is necessary in MachineBlockPlacement to make sure pairs of
  485. // blocks with unanalyzable fallthrough get layed out contiguously.
  486. MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
  487. SmallVector<MachineOperand, 4> PredCond;
  488. if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond, true)
  489. && TailBB.canFallThrough())
  490. return false;
  491. // If the target has hardware branch prediction that can handle indirect
  492. // branches, duplicating them can often make them predictable when there
  493. // are common paths through the code. The limit needs to be high enough
  494. // to allow undoing the effects of tail merging and other optimizations
  495. // that rearrange the predecessors of the indirect branch.
  496. bool HasIndirectbr = false;
  497. if (!TailBB.empty())
  498. HasIndirectbr = TailBB.back().isIndirectBranch();
  499. if (HasIndirectbr && PreRegAlloc)
  500. MaxDuplicateCount = TailDupIndirectBranchSize;
  501. // Check the instructions in the block to determine whether tail-duplication
  502. // is invalid or unlikely to be profitable.
  503. unsigned InstrCount = 0;
  504. for (MachineInstr &MI : TailBB) {
  505. // Non-duplicable things shouldn't be tail-duplicated.
  506. if (MI.isNotDuplicable())
  507. return false;
  508. // Convergent instructions can be duplicated only if doing so doesn't add
  509. // new control dependencies, which is what we're going to do here.
  510. if (MI.isConvergent())
  511. return false;
  512. // Do not duplicate 'return' instructions if this is a pre-regalloc run.
  513. // A return may expand into a lot more instructions (e.g. reload of callee
  514. // saved registers) after PEI.
  515. if (PreRegAlloc && MI.isReturn())
  516. return false;
  517. // Avoid duplicating calls before register allocation. Calls presents a
  518. // barrier to register allocation so duplicating them may end up increasing
  519. // spills.
  520. if (PreRegAlloc && MI.isCall())
  521. return false;
  522. if (!MI.isPHI() && !MI.isDebugValue())
  523. InstrCount += 1;
  524. if (InstrCount > MaxDuplicateCount)
  525. return false;
  526. }
  527. // Check if any of the successors of TailBB has a PHI node in which the
  528. // value corresponding to TailBB uses a subregister.
  529. // If a phi node uses a register paired with a subregister, the actual
  530. // "value type" of the phi may differ from the type of the register without
  531. // any subregisters. Due to a bug, tail duplication may add a new operand
  532. // without a necessary subregister, producing an invalid code. This is
  533. // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
  534. // Disable tail duplication for this case for now, until the problem is
  535. // fixed.
  536. for (auto SB : TailBB.successors()) {
  537. for (auto &I : *SB) {
  538. if (!I.isPHI())
  539. break;
  540. unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
  541. assert(Idx != 0);
  542. MachineOperand &PU = I.getOperand(Idx);
  543. if (PU.getSubReg() != 0)
  544. return false;
  545. }
  546. }
  547. if (HasIndirectbr && PreRegAlloc)
  548. return true;
  549. if (IsSimple)
  550. return true;
  551. if (!PreRegAlloc)
  552. return true;
  553. return canCompletelyDuplicateBB(TailBB);
  554. }
  555. /// True if this BB has only one unconditional jump.
  556. bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) {
  557. if (TailBB->succ_size() != 1)
  558. return false;
  559. if (TailBB->pred_empty())
  560. return false;
  561. MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr();
  562. if (I == TailBB->end())
  563. return true;
  564. return I->isUnconditionalBranch();
  565. }
  566. static bool bothUsedInPHI(const MachineBasicBlock &A,
  567. const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) {
  568. for (MachineBasicBlock *BB : A.successors())
  569. if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
  570. return true;
  571. return false;
  572. }
  573. bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
  574. for (MachineBasicBlock *PredBB : BB.predecessors()) {
  575. if (PredBB->succ_size() > 1)
  576. return false;
  577. MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
  578. SmallVector<MachineOperand, 4> PredCond;
  579. if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
  580. return false;
  581. if (!PredCond.empty())
  582. return false;
  583. }
  584. return true;
  585. }
  586. bool TailDuplicator::duplicateSimpleBB(
  587. MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs,
  588. const DenseSet<unsigned> &UsedByPhi,
  589. SmallVectorImpl<MachineInstr *> &Copies) {
  590. SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
  591. TailBB->succ_end());
  592. SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
  593. TailBB->pred_end());
  594. bool Changed = false;
  595. for (MachineBasicBlock *PredBB : Preds) {
  596. if (PredBB->hasEHPadSuccessor())
  597. continue;
  598. if (bothUsedInPHI(*PredBB, Succs))
  599. continue;
  600. MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
  601. SmallVector<MachineOperand, 4> PredCond;
  602. if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
  603. continue;
  604. Changed = true;
  605. DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
  606. << "From simple Succ: " << *TailBB);
  607. MachineBasicBlock *NewTarget = *TailBB->succ_begin();
  608. MachineBasicBlock *NextBB = PredBB->getNextNode();
  609. // Make PredFBB explicit.
  610. if (PredCond.empty())
  611. PredFBB = PredTBB;
  612. // Make fall through explicit.
  613. if (!PredTBB)
  614. PredTBB = NextBB;
  615. if (!PredFBB)
  616. PredFBB = NextBB;
  617. // Redirect
  618. if (PredFBB == TailBB)
  619. PredFBB = NewTarget;
  620. if (PredTBB == TailBB)
  621. PredTBB = NewTarget;
  622. // Make the branch unconditional if possible
  623. if (PredTBB == PredFBB) {
  624. PredCond.clear();
  625. PredFBB = nullptr;
  626. }
  627. // Avoid adding fall through branches.
  628. if (PredFBB == NextBB)
  629. PredFBB = nullptr;
  630. if (PredTBB == NextBB && PredFBB == nullptr)
  631. PredTBB = nullptr;
  632. TII->RemoveBranch(*PredBB);
  633. if (!PredBB->isSuccessor(NewTarget))
  634. PredBB->replaceSuccessor(TailBB, NewTarget);
  635. else {
  636. PredBB->removeSuccessor(TailBB, true);
  637. assert(PredBB->succ_size() <= 1);
  638. }
  639. if (PredTBB)
  640. TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
  641. TDBBs.push_back(PredBB);
  642. }
  643. return Changed;
  644. }
  645. bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
  646. MachineBasicBlock *PredBB) {
  647. // EH edges are ignored by AnalyzeBranch.
  648. if (PredBB->succ_size() > 1)
  649. return false;
  650. MachineBasicBlock *PredTBB, *PredFBB;
  651. SmallVector<MachineOperand, 4> PredCond;
  652. if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
  653. return false;
  654. if (!PredCond.empty())
  655. return false;
  656. return true;
  657. }
  658. /// If it is profitable, duplicate TailBB's contents in each
  659. /// of its predecessors.
  660. bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
  661. SmallVectorImpl<MachineBasicBlock *> &TDBBs,
  662. SmallVectorImpl<MachineInstr *> &Copies) {
  663. DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
  664. DenseSet<unsigned> UsedByPhi;
  665. getRegsUsedByPHIs(*TailBB, &UsedByPhi);
  666. if (IsSimple)
  667. return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
  668. // Iterate through all the unique predecessors and tail-duplicate this
  669. // block into them, if possible. Copying the list ahead of time also
  670. // avoids trouble with the predecessor list reallocating.
  671. bool Changed = false;
  672. SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
  673. TailBB->pred_end());
  674. for (MachineBasicBlock *PredBB : Preds) {
  675. assert(TailBB != PredBB &&
  676. "Single-block loop should have been rejected earlier!");
  677. if (!canTailDuplicate(TailBB, PredBB))
  678. continue;
  679. // Don't duplicate into a fall-through predecessor (at least for now).
  680. if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
  681. continue;
  682. DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
  683. << "From Succ: " << *TailBB);
  684. TDBBs.push_back(PredBB);
  685. // Remove PredBB's unconditional branch.
  686. TII->RemoveBranch(*PredBB);
  687. // Clone the contents of TailBB into PredBB.
  688. DenseMap<unsigned, RegSubRegPair> LocalVRMap;
  689. SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
  690. // Use instr_iterator here to properly handle bundles, e.g.
  691. // ARM Thumb2 IT block.
  692. MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
  693. while (I != TailBB->instr_end()) {
  694. MachineInstr *MI = &*I;
  695. ++I;
  696. if (MI->isPHI()) {
  697. // Replace the uses of the def of the PHI with the register coming
  698. // from PredBB.
  699. processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
  700. } else {
  701. // Replace def of virtual registers with new registers, and update
  702. // uses with PHI source register or the new registers.
  703. duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
  704. }
  705. }
  706. appendCopies(PredBB, CopyInfos, Copies);
  707. // Simplify
  708. MachineBasicBlock *PredTBB, *PredFBB;
  709. SmallVector<MachineOperand, 4> PredCond;
  710. TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
  711. NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
  712. // Update the CFG.
  713. PredBB->removeSuccessor(PredBB->succ_begin());
  714. assert(PredBB->succ_empty() &&
  715. "TailDuplicate called on block with multiple successors!");
  716. for (MachineBasicBlock *Succ : TailBB->successors())
  717. PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ));
  718. Changed = true;
  719. ++NumTailDups;
  720. }
  721. // If TailBB was duplicated into all its predecessors except for the prior
  722. // block, which falls through unconditionally, move the contents of this
  723. // block into the prior block.
  724. MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
  725. MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
  726. SmallVector<MachineOperand, 4> PriorCond;
  727. // This has to check PrevBB->succ_size() because EH edges are ignored by
  728. // AnalyzeBranch.
  729. if (PrevBB->succ_size() == 1 &&
  730. // Layout preds are not always CFG preds. Check.
  731. *PrevBB->succ_begin() == TailBB &&
  732. !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
  733. PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
  734. !TailBB->hasAddressTaken()) {
  735. DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
  736. << "From MBB: " << *TailBB);
  737. if (PreRegAlloc) {
  738. DenseMap<unsigned, RegSubRegPair> LocalVRMap;
  739. SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
  740. MachineBasicBlock::iterator I = TailBB->begin();
  741. // Process PHI instructions first.
  742. while (I != TailBB->end() && I->isPHI()) {
  743. // Replace the uses of the def of the PHI with the register coming
  744. // from PredBB.
  745. MachineInstr *MI = &*I++;
  746. processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
  747. }
  748. // Now copy the non-PHI instructions.
  749. while (I != TailBB->end()) {
  750. // Replace def of virtual registers with new registers, and update
  751. // uses with PHI source register or the new registers.
  752. MachineInstr *MI = &*I++;
  753. assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
  754. duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
  755. MI->eraseFromParent();
  756. }
  757. appendCopies(PrevBB, CopyInfos, Copies);
  758. } else {
  759. // No PHIs to worry about, just splice the instructions over.
  760. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
  761. }
  762. PrevBB->removeSuccessor(PrevBB->succ_begin());
  763. assert(PrevBB->succ_empty());
  764. PrevBB->transferSuccessors(TailBB);
  765. TDBBs.push_back(PrevBB);
  766. Changed = true;
  767. }
  768. // If this is after register allocation, there are no phis to fix.
  769. if (!PreRegAlloc)
  770. return Changed;
  771. // If we made no changes so far, we are safe.
  772. if (!Changed)
  773. return Changed;
  774. // Handle the nasty case in that we duplicated a block that is part of a loop
  775. // into some but not all of its predecessors. For example:
  776. // 1 -> 2 <-> 3 |
  777. // \ |
  778. // \---> rest |
  779. // if we duplicate 2 into 1 but not into 3, we end up with
  780. // 12 -> 3 <-> 2 -> rest |
  781. // \ / |
  782. // \----->-----/ |
  783. // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
  784. // with a phi in 3 (which now dominates 2).
  785. // What we do here is introduce a copy in 3 of the register defined by the
  786. // phi, just like when we are duplicating 2 into 3, but we don't copy any
  787. // real instructions or remove the 3 -> 2 edge from the phi in 2.
  788. for (MachineBasicBlock *PredBB : Preds) {
  789. if (is_contained(TDBBs, PredBB))
  790. continue;
  791. // EH edges
  792. if (PredBB->succ_size() != 1)
  793. continue;
  794. DenseMap<unsigned, RegSubRegPair> LocalVRMap;
  795. SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
  796. MachineBasicBlock::iterator I = TailBB->begin();
  797. // Process PHI instructions first.
  798. while (I != TailBB->end() && I->isPHI()) {
  799. // Replace the uses of the def of the PHI with the register coming
  800. // from PredBB.
  801. MachineInstr *MI = &*I++;
  802. processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
  803. }
  804. appendCopies(PredBB, CopyInfos, Copies);
  805. }
  806. return Changed;
  807. }
  808. /// At the end of the block \p MBB generate COPY instructions between registers
  809. /// described by \p CopyInfos. Append resulting instructions to \p Copies.
  810. void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
  811. SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
  812. SmallVectorImpl<MachineInstr*> &Copies) {
  813. MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
  814. const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
  815. for (auto &CI : CopyInfos) {
  816. auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
  817. .addReg(CI.second.Reg, 0, CI.second.SubReg);
  818. Copies.push_back(C);
  819. }
  820. }
  821. /// Remove the specified dead machine basic block from the function, updating
  822. /// the CFG.
  823. void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
  824. assert(MBB->pred_empty() && "MBB must be dead!");
  825. DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
  826. // Remove all successors.
  827. while (!MBB->succ_empty())
  828. MBB->removeSuccessor(MBB->succ_end() - 1);
  829. // Remove the block.
  830. MBB->eraseFromParent();
  831. }
  832. } // End llvm namespace