VPlan.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759
  1. //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This is the LLVM vectorization plan. It represents a candidate for
  11. /// vectorization, allowing to plan and optimize how to vectorize a given loop
  12. /// before generating LLVM-IR.
  13. /// The vectorizer uses vectorization plans to estimate the costs of potential
  14. /// candidates and if profitable to execute the desired plan, generating vector
  15. /// LLVM-IR code.
  16. ///
  17. //===----------------------------------------------------------------------===//
  18. #include "VPlan.h"
  19. #include "VPlanDominatorTree.h"
  20. #include "llvm/ADT/DepthFirstIterator.h"
  21. #include "llvm/ADT/PostOrderIterator.h"
  22. #include "llvm/ADT/SmallVector.h"
  23. #include "llvm/ADT/Twine.h"
  24. #include "llvm/Analysis/LoopInfo.h"
  25. #include "llvm/IR/BasicBlock.h"
  26. #include "llvm/IR/CFG.h"
  27. #include "llvm/IR/InstrTypes.h"
  28. #include "llvm/IR/Instruction.h"
  29. #include "llvm/IR/Instructions.h"
  30. #include "llvm/IR/Type.h"
  31. #include "llvm/IR/Value.h"
  32. #include "llvm/Support/Casting.h"
  33. #include "llvm/Support/Debug.h"
  34. #include "llvm/Support/ErrorHandling.h"
  35. #include "llvm/Support/GenericDomTreeConstruction.h"
  36. #include "llvm/Support/GraphWriter.h"
  37. #include "llvm/Support/raw_ostream.h"
  38. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  39. #include <cassert>
  40. #include <iterator>
  41. #include <string>
  42. #include <vector>
  43. using namespace llvm;
  44. extern cl::opt<bool> EnableVPlanNativePath;
  45. #define DEBUG_TYPE "vplan"
  46. raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
  47. if (const VPInstruction *Instr = dyn_cast<VPInstruction>(&V))
  48. Instr->print(OS);
  49. else
  50. V.printAsOperand(OS);
  51. return OS;
  52. }
  53. /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
  54. const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
  55. const VPBlockBase *Block = this;
  56. while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  57. Block = Region->getEntry();
  58. return cast<VPBasicBlock>(Block);
  59. }
  60. VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
  61. VPBlockBase *Block = this;
  62. while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  63. Block = Region->getEntry();
  64. return cast<VPBasicBlock>(Block);
  65. }
  66. /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
  67. const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
  68. const VPBlockBase *Block = this;
  69. while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  70. Block = Region->getExit();
  71. return cast<VPBasicBlock>(Block);
  72. }
  73. VPBasicBlock *VPBlockBase::getExitBasicBlock() {
  74. VPBlockBase *Block = this;
  75. while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  76. Block = Region->getExit();
  77. return cast<VPBasicBlock>(Block);
  78. }
  79. VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
  80. if (!Successors.empty() || !Parent)
  81. return this;
  82. assert(Parent->getExit() == this &&
  83. "Block w/o successors not the exit of its parent.");
  84. return Parent->getEnclosingBlockWithSuccessors();
  85. }
  86. VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
  87. if (!Predecessors.empty() || !Parent)
  88. return this;
  89. assert(Parent->getEntry() == this &&
  90. "Block w/o predecessors not the entry of its parent.");
  91. return Parent->getEnclosingBlockWithPredecessors();
  92. }
  93. void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
  94. SmallVector<VPBlockBase *, 8> Blocks;
  95. for (VPBlockBase *Block : depth_first(Entry))
  96. Blocks.push_back(Block);
  97. for (VPBlockBase *Block : Blocks)
  98. delete Block;
  99. }
  100. BasicBlock *
  101. VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
  102. // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
  103. // Pred stands for Predessor. Prev stands for Previous - last visited/created.
  104. BasicBlock *PrevBB = CFG.PrevBB;
  105. BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
  106. PrevBB->getParent(), CFG.LastBB);
  107. LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
  108. // Hook up the new basic block to its predecessors.
  109. for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
  110. VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
  111. auto &PredVPSuccessors = PredVPBB->getSuccessors();
  112. BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
  113. // In outer loop vectorization scenario, the predecessor BBlock may not yet
  114. // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
  115. // vectorization. We do not encounter this case in inner loop vectorization
  116. // as we start out by building a loop skeleton with the vector loop header
  117. // and latch blocks. As a result, we never enter this function for the
  118. // header block in the non VPlan-native path.
  119. if (!PredBB) {
  120. assert(EnableVPlanNativePath &&
  121. "Unexpected null predecessor in non VPlan-native path");
  122. CFG.VPBBsToFix.push_back(PredVPBB);
  123. continue;
  124. }
  125. assert(PredBB && "Predecessor basic-block not found building successor.");
  126. auto *PredBBTerminator = PredBB->getTerminator();
  127. LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
  128. if (isa<UnreachableInst>(PredBBTerminator)) {
  129. assert(PredVPSuccessors.size() == 1 &&
  130. "Predecessor ending w/o branch must have single successor.");
  131. PredBBTerminator->eraseFromParent();
  132. BranchInst::Create(NewBB, PredBB);
  133. } else {
  134. assert(PredVPSuccessors.size() == 2 &&
  135. "Predecessor ending with branch must have two successors.");
  136. unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
  137. assert(!PredBBTerminator->getSuccessor(idx) &&
  138. "Trying to reset an existing successor block.");
  139. PredBBTerminator->setSuccessor(idx, NewBB);
  140. }
  141. }
  142. return NewBB;
  143. }
  144. void VPBasicBlock::execute(VPTransformState *State) {
  145. bool Replica = State->Instance &&
  146. !(State->Instance->Part == 0 && State->Instance->Lane == 0);
  147. VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
  148. VPBlockBase *SingleHPred = nullptr;
  149. BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
  150. // 1. Create an IR basic block, or reuse the last one if possible.
  151. // The last IR basic block is reused, as an optimization, in three cases:
  152. // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
  153. // B. when the current VPBB has a single (hierarchical) predecessor which
  154. // is PrevVPBB and the latter has a single (hierarchical) successor; and
  155. // C. when the current VPBB is an entry of a region replica - where PrevVPBB
  156. // is the exit of this region from a previous instance, or the predecessor
  157. // of this region.
  158. if (PrevVPBB && /* A */
  159. !((SingleHPred = getSingleHierarchicalPredecessor()) &&
  160. SingleHPred->getExitBasicBlock() == PrevVPBB &&
  161. PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
  162. !(Replica && getPredecessors().empty())) { /* C */
  163. NewBB = createEmptyBasicBlock(State->CFG);
  164. State->Builder.SetInsertPoint(NewBB);
  165. // Temporarily terminate with unreachable until CFG is rewired.
  166. UnreachableInst *Terminator = State->Builder.CreateUnreachable();
  167. State->Builder.SetInsertPoint(Terminator);
  168. // Register NewBB in its loop. In innermost loops its the same for all BB's.
  169. Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
  170. L->addBasicBlockToLoop(NewBB, *State->LI);
  171. State->CFG.PrevBB = NewBB;
  172. }
  173. // 2. Fill the IR basic block with IR instructions.
  174. LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
  175. << " in BB:" << NewBB->getName() << '\n');
  176. State->CFG.VPBB2IRBB[this] = NewBB;
  177. State->CFG.PrevVPBB = this;
  178. for (VPRecipeBase &Recipe : Recipes)
  179. Recipe.execute(*State);
  180. VPValue *CBV;
  181. if (EnableVPlanNativePath && (CBV = getCondBit())) {
  182. Value *IRCBV = CBV->getUnderlyingValue();
  183. assert(IRCBV && "Unexpected null underlying value for condition bit");
  184. // Condition bit value in a VPBasicBlock is used as the branch selector. In
  185. // the VPlan-native path case, since all branches are uniform we generate a
  186. // branch instruction using the condition value from vector lane 0 and dummy
  187. // successors. The successors are fixed later when the successor blocks are
  188. // visited.
  189. Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0);
  190. NewCond = State->Builder.CreateExtractElement(NewCond,
  191. State->Builder.getInt32(0));
  192. // Replace the temporary unreachable terminator with the new conditional
  193. // branch.
  194. auto *CurrentTerminator = NewBB->getTerminator();
  195. assert(isa<UnreachableInst>(CurrentTerminator) &&
  196. "Expected to replace unreachable terminator with conditional "
  197. "branch.");
  198. auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
  199. CondBr->setSuccessor(0, nullptr);
  200. ReplaceInstWithInst(CurrentTerminator, CondBr);
  201. }
  202. LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
  203. }
  204. void VPRegionBlock::execute(VPTransformState *State) {
  205. ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
  206. if (!isReplicator()) {
  207. // Visit the VPBlocks connected to "this", starting from it.
  208. for (VPBlockBase *Block : RPOT) {
  209. if (EnableVPlanNativePath) {
  210. // The inner loop vectorization path does not represent loop preheader
  211. // and exit blocks as part of the VPlan. In the VPlan-native path, skip
  212. // vectorizing loop preheader block. In future, we may replace this
  213. // check with the check for loop preheader.
  214. if (Block->getNumPredecessors() == 0)
  215. continue;
  216. // Skip vectorizing loop exit block. In future, we may replace this
  217. // check with the check for loop exit.
  218. if (Block->getNumSuccessors() == 0)
  219. continue;
  220. }
  221. LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
  222. Block->execute(State);
  223. }
  224. return;
  225. }
  226. assert(!State->Instance && "Replicating a Region with non-null instance.");
  227. // Enter replicating mode.
  228. State->Instance = {0, 0};
  229. for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
  230. State->Instance->Part = Part;
  231. for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) {
  232. State->Instance->Lane = Lane;
  233. // Visit the VPBlocks connected to \p this, starting from it.
  234. for (VPBlockBase *Block : RPOT) {
  235. LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
  236. Block->execute(State);
  237. }
  238. }
  239. }
  240. // Exit replicating mode.
  241. State->Instance.reset();
  242. }
  243. void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  244. Parent = InsertPos->getParent();
  245. Parent->getRecipeList().insert(InsertPos->getIterator(), this);
  246. }
  247. iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  248. return getParent()->getRecipeList().erase(getIterator());
  249. }
  250. void VPInstruction::generateInstruction(VPTransformState &State,
  251. unsigned Part) {
  252. IRBuilder<> &Builder = State.Builder;
  253. if (Instruction::isBinaryOp(getOpcode())) {
  254. Value *A = State.get(getOperand(0), Part);
  255. Value *B = State.get(getOperand(1), Part);
  256. Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
  257. State.set(this, V, Part);
  258. return;
  259. }
  260. switch (getOpcode()) {
  261. case VPInstruction::Not: {
  262. Value *A = State.get(getOperand(0), Part);
  263. Value *V = Builder.CreateNot(A);
  264. State.set(this, V, Part);
  265. break;
  266. }
  267. case VPInstruction::ICmpULE: {
  268. Value *IV = State.get(getOperand(0), Part);
  269. Value *TC = State.get(getOperand(1), Part);
  270. Value *V = Builder.CreateICmpULE(IV, TC);
  271. State.set(this, V, Part);
  272. break;
  273. }
  274. case Instruction::Select: {
  275. Value *Cond = State.get(getOperand(0), Part);
  276. Value *Op1 = State.get(getOperand(1), Part);
  277. Value *Op2 = State.get(getOperand(2), Part);
  278. Value *V = Builder.CreateSelect(Cond, Op1, Op2);
  279. State.set(this, V, Part);
  280. break;
  281. }
  282. default:
  283. llvm_unreachable("Unsupported opcode for instruction");
  284. }
  285. }
  286. void VPInstruction::execute(VPTransformState &State) {
  287. assert(!State.Instance && "VPInstruction executing an Instance");
  288. for (unsigned Part = 0; Part < State.UF; ++Part)
  289. generateInstruction(State, Part);
  290. }
  291. void VPInstruction::print(raw_ostream &O, const Twine &Indent) const {
  292. O << " +\n" << Indent << "\"EMIT ";
  293. print(O);
  294. O << "\\l\"";
  295. }
  296. void VPInstruction::print(raw_ostream &O) const {
  297. printAsOperand(O);
  298. O << " = ";
  299. switch (getOpcode()) {
  300. case VPInstruction::Not:
  301. O << "not";
  302. break;
  303. case VPInstruction::ICmpULE:
  304. O << "icmp ule";
  305. break;
  306. case VPInstruction::SLPLoad:
  307. O << "combined load";
  308. break;
  309. case VPInstruction::SLPStore:
  310. O << "combined store";
  311. break;
  312. default:
  313. O << Instruction::getOpcodeName(getOpcode());
  314. }
  315. for (const VPValue *Operand : operands()) {
  316. O << " ";
  317. Operand->printAsOperand(O);
  318. }
  319. }
  320. /// Generate the code inside the body of the vectorized loop. Assumes a single
  321. /// LoopVectorBody basic-block was created for this. Introduce additional
  322. /// basic-blocks as needed, and fill them all.
  323. void VPlan::execute(VPTransformState *State) {
  324. // -1. Check if the backedge taken count is needed, and if so build it.
  325. if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
  326. Value *TC = State->TripCount;
  327. IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
  328. auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
  329. "trip.count.minus.1");
  330. Value2VPValue[TCMO] = BackedgeTakenCount;
  331. }
  332. // 0. Set the reverse mapping from VPValues to Values for code generation.
  333. for (auto &Entry : Value2VPValue)
  334. State->VPValue2Value[Entry.second] = Entry.first;
  335. BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
  336. BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
  337. assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
  338. // 1. Make room to generate basic-blocks inside loop body if needed.
  339. BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
  340. VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
  341. Loop *L = State->LI->getLoopFor(VectorHeaderBB);
  342. L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
  343. // Remove the edge between Header and Latch to allow other connections.
  344. // Temporarily terminate with unreachable until CFG is rewired.
  345. // Note: this asserts the generated code's assumption that
  346. // getFirstInsertionPt() can be dereferenced into an Instruction.
  347. VectorHeaderBB->getTerminator()->eraseFromParent();
  348. State->Builder.SetInsertPoint(VectorHeaderBB);
  349. UnreachableInst *Terminator = State->Builder.CreateUnreachable();
  350. State->Builder.SetInsertPoint(Terminator);
  351. // 2. Generate code in loop body.
  352. State->CFG.PrevVPBB = nullptr;
  353. State->CFG.PrevBB = VectorHeaderBB;
  354. State->CFG.LastBB = VectorLatchBB;
  355. for (VPBlockBase *Block : depth_first(Entry))
  356. Block->execute(State);
  357. // Setup branch terminator successors for VPBBs in VPBBsToFix based on
  358. // VPBB's successors.
  359. for (auto VPBB : State->CFG.VPBBsToFix) {
  360. assert(EnableVPlanNativePath &&
  361. "Unexpected VPBBsToFix in non VPlan-native path");
  362. BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
  363. assert(BB && "Unexpected null basic block for VPBB");
  364. unsigned Idx = 0;
  365. auto *BBTerminator = BB->getTerminator();
  366. for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
  367. VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
  368. BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
  369. ++Idx;
  370. }
  371. }
  372. // 3. Merge the temporary latch created with the last basic-block filled.
  373. BasicBlock *LastBB = State->CFG.PrevBB;
  374. // Connect LastBB to VectorLatchBB to facilitate their merge.
  375. assert((EnableVPlanNativePath ||
  376. isa<UnreachableInst>(LastBB->getTerminator())) &&
  377. "Expected InnerLoop VPlan CFG to terminate with unreachable");
  378. assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
  379. "Expected VPlan CFG to terminate with branch in NativePath");
  380. LastBB->getTerminator()->eraseFromParent();
  381. BranchInst::Create(VectorLatchBB, LastBB);
  382. // Merge LastBB with Latch.
  383. bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
  384. (void)Merged;
  385. assert(Merged && "Could not merge last basic block with latch.");
  386. VectorLatchBB = LastBB;
  387. // We do not attempt to preserve DT for outer loop vectorization currently.
  388. if (!EnableVPlanNativePath)
  389. updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
  390. }
  391. void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
  392. BasicBlock *LoopLatchBB) {
  393. BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
  394. assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
  395. DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB);
  396. // The vector body may be more than a single basic-block by this point.
  397. // Update the dominator tree information inside the vector body by propagating
  398. // it from header to latch, expecting only triangular control-flow, if any.
  399. BasicBlock *PostDomSucc = nullptr;
  400. for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
  401. // Get the list of successors of this block.
  402. std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
  403. assert(Succs.size() <= 2 &&
  404. "Basic block in vector loop has more than 2 successors.");
  405. PostDomSucc = Succs[0];
  406. if (Succs.size() == 1) {
  407. assert(PostDomSucc->getSinglePredecessor() &&
  408. "PostDom successor has more than one predecessor.");
  409. DT->addNewBlock(PostDomSucc, BB);
  410. continue;
  411. }
  412. BasicBlock *InterimSucc = Succs[1];
  413. if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
  414. PostDomSucc = Succs[1];
  415. InterimSucc = Succs[0];
  416. }
  417. assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
  418. "One successor of a basic block does not lead to the other.");
  419. assert(InterimSucc->getSinglePredecessor() &&
  420. "Interim successor has more than one predecessor.");
  421. assert(PostDomSucc->hasNPredecessors(2) &&
  422. "PostDom successor has more than two predecessors.");
  423. DT->addNewBlock(InterimSucc, BB);
  424. DT->addNewBlock(PostDomSucc, BB);
  425. }
  426. }
  427. const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
  428. return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
  429. Twine(getOrCreateBID(Block));
  430. }
  431. const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
  432. const std::string &Name = Block->getName();
  433. if (!Name.empty())
  434. return Name;
  435. return "VPB" + Twine(getOrCreateBID(Block));
  436. }
  437. void VPlanPrinter::dump() {
  438. Depth = 1;
  439. bumpIndent(0);
  440. OS << "digraph VPlan {\n";
  441. OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
  442. if (!Plan.getName().empty())
  443. OS << "\\n" << DOT::EscapeString(Plan.getName());
  444. if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) {
  445. OS << ", where:";
  446. if (Plan.BackedgeTakenCount)
  447. OS << "\\n"
  448. << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount";
  449. for (auto Entry : Plan.Value2VPValue) {
  450. OS << "\\n" << *Entry.second;
  451. OS << DOT::EscapeString(" := ");
  452. Entry.first->printAsOperand(OS, false);
  453. }
  454. }
  455. OS << "\"]\n";
  456. OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
  457. OS << "edge [fontname=Courier, fontsize=30]\n";
  458. OS << "compound=true\n";
  459. for (VPBlockBase *Block : depth_first(Plan.getEntry()))
  460. dumpBlock(Block);
  461. OS << "}\n";
  462. }
  463. void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
  464. if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
  465. dumpBasicBlock(BasicBlock);
  466. else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  467. dumpRegion(Region);
  468. else
  469. llvm_unreachable("Unsupported kind of VPBlock.");
  470. }
  471. void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
  472. bool Hidden, const Twine &Label) {
  473. // Due to "dot" we print an edge between two regions as an edge between the
  474. // exit basic block and the entry basic of the respective regions.
  475. const VPBlockBase *Tail = From->getExitBasicBlock();
  476. const VPBlockBase *Head = To->getEntryBasicBlock();
  477. OS << Indent << getUID(Tail) << " -> " << getUID(Head);
  478. OS << " [ label=\"" << Label << '\"';
  479. if (Tail != From)
  480. OS << " ltail=" << getUID(From);
  481. if (Head != To)
  482. OS << " lhead=" << getUID(To);
  483. if (Hidden)
  484. OS << "; splines=none";
  485. OS << "]\n";
  486. }
  487. void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
  488. auto &Successors = Block->getSuccessors();
  489. if (Successors.size() == 1)
  490. drawEdge(Block, Successors.front(), false, "");
  491. else if (Successors.size() == 2) {
  492. drawEdge(Block, Successors.front(), false, "T");
  493. drawEdge(Block, Successors.back(), false, "F");
  494. } else {
  495. unsigned SuccessorNumber = 0;
  496. for (auto *Successor : Successors)
  497. drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
  498. }
  499. }
  500. void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
  501. OS << Indent << getUID(BasicBlock) << " [label =\n";
  502. bumpIndent(1);
  503. OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
  504. bumpIndent(1);
  505. // Dump the block predicate.
  506. const VPValue *Pred = BasicBlock->getPredicate();
  507. if (Pred) {
  508. OS << " +\n" << Indent << " \"BlockPredicate: ";
  509. if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) {
  510. PredI->printAsOperand(OS);
  511. OS << " (" << DOT::EscapeString(PredI->getParent()->getName())
  512. << ")\\l\"";
  513. } else
  514. Pred->printAsOperand(OS);
  515. }
  516. for (const VPRecipeBase &Recipe : *BasicBlock)
  517. Recipe.print(OS, Indent);
  518. // Dump the condition bit.
  519. const VPValue *CBV = BasicBlock->getCondBit();
  520. if (CBV) {
  521. OS << " +\n" << Indent << " \"CondBit: ";
  522. if (const VPInstruction *CBI = dyn_cast<VPInstruction>(CBV)) {
  523. CBI->printAsOperand(OS);
  524. OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\"";
  525. } else {
  526. CBV->printAsOperand(OS);
  527. OS << "\"";
  528. }
  529. }
  530. bumpIndent(-2);
  531. OS << "\n" << Indent << "]\n";
  532. dumpEdges(BasicBlock);
  533. }
  534. void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
  535. OS << Indent << "subgraph " << getUID(Region) << " {\n";
  536. bumpIndent(1);
  537. OS << Indent << "fontname=Courier\n"
  538. << Indent << "label=\""
  539. << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
  540. << DOT::EscapeString(Region->getName()) << "\"\n";
  541. // Dump the blocks of the region.
  542. assert(Region->getEntry() && "Region contains no inner blocks.");
  543. for (const VPBlockBase *Block : depth_first(Region->getEntry()))
  544. dumpBlock(Block);
  545. bumpIndent(-1);
  546. OS << Indent << "}\n";
  547. dumpEdges(Region);
  548. }
  549. void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) {
  550. std::string IngredientString;
  551. raw_string_ostream RSO(IngredientString);
  552. if (auto *Inst = dyn_cast<Instruction>(V)) {
  553. if (!Inst->getType()->isVoidTy()) {
  554. Inst->printAsOperand(RSO, false);
  555. RSO << " = ";
  556. }
  557. RSO << Inst->getOpcodeName() << " ";
  558. unsigned E = Inst->getNumOperands();
  559. if (E > 0) {
  560. Inst->getOperand(0)->printAsOperand(RSO, false);
  561. for (unsigned I = 1; I < E; ++I)
  562. Inst->getOperand(I)->printAsOperand(RSO << ", ", false);
  563. }
  564. } else // !Inst
  565. V->printAsOperand(RSO, false);
  566. RSO.flush();
  567. O << DOT::EscapeString(IngredientString);
  568. }
  569. void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent) const {
  570. O << " +\n" << Indent << "\"WIDEN\\l\"";
  571. for (auto &Instr : make_range(Begin, End))
  572. O << " +\n" << Indent << "\" " << VPlanIngredient(&Instr) << "\\l\"";
  573. }
  574. void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O,
  575. const Twine &Indent) const {
  576. O << " +\n" << Indent << "\"WIDEN-INDUCTION";
  577. if (Trunc) {
  578. O << "\\l\"";
  579. O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
  580. O << " +\n" << Indent << "\" " << VPlanIngredient(Trunc) << "\\l\"";
  581. } else
  582. O << " " << VPlanIngredient(IV) << "\\l\"";
  583. }
  584. void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const {
  585. O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\"";
  586. }
  587. void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent) const {
  588. O << " +\n" << Indent << "\"BLEND ";
  589. Phi->printAsOperand(O, false);
  590. O << " =";
  591. if (!User) {
  592. // Not a User of any mask: not really blending, this is a
  593. // single-predecessor phi.
  594. O << " ";
  595. Phi->getIncomingValue(0)->printAsOperand(O, false);
  596. } else {
  597. for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
  598. O << " ";
  599. Phi->getIncomingValue(I)->printAsOperand(O, false);
  600. O << "/";
  601. User->getOperand(I)->printAsOperand(O);
  602. }
  603. }
  604. O << "\\l\"";
  605. }
  606. void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent) const {
  607. O << " +\n"
  608. << Indent << "\"" << (IsUniform ? "CLONE " : "REPLICATE ")
  609. << VPlanIngredient(Ingredient);
  610. if (AlsoPack)
  611. O << " (S->V)";
  612. O << "\\l\"";
  613. }
  614. void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent) const {
  615. O << " +\n"
  616. << Indent << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst)
  617. << "\\l\"";
  618. }
  619. void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
  620. const Twine &Indent) const {
  621. O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr);
  622. if (User) {
  623. O << ", ";
  624. User->getOperand(0)->printAsOperand(O);
  625. }
  626. O << "\\l\"";
  627. }
  628. template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
  629. void VPValue::replaceAllUsesWith(VPValue *New) {
  630. for (VPUser *User : users())
  631. for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
  632. if (User->getOperand(I) == this)
  633. User->setOperand(I, New);
  634. }
  635. void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
  636. Old2NewTy &Old2New,
  637. InterleavedAccessInfo &IAI) {
  638. ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
  639. for (VPBlockBase *Base : RPOT) {
  640. visitBlock(Base, Old2New, IAI);
  641. }
  642. }
  643. void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
  644. InterleavedAccessInfo &IAI) {
  645. if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
  646. for (VPRecipeBase &VPI : *VPBB) {
  647. assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
  648. auto *VPInst = cast<VPInstruction>(&VPI);
  649. auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
  650. auto *IG = IAI.getInterleaveGroup(Inst);
  651. if (!IG)
  652. continue;
  653. auto NewIGIter = Old2New.find(IG);
  654. if (NewIGIter == Old2New.end())
  655. Old2New[IG] = new InterleaveGroup<VPInstruction>(
  656. IG->getFactor(), IG->isReverse(), IG->getAlignment());
  657. if (Inst == IG->getInsertPos())
  658. Old2New[IG]->setInsertPos(VPInst);
  659. InterleaveGroupMap[VPInst] = Old2New[IG];
  660. InterleaveGroupMap[VPInst]->insertMember(
  661. VPInst, IG->getIndex(Inst),
  662. IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor());
  663. }
  664. } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  665. visitRegion(Region, Old2New, IAI);
  666. else
  667. llvm_unreachable("Unsupported kind of VPBlock.");
  668. }
  669. VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
  670. InterleavedAccessInfo &IAI) {
  671. Old2NewTy Old2New;
  672. visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
  673. }