DFAPacketizer.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. //=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. // This class implements a deterministic finite automaton (DFA) based
  9. // packetizing mechanism for VLIW architectures. It provides APIs to
  10. // determine whether there exists a legal mapping of instructions to
  11. // functional unit assignments in a packet. The DFA is auto-generated from
  12. // the target's Schedule.td file.
  13. //
  14. // A DFA consists of 3 major elements: states, inputs, and transitions. For
  15. // the packetizing mechanism, the input is the set of instruction classes for
  16. // a target. The state models all possible combinations of functional unit
  17. // consumption for a given set of instructions in a packet. A transition
  18. // models the addition of an instruction to a packet. In the DFA constructed
  19. // by this class, if an instruction can be added to a packet, then a valid
  20. // transition exists from the corresponding state. Invalid transitions
  21. // indicate that the instruction cannot be added to the current packet.
  22. //
  23. //===----------------------------------------------------------------------===//
  24. #include "llvm/CodeGen/DFAPacketizer.h"
  25. #include "llvm/ADT/StringExtras.h"
  26. #include "llvm/CodeGen/MachineFunction.h"
  27. #include "llvm/CodeGen/MachineInstr.h"
  28. #include "llvm/CodeGen/MachineInstrBundle.h"
  29. #include "llvm/CodeGen/ScheduleDAG.h"
  30. #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  31. #include "llvm/CodeGen/TargetInstrInfo.h"
  32. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  33. #include "llvm/MC/MCInstrDesc.h"
  34. #include "llvm/MC/MCInstrItineraries.h"
  35. #include "llvm/Support/CommandLine.h"
  36. #include "llvm/Support/Debug.h"
  37. #include "llvm/Support/raw_ostream.h"
  38. #include <algorithm>
  39. #include <cassert>
  40. #include <iterator>
  41. #include <memory>
  42. #include <vector>
  43. using namespace llvm;
  44. #define DEBUG_TYPE "packets"
  45. static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
  46. cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
  47. static unsigned InstrCount = 0;
  48. // --------------------------------------------------------------------
  49. // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
  50. static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
  51. return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
  52. }
  53. /// Return the DFAInput for an instruction class input vector.
  54. /// This function is used in both DFAPacketizer.cpp and in
  55. /// DFAPacketizerEmitter.cpp.
  56. static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
  57. DFAInput InsnInput = 0;
  58. assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
  59. "Exceeded maximum number of DFA terms");
  60. for (auto U : InsnClass)
  61. InsnInput = addDFAFuncUnits(InsnInput, U);
  62. return InsnInput;
  63. }
  64. // --------------------------------------------------------------------
  65. DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
  66. const DFAStateInput (*SIT)[2], const unsigned *SET,
  67. const unsigned (*RTT)[2],
  68. const unsigned *RTET)
  69. : InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET),
  70. DFAResourceTransitionTable(RTT), DFAResourceTransitionEntryTable(RTET) {
  71. // Make sure DFA types are large enough for the number of terms & resources.
  72. static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
  73. (8 * sizeof(DFAInput)),
  74. "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
  75. static_assert(
  76. (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
  77. "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
  78. clearResources();
  79. }
  80. // Read the DFA transition table and update CachedTable.
  81. //
  82. // Format of the transition tables:
  83. // DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
  84. // transitions
  85. // DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
  86. // for the ith state
  87. //
  88. void DFAPacketizer::ReadTable(unsigned int state) {
  89. unsigned ThisStateIdx = DFAStateEntryTable[state];
  90. unsigned NextStateIdxInTable = DFAStateEntryTable[state + 1];
  91. // Early exit in case CachedTable has already contains this
  92. // state's transitions.
  93. if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisStateIdx][0])))
  94. return;
  95. for (unsigned TransitionIdx = ThisStateIdx;
  96. TransitionIdx < NextStateIdxInTable; TransitionIdx++) {
  97. auto TransitionPair =
  98. UnsignPair(state, DFAStateInputTable[TransitionIdx][0]);
  99. CachedTable[TransitionPair] = DFAStateInputTable[TransitionIdx][1];
  100. if (TrackResources) {
  101. unsigned I = DFAResourceTransitionEntryTable[TransitionIdx];
  102. unsigned E = DFAResourceTransitionEntryTable[TransitionIdx + 1];
  103. CachedResourceTransitions[TransitionPair] = makeArrayRef(
  104. &DFAResourceTransitionTable[I], &DFAResourceTransitionTable[E]);
  105. }
  106. }
  107. }
  108. // Return the DFAInput for an instruction class.
  109. DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
  110. // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
  111. DFAInput InsnInput = 0;
  112. unsigned i = 0;
  113. (void)i;
  114. for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
  115. *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
  116. InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
  117. assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
  118. }
  119. return InsnInput;
  120. }
  121. // Return the DFAInput for an instruction class input vector.
  122. DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
  123. return getDFAInsnInput(InsnClass);
  124. }
  125. // Check if the resources occupied by a MCInstrDesc are available in the
  126. // current state.
  127. bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
  128. unsigned InsnClass = MID->getSchedClass();
  129. DFAInput InsnInput = getInsnInput(InsnClass);
  130. UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
  131. ReadTable(CurrentState);
  132. return CachedTable.count(StateTrans) != 0;
  133. }
  134. // Reserve the resources occupied by a MCInstrDesc and change the current
  135. // state to reflect that change.
  136. void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
  137. unsigned InsnClass = MID->getSchedClass();
  138. DFAInput InsnInput = getInsnInput(InsnClass);
  139. UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
  140. ReadTable(CurrentState);
  141. if (TrackResources) {
  142. DenseMap<unsigned, SmallVector<unsigned, 8>> NewResourceStates;
  143. for (const auto &KV : CachedResourceTransitions[StateTrans]) {
  144. assert(ResourceStates.count(KV[0]));
  145. NewResourceStates[KV[1]] = ResourceStates[KV[0]];
  146. NewResourceStates[KV[1]].push_back(KV[1]);
  147. }
  148. ResourceStates = NewResourceStates;
  149. }
  150. assert(CachedTable.count(StateTrans) != 0);
  151. CurrentState = CachedTable[StateTrans];
  152. }
  153. // Check if the resources occupied by a machine instruction are available
  154. // in the current state.
  155. bool DFAPacketizer::canReserveResources(MachineInstr &MI) {
  156. const MCInstrDesc &MID = MI.getDesc();
  157. return canReserveResources(&MID);
  158. }
  159. // Reserve the resources occupied by a machine instruction and change the
  160. // current state to reflect that change.
  161. void DFAPacketizer::reserveResources(MachineInstr &MI) {
  162. const MCInstrDesc &MID = MI.getDesc();
  163. reserveResources(&MID);
  164. }
  165. unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) {
  166. assert(TrackResources && "getUsedResources requires resource tracking!");
  167. // Assert that there is at least one example of a valid bundle format.
  168. assert(!ResourceStates.empty() && "Invalid bundle!");
  169. SmallVectorImpl<unsigned> &RS = ResourceStates.begin()->second;
  170. // RS stores the cumulative resources used up to and including the I'th
  171. // instruction. The 0th instruction is the base case.
  172. if (InstIdx == 0)
  173. return RS[0];
  174. // Return the difference between the cumulative resources used by InstIdx and
  175. // its predecessor.
  176. return RS[InstIdx] ^ RS[InstIdx - 1];
  177. }
  178. namespace llvm {
  179. // This class extends ScheduleDAGInstrs and overrides the schedule method
  180. // to build the dependence graph.
  181. class DefaultVLIWScheduler : public ScheduleDAGInstrs {
  182. private:
  183. AliasAnalysis *AA;
  184. /// Ordered list of DAG postprocessing steps.
  185. std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
  186. public:
  187. DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
  188. AliasAnalysis *AA);
  189. // Actual scheduling work.
  190. void schedule() override;
  191. /// DefaultVLIWScheduler takes ownership of the Mutation object.
  192. void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
  193. Mutations.push_back(std::move(Mutation));
  194. }
  195. protected:
  196. void postprocessDAG();
  197. };
  198. } // end namespace llvm
  199. DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
  200. MachineLoopInfo &MLI,
  201. AliasAnalysis *AA)
  202. : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
  203. CanHandleTerminators = true;
  204. }
  205. /// Apply each ScheduleDAGMutation step in order.
  206. void DefaultVLIWScheduler::postprocessDAG() {
  207. for (auto &M : Mutations)
  208. M->apply(this);
  209. }
  210. void DefaultVLIWScheduler::schedule() {
  211. // Build the scheduling graph.
  212. buildSchedGraph(AA);
  213. postprocessDAG();
  214. }
  215. VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
  216. MachineLoopInfo &mli, AliasAnalysis *aa)
  217. : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
  218. ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
  219. ResourceTracker->setTrackResources(true);
  220. VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
  221. }
  222. VLIWPacketizerList::~VLIWPacketizerList() {
  223. delete VLIWScheduler;
  224. delete ResourceTracker;
  225. }
  226. // End the current packet, bundle packet instructions and reset DFA state.
  227. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
  228. MachineBasicBlock::iterator MI) {
  229. LLVM_DEBUG({
  230. if (!CurrentPacketMIs.empty()) {
  231. dbgs() << "Finalizing packet:\n";
  232. unsigned Idx = 0;
  233. for (MachineInstr *MI : CurrentPacketMIs) {
  234. unsigned R = ResourceTracker->getUsedResources(Idx++);
  235. dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
  236. }
  237. }
  238. });
  239. if (CurrentPacketMIs.size() > 1) {
  240. MachineInstr &MIFirst = *CurrentPacketMIs.front();
  241. finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
  242. }
  243. CurrentPacketMIs.clear();
  244. ResourceTracker->clearResources();
  245. LLVM_DEBUG(dbgs() << "End packet\n");
  246. }
  247. // Bundle machine instructions into packets.
  248. void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
  249. MachineBasicBlock::iterator BeginItr,
  250. MachineBasicBlock::iterator EndItr) {
  251. assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
  252. VLIWScheduler->startBlock(MBB);
  253. VLIWScheduler->enterRegion(MBB, BeginItr, EndItr,
  254. std::distance(BeginItr, EndItr));
  255. VLIWScheduler->schedule();
  256. LLVM_DEBUG({
  257. dbgs() << "Scheduling DAG of the packetize region\n";
  258. VLIWScheduler->dump();
  259. });
  260. // Generate MI -> SU map.
  261. MIToSUnit.clear();
  262. for (SUnit &SU : VLIWScheduler->SUnits)
  263. MIToSUnit[SU.getInstr()] = &SU;
  264. bool LimitPresent = InstrLimit.getPosition();
  265. // The main packetizer loop.
  266. for (; BeginItr != EndItr; ++BeginItr) {
  267. if (LimitPresent) {
  268. if (InstrCount >= InstrLimit) {
  269. EndItr = BeginItr;
  270. break;
  271. }
  272. InstrCount++;
  273. }
  274. MachineInstr &MI = *BeginItr;
  275. initPacketizerState();
  276. // End the current packet if needed.
  277. if (isSoloInstruction(MI)) {
  278. endPacket(MBB, MI);
  279. continue;
  280. }
  281. // Ignore pseudo instructions.
  282. if (ignorePseudoInstruction(MI, MBB))
  283. continue;
  284. SUnit *SUI = MIToSUnit[&MI];
  285. assert(SUI && "Missing SUnit Info!");
  286. // Ask DFA if machine resource is available for MI.
  287. LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
  288. bool ResourceAvail = ResourceTracker->canReserveResources(MI);
  289. LLVM_DEBUG({
  290. if (ResourceAvail)
  291. dbgs() << " Resources are available for adding MI to packet\n";
  292. else
  293. dbgs() << " Resources NOT available\n";
  294. });
  295. if (ResourceAvail && shouldAddToPacket(MI)) {
  296. // Dependency check for MI with instructions in CurrentPacketMIs.
  297. for (auto MJ : CurrentPacketMIs) {
  298. SUnit *SUJ = MIToSUnit[MJ];
  299. assert(SUJ && "Missing SUnit Info!");
  300. LLVM_DEBUG(dbgs() << " Checking against MJ " << *MJ);
  301. // Is it legal to packetize SUI and SUJ together.
  302. if (!isLegalToPacketizeTogether(SUI, SUJ)) {
  303. LLVM_DEBUG(dbgs() << " Not legal to add MI, try to prune\n");
  304. // Allow packetization if dependency can be pruned.
  305. if (!isLegalToPruneDependencies(SUI, SUJ)) {
  306. // End the packet if dependency cannot be pruned.
  307. LLVM_DEBUG(dbgs()
  308. << " Could not prune dependencies for adding MI\n");
  309. endPacket(MBB, MI);
  310. break;
  311. }
  312. LLVM_DEBUG(dbgs() << " Pruned dependence for adding MI\n");
  313. }
  314. }
  315. } else {
  316. LLVM_DEBUG(if (ResourceAvail) dbgs()
  317. << "Resources are available, but instruction should not be "
  318. "added to packet\n "
  319. << MI);
  320. // End the packet if resource is not available, or if the instruction
  321. // shoud not be added to the current packet.
  322. endPacket(MBB, MI);
  323. }
  324. // Add MI to the current packet.
  325. LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
  326. BeginItr = addToPacket(MI);
  327. } // For all instructions in the packetization range.
  328. // End any packet left behind.
  329. endPacket(MBB, EndItr);
  330. VLIWScheduler->exitRegion();
  331. VLIWScheduler->finishBlock();
  332. }
  333. bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
  334. const MachineMemOperand &Op2,
  335. bool UseTBAA) const {
  336. if (!Op1.getValue() || !Op2.getValue())
  337. return true;
  338. int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
  339. int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
  340. int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
  341. AliasResult AAResult =
  342. AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
  343. UseTBAA ? Op1.getAAInfo() : AAMDNodes()),
  344. MemoryLocation(Op2.getValue(), Overlapb,
  345. UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
  346. return AAResult != NoAlias;
  347. }
  348. bool VLIWPacketizerList::alias(const MachineInstr &MI1,
  349. const MachineInstr &MI2,
  350. bool UseTBAA) const {
  351. if (MI1.memoperands_empty() || MI2.memoperands_empty())
  352. return true;
  353. for (const MachineMemOperand *Op1 : MI1.memoperands())
  354. for (const MachineMemOperand *Op2 : MI2.memoperands())
  355. if (alias(*Op1, *Op2, UseTBAA))
  356. return true;
  357. return false;
  358. }
  359. // Add a DAG mutation object to the ordered list.
  360. void VLIWPacketizerList::addMutation(
  361. std::unique_ptr<ScheduleDAGMutation> Mutation) {
  362. VLIWScheduler->addMutation(std::move(Mutation));
  363. }