DFAPacketizer.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. //=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. // This class implements a deterministic finite automaton (DFA) based
  10. // packetizing mechanism for VLIW architectures. It provides APIs to
  11. // determine whether there exists a legal mapping of instructions to
  12. // functional unit assignments in a packet. The DFA is auto-generated from
  13. // the target's Schedule.td file.
  14. //
  15. // A DFA consists of 3 major elements: states, inputs, and transitions. For
  16. // the packetizing mechanism, the input is the set of instruction classes for
  17. // a target. The state models all possible combinations of functional unit
  18. // consumption for a given set of instructions in a packet. A transition
  19. // models the addition of an instruction to a packet. In the DFA constructed
  20. // by this class, if an instruction can be added to a packet, then a valid
  21. // transition exists from the corresponding state. Invalid transitions
  22. // indicate that the instruction cannot be added to the current packet.
  23. //
  24. //===----------------------------------------------------------------------===//
  25. #include "llvm/CodeGen/DFAPacketizer.h"
  26. #include "llvm/CodeGen/MachineFunction.h"
  27. #include "llvm/CodeGen/MachineInstr.h"
  28. #include "llvm/CodeGen/MachineInstrBundle.h"
  29. #include "llvm/CodeGen/ScheduleDAG.h"
  30. #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  31. #include "llvm/CodeGen/TargetInstrInfo.h"
  32. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  33. #include "llvm/MC/MCInstrDesc.h"
  34. #include "llvm/MC/MCInstrItineraries.h"
  35. #include "llvm/Support/CommandLine.h"
  36. #include "llvm/Support/Debug.h"
  37. #include "llvm/Support/raw_ostream.h"
  38. #include <algorithm>
  39. #include <cassert>
  40. #include <iterator>
  41. #include <memory>
  42. #include <vector>
  43. using namespace llvm;
  44. #define DEBUG_TYPE "packets"
  45. static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
  46. cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
  47. static unsigned InstrCount = 0;
  48. // --------------------------------------------------------------------
  49. // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
  50. static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
  51. return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
  52. }
  53. /// Return the DFAInput for an instruction class input vector.
  54. /// This function is used in both DFAPacketizer.cpp and in
  55. /// DFAPacketizerEmitter.cpp.
  56. static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
  57. DFAInput InsnInput = 0;
  58. assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
  59. "Exceeded maximum number of DFA terms");
  60. for (auto U : InsnClass)
  61. InsnInput = addDFAFuncUnits(InsnInput, U);
  62. return InsnInput;
  63. }
  64. // --------------------------------------------------------------------
  65. DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
  66. const DFAStateInput (*SIT)[2],
  67. const unsigned *SET):
  68. InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) {
  69. // Make sure DFA types are large enough for the number of terms & resources.
  70. static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
  71. (8 * sizeof(DFAInput)),
  72. "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
  73. static_assert(
  74. (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
  75. "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
  76. }
  77. // Read the DFA transition table and update CachedTable.
  78. //
  79. // Format of the transition tables:
  80. // DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
  81. // transitions
  82. // DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
  83. // for the ith state
  84. //
  85. void DFAPacketizer::ReadTable(unsigned int state) {
  86. unsigned ThisState = DFAStateEntryTable[state];
  87. unsigned NextStateInTable = DFAStateEntryTable[state+1];
  88. // Early exit in case CachedTable has already contains this
  89. // state's transitions.
  90. if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0])))
  91. return;
  92. for (unsigned i = ThisState; i < NextStateInTable; i++)
  93. CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
  94. DFAStateInputTable[i][1];
  95. }
  96. // Return the DFAInput for an instruction class.
  97. DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
  98. // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
  99. DFAInput InsnInput = 0;
  100. unsigned i = 0;
  101. (void)i;
  102. for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
  103. *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
  104. InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
  105. assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
  106. }
  107. return InsnInput;
  108. }
  109. // Return the DFAInput for an instruction class input vector.
  110. DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
  111. return getDFAInsnInput(InsnClass);
  112. }
  113. // Check if the resources occupied by a MCInstrDesc are available in the
  114. // current state.
  115. bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
  116. unsigned InsnClass = MID->getSchedClass();
  117. DFAInput InsnInput = getInsnInput(InsnClass);
  118. UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
  119. ReadTable(CurrentState);
  120. return CachedTable.count(StateTrans) != 0;
  121. }
  122. // Reserve the resources occupied by a MCInstrDesc and change the current
  123. // state to reflect that change.
  124. void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
  125. unsigned InsnClass = MID->getSchedClass();
  126. DFAInput InsnInput = getInsnInput(InsnClass);
  127. UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
  128. ReadTable(CurrentState);
  129. assert(CachedTable.count(StateTrans) != 0);
  130. CurrentState = CachedTable[StateTrans];
  131. }
  132. // Check if the resources occupied by a machine instruction are available
  133. // in the current state.
  134. bool DFAPacketizer::canReserveResources(MachineInstr &MI) {
  135. const MCInstrDesc &MID = MI.getDesc();
  136. return canReserveResources(&MID);
  137. }
  138. // Reserve the resources occupied by a machine instruction and change the
  139. // current state to reflect that change.
  140. void DFAPacketizer::reserveResources(MachineInstr &MI) {
  141. const MCInstrDesc &MID = MI.getDesc();
  142. reserveResources(&MID);
  143. }
  144. namespace llvm {
  145. // This class extends ScheduleDAGInstrs and overrides the schedule method
  146. // to build the dependence graph.
  147. class DefaultVLIWScheduler : public ScheduleDAGInstrs {
  148. private:
  149. AliasAnalysis *AA;
  150. /// Ordered list of DAG postprocessing steps.
  151. std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
  152. public:
  153. DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
  154. AliasAnalysis *AA);
  155. // Actual scheduling work.
  156. void schedule() override;
  157. /// DefaultVLIWScheduler takes ownership of the Mutation object.
  158. void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
  159. Mutations.push_back(std::move(Mutation));
  160. }
  161. protected:
  162. void postprocessDAG();
  163. };
  164. } // end namespace llvm
  165. DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
  166. MachineLoopInfo &MLI,
  167. AliasAnalysis *AA)
  168. : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
  169. CanHandleTerminators = true;
  170. }
  171. /// Apply each ScheduleDAGMutation step in order.
  172. void DefaultVLIWScheduler::postprocessDAG() {
  173. for (auto &M : Mutations)
  174. M->apply(this);
  175. }
  176. void DefaultVLIWScheduler::schedule() {
  177. // Build the scheduling graph.
  178. buildSchedGraph(AA);
  179. postprocessDAG();
  180. }
  181. VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
  182. MachineLoopInfo &mli, AliasAnalysis *aa)
  183. : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
  184. ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
  185. VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
  186. }
  187. VLIWPacketizerList::~VLIWPacketizerList() {
  188. delete VLIWScheduler;
  189. delete ResourceTracker;
  190. }
  191. // End the current packet, bundle packet instructions and reset DFA state.
  192. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
  193. MachineBasicBlock::iterator MI) {
  194. LLVM_DEBUG({
  195. if (!CurrentPacketMIs.empty()) {
  196. dbgs() << "Finalizing packet:\n";
  197. for (MachineInstr *MI : CurrentPacketMIs)
  198. dbgs() << " * " << *MI;
  199. }
  200. });
  201. if (CurrentPacketMIs.size() > 1) {
  202. MachineInstr &MIFirst = *CurrentPacketMIs.front();
  203. finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
  204. }
  205. CurrentPacketMIs.clear();
  206. ResourceTracker->clearResources();
  207. LLVM_DEBUG(dbgs() << "End packet\n");
  208. }
  209. // Bundle machine instructions into packets.
  210. void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
  211. MachineBasicBlock::iterator BeginItr,
  212. MachineBasicBlock::iterator EndItr) {
  213. assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
  214. VLIWScheduler->startBlock(MBB);
  215. VLIWScheduler->enterRegion(MBB, BeginItr, EndItr,
  216. std::distance(BeginItr, EndItr));
  217. VLIWScheduler->schedule();
  218. LLVM_DEBUG({
  219. dbgs() << "Scheduling DAG of the packetize region\n";
  220. VLIWScheduler->dump();
  221. });
  222. // Generate MI -> SU map.
  223. MIToSUnit.clear();
  224. for (SUnit &SU : VLIWScheduler->SUnits)
  225. MIToSUnit[SU.getInstr()] = &SU;
  226. bool LimitPresent = InstrLimit.getPosition();
  227. // The main packetizer loop.
  228. for (; BeginItr != EndItr; ++BeginItr) {
  229. if (LimitPresent) {
  230. if (InstrCount >= InstrLimit) {
  231. EndItr = BeginItr;
  232. break;
  233. }
  234. InstrCount++;
  235. }
  236. MachineInstr &MI = *BeginItr;
  237. initPacketizerState();
  238. // End the current packet if needed.
  239. if (isSoloInstruction(MI)) {
  240. endPacket(MBB, MI);
  241. continue;
  242. }
  243. // Ignore pseudo instructions.
  244. if (ignorePseudoInstruction(MI, MBB))
  245. continue;
  246. SUnit *SUI = MIToSUnit[&MI];
  247. assert(SUI && "Missing SUnit Info!");
  248. // Ask DFA if machine resource is available for MI.
  249. LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
  250. bool ResourceAvail = ResourceTracker->canReserveResources(MI);
  251. LLVM_DEBUG({
  252. if (ResourceAvail)
  253. dbgs() << " Resources are available for adding MI to packet\n";
  254. else
  255. dbgs() << " Resources NOT available\n";
  256. });
  257. if (ResourceAvail && shouldAddToPacket(MI)) {
  258. // Dependency check for MI with instructions in CurrentPacketMIs.
  259. for (auto MJ : CurrentPacketMIs) {
  260. SUnit *SUJ = MIToSUnit[MJ];
  261. assert(SUJ && "Missing SUnit Info!");
  262. LLVM_DEBUG(dbgs() << " Checking against MJ " << *MJ);
  263. // Is it legal to packetize SUI and SUJ together.
  264. if (!isLegalToPacketizeTogether(SUI, SUJ)) {
  265. LLVM_DEBUG(dbgs() << " Not legal to add MI, try to prune\n");
  266. // Allow packetization if dependency can be pruned.
  267. if (!isLegalToPruneDependencies(SUI, SUJ)) {
  268. // End the packet if dependency cannot be pruned.
  269. LLVM_DEBUG(dbgs()
  270. << " Could not prune dependencies for adding MI\n");
  271. endPacket(MBB, MI);
  272. break;
  273. }
  274. LLVM_DEBUG(dbgs() << " Pruned dependence for adding MI\n");
  275. }
  276. }
  277. } else {
  278. LLVM_DEBUG(if (ResourceAvail) dbgs()
  279. << "Resources are available, but instruction should not be "
  280. "added to packet\n "
  281. << MI);
  282. // End the packet if resource is not available, or if the instruction
  283. // shoud not be added to the current packet.
  284. endPacket(MBB, MI);
  285. }
  286. // Add MI to the current packet.
  287. LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
  288. BeginItr = addToPacket(MI);
  289. } // For all instructions in the packetization range.
  290. // End any packet left behind.
  291. endPacket(MBB, EndItr);
  292. VLIWScheduler->exitRegion();
  293. VLIWScheduler->finishBlock();
  294. }
  295. bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
  296. const MachineMemOperand &Op2,
  297. bool UseTBAA) const {
  298. if (!Op1.getValue() || !Op2.getValue())
  299. return true;
  300. int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
  301. int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
  302. int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
  303. AliasResult AAResult =
  304. AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
  305. UseTBAA ? Op1.getAAInfo() : AAMDNodes()),
  306. MemoryLocation(Op2.getValue(), Overlapb,
  307. UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
  308. return AAResult != NoAlias;
  309. }
  310. bool VLIWPacketizerList::alias(const MachineInstr &MI1,
  311. const MachineInstr &MI2,
  312. bool UseTBAA) const {
  313. if (MI1.memoperands_empty() || MI2.memoperands_empty())
  314. return true;
  315. for (const MachineMemOperand *Op1 : MI1.memoperands())
  316. for (const MachineMemOperand *Op2 : MI2.memoperands())
  317. if (alias(*Op1, *Op2, UseTBAA))
  318. return true;
  319. return false;
  320. }
  321. // Add a DAG mutation object to the ordered list.
  322. void VLIWPacketizerList::addMutation(
  323. std::unique_ptr<ScheduleDAGMutation> Mutation) {
  324. VLIWScheduler->addMutation(std::move(Mutation));
  325. }