ScheduleDAGSDNodes.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907
  1. //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This implements the ScheduleDAG class, which is a base class used by
  11. // scheduling implementation classes.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #define DEBUG_TYPE "pre-RA-sched"
  15. #include "ScheduleDAGSDNodes.h"
  16. #include "InstrEmitter.h"
  17. #include "SDNodeDbgValue.h"
  18. #include "llvm/ADT/DenseMap.h"
  19. #include "llvm/ADT/SmallPtrSet.h"
  20. #include "llvm/ADT/SmallSet.h"
  21. #include "llvm/ADT/SmallVector.h"
  22. #include "llvm/ADT/Statistic.h"
  23. #include "llvm/CodeGen/MachineInstrBuilder.h"
  24. #include "llvm/CodeGen/MachineRegisterInfo.h"
  25. #include "llvm/CodeGen/SelectionDAG.h"
  26. #include "llvm/MC/MCInstrItineraries.h"
  27. #include "llvm/Support/CommandLine.h"
  28. #include "llvm/Support/Debug.h"
  29. #include "llvm/Support/raw_ostream.h"
  30. #include "llvm/Target/TargetInstrInfo.h"
  31. #include "llvm/Target/TargetLowering.h"
  32. #include "llvm/Target/TargetMachine.h"
  33. #include "llvm/Target/TargetRegisterInfo.h"
  34. #include "llvm/Target/TargetSubtargetInfo.h"
  35. using namespace llvm;
  36. STATISTIC(LoadsClustered, "Number of loads clustered together");
  37. // This allows latency based scheduler to notice high latency instructions
  38. // without a target itinerary. The choise if number here has more to do with
  39. // balancing scheduler heursitics than with the actual machine latency.
  40. static cl::opt<int> HighLatencyCycles(
  41. "sched-high-latency-cycles", cl::Hidden, cl::init(10),
  42. cl::desc("Roughly estimate the number of cycles that 'long latency'"
  43. "instructions take for targets with no itinerary"));
  44. ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
  45. : ScheduleDAG(mf), BB(0), DAG(0),
  46. InstrItins(mf.getTarget().getInstrItineraryData()) {}
  47. /// Run - perform scheduling.
  48. ///
  49. void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
  50. BB = bb;
  51. DAG = dag;
  52. // Clear the scheduler's SUnit DAG.
  53. ScheduleDAG::clearDAG();
  54. Sequence.clear();
  55. // Invoke the target's selection of scheduler.
  56. Schedule();
  57. }
  58. /// NewSUnit - Creates a new SUnit and return a ptr to it.
  59. ///
  60. SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
  61. #ifndef NDEBUG
  62. const SUnit *Addr = 0;
  63. if (!SUnits.empty())
  64. Addr = &SUnits[0];
  65. #endif
  66. SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
  67. assert((Addr == 0 || Addr == &SUnits[0]) &&
  68. "SUnits std::vector reallocated on the fly!");
  69. SUnits.back().OrigNode = &SUnits.back();
  70. SUnit *SU = &SUnits.back();
  71. const TargetLowering &TLI = DAG->getTargetLoweringInfo();
  72. if (!N ||
  73. (N->isMachineOpcode() &&
  74. N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
  75. SU->SchedulingPref = Sched::None;
  76. else
  77. SU->SchedulingPref = TLI.getSchedulingPreference(N);
  78. return SU;
  79. }
  80. SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
  81. SUnit *SU = newSUnit(Old->getNode());
  82. SU->OrigNode = Old->OrigNode;
  83. SU->Latency = Old->Latency;
  84. SU->isVRegCycle = Old->isVRegCycle;
  85. SU->isCall = Old->isCall;
  86. SU->isCallOp = Old->isCallOp;
  87. SU->isTwoAddress = Old->isTwoAddress;
  88. SU->isCommutable = Old->isCommutable;
  89. SU->hasPhysRegDefs = Old->hasPhysRegDefs;
  90. SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
  91. SU->isScheduleHigh = Old->isScheduleHigh;
  92. SU->isScheduleLow = Old->isScheduleLow;
  93. SU->SchedulingPref = Old->SchedulingPref;
  94. Old->isCloned = true;
  95. return SU;
  96. }
  97. /// CheckForPhysRegDependency - Check if the dependency between def and use of
  98. /// a specified operand is a physical register dependency. If so, returns the
  99. /// register and the cost of copying the register.
  100. static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
  101. const TargetRegisterInfo *TRI,
  102. const TargetInstrInfo *TII,
  103. unsigned &PhysReg, int &Cost) {
  104. if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
  105. return;
  106. unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
  107. if (TargetRegisterInfo::isVirtualRegister(Reg))
  108. return;
  109. unsigned ResNo = User->getOperand(2).getResNo();
  110. if (Def->isMachineOpcode()) {
  111. const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
  112. if (ResNo >= II.getNumDefs() &&
  113. II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
  114. PhysReg = Reg;
  115. const TargetRegisterClass *RC =
  116. TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
  117. Cost = RC->getCopyCost();
  118. }
  119. }
  120. }
  121. // Helper for AddGlue to clone node operands.
  122. static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
  123. SmallVectorImpl<EVT> &VTs,
  124. SDValue ExtraOper = SDValue()) {
  125. SmallVector<SDValue, 4> Ops;
  126. for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
  127. Ops.push_back(N->getOperand(I));
  128. if (ExtraOper.getNode())
  129. Ops.push_back(ExtraOper);
  130. SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
  131. MachineSDNode::mmo_iterator Begin = 0, End = 0;
  132. MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
  133. // Store memory references.
  134. if (MN) {
  135. Begin = MN->memoperands_begin();
  136. End = MN->memoperands_end();
  137. }
  138. DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
  139. // Reset the memory references
  140. if (MN)
  141. MN->setMemRefs(Begin, End);
  142. }
  143. static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
  144. SmallVector<EVT, 4> VTs;
  145. SDNode *GlueDestNode = Glue.getNode();
  146. // Don't add glue from a node to itself.
  147. if (GlueDestNode == N) return false;
  148. // Don't add a glue operand to something that already uses glue.
  149. if (GlueDestNode &&
  150. N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
  151. return false;
  152. }
  153. // Don't add glue to something that already has a glue value.
  154. if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
  155. for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
  156. VTs.push_back(N->getValueType(I));
  157. if (AddGlue)
  158. VTs.push_back(MVT::Glue);
  159. CloneNodeWithValues(N, DAG, VTs, Glue);
  160. return true;
  161. }
  162. // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
  163. // node even though simply shrinking the value list is sufficient.
  164. static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
  165. assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
  166. !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
  167. "expected an unused glue value");
  168. SmallVector<EVT, 4> VTs;
  169. for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
  170. VTs.push_back(N->getValueType(I));
  171. CloneNodeWithValues(N, DAG, VTs);
  172. }
  173. /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
  174. /// This function finds loads of the same base and different offsets. If the
  175. /// offsets are not far apart (target specific), it add MVT::Glue inputs and
  176. /// outputs to ensure they are scheduled together and in order. This
  177. /// optimization may benefit some targets by improving cache locality.
  178. void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
  179. SDNode *Chain = 0;
  180. unsigned NumOps = Node->getNumOperands();
  181. if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
  182. Chain = Node->getOperand(NumOps-1).getNode();
  183. if (!Chain)
  184. return;
  185. // Look for other loads of the same chain. Find loads that are loading from
  186. // the same base pointer and different offsets.
  187. SmallPtrSet<SDNode*, 16> Visited;
  188. SmallVector<int64_t, 4> Offsets;
  189. DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
  190. bool Cluster = false;
  191. SDNode *Base = Node;
  192. for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
  193. I != E; ++I) {
  194. SDNode *User = *I;
  195. if (User == Node || !Visited.insert(User))
  196. continue;
  197. int64_t Offset1, Offset2;
  198. if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
  199. Offset1 == Offset2)
  200. // FIXME: Should be ok if they addresses are identical. But earlier
  201. // optimizations really should have eliminated one of the loads.
  202. continue;
  203. if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
  204. Offsets.push_back(Offset1);
  205. O2SMap.insert(std::make_pair(Offset2, User));
  206. Offsets.push_back(Offset2);
  207. if (Offset2 < Offset1)
  208. Base = User;
  209. Cluster = true;
  210. }
  211. if (!Cluster)
  212. return;
  213. // Sort them in increasing order.
  214. std::sort(Offsets.begin(), Offsets.end());
  215. // Check if the loads are close enough.
  216. SmallVector<SDNode*, 4> Loads;
  217. unsigned NumLoads = 0;
  218. int64_t BaseOff = Offsets[0];
  219. SDNode *BaseLoad = O2SMap[BaseOff];
  220. Loads.push_back(BaseLoad);
  221. for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
  222. int64_t Offset = Offsets[i];
  223. SDNode *Load = O2SMap[Offset];
  224. if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
  225. break; // Stop right here. Ignore loads that are further away.
  226. Loads.push_back(Load);
  227. ++NumLoads;
  228. }
  229. if (NumLoads == 0)
  230. return;
  231. // Cluster loads by adding MVT::Glue outputs and inputs. This also
  232. // ensure they are scheduled in order of increasing addresses.
  233. SDNode *Lead = Loads[0];
  234. SDValue InGlue = SDValue(0, 0);
  235. if (AddGlue(Lead, InGlue, true, DAG))
  236. InGlue = SDValue(Lead, Lead->getNumValues() - 1);
  237. for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
  238. bool OutGlue = I < E - 1;
  239. SDNode *Load = Loads[I];
  240. // If AddGlue fails, we could leave an unsused glue value. This should not
  241. // cause any
  242. if (AddGlue(Load, InGlue, OutGlue, DAG)) {
  243. if (OutGlue)
  244. InGlue = SDValue(Load, Load->getNumValues() - 1);
  245. ++LoadsClustered;
  246. }
  247. else if (!OutGlue && InGlue.getNode())
  248. RemoveUnusedGlue(InGlue.getNode(), DAG);
  249. }
  250. }
  251. /// ClusterNodes - Cluster certain nodes which should be scheduled together.
  252. ///
  253. void ScheduleDAGSDNodes::ClusterNodes() {
  254. for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
  255. E = DAG->allnodes_end(); NI != E; ++NI) {
  256. SDNode *Node = &*NI;
  257. if (!Node || !Node->isMachineOpcode())
  258. continue;
  259. unsigned Opc = Node->getMachineOpcode();
  260. const MCInstrDesc &MCID = TII->get(Opc);
  261. if (MCID.mayLoad())
  262. // Cluster loads from "near" addresses into combined SUnits.
  263. ClusterNeighboringLoads(Node);
  264. }
  265. }
  266. void ScheduleDAGSDNodes::BuildSchedUnits() {
  267. // During scheduling, the NodeId field of SDNode is used to map SDNodes
  268. // to their associated SUnits by holding SUnits table indices. A value
  269. // of -1 means the SDNode does not yet have an associated SUnit.
  270. unsigned NumNodes = 0;
  271. for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
  272. E = DAG->allnodes_end(); NI != E; ++NI) {
  273. NI->setNodeId(-1);
  274. ++NumNodes;
  275. }
  276. // Reserve entries in the vector for each of the SUnits we are creating. This
  277. // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
  278. // invalidated.
  279. // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  280. // This is a temporary workaround.
  281. SUnits.reserve(NumNodes * 2);
  282. // Add all nodes in depth first order.
  283. SmallVector<SDNode*, 64> Worklist;
  284. SmallPtrSet<SDNode*, 64> Visited;
  285. Worklist.push_back(DAG->getRoot().getNode());
  286. Visited.insert(DAG->getRoot().getNode());
  287. SmallVector<SUnit*, 8> CallSUnits;
  288. while (!Worklist.empty()) {
  289. SDNode *NI = Worklist.pop_back_val();
  290. // Add all operands to the worklist unless they've already been added.
  291. for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
  292. if (Visited.insert(NI->getOperand(i).getNode()))
  293. Worklist.push_back(NI->getOperand(i).getNode());
  294. if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
  295. continue;
  296. // If this node has already been processed, stop now.
  297. if (NI->getNodeId() != -1) continue;
  298. SUnit *NodeSUnit = newSUnit(NI);
  299. // See if anything is glued to this node, if so, add them to glued
  300. // nodes. Nodes can have at most one glue input and one glue output. Glue
  301. // is required to be the last operand and result of a node.
  302. // Scan up to find glued preds.
  303. SDNode *N = NI;
  304. while (N->getNumOperands() &&
  305. N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
  306. N = N->getOperand(N->getNumOperands()-1).getNode();
  307. assert(N->getNodeId() == -1 && "Node already inserted!");
  308. N->setNodeId(NodeSUnit->NodeNum);
  309. if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
  310. NodeSUnit->isCall = true;
  311. }
  312. // Scan down to find any glued succs.
  313. N = NI;
  314. while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
  315. SDValue GlueVal(N, N->getNumValues()-1);
  316. // There are either zero or one users of the Glue result.
  317. bool HasGlueUse = false;
  318. for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
  319. UI != E; ++UI)
  320. if (GlueVal.isOperandOf(*UI)) {
  321. HasGlueUse = true;
  322. assert(N->getNodeId() == -1 && "Node already inserted!");
  323. N->setNodeId(NodeSUnit->NodeNum);
  324. N = *UI;
  325. if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
  326. NodeSUnit->isCall = true;
  327. break;
  328. }
  329. if (!HasGlueUse) break;
  330. }
  331. if (NodeSUnit->isCall)
  332. CallSUnits.push_back(NodeSUnit);
  333. // Schedule zero-latency TokenFactor below any nodes that may increase the
  334. // schedule height. Otherwise, ancestors of the TokenFactor may appear to
  335. // have false stalls.
  336. if (NI->getOpcode() == ISD::TokenFactor)
  337. NodeSUnit->isScheduleLow = true;
  338. // If there are glue operands involved, N is now the bottom-most node
  339. // of the sequence of nodes that are glued together.
  340. // Update the SUnit.
  341. NodeSUnit->setNode(N);
  342. assert(N->getNodeId() == -1 && "Node already inserted!");
  343. N->setNodeId(NodeSUnit->NodeNum);
  344. // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
  345. InitNumRegDefsLeft(NodeSUnit);
  346. // Assign the Latency field of NodeSUnit using target-provided information.
  347. computeLatency(NodeSUnit);
  348. }
  349. // Find all call operands.
  350. while (!CallSUnits.empty()) {
  351. SUnit *SU = CallSUnits.pop_back_val();
  352. for (const SDNode *SUNode = SU->getNode(); SUNode;
  353. SUNode = SUNode->getGluedNode()) {
  354. if (SUNode->getOpcode() != ISD::CopyToReg)
  355. continue;
  356. SDNode *SrcN = SUNode->getOperand(2).getNode();
  357. if (isPassiveNode(SrcN)) continue; // Not scheduled.
  358. SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
  359. SrcSU->isCallOp = true;
  360. }
  361. }
  362. }
  363. void ScheduleDAGSDNodes::AddSchedEdges() {
  364. const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  365. // Check to see if the scheduler cares about latencies.
  366. bool UnitLatencies = forceUnitLatencies();
  367. // Pass 2: add the preds, succs, etc.
  368. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
  369. SUnit *SU = &SUnits[su];
  370. SDNode *MainNode = SU->getNode();
  371. if (MainNode->isMachineOpcode()) {
  372. unsigned Opc = MainNode->getMachineOpcode();
  373. const MCInstrDesc &MCID = TII->get(Opc);
  374. for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
  375. if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
  376. SU->isTwoAddress = true;
  377. break;
  378. }
  379. }
  380. if (MCID.isCommutable())
  381. SU->isCommutable = true;
  382. }
  383. // Find all predecessors and successors of the group.
  384. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
  385. if (N->isMachineOpcode() &&
  386. TII->get(N->getMachineOpcode()).getImplicitDefs()) {
  387. SU->hasPhysRegClobbers = true;
  388. unsigned NumUsed = InstrEmitter::CountResults(N);
  389. while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
  390. --NumUsed; // Skip over unused values at the end.
  391. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
  392. SU->hasPhysRegDefs = true;
  393. }
  394. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
  395. SDNode *OpN = N->getOperand(i).getNode();
  396. if (isPassiveNode(OpN)) continue; // Not scheduled.
  397. SUnit *OpSU = &SUnits[OpN->getNodeId()];
  398. assert(OpSU && "Node has no SUnit!");
  399. if (OpSU == SU) continue; // In the same group.
  400. EVT OpVT = N->getOperand(i).getValueType();
  401. assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
  402. bool isChain = OpVT == MVT::Other;
  403. unsigned PhysReg = 0;
  404. int Cost = 1;
  405. // Determine if this is a physical register dependency.
  406. CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
  407. assert((PhysReg == 0 || !isChain) &&
  408. "Chain dependence via physreg data?");
  409. // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
  410. // emits a copy from the physical register to a virtual register unless
  411. // it requires a cross class copy (cost < 0). That means we are only
  412. // treating "expensive to copy" register dependency as physical register
  413. // dependency. This may change in the future though.
  414. if (Cost >= 0 && !StressSched)
  415. PhysReg = 0;
  416. // If this is a ctrl dep, latency is 1.
  417. unsigned OpLatency = isChain ? 1 : OpSU->Latency;
  418. // Special-case TokenFactor chains as zero-latency.
  419. if(isChain && OpN->getOpcode() == ISD::TokenFactor)
  420. OpLatency = 0;
  421. SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
  422. : SDep(OpSU, SDep::Data, PhysReg);
  423. Dep.setLatency(OpLatency);
  424. if (!isChain && !UnitLatencies) {
  425. computeOperandLatency(OpN, N, i, Dep);
  426. ST.adjustSchedDependency(OpSU, SU, Dep);
  427. }
  428. if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
  429. // Multiple register uses are combined in the same SUnit. For example,
  430. // we could have a set of glued nodes with all their defs consumed by
  431. // another set of glued nodes. Register pressure tracking sees this as
  432. // a single use, so to keep pressure balanced we reduce the defs.
  433. //
  434. // We can't tell (without more book-keeping) if this results from
  435. // glued nodes or duplicate operands. As long as we don't reduce
  436. // NumRegDefsLeft to zero, we handle the common cases well.
  437. --OpSU->NumRegDefsLeft;
  438. }
  439. }
  440. }
  441. }
  442. }
  443. /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
  444. /// are input. This SUnit graph is similar to the SelectionDAG, but
  445. /// excludes nodes that aren't interesting to scheduling, and represents
  446. /// glued together nodes with a single SUnit.
  447. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
  448. // Cluster certain nodes which should be scheduled together.
  449. ClusterNodes();
  450. // Populate the SUnits array.
  451. BuildSchedUnits();
  452. // Compute all the scheduling dependencies between nodes.
  453. AddSchedEdges();
  454. }
  455. // Initialize NumNodeDefs for the current Node's opcode.
  456. void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
  457. // Check for phys reg copy.
  458. if (!Node)
  459. return;
  460. if (!Node->isMachineOpcode()) {
  461. if (Node->getOpcode() == ISD::CopyFromReg)
  462. NodeNumDefs = 1;
  463. else
  464. NodeNumDefs = 0;
  465. return;
  466. }
  467. unsigned POpc = Node->getMachineOpcode();
  468. if (POpc == TargetOpcode::IMPLICIT_DEF) {
  469. // No register need be allocated for this.
  470. NodeNumDefs = 0;
  471. return;
  472. }
  473. unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
  474. // Some instructions define regs that are not represented in the selection DAG
  475. // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
  476. NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
  477. DefIdx = 0;
  478. }
  479. // Construct a RegDefIter for this SUnit and find the first valid value.
  480. ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
  481. const ScheduleDAGSDNodes *SD)
  482. : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
  483. InitNodeNumDefs();
  484. Advance();
  485. }
  486. // Advance to the next valid value defined by the SUnit.
  487. void ScheduleDAGSDNodes::RegDefIter::Advance() {
  488. for (;Node;) { // Visit all glued nodes.
  489. for (;DefIdx < NodeNumDefs; ++DefIdx) {
  490. if (!Node->hasAnyUseOfValue(DefIdx))
  491. continue;
  492. ValueType = Node->getSimpleValueType(DefIdx);
  493. ++DefIdx;
  494. return; // Found a normal regdef.
  495. }
  496. Node = Node->getGluedNode();
  497. if (Node == NULL) {
  498. return; // No values left to visit.
  499. }
  500. InitNodeNumDefs();
  501. }
  502. }
  503. void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
  504. assert(SU->NumRegDefsLeft == 0 && "expect a new node");
  505. for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
  506. assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
  507. ++SU->NumRegDefsLeft;
  508. }
  509. }
  510. void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
  511. SDNode *N = SU->getNode();
  512. // TokenFactor operands are considered zero latency, and some schedulers
  513. // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
  514. // whenever node latency is nonzero.
  515. if (N && N->getOpcode() == ISD::TokenFactor) {
  516. SU->Latency = 0;
  517. return;
  518. }
  519. // Check to see if the scheduler cares about latencies.
  520. if (forceUnitLatencies()) {
  521. SU->Latency = 1;
  522. return;
  523. }
  524. if (!InstrItins || InstrItins->isEmpty()) {
  525. if (N && N->isMachineOpcode() &&
  526. TII->isHighLatencyDef(N->getMachineOpcode()))
  527. SU->Latency = HighLatencyCycles;
  528. else
  529. SU->Latency = 1;
  530. return;
  531. }
  532. // Compute the latency for the node. We use the sum of the latencies for
  533. // all nodes glued together into this SUnit.
  534. SU->Latency = 0;
  535. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
  536. if (N->isMachineOpcode())
  537. SU->Latency += TII->getInstrLatency(InstrItins, N);
  538. }
  539. void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
  540. unsigned OpIdx, SDep& dep) const{
  541. // Check to see if the scheduler cares about latencies.
  542. if (forceUnitLatencies())
  543. return;
  544. if (dep.getKind() != SDep::Data)
  545. return;
  546. unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
  547. if (Use->isMachineOpcode())
  548. // Adjust the use operand index by num of defs.
  549. OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
  550. int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
  551. if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
  552. !BB->succ_empty()) {
  553. unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
  554. if (TargetRegisterInfo::isVirtualRegister(Reg))
  555. // This copy is a liveout value. It is likely coalesced, so reduce the
  556. // latency so not to penalize the def.
  557. // FIXME: need target specific adjustment here?
  558. Latency = (Latency > 1) ? Latency - 1 : 1;
  559. }
  560. if (Latency >= 0)
  561. dep.setLatency(Latency);
  562. }
  563. void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
  564. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  565. if (!SU->getNode()) {
  566. dbgs() << "PHYS REG COPY\n";
  567. return;
  568. }
  569. SU->getNode()->dump(DAG);
  570. dbgs() << "\n";
  571. SmallVector<SDNode *, 4> GluedNodes;
  572. for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
  573. GluedNodes.push_back(N);
  574. while (!GluedNodes.empty()) {
  575. dbgs() << " ";
  576. GluedNodes.back()->dump(DAG);
  577. dbgs() << "\n";
  578. GluedNodes.pop_back();
  579. }
  580. #endif
  581. }
  582. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  583. void ScheduleDAGSDNodes::dumpSchedule() const {
  584. for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
  585. if (SUnit *SU = Sequence[i])
  586. SU->dump(this);
  587. else
  588. dbgs() << "**** NOOP ****\n";
  589. }
  590. }
  591. #endif
  592. #ifndef NDEBUG
  593. /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
  594. /// their state is consistent with the nodes listed in Sequence.
  595. ///
  596. void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
  597. unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
  598. unsigned Noops = 0;
  599. for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
  600. if (!Sequence[i])
  601. ++Noops;
  602. assert(Sequence.size() - Noops == ScheduledNodes &&
  603. "The number of nodes scheduled doesn't match the expected number!");
  604. }
  605. #endif // NDEBUG
  606. /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
  607. static void
  608. ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
  609. SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
  610. DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
  611. if (!N->getHasDebugValue())
  612. return;
  613. // Opportunistically insert immediate dbg_value uses, i.e. those with source
  614. // order number right after the N.
  615. MachineBasicBlock *BB = Emitter.getBlock();
  616. MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
  617. ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
  618. for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
  619. if (DVs[i]->isInvalidated())
  620. continue;
  621. unsigned DVOrder = DVs[i]->getOrder();
  622. if (!Order || DVOrder == ++Order) {
  623. MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
  624. if (DbgMI) {
  625. Orders.push_back(std::make_pair(DVOrder, DbgMI));
  626. BB->insert(InsertPos, DbgMI);
  627. }
  628. DVs[i]->setIsInvalidated();
  629. }
  630. }
  631. }
  632. // ProcessSourceNode - Process nodes with source order numbers. These are added
  633. // to a vector which EmitSchedule uses to determine how to insert dbg_value
  634. // instructions in the right order.
  635. static void
  636. ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
  637. DenseMap<SDValue, unsigned> &VRBaseMap,
  638. SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
  639. SmallSet<unsigned, 8> &Seen) {
  640. unsigned Order = N->getIROrder();
  641. if (!Order || !Seen.insert(Order)) {
  642. // Process any valid SDDbgValues even if node does not have any order
  643. // assigned.
  644. ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
  645. return;
  646. }
  647. MachineBasicBlock *BB = Emitter.getBlock();
  648. if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
  649. // Fast-isel may have inserted some instructions, in which case the
  650. // BB->back().isPHI() test will not fire when we want it to.
  651. std::prev(Emitter.getInsertPos())->isPHI()) {
  652. // Did not insert any instruction.
  653. Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
  654. return;
  655. }
  656. Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos())));
  657. ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
  658. }
  659. void ScheduleDAGSDNodes::
  660. EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
  661. MachineBasicBlock::iterator InsertPos) {
  662. for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
  663. I != E; ++I) {
  664. if (I->isCtrl()) continue; // ignore chain preds
  665. if (I->getSUnit()->CopyDstRC) {
  666. // Copy to physical register.
  667. DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
  668. assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
  669. // Find the destination physical register.
  670. unsigned Reg = 0;
  671. for (SUnit::const_succ_iterator II = SU->Succs.begin(),
  672. EE = SU->Succs.end(); II != EE; ++II) {
  673. if (II->isCtrl()) continue; // ignore chain preds
  674. if (II->getReg()) {
  675. Reg = II->getReg();
  676. break;
  677. }
  678. }
  679. BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
  680. .addReg(VRI->second);
  681. } else {
  682. // Copy from physical register.
  683. assert(I->getReg() && "Unknown physical register!");
  684. unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
  685. bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
  686. (void)isNew; // Silence compiler warning.
  687. assert(isNew && "Node emitted out of order - early");
  688. BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
  689. .addReg(I->getReg());
  690. }
  691. break;
  692. }
  693. }
  694. /// EmitSchedule - Emit the machine code in scheduled order. Return the new
  695. /// InsertPos and MachineBasicBlock that contains this insertion
  696. /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
  697. /// not necessarily refer to returned BB. The emitter may split blocks.
  698. MachineBasicBlock *ScheduleDAGSDNodes::
  699. EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
  700. InstrEmitter Emitter(BB, InsertPos);
  701. DenseMap<SDValue, unsigned> VRBaseMap;
  702. DenseMap<SUnit*, unsigned> CopyVRBaseMap;
  703. SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
  704. SmallSet<unsigned, 8> Seen;
  705. bool HasDbg = DAG->hasDebugValues();
  706. // If this is the first BB, emit byval parameter dbg_value's.
  707. if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
  708. SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
  709. SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
  710. for (; PDI != PDE; ++PDI) {
  711. MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
  712. if (DbgMI)
  713. BB->insert(InsertPos, DbgMI);
  714. }
  715. }
  716. for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
  717. SUnit *SU = Sequence[i];
  718. if (!SU) {
  719. // Null SUnit* is a noop.
  720. TII->insertNoop(*Emitter.getBlock(), InsertPos);
  721. continue;
  722. }
  723. // For pre-regalloc scheduling, create instructions corresponding to the
  724. // SDNode and any glued SDNodes and append them to the block.
  725. if (!SU->getNode()) {
  726. // Emit a copy.
  727. EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
  728. continue;
  729. }
  730. SmallVector<SDNode *, 4> GluedNodes;
  731. for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
  732. GluedNodes.push_back(N);
  733. while (!GluedNodes.empty()) {
  734. SDNode *N = GluedNodes.back();
  735. Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
  736. VRBaseMap);
  737. // Remember the source order of the inserted instruction.
  738. if (HasDbg)
  739. ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
  740. GluedNodes.pop_back();
  741. }
  742. Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
  743. VRBaseMap);
  744. // Remember the source order of the inserted instruction.
  745. if (HasDbg)
  746. ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
  747. Seen);
  748. }
  749. // Insert all the dbg_values which have not already been inserted in source
  750. // order sequence.
  751. if (HasDbg) {
  752. MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
  753. // Sort the source order instructions and use the order to insert debug
  754. // values.
  755. std::sort(Orders.begin(), Orders.end(), less_first());
  756. SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
  757. SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
  758. // Now emit the rest according to source order.
  759. unsigned LastOrder = 0;
  760. for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
  761. unsigned Order = Orders[i].first;
  762. MachineInstr *MI = Orders[i].second;
  763. // Insert all SDDbgValue's whose order(s) are before "Order".
  764. if (!MI)
  765. continue;
  766. for (; DI != DE &&
  767. (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
  768. if ((*DI)->isInvalidated())
  769. continue;
  770. MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
  771. if (DbgMI) {
  772. if (!LastOrder)
  773. // Insert to start of the BB (after PHIs).
  774. BB->insert(BBBegin, DbgMI);
  775. else {
  776. // Insert at the instruction, which may be in a different
  777. // block, if the block was split by a custom inserter.
  778. MachineBasicBlock::iterator Pos = MI;
  779. MI->getParent()->insert(Pos, DbgMI);
  780. }
  781. }
  782. }
  783. LastOrder = Order;
  784. }
  785. // Add trailing DbgValue's before the terminator. FIXME: May want to add
  786. // some of them before one or more conditional branches?
  787. SmallVector<MachineInstr*, 8> DbgMIs;
  788. while (DI != DE) {
  789. if (!(*DI)->isInvalidated())
  790. if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
  791. DbgMIs.push_back(DbgMI);
  792. ++DI;
  793. }
  794. MachineBasicBlock *InsertBB = Emitter.getBlock();
  795. MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
  796. InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
  797. }
  798. InsertPos = Emitter.getInsertPos();
  799. return Emitter.getBlock();
  800. }
  801. /// Return the basic block label.
  802. std::string ScheduleDAGSDNodes::getDAGName() const {
  803. return "sunit-dag." + BB->getFullName();
  804. }