MachineCombiner.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
// The machine combiner pass uses machine trace metrics to ensure that the
// combined instructions do not lengthen the critical path or the resource depth.
  12. //===----------------------------------------------------------------------===//
  13. #define DEBUG_TYPE "machine-combiner"
  14. #include "llvm/ADT/Statistic.h"
  15. #include "llvm/ADT/DenseMap.h"
  16. #include "llvm/CodeGen/MachineDominators.h"
  17. #include "llvm/CodeGen/MachineFunction.h"
  18. #include "llvm/CodeGen/MachineFunctionPass.h"
  19. #include "llvm/CodeGen/MachineInstrBuilder.h"
  20. #include "llvm/CodeGen/MachineLoopInfo.h"
  21. #include "llvm/CodeGen/MachineRegisterInfo.h"
  22. #include "llvm/CodeGen/MachineTraceMetrics.h"
  23. #include "llvm/CodeGen/Passes.h"
  24. #include "llvm/CodeGen/TargetSchedule.h"
  25. #include "llvm/Support/CommandLine.h"
  26. #include "llvm/Support/Debug.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. #include "llvm/Target/TargetInstrInfo.h"
  29. #include "llvm/Target/TargetRegisterInfo.h"
  30. #include "llvm/Target/TargetSubtargetInfo.h"
  31. using namespace llvm;
  32. STATISTIC(NumInstCombined, "Number of machineinst combined");
namespace {
/// Machine function pass that asks the target (via TargetInstrInfo) for
/// instruction combining patterns and keeps an alternative code sequence only
/// when doSubstitute() accepts it or when both preservesCriticalPathLen() and
/// preservesResourceLen() hold.
class MachineCombiner : public MachineFunctionPass {
  // Target hooks and per-function state. Every member below except
  // TSchedModel is assigned at the start of runOnMachineFunction();
  // TSchedModel is (re)initialized there via init().
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const MCSchedModel *SchedModel;
  MachineRegisterInfo *MRI;
  MachineTraceMetrics *Traces;
  // Trace ensemble; reset in runOnMachineFunction() and fetched from Traces
  // on first use in combineInstructions().
  MachineTraceMetrics::Ensemble *MinInstr;
  TargetSchedModel TSchedModel;
  /// OptSize - True if optimizing for code size.
  bool OptSize;

public:
  static char ID;
  /// Constructs the pass and makes sure it is registered with the global
  /// pass registry.
  MachineCombiner() : MachineFunctionPass(ID) {
    initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &MF) override;
  const char *getPassName() const override { return "Machine InstCombiner"; }

private:
  /// True when the new sequence should be used without consulting the
  /// critical-path and resource-length checks.
  bool doSubstitute(unsigned NewSize, unsigned OldSize);
  /// Runs the combiner over a single basic block; returns true on change.
  bool combineInstructions(MachineBasicBlock *);
  /// Returns the unique non-PHI definition of a virtual register operand, or
  /// nullptr.
  MachineInstr *getOperandDef(const MachineOperand &MO);
  /// Depth of the last instruction in \p InsInstrs.
  unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                    MachineTraceMetrics::Trace BlockTrace);
  /// Latency of \p NewRoot, the candidate replacement for \p Root.
  unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
                      MachineTraceMetrics::Trace BlockTrace);
  /// True when the new sequence does not lengthen the critical path.
  bool
  preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
                           MachineTraceMetrics::Trace BlockTrace,
                           SmallVectorImpl<MachineInstr *> &InsInstrs,
                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
  /// True when the new sequence does not increase the resource length.
  bool preservesResourceLen(MachineBasicBlock *MBB,
                            MachineTraceMetrics::Trace BlockTrace,
                            SmallVectorImpl<MachineInstr *> &InsInstrs,
                            SmallVectorImpl<MachineInstr *> &DelInstrs);
  /// Appends the scheduling class descriptor of each instruction in
  /// \p Instrs to \p InstrsSC.
  void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
                     SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
};
}
// Storage for the static pass ID declared in MachineCombiner.
char MachineCombiner::ID = 0;
// Expose the pass ID under the name llvm::MachineCombinerID.
char &llvm::MachineCombinerID = MachineCombiner::ID;
// Pass registration: argument string "machine-combiner", display name
// "Machine InstCombiner", with MachineTraceMetrics declared as a dependency.
INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
                      "Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
                    false, false)
/// Declares the analyses used by this pass: MachineTraceMetrics is required
/// and preserved, the CFG, MachineDominatorTree and MachineLoopInfo are
/// marked as preserved, and the base class gets to add its own requirements.
void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addPreserved<MachineDominatorTree>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<MachineTraceMetrics>();
  AU.addPreserved<MachineTraceMetrics>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
  89. MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
  90. MachineInstr *DefInstr = nullptr;
  91. // We need a virtual register definition.
  92. if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
  93. DefInstr = MRI->getUniqueVRegDef(MO.getReg());
  94. // PHI's have no depth etc.
  95. if (DefInstr && DefInstr->isPHI())
  96. DefInstr = nullptr;
  97. return DefInstr;
  98. }
  99. /// getDepth - Computes depth of instructions in vector \InsInstr.
  100. ///
  101. /// \param InsInstrs is a vector of machine instructions
  102. /// \param InstrIdxForVirtReg is a dense map of virtual register to index
  103. /// of defining machine instruction in \p InsInstrs
  104. /// \param BlockTrace is a trace of machine instructions
  105. ///
  106. /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
  107. unsigned
  108. MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
  109. DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
  110. MachineTraceMetrics::Trace BlockTrace) {
  111. SmallVector<unsigned, 16> InstrDepth;
  112. assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
  113. // Foreach instruction in in the new sequence compute the depth based on the
  114. // operands. Use the trace information when possible. For new operands which
  115. // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
  116. for (auto *InstrPtr : InsInstrs) { // for each Use
  117. unsigned IDepth = 0;
  118. DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
  119. for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
  120. const MachineOperand &MO = InstrPtr->getOperand(i);
  121. // Check for virtual register operand.
  122. if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
  123. continue;
  124. if (!MO.isUse())
  125. continue;
  126. unsigned DepthOp = 0;
  127. unsigned LatencyOp = 0;
  128. DenseMap<unsigned, unsigned>::iterator II =
  129. InstrIdxForVirtReg.find(MO.getReg());
  130. if (II != InstrIdxForVirtReg.end()) {
  131. // Operand is new virtual register not in trace
  132. assert(II->second < InstrDepth.size() && "Bad Index");
  133. MachineInstr *DefInstr = InsInstrs[II->second];
  134. assert(DefInstr &&
  135. "There must be a definition for a new virtual register");
  136. DepthOp = InstrDepth[II->second];
  137. LatencyOp = TSchedModel.computeOperandLatency(
  138. DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
  139. InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
  140. } else {
  141. MachineInstr *DefInstr = getOperandDef(MO);
  142. if (DefInstr) {
  143. DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
  144. LatencyOp = TSchedModel.computeOperandLatency(
  145. DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
  146. InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
  147. }
  148. }
  149. IDepth = std::max(IDepth, DepthOp + LatencyOp);
  150. }
  151. InstrDepth.push_back(IDepth);
  152. }
  153. unsigned NewRootIdx = InsInstrs.size() - 1;
  154. return InstrDepth[NewRootIdx];
  155. }
  156. /// getLatency - Computes instruction latency as max of latency of defined
  157. /// operands
  158. ///
  159. /// \param Root is a machine instruction that could be replaced by NewRoot.
  160. /// It is used to compute a more accurate latency information for NewRoot in
  161. /// case there is a dependent instruction in the same trace (\p BlockTrace)
  162. /// \param NewRoot is the instruction for which the latency is computed
  163. /// \param BlockTrace is a trace of machine instructions
  164. ///
  165. /// \returns Latency of \p NewRoot
  166. unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
  167. MachineTraceMetrics::Trace BlockTrace) {
  168. assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
  169. // Check each definition in NewRoot and compute the latency
  170. unsigned NewRootLatency = 0;
  171. for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
  172. const MachineOperand &MO = NewRoot->getOperand(i);
  173. // Check for virtual register operand.
  174. if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
  175. continue;
  176. if (!MO.isDef())
  177. continue;
  178. // Get the first instruction that uses MO
  179. MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
  180. RI++;
  181. MachineInstr *UseMO = RI->getParent();
  182. unsigned LatencyOp = 0;
  183. if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
  184. LatencyOp = TSchedModel.computeOperandLatency(
  185. NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
  186. UseMO->findRegisterUseOperandIdx(MO.getReg()));
  187. } else {
  188. LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
  189. }
  190. NewRootLatency = std::max(NewRootLatency, LatencyOp);
  191. }
  192. return NewRootLatency;
  193. }
  194. /// preservesCriticalPathlen - True when the new instruction sequence does not
  195. /// lengthen the critical path. The DAGCombine code sequence ends in MI
  196. /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
  197. /// necessary condition for the new sequence to replace the old sequence is that
  198. /// is cannot lengthen the critical path. This is decided by the formula
  199. /// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
  200. /// The slack is the number of cycles Root can be delayed before the critical
  201. /// patch becomes longer.
  202. bool MachineCombiner::preservesCriticalPathLen(
  203. MachineBasicBlock *MBB, MachineInstr *Root,
  204. MachineTraceMetrics::Trace BlockTrace,
  205. SmallVectorImpl<MachineInstr *> &InsInstrs,
  206. DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
  207. assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
  208. // NewRoot is the last instruction in the \p InsInstrs vector
  209. // Get depth and latency of NewRoot
  210. unsigned NewRootIdx = InsInstrs.size() - 1;
  211. MachineInstr *NewRoot = InsInstrs[NewRootIdx];
  212. unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
  213. unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
  214. // Get depth, latency and slack of Root
  215. unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
  216. unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
  217. unsigned RootSlack = BlockTrace.getInstrSlack(Root);
  218. DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
  219. dbgs() << " NewRootDepth: " << NewRootDepth
  220. << " NewRootLatency: " << NewRootLatency << "\n";
  221. dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
  222. << " RootSlack: " << RootSlack << "\n";
  223. dbgs() << " NewRootDepth + NewRootLatency "
  224. << NewRootDepth + NewRootLatency << "\n";
  225. dbgs() << " RootDepth + RootLatency + RootSlack "
  226. << RootDepth + RootLatency + RootSlack << "\n";);
  227. /// True when the new sequence does not lenghten the critical path.
  228. return ((NewRootDepth + NewRootLatency) <=
  229. (RootDepth + RootLatency + RootSlack));
  230. }
  231. /// helper routine to convert instructions into SC
  232. void MachineCombiner::instr2instrSC(
  233. SmallVectorImpl<MachineInstr *> &Instrs,
  234. SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
  235. for (auto *InstrPtr : Instrs) {
  236. unsigned Opc = InstrPtr->getOpcode();
  237. unsigned Idx = TII->get(Opc).getSchedClass();
  238. const MCSchedClassDesc *SC = SchedModel->getSchedClassDesc(Idx);
  239. InstrsSC.push_back(SC);
  240. }
  241. }
  242. /// preservesResourceLen - True when the new instructions do not increase
  243. /// resource length
  244. bool MachineCombiner::preservesResourceLen(
  245. MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
  246. SmallVectorImpl<MachineInstr *> &InsInstrs,
  247. SmallVectorImpl<MachineInstr *> &DelInstrs) {
  248. // Compute current resource length
  249. ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
  250. unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
  251. // Deal with SC rather than Instructions.
  252. SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
  253. SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
  254. instr2instrSC(InsInstrs, InsInstrsSC);
  255. instr2instrSC(DelInstrs, DelInstrsSC);
  256. ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
  257. ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
  258. // Compute new resource length
  259. unsigned ResLenAfterCombine =
  260. BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
  261. DEBUG(dbgs() << "RESOURCE DATA: \n";
  262. dbgs() << " resource len before: " << ResLenBeforeCombine
  263. << " after: " << ResLenAfterCombine << "\n";);
  264. return ResLenAfterCombine <= ResLenBeforeCombine;
  265. }
  266. /// \returns true when new instruction sequence should be generated
  267. /// independent if it lenghtens critical path or not
  268. bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
  269. if (OptSize && (NewSize < OldSize))
  270. return true;
  271. if (!TSchedModel.hasInstrSchedModel())
  272. return true;
  273. return false;
  274. }
  275. /// combineInstructions - substitute a slow code sequence with a faster one by
  276. /// evaluating instruction combining pattern.
  277. /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
  278. /// combining based on machine trace metrics. Only combine a sequence of
  279. /// instructions when this neither lengthens the critical path nor increases
  280. /// resource pressure. When optimizing for codesize always combine when the new
  281. /// sequence is shorter.
  282. bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
  283. bool Changed = false;
  284. DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
  285. auto BlockIter = MBB->begin();
  286. while (BlockIter != MBB->end()) {
  287. auto &MI = *BlockIter++;
  288. DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
  289. SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
  290. // The motivating example is:
  291. //
  292. // MUL Other MUL_op1 MUL_op2 Other
  293. // \ / \ | /
  294. // ADD/SUB => MADD/MSUB
  295. // (=Root) (=NewRoot)
  296. // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
  297. // usually beneficial for code size it unfortunately can hurt performance
  298. // when the ADD is on the critical path, but the MUL is not. With the
  299. // substitution the MUL becomes part of the critical path (in form of the
  300. // MADD) and can lengthen it on architectures where the MADD latency is
  301. // longer than the ADD latency.
  302. //
  303. // For each instruction we check if it can be the root of a combiner
  304. // pattern. Then for each pattern the new code sequence in form of MI is
  305. // generated and evaluated. When the efficiency criteria (don't lengthen
  306. // critical path, don't use more resources) is met the new sequence gets
  307. // hooked up into the basic block before the old sequence is removed.
  308. //
  309. // The algorithm does not try to evaluate all patterns and pick the best.
  310. // This is only an artificial restriction though. In practice there is
  311. // mostly one pattern and hasPattern() can order patterns based on an
  312. // internal cost heuristic.
  313. if (TII->hasPattern(MI, Pattern)) {
  314. for (auto P : Pattern) {
  315. SmallVector<MachineInstr *, 16> InsInstrs;
  316. SmallVector<MachineInstr *, 16> DelInstrs;
  317. DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
  318. if (!MinInstr)
  319. MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
  320. MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
  321. Traces->verifyAnalysis();
  322. TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
  323. InstrIdxForVirtReg);
  324. // Found pattern, but did not generate alternative sequence.
  325. // This can happen e.g. when an immediate could not be materialized
  326. // in a single instruction.
  327. if (!InsInstrs.size())
  328. continue;
  329. // Substitute when we optimize for codesize and the new sequence has
  330. // fewer instructions OR
  331. // the new sequence neither lenghten the critical path nor increases
  332. // resource pressure.
  333. if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
  334. (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
  335. InstrIdxForVirtReg) &&
  336. preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
  337. for (auto *InstrPtr : InsInstrs)
  338. MBB->insert((MachineBasicBlock::iterator) & MI,
  339. (MachineInstr *)InstrPtr);
  340. for (auto *InstrPtr : DelInstrs)
  341. InstrPtr->eraseFromParent();
  342. Changed = true;
  343. ++NumInstCombined;
  344. Traces->invalidate(MBB);
  345. Traces->verifyAnalysis();
  346. // Eagerly stop after the first pattern fired
  347. break;
  348. } else {
  349. // Cleanup instructions of the alternative code sequence. There is no
  350. // use for them.
  351. for (auto *InstrPtr : InsInstrs) {
  352. MachineFunction *MF = MBB->getParent();
  353. MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
  354. }
  355. }
  356. InstrIdxForVirtReg.clear();
  357. }
  358. }
  359. }
  360. return Changed;
  361. }
  362. bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
  363. const TargetSubtargetInfo &STI =
  364. MF.getTarget().getSubtarget<TargetSubtargetInfo>();
  365. TII = STI.getInstrInfo();
  366. TRI = STI.getRegisterInfo();
  367. SchedModel = STI.getSchedModel();
  368. TSchedModel.init(*SchedModel, &STI, TII);
  369. MRI = &MF.getRegInfo();
  370. Traces = &getAnalysis<MachineTraceMetrics>();
  371. MinInstr = 0;
  372. OptSize = MF.getFunction()->getAttributes().hasAttribute(
  373. AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
  374. DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
  375. if (!TII->useMachineCombiner()) {
  376. DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
  377. return false;
  378. }
  379. bool Changed = false;
  380. // Try to combine instructions.
  381. for (auto &MBB : MF)
  382. Changed |= combineInstructions(&MBB);
  383. return Changed;
  384. }