PeepholeOptimizer.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // Perform peephole optimizations on the machine code:
  11. //
  12. // - Optimize Extensions
  13. //
  14. // Optimization of sign / zero extension instructions. It may be extended to
  15. // handle other instructions with similar properties.
  16. //
  17. // On some targets, some instructions, e.g. X86 sign / zero extension, may
  18. // leave the source value in the lower part of the result. This optimization
  19. // will replace some uses of the pre-extension value with uses of the
  20. // sub-register of the results.
  21. //
  22. // - Optimize Comparisons
  23. //
  24. // Optimization of comparison instructions. For instance, in this code:
  25. //
  26. // sub r1, 1
  27. // cmp r1, 0
  28. // bz L1
  29. //
  30. // If the "sub" instruction already sets (or could be modified to set) the
  31. // same flag that the "cmp" instruction sets and that "bz" uses, then we can
  32. // eliminate the "cmp" instruction.
  33. //
  34. // - Optimize Bitcast pairs:
  35. //
  36. // v1 = bitcast v0
  37. // v2 = bitcast v1
  38. // = v2
  39. // =>
  40. // v1 = bitcast v0
  41. // = v0
  42. //
  43. //===----------------------------------------------------------------------===//
  44. #define DEBUG_TYPE "peephole-opt"
  45. #include "llvm/CodeGen/Passes.h"
  46. #include "llvm/CodeGen/MachineDominators.h"
  47. #include "llvm/CodeGen/MachineInstrBuilder.h"
  48. #include "llvm/CodeGen/MachineRegisterInfo.h"
  49. #include "llvm/Target/TargetInstrInfo.h"
  50. #include "llvm/Target/TargetRegisterInfo.h"
  51. #include "llvm/Support/CommandLine.h"
  52. #include "llvm/ADT/DenseMap.h"
  53. #include "llvm/ADT/SmallPtrSet.h"
  54. #include "llvm/ADT/SmallSet.h"
  55. #include "llvm/ADT/Statistic.h"
  56. using namespace llvm;
  57. // Optimize Extensions
  58. static cl::opt<bool>
  59. Aggressive("aggressive-ext-opt", cl::Hidden,
  60. cl::desc("Aggressive extension optimization"));
  61. static cl::opt<bool>
  62. DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
  63. cl::desc("Disable the peephole optimizer"));
  64. STATISTIC(NumReuse, "Number of extension results reused");
  65. STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
  66. STATISTIC(NumCmps, "Number of compares eliminated");
  67. STATISTIC(NumImmFold, "Number of move immediate foled");
namespace {
  /// PeepholeOptimizer - Machine-function pass performing local peephole
  /// optimizations: reuse of sign/zero-extension results, bitcast-pair
  /// folding, redundant-compare elimination, and move-immediate folding.
  class PeepholeOptimizer : public MachineFunctionPass {
    const TargetMachine   *TM;   // Target machine of the current function.
    const TargetInstrInfo *TII;  // Target hooks (AnalyzeCompare, FoldImmediate, ...).
    MachineRegisterInfo   *MRI;  // Def/use information for virtual registers.
    MachineDominatorTree  *DT;   // Machine dominator tree; only non-null in
                                 // -aggressive-ext-opt mode (see runOnMachineFunction).

  public:
    static char ID; // Pass identification
    PeepholeOptimizer() : MachineFunctionPass(ID) {
      initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
    }

    virtual bool runOnMachineFunction(MachineFunction &MF);

    // The pass never adds or removes basic blocks; the dominator tree is
    // required/preserved only when the aggressive extension optimization
    // (which consults it) is enabled.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      if (Aggressive) {
        AU.addRequired<MachineDominatorTree>();
        AU.addPreserved<MachineDominatorTree>();
      }
    }

  private:
    // Each helper returns true if it changed (or deleted) the instruction.
    bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                          SmallPtrSet<MachineInstr*, 8> &LocalMIs);
    bool isMoveImmediate(MachineInstr *MI,
                         SmallSet<unsigned, 4> &ImmDefRegs,
                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                       SmallSet<unsigned, 4> &ImmDefRegs,
                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
  };
}
char PeepholeOptimizer::ID = 0;

// Register the pass under "peephole-opts". The MachineDominatorTree
// dependency is declared here but only actually requested when
// -aggressive-ext-opt is enabled (see getAnalysisUsage).
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
                "Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
                "Peephole Optimizations", false, false)

/// createPeepholeOptimizerPass - Factory used by the code generator to add
/// this pass to the pipeline.
FunctionPass *llvm::createPeepholeOptimizerPass() {
  return new PeepholeOptimizer();
}
/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  // Ask the target whether MI is an extension whose source value survives
  // as sub-register SubIdx of DstReg.
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  // Only rewrite virtual registers.
  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  // MI itself is one use of SrcReg; if it is the only one, nothing to do.
  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
  if (++UI == MRI->use_nodbg_end())
    // No other uses.
    return false;

  // The source has other uses. See if we can replace the other uses with use of
  // the result of the extension.
  // Collect every block that already contains a use of the extension result;
  // SrcReg uses in those blocks can be rewritten without extending liveness.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  UI = MRI->use_nodbg_begin(DstReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses that are in the same BB of uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  // Cleared once we see a use that must not be rewritten; in that case the
  // ExtendedUses (which would require extending DstReg's live range) are
  // abandoned, but the safe Uses are still rewritten below.
  bool ExtendLife = true;
  UI = MRI->use_nodbg_begin(SrcReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;

    // Never rewrite a PHI input, and do not extend the live range either.
    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range of
      // the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result, we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions down
    // stream. A PHI use is expected to be the kill of its source values.
    UI = MRI->use_nodbg_begin(DstReg);
    for (MachineRegisterInfo::use_nodbg_iterator
           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    // For each rewritable use, materialize the sub-register value with a
    // COPY from DstReg:SubIdx into a fresh virtual register of SrcReg's
    // class, inserted right before the use, and retarget the use to it.
    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;
      unsigned NewVR = MRI->createVirtualRegister(RC);
      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
  228. /// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
  229. /// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
  230. /// a value cross register classes), and the source is defined by another
  231. /// bitcast instruction B. And if the register class of source of B matches
  232. /// the register class of instruction A, then it is legal to replace all uses
  233. /// of the def of A with source of B. e.g.
  234. /// %vreg0<def> = VMOVSR %vreg1
  235. /// %vreg3<def> = VMOVRS %vreg0
  236. /// Replace all uses of vreg3 with vreg1.
  237. bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
  238. MachineBasicBlock *MBB) {
  239. unsigned NumDefs = MI->getDesc().getNumDefs();
  240. unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
  241. if (NumDefs != 1)
  242. return false;
  243. unsigned Def = 0;
  244. unsigned Src = 0;
  245. for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
  246. const MachineOperand &MO = MI->getOperand(i);
  247. if (!MO.isReg())
  248. continue;
  249. unsigned Reg = MO.getReg();
  250. if (!Reg)
  251. continue;
  252. if (MO.isDef())
  253. Def = Reg;
  254. else if (Src)
  255. // Multiple sources?
  256. return false;
  257. else
  258. Src = Reg;
  259. }
  260. assert(Def && Src && "Malformed bitcast instruction!");
  261. MachineInstr *DefMI = MRI->getVRegDef(Src);
  262. if (!DefMI || !DefMI->isBitcast())
  263. return false;
  264. unsigned SrcSrc = 0;
  265. NumDefs = DefMI->getDesc().getNumDefs();
  266. NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
  267. if (NumDefs != 1)
  268. return false;
  269. for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
  270. const MachineOperand &MO = DefMI->getOperand(i);
  271. if (!MO.isReg() || MO.isDef())
  272. continue;
  273. unsigned Reg = MO.getReg();
  274. if (!Reg)
  275. continue;
  276. if (!MO.isDef()) {
  277. if (SrcSrc)
  278. // Multiple sources?
  279. return false;
  280. else
  281. SrcSrc = Reg;
  282. }
  283. }
  284. if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
  285. return false;
  286. MRI->replaceRegWith(Def, SrcSrc);
  287. MRI->clearKillFlags(SrcSrc);
  288. MI->eraseFromParent();
  289. ++NumBitcasts;
  290. return true;
  291. }
  292. /// OptimizeCmpInstr - If the instruction is a compare and the previous
  293. /// instruction it's comparing against all ready sets (or could be modified to
  294. /// set) the same flag as the compare, then we can remove the comparison and use
  295. /// the flag from the previous instruction.
  296. bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
  297. MachineBasicBlock *MBB) {
  298. // If this instruction is a comparison against zero and isn't comparing a
  299. // physical register, we can try to optimize it.
  300. unsigned SrcReg;
  301. int CmpMask, CmpValue;
  302. if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
  303. TargetRegisterInfo::isPhysicalRegister(SrcReg))
  304. return false;
  305. // Attempt to optimize the comparison instruction.
  306. if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
  307. ++NumCmps;
  308. return true;
  309. }
  310. return false;
  311. }
  312. bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
  313. SmallSet<unsigned, 4> &ImmDefRegs,
  314. DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  315. const MCInstrDesc &MCID = MI->getDesc();
  316. if (!MI->isMoveImmediate())
  317. return false;
  318. if (MCID.getNumDefs() != 1)
  319. return false;
  320. unsigned Reg = MI->getOperand(0).getReg();
  321. if (TargetRegisterInfo::isVirtualRegister(Reg)) {
  322. ImmDefMIs.insert(std::make_pair(Reg, MI));
  323. ImmDefRegs.insert(Reg);
  324. return true;
  325. }
  326. return false;
  327. }
  328. /// FoldImmediate - Try folding register operands that are defined by move
  329. /// immediate instructions, i.e. a trivial constant folding optimization, if
  330. /// and only if the def and use are in the same BB.
  331. bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
  332. SmallSet<unsigned, 4> &ImmDefRegs,
  333. DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  334. for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
  335. MachineOperand &MO = MI->getOperand(i);
  336. if (!MO.isReg() || MO.isDef())
  337. continue;
  338. unsigned Reg = MO.getReg();
  339. if (!TargetRegisterInfo::isVirtualRegister(Reg))
  340. continue;
  341. if (ImmDefRegs.count(Reg) == 0)
  342. continue;
  343. DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
  344. assert(II != ImmDefMIs.end());
  345. if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
  346. ++NumImmFold;
  347. return true;
  348. }
  349. }
  350. return false;
  351. }
/// runOnMachineFunction - Walk every instruction of every basic block and
/// apply the individual peephole optimizations. Returns true if the function
/// was modified.
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  // The dominator tree is only requested (and required) in aggressive mode.
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  // All three sets are per-block and reset at the top of each block below.
  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

    // First/PMII remember whether we are at the first instruction and, if
    // not, the instruction preceding MII. When an optimization deletes MI,
    // iteration resumes at the instruction after PMII (or at begin()),
    // because MII itself has been invalidated by the erase.
    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      // Skip instructions the peepholes cannot or must not touch.
      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

      if (MI->isBitcast()) {
        if (OptimizeBitcastInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      } else if (MI->isCompare()) {
        if (OptimizeCmpInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
        // Only worth scanning for foldable immediates once a move-immediate
        // has been recorded earlier in this block.
        if (SeenMoveImm)
          Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}