//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");

namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    SmallVector<MachineBasicBlock::iterator, 4>
    MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                 int Opcode, unsigned Size,
                 ARMCC::CondCodes Pred, unsigned PredReg,
                 unsigned Scratch, MemOpQueue &MemOps);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
  return new ARMLoadStoreOpt();
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: abort();
  }
  return 0;
}

/// mergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
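///
/// Illustrative sketch (hypothetical registers, not taken from the source):
///
///   ldr r0, [r12]
///   ldr r1, [r12, #4]      =>   ldmia r12, {r0, r1, r2}
///   ldr r2, [r12, #8]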
static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     int Offset, unsigned Base, bool BaseKill, int Opcode,
                     ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                     SmallVector<std::pair<unsigned, bool>, 8> &Regs,
                     const TargetInstrInfo *TII) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM4 && Offset == 4)
    Mode = ARM_AM::ib;
  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
    Mode = ARM_AM::da;
  else if (isAM4 && Offset == -4 * (int)NumRegs)
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost-effective, i.e. when merging
    // more than two loads / stores.
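    //
    // Illustrative sketch (hypothetical registers): for a run of loads
    // starting at [r4, #8], emit "add r2, r4, #8" and merge the run as
    // "ldmia r2, {r0, r1, r2}". A load can reuse its last destination
    // register (here r2) as the new base; a store must use Scratch.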
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (Opcode == ARM::LDR)
      // If it is a load, then just reuse one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Otherwise, use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = ARM::ADDri;
    if (Offset < 0) {
      BaseOpc = ARM::SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase)
      .addReg(Base, false, false, BaseKill).addImm(ImmedOffset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill)
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill)
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, isDef, false, Regs[i].second);

  return true;
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
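///
/// Illustrative sketch (hypothetical registers): a gap in the offsets (or a
/// register number out of ascending order) ends the current run; the earlier
/// ops are merged and the remainder is retried recursively. For example,
///
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r5, [r4, #16]
///
/// yields "ldmia r4, {r0, r1}" and leaves the third load alone.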
SmallVector<MachineBasicBlock::iterator, 4>
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps) {
  SmallVector<MachineBasicBlock::iterator, 4> Merges;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, Regs, TII)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
        MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
      Merges.append(Merges2.begin(), Merges2.end());
      return Merges;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, Regs, TII)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return Merges;
}

/// getInstrPredicate - If the instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
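///
/// e.g. for a predicated "LDREQ r0, [r1]" this returns ARMCC::EQ and sets
/// PredReg to the condition code register operand (typically CPSR); for an
/// unpredicated instruction it returns ARMCC::AL with PredReg set to 0.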
static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::SUBri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::ADDri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// mergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      bool &Advance,
                                      MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }
  return false;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}

/// mergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
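///
/// Illustrative sketch (hypothetical registers):
///
///   ldr r0, [r1]                      sub r1, r1, #4
///   add r1, r1, #4                    ldr r0, [r1]
///   =>                                =>
///   ldr r0, [r1], #4                  ldr r0, [r1, #-4]!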
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
      (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
    return false;

  bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
                                            Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
    : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
                        true, isDPR ? 2 : 1);
  if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, true)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base, false, false, BaseKill)
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), true);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), false, false, MO.isKill())
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), false, false, MO.isKill());
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if the instruction is a memory operation that
/// this pass is capable of operating on.
static bool isMemoryOp(MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isRegister() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isRegister();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isRegister();
  }
  return false;
}

/// AdvanceRS - Advance the register scavenger to just before the earliest
/// memory op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
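///
/// Illustrative sketch (hypothetical registers); the ops may appear with
/// their offsets out of order, and the queue keeps them sorted:
///
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]      =>   ldmia r0, {r1, r2, r3}
///   ldr r1, [r0]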
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      unsigned NumOperands = MBBI->getDesc().getNumOperands();
      unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
      int Offset = isAM2
        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
      if (isAM2) {
        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      } else {
        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      }
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (!Scratch)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        SmallVector<MachineBasicBlock::iterator,4> MBBII =
          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                       CurrPred, CurrPredReg, Scratch, MemOps);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
            NumMerges++;
        NumMerges += MBBII.size();

        // Try folding preceding/trailing base inc/dec into those load / store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              NumMerges++;

        // RS may be pointing to an instruction that's been deleted.
        RS->skipTo(prior(MBBI));
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op, skip
      // it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
/// ldmfd sp!, {r7, lr}
/// bx lr
/// =>
/// ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() == ARM::LR) {
        PrevMI->setDesc(TII->get(ARM::LDM_RET));
        MO.setReg(ARM::PC);
        MBB.erase(MBBI);
        return true;
      }
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
  707. }