//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
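
/// getLoadStoreMultipleOpcode - Map a single load / store opcode to the
/// corresponding load / store multiple opcode, bumping the matching
/// statistic counter.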
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base, but only if it is cost effective, i.e. we are merging more
    // than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, use one of the destination registers as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Otherwise use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this one in. Try to merge the earlier ones first.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return;
}
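
/// isMatchingDecrement / isMatchingIncrement - Return true if MI is a SUB /
/// ADD of Bytes (within Limit) applied to Base with the given predicate, i.e.
/// a base update that an adjacent load / store could fold as a writeback.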
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
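
/// getLSMultipleTransferSize - Return the number of bytes transferred by the
/// given load / store (multiple) instruction.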
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
      }
      return true;
    }
  }
  return false;
}
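
/// getPreIndexedLoadStoreOpcode - Map a load / store opcode to the form that
/// updates the base register before the access; VFP loads / stores map to the
/// corresponding FLDM / FSTM opcode.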
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
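
/// getPostIndexedLoadStoreOpcode - Map a load / store opcode to the form that
/// updates the base register after the access; VFP loads / stores map to the
/// corresponding FLDM / FSTM opcode.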
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
    Opcode == ARM::FSTD || Opcode == ARM::FSTS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if the instruction is a memory operation that
/// this pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return true;
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
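
/// getMemoryOpOffset - Return the signed immediate offset encoded in the
/// addressing-mode operand of the given load / store instruction.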
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
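
/// InsertLDR_STR - Emit a single LDR / STR with the given register, base and
/// offset operands; used when an LDRD / STRD is split into two accesses.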
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill,
                          unsigned BaseReg, bool BaseKill,
                          unsigned OffReg, bool OffKill,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII) {
  unsigned Offset;
  if (OffImm < 0)
    Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
  else
    Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  if (isDef)
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
  else
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
}
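
/// FixInvalidRegPairOp - If the LDRD / STRD at MBBI does not use a legal
/// even / odd register pair, rewrite it as an LDM / STM (when the registers
/// ascend and there is no offset) or as two separate LDR / STR instructions.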
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isLd = Opcode == ARM::LDRD;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    const MachineOperand &OffOp = MI->getOperand(3);
    unsigned OffReg = OffOp.getReg();
    bool OffKill = OffOp.isKill();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getKillRegState(EvenDeadKill))
          .addReg(OddReg, getKillRegState(OddDeadKill));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
                      BaseReg, false, OffReg, false, Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
                      BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
      } else {
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
                      Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
                      Pred, PredReg, TII);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (!Scratch)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
/// ldmfd sp!, {r7, lr}
/// bx lr
/// =>
/// ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}

/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
namespace {
  struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, unsigned &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred);

    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
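
/// IsSafeAndProfitableToMove - Return true if the memory ops in MemOps can be
/// moved next to each other without crossing a conflicting memory access or a
/// redefinition of the base register, and without adding too much register
/// pressure.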
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
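
/// CanFormLdStDWord - Return true if Op0 and Op1 are two 32-bit loads / stores
/// that can be combined into a single LDRD / STRD, filling in the operands of
/// the new instruction.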
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, unsigned &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred) {
  // FIXME: FLDS / FSTS -> FLDD / FSTD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !Op0->memoperands_begin()->getValue() ||
      Op0->memoperands_begin()->isVolatile())
    return false;

  unsigned Align = Op0->memoperands_begin()->getAlignment();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(
        Type::getInt64Ty(Op0->getParent()->getParent()->getFunction()->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  if (OffImm < 0) {
    AddSub = ARM_AM::sub;
    OffImm = - OffImm;
  }
  if (OffImm >= 256) // 8 bits
    return false;
  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  OffReg  = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
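
/// RescheduleOps - Move loads / stores of the same base register next to each
/// other (forming an LDRD / STRD when a pair qualifies) so they are more
/// likely to be combined later.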
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        unsigned NewOpc = 0;
        unsigned Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
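
/// RescheduleLoadStoreInstrs - Scan the basic block, group the loads and
/// stores by base register, and invoke RescheduleOps on each group.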
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opcode = MI->getOpcode();
      bool isLd = Opcode == ARM::LDR ||
        Opcode == ARM::FLDS || Opcode == ARM::FLDD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}