  1. //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file contains a pass that performs load / store related peephole
  11. // optimizations. This pass should be run after register allocation.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #define DEBUG_TYPE "arm-ldst-opt"
  15. #include "ARM.h"
  16. #include "ARMAddressingModes.h"
  17. #include "ARMBaseInstrInfo.h"
  18. #include "ARMMachineFunctionInfo.h"
  19. #include "ARMRegisterInfo.h"
  20. #include "llvm/DerivedTypes.h"
  21. #include "llvm/Function.h"
  22. #include "llvm/CodeGen/MachineBasicBlock.h"
  23. #include "llvm/CodeGen/MachineFunctionPass.h"
  24. #include "llvm/CodeGen/MachineInstr.h"
  25. #include "llvm/CodeGen/MachineInstrBuilder.h"
  26. #include "llvm/CodeGen/MachineRegisterInfo.h"
  27. #include "llvm/CodeGen/RegisterScavenging.h"
  28. #include "llvm/Target/TargetData.h"
  29. #include "llvm/Target/TargetInstrInfo.h"
  30. #include "llvm/Target/TargetMachine.h"
  31. #include "llvm/Target/TargetRegisterInfo.h"
  32. #include "llvm/Support/ErrorHandling.h"
  33. #include "llvm/ADT/DenseMap.h"
  34. #include "llvm/ADT/STLExtras.h"
  35. #include "llvm/ADT/SmallPtrSet.h"
  36. #include "llvm/ADT/SmallSet.h"
  37. #include "llvm/ADT/SmallVector.h"
  38. #include "llvm/ADT/Statistic.h"
  39. using namespace llvm;
  40. STATISTIC(NumLDMGened , "Number of ldm instructions generated");
  41. STATISTIC(NumSTMGened , "Number of stm instructions generated");
  42. STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
  43. STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
  44. STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
  45. STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
  46. STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
  47. STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
  48. STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
  49. STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
  50. STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
  51. /// ARMLoadStoreOpt - Post-register allocation pass that combines
  52. /// load / store instructions to form ldm / stm instructions.
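  /// For example (illustrative):
  ///   ldr r0, [r4]
  ///   ldr r1, [r4, #4]
  ///   ldr r2, [r4, #8]
  /// =>
  ///   ldmia r4, {r0, r1, r2}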
  53. namespace {
  54. struct ARMLoadStoreOpt : public MachineFunctionPass {
  55. static char ID;
  56. ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
  57. const TargetInstrInfo *TII;
  58. const TargetRegisterInfo *TRI;
  59. ARMFunctionInfo *AFI;
  60. RegScavenger *RS;
  61. bool isThumb2;
  62. virtual bool runOnMachineFunction(MachineFunction &Fn);
  63. virtual const char *getPassName() const {
  64. return "ARM load / store optimization pass";
  65. }
  66. private:
  67. struct MemOpQueueEntry {
  68. int Offset;
  69. unsigned Position;
  70. MachineBasicBlock::iterator MBBI;
  71. bool Merged;
  72. MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
  73. : Offset(o), Position(p), MBBI(i), Merged(false) {}
  74. };
  75. typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  76. typedef MemOpQueue::iterator MemOpQueueIter;
  77. bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  78. int Offset, unsigned Base, bool BaseKill, int Opcode,
  79. ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
  80. DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
  81. void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
  82. int Opcode, unsigned Size,
  83. ARMCC::CondCodes Pred, unsigned PredReg,
  84. unsigned Scratch, MemOpQueue &MemOps,
  85. SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  86. void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  87. bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
  88. MachineBasicBlock::iterator &MBBI);
  89. bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
  90. MachineBasicBlock::iterator MBBI,
  91. const TargetInstrInfo *TII,
  92. bool &Advance,
  93. MachineBasicBlock::iterator &I);
  94. bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
  95. MachineBasicBlock::iterator MBBI,
  96. bool &Advance,
  97. MachineBasicBlock::iterator &I);
  98. bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  99. bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  100. };
  101. char ARMLoadStoreOpt::ID = 0;
  102. }
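  /// getLoadStoreMultipleOpcode - Map a single load / store opcode to the
  /// corresponding load / store multiple opcode, bumping the matching
  /// "generated" statistic as a side effect.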
  103. static int getLoadStoreMultipleOpcode(int Opcode) {
  104. switch (Opcode) {
  105. case ARM::LDR:
  106. NumLDMGened++;
  107. return ARM::LDM;
  108. case ARM::STR:
  109. NumSTMGened++;
  110. return ARM::STM;
  111. case ARM::t2LDRi8:
  112. case ARM::t2LDRi12:
  113. NumLDMGened++;
  114. return ARM::t2LDM;
  115. case ARM::t2STRi8:
  116. case ARM::t2STRi12:
  117. NumSTMGened++;
  118. return ARM::t2STM;
  119. case ARM::FLDS:
  120. NumFLDMGened++;
  121. return ARM::FLDMS;
  122. case ARM::FSTS:
  123. NumFSTMGened++;
  124. return ARM::FSTMS;
  125. case ARM::FLDD:
  126. NumFLDMGened++;
  127. return ARM::FLDMD;
  128. case ARM::FSTD:
  129. NumFSTMGened++;
  130. return ARM::FSTMD;
  131. default: llvm_unreachable("Unhandled opcode!");
  132. }
  133. return 0;
  134. }
  135. static bool isT2i32Load(unsigned Opc) {
  136. return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
  137. }
  138. static bool isi32Load(unsigned Opc) {
  139. return Opc == ARM::LDR || isT2i32Load(Opc);
  140. }
  141. static bool isT2i32Store(unsigned Opc) {
  142. return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
  143. }
  144. static bool isi32Store(unsigned Opc) {
  145. return Opc == ARM::STR || isT2i32Store(Opc);
  146. }
  147. /// MergeOps - Create and insert a LDM or STM with Base as base register and
  148. /// registers in Regs as the register operands that would be loaded / stored.
  149. /// It returns true if the transformation is done.
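  /// The addressing sub-mode (ia / ib / da / db) is chosen from the starting
  /// Offset; a non-zero offset that matches no sub-mode is folded into a new
  /// base register with an ADD / SUB, but only when merging more than two ops.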
  150. bool
  151. ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
  152. MachineBasicBlock::iterator MBBI,
  153. int Offset, unsigned Base, bool BaseKill,
  154. int Opcode, ARMCC::CondCodes Pred,
  155. unsigned PredReg, unsigned Scratch, DebugLoc dl,
  156. SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  157. // Only a single register to load / store. Don't bother.
  158. unsigned NumRegs = Regs.size();
  159. if (NumRegs <= 1)
  160. return false;
  161. ARM_AM::AMSubMode Mode = ARM_AM::ia;
  162. bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  163. if (isAM4 && Offset == 4) {
  164. if (isThumb2)
  165. // Thumb2 does not support ldmib / stmib.
  166. return false;
  167. Mode = ARM_AM::ib;
  168. } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
  169. if (isThumb2)
  170. // Thumb2 does not support ldmda / stmda.
  171. return false;
  172. Mode = ARM_AM::da;
  173. } else if (isAM4 && Offset == -4 * (int)NumRegs) {
  174. Mode = ARM_AM::db;
  175. } else if (Offset != 0) {
  176. // If the starting offset isn't zero, insert an MI to materialize a new base.
  177. // But only do so if it is cost effective, i.e. merging more than two
  178. // loads / stores.
  179. if (NumRegs <= 2)
  180. return false;
  181. unsigned NewBase;
  182. if (isi32Load(Opcode))
  183. // If it is a load, then just use one of the destination registers
  184. // as the new base.
  185. NewBase = Regs[NumRegs-1].first;
  186. else {
  187. // Use the scratch register as the new base.
  188. NewBase = Scratch;
  189. if (NewBase == 0)
  190. return false;
  191. }
  192. int BaseOpc = !isThumb2
  193. ? ARM::ADDri
  194. : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
  195. if (Offset < 0) {
  196. BaseOpc = !isThumb2
  197. ? ARM::SUBri
  198. : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
  199. Offset = - Offset;
  200. }
  201. int ImmedOffset = isThumb2
  202. ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
  203. if (ImmedOffset == -1)
  204. // FIXME: Try t2ADDri12 or t2SUBri12?
  205. return false; // Probably not worth it then.
  206. BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
  207. .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
  208. .addImm(Pred).addReg(PredReg).addReg(0);
  209. Base = NewBase;
  210. BaseKill = true; // New base is always killed right after its use.
  211. }
  212. bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  213. bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  214. Opcode = getLoadStoreMultipleOpcode(Opcode);
  215. MachineInstrBuilder MIB = (isAM4)
  216. ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
  217. .addReg(Base, getKillRegState(BaseKill))
  218. .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
  219. : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
  220. .addReg(Base, getKillRegState(BaseKill))
  221. .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
  222. .addImm(Pred).addReg(PredReg);
  223. MIB.addReg(0); // Add optional writeback (0 for now).
  224. for (unsigned i = 0; i != NumRegs; ++i)
  225. MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
  226. | getKillRegState(Regs[i].second));
  227. return true;
  228. }
  229. /// MergeLDR_STR - Merge a number of load / store instructions into one or more
  230. /// load / store multiple instructions.
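  /// Entries are consumed from MemOps starting at SIndex; when an entry cannot
  /// be merged (offset gap or non-ascending register number), the ops collected
  /// so far are merged and the routine recurses on the rest of the queue.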
  231. void
  232. ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
  233. unsigned Base, int Opcode, unsigned Size,
  234. ARMCC::CondCodes Pred, unsigned PredReg,
  235. unsigned Scratch, MemOpQueue &MemOps,
  236. SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  237. bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  238. int Offset = MemOps[SIndex].Offset;
  239. int SOffset = Offset;
  240. unsigned Pos = MemOps[SIndex].Position;
  241. MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  242. DebugLoc dl = Loc->getDebugLoc();
  243. unsigned PReg = Loc->getOperand(0).getReg();
  244. unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  245. bool isKill = Loc->getOperand(0).isKill();
  246. SmallVector<std::pair<unsigned,bool>, 8> Regs;
  247. Regs.push_back(std::make_pair(PReg, isKill));
  248. for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
  249. int NewOffset = MemOps[i].Offset;
  250. unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
  251. unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
  252. isKill = MemOps[i].MBBI->getOperand(0).isKill();
  253. // AM4 - register numbers in ascending order.
  254. // AM5 - consecutive register numbers in ascending order.
  255. if (NewOffset == Offset + (int)Size &&
  256. ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
  257. Offset += Size;
  258. Regs.push_back(std::make_pair(Reg, isKill));
  259. PRegNum = RegNum;
  260. } else {
  261. // Can't merge this in. Try to merge the earlier ones first.
  262. if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
  263. Scratch, dl, Regs)) {
  264. Merges.push_back(prior(Loc));
  265. for (unsigned j = SIndex; j < i; ++j) {
  266. MBB.erase(MemOps[j].MBBI);
  267. MemOps[j].Merged = true;
  268. }
  269. }
  270. MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
  271. MemOps, Merges);
  272. return;
  273. }
  274. if (MemOps[i].Position > Pos) {
  275. Pos = MemOps[i].Position;
  276. Loc = MemOps[i].MBBI;
  277. }
  278. }
  279. bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  280. if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
  281. Scratch, dl, Regs)) {
  282. Merges.push_back(prior(Loc));
  283. for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
  284. MBB.erase(MemOps[i].MBBI);
  285. MemOps[i].Merged = true;
  286. }
  287. }
  288. return;
  289. }
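  /// isMatchingDecrement - Return true if MI subtracts Bytes from Base into
  /// Base under the same predicate (and Bytes is within Limit, when non-zero),
  /// so it can be folded into a neighboring load / store as a base update.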
  290. static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
  291. unsigned Bytes, unsigned Limit,
  292. ARMCC::CondCodes Pred, unsigned PredReg){
  293. unsigned MyPredReg = 0;
  294. if (!MI)
  295. return false;
  296. if (MI->getOpcode() != ARM::t2SUBri &&
  297. MI->getOpcode() != ARM::t2SUBrSPi &&
  298. MI->getOpcode() != ARM::t2SUBrSPi12 &&
  299. MI->getOpcode() != ARM::tSUBspi &&
  300. MI->getOpcode() != ARM::SUBri)
  301. return false;
  302. // Make sure the offset is within the limit for this addressing mode.
  303. if (Bytes <= 0 || (Limit && Bytes >= Limit))
  304. return false;
  305. unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  306. return (MI->getOperand(0).getReg() == Base &&
  307. MI->getOperand(1).getReg() == Base &&
  308. (MI->getOperand(2).getImm()*Scale) == Bytes &&
  309. llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
  310. MyPredReg == PredReg);
  311. }
  312. static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
  313. unsigned Bytes, unsigned Limit,
  314. ARMCC::CondCodes Pred, unsigned PredReg){
  315. unsigned MyPredReg = 0;
  316. if (!MI)
  317. return false;
  318. if (MI->getOpcode() != ARM::t2ADDri &&
  319. MI->getOpcode() != ARM::t2ADDrSPi &&
  320. MI->getOpcode() != ARM::t2ADDrSPi12 &&
  321. MI->getOpcode() != ARM::tADDspi &&
  322. MI->getOpcode() != ARM::ADDri)
  323. return false;
  324. // Make sure the offset is within the limit for this addressing mode.
  325. if (Bytes <= 0 || (Limit && Bytes >= Limit))
  326. return false;
  327. unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  328. return (MI->getOperand(0).getReg() == Base &&
  329. MI->getOperand(1).getReg() == Base &&
  330. (MI->getOperand(2).getImm()*Scale) == Bytes &&
  331. llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
  332. MyPredReg == PredReg);
  333. }
  334. static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  335. switch (MI->getOpcode()) {
  336. default: return 0;
  337. case ARM::LDR:
  338. case ARM::STR:
  339. case ARM::t2LDRi8:
  340. case ARM::t2LDRi12:
  341. case ARM::t2STRi8:
  342. case ARM::t2STRi12:
  343. case ARM::FLDS:
  344. case ARM::FSTS:
  345. return 4;
  346. case ARM::FLDD:
  347. case ARM::FSTD:
  348. return 8;
  349. case ARM::LDM:
  350. case ARM::STM:
  351. case ARM::t2LDM:
  352. case ARM::t2STM:
  353. return (MI->getNumOperands() - 5) * 4;
  354. case ARM::FLDMS:
  355. case ARM::FSTMS:
  356. case ARM::FLDMD:
  357. case ARM::FSTMD:
  358. return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  359. }
  360. }
  361. /// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
  362. /// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
  363. ///
  364. /// stmia rn, <ra, rb, rc>
  365. /// rn := rn + 4 * 3;
  366. /// =>
  367. /// stmia rn!, <ra, rb, rc>
  368. ///
  369. /// rn := rn - 4 * 3;
  370. /// ldmia rn, <ra, rb, rc>
  371. /// =>
  372. /// ldmdb rn!, <ra, rb, rc>
  373. bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
  374. MachineBasicBlock::iterator MBBI,
  375. bool &Advance,
  376. MachineBasicBlock::iterator &I) {
  377. MachineInstr *MI = MBBI;
  378. unsigned Base = MI->getOperand(0).getReg();
  379. unsigned Bytes = getLSMultipleTransferSize(MI);
  380. unsigned PredReg = 0;
  381. ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  382. int Opcode = MI->getOpcode();
  383. bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
  384. Opcode == ARM::STM || Opcode == ARM::t2STM;
  385. if (isAM4) {
  386. if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
  387. return false;
  388. // Can't use the updating AM4 sub-mode if the base register is also a dest
  389. // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  390. for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
  391. if (MI->getOperand(i).getReg() == Base)
  392. return false;
  393. }
  394. ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  395. if (MBBI != MBB.begin()) {
  396. MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
  397. if (Mode == ARM_AM::ia &&
  398. isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
  399. MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
  400. MI->getOperand(4).setReg(Base);
  401. MI->getOperand(4).setIsDef();
  402. MBB.erase(PrevMBBI);
  403. return true;
  404. } else if (Mode == ARM_AM::ib &&
  405. isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
  406. MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
  407. MI->getOperand(4).setReg(Base); // WB to base
  408. MI->getOperand(4).setIsDef();
  409. MBB.erase(PrevMBBI);
  410. return true;
  411. }
  412. }
  413. if (MBBI != MBB.end()) {
  414. MachineBasicBlock::iterator NextMBBI = next(MBBI);
  415. if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
  416. isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
  417. MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
  418. MI->getOperand(4).setReg(Base); // WB to base
  419. MI->getOperand(4).setIsDef();
  420. if (NextMBBI == I) {
  421. Advance = true;
  422. ++I;
  423. }
  424. MBB.erase(NextMBBI);
  425. return true;
  426. } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
  427. isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
  428. MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
  429. MI->getOperand(4).setReg(Base); // WB to base
  430. MI->getOperand(4).setIsDef();
  431. if (NextMBBI == I) {
  432. Advance = true;
  433. ++I;
  434. }
  435. MBB.erase(NextMBBI);
  436. return true;
  437. }
  438. }
  439. } else {
  440. // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
  441. if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
  442. return false;
  443. ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
  444. unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
  445. if (MBBI != MBB.begin()) {
  446. MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
  447. if (Mode == ARM_AM::ia &&
  448. isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
  449. MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
  450. MI->getOperand(4).setReg(Base); // WB to base
  451. MI->getOperand(4).setIsDef();
  452. MBB.erase(PrevMBBI);
  453. return true;
  454. }
  455. }
  456. if (MBBI != MBB.end()) {
  457. MachineBasicBlock::iterator NextMBBI = next(MBBI);
  458. if (Mode == ARM_AM::ia &&
  459. isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
  460. MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
  461. MI->getOperand(4).setReg(Base); // WB to base
  462. MI->getOperand(4).setIsDef();
  463. if (NextMBBI == I) {
  464. Advance = true;
  465. ++I;
  466. }
  467. MBB.erase(NextMBBI);
  468. return true;
  469. }
  470. }
  471. }
  472. return false;
  473. }
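  /// getPreIndexedLoadStoreOpcode - Map a load / store opcode to the opcode
  /// used when a base update is folded in as a pre-indexed access; the VFP
  /// FLDS / FSTS / FLDD / FSTD map to their single-register FLDM / FSTM forms.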
  474. static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  475. switch (Opc) {
  476. case ARM::LDR: return ARM::LDR_PRE;
  477. case ARM::STR: return ARM::STR_PRE;
  478. case ARM::FLDS: return ARM::FLDMS;
  479. case ARM::FLDD: return ARM::FLDMD;
  480. case ARM::FSTS: return ARM::FSTMS;
  481. case ARM::FSTD: return ARM::FSTMD;
  482. case ARM::t2LDRi8:
  483. case ARM::t2LDRi12:
  484. return ARM::t2LDR_PRE;
  485. case ARM::t2STRi8:
  486. case ARM::t2STRi12:
  487. return ARM::t2STR_PRE;
  488. default: llvm_unreachable("Unhandled opcode!");
  489. }
  490. return 0;
  491. }
  492. static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  493. switch (Opc) {
  494. case ARM::LDR: return ARM::LDR_POST;
  495. case ARM::STR: return ARM::STR_POST;
  496. case ARM::FLDS: return ARM::FLDMS;
  497. case ARM::FLDD: return ARM::FLDMD;
  498. case ARM::FSTS: return ARM::FSTMS;
  499. case ARM::FSTD: return ARM::FSTMD;
  500. case ARM::t2LDRi8:
  501. case ARM::t2LDRi12:
  502. return ARM::t2LDR_POST;
  503. case ARM::t2STRi8:
  504. case ARM::t2STRi12:
  505. return ARM::t2STR_POST;
  506. default: llvm_unreachable("Unhandled opcode!");
  507. }
  508. return 0;
  509. }
  510. /// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
  511. /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
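  /// For example (illustrative):
  ///   ldr r1, [r0]
  ///   add r0, r0, #4
  /// =>
  ///   ldr r1, [r0], #4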
  512. bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
  513. MachineBasicBlock::iterator MBBI,
  514. const TargetInstrInfo *TII,
  515. bool &Advance,
  516. MachineBasicBlock::iterator &I) {
  517. MachineInstr *MI = MBBI;
  518. unsigned Base = MI->getOperand(1).getReg();
  519. bool BaseKill = MI->getOperand(1).isKill();
  520. unsigned Bytes = getLSMultipleTransferSize(MI);
  521. int Opcode = MI->getOpcode();
  522. DebugLoc dl = MI->getDebugLoc();
  523. bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
  524. Opcode == ARM::FSTD || Opcode == ARM::FSTS;
  525. bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  526. if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
  527. return false;
  528. else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
  529. return false;
  530. else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
  531. if (MI->getOperand(2).getImm() != 0)
  532. return false;
  533. bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  534. // Can't do the merge if the destination register is the same as the would-be
  535. // writeback register.
  536. if (isLd && MI->getOperand(0).getReg() == Base)
  537. return false;
  538. unsigned PredReg = 0;
  539. ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  540. bool DoMerge = false;
  541. ARM_AM::AddrOpc AddSub = ARM_AM::add;
  542. unsigned NewOpc = 0;
  543. // AM2 - 12 bits, thumb2 - 8 bits.
  544. unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  545. if (MBBI != MBB.begin()) {
  546. MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
  547. if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
  548. DoMerge = true;
  549. AddSub = ARM_AM::sub;
  550. NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
  551. } else if (!isAM5 &&
  552. isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
  553. DoMerge = true;
  554. NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
  555. }
  556. if (DoMerge)
  557. MBB.erase(PrevMBBI);
  558. }
  559. if (!DoMerge && MBBI != MBB.end()) {
  560. MachineBasicBlock::iterator NextMBBI = next(MBBI);
  561. if (!isAM5 &&
  562. isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
  563. DoMerge = true;
  564. AddSub = ARM_AM::sub;
  565. NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
  566. } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
  567. DoMerge = true;
  568. NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
  569. }
  570. if (DoMerge) {
  571. if (NextMBBI == I) {
  572. Advance = true;
  573. ++I;
  574. }
  575. MBB.erase(NextMBBI);
  576. }
  577. }
  578. if (!DoMerge)
  579. return false;
  580. bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  581. unsigned Offset = 0;
  582. if (isAM5)
  583. Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
  584. ? ARM_AM::db
  585. : ARM_AM::ia, true, (isDPR ? 2 : 1));
  586. else if (isAM2)
  587. Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  588. else
  589. Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  590. if (isLd) {
  591. if (isAM5)
  592. // FLDMS, FLDMD
  593. BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
  594. .addReg(Base, getKillRegState(BaseKill))
  595. .addImm(Offset).addImm(Pred).addReg(PredReg)
  596. .addReg(Base, getDefRegState(true)) // WB base register
  597. .addReg(MI->getOperand(0).getReg(), RegState::Define);
  598. else if (isAM2)
  599. // LDR_PRE, LDR_POST,
  600. BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
  601. .addReg(Base, RegState::Define)
  602. .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
  603. else
  604. // t2LDR_PRE, t2LDR_POST
  605. BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
  606. .addReg(Base, RegState::Define)
  607. .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  608. } else {
  609. MachineOperand &MO = MI->getOperand(0);
  610. if (isAM5)
  611. // FSTMS, FSTMD
  612. BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
  613. .addImm(Pred).addReg(PredReg)
  614. .addReg(Base, getDefRegState(true)) // WB base register
  615. .addReg(MO.getReg(), getKillRegState(MO.isKill()));
  616. else if (isAM2)
  617. // STR_PRE, STR_POST
  618. BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
  619. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  620. .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
  621. else
  622. // t2STR_PRE, t2STR_POST
  623. BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
  624. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  625. .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  626. }
  627. MBB.erase(MBBI);
  628. return true;
  629. }
  630. /// isMemoryOp - Returns true if the instruction is a memory operation (one
  631. /// that this pass is capable of operating on).
  632. static bool isMemoryOp(const MachineInstr *MI) {
  633. int Opcode = MI->getOpcode();
  634. switch (Opcode) {
  635. default: break;
  636. case ARM::LDR:
  637. case ARM::STR:
  638. return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  639. case ARM::FLDS:
  640. case ARM::FSTS:
  641. return MI->getOperand(1).isReg();
  642. case ARM::FLDD:
  643. case ARM::FSTD:
  644. return MI->getOperand(1).isReg();
  645. case ARM::t2LDRi8:
  646. case ARM::t2LDRi12:
  647. case ARM::t2STRi8:
  648. case ARM::t2STRi12:
  649. return MI->getOperand(1).isReg();
  650. }
  651. return false;
  652. }
  653. /// AdvanceRS - Advance register scavenger to just before the earliest memory
  654. /// op that is being merged.
  655. void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  656. MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  657. unsigned Position = MemOps[0].Position;
  658. for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
  659. if (MemOps[i].Position < Position) {
  660. Position = MemOps[i].Position;
  661. Loc = MemOps[i].MBBI;
  662. }
  663. }
  664. if (Loc != MBB.begin())
  665. RS->forward(prior(Loc));
  666. }
  667. static int getMemoryOpOffset(const MachineInstr *MI) {
  668. int Opcode = MI->getOpcode();
  669. bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  670. bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  671. unsigned NumOperands = MI->getDesc().getNumOperands();
  672. unsigned OffField = MI->getOperand(NumOperands-3).getImm();
  673. if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
  674. Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
  675. Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
  676. return OffField;
  677. int Offset = isAM2
  678. ? ARM_AM::getAM2Offset(OffField)
  679. : (isAM3 ? ARM_AM::getAM3Offset(OffField)
  680. : ARM_AM::getAM5Offset(OffField) * 4);
  681. if (isAM2) {
  682. if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
  683. Offset = -Offset;
  684. } else if (isAM3) {
  685. if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
  686. Offset = -Offset;
  687. } else {
  688. if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
  689. Offset = -Offset;
  690. }
  691. return Offset;
  692. }
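  /// InsertLDR_STR - Emit a single LDR / STR (ARM or Thumb2) with the given
  /// transfer register, base, offset and predicate. Used by FixInvalidRegPairOp
  /// when an LDRD / STRD has to be split into two ordinary loads / stores.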
  693. static void InsertLDR_STR(MachineBasicBlock &MBB,
  694. MachineBasicBlock::iterator &MBBI,
  695. int OffImm, bool isDef,
  696. DebugLoc dl, unsigned NewOpc,
  697. unsigned Reg, bool RegDeadKill, bool RegUndef,
  698. unsigned BaseReg, bool BaseKill, bool BaseUndef,
  699. unsigned OffReg, bool OffKill, bool OffUndef,
  700. ARMCC::CondCodes Pred, unsigned PredReg,
  701. const TargetInstrInfo *TII, bool isT2) {
  702. int Offset = OffImm;
  703. if (!isT2) {
  704. if (OffImm < 0)
  705. Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
  706. else
  707. Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  708. }
  709. if (isDef) {
  710. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  711. TII->get(NewOpc))
  712. .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
  713. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  714. if (!isT2)
  715. MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
  716. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  717. } else {
  718. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  719. TII->get(NewOpc))
  720. .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
  721. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  722. if (!isT2)
  723. MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
  724. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  725. }
  726. }
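  /// FixInvalidRegPairOp - LDRD / STRD requires an even / odd register pair. If
  /// the operands do not form such a pair, rewrite the instruction as an
  /// LDM / STM (ascending registers, no offset) or as two single loads / stores.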
  727. bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
  728. MachineBasicBlock::iterator &MBBI) {
  729. MachineInstr *MI = &*MBBI;
  730. unsigned Opcode = MI->getOpcode();
  731. if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
  732. Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
  733. unsigned EvenReg = MI->getOperand(0).getReg();
  734. unsigned OddReg = MI->getOperand(1).getReg();
  735. unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  736. unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
  737. if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
  738. return false;
  739. bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  740. bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  741. bool EvenDeadKill = isLd ?
  742. MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  743. bool EvenUndef = MI->getOperand(0).isUndef();
  744. bool OddDeadKill = isLd ?
  745. MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  746. bool OddUndef = MI->getOperand(1).isUndef();
  747. const MachineOperand &BaseOp = MI->getOperand(2);
  748. unsigned BaseReg = BaseOp.getReg();
  749. bool BaseKill = BaseOp.isKill();
  750. bool BaseUndef = BaseOp.isUndef();
  751. unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
  752. bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  753. bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  754. int OffImm = getMemoryOpOffset(MI);
  755. unsigned PredReg = 0;
  756. ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  757. if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
  758. // Ascending register numbers and no offset. It's safe to change it to an
  759. // ldm or stm.
  760. unsigned NewOpc = (isLd)
  761. ? (isT2 ? ARM::t2LDM : ARM::LDM)
  762. : (isT2 ? ARM::t2STM : ARM::STM);
  763. if (isLd) {
  764. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  765. .addReg(BaseReg, getKillRegState(BaseKill))
  766. .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
  767. .addImm(Pred).addReg(PredReg)
  768. .addReg(0)
  769. .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
  770. .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
  771. ++NumLDRD2LDM;
  772. } else {
  773. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  774. .addReg(BaseReg, getKillRegState(BaseKill))
  775. .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
  776. .addImm(Pred).addReg(PredReg)
  777. .addReg(0)
  778. .addReg(EvenReg,
  779. getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
  780. .addReg(OddReg,
  781. getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
  782. ++NumSTRD2STM;
  783. }
  784. } else {
  785. // Split into two instructions.
  786. assert((!isT2 || !OffReg) &&
  787. "Thumb2 ldrd / strd does not encode offset register!");
  788. unsigned NewOpc = (isLd)
  789. ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
  790. : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
  791. DebugLoc dl = MBBI->getDebugLoc();
  792. // If this is a load and the killed base / offset register overlaps the even
  793. // destination, load the odd register first so the base is not clobbered early.
  794. if (isLd &&
  795. (BaseKill || OffKill) &&
  796. (TRI->regsOverlap(EvenReg, BaseReg) ||
  797. (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
  798. assert(!TRI->regsOverlap(OddReg, BaseReg) &&
  799. (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
  800. InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
  801. OddReg, OddDeadKill, false,
  802. BaseReg, false, BaseUndef, OffReg, false, OffUndef,
  803. Pred, PredReg, TII, isT2);
  804. InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
  805. EvenReg, EvenDeadKill, false,
  806. BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
  807. Pred, PredReg, TII, isT2);
  808. } else {
  809. InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
  810. EvenReg, EvenDeadKill, EvenUndef,
  811. BaseReg, false, BaseUndef, OffReg, false, OffUndef,
  812. Pred, PredReg, TII, isT2);
  813. InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
  814. OddReg, OddDeadKill, OddUndef,
  815. BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
  816. Pred, PredReg, TII, isT2);
  817. }
  818. if (isLd)
  819. ++NumLDRD2LDR;
  820. else
  821. ++NumSTRD2STR;
  822. }
  823. MBBI = prior(MBBI);
  824. MBB.erase(MI);
  825. }
  826. return false;
  827. }
  828. /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
  829. /// ops with the same base register and consecutive offsets into LDM / STM ops.
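  /// The block is scanned once; loads / stores with the same base, opcode and
  /// predicate are collected into a MemOpQueue until the chain breaks, then the
  /// queue is handed to MergeLDR_STR and any base inc / dec is folded in.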
  830. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  831. unsigned NumMerges = 0;
  832. unsigned NumMemOps = 0;
  833. MemOpQueue MemOps;
  834. unsigned CurrBase = 0;
  835. int CurrOpc = -1;
  836. unsigned CurrSize = 0;
  837. ARMCC::CondCodes CurrPred = ARMCC::AL;
  838. unsigned CurrPredReg = 0;
  839. unsigned Position = 0;
  840. SmallVector<MachineBasicBlock::iterator,4> Merges;
  841. RS->enterBasicBlock(&MBB);
  842. MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  843. while (MBBI != E) {
  844. if (FixInvalidRegPairOp(MBB, MBBI))
  845. continue;
  846. bool Advance = false;
  847. bool TryMerge = false;
  848. bool Clobber = false;
  849. bool isMemOp = isMemoryOp(MBBI);
  850. if (isMemOp) {
  851. int Opcode = MBBI->getOpcode();
  852. unsigned Size = getLSMultipleTransferSize(MBBI);
  853. unsigned Base = MBBI->getOperand(1).getReg();
  854. unsigned PredReg = 0;
  855. ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
  856. int Offset = getMemoryOpOffset(MBBI);
  857. // Watch out for:
  858. // r4 := ldr [r5]
  859. // r5 := ldr [r5, #4]
  860. // r6 := ldr [r5, #8]
  861. //
  862. // The second ldr has effectively broken the chain even though it
  863. // looks like the later ldr(s) use the same base register. Try to
  864. // merge the ldr's so far, including this one. But don't try to
  865. // combine the following ldr(s).
  866. Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
  867. if (CurrBase == 0 && !Clobber) {
  868. // Start of a new chain.
  869. CurrBase = Base;
  870. CurrOpc = Opcode;
  871. CurrSize = Size;
  872. CurrPred = Pred;
  873. CurrPredReg = PredReg;
  874. MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
  875. NumMemOps++;
  876. Advance = true;
  877. } else {
  878. if (Clobber) {
  879. TryMerge = true;
  880. Advance = true;
  881. }
  882. if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
  883. // No need to match PredReg.
  884. // Continue adding to the queue.
  885. if (Offset > MemOps.back().Offset) {
  886. MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
  887. NumMemOps++;
  888. Advance = true;
  889. } else {
  890. for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
  891. I != E; ++I) {
  892. if (Offset < I->Offset) {
  893. MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
  894. NumMemOps++;
  895. Advance = true;
  896. break;
  897. } else if (Offset == I->Offset) {
  898. // Collision! This can't be merged!
  899. break;
  900. }
  901. }
  902. }
  903. }
  904. }
  905. }
  906. if (Advance) {
  907. ++Position;
  908. ++MBBI;
  909. if (MBBI == E)
  910. // Reached the end of the block; try merging the memory instructions.
  911. TryMerge = true;
  912. } else
  913. TryMerge = true;
  914. if (TryMerge) {
  915. if (NumMemOps > 1) {
  916. // Try to find a free register to use as a new base in case it's needed.
  917. // First advance to the instruction just before the start of the chain.
  918. AdvanceRS(MBB, MemOps);
  919. // Find a scratch register.
  920. unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
  921. // Process the load / store instructions.
  922. RS->forward(prior(MBBI));
  923. // Merge ops.
  924. Merges.clear();
  925. MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
  926. CurrPred, CurrPredReg, Scratch, MemOps, Merges);
  927. // Try folding preceding/trailing base inc/dec into the generated
  928. // LDM/STM ops.
  929. for (unsigned i = 0, e = Merges.size(); i < e; ++i)
  930. if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
  931. ++NumMerges;
  932. NumMerges += Merges.size();
  933. // Try folding preceding/trailing base inc/dec into those loads/stores
  934. // that were not merged to form LDM/STM ops.
  935. for (unsigned i = 0; i != NumMemOps; ++i)
  936. if (!MemOps[i].Merged)
  937. if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
  938. ++NumMerges;
  939. // RS may be pointing to an instruction that's deleted.
  940. RS->skipTo(prior(MBBI));
  941. } else if (NumMemOps == 1) {
  942. // Try folding preceding/trailing base inc/dec into the single
  943. // load/store.
  944. if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
  945. ++NumMerges;
  946. RS->forward(prior(MBBI));
  947. }
  948. }
  949. CurrBase = 0;
  950. CurrOpc = -1;
  951. CurrSize = 0;
  952. CurrPred = ARMCC::AL;
  953. CurrPredReg = 0;
  954. if (NumMemOps) {
  955. MemOps.clear();
  956. NumMemOps = 0;
  957. }
  958. // If iterator hasn't been advanced and this is not a memory op, skip it.
  959. // It can't start a new chain anyway.
  960. if (!Advance && !isMemOp && MBBI != E) {
  961. ++Position;
  962. ++MBBI;
  963. }
  964. }
  965. }
  966. return NumMerges > 0;
  967. }
  968. namespace {
  969. struct OffsetCompare {
  970. bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
  971. int LOffset = getMemoryOpOffset(LHS);
  972. int ROffset = getMemoryOpOffset(RHS);
  973. assert(LHS == RHS || LOffset != ROffset);
  974. return LOffset > ROffset;
  975. }
  976. };
  977. }
  978. /// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
  979. /// (bx lr) into the preceding stack restore so it directly restores the value
  980. /// of LR into the pc.
  981. /// ldmfd sp!, {r7, lr}
  982. /// bx lr
  983. /// =>
  984. /// ldmfd sp!, {r7, pc}
  985. bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  986. if (MBB.empty()) return false;
  987. MachineBasicBlock::iterator MBBI = prior(MBB.end());
  988. if (MBBI != MBB.begin() &&
  989. (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
  990. MachineInstr *PrevMI = prior(MBBI);
  991. if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
  992. MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
  993. if (MO.getReg() != ARM::LR)
  994. return false;
  995. unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
  996. PrevMI->setDesc(TII->get(NewOpc));
  997. MO.setReg(ARM::PC);
  998. MBB.erase(MBBI);
  999. return true;
  1000. }
  1001. }
  1002. return false;
  1003. }
  1004. bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1005. const TargetMachine &TM = Fn.getTarget();
  1006. AFI = Fn.getInfo<ARMFunctionInfo>();
  1007. TII = TM.getInstrInfo();
  1008. TRI = TM.getRegisterInfo();
  1009. RS = new RegScavenger();
  1010. isThumb2 = AFI->isThumb2Function();
  1011. bool Modified = false;
  1012. for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
  1013. ++MFI) {
  1014. MachineBasicBlock &MBB = *MFI;
  1015. Modified |= LoadStoreMultipleOpti(MBB);
  1016. Modified |= MergeReturnIntoLDM(MBB);
  1017. }
  1018. delete RS;
  1019. return Modified;
  1020. }
  1021. /// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
  1022. /// loads / stores from consecutive locations closer together to make it
  1023. /// more likely they will be combined later.
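  /// For example (illustrative), moving
  ///   ldr r0, [r2]
  ///   ...
  ///   ldr r1, [r2, #4]
  /// next to each other lets them be combined into an ldrd here, or into an
  /// ldm by the post-allocation pass.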
  1024. namespace {
  1025. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  1026. static char ID;
  1027. ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
  1028. const TargetData *TD;
  1029. const TargetInstrInfo *TII;
  1030. const TargetRegisterInfo *TRI;
  1031. const ARMSubtarget *STI;
  1032. MachineRegisterInfo *MRI;
  1033. MachineFunction *MF;
  1034. virtual bool runOnMachineFunction(MachineFunction &Fn);
  1035. virtual const char *getPassName() const {
  1036. return "ARM pre- register allocation load / store optimization pass";
  1037. }
  1038. private:
  1039. bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
  1040. unsigned &NewOpc, unsigned &EvenReg,
  1041. unsigned &OddReg, unsigned &BaseReg,
  1042. unsigned &OffReg, int &Offset,
  1043. unsigned &PredReg, ARMCC::CondCodes &Pred,
  1044. bool &isT2);
  1045. bool RescheduleOps(MachineBasicBlock *MBB,
  1046. SmallVector<MachineInstr*, 4> &Ops,
  1047. unsigned Base, bool isLd,
  1048. DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  1049. bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  1050. };
  1051. char ARMPreAllocLoadStoreOpt::ID = 0;
  1052. }
  1053. bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1054. TD = Fn.getTarget().getTargetData();
  1055. TII = Fn.getTarget().getInstrInfo();
  1056. TRI = Fn.getTarget().getRegisterInfo();
  1057. STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  1058. MRI = &Fn.getRegInfo();
  1059. MF = &Fn;
  1060. bool Modified = false;
  1061. for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
  1062. ++MFI)
  1063. Modified |= RescheduleLoadStoreInstrs(MFI);
  1064. return Modified;
  1065. }
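  /// IsSafeAndProfitableToMove - Return true if the instructions between I and
  /// E allow the loads / stores in MemOps to be moved together: no intervening
  /// call, store (or any memory op when moving stores), or redefinition of
  /// Base, and only a modest increase in register pressure.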
  1066. static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
  1067. MachineBasicBlock::iterator I,
  1068. MachineBasicBlock::iterator E,
  1069. SmallPtrSet<MachineInstr*, 4> &MemOps,
  1070. SmallSet<unsigned, 4> &MemRegs,
  1071. const TargetRegisterInfo *TRI) {
  1072. // Are there stores / loads / calls between them?
  1073. // FIXME: This is overly conservative. We should make use of alias information
  1074. // some day.
  1075. SmallSet<unsigned, 4> AddedRegPressure;
  1076. while (++I != E) {
  1077. if (MemOps.count(&*I))
  1078. continue;
  1079. const TargetInstrDesc &TID = I->getDesc();
  1080. if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
  1081. return false;
  1082. if (isLd && TID.mayStore())
  1083. return false;
  1084. if (!isLd) {
  1085. if (TID.mayLoad())
  1086. return false;
  1087. // It's not safe to move the first 'str' down.
  1088. // str r1, [r0]
  1089. // strh r5, [r0]
  1090. // str r4, [r0, #+4]
  1091. if (TID.mayStore())
  1092. return false;
  1093. }
  1094. for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
  1095. MachineOperand &MO = I->getOperand(j);
  1096. if (!MO.isReg())
  1097. continue;
  1098. unsigned Reg = MO.getReg();
  1099. if (MO.isDef() && TRI->regsOverlap(Reg, Base))
  1100. return false;
  1101. if (Reg != Base && !MemRegs.count(Reg))
  1102. AddedRegPressure.insert(Reg);
  1103. }
  1104. }
  1105. // Estimate register pressure increase due to the transformation.
  1106. if (MemRegs.size() <= 4)
  1107. // OK if we are moving a small number of instructions.
  1108. return true;
  1109. return AddedRegPressure.size() <= MemRegs.size() * 2;
  1110. }
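  /// CanFormLdStDWord - Check whether the pair Op0 / Op1 can be combined into a
  /// single LDRD / STRD (target support, matching offset register, alignment,
  /// offset range), and if so compute the new opcode, registers and offset.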
  1111. bool
  1112. ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
  1113. DebugLoc &dl,
  1114. unsigned &NewOpc, unsigned &EvenReg,
  1115. unsigned &OddReg, unsigned &BaseReg,
  1116. unsigned &OffReg, int &Offset,
  1117. unsigned &PredReg,
  1118. ARMCC::CondCodes &Pred,
  1119. bool &isT2) {
  1120. // Make sure we're allowed to generate LDRD/STRD.
  1121. if (!STI->hasV5TEOps())
  1122. return false;
  1123. // FIXME: FLDS / FSTS -> FLDD / FSTD
  1124. unsigned Scale = 1;
  1125. unsigned Opcode = Op0->getOpcode();
  1126. if (Opcode == ARM::LDR)
  1127. NewOpc = ARM::LDRD;
  1128. else if (Opcode == ARM::STR)
  1129. NewOpc = ARM::STRD;
  1130. else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
  1131. NewOpc = ARM::t2LDRDi8;
  1132. Scale = 4;
  1133. isT2 = true;
  1134. } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
  1135. NewOpc = ARM::t2STRDi8;
  1136. Scale = 4;
  1137. isT2 = true;
  1138. } else
  1139. return false;
  1140. // Make sure the offset registers match.
  1141. if (!isT2 &&
  1142. (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
  1143. return false;
  1144. // Make sure the base address satisfies the i64 ld / st alignment requirement.
  1145. if (!Op0->hasOneMemOperand() ||
  1146. !(*Op0->memoperands_begin())->getValue() ||
  1147. (*Op0->memoperands_begin())->isVolatile())
  1148. return false;
  1149. unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  1150. Function *Func = MF->getFunction();
  1151. unsigned ReqAlign = STI->hasV6Ops()
  1152. ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
  1153. : 8; // Pre-v6 needs 8-byte alignment.
  1154. if (Align < ReqAlign)
  1155. return false;
  1156. // Then make sure the immediate offset fits.
  1157. int OffImm = getMemoryOpOffset(Op0);
  1158. if (isT2) {
  1159. if (OffImm < 0) {
  1160. if (OffImm < -255)
  1161. // Can't fall back to t2LDRi8 / t2STRi8.
  1162. return false;
  1163. } else {
  1164. int Limit = (1 << 8) * Scale;
  1165. if (OffImm >= Limit || (OffImm & (Scale-1)))
  1166. return false;
  1167. }
  1168. Offset = OffImm;
  1169. } else {
  1170. ARM_AM::AddrOpc AddSub = ARM_AM::add;
  1171. if (OffImm < 0) {
  1172. AddSub = ARM_AM::sub;
  1173. OffImm = - OffImm;
  1174. }
  1175. int Limit = (1 << 8) * Scale;
  1176. if (OffImm >= Limit || (OffImm & (Scale-1)))
  1177. return false;
  1178. Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  1179. }
  1180. EvenReg = Op0->getOperand(0).getReg();
  1181. OddReg = Op1->getOperand(0).getReg();
  1182. if (EvenReg == OddReg)
  1183. return false;
  1184. BaseReg = Op0->getOperand(1).getReg();
  1185. if (!isT2)
  1186. OffReg = Op0->getOperand(2).getReg();
  1187. Pred = llvm::getInstrPredicate(Op0, PredReg);
  1188. dl = Op0->getDebugLoc();
  1189. return true;
  1190. }
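  /// RescheduleOps - Ops holds this block's loads or stores that use one base
  /// register. Move runs with consecutive offsets next to each other, turning a
  /// suitable pair directly into an LDRD / STRD with register-allocation hints,
  /// so the post-allocation pass can merge the rest.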
  1191. bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
  1192. SmallVector<MachineInstr*, 4> &Ops,
  1193. unsigned Base, bool isLd,
  1194. DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  1195. bool RetVal = false;
  1196. // Sort by offset (in reverse order).
  1197. std::sort(Ops.begin(), Ops.end(), OffsetCompare());
  1198. // The loads / stores of the same base are in order. Scan them from first to
  1199. // last and check for the following:
  1200. // 1. Any def of base.
  1201. // 2. Any gaps.
  1202. while (Ops.size() > 1) {
  1203. unsigned FirstLoc = ~0U;
  1204. unsigned LastLoc = 0;
  1205. MachineInstr *FirstOp = 0;
  1206. MachineInstr *LastOp = 0;
  1207. int LastOffset = 0;
  1208. unsigned LastOpcode = 0;
  1209. unsigned LastBytes = 0;
  1210. unsigned NumMove = 0;
  1211. for (int i = Ops.size() - 1; i >= 0; --i) {
  1212. MachineInstr *Op = Ops[i];
  1213. unsigned Loc = MI2LocMap[Op];
  1214. if (Loc <= FirstLoc) {
  1215. FirstLoc = Loc;
  1216. FirstOp = Op;
  1217. }
  1218. if (Loc >= LastLoc) {
  1219. LastLoc = Loc;
  1220. LastOp = Op;
  1221. }
  1222. unsigned Opcode = Op->getOpcode();
  1223. if (LastOpcode && Opcode != LastOpcode)
  1224. break;
  1225. int Offset = getMemoryOpOffset(Op);
  1226. unsigned Bytes = getLSMultipleTransferSize(Op);
  1227. if (LastBytes) {
  1228. if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
  1229. break;
  1230. }
  1231. LastOffset = Offset;
  1232. LastBytes = Bytes;
  1233. LastOpcode = Opcode;
  1234. if (++NumMove == 8) // FIXME: Tune this limit.
  1235. break;
  1236. }
  1237. if (NumMove <= 1)
  1238. Ops.pop_back();
  1239. else {
  1240. SmallPtrSet<MachineInstr*, 4> MemOps;
  1241. SmallSet<unsigned, 4> MemRegs;
  1242. for (int i = NumMove-1; i >= 0; --i) {
  1243. MemOps.insert(Ops[i]);
  1244. MemRegs.insert(Ops[i]->getOperand(0).getReg());
  1245. }
  1246. // Be conservative: if the instructions are too far apart, don't
  1247. // move them. We want to limit the increase in register pressure.
  1248. bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
  1249. if (DoMove)
  1250. DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
  1251. MemOps, MemRegs, TRI);
  1252. if (!DoMove) {
  1253. for (unsigned i = 0; i != NumMove; ++i)
  1254. Ops.pop_back();
  1255. } else {
  1256. // This is the new location for the loads / stores.
  1257. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
  1258. while (InsertPos != MBB->end() && MemOps.count(InsertPos))
  1259. ++InsertPos;
  1260. // If we are moving a pair of loads / stores, see if it makes sense
  1261. // to try to allocate a pair of registers that can form a register pair.
  1262. MachineInstr *Op0 = Ops.back();
  1263. MachineInstr *Op1 = Ops[Ops.size()-2];
  1264. unsigned EvenReg = 0, OddReg = 0;
  1265. unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
  1266. ARMCC::CondCodes Pred = ARMCC::AL;
  1267. bool isT2 = false;
  1268. unsigned NewOpc = 0;
  1269. int Offset = 0;
  1270. DebugLoc dl;
  1271. if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
  1272. EvenReg, OddReg, BaseReg, OffReg,
  1273. Offset, PredReg, Pred, isT2)) {
  1274. Ops.pop_back();
  1275. Ops.pop_back();
  1276. // Form the pair instruction.
  1277. if (isLd) {
  1278. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
  1279. dl, TII->get(NewOpc))
  1280. .addReg(EvenReg, RegState::Define)
  1281. .addReg(OddReg, RegState::Define)
  1282. .addReg(BaseReg);
  1283. if (!isT2)
  1284. MIB.addReg(OffReg);
  1285. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1286. ++NumLDRDFormed;
  1287. } else {
  1288. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
  1289. dl, TII->get(NewOpc))
  1290. .addReg(EvenReg)
  1291. .addReg(OddReg)
  1292. .addReg(BaseReg);
  1293. if (!isT2)
  1294. MIB.addReg(OffReg);
  1295. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1296. ++NumSTRDFormed;
  1297. }
  1298. MBB->erase(Op0);
  1299. MBB->erase(Op1);
  1300. // Add register allocation hints to form register pairs.
  1301. MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
  1302. MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
  1303. } else {
  1304. for (unsigned i = 0; i != NumMove; ++i) {
  1305. MachineInstr *Op = Ops.back();
  1306. Ops.pop_back();
  1307. MBB->splice(InsertPos, MBB, Op);
  1308. }
  1309. }
  1310. NumLdStMoved += NumMove;
  1311. RetVal = true;
  1312. }
  1313. }
  1314. }
  1315. return RetVal;
  1316. }
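  /// RescheduleLoadStoreInstrs - Scan the block between barriers (calls and
  /// terminators), bucket unpredicated loads and stores by base register, and
  /// reschedule each bucket with RescheduleOps.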
  1317. bool
  1318. ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  1319. bool RetVal = false;
  1320. DenseMap<MachineInstr*, unsigned> MI2LocMap;
  1321. DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  1322. DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  1323. SmallVector<unsigned, 4> LdBases;
  1324. SmallVector<unsigned, 4> StBases;
  1325. unsigned Loc = 0;
  1326. MachineBasicBlock::iterator MBBI = MBB->begin();
  1327. MachineBasicBlock::iterator E = MBB->end();
  1328. while (MBBI != E) {
  1329. for (; MBBI != E; ++MBBI) {
  1330. MachineInstr *MI = MBBI;
  1331. const TargetInstrDesc &TID = MI->getDesc();
  1332. if (TID.isCall() || TID.isTerminator()) {
  1333. // Stop at barriers.
  1334. ++MBBI;
  1335. break;
  1336. }
  1337. MI2LocMap[MI] = Loc++;
  1338. if (!isMemoryOp(MI))
  1339. continue;
  1340. unsigned PredReg = 0;
  1341. if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
  1342. continue;
  1343. int Opc = MI->getOpcode();
  1344. bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
  1345. unsigned Base = MI->getOperand(1).getReg();
  1346. int Offset = getMemoryOpOffset(MI);
  1347. bool StopHere = false;
  1348. if (isLd) {
  1349. DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
  1350. Base2LdsMap.find(Base);
  1351. if (BI != Base2LdsMap.end()) {
  1352. for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
  1353. if (Offset == getMemoryOpOffset(BI->second[i])) {
  1354. StopHere = true;
  1355. break;
  1356. }
  1357. }
  1358. if (!StopHere)
  1359. BI->second.push_back(MI);
  1360. } else {
  1361. SmallVector<MachineInstr*, 4> MIs;
  1362. MIs.push_back(MI);
  1363. Base2LdsMap[Base] = MIs;
  1364. LdBases.push_back(Base);
  1365. }
  1366. } else {
  1367. DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
  1368. Base2StsMap.find(Base);
  1369. if (BI != Base2StsMap.end()) {
  1370. for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
  1371. if (Offset == getMemoryOpOffset(BI->second[i])) {
  1372. StopHere = true;
  1373. break;
  1374. }
  1375. }
  1376. if (!StopHere)
  1377. BI->second.push_back(MI);
  1378. } else {
  1379. SmallVector<MachineInstr*, 4> MIs;
  1380. MIs.push_back(MI);
  1381. Base2StsMap[Base] = MIs;
  1382. StBases.push_back(Base);
  1383. }
  1384. }
  1385. if (StopHere) {
  1386. // Found a duplicate (a base+offset combination that was seen earlier).
  1387. // Backtrack.
  1388. --Loc;
  1389. break;
  1390. }
  1391. }
  1392. // Re-schedule loads.
  1393. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
  1394. unsigned Base = LdBases[i];
  1395. SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
  1396. if (Lds.size() > 1)
  1397. RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
  1398. }
  1399. // Re-schedule stores.
  1400. for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
  1401. unsigned Base = StBases[i];
  1402. SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
  1403. if (Sts.size() > 1)
  1404. RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
  1405. }
  1406. if (MBBI != E) {
  1407. Base2LdsMap.clear();
  1408. Base2StsMap.clear();
  1409. LdBases.clear();
  1410. StBases.clear();
  1411. }
  1412. }
  1413. return RetVal;
  1414. }
  1415. /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
  1416. /// optimization pass.
  1417. FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  1418. if (PreAlloc)
  1419. return new ARMPreAllocLoadStoreOpt();
  1420. return new ARMLoadStoreOpt();
  1421. }